From 0688724f03dd808becdf1249e13b962ccf8db446 Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Wed, 31 Dec 2025 19:43:19 -0800 Subject: [PATCH] WIP --- src/pdf/font.rs | 62 +++++++++++++++++++++- src/pdf/parse.rs | 21 ++++++-- src/pdf/render.rs | 127 ++++++++++++++++++++++++++++++---------------- 3 files changed, 160 insertions(+), 50 deletions(-) diff --git a/src/pdf/font.rs b/src/pdf/font.rs index 2196bf6..5f096b4 100644 --- a/src/pdf/font.rs +++ b/src/pdf/font.rs @@ -246,6 +246,10 @@ impl fmt::Debug for PdfFontDescriptor { } } +impl PdfFontDescriptor { + pub const DEFAULT_MISSING_WIDTH: f32 = 0.0; +} + pdf_parse! { #[pdf(name)] #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)] @@ -520,6 +524,15 @@ impl PdfParse for PdfFontType1 { } } +impl GetPdfInputPosition for PdfFontType1 { + fn get_pdf_input_position(&self) -> PdfInputPosition { + match self { + PdfFontType1::Standard(v) => v.get_pdf_input_position(), + PdfFontType1::Other(v) => v.get_pdf_input_position(), + } + } +} + #[derive(Clone)] pub struct PdfFontType1Common { pub ty: PdfFontType, @@ -535,6 +548,29 @@ pub struct PdfFontType1Common { pub rest: PdfDictionary, } +impl PdfFontType1Common { + fn validate_first_last_char_and_widths( + pos: PdfInputPosition, + first_char: Option, + last_char: Option, + widths: Option<&[f32]>, + ) -> Result<(), PdfParseError> { + if first_char.is_some() || last_char.is_some() || widths.is_some() { + let (Some(first_char), Some(last_char), Some(widths)) = (first_char, last_char, widths) + else { + return Err(PdfParseError::InvalidFontFirstLastCharWidths { pos }); + }; + let Some(widths_len) = (u64::from(last_char) + 1).checked_sub(first_char.into()) else { + return Err(PdfParseError::InvalidFontFirstLastCharWidths { pos }); + }; + if u64::try_from(widths.len()).ok() != Some(widths_len) { + return Err(PdfParseError::InvalidFontFirstLastCharWidths { pos }); + } + } + Ok(()) + } +} + impl fmt::Debug for PdfFontType1Common { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { DagDebugState::scope(|_state| { @@ -568,8 +604,16 @@ impl fmt::Debug for PdfFontType1Common { } } +impl GetPdfInputPosition for PdfFontType1Common { + fn get_pdf_input_position(&self) -> PdfInputPosition { + self.rest.pos() + } +} + pdf_parse! { - #[pdf] + #[pdf(validate = |pos| { + PdfFontType1Common::validate_first_last_char_and_widths(pos, first_char, last_char, widths.as_deref())?; + })] #[derive(Clone)] pub struct PdfFontType1Standard { #[pdf(name = "Type")] @@ -661,8 +705,16 @@ impl PdfFontType1Standard { } } +impl GetPdfInputPosition for PdfFontType1Standard { + fn get_pdf_input_position(&self) -> PdfInputPosition { + self.rest.pos() + } +} + pdf_parse! { - #[pdf] + #[pdf(validate = |pos| { + PdfFontType1Common::validate_first_last_char_and_widths(pos, Some(first_char), Some(last_char), Some(&widths))?; + })] #[derive(Clone)] pub struct PdfFontType1Other { #[pdf(name = "Type")] @@ -754,6 +806,12 @@ impl PdfFontType1Other { } } +impl GetPdfInputPosition for PdfFontType1Other { + fn get_pdf_input_position(&self) -> PdfInputPosition { + self.rest.pos() + } +} + pdf_parse! { #[pdf(name)] #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] diff --git a/src/pdf/parse.rs b/src/pdf/parse.rs index 4e5502a..3f0ff21 100644 --- a/src/pdf/parse.rs +++ b/src/pdf/parse.rs @@ -303,6 +303,9 @@ pub enum PdfParseError { InvalidUtf16 { pos: PdfInputPosition, }, + InvalidFontFirstLastCharWidths { + pos: PdfInputPosition, + }, } impl From for PdfParseError { @@ -354,7 +357,8 @@ impl GetPdfInputPosition for PdfParseError { | PdfParseError::MissingBeginTextOperator { pos } | PdfParseError::MissingSetFontOperator { pos } | PdfParseError::InvalidTokenInToUnicodeStream { pos, .. } - | PdfParseError::InvalidUtf16 { pos } => pos, + | PdfParseError::InvalidUtf16 { pos } + | PdfParseError::InvalidFontFirstLastCharWidths { pos } => pos, PdfParseError::OperatorNotAllowedHere { ref operator } => operator.pos(), PdfParseError::OperatorHasTooFewOperands { ref operator } | PdfParseError::OperatorHasTooManyOperands { ref operator } => operator.pos(), @@ -540,6 +544,9 @@ impl fmt::Display for PdfParseError { PdfParseError::InvalidUtf16 { pos } => { write!(f, "at {pos}: invalid UTF-16") } + PdfParseError::InvalidFontFirstLastCharWidths { pos } => { + write!(f, "at {pos}: invalid font first/last_char and/or widths") + } } } } @@ -992,7 +999,7 @@ macro_rules! pdf_parse { }; ( @impl - #[pdf] + #[pdf$(($(validate = |$pos:pat_param| $validate:expr)?))?] struct $Struct:ident$(<$($StructParam:ident $(: $StructBound:tt)?),* $(,)?>)? { $($(#[$($field_meta:tt)*])* $field_name:ident: $field_ty:ty,)* @@ -1046,6 +1053,10 @@ macro_rules! pdf_parse { $(#[$($field_meta)*])* $field_name: $field_ty })* + $($({ + let $pos = pos; + $validate + })?)? $crate::__std::result::Result::Ok(Self { $($field_name,)* }) @@ -1085,7 +1096,8 @@ macro_rules! pdf_parse { [$(#[$($field_meta:tt)*])*] $field_name:ident: $field_ty:ty ) => { - let $field_name = <$field_ty as $crate::pdf::parse::PdfParse>::parse( + #[allow(unused_mut)] + let mut $field_name = <$field_ty as $crate::pdf::parse::PdfParse>::parse( $crate::pdf::object::PdfObject::Dictionary( $crate::pdf::object::PdfDictionary::from_fields($pos, $object), ), @@ -1097,7 +1109,8 @@ macro_rules! pdf_parse { $field_name:ident: $field_ty:ty ) => { let $field_name = $crate::__std::convert::AsRef::<[$crate::__std::primitive::u8]>::as_ref($name); - let $field_name = <$field_ty as $crate::pdf::parse::PdfParse>::parse( + #[allow(unused_mut)] + let mut $field_name = <$field_ty as $crate::pdf::parse::PdfParse>::parse( $object_mut .remove($field_name) .unwrap_or($crate::pdf::object::PdfObject::Null($crate::pdf::object::PdfNull::new($pos))), diff --git a/src/pdf/render.rs b/src/pdf/render.rs index 586b6c4..4603ae1 100644 --- a/src/pdf/render.rs +++ b/src/pdf/render.rs @@ -35,12 +35,15 @@ use crate::{ PdfOperatorUnparsed, }, document_structure::{PdfPage, PdfResourcesDictionary}, - font::{PdfFont, PdfSimpleFontEncodingTableEntry, PdfTodo}, + font::{PdfFont, PdfFontDescriptor, PdfSimpleFontEncodingTableEntry, PdfTodo}, object::{ - IsPdfNull, PdfMatrix, PdfName, PdfNumber, PdfObject, PdfObjectDirect, + IsPdfNull, PdfMatrix, PdfName, PdfNumber, PdfObject, PdfObjectDirect, PdfString, PdfStringOrNumber, PdfVec2D, }, - parse::{PdfInputPosition, PdfInputPositionNoCompare, PdfParse, PdfParseError}, + parse::{ + GetPdfInputPosition, PdfInputPosition, PdfInputPositionNoCompare, PdfParse, + PdfParseError, + }, }, pdf_parse, }; @@ -270,8 +273,7 @@ impl PdfGraphicsState { pos: PdfInputPosition, glyph_displacement: PdfVec2D, position_adjustment: f32, - has_char_spacing: bool, - has_word_spacing: bool, + use_word_spacing: bool, ) -> Result<(), PdfParseError> { let text_object = PdfTextObjectState::require(self.text_state.text_object.as_mut(), pos)?; let (tx, ty) = if self @@ -283,21 +285,19 @@ impl PdfGraphicsState { { let mut ty = (glyph_displacement.y - position_adjustment * 1e-3) * self.text_state.font_size; - if has_char_spacing { - ty += self.text_state.char_spacing; - } - if has_word_spacing { + if use_word_spacing { ty += self.text_state.word_spacing; + } else { + ty += self.text_state.char_spacing; } (0.0, ty) } else { let mut tx = (glyph_displacement.x - position_adjustment * 1e-3) * self.text_state.font_size; - if has_char_spacing { - tx += self.text_state.char_spacing; - } - if has_word_spacing { + if use_word_spacing { tx += self.text_state.word_spacing; + } else { + tx += self.text_state.char_spacing; } (tx * self.text_state.horizontal_scaling_percent * 1e-2, 0.0) }; @@ -362,6 +362,73 @@ impl<'a> PdfRenderState<'a> { ) -> Result<(), PdfParseError> { todo!() } + pub fn render_string( + &mut self, + operator_pos: PdfInputPosition, + position_adjustment: f32, + s: &PdfString, + ) -> Result<(), PdfParseError> { + for &glyph in s.bytes().iter() { + let font = self + .graphics_state + .text_state + .font + .as_ref() + .ok_or(PdfParseError::MissingSetFontOperator { pos: operator_pos })?; + let PdfFont::Type1(font) = font else { todo!() }; + let Some(encoding) = font.encoding() else { + todo!(); + }; + let Some(widths) = font.widths() else { + todo!(); + }; + let Some(first_char) = font.first_char() else { + todo!(); + }; + let Some(last_char) = font.last_char() else { + todo!(); + }; + let Some(font_descriptor) = font.font_descriptor() else { + todo!(); + }; + let Some(font_program) = font_descriptor + .font_file + .as_ref() + .and_then(|v| v.decoded_data().as_ref().ok()) + else { + todo!(); + }; + if font_program.vertical_writing_mode { + todo!(); + } + let width = if u32::from(glyph) >= first_char && u32::from(glyph) <= last_char { + widths[usize::from(glyph) - first_char as usize] + } else { + font_descriptor + .missing_width + .unwrap_or(PdfFontDescriptor::DEFAULT_MISSING_WIDTH) + }; + todo!("handle position_adjustment"); + let matrix = self.graphics_state.text_rendering_matrix(s.pos())?; + let table = encoding.table(|| font_program.encoding.clone()); + let PdfSimpleFontEncodingTableEntry { + name, + presumed_unicode, + } = table.table[usize::from(glyph)].clone(); + todo!("{name:?} {presumed_unicode:?} {:#?}", font.to_unicode()); + self.graphics_state.advance_text_matrix( + s.pos(), + PdfVec2D { + pos: font.get_pdf_input_position().into(), + x: width * 1e-3, + y: 0.0, + }, + position_adjustment, + glyph == 32, + )?; + } + Ok(()) + } } pub trait PdfRenderOperator: Into { @@ -913,45 +980,17 @@ impl PdfRenderOperator for PdfOperatorShowTextWithGlyphPositioning { pos, ref text_and_positioning, } = *self; - let font = state - .graphics_state - .text_state - .font - .as_ref() - .ok_or(PdfParseError::MissingSetFontOperator { pos: pos.0 })?; - let PdfFont::Type1(font) = font else { todo!() }; let mut positioning = 0.0; for text_or_positioning in text_and_positioning.iter() { match text_or_positioning { PdfStringOrNumber::String(s) => { - for glyph in s.bytes().iter() { - let positioning = std::mem::replace(&mut positioning, 0.0); - let Some(encoding) = font.encoding() else { - todo!(); - }; - let table = encoding.table(|| { - let Some(font_encoding) = font - .font_descriptor() - .and_then(|v| v.font_file.as_ref()) - .and_then(|v| v.decoded_data().as_ref().ok()) - .map(|v| v.encoding.clone()) - else { - todo!() - }; - font_encoding - }); - let PdfSimpleFontEncodingTableEntry { - name, - presumed_unicode, - } = table.table[usize::from(*glyph)].clone(); - todo!("{name:?} {presumed_unicode:?} {:#?}", font.to_unicode()); - } + let positioning = std::mem::replace(&mut positioning, 0.0); + state.render_string(pos.0, positioning, s)?; } PdfStringOrNumber::Number(number) => positioning = number.as_f32(), } } - let _ = state; - todo!("{text_and_positioning:?}") + Ok(()) } }