diff --git a/Cargo.lock b/Cargo.lock index 07f112f..10be7b1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,6 +23,37 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "flate2" version = "1.1.5" @@ -48,6 +79,27 @@ name = "parse_powerisa_pdf" version = "0.1.0" dependencies = [ "flate2", + "rayon", +] + +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index c5d18eb..20ecf46 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,4 +6,5 @@ license = "LGPL-3.0-or-later" [dependencies] flate2 = "1.1.5" +rayon = "1.11.0" diff --git a/src/pdf.rs b/src/pdf.rs index 1933489..be57b31 100644 --- a/src/pdf.rs +++ b/src/pdf.rs @@ -5,22 +5,23 @@ use crate::{ object::{ PdfArray, PdfBoolean, PdfDictionary, PdfInteger, PdfName, PdfNull, PdfObject, PdfObjectIdentifier, PdfObjectIndirect, PdfObjectStreamDictionary, PdfReal, PdfStream, - PdfStreamDictionary, PdfString, UnparsedPdfStreamDictionary, + PdfStreamDictionary, PdfString, }, parse::{ GetPdfInputPosition, PdfInputPosition, PdfInputPositionKnown, PdfParse, PdfParseError, }, }, pdf_parse, - util::ArcOrRef, + util::{ArcOrRef, DagDebugState}, }; use std::{ - collections::BTreeMap, + any::{Any, TypeId}, + collections::{BTreeMap, HashMap}, convert::Infallible, fmt, num::NonZero, str::FromStr, - sync::{Arc, OnceLock}, + sync::{Arc, Mutex, OnceLock}, }; pub mod content_stream; @@ -28,10 +29,100 @@ pub mod document_structure; pub mod font; pub mod object; pub mod parse; +pub mod render; pub mod stream_filters; +struct ParseCache { + parse_results: HashMap>, + steps_till_next_gc: usize, +} + +impl Default for ParseCache { + fn default() -> Self { + Self { + parse_results: HashMap::new(), + steps_till_next_gc: 1, + } + } +} + +impl ParseCache { + fn gc(&mut self) { + if self.steps_till_next_gc == 0 { + self.parse_results.retain(|_k, v| v.strong_count() > 0); + let mut adjusted_len = self.parse_results.len(); + if adjusted_len < 10 { + adjusted_len = 10; + } + self.steps_till_next_gc = adjusted_len.saturating_mul(20); + } else { + self.steps_till_next_gc -= 1; + } + } + fn get(&mut self) -> Option> { + self.gc(); + let Ok(retval) = self + .parse_results + .get(&TypeId::of::())? + .upgrade()? + .downcast() + else { + unreachable!(); + }; + Some(retval) + } + fn get_or_insert( + &mut self, + value: Arc, + ) -> (Arc, impl Sized + use) { + use std::collections::hash_map::Entry; + self.gc(); + match self.parse_results.entry(TypeId::of::()) { + Entry::Occupied(mut entry) => { + if let Some(retval) = entry.get().upgrade() { + let Ok(retval) = retval.downcast::() else { + unreachable!(); + }; + (retval, Some(value)) + } else { + entry.insert(Arc::::downgrade(&value)); + (value, None) + } + } + Entry::Vacant(entry) => { + entry.insert(Arc::::downgrade(&value)); + (value, None) + } + } + } +} + +struct PdfObjectAndParseCache { + object: PdfObject, + parse_cache: Mutex, +} + +impl PdfObjectAndParseCache { + fn new(object: PdfObject) -> Self { + Self { + object, + parse_cache: Mutex::default(), + } + } + fn parse_cache_get(&self) -> Option> { + self.parse_cache.lock().expect("not poisoned").get() + } + fn parse_cache_get_or_insert(&self, value: Arc) -> Arc { + let mut parse_cache = self.parse_cache.lock().expect("not poisoned"); + let (retval, to_drop_after_unlock) = parse_cache.get_or_insert(value); + drop(parse_cache); + drop(to_drop_after_unlock); + retval + } +} + struct PdfObjectsInner { - objects: BTreeMap, + objects: BTreeMap, #[allow(dead_code)] object_streams: Vec>, } @@ -52,7 +143,7 @@ impl PdfHeader { pdf_parse! { #[pdf] - #[derive(Clone, Debug)] + #[derive(Clone)] pub struct PdfTrailerDictionary { #[pdf(name = "Size")] pub size: usize, @@ -71,6 +162,31 @@ pdf_parse! { } } +impl fmt::Debug for PdfTrailerDictionary { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + DagDebugState::scope(|_state| { + let Self { + size, + prev, + root, + encrypt, + info, + id, + rest, + } = self; + f.debug_struct("PdfTrailerDictionary") + .field("size", size) + .field("prev", prev) + .field("root", root) + .field("encrypt", encrypt) + .field("info", info) + .field("id", id) + .field("rest", rest) + .finish() + }) + } +} + pdf_parse! { #[pdf(name)] #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)] @@ -83,7 +199,7 @@ pdf_parse! { pdf_parse! { #[pdf] - #[derive(Clone, Debug)] + #[derive(Clone)] pub struct PdfXRefStreamDictionaryRest { #[pdf(name = "Type")] pub ty: PdfXRefName, @@ -96,7 +212,21 @@ pdf_parse! { } } -#[derive(Clone, Debug)] +impl fmt::Debug for PdfXRefStreamDictionaryRest { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + DagDebugState::scope(|_state| { + let Self { ty, index, w, rest } = self; + f.debug_struct("PdfXRefStreamDictionaryRest") + .field("ty", ty) + .field("index", index) + .field("w", w) + .field("rest", rest) + .finish() + }) + } +} + +#[derive(Clone)] pub enum PdfTrailer { Trailer { trailer_dictionary: PdfTrailerDictionary, @@ -108,6 +238,29 @@ pub enum PdfTrailer { }, } +impl fmt::Debug for PdfTrailer { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + DagDebugState::scope(|_state| match self { + Self::Trailer { + trailer_dictionary, + start_xref, + } => f + .debug_struct("Trailer") + .field("trailer_dictionary", trailer_dictionary) + .field("start_xref", start_xref) + .finish(), + Self::Stream { + xref_stream, + start_xref, + } => f + .debug_struct("Stream") + .field("xref_stream", xref_stream) + .field("start_xref", start_xref) + .finish(), + }) + } +} + impl PdfTrailer { pub fn trailer_dictionary(&self) -> &PdfTrailerDictionary { match self { @@ -722,7 +875,7 @@ impl<'a> PdfParser<'a> { struct PdfFileParser<'a> { parser: PdfParser<'a>, - objects_map: BTreeMap, + objects_map: BTreeMap, } impl<'a> PdfFileParser<'a> { @@ -823,7 +976,11 @@ impl<'a> PdfFileParser<'a> { let Some(PdfToken::Regular(b"endobj")) = self.parser.tokenizer.next() else { return Err(PdfParseError::MissingEndObj { pos: end_obj_pos }); }; - if self.objects_map.insert(id, object).is_some() { + if self + .objects_map + .insert(id, PdfObjectAndParseCache::new(object)) + .is_some() + { Err(PdfParseError::DuplicateIndirectObjectDefinition { pos: id.pos.0, id }) } else { Ok(Some(())) @@ -882,7 +1039,11 @@ impl<'a> PdfFileParser<'a> { let object = parser .parse_object_or_operator()? .error_on_stream_or_operator()?; - if self.objects_map.insert(id, object).is_some() { + if self + .objects_map + .insert(id, PdfObjectAndParseCache::new(object)) + .is_some() + { return Err(PdfParseError::DuplicateIndirectObjectDefinition { pos: id.pos.0, id }); } } @@ -892,7 +1053,7 @@ impl<'a> PdfFileParser<'a> { while let Some(()) = self.parse_indirect_object_definition()? {} let mut object_streams: Vec> = Vec::new(); for object in self.objects_map.values_mut() { - let stream = match object { + let stream = match &object.object { PdfObject::Stream(stream) => stream, PdfObject::Boolean(_) | PdfObject::Integer(_) @@ -907,7 +1068,7 @@ impl<'a> PdfFileParser<'a> { if PdfObjectStreamDictionary::parse_type_from_dictionary(&stream.dictionary().rest) .is_ok() { - object_streams.push(PdfStream::parse(object.clone())?); + object_streams.push(PdfStream::parse(object.object.clone())?); } } for object_stream in &object_streams { @@ -1012,11 +1173,6 @@ impl<'a> PdfFileParser<'a> { self.parse_body()?; self.parse_xref_table()?; let trailer = self.parse_trailer()?; - for page in trailer.trailer_dictionary().root.pages.pages().iter() { - for content in page.contents.iter() { - content.decoded_data().as_ref()?; - } - } Ok(Pdf { header, objects: self.parser.objects, diff --git a/src/pdf/content_stream.rs b/src/pdf/content_stream.rs index 2552df7..79764c2 100644 --- a/src/pdf/content_stream.rs +++ b/src/pdf/content_stream.rs @@ -10,6 +10,10 @@ use crate::{ GetPdfInputPosition, PdfInputPosition, PdfInputPositionKnown, PdfInputPositionNoCompare, PdfParse, PdfParseError, }, + render::{ + PdfColorDeviceGray, PdfColorDeviceRgb, PdfRenderOperator, PdfRenderState, + PdfRenderingIntent, + }, }, util::ArcOrRef, }; @@ -259,6 +263,18 @@ macro_rules! make_pdf_operator_enum { } } } + + impl PdfRenderOperator for $PdfOperatorAndOperands { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + match self { + Self::$Unknown { + operands, + operator, + } => state.handle_unknown_operator(operator, operands), + $(Self::$Variant(v) => <$VariantStruct as PdfRenderOperator>::render(v, state),)* + } + } + } }; ( @impl_variant_parse @@ -459,13 +475,13 @@ make_pdf_operator_enum! { SetStrokeGray(PdfOperatorSetStrokeGray { pos: PdfInputPositionNoCompare, #[parse(gray)] - gray: f32, + gray: PdfColorDeviceGray, }), #[kw = b"g"] SetNonStrokeGray(PdfOperatorSetNonStrokeGray { pos: PdfInputPositionNoCompare, #[parse(gray)] - gray: f32, + gray: PdfColorDeviceGray, }), #[kw = b"gs"] SetGraphicsState(PdfOperatorSetGraphicsState { @@ -570,28 +586,20 @@ make_pdf_operator_enum! { #[kw = b"RG"] SetStrokeRgb(PdfOperatorSetStrokeRgb { pos: PdfInputPositionNoCompare, - #[parse(r)] - r: f32, - #[parse(g)] - g: f32, - #[parse(b)] - b: f32, + #[parse_flat(r, g, b)] + color: PdfColorDeviceRgb, }), #[kw = b"rg"] SetNonStrokeRgb(PdfOperatorSetNonStrokeRgb { pos: PdfInputPositionNoCompare, - #[parse(r)] - r: f32, - #[parse(g)] - g: f32, - #[parse(b)] - b: f32, + #[parse_flat(r, g, b)] + color: PdfColorDeviceRgb, }), #[kw = b"ri"] SetColorRenderingIntent(PdfOperatorSetColorRenderingIntent { pos: PdfInputPositionNoCompare, #[parse(intent)] - intent: PdfName, + intent: PdfRenderingIntent, }), #[kw = b"s"] CloseAndStrokePath(PdfOperatorCloseAndStrokePath { @@ -760,11 +768,19 @@ impl GetPdfInputPosition for PdfOperatorAndOperands { } } -#[derive(Debug, Clone)] +#[derive(Clone)] pub struct PdfContentStreamData { pub operators: Arc<[PdfOperatorAndOperands]>, } +impl fmt::Debug for PdfContentStreamData { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("PdfContentStreamData") + .field("operators", &self.operators) + .finish() + } +} + impl PdfStreamContents for PdfContentStreamData { fn parse( data: &[u8], diff --git a/src/pdf/document_structure.rs b/src/pdf/document_structure.rs index 13c0de3..268d503 100644 --- a/src/pdf/document_structure.rs +++ b/src/pdf/document_structure.rs @@ -1,16 +1,19 @@ -use core::fmt; -use std::{borrow::Cow, sync::Arc}; - -use crate::pdf::{ - content_stream::PdfContentStream, - font::PdfFont, - object::{ - IsPdfNull, MaybeArray, PdfDate, PdfDictionary, PdfInteger, PdfName, PdfObject, - PdfObjectDirect, PdfObjectIndirect, PdfRectangle, PdfStream, PdfString, +use crate::{ + pdf::{ + content_stream::PdfContentStream, + font::PdfFont, + object::{ + IsPdfNull, MaybeArray, PdfDate, PdfDictionary, PdfInteger, PdfName, PdfObject, + PdfObjectDirect, PdfObjectIndirect, PdfRectangle, PdfStream, PdfString, + }, + parse::{PdfParse, PdfParseError}, + pdf_parse, + render::{PdfRenderOperator, PdfRenderState}, }, - parse::{PdfParse, PdfParseError}, - pdf_parse, + util::DagDebugState, }; +use rayon::iter::{FromParallelIterator, IntoParallelIterator, ParallelIterator}; +use std::{borrow::Cow, fmt, sync::Arc}; pdf_parse! { #[pdf(name)] @@ -24,7 +27,7 @@ pdf_parse! { pdf_parse! { #[pdf] - #[derive(Clone, Debug)] + #[derive(Clone)] pub struct PdfDocumentCatalog { #[pdf(name = "Type")] pub ty: PdfDocumentCatalogType, @@ -40,6 +43,27 @@ pdf_parse! { } } +impl fmt::Debug for PdfDocumentCatalog { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + DagDebugState::scope(|_state| { + let Self { + ty, + version, + extensions, + pages, + rest, + } = self; + f.debug_struct("PdfDocumentCatalog") + .field("ty", ty) + .field("version", version) + .field("extensions", extensions) + .field("pages", pages) + .field("rest", rest) + .finish() + }) + } +} + pdf_parse! { #[pdf] #[derive(Clone, Debug)] @@ -58,14 +82,15 @@ pub struct PdfPageTree { } impl PdfPageTree { - fn parse_pages(node: &PdfPageTreeNode, pages: &mut Vec) -> Result<(), PdfParseError> { + fn collect_leaves( + node: &PdfPageTreeNode, + leaves: &mut Vec, + ) -> Result<(), PdfParseError> { for kid in node.kids.iter() { match kid { - PdfPageTreeNodeOrLeaf::Node(node) => Self::parse_pages(node, pages)?, + PdfPageTreeNodeOrLeaf::Node(node) => Self::collect_leaves(node, leaves)?, PdfPageTreeNodeOrLeaf::Leaf(leaf) => { - pages.push(PdfPage::parse_after_propagating_inheritable_data( - leaf.clone(), - )?); + leaves.push(leaf.clone()); } PdfPageTreeNodeOrLeaf::Other(v) => { return Err(PdfParseError::InvalidType { @@ -80,11 +105,16 @@ impl PdfPageTree { } pub fn try_from_page_tree_root(mut page_tree: PdfPageTreeNode) -> Result { page_tree.propagate_inheritable_data_to_leaves(); - let mut pages = Vec::new(); - Self::parse_pages(&page_tree, &mut pages)?; + let mut leaves = Vec::new(); + Self::collect_leaves(&page_tree, &mut leaves)?; Ok(Self { page_tree, - pages: Arc::from(pages), + pages: Result::from_par_iter( + leaves + .into_par_iter() + .map(PdfPage::parse_after_propagating_inheritable_data) + .panic_fuse(), + )?, }) } pub fn page_tree(&self) -> &PdfPageTreeNode { @@ -97,9 +127,15 @@ impl PdfPageTree { impl fmt::Debug for PdfPageTree { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("PdfPageTree") - .field("pages", &self.pages) - .finish_non_exhaustive() + DagDebugState::scope(|_state| { + let Self { + page_tree: _, + pages, + } = self; + f.debug_struct("PdfPageTree") + .field("pages", pages) + .finish_non_exhaustive() + }) } } @@ -120,7 +156,7 @@ impl PdfParse for PdfPageTree { pdf_parse! { #[pdf] - #[derive(Clone, Debug, Default)] + #[derive(Clone, Default, Debug)] pub struct PdfPageInheritableData { #[pdf(name = "Resources")] pub resources: Option, @@ -168,7 +204,7 @@ pdf_parse! { pdf_parse! { #[pdf] - #[derive(Clone, Debug)] + #[derive(Clone)] pub struct PdfPageTreeNode { #[pdf(name = "Type")] pub ty: PdfPageTreeNodeType, @@ -184,6 +220,27 @@ pdf_parse! { } } +impl fmt::Debug for PdfPageTreeNode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + DagDebugState::scope(|_state| { + let Self { + ty, + parent, + kids, + count, + inheritable, + } = self; + f.debug_struct("PdfPageTreeNode") + .field("ty", ty) + .field("parent", parent) + .field("kids", kids) + .field("count", count) + .field("inheritable", inheritable) + .finish() + }) + } +} + impl PdfPageTreeNode { pub fn propagate_inheritable_data_to_leaves(&mut self) { for kid in Arc::make_mut(&mut self.kids) { @@ -222,7 +279,7 @@ pdf_parse! { pdf_parse! { #[pdf] - #[derive(Clone, Debug)] + #[derive(Clone)] pub struct PdfPageTreeLeaf { #[pdf(name = "Type")] pub ty: PdfPageType, @@ -281,6 +338,74 @@ pdf_parse! { } } +impl fmt::Debug for PdfPageTreeLeaf { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + DagDebugState::scope(|_state| { + let Self { + ty, + parent, + last_modified, + bleed_box, + trim_box, + art_box, + box_color_info, + contents, + group, + thumbnail, + beads, + duration, + transition, + annotations, + additional_actions, + metadata, + piece_info, + structural_parents, + parent_web_capture_content_set_id, + preferred_zoom_factor, + separation_info, + annotations_tab_order, + template_instantiated, + pres_steps, + user_unit, + viewports, + inheritable, + } = self; + f.debug_struct("PdfPageTreeLeaf") + .field("ty", ty) + .field("parent", parent) + .field("last_modified", last_modified) + .field("bleed_box", bleed_box) + .field("trim_box", trim_box) + .field("art_box", art_box) + .field("box_color_info", box_color_info) + .field("contents", contents) + .field("group", group) + .field("thumbnail", thumbnail) + .field("beads", beads) + .field("duration", duration) + .field("transition", transition) + .field("annotations", annotations) + .field("additional_actions", additional_actions) + .field("metadata", metadata) + .field("piece_info", piece_info) + .field("structural_parents", structural_parents) + .field( + "parent_web_capture_content_set_id", + parent_web_capture_content_set_id, + ) + .field("preferred_zoom_factor", preferred_zoom_factor) + .field("separation_info", separation_info) + .field("annotations_tab_order", annotations_tab_order) + .field("template_instantiated", template_instantiated) + .field("pres_steps", pres_steps) + .field("user_unit", user_unit) + .field("viewports", viewports) + .field("inheritable", inheritable) + .finish() + }) + } +} + pdf_parse! { #[pdf(tag = "Type")] #[derive(Clone)] @@ -377,7 +502,7 @@ impl PdfParse for PdfPageRotation { } } -#[derive(Clone, Debug)] +#[derive(Clone)] pub struct PdfPage { pub ty: PdfPageType, pub parent: PdfObjectIndirect, @@ -410,9 +535,16 @@ pub struct PdfPage { pub user_unit: f32, pub viewports: Option>, pub rest: PdfDictionary, + rendered_objects: Option, } impl PdfPage { + pub fn rendered_objects(&self) -> &PdfPageRenderedObjects { + let Some(retval) = &self.rendered_objects else { + unreachable!(); + }; + retval + } pub fn parse_after_propagating_inheritable_data( leaf: PdfPageTreeLeaf, ) -> Result { @@ -465,7 +597,7 @@ impl PdfPage { })?; let crop_box = crop_box.unwrap_or(media_box); let rotate = rotate.unwrap_or(PdfPageRotation::NoRotation); - Ok(Self { + let mut retval = Self { ty, parent, last_modified, @@ -497,6 +629,115 @@ impl PdfPage { user_unit: user_unit.unwrap_or(1.0), viewports, rest, + rendered_objects: None, + }; + retval.rendered_objects = Some(PdfPageRenderedObjects::render_page(&retval)?); + Ok(retval) + } +} + +impl fmt::Debug for PdfPage { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + DagDebugState::scope(|_state| { + let Self { + ty, + parent, + last_modified, + resources, + media_box, + crop_box, + bleed_box, + trim_box, + art_box, + box_color_info, + contents, + rotate, + group, + thumbnail, + beads, + duration, + transition, + annotations, + additional_actions, + metadata, + piece_info, + structural_parents, + parent_web_capture_content_set_id, + preferred_zoom_factor, + separation_info, + annotations_tab_order, + template_instantiated, + pres_steps, + user_unit, + viewports, + rest, + rendered_objects, + } = self; + struct Unparsed; + impl fmt::Debug for Unparsed { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("") + } + } + f.debug_struct("PdfPage") + .field("ty", ty) + .field("parent", parent) + .field("last_modified", last_modified) + .field("resources", resources) + .field("media_box", media_box) + .field("crop_box", crop_box) + .field("bleed_box", bleed_box) + .field("trim_box", trim_box) + .field("art_box", art_box) + .field("box_color_info", box_color_info) + .field("contents", contents) + .field("rotate", rotate) + .field("group", group) + .field("thumbnail", thumbnail) + .field("beads", beads) + .field("duration", duration) + .field("transition", transition) + .field("annotations", annotations) + .field("additional_actions", additional_actions) + .field("metadata", metadata) + .field("piece_info", piece_info) + .field("structural_parents", structural_parents) + .field( + "parent_web_capture_content_set_id", + parent_web_capture_content_set_id, + ) + .field("preferred_zoom_factor", preferred_zoom_factor) + .field("separation_info", separation_info) + .field("annotations_tab_order", annotations_tab_order) + .field("template_instantiated", template_instantiated) + .field("pres_steps", pres_steps) + .field("user_unit", user_unit) + .field("viewports", viewports) + .field("rest", rest) + .field( + "rendered_objects", + if let Some(rendered_objects) = rendered_objects { + rendered_objects + } else { + &Unparsed + }, + ) + .finish() }) } } + +#[derive(Clone, Debug)] +pub struct PdfPageRenderedObjects {} + +impl PdfPageRenderedObjects { + fn render_page(page: &PdfPage) -> Result { + let mut state = PdfRenderState::new(page); + for content_stream in page.contents.iter() { + for op in content_stream.decoded_data().as_ref()?.operators.iter() { + op.render(&mut state)?; + } + } + Ok(Self {}) + } +} diff --git a/src/pdf/font.rs b/src/pdf/font.rs index bfc52b7..14086f1 100644 --- a/src/pdf/font.rs +++ b/src/pdf/font.rs @@ -1,17 +1,19 @@ -use std::{borrow::Cow, sync::Arc}; - -use crate::pdf::{ - object::{ - IsPdfNull, PdfDictionary, PdfName, PdfObject, PdfObjectDirect, PdfRectangle, PdfStream, - PdfString, +use crate::{ + pdf::{ + object::{ + IsPdfNull, PdfDictionary, PdfName, PdfObject, PdfObjectDirect, PdfRectangle, PdfStream, + PdfString, + }, + parse::{PdfParse, PdfParseError}, + pdf_parse, }, - parse::{PdfParse, PdfParseError}, - pdf_parse, + util::DagDebugState, }; +use std::{borrow::Cow, fmt, sync::Arc}; pdf_parse! { #[pdf(transparent)] - #[derive(Clone, Debug)] + #[derive(Clone)] // TODO: actually parse the stream pub struct PdfFontToUnicode { #[pdf] @@ -19,6 +21,17 @@ pdf_parse! { } } +impl fmt::Debug for PdfFontToUnicode { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + DagDebugState::scope(|_state| { + let Self { stream } = self; + f.debug_struct("PdfFontToUnicode") + .field("stream", stream) + .finish() + }) + } +} + pdf_parse! { #[pdf(name)] #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)] @@ -56,7 +69,7 @@ pdf_parse! { pdf_parse! { #[pdf] - #[derive(Clone, Debug)] + #[derive(Clone)] pub struct PdfFontDescriptor { #[pdf(name = "Type")] pub ty: PdfFontDescriptorType, @@ -107,6 +120,63 @@ pdf_parse! { } } +impl fmt::Debug for PdfFontDescriptor { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + DagDebugState::scope(|_state| { + let Self { + ty, + font_name, + font_family, + font_stretch, + font_weight, + flags, + font_bounding_box, + italic_angle, + ascent, + descent, + leading, + cap_height, + x_height, + stem_v, + stem_h, + avg_width, + max_width, + missing_width, + font_file, + font_file2, + font_file3, + char_set, + rest, + } = self; + f.debug_struct("PdfFontDescriptor") + .field("ty", ty) + .field("font_name", font_name) + .field("font_family", font_family) + .field("font_stretch", font_stretch) + .field("font_weight", font_weight) + .field("flags", flags) + .field("font_bounding_box", font_bounding_box) + .field("italic_angle", italic_angle) + .field("ascent", ascent) + .field("descent", descent) + .field("leading", leading) + .field("cap_height", cap_height) + .field("x_height", x_height) + .field("stem_v", stem_v) + .field("stem_h", stem_h) + .field("avg_width", avg_width) + .field("max_width", max_width) + .field("missing_width", missing_width) + .field("font_file", font_file) + .field("font_file2", font_file2) + .field("font_file3", font_file3) + .field("char_set", char_set) + .field("rest", rest) + .finish() + }) + } +} + pdf_parse! { #[pdf(name)] #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)] @@ -117,9 +187,15 @@ pdf_parse! { } } -#[derive(Clone, Debug)] +#[derive(Clone)] pub enum PdfTodo {} +impl fmt::Debug for PdfTodo { + fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self {} + } +} + impl IsPdfNull for PdfTodo { fn is_pdf_null(&self) -> bool { match *self {} @@ -138,14 +214,31 @@ impl PdfParse for PdfTodo { pdf_parse! { #[pdf(tag = "Subtype")] - #[derive(Clone, Debug)] + #[derive(Clone)] pub enum PdfFont { #[pdf(tag_value = "Type0")] - Type0(PdfFontType0), + Type0(Arc), #[pdf(tag_value = "Type1")] Type1(PdfFontType1), #[pdf(other)] - Other(PdfTodo), + Other(Arc), + } +} + +impl fmt::Debug for PdfFont { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + DagDebugState::scope(|state| match self { + PdfFont::Type0(v) => state.debug_or_id(v, "PdfFontType0(...)").fmt(f), + PdfFont::Type1(v) => v.fmt(f), + PdfFont::Other(v) => match **v {}, + }) + } +} + +impl PdfFont { + pub(crate) fn is_vertical_writing_mode(&self) -> bool { + // TODO: + false } } @@ -161,7 +254,7 @@ pdf_parse! { pdf_parse! { #[pdf] - #[derive(Clone, Debug)] + #[derive(Clone)] pub struct PdfFontType0 { #[pdf(name = "Type")] pub ty: PdfFontType, @@ -182,6 +275,31 @@ pdf_parse! { } } +impl fmt::Debug for PdfFontType0 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + DagDebugState::scope(|_state| { + let Self { + ty, + subtype, + base_font, + encoding, + descendent_fonts, + to_unicode, + rest, + } = self; + f.debug_struct("PdfFontType0") + .field("ty", ty) + .field("subtype", subtype) + .field("base_font", base_font) + .field("encoding", encoding) + .field("descendent_fonts", descendent_fonts) + .field("to_unicode", to_unicode) + .field("rest", rest) + .finish() + }) + } +} + pdf_parse! { #[pdf(name)] #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)] @@ -227,10 +345,19 @@ pdf_parse! { } } -#[derive(Clone, Debug)] +#[derive(Clone)] pub enum PdfFontType1 { - Standard(PdfFontType1Standard), - Other(PdfFontType1Other), + Standard(Arc), + Other(Arc), +} + +impl fmt::Debug for PdfFontType1 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + DagDebugState::scope(|state| match self { + PdfFontType1::Standard(v) => state.debug_or_id(v, "PdfFontType1Standard(...)").fmt(f), + PdfFontType1::Other(v) => state.debug_or_id(v, "PdfFontType1Other(...)").fmt(f), + }) + } } impl PdfFontType1 { @@ -309,17 +436,17 @@ impl PdfParse for PdfFontType1 { fn parse(object: PdfObject) -> Result { let object = object.into(); let PdfObjectDirect::Dictionary(object) = object else { - return PdfFontType1Other::parse(object.into()).map(Self::Other); + return Arc::::parse(object.into()).map(Self::Other); }; if let Ok(_) = PdfStandardFontName::parse(object.get_or_null(b"BaseFont".as_slice())) { - PdfFontType1Standard::parse(object.into()).map(Self::Standard) + Arc::::parse(object.into()).map(Self::Standard) } else { - PdfFontType1Other::parse(object.into()).map(Self::Other) + Arc::::parse(object.into()).map(Self::Other) } } } -#[derive(Clone, Debug)] +#[derive(Clone)] pub struct PdfFontType1Common { pub ty: PdfFontType, pub subtype: PdfFontType1Subtype, @@ -334,9 +461,42 @@ pub struct PdfFontType1Common { pub rest: PdfDictionary, } +impl fmt::Debug for PdfFontType1Common { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + DagDebugState::scope(|_state| { + let Self { + ty, + subtype, + name, + base_font, + first_char, + last_char, + widths, + font_descriptor, + encoding, + to_unicode, + rest, + } = self; + f.debug_struct("PdfFontType1Common") + .field("ty", ty) + .field("subtype", subtype) + .field("name", name) + .field("base_font", base_font) + .field("first_char", first_char) + .field("last_char", last_char) + .field("widths", widths) + .field("font_descriptor", font_descriptor) + .field("encoding", encoding) + .field("to_unicode", to_unicode) + .field("rest", rest) + .finish() + }) + } +} + pdf_parse! { #[pdf] - #[derive(Clone, Debug)] + #[derive(Clone)] pub struct PdfFontType1Standard { #[pdf(name = "Type")] pub ty: PdfFontType, @@ -364,6 +524,39 @@ pdf_parse! { } } +impl fmt::Debug for PdfFontType1Standard { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + DagDebugState::scope(|_state| { + let Self { + ty, + subtype, + name, + base_font, + first_char, + last_char, + widths, + font_descriptor, + encoding, + to_unicode, + rest, + } = self; + f.debug_struct("PdfFontType1Standard") + .field("ty", ty) + .field("subtype", subtype) + .field("name", name) + .field("base_font", base_font) + .field("first_char", first_char) + .field("last_char", last_char) + .field("widths", widths) + .field("font_descriptor", font_descriptor) + .field("encoding", encoding) + .field("to_unicode", to_unicode) + .field("rest", rest) + .finish() + }) + } +} + impl PdfFontType1Standard { pub fn common(&self) -> PdfFontType1Common { let Self { @@ -397,7 +590,7 @@ impl PdfFontType1Standard { pdf_parse! { #[pdf] - #[derive(Clone, Debug)] + #[derive(Clone)] pub struct PdfFontType1Other { #[pdf(name = "Type")] pub ty: PdfFontType, @@ -425,6 +618,39 @@ pdf_parse! { } } +impl fmt::Debug for PdfFontType1Other { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + DagDebugState::scope(|_state| { + let Self { + ty, + subtype, + name, + base_font, + first_char, + last_char, + widths, + font_descriptor, + encoding, + to_unicode, + rest, + } = self; + f.debug_struct("PdfFontType1Other") + .field("ty", ty) + .field("subtype", subtype) + .field("name", name) + .field("base_font", base_font) + .field("first_char", first_char) + .field("last_char", last_char) + .field("widths", widths) + .field("font_descriptor", font_descriptor) + .field("encoding", encoding) + .field("to_unicode", to_unicode) + .field("rest", rest) + .finish() + }) + } +} + impl PdfFontType1Other { pub fn common(&self) -> PdfFontType1Common { let Self { diff --git a/src/pdf/object.rs b/src/pdf/object.rs index de3b6da..0931286 100644 --- a/src/pdf/object.rs +++ b/src/pdf/object.rs @@ -1,6 +1,6 @@ use crate::{ pdf::{ - PdfObjects, + PdfObjectAndParseCache, PdfObjects, parse::{ GetPdfInputPosition, PdfInputPosition, PdfInputPositionNoCompare, PdfParse, PdfParseError, @@ -8,7 +8,7 @@ use crate::{ stream_filters::PdfStreamFilter, }, pdf_parse, - util::ArcOrRef, + util::{ArcOrRef, DagDebugState}, }; use std::{ any::TypeId, @@ -892,17 +892,35 @@ impl PdfObjectIndirect { final_id: Arc::new(OnceLock::new()), } } - pub fn get(&self) -> PdfObjectDirect { - let Some(objects) = self.objects.upgrade() else { - panic!("PdfObjects is no longer available"); - }; + pub(crate) fn cache_parse( + &self, + parse_inner: impl FnOnce(PdfObjectDirect) -> Result, E>, + ) -> Result, E> { + self.get_object_and_parse_cache(|object, object_and_parse_cache| { + match object_and_parse_cache { + Some(object_and_parse_cache) => { + if let Some(retval) = object_and_parse_cache.parse_cache_get::() { + println!("cache reused for {object:?}"); + return Ok(retval); + } + parse_inner(object) + .map(|retval| object_and_parse_cache.parse_cache_get_or_insert::(retval)) + } + None => parse_inner(object), + } + }) + } + fn get_object_and_parse_cache_inner<'a>( + &self, + objects: &'a PdfObjects, + ) -> (PdfObjectDirect, Option<&'a PdfObjectAndParseCache>) { if let Some(objects) = objects.inner.get() { let final_id = self.final_id.get().copied(); let limit = if final_id.is_some() { 1 } else { 1000usize }; let mut id = final_id.unwrap_or(self.id); for _ in 0..limit { - if let Some(object) = objects.objects.get(&self.id) { - let retval = match object { + if let Some(object_and_parse_cache) = objects.objects.get(&self.id) { + let object = match &object_and_parse_cache.object { PdfObject::Boolean(v) => PdfObjectDirect::Boolean(*v), PdfObject::Integer(v) => PdfObjectDirect::Integer(*v), PdfObject::Real(v) => PdfObjectDirect::Real(*v), @@ -919,13 +937,26 @@ impl PdfObjectIndirect { }; // we could be racing with another thread, so set can fail but that's not a problem let _ = self.final_id.set(id); - return retval; + return (object, Some(object_and_parse_cache)); } else { - return PdfObjectDirect::Null(PdfNull::new(id.pos)); + return (PdfNull::new(id.pos).into(), None); } } } - PdfObjectDirect::Null(PdfNull::new(self.pos())) + (PdfNull::new(self.pos()).into(), None) + } + fn get_object_and_parse_cache( + &self, + f: impl FnOnce(PdfObjectDirect, Option<&PdfObjectAndParseCache>) -> R, + ) -> R { + let Some(objects) = self.objects.upgrade() else { + panic!("PdfObjects is no longer available"); + }; + let (object, object_and_parse_cache) = self.get_object_and_parse_cache_inner(&objects); + f(object, object_and_parse_cache) + } + pub fn get(&self) -> PdfObjectDirect { + self.get_object_and_parse_cache(|object, _object_and_parse_cache| object) } pub fn id(&self) -> PdfObjectIdentifier { self.id @@ -1067,9 +1098,17 @@ impl<'a, T> IntoIterator for &'a PdfDictionary { } } -impl fmt::Debug for PdfDictionary { +impl fmt::Debug for PdfDictionary { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_map().entries(self).finish() + DagDebugState::scope(|state| { + state + .debug_or_id_with( + &self.fields, + |_, f| f.debug_map().entries(self).finish(), + |f| f.write_str("{...}"), + ) + .fmt(f) + }) } } @@ -1364,6 +1403,31 @@ pub struct PdfMatrix { pub elements: [f32; 6], } +impl PdfMatrix { + pub fn identity(pos: impl Into) -> Self { + Self { + pos: pos.into(), + elements: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0], + } + } + #[must_use] + pub fn mul(self, other: PdfMatrix, new_pos: impl Into) -> Self { + let [la, lb, lc, ld, le, lf] = self.elements; + let [ra, rb, rc, rd, re, rf] = other.elements; + Self { + pos: new_pos.into(), + elements: [ + lb * rc + la * ra, + lb * rd + la * rb, + ld * rc + lc * ra, + ld * rd + lc * rb, + re + lf * rc + le * ra, + rf + lf * rd + le * rb, + ], + } + } +} + impl fmt::Debug for PdfMatrix { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let Self { pos, elements } = *self; @@ -1582,7 +1646,7 @@ impl PdfParse for PdfFileSpecification { pdf_parse! { #[pdf] - #[derive(Clone, Debug)] + #[derive(Clone)] pub struct PdfStreamDictionary { #[pdf(name = "Length")] pub len: usize, @@ -1603,6 +1667,33 @@ pdf_parse! { } } +impl fmt::Debug for PdfStreamDictionary { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + DagDebugState::scope(|_state| { + let Self { + len, + filters, + decode_parms, + file, + file_filters, + file_decode_parms, + decoded_len, + rest, + } = self; + f.debug_struct("PdfStreamDictionary") + .field("len", len) + .field("filters", filters) + .field("decode_parms", decode_parms) + .field("file", file) + .field("file_filters", file_filters) + .field("file_decode_parms", file_decode_parms) + .field("decoded_len", decoded_len) + .field("rest", rest) + .finish() + }) + } +} + #[derive(Debug, Clone, Default)] pub struct PdfStreamDictionaryFiltersAndParms<'a> { filters: std::iter::Enumerate>, @@ -1697,23 +1788,6 @@ impl PdfStreamDictionary { } } -pub(crate) struct UnparsedPdfStreamDictionary { - unparsed_dictionary: PdfDictionary, - dictionary: Arc>>, -} - -impl UnparsedPdfStreamDictionary { - pub(crate) fn finish_parsing(self) -> Result<(), PdfParseError> { - let Ok(()) = self - .dictionary - .set(PdfParse::parse(self.unparsed_dictionary.into())?) - else { - unreachable!(); - }; - Ok(()) - } -} - pub trait PdfStreamContents: Sized + fmt::Debug + 'static { fn parse( data: &[u8], @@ -1786,34 +1860,45 @@ impl fmt::Display for DumpBytes<'_> { impl fmt::Debug for PdfStream { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let Self { - pos, - objects: _, - dictionary, - encoded_data, - decoded_data, - } = self; - let mut debug_struct = f.debug_struct("PdfStream"); - debug_struct.field("pos", pos); - debug_struct.field("dictionary", dictionary); - debug_struct.field("encoded_data", &DumpBytes(encoded_data)); - if let Some(decoded_data) = decoded_data.get() { - match decoded_data { - Ok(decoded_data) => { - if let Some(decoded_data) = - ::downcast_ref::>(decoded_data) - { - debug_struct.field("decoded_data", &DumpBytes(&**decoded_data)) - } else { - debug_struct.field("decoded_data", decoded_data) - } - } - Err(e) => debug_struct.field("decoded_data", &Err::<(), _>(e)), - }; - } else { - debug_struct.field("decoded_data", &format_args!("")); - } - debug_struct.finish() + DagDebugState::scope(|state| { + state + .debug_or_id_with( + &self.decoded_data, + |_, f| { + let Self { + pos, + objects: _, + dictionary, + encoded_data, + decoded_data, + } = self; + let mut debug_struct = f.debug_struct("PdfStream"); + debug_struct.field("pos", pos); + debug_struct.field("dictionary", dictionary); + debug_struct.field("encoded_data", &DumpBytes(encoded_data)); + if let Some(decoded_data) = decoded_data.get() { + match decoded_data { + Ok(decoded_data) => { + if let Some(decoded_data) = + ::downcast_ref::>(decoded_data) + { + debug_struct + .field("decoded_data", &DumpBytes(&**decoded_data)) + } else { + debug_struct.field("decoded_data", decoded_data) + } + } + Err(e) => debug_struct.field("decoded_data", &Err::<(), _>(e)), + }; + } else { + debug_struct.field("decoded_data", &format_args!("")); + } + debug_struct.finish() + }, + |f| f.write_str("PdfStream(...)"), + ) + .fmt(f) + }) } } @@ -1957,7 +2042,7 @@ pdf_parse! { pdf_parse! { #[pdf] - #[derive(Clone, Debug)] + #[derive(Clone)] pub struct PdfObjectStreamDictionary { #[pdf(name = Self::TYPE_NAME)] pub ty: PdfObjectStreamType, diff --git a/src/pdf/parse.rs b/src/pdf/parse.rs index 95e58ac..4ec885e 100644 --- a/src/pdf/parse.rs +++ b/src/pdf/parse.rs @@ -283,6 +283,19 @@ pub enum PdfParseError { OperatorHasTooManyOperands { operator: PdfOperator, }, + CantRestoreGraphicsStateWithEmptyStack { + pos: PdfInputPosition, + }, + FontResourceNotFound { + pos: PdfInputPosition, + font: PdfName, + }, + MissingBeginTextOperator { + pos: PdfInputPosition, + }, + MissingSetFontOperator { + pos: PdfInputPosition, + }, } impl From for PdfParseError { @@ -328,7 +341,11 @@ impl GetPdfInputPosition for PdfParseError { | PdfParseError::UnknownStreamFilter { pos, .. } | PdfParseError::StreamFilterError { pos, .. } | PdfParseError::StreamNotAllowedHere { pos } - | PdfParseError::MissingOperator { pos } => pos, + | PdfParseError::MissingOperator { pos } + | PdfParseError::CantRestoreGraphicsStateWithEmptyStack { pos } + | PdfParseError::FontResourceNotFound { pos, .. } + | PdfParseError::MissingBeginTextOperator { pos } + | PdfParseError::MissingSetFontOperator { pos } => pos, PdfParseError::OperatorNotAllowedHere { ref operator } => operator.pos(), PdfParseError::OperatorHasTooFewOperands { ref operator } | PdfParseError::OperatorHasTooManyOperands { ref operator } => operator.pos(), @@ -487,6 +504,27 @@ impl fmt::Display for PdfParseError { operator.pos(), ) } + PdfParseError::CantRestoreGraphicsStateWithEmptyStack { pos } => { + write!( + f, + "at {pos}: can't restore graphics state when the graphics state stack is empty" + ) + } + PdfParseError::FontResourceNotFound { pos, ref font } => { + write!(f, "at {pos}: font resource not found: {font:?}") + } + PdfParseError::MissingBeginTextOperator { pos } => { + write!( + f, + "at {pos}: missing begin text `BT` operator before this text operator" + ) + } + PdfParseError::MissingSetFontOperator { pos } => { + write!( + f, + "at {pos}: missing set font `Tf` operator before this text showing operator" + ) + } } } } @@ -808,6 +846,40 @@ impl PdfParse for Arc<[T]> { } } +impl IsPdfNull for Arc { + fn is_pdf_null(&self) -> bool { + false + } +} + +impl PdfParse for Arc { + fn type_name() -> Cow<'static, str> { + T::type_name() + } + fn parse(object: PdfObject) -> Result { + if let PdfObject::Indirect(indirect) = object { + indirect.cache_parse(|object| T::parse(object.into()).map(Arc::new)) + } else { + T::parse(object).map(Arc::new) + } + } + fn parse_option(object: PdfObject) -> Result, PdfParseError> { + if let PdfObject::Indirect(indirect) = object { + match indirect.cache_parse(|object| match T::parse_option(object.into()) { + Ok(Some(v)) => Ok(Arc::new(v)), + Ok(None) => Err(None), + Err(e) => Err(Some(e)), + }) { + Ok(v) => Ok(Some(v)), + Err(None) => Ok(None), + Err(Some(e)) => Err(e), + } + } else { + Ok(T::parse_option(object)?.map(Arc::new)) + } + } +} + impl IsPdfNull for MaybeArray { fn is_pdf_null(&self) -> bool { false diff --git a/src/pdf/render.rs b/src/pdf/render.rs new file mode 100644 index 0000000..4fb56eb --- /dev/null +++ b/src/pdf/render.rs @@ -0,0 +1,1054 @@ +use crate::{ + pdf::{ + content_stream::{ + PdfOperatorAndOperands, PdfOperatorBeginCompatibilitySection, + PdfOperatorBeginInlineImage, PdfOperatorBeginInlineImageData, + PdfOperatorBeginMarkedContent, PdfOperatorBeginMarkedContentWithProperties, + PdfOperatorBeginText, PdfOperatorClip, PdfOperatorClipEvenOdd, + PdfOperatorCloseAndStrokePath, PdfOperatorCloseFillAndStrokePath, + PdfOperatorCloseFillAndStrokePathEvenOdd, PdfOperatorCloseSubpath, + PdfOperatorConcatMatrix, PdfOperatorCurveTo, PdfOperatorCurveTo13, + PdfOperatorCurveTo23, PdfOperatorDesignateMarkedContentPoint, + PdfOperatorDesignateMarkedContentPointWithProperties, + PdfOperatorEndCompatibilitySection, PdfOperatorEndInlineImage, + PdfOperatorEndMarkedContent, PdfOperatorEndPath, PdfOperatorEndText, + PdfOperatorFillAndStrokePath, PdfOperatorFillAndStrokePathEvenOdd, PdfOperatorFillPath, + PdfOperatorFillPathEvenOdd, PdfOperatorFillPathObsolete, PdfOperatorFontType3SetWidth, + PdfOperatorFontType3SetWidthAndBBox, PdfOperatorLineTo, PdfOperatorMoveTo, + PdfOperatorPaintXObject, PdfOperatorRectangle, PdfOperatorRestoreGraphicsState, + PdfOperatorSaveGraphicsState, PdfOperatorSetCharacterSpacing, + PdfOperatorSetColorRenderingIntent, PdfOperatorSetFlatnessTolerance, + PdfOperatorSetFontAndSize, PdfOperatorSetGraphicsState, PdfOperatorSetLineCapStyle, + PdfOperatorSetLineDashPattern, PdfOperatorSetLineJoinStyle, PdfOperatorSetLineWidth, + PdfOperatorSetMiterLimit, PdfOperatorSetNonStrokeCmyk, PdfOperatorSetNonStrokeColor, + PdfOperatorSetNonStrokeColorSpace, PdfOperatorSetNonStrokeColorWithName, + PdfOperatorSetNonStrokeGray, PdfOperatorSetNonStrokeRgb, + PdfOperatorSetSpacingThenTextNextLineAndShow, PdfOperatorSetStrokeCmyk, + PdfOperatorSetStrokeColor, PdfOperatorSetStrokeColorSpace, + PdfOperatorSetStrokeColorWithName, PdfOperatorSetStrokeGray, PdfOperatorSetStrokeRgb, + PdfOperatorSetTextHorizontalScaling, PdfOperatorSetTextLeading, + PdfOperatorSetTextMatrix, PdfOperatorSetTextRenderingMode, PdfOperatorSetTextRise, + PdfOperatorSetWordSpacing, PdfOperatorShade, PdfOperatorShowText, + PdfOperatorShowTextWithGlyphPositioning, PdfOperatorStrokePath, + PdfOperatorTextNextLine, PdfOperatorTextNextLineAndShow, + PdfOperatorTextNextLineWithOffset, PdfOperatorTextNextLineWithOffsetAndLeading, + PdfOperatorUnparsed, + }, + document_structure::{PdfPage, PdfResourcesDictionary}, + font::{PdfFont, PdfTodo}, + object::{ + IsPdfNull, PdfMatrix, PdfName, PdfNumber, PdfObject, PdfObjectDirect, + PdfStringOrNumber, PdfVec2D, + }, + parse::{ + GetPdfInputPosition, PdfInputPosition, PdfInputPositionNoCompare, PdfParse, + PdfParseError, + }, + }, + pdf_parse, +}; +use std::borrow::Cow; + +#[derive(Clone, Debug)] +#[non_exhaustive] +pub struct PdfPath {} + +pdf_parse! { + #[pdf(name)] + #[derive(Clone, Debug)] + pub enum PdfColorSpace { + #[pdf(name = "DeviceGray")] + DeviceGray, + #[pdf(name = "DeviceRGB")] + DeviceRgb, + // TODO: add others + #[pdf(other)] + Unknown(PdfName), + } +} + +pdf_parse! { + #[pdf(name)] + #[derive(Clone, Debug)] + pub enum PdfRenderingIntent { + #[pdf(name = "RelativeColorimetric")] + RelativeColorimetric, + // TODO: add others + #[pdf(other)] + Unknown(PdfName), + } +} + +pdf_parse! { + #[pdf(name)] + #[derive(Clone, Debug)] + pub enum PdfBlendMode { + #[pdf(name = "Normal")] + Normal, + // TODO: add others + #[pdf(other)] + Unknown(PdfName), + } +} + +#[derive(Clone, Copy, PartialEq, PartialOrd)] +pub struct PdfColorDeviceGray { + pos: PdfInputPositionNoCompare, + level: f32, +} + +impl PdfColorDeviceGray { + pub fn pos(self) -> PdfInputPosition { + self.pos.0 + } + pub fn level(self) -> f32 { + self.level + } +} + +impl IsPdfNull for PdfColorDeviceGray { + fn is_pdf_null(&self) -> bool { + false + } +} + +impl PdfParse for PdfColorDeviceGray { + fn type_name() -> Cow<'static, str> { + Cow::Borrowed("PdfColorDeviceGray") + } + fn parse(object: PdfObject) -> Result { + let number = PdfNumber::parse(object)?; + Ok(Self { + pos: number.pos().into(), + level: number.as_f32(), + }) + } +} + +impl std::fmt::Debug for PdfColorDeviceGray { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self { pos, level } = self; + write!(f, "PdfColorDeviceGray(at {pos}, {level})") + } +} + +#[derive(Clone, Copy, PartialEq, PartialOrd)] +pub struct PdfColorDeviceRgb { + pos: PdfInputPositionNoCompare, + r: f32, + g: f32, + b: f32, +} + +impl PdfColorDeviceRgb { + pub fn pos(self) -> PdfInputPosition { + self.pos.0 + } + pub fn r(self) -> f32 { + self.r + } + pub fn g(self) -> f32 { + self.g + } + pub fn b(self) -> f32 { + self.b + } + pub fn parse_flat(r: PdfObject, g: PdfObject, b: PdfObject) -> Result { + let r = PdfNumber::parse(r)?; + let g = f32::parse(g)?; + let b = f32::parse(b)?; + Ok(Self { + pos: r.pos().into(), + r: r.as_f32(), + g, + b, + }) + } +} + +impl IsPdfNull for PdfColorDeviceRgb { + fn is_pdf_null(&self) -> bool { + false + } +} + +impl std::fmt::Debug for PdfColorDeviceRgb { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self { pos, r, g, b } = self; + write!(f, "PdfColorDeviceRgb(at {pos}, {r}, {g}, {b})") + } +} + +#[derive(Clone, Debug)] +pub enum PdfColor { + DeviceGray(PdfColorDeviceGray), + DeviceRgb(PdfColorDeviceRgb), + // TODO +} + +#[derive(Clone, Debug)] +pub struct PdfTextObjectState { + pub text_matrix: PdfMatrix, + pub text_line_matrix: PdfMatrix, +} + +impl PdfTextObjectState { + fn require>( + v: Option, + pos: PdfInputPosition, + ) -> Result { + v.ok_or(PdfParseError::MissingBeginTextOperator { pos }) + } +} + +#[derive(Clone, Debug)] +pub struct PdfTextState { + pub char_spacing: f32, + pub word_spacing: f32, + pub horizontal_scaling_percent: f32, + pub leading: f32, + pub font: Option, + pub font_size: f32, + pub rendering_mode: u8, // TODO: replace with enum + pub rise: f32, + pub knockout: bool, + pub text_object: Option, +} + +#[derive(Clone, Debug)] +#[non_exhaustive] +pub struct PdfLineDashPattern { + // TODO +} + +impl PdfLineDashPattern { + pub fn solid() -> Self { + Self {} + } +} + +#[derive(Clone, Debug)] +pub struct PdfGraphicsState { + pub current_transformation_matrix: PdfMatrix, + pub clipping_path: PdfPath, + pub stroking_color: PdfColor, + pub non_stroking_color: PdfColor, + pub text_state: PdfTextState, + pub line_width: f32, + pub line_cap_style: u8, // TODO: replace with enum + pub line_join_style: u8, // TODO: replace with enum + pub miter_limit: f32, + pub line_dash_pattern: PdfLineDashPattern, + pub rendering_intent: PdfRenderingIntent, + pub automatic_stroke_adjustment: bool, + pub current_blend_mode: PdfBlendMode, + pub soft_mask: Option, // TODO: replace with struct + pub stroking_alpha_constant: f32, + pub non_stroking_alpha_constant: f32, + pub alpha_source: bool, +} + +impl PdfGraphicsState { + pub fn text_rendering_matrix(&self, pos: PdfInputPosition) -> Result { + let text_object = PdfTextObjectState::require(self.text_state.text_object.as_ref(), pos)?; + Ok(PdfMatrix { + pos: PdfInputPositionNoCompare::empty(), + elements: [ + self.text_state.font_size * self.text_state.horizontal_scaling_percent * 1e-2, + 0.0, + 0.0, + self.text_state.font_size, + 0.0, + self.text_state.rise, + ], + } + .mul(text_object.text_matrix, PdfInputPositionNoCompare::empty()) + .mul( + self.current_transformation_matrix, + text_object.text_matrix.pos, + )) + } + pub fn advance_text_matrix( + &mut self, + pos: PdfInputPosition, + glyph_displacement: PdfVec2D, + position_adjustment: f32, + has_char_spacing: bool, + has_word_spacing: bool, + ) -> Result<(), PdfParseError> { + let text_object = PdfTextObjectState::require(self.text_state.text_object.as_mut(), pos)?; + let (tx, ty) = if self + .text_state + .font + .as_ref() + .ok_or(PdfParseError::MissingSetFontOperator { pos })? + .is_vertical_writing_mode() + { + let mut ty = + (glyph_displacement.y - position_adjustment * 1e-3) * self.text_state.font_size; + if has_char_spacing { + ty += self.text_state.char_spacing; + } + if has_word_spacing { + ty += self.text_state.word_spacing; + } + (0.0, ty) + } else { + let mut tx = + (glyph_displacement.x - position_adjustment * 1e-3) * self.text_state.font_size; + if has_char_spacing { + tx += self.text_state.char_spacing; + } + if has_word_spacing { + tx += self.text_state.word_spacing; + } + (tx * self.text_state.horizontal_scaling_percent * 1e-2, 0.0) + }; + text_object.text_matrix = PdfMatrix { + pos: pos.into(), + elements: [1.0, 0.0, 0.0, 1.0, tx, ty], + } + .mul(text_object.text_matrix, pos); + Ok(()) + } +} + +#[derive(Debug)] +pub struct PdfRenderState<'a> { + pub graphics_state: PdfGraphicsState, + pub graphics_state_stack: Vec, + pub resources: &'a PdfResourcesDictionary, +} + +impl<'a> PdfRenderState<'a> { + pub fn new(page: &'a PdfPage) -> Self { + let pos = page.rest.pos().into(); + Self { + graphics_state: PdfGraphicsState { + current_transformation_matrix: PdfMatrix::identity(pos), + clipping_path: PdfPath {}, + stroking_color: PdfColor::DeviceGray(PdfColorDeviceGray { pos, level: 0.0 }), + non_stroking_color: PdfColor::DeviceGray(PdfColorDeviceGray { pos, level: 0.0 }), + text_state: PdfTextState { + char_spacing: 0.0, + word_spacing: 0.0, + horizontal_scaling_percent: 100.0, + leading: 0.0, + font: None, + font_size: 0.0, + rendering_mode: 0, + rise: 0.0, + knockout: true, + text_object: None, + }, + line_width: 1.0, + line_cap_style: 0, + line_join_style: 0, + miter_limit: 10.0, + line_dash_pattern: PdfLineDashPattern::solid(), + rendering_intent: PdfRenderingIntent::RelativeColorimetric, + automatic_stroke_adjustment: false, + current_blend_mode: PdfBlendMode::Normal, + soft_mask: None, + stroking_alpha_constant: 1.0, + non_stroking_alpha_constant: 1.0, + alpha_source: false, + }, + graphics_state_stack: Vec::with_capacity(3), + resources: &page.resources, + } + } + pub fn handle_unknown_operator( + &mut self, + operator: &PdfOperatorUnparsed, + operands: &[PdfObjectDirect], + ) -> Result<(), PdfParseError> { + todo!() + } +} + +pub trait PdfRenderOperator: Into { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError>; +} + +impl PdfRenderOperator for PdfOperatorCloseFillAndStrokePath { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorFillAndStrokePath { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorCloseFillAndStrokePathEvenOdd { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorFillAndStrokePathEvenOdd { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorBeginMarkedContentWithProperties { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { + pos, + tag, + properties, + } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorBeginInlineImage { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorBeginMarkedContent { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, tag } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorBeginText { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + let Self { pos } = *self; + state.graphics_state.text_state.text_object = Some(PdfTextObjectState { + text_matrix: PdfMatrix::identity(pos), + text_line_matrix: PdfMatrix::identity(pos), + }); + Ok(()) + } +} + +impl PdfRenderOperator for PdfOperatorBeginCompatibilitySection { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorCurveTo { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, p1, p2, p3 } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorConcatMatrix { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + let Self { pos, matrix } = *self; + state.graphics_state.current_transformation_matrix = state + .graphics_state + .current_transformation_matrix + .mul(matrix, pos); + Ok(()) + } +} + +impl PdfRenderOperator for PdfOperatorSetStrokeColorSpace { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, name } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorSetNonStrokeColorSpace { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, name } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorSetLineDashPattern { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { + pos, + dash_array, + dash_phase, + } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorFontType3SetWidth { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, width } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorFontType3SetWidthAndBBox { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, width, bbox } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorPaintXObject { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, name } = self; */ + let _ = state; + // TODO: implement + Ok(()) + } +} + +impl PdfRenderOperator for PdfOperatorDesignateMarkedContentPointWithProperties { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { + pos, + tag, + properties, + } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorEndInlineImage { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorEndMarkedContent { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorEndText { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + let Self { pos: _ } = self; + state.graphics_state.text_state.text_object = None; + Ok(()) + } +} + +impl PdfRenderOperator for PdfOperatorEndCompatibilitySection { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorFillPath { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorFillPathObsolete { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorFillPathEvenOdd { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorSetStrokeGray { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + let Self { pos: _, gray } = *self; + state.graphics_state.stroking_color = PdfColor::DeviceGray(gray); + Ok(()) + } +} + +impl PdfRenderOperator for PdfOperatorSetNonStrokeGray { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + let Self { pos: _, gray } = *self; + state.graphics_state.non_stroking_color = PdfColor::DeviceGray(gray); + Ok(()) + } +} + +impl PdfRenderOperator for PdfOperatorSetGraphicsState { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { + pos, + dictionary_name, + } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorCloseSubpath { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorSetFlatnessTolerance { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, flatness } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorBeginInlineImageData { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorSetLineJoinStyle { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { + pos, + line_join_style, + } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorSetLineCapStyle { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { + pos, + line_cap_style, + } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorSetStrokeCmyk { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, c, m, y, k } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorSetNonStrokeCmyk { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, c, m, y, k } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorLineTo { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, to } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorMoveTo { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, to } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorSetMiterLimit { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, limit } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorDesignateMarkedContentPoint { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, tag } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorEndPath { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorSaveGraphicsState { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + let Self { pos: _ } = self; + state + .graphics_state_stack + .push(state.graphics_state.clone()); + Ok(()) + } +} + +impl PdfRenderOperator for PdfOperatorRestoreGraphicsState { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + let Self { pos } = self; + state.graphics_state = state + .graphics_state_stack + .pop() + .ok_or(PdfParseError::CantRestoreGraphicsStateWithEmptyStack { pos: pos.0 })?; + Ok(()) + } +} + +impl PdfRenderOperator for PdfOperatorRectangle { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, p, size } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorSetStrokeRgb { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + let Self { pos: _, color } = *self; + state.graphics_state.stroking_color = PdfColor::DeviceRgb(color); + Ok(()) + } +} + +impl PdfRenderOperator for PdfOperatorSetNonStrokeRgb { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + let Self { pos: _, color } = *self; + state.graphics_state.non_stroking_color = PdfColor::DeviceRgb(color); + Ok(()) + } +} + +impl PdfRenderOperator for PdfOperatorSetColorRenderingIntent { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, intent } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorCloseAndStrokePath { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorStrokePath { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorSetStrokeColor { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, color } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorSetNonStrokeColor { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, color } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorSetStrokeColorWithName { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { + pos, + color_and_name, + } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorSetNonStrokeColorWithName { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { + pos, + color_and_name, + } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorShade { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorTextNextLine { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorSetCharacterSpacing { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, char_space } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorTextNextLineWithOffset { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + let Self { pos, offset } = *self; + let PdfTextObjectState { + text_matrix, + text_line_matrix, + } = PdfTextObjectState::require( + state.graphics_state.text_state.text_object.as_mut(), + pos.0, + )?; + let matrix = PdfMatrix { + pos, + elements: [1.0, 0.0, 0.0, 1.0, offset.x, offset.y], + } + .mul(*text_line_matrix, pos); + *text_line_matrix = matrix; + *text_matrix = matrix; + Ok(()) + } +} + +impl PdfRenderOperator for PdfOperatorTextNextLineWithOffsetAndLeading { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, offset } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorSetFontAndSize { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + let Self { + pos, + ref font, + size, + } = *self; + state.graphics_state.text_state.font = Some( + state + .resources + .fonts + .get(font) + .ok_or_else(|| PdfParseError::FontResourceNotFound { + pos: pos.0, + font: font.clone(), + })? + .clone(), + ); + state.graphics_state.text_state.font_size = size; + Ok(()) + } +} + +impl PdfRenderOperator for PdfOperatorShowText { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, text } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorShowTextWithGlyphPositioning { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + let Self { + pos, + ref text_and_positioning, + } = *self; + let font = state + .graphics_state + .text_state + .font + .as_ref() + .ok_or(PdfParseError::MissingSetFontOperator { pos: pos.0 })?; + let PdfFont::Type1(font) = font else { todo!() }; + let mut positioning = 0.0; + for text_or_positioning in text_and_positioning.iter() { + match text_or_positioning { + PdfStringOrNumber::String(s) => { + for glyph in s.bytes().iter() { + let positioning = std::mem::replace(&mut positioning, 0.0); + let encoding = font.encoding(); + todo!("{encoding:?}"); + } + } + PdfStringOrNumber::Number(number) => positioning = number.as_f32(), + } + } + let _ = state; + todo!("{text_and_positioning:?}") + } +} + +impl PdfRenderOperator for PdfOperatorSetTextLeading { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, leading } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorSetTextMatrix { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, matrix } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorSetTextRenderingMode { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { + pos, + rendering_mode, + } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorSetTextRise { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, rise } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorSetWordSpacing { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, word_space } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorSetTextHorizontalScaling { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, scale_percent } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorCurveTo23 { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorSetLineWidth { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, line_width } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorClip { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorClipEvenOdd { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorCurveTo13 { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorTextNextLineAndShow { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { pos, text } = self; */ + let _ = state; + todo!() + } +} + +impl PdfRenderOperator for PdfOperatorSetSpacingThenTextNextLineAndShow { + fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { + /* let Self { + pos, + word_space, + char_space, + text, + } = self; */ + let _ = state; + todo!() + } +} diff --git a/src/util.rs b/src/util.rs index a7a4978..1a4440c 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,5 +1,8 @@ use std::{ + any::{Any, TypeId}, borrow::Borrow, + cell::Cell, + collections::HashMap, fmt, hash::{Hash, Hasher}, sync::Arc, @@ -100,3 +103,221 @@ impl fmt::Display for ArcOrRef<'_, T> { T::fmt(self, f) } } + +trait DagDebugStateSealed {} + +#[expect(private_bounds)] +pub trait SupportsDagDebugState: DagDebugStateSealed + 'static + Clone { + type Key: Clone + Hash + Eq + 'static; + fn key(this: &Self) -> Self::Key; +} + +impl DagDebugStateSealed for Arc {} + +impl SupportsDagDebugState for Arc { + type Key = *const T; + + fn key(this: &Self) -> Self::Key { + Arc::as_ptr(this) + } +} + +impl DagDebugStateSealed for Arc<[T]> {} + +impl SupportsDagDebugState for Arc<[T]> { + type Key = *const [T]; + + fn key(this: &Self) -> Self::Key { + Arc::as_ptr(this) + } +} + +impl DagDebugStateSealed for Arc {} + +impl SupportsDagDebugState for Arc { + type Key = *const str; + + fn key(this: &Self) -> Self::Key { + Arc::as_ptr(this) + } +} + +trait DagDebugStatePartTrait: 'static { + fn reset(&mut self); + fn as_any_mut(&mut self) -> &mut dyn Any; +} + +struct DagDebugStatePart { + table: HashMap, + next_id: u64, +} + +impl DagDebugStatePartTrait for DagDebugStatePart { + fn reset(&mut self) { + let Self { table, next_id } = self; + table.clear(); + *next_id = 0; + } + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } +} + +impl DagDebugStatePart { + fn insert(&mut self, value: &T) -> DagDebugStateInsertResult { + use std::collections::hash_map::Entry; + match self.table.entry(T::key(value)) { + Entry::Occupied(entry) => DagDebugStateInsertResult::Old { id: entry.get().0 }, + Entry::Vacant(entry) => { + let value = T::clone(value); + let id = self.next_id; + self.next_id += 1; + entry.insert((id, value)); + DagDebugStateInsertResult::New { id } + } + } + } +} + +impl Default for DagDebugStatePart { + fn default() -> Self { + Self { + table: HashMap::default(), + next_id: 0, + } + } +} + +pub struct DagDebugState { + parts: std::cell::RefCell>>, + ref_count: Cell, +} + +#[derive(Clone, Copy, Debug)] +pub enum DagDebugStateInsertResult { + New { id: u64 }, + Old { id: u64 }, +} + +impl DagDebugStateInsertResult { + pub fn id(self) -> u64 { + match self { + Self::New { id } | Self::Old { id } => id, + } + } +} + +impl DagDebugState { + fn with_part( + &self, + f: impl FnOnce(&mut DagDebugStatePart) -> R, + ) -> R { + let mut parts = self.parts.borrow_mut(); + let Some(part) = parts + .entry(TypeId::of::>()) + .or_insert_with(|| Box::new(DagDebugStatePart::::default())) + .as_any_mut() + .downcast_mut::>() + else { + unreachable!() + }; + f(part) + } + pub fn insert(&self, value: &T) -> DagDebugStateInsertResult { + self.with_part(|part: &mut DagDebugStatePart| part.insert(value)) + } + pub fn debug_or_id<'a, T: SupportsDagDebugState + fmt::Debug, Abbreviated: fmt::Display>( + &self, + value: &'a T, + abbreviated: Abbreviated, + ) -> impl fmt::Debug + fmt::Display + use<'a, T, Abbreviated> { + self.debug_or_id_with(value, fmt::Debug::fmt, move |f| abbreviated.fmt(f)) + } + pub fn debug_or_id_with< + 'a, + T: SupportsDagDebugState, + DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result, + DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result, + >( + &self, + value: &'a T, + debug_value: DebugValue, + debug_abbreviated: DebugAbbreviated, + ) -> impl fmt::Debug + fmt::Display + use<'a, T, DebugValue, DebugAbbreviated> { + struct DebugOrIdWith< + 'a, + T: SupportsDagDebugState, + DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result, + DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result, + > { + insert_result: DagDebugStateInsertResult, + value: &'a T, + debug_value: DebugValue, + debug_abbreviated: DebugAbbreviated, + } + impl< + 'a, + T: SupportsDagDebugState, + DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result, + DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result, + > fmt::Debug for DebugOrIdWith<'a, T, DebugValue, DebugAbbreviated> + { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(self, f) + } + } + impl< + 'a, + T: SupportsDagDebugState, + DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result, + DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result, + > fmt::Display for DebugOrIdWith<'a, T, DebugValue, DebugAbbreviated> + { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "#{} ", self.insert_result.id())?; + match self.insert_result { + DagDebugStateInsertResult::New { id: _ } => (self.debug_value)(self.value, f), + DagDebugStateInsertResult::Old { id: _ } => (self.debug_abbreviated)(f), + } + } + } + DebugOrIdWith { + insert_result: self.insert(value), + value, + debug_value, + debug_abbreviated, + } + } + #[must_use] + fn inc_ref_count_scope(&self) -> impl Sized { + struct DecRefCountOnDrop<'a>(&'a DagDebugState); + impl Drop for DecRefCountOnDrop<'_> { + fn drop(&mut self) { + self.0.ref_count.set(self.0.ref_count.get() - 1); + if self.0.ref_count.get() == 0 { + self.0 + .parts + .borrow_mut() + .values_mut() + .for_each(|v| v.reset()); + } + } + } + self.ref_count.set( + self.ref_count + .get() + .checked_add(1) + .expect("too many nested calls"), + ); + DecRefCountOnDrop(self) + } + pub fn scope(f: impl FnOnce(&Self) -> R) -> R { + thread_local! { + static STATE: DagDebugState = DagDebugState { parts: Default::default(), ref_count: Cell::new(0) }; + } + STATE.with(|state| { + let _scope = state.inc_ref_count_scope(); + f(state) + }) + } +}