diff --git a/.gitignore b/.gitignore index e11e257..50e4eb1 100644 --- a/.gitignore +++ b/.gitignore @@ -3,5 +3,4 @@ *.egg-info __pycache__ *.log -/powerisa-instructions.xml -/target +/powerisa-instructions.xml \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock deleted file mode 100644 index 10be7b1..0000000 --- a/Cargo.lock +++ /dev/null @@ -1,109 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 4 - -[[package]] -name = "adler2" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" - -[[package]] -name = "cfg-if" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" - -[[package]] -name = "crc32fast" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "crossbeam-deque" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" - -[[package]] -name = "either" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" - -[[package]] -name = "flate2" -version = "1.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" -dependencies = [ - "crc32fast", - "miniz_oxide", -] - -[[package]] -name = "miniz_oxide" -version = "0.8.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" -dependencies = [ - "adler2", - "simd-adler32", -] - -[[package]] -name = "parse_powerisa_pdf" -version = "0.1.0" -dependencies = [ - "flate2", - "rayon", -] - -[[package]] -name = "rayon" -version = "1.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - -[[package]] -name = "simd-adler32" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" diff --git a/Cargo.toml b/Cargo.toml deleted file mode 100644 index 20ecf46..0000000 --- a/Cargo.toml +++ /dev/null @@ -1,10 +0,0 @@ -[package] -name = "parse_powerisa_pdf" -version = "0.1.0" -edition = "2024" -license = "LGPL-3.0-or-later" - -[dependencies] -flate2 = "1.1.5" -rayon = "1.11.0" - diff --git a/src/lib.rs b/src/lib.rs deleted file mode 100644 index d0e7860..0000000 --- a/src/lib.rs +++ /dev/null @@ -1,5 +0,0 @@ -#[doc(hidden)] -pub use std as __std; - -pub mod pdf; -pub mod util; diff --git a/src/main.rs b/src/main.rs deleted file mode 100644 index 533a408..0000000 --- a/src/main.rs +++ /dev/null @@ -1,38 +0,0 @@ -use parse_powerisa_pdf::pdf::Pdf; -use std::{ - error::Error, - io::{IsTerminal, Read}, - process::ExitCode, -}; - -fn main() -> Result> { - let args: Vec<_> = std::env::args_os().collect(); - if args - .iter() - .skip(1) - .any(|v| v.as_encoded_bytes().starts_with(b"-") && v != "-") - || args.len() > 2 - || (args.len() == 1 && std::io::stdin().is_terminal()) - { - eprintln!( - "Usage: {} []\n\ - Reads the PDF file passed on the command line,\n\ - Reads stdin if no arguments are passed or if the file name is just a dash `-`.\n\ - If stdin is a terminal, you have to pass `-` explicitly to read from it.", - args[0].display() - ); - return Ok(ExitCode::FAILURE); - } - let file_path = args.get(1).filter(|v| *v != "-"); - let input = if let Some(file_path) = file_path { - std::fs::read(file_path)? - } else { - let mut buf = Vec::new(); - std::io::stdin().lock().read_to_end(&mut buf)?; - buf - }; - let pdf = Pdf::parse(input)?; - println!("{:#?}", pdf.trailer.trailer_dictionary()); - todo!(); - Ok(ExitCode::SUCCESS) -} diff --git a/src/pdf.rs b/src/pdf.rs deleted file mode 100644 index be57b31..0000000 --- a/src/pdf.rs +++ /dev/null @@ -1,1279 +0,0 @@ -use crate::{ - pdf::{ - content_stream::PdfOperatorUnparsed, - document_structure::PdfDocumentCatalog, - object::{ - PdfArray, PdfBoolean, PdfDictionary, PdfInteger, PdfName, PdfNull, PdfObject, - PdfObjectIdentifier, PdfObjectIndirect, PdfObjectStreamDictionary, PdfReal, PdfStream, - PdfStreamDictionary, PdfString, - }, - parse::{ - GetPdfInputPosition, PdfInputPosition, PdfInputPositionKnown, PdfParse, PdfParseError, - }, - }, - pdf_parse, - util::{ArcOrRef, DagDebugState}, -}; -use std::{ - any::{Any, TypeId}, - collections::{BTreeMap, HashMap}, - convert::Infallible, - fmt, - num::NonZero, - str::FromStr, - sync::{Arc, Mutex, OnceLock}, -}; - -pub mod content_stream; -pub mod document_structure; -pub mod font; -pub mod object; -pub mod parse; -pub mod render; -pub mod stream_filters; - -struct ParseCache { - parse_results: HashMap>, - steps_till_next_gc: usize, -} - -impl Default for ParseCache { - fn default() -> Self { - Self { - parse_results: HashMap::new(), - steps_till_next_gc: 1, - } - } -} - -impl ParseCache { - fn gc(&mut self) { - if self.steps_till_next_gc == 0 { - self.parse_results.retain(|_k, v| v.strong_count() > 0); - let mut adjusted_len = self.parse_results.len(); - if adjusted_len < 10 { - adjusted_len = 10; - } - self.steps_till_next_gc = adjusted_len.saturating_mul(20); - } else { - self.steps_till_next_gc -= 1; - } - } - fn get(&mut self) -> Option> { - self.gc(); - let Ok(retval) = self - .parse_results - .get(&TypeId::of::())? - .upgrade()? - .downcast() - else { - unreachable!(); - }; - Some(retval) - } - fn get_or_insert( - &mut self, - value: Arc, - ) -> (Arc, impl Sized + use) { - use std::collections::hash_map::Entry; - self.gc(); - match self.parse_results.entry(TypeId::of::()) { - Entry::Occupied(mut entry) => { - if let Some(retval) = entry.get().upgrade() { - let Ok(retval) = retval.downcast::() else { - unreachable!(); - }; - (retval, Some(value)) - } else { - entry.insert(Arc::::downgrade(&value)); - (value, None) - } - } - Entry::Vacant(entry) => { - entry.insert(Arc::::downgrade(&value)); - (value, None) - } - } - } -} - -struct PdfObjectAndParseCache { - object: PdfObject, - parse_cache: Mutex, -} - -impl PdfObjectAndParseCache { - fn new(object: PdfObject) -> Self { - Self { - object, - parse_cache: Mutex::default(), - } - } - fn parse_cache_get(&self) -> Option> { - self.parse_cache.lock().expect("not poisoned").get() - } - fn parse_cache_get_or_insert(&self, value: Arc) -> Arc { - let mut parse_cache = self.parse_cache.lock().expect("not poisoned"); - let (retval, to_drop_after_unlock) = parse_cache.get_or_insert(value); - drop(parse_cache); - drop(to_drop_after_unlock); - retval - } -} - -struct PdfObjectsInner { - objects: BTreeMap, - #[allow(dead_code)] - object_streams: Vec>, -} - -pub struct PdfObjects { - inner: OnceLock, -} - -#[derive(Copy, Clone, Debug)] -pub struct PdfHeader { - pub major: NonZero, - pub minor: u16, -} - -impl PdfHeader { - pub const PREFIX: &str = "%PDF-"; -} - -pdf_parse! { - #[pdf] - #[derive(Clone)] - pub struct PdfTrailerDictionary { - #[pdf(name = "Size")] - pub size: usize, - #[pdf(name = "Prev")] - pub prev: Option, - #[pdf(name = "Root")] - pub root: PdfDocumentCatalog, - #[pdf(name = "Encrypt")] - pub encrypt: Option, - #[pdf(name = "Info")] - pub info: Option, - #[pdf(name = "ID")] - pub id: Option<[PdfString; 2]>, - #[pdf(flatten)] - pub rest: PdfDictionary, - } -} - -impl fmt::Debug for PdfTrailerDictionary { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - DagDebugState::scope(|_state| { - let Self { - size, - prev, - root, - encrypt, - info, - id, - rest, - } = self; - f.debug_struct("PdfTrailerDictionary") - .field("size", size) - .field("prev", prev) - .field("root", root) - .field("encrypt", encrypt) - .field("info", info) - .field("id", id) - .field("rest", rest) - .finish() - }) - } -} - -pdf_parse! { - #[pdf(name)] - #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)] - pub enum PdfXRefName { - #[pdf(name = "XRef")] - #[default] - XRef, - } -} - -pdf_parse! { - #[pdf] - #[derive(Clone)] - pub struct PdfXRefStreamDictionaryRest { - #[pdf(name = "Type")] - pub ty: PdfXRefName, - #[pdf(name = "Index")] - pub index: Option>, - #[pdf(name = "W")] - pub w: Option>, - #[pdf(flatten)] - pub rest: PdfTrailerDictionary, - } -} - -impl fmt::Debug for PdfXRefStreamDictionaryRest { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - DagDebugState::scope(|_state| { - let Self { ty, index, w, rest } = self; - f.debug_struct("PdfXRefStreamDictionaryRest") - .field("ty", ty) - .field("index", index) - .field("w", w) - .field("rest", rest) - .finish() - }) - } -} - -#[derive(Clone)] -pub enum PdfTrailer { - Trailer { - trailer_dictionary: PdfTrailerDictionary, - start_xref: usize, - }, - Stream { - xref_stream: PdfStream, - start_xref: usize, - }, -} - -impl fmt::Debug for PdfTrailer { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - DagDebugState::scope(|_state| match self { - Self::Trailer { - trailer_dictionary, - start_xref, - } => f - .debug_struct("Trailer") - .field("trailer_dictionary", trailer_dictionary) - .field("start_xref", start_xref) - .finish(), - Self::Stream { - xref_stream, - start_xref, - } => f - .debug_struct("Stream") - .field("xref_stream", xref_stream) - .field("start_xref", start_xref) - .finish(), - }) - } -} - -impl PdfTrailer { - pub fn trailer_dictionary(&self) -> &PdfTrailerDictionary { - match self { - PdfTrailer::Trailer { - trailer_dictionary, .. - } => trailer_dictionary, - PdfTrailer::Stream { xref_stream, .. } => &xref_stream.dictionary().rest.rest, - } - } -} - -pub struct Pdf { - pub header: PdfHeader, - pub objects: Arc, - pub trailer: PdfTrailer, -} - -#[derive(Clone, Copy, PartialEq, Eq, Debug)] -enum PdfCharCategory { - Regular, - Whitespace, - LParen, - RParen, - LAngle, - RAngle, - LBracket, - RBracket, - LBrace, - RBrace, - FSlash, - Percent, -} - -impl PdfCharCategory { - fn new(b: u8) -> Self { - match b { - b'\0' | b'\t' | b'\n' | b'\x0C' | b'\r' | b' ' => Self::Whitespace, - b'(' => Self::LParen, - b')' => Self::RParen, - b'<' => Self::LAngle, - b'>' => Self::RAngle, - b'[' => Self::LBracket, - b']' => Self::RBracket, - b'{' => Self::LBrace, - b'}' => Self::RBrace, - b'/' => Self::FSlash, - b'%' => Self::Percent, - _ => Self::Regular, - } - } -} - -#[derive(Clone, Copy, PartialEq)] -enum PdfToken<'a> { - Regular(&'a [u8]), - LParen, - RParen, - LAngle, - RAngle, - LBracket, - RBracket, - LBrace, - RBrace, - FSlash, - Comment(&'a [u8]), -} - -impl<'a> fmt::Debug for PdfToken<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Regular(contents) => { - if let Ok(contents) = str::from_utf8(contents) { - write!(f, "Regular({contents:?})") - } else { - write!(f, "Regular({contents:?})") - } - } - Self::LParen => write!(f, "LParen"), - Self::RParen => write!(f, "RParen"), - Self::LAngle => write!(f, "LAngle"), - Self::RAngle => write!(f, "RAngle"), - Self::LBracket => write!(f, "LBracket"), - Self::RBracket => write!(f, "RBracket"), - Self::LBrace => write!(f, "LBrace"), - Self::RBrace => write!(f, "RBrace"), - Self::FSlash => write!(f, "FSlash"), - Self::Comment(contents) => { - if let Ok(contents) = str::from_utf8(contents) { - write!(f, "Comment({contents:?})") - } else { - write!(f, "Comment({contents:?})") - } - } - } - } -} - -#[derive(Clone)] -struct PdfTokenizerPeek<'a> { - token: PdfToken<'a>, - pos_after_token: usize, -} - -#[derive(Clone)] -struct PdfTokenizer<'a> { - bytes: &'a [u8], - pos: PdfInputPositionKnown, - peek_cache: Option>, -} - -impl<'a> PdfTokenizer<'a> { - fn new(bytes: &'a [u8], pos: PdfInputPositionKnown) -> Self { - Self { - bytes, - pos, - peek_cache: None, - } - } - fn pos(&self) -> PdfInputPosition { - PdfInputPosition::new(Some(self.pos)) - } - fn peek_byte(&mut self) -> Option { - self.bytes.get(self.pos.pos).copied() - } - fn next_byte(&mut self) -> Option { - let b = self.bytes.get(self.pos.pos)?; - self.pos.pos += 1; - self.peek_cache = None; - Some(*b) - } - fn skip_whitespace(&mut self) { - while let Some(PdfCharCategory::Whitespace) = self.peek_byte().map(PdfCharCategory::new) { - self.next_byte(); - } - } - fn peek(&mut self) -> Option> { - if let Some(PdfTokenizerPeek { token, .. }) = self.peek_cache { - return Some(token); - } - let mut tokenizer = self.clone(); - let token = tokenizer.next()?; - self.peek_cache = Some(PdfTokenizerPeek { - token, - pos_after_token: tokenizer.pos.pos, - }); - Some(token) - } - fn read_bytes(&mut self, len: usize) -> Option<&'a [u8]> { - let retval = self - .bytes - .get(self.pos.pos..self.pos.pos.saturating_add(len))?; - self.peek_cache = None; - self.pos.pos += len; - Some(retval) - } -} - -impl<'a> Iterator for PdfTokenizer<'a> { - type Item = PdfToken<'a>; - - fn next(&mut self) -> Option { - if let Some(PdfTokenizerPeek { - token, - pos_after_token, - }) = self.peek_cache.take() - { - self.pos.pos = pos_after_token; - return Some(token); - } - loop { - let start_pos = self.pos.pos; - break match PdfCharCategory::new(self.next_byte()?) { - PdfCharCategory::Whitespace => continue, - PdfCharCategory::LParen => Some(PdfToken::LParen), - PdfCharCategory::RParen => Some(PdfToken::RParen), - PdfCharCategory::LAngle => Some(PdfToken::LAngle), - PdfCharCategory::RAngle => Some(PdfToken::RAngle), - PdfCharCategory::LBracket => Some(PdfToken::LBracket), - PdfCharCategory::RBracket => Some(PdfToken::RBracket), - PdfCharCategory::LBrace => Some(PdfToken::LBrace), - PdfCharCategory::RBrace => Some(PdfToken::RBrace), - PdfCharCategory::FSlash => Some(PdfToken::FSlash), - PdfCharCategory::Percent => { - loop { - match self.next_byte() { - None | Some(b'\n') => break, - Some(b'\r') => { - if let Some(b'\n') = self.peek_byte() { - self.pos.pos += 1; - } - break; - } - Some(_) => continue, - } - } - Some(PdfToken::Comment(&self.bytes[start_pos..self.pos.pos])) - } - PdfCharCategory::Regular => { - while let Some(PdfCharCategory::Regular) = - self.peek_byte().map(PdfCharCategory::new) - { - self.pos.pos += 1; - } - Some(PdfToken::Regular(&self.bytes[start_pos..self.pos.pos])) - } - }; - } - } -} - -struct PdfParser<'a> { - objects: Arc, - tokenizer: PdfTokenizer<'a>, -} - -enum PdfObjectOrStreamDictionaryOrOperator { - StreamDictionary { - dictionary: PdfDictionary, - stream_kw_pos: PdfInputPosition, - }, - Object(PdfObject), - Operator(PdfOperatorUnparsed), -} - -impl PdfObjectOrStreamDictionaryOrOperator { - fn error_on_stream_or_operator(self) -> Result { - match self { - PdfObjectOrStreamDictionaryOrOperator::StreamDictionary { - dictionary: _, - stream_kw_pos, - } => Err(PdfParseError::StreamNotAllowedHere { pos: stream_kw_pos }), - PdfObjectOrStreamDictionaryOrOperator::Object(object) => Ok(object), - PdfObjectOrStreamDictionaryOrOperator::Operator(operator) => { - Err(PdfParseError::OperatorNotAllowedHere { operator }) - } - } - } -} - -impl<'a> PdfParser<'a> { - fn skip_comments_and_whitespace(&mut self) { - self.tokenizer.skip_whitespace(); - while let Some(PdfToken::Comment(_)) = self.tokenizer.peek() { - self.tokenizer.next(); - self.tokenizer.skip_whitespace(); - } - } - fn parse_digits( - &mut self, - on_parse_failed: impl FnOnce(PdfInputPosition) -> Result, PdfParseError>, - ) -> Result, PdfParseError> { - self.skip_comments_and_whitespace(); - let old_tokenizer = self.tokenizer.clone(); - let pos = self.tokenizer.pos(); - let Some(PdfToken::Regular(number)) = self.tokenizer.next() else { - self.tokenizer = old_tokenizer; - return Ok(None); - }; - if !number.iter().all(|b| b.is_ascii_digit()) { - self.tokenizer = old_tokenizer; - return Ok(None); - } - let Some(number) = str::from_utf8(number).ok().and_then(|v| v.parse().ok()) else { - self.tokenizer = old_tokenizer; - return Ok(match on_parse_failed(pos)? { - None => None, - }); - }; - Ok(Some((pos, number))) - } - fn parse_object_identifier( - &mut self, - return_none_for_out_of_range: bool, - ) -> Result, PdfParseError> { - let old_tokenizer = self.tokenizer.clone(); - let Some((pos, object_number)) = self.parse_digits(|pos| { - if return_none_for_out_of_range { - Ok(None) - } else { - Err(PdfParseError::InvalidObjectNumber { pos }) - } - })? - else { - self.tokenizer = old_tokenizer; - return Ok(None); - }; - let Some((_pos, generation_number)) = self.parse_digits(|pos| { - if return_none_for_out_of_range { - Ok(None) - } else { - Err(PdfParseError::InvalidGenerationNumber { pos }) - } - })? - else { - self.tokenizer = old_tokenizer; - return Ok(None); - }; - Ok(Some(PdfObjectIdentifier { - pos: pos.into(), - object_number, - generation_number, - })) - } - fn parse_indirect_object(&mut self) -> Result, PdfParseError> { - let old_tokenizer = self.tokenizer.clone(); - let Some(id) = self.parse_object_identifier(true)? else { - self.tokenizer = old_tokenizer; - return Ok(None); - }; - if let Some(PdfToken::Regular(b"R")) = self.tokenizer.next() { - Ok(Some(PdfObjectIndirect::new(&self.objects, id))) - } else { - self.tokenizer = old_tokenizer; - Ok(None) - } - } - fn parse_string_after_l_paren(&mut self) -> Result { - let mut contents = Vec::new(); - let mut paren_level = NonZero::new(1usize).expect("non-zero"); - let string_pos = self.tokenizer.pos(); - while let Some(b) = self.tokenizer.next_byte() { - contents.push(match b { - b'(' => { - paren_level = paren_level.checked_add(1).expect("overflow"); - b - } - b')' => { - let Some(new_paren_level) = NonZero::new(paren_level.get() - 1) else { - return Ok(PdfString::new( - string_pos, - ArcOrRef::Arc(Arc::from(contents)), - )); - }; - paren_level = new_paren_level; - b - } - b'\r' if self.tokenizer.peek_byte() == Some(b'\n') => { - self.tokenizer.next_byte(); - b'\n' - } - b'\r' | b'\n' => b'\n', - b'\\' => { - let pos = self.tokenizer.pos(); - let Some(b) = self.tokenizer.next_byte() else { - return Err(PdfParseError::InvalidStringEscape { pos }); - }; - match b { - b'\r' if self.tokenizer.peek_byte() == Some(b'\n') => { - self.tokenizer.next_byte(); - continue; - } - b'\r' | b'\n' => continue, - b'n' => b'\n', - b'r' => b'\r', - b't' => b'\t', - b'b' => b'\x08', - b'f' => b'\x0C', - b'(' | b')' | b'\\' => b, - b'0'..=b'7' => { - const MAX_OCTAL_DIGITS: usize = 3; - let mut value = b - b'0'; - let mut len = 1; - while len < MAX_OCTAL_DIGITS { - let Some(b @ b'0'..=b'7') = self.tokenizer.peek_byte() else { - break; - }; - value <<= 3; - value |= b - b'0'; - len += 1; - self.tokenizer.next_byte(); - } - value - } - _ => { - return Err(PdfParseError::InvalidStringEscape { pos }); - } - } - } - _ => b, - }); - } - Err(PdfParseError::TruncatedFile { - pos: self.tokenizer.pos(), - }) - } - fn parse_string_after_l_angle(&mut self) -> Result { - let mut contents = Vec::new(); - let mut high_digit_value = None; - let mut push_digit_value = |value: u8| { - high_digit_value = match high_digit_value { - Some(high_digit_value) => { - contents.push((high_digit_value << 4) | value); - None - } - None => Some(value), - }; - }; - let string_pos = self.tokenizer.pos(); - loop { - let pos = self.tokenizer.pos(); - match self.tokenizer.next_byte() { - None => { - return Err(PdfParseError::TruncatedFile { pos }); - } - Some(b) if PdfCharCategory::new(b) == PdfCharCategory::Whitespace => {} - Some(b'>') => { - // if we have an odd trailing digit, add the final digit, otherwise doesn't modify contents - push_digit_value(0); - return Ok(PdfString::new( - string_pos, - Arc::<[u8]>::from(contents).into(), - )); - } - Some(b) => { - let Some(value) = (b as char).to_digit(0x10) else { - return Err(PdfParseError::InvalidHexStringDigit { pos }); - }; - push_digit_value(value as u8); - } - } - } - } - fn parse_name_after_f_slash(&mut self) -> Result { - let mut name = vec![]; - let name_pos = self.tokenizer.pos(); - loop { - let Some(PdfCharCategory::Regular) = - self.tokenizer.peek_byte().map(PdfCharCategory::new) - else { - return Ok(PdfName::new(name_pos, ArcOrRef::Arc(Arc::from(name)))); - }; - let pos = self.tokenizer.pos(); - match self - .tokenizer - .next_byte() - .expect("just checked that it's not None") - { - b'#' => { - let mut value = 0u8; - for _ in 0..2 { - let Some(digit) = self - .tokenizer - .next_byte() - .and_then(|b| (b as char).to_digit(0x10)) - else { - return Err(PdfParseError::InvalidNameEscape { pos }); - }; - value <<= 4; - value |= digit as u8; - } - name.push(value); - } - b => name.push(b), - } - } - } - fn parse_array_after_l_bracket(&mut self) -> Result { - let array_pos = self.tokenizer.pos(); - let mut contents: Vec = Vec::new(); - loop { - self.skip_comments_and_whitespace(); - if let Some(PdfToken::RBracket) = self.tokenizer.peek() { - self.tokenizer.next(); - return Ok(PdfArray::from_elements(array_pos, Arc::from(contents))); - } - contents.push( - self.parse_object_or_operator()? - .error_on_stream_or_operator()?, - ); - } - } - /// assumes `self.tokenizer.peek_byte() == Some(b'<')` - fn parse_dictionary_after_one_l_angle(&mut self) -> Result { - let l_angle = self.tokenizer.next_byte(); - assert_eq!(l_angle, Some(b'<')); - let dictionary_pos = self.tokenizer.pos(); - let mut contents: BTreeMap = BTreeMap::new(); - loop { - self.skip_comments_and_whitespace(); - if let Some(PdfToken::RAngle) = self.tokenizer.peek() { - self.tokenizer.next(); - let pos = self.tokenizer.pos(); - let b'>' = self - .tokenizer - .next_byte() - .ok_or(PdfParseError::TruncatedFile { pos })? - else { - return Err(PdfParseError::InvalidDictionaryClosingDoubleRAngle { pos }); - }; - return Ok(PdfDictionary::from_fields( - dictionary_pos, - Arc::new(contents), - )); - } - let name = PdfName::parse( - self.parse_object_or_operator()? - .error_on_stream_or_operator()?, - )?; - let name_pos = name.pos(); - match contents.entry(name) { - std::collections::btree_map::Entry::Vacant(entry) => { - entry.insert( - self.parse_object_or_operator()? - .error_on_stream_or_operator()?, - ); - } - std::collections::btree_map::Entry::Occupied(entry) => { - return Err(PdfParseError::DuplicateDictionaryKey { - pos: name_pos, - name: entry.key().clone(), - }); - } - } - } - } - fn parse_object_or_operator( - &mut self, - ) -> Result { - self.skip_comments_and_whitespace(); - if let Some(indirect) = self.parse_indirect_object()? { - return Ok(PdfObjectOrStreamDictionaryOrOperator::Object( - indirect.into(), - )); - } - let pos = self.tokenizer.pos(); - Ok(PdfObjectOrStreamDictionaryOrOperator::Object( - match self - .tokenizer - .next() - .ok_or(PdfParseError::TruncatedFile { pos })? - { - PdfToken::Regular(b"true") => PdfObject::Boolean(PdfBoolean::new(pos, true)), - PdfToken::Regular(b"false") => PdfObject::Boolean(PdfBoolean::new(pos, false)), - PdfToken::Regular(b"null") => PdfObject::Null(PdfNull::new(pos)), - PdfToken::Regular( - number @ ([b'+' | b'-', b'0'..=b'9' | b'.', ..] | [b'0'..=b'9' | b'.', ..]), - ) => { - // parse number - let Ok(number) = str::from_utf8(number) else { - return Err(PdfParseError::InvalidNumber { pos }); - }; - let mut parts = number - .strip_prefix(&['+', '-']) - .unwrap_or(number) - .split('.'); - let integer_part = parts - .next() - .expect("split always returns at least one part"); - let fraction_part = parts.next(); - if parts.next().is_some() { - return Err(PdfParseError::InvalidNumber { pos }); - } - if integer_part.is_empty() && fraction_part.is_none_or(|v| v.is_empty()) { - return Err(PdfParseError::InvalidNumber { pos }); - } - if !integer_part.bytes().all(|v| v.is_ascii_digit()) { - return Err(PdfParseError::InvalidNumber { pos }); - } - if let Some(fraction_part) = fraction_part { - if !fraction_part.bytes().all(|v| v.is_ascii_digit()) { - return Err(PdfParseError::InvalidNumber { pos }); - } - PdfObject::Real(PdfReal::new( - pos, - number - .parse() - .map_err(|_| PdfParseError::InvalidNumber { pos })?, - )) - } else { - PdfObject::Integer(PdfInteger::new( - pos, - number - .parse() - .map_err(|_| PdfParseError::InvalidNumber { pos })?, - )) - } - } - PdfToken::Regular(name) => { - return Ok(PdfObjectOrStreamDictionaryOrOperator::Operator( - PdfOperatorUnparsed::new(pos, ArcOrRef::Arc(name.into())), - )); - } - PdfToken::LParen => PdfObject::String(self.parse_string_after_l_paren()?), - PdfToken::RParen => todo!(), - PdfToken::LAngle => { - if self.tokenizer.peek_byte() == Some(b'<') { - let dictionary = self.parse_dictionary_after_one_l_angle()?; - self.skip_comments_and_whitespace(); - if let Some(PdfToken::Regular(b"stream")) = self.tokenizer.peek() { - return Ok(PdfObjectOrStreamDictionaryOrOperator::StreamDictionary { - dictionary, - stream_kw_pos: self.tokenizer.pos(), - }); - } else { - dictionary.into() - } - } else { - self.parse_string_after_l_angle()?.into() - } - } - PdfToken::RAngle => todo!(), - PdfToken::LBracket => self.parse_array_after_l_bracket()?.into(), - PdfToken::RBracket => todo!(), - PdfToken::LBrace => todo!(), - PdfToken::RBrace => todo!(), - PdfToken::FSlash => self.parse_name_after_f_slash()?.into(), - PdfToken::Comment(_) => unreachable!(), - }, - )) - } -} - -struct PdfFileParser<'a> { - parser: PdfParser<'a>, - objects_map: BTreeMap, -} - -impl<'a> PdfFileParser<'a> { - fn parse_header(&mut self) -> Result { - let Some(b'%') = self.parser.tokenizer.bytes.first() else { - return Err(PdfParseError::NotAPdfFile); - }; - let Some(PdfToken::Comment(header)) = self.parser.tokenizer.next() else { - unreachable!() - }; - let Ok(header) = str::from_utf8(header) else { - return Err(PdfParseError::NotAPdfFile); - }; - let header = header.trim_end_matches(['\n', '\r']); - let Some(version) = header.strip_prefix(PdfHeader::PREFIX) else { - return Err(PdfParseError::NotAPdfFile); - }; - let Some((major_str, minor_str)) = version.split_once('.') else { - return Err(PdfParseError::NotAPdfFile); - }; - let (Ok(major), Ok(minor)) = (major_str.parse(), minor_str.parse()) else { - return Err(PdfParseError::NotAPdfFile); - }; - Ok(PdfHeader { major, minor }) - } - /// assumes `self.tokenizer.peek() == Some(PdfToken::Regular(b"stream"))` - fn parse_stream_after_dictionary( - &mut self, - dictionary: PdfDictionary, - ) -> Result { - self.parser.tokenizer.skip_whitespace(); - let stream_pos = self.parser.tokenizer.pos(); - let stream = self.parser.tokenizer.next(); - assert_eq!(stream, Some(PdfToken::Regular(b"stream"))); - let dictionary = PdfStreamDictionary::parse(dictionary.into())?; - let eol_pos = self.parser.tokenizer.pos(); - match self.parser.tokenizer.next_byte() { - None => return Err(PdfParseError::TruncatedFile { pos: eol_pos }), - Some(b'\r') => { - let Some(b'\n') = self.parser.tokenizer.next_byte() else { - return Err(PdfParseError::InvalidOrMissingEolAfterStreamKeyword { - pos: eol_pos, - }); - }; - } - Some(b'\n') => {} - _ => return Err(PdfParseError::InvalidOrMissingEolAfterStreamKeyword { pos: eol_pos }), - } - let Some(data) = self.parser.tokenizer.read_bytes(dictionary.len) else { - return Err(PdfParseError::TruncatedFile { - pos: PdfInputPosition::new(Some(PdfInputPositionKnown { - pos: self.parser.tokenizer.bytes.len(), - ..self.parser.tokenizer.pos - })), - }); - }; - let stream = PdfStream::new( - stream_pos, - &self.parser.objects, - dictionary, - Arc::from(data), - ); - self.parser.skip_comments_and_whitespace(); - let pos = self.parser.tokenizer.pos(); - if let Some(PdfToken::Regular(b"endstream")) = self.parser.tokenizer.next() { - Ok(stream) - } else { - Err(PdfParseError::MissingEndStreamKeyword { pos }) - } - } - fn parse_object(&mut self) -> Result { - match self.parser.parse_object_or_operator()? { - PdfObjectOrStreamDictionaryOrOperator::StreamDictionary { - dictionary, - stream_kw_pos: _, - } => Ok(PdfObject::Stream( - self.parse_stream_after_dictionary(dictionary)?, - )), - PdfObjectOrStreamDictionaryOrOperator::Object(object) => Ok(object), - PdfObjectOrStreamDictionaryOrOperator::Operator(operator) => { - Err(PdfParseError::OperatorNotAllowedHere { operator }) - } - } - } - fn parse_indirect_object_definition(&mut self) -> Result, PdfParseError> { - self.parser.skip_comments_and_whitespace(); - let Some(id) = self.parser.parse_object_identifier(false)? else { - return Ok(None); - }; - self.parser.skip_comments_and_whitespace(); - let obj_pos = self.parser.tokenizer.pos(); - let Some(PdfToken::Regular(b"obj")) = self.parser.tokenizer.next() else { - return Err(PdfParseError::MissingObj { pos: obj_pos }); - }; - let object = self.parse_object()?; - self.parser.skip_comments_and_whitespace(); - let end_obj_pos = self.parser.tokenizer.pos(); - let Some(PdfToken::Regular(b"endobj")) = self.parser.tokenizer.next() else { - return Err(PdfParseError::MissingEndObj { pos: end_obj_pos }); - }; - if self - .objects_map - .insert(id, PdfObjectAndParseCache::new(object)) - .is_some() - { - Err(PdfParseError::DuplicateIndirectObjectDefinition { pos: id.pos.0, id }) - } else { - Ok(Some(())) - } - } - fn parse_object_stream( - &mut self, - object_stream: &PdfStream, - ) -> Result<(), PdfParseError> { - let data = object_stream.decoded_data().as_ref()?; - let mut parser = PdfParser { - tokenizer: PdfTokenizer::new( - data, - PdfInputPositionKnown { - pos: 0, - containing_streams_pos: Some( - object_stream - .get_pdf_input_position() - .get() - .expect("known to be set") - .pos, - ), - }, - ), - objects: self.parser.objects.clone(), - }; - let mut object_ids_and_byte_positions = - Vec::<(PdfObjectIdentifier, usize)>::with_capacity(object_stream.dictionary().rest.n); - for _ in 0..object_stream.dictionary().rest.n { - parser.skip_comments_and_whitespace(); - let Some((pos, object_number)) = - parser.parse_digits(|pos| Err(PdfParseError::InvalidObjectNumber { pos }))? - else { - return Err(PdfParseError::InvalidObjectNumber { - pos: parser.tokenizer.pos(), - }); - }; - parser.skip_comments_and_whitespace(); - let Some((_, byte_position)) = - parser.parse_digits(|pos| Err(PdfParseError::InvalidNumber { pos }))? - else { - return Err(PdfParseError::InvalidNumber { - pos: parser.tokenizer.pos(), - }); - }; - object_ids_and_byte_positions.push(( - PdfObjectIdentifier { - pos: pos.into(), - object_number, - generation_number: 0, - }, - byte_position, - )); - } - for (id, _byte_position) in object_ids_and_byte_positions { - let object = parser - .parse_object_or_operator()? - .error_on_stream_or_operator()?; - if self - .objects_map - .insert(id, PdfObjectAndParseCache::new(object)) - .is_some() - { - return Err(PdfParseError::DuplicateIndirectObjectDefinition { pos: id.pos.0, id }); - } - } - Ok(()) - } - fn parse_body(&mut self) -> Result<(), PdfParseError> { - while let Some(()) = self.parse_indirect_object_definition()? {} - let mut object_streams: Vec> = Vec::new(); - for object in self.objects_map.values_mut() { - let stream = match &object.object { - PdfObject::Stream(stream) => stream, - PdfObject::Boolean(_) - | PdfObject::Integer(_) - | PdfObject::Real(_) - | PdfObject::String(_) - | PdfObject::Name(_) - | PdfObject::Array(_) - | PdfObject::Dictionary(_) - | PdfObject::Null(_) - | PdfObject::Indirect(_) => continue, - }; - if PdfObjectStreamDictionary::parse_type_from_dictionary(&stream.dictionary().rest) - .is_ok() - { - object_streams.push(PdfStream::parse(object.object.clone())?); - } - } - for object_stream in &object_streams { - self.parse_object_stream(object_stream)?; - } - let Ok(()) = self.parser.objects.inner.set(PdfObjectsInner { - objects: std::mem::take(&mut self.objects_map), - object_streams, - }) else { - unreachable!(); - }; - Ok(()) - } - fn parse_xref_table(&mut self) -> Result<(), PdfParseError> { - self.parser.skip_comments_and_whitespace(); - let xref_pos = self.parser.tokenizer.pos(); - let Some(PdfToken::Regular(b"xref")) = self.parser.tokenizer.peek() else { - return Ok(()); - }; - todo!("{xref_pos}") - } - fn parse_trailer(&mut self) -> Result { - self.parser.skip_comments_and_whitespace(); - let trailer_pos = self.parser.tokenizer.pos(); - let trailer_dictionary = match self.parser.tokenizer.peek() { - Some(PdfToken::Regular(b"trailer")) => { - self.parser.tokenizer.next(); - Some(PdfTrailerDictionary::parse(self.parse_object()?)?) - } - Some(PdfToken::Regular(b"startxref")) => None, - _ => { - return Err(PdfParseError::MissingTrailer { pos: trailer_pos }); - } - }; - self.parser.skip_comments_and_whitespace(); - let start_xref_kw_pos = self.parser.tokenizer.pos(); - let Some(PdfToken::Regular(b"startxref")) = self.parser.tokenizer.next() else { - return Err(PdfParseError::MissingStartXRefKeyword { - pos: start_xref_kw_pos, - }); - }; - let start_xref_pos = self.parser.tokenizer.pos(); - let Some((start_xref_pos, start_xref)) = self - .parser - .parse_digits(|pos| Err(PdfParseError::IntegerOutOfRange { pos }))? - else { - return Err(PdfParseError::MissingStartXRefValue { - pos: start_xref_pos, - }); - }; - self.parser.tokenizer.skip_whitespace(); - let eof_comment_pos = self.parser.tokenizer.pos(); - let Some(PdfToken::Comment(b"%%EOF" | b"%%EOF\r" | b"%%EOF\r\n" | b"%%EOF\n")) = - self.parser.tokenizer.next() - else { - return Err(PdfParseError::MissingEofComment { - pos: eof_comment_pos, - }); - }; - self.parser.tokenizer.skip_whitespace(); - if let Some(byte) = self.parser.tokenizer.peek_byte() { - return Err(PdfParseError::UnexpectedByte { - pos: self.parser.tokenizer.pos(), - byte, - }); - } - if let Some(trailer_dictionary) = trailer_dictionary { - return Ok(PdfTrailer::Trailer { - trailer_dictionary, - start_xref, - }); - } - let id = PdfParser { - tokenizer: PdfTokenizer::new( - self.parser.tokenizer.bytes, - PdfInputPositionKnown { - pos: start_xref, - containing_streams_pos: None, - }, - ), - objects: self.parser.objects.clone(), - } - .parse_object_identifier(false); - let Some(id) = id? else { - return Err(PdfParseError::InvalidStartXRefValue { - pos: start_xref_pos, - start_xref, - }); - }; - let xref_stream = PdfStream::parse( - PdfObjectIndirect::new(&self.parser.objects, id) - .get() - .into(), - )?; - Ok(PdfTrailer::Stream { - xref_stream, - start_xref, - }) - } - fn parse_file(mut self) -> Result { - let header = self.parse_header()?; - self.parse_body()?; - self.parse_xref_table()?; - let trailer = self.parse_trailer()?; - Ok(Pdf { - header, - objects: self.parser.objects, - trailer, - }) - } -} - -impl Pdf { - pub fn parse(bytes: impl AsRef<[u8]>) -> Result { - PdfFileParser { - parser: PdfParser { - objects: Arc::new(PdfObjects { - inner: OnceLock::new(), - }), - tokenizer: PdfTokenizer::new( - bytes.as_ref(), - PdfInputPositionKnown { - pos: 0, - containing_streams_pos: None, - }, - ), - }, - objects_map: BTreeMap::new(), - } - .parse_file() - } -} - -#[cfg(test)] -mod tests { - use crate::{ - pdf::{ - object::{ - PdfBoolean, PdfDictionary, PdfInteger, PdfName, PdfNull, PdfObject, PdfString, - }, - parse::{PdfInputPosition, PdfParse, PdfParseError}, - }, - util::ArcOrRef, - }; - - #[test] - fn test_deserialize_dict() -> Result<(), PdfParseError> { - crate::pdf::parse::pdf_parse! { - #[pdf] - #[derive(Debug)] - #[allow(dead_code)] - struct TestStruct { - #[pdf(name = "a")] - a: i32, - #[pdf(name = "c")] - c: i32, - #[pdf(name = "b")] - b: i32, - #[pdf(flatten)] - rest: PdfDictionary, - } - } - - let v: TestStruct = PdfParse::parse(PdfObject::from(PdfDictionary::from_iter([ - ( - PdfName::new_static(b"a"), - PdfInteger::new(PdfInputPosition::empty(), 1).into(), - ), - ( - PdfName::new_static(b"c"), - PdfInteger::new(PdfInputPosition::empty(), 7).into(), - ), - ( - PdfName::new_static(b"b"), - PdfInteger::new(PdfInputPosition::empty(), 5).into(), - ), - ( - PdfName::new_static(b"d"), - PdfBoolean::new(PdfInputPosition::empty(), false).into(), - ), - ( - PdfName::new_static(b"e"), - PdfNull::new(PdfInputPosition::empty()).into(), - ), - ( - PdfName::new_static(b"f"), - PdfString::new(PdfInputPosition::empty(), ArcOrRef::Ref(b"test")).into(), - ), - ])))?; - let expected = TestStruct { - a: 1, - c: 7, - b: 5, - rest: PdfDictionary::from_iter([ - ( - PdfName::new_static(b"d"), - PdfBoolean::new(PdfInputPosition::empty(), false).into(), - ), - ( - PdfName::new_static(b"f"), - PdfString::new(PdfInputPosition::empty(), ArcOrRef::Ref(b"test")).into(), - ), - ]), - }; - assert_eq!(format!("{v:?}"), format!("{expected:?}")); - Ok(()) - } -} diff --git a/src/pdf/content_stream.rs b/src/pdf/content_stream.rs deleted file mode 100644 index 79764c2..0000000 --- a/src/pdf/content_stream.rs +++ /dev/null @@ -1,829 +0,0 @@ -use crate::{ - pdf::{ - PdfObjectOrStreamDictionaryOrOperator, PdfObjects, PdfParser, PdfTokenizer, - object::{ - NameOr, PdfDictionary, PdfMatrix, PdfName, PdfObject, PdfObjectDirect, PdfRectangle, - PdfStream, PdfStreamContents, PdfString, PdfStringBytesDebug, PdfStringOrNumber, - PdfVec2D, - }, - parse::{ - GetPdfInputPosition, PdfInputPosition, PdfInputPositionKnown, - PdfInputPositionNoCompare, PdfParse, PdfParseError, - }, - render::{ - PdfColorDeviceGray, PdfColorDeviceRgb, PdfRenderOperator, PdfRenderState, - PdfRenderingIntent, - }, - }, - util::ArcOrRef, -}; -use std::{fmt, sync::Arc}; - -#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)] -pub struct PdfOperatorUnparsed { - pos: PdfInputPositionNoCompare, - bytes: ArcOrRef<'static, [u8]>, -} - -impl GetPdfInputPosition for PdfOperatorUnparsed { - fn get_pdf_input_position(&self) -> PdfInputPosition { - self.pos() - } -} - -impl fmt::Debug for PdfOperatorUnparsed { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - Self::debug_with_name("PdfOperatorUnparsed", &self.bytes, self.pos.0, f) - } -} - -trait PdfParseIter: Sized { - fn parse_iter(iter: impl IntoIterator) -> Result; -} - -impl PdfParseIter for Arc<[T]> { - fn parse_iter(iter: impl IntoIterator) -> Result { - FromIterator::from_iter(iter.into_iter().map(T::parse)) - } -} - -impl PdfOperatorUnparsed { - pub fn new( - pos: impl Into, - bytes: impl Into>, - ) -> Self { - Self { - pos: pos.into(), - bytes: bytes.into(), - } - } - pub const fn new_static(bytes: &'static [u8]) -> Self { - Self { - pos: PdfInputPositionNoCompare::empty(), - bytes: ArcOrRef::Ref(bytes), - } - } - pub fn pos(&self) -> PdfInputPosition { - self.pos.0 - } - pub fn bytes(&self) -> &ArcOrRef<'static, [u8]> { - &self.bytes - } - fn debug_with_name( - name: &str, - pdf_name: &[u8], - pos: PdfInputPosition, - f: &mut fmt::Formatter<'_>, - ) -> fmt::Result { - write!(f, "{name}(at {pos}, {})", PdfStringBytesDebug(pdf_name)) - } - pub fn bytes_debug(&self) -> PdfStringBytesDebug<'_> { - PdfStringBytesDebug(&self.bytes) - } -} - -macro_rules! make_pdf_operator_enum { - ( - $(#[$($operator_meta:tt)*])* - $operator_enum_vis:vis enum $PdfOperator:ident; - - $(#[$($operator_and_operands_meta:tt)*])* - $enum_vis:vis enum $PdfOperatorAndOperands:ident { - $(#[$($unknown_variant_meta:tt)*])* - $Unknown:ident { - $(#[$($unknown_operands_meta:tt)*])* - $unknown_operands:ident: $unknown_operands_ty:ty, - $(#[$($unknown_operator_meta:tt)*])* - $unknown_operator:ident: $unknown_operator_ty:ty, - }, - $( - #[kw = $kw:literal] - $(#[$($variant_meta:tt)*])* - $Variant:ident($VariantStruct:ident { - $pos:ident: PdfInputPositionNoCompare, - $( - #[$field_parse:ident($($parse_args:tt)*)] - $(#[$($field_meta:tt)*])* - $field:ident: $field_ty:ty, - )* - }), - )* - } - ) => { - $(#[$($operator_meta)*])* - $operator_enum_vis enum $PdfOperator { - $(#[$($unknown_variant_meta)*])* - $Unknown($unknown_operator_ty), - $( - $(#[$($variant_meta)*])* - $Variant(PdfInputPositionNoCompare), - )* - } - - impl $PdfOperator { - $operator_enum_vis fn parse(self, operands: impl IntoIterator) -> Result<$PdfOperatorAndOperands, PdfParseError> { - let operands = operands.into_iter(); - Ok(match self { - Self::$Unknown(operator) => $PdfOperatorAndOperands::$Unknown { - operands: FromIterator::from_iter(operands.map(Into::into)), - operator, - }, - $(Self::$Variant(pos) => $VariantStruct::parse(pos, operands)?.into(),)* - }) - } - $operator_enum_vis fn pos(&self) -> PdfInputPosition { - match *self { - Self::$Unknown(ref operator) => operator.pos(), - $(Self::$Variant(pos) => pos.0,)* - } - } - } - - impl fmt::Debug for $PdfOperator { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::$Unknown(operator) => PdfOperatorUnparsed::debug_with_name("Unknown", &operator.bytes, operator.pos.0, f), - $(Self::$Variant(pos) => PdfOperatorUnparsed::debug_with_name(stringify!($Variant), $kw, pos.0, f),)* - } - } - } - - impl From<$PdfOperator> for PdfOperatorUnparsed { - fn from(v: $PdfOperator) -> PdfOperatorUnparsed { - match v { - $PdfOperator::$Unknown(operator) => operator, - $($PdfOperator::$Variant(pos) => PdfOperatorUnparsed { pos, bytes: ArcOrRef::Ref($kw) },)* - } - } - } - - impl From for $PdfOperator { - fn from(v: PdfOperatorUnparsed) -> $PdfOperator { - match &**v.bytes() { - $($kw => Self::$Variant(v.pos),)* - _ => Self::$Unknown(v), - } - } - } - - $(#[derive(Clone)] - $(#[$($variant_meta)*])* - $enum_vis struct $VariantStruct { - $enum_vis $pos: PdfInputPositionNoCompare, - $( - $(#[$($field_meta)*])* - $enum_vis $field: $field_ty, - )* - } - - impl fmt::Debug for $VariantStruct { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct(stringify!($VariantStruct)).field("pos", &self.pos)$(.field(stringify!($field), &self.$field))*.finish() - } - } - - impl GetPdfInputPosition for $VariantStruct { - fn get_pdf_input_position(&self) -> PdfInputPosition { - self.pos() - } - } - - impl From<$VariantStruct> for $PdfOperatorAndOperands { - fn from(v: $VariantStruct) -> Self { - Self::$Variant(v) - } - } - - impl $VariantStruct { - $enum_vis fn operator_from_pos(pos: impl Into) -> $PdfOperator { - $PdfOperator::$Variant(pos.into()) - } - $enum_vis fn operator(&self) -> $PdfOperator { - $PdfOperator::$Variant(self.pos) - } - $enum_vis fn pos(&self) -> PdfInputPosition { - self.pos.0 - } - } - - make_pdf_operator_enum! { - @impl_variant_parse - $enum_vis enum; - struct $VariantStruct { - $pos: PdfInputPositionNoCompare, - $( - #[$field_parse($($parse_args)*)] - $(#[$($field_meta)*])* - $field: $field_ty, - )* - } - })* - - $(#[$($operator_and_operands_meta)*])* - $enum_vis enum $PdfOperatorAndOperands { - $(#[$($unknown_variant_meta)*])* - $Unknown { - $(#[$($unknown_operands_meta)*])* - $unknown_operands: $unknown_operands_ty, - $(#[$($unknown_operator_meta)*])* - $unknown_operator: $unknown_operator_ty, - }, - $( - $(#[$($variant_meta)*])* - $Variant($VariantStruct), - )* - } - - impl $PdfOperatorAndOperands { - $enum_vis fn operator(&self) -> $PdfOperator { - match self { - Self::Unknown { operator, .. } => $PdfOperator::Unknown(operator.clone()), - $(Self::$Variant(v) => v.operator(),)* - } - } - $enum_vis fn pos(&self) -> PdfInputPosition { - match self { - Self::$Unknown { operator, .. } => operator.pos(), - $(Self::$Variant(v) => v.pos(),)* - } - } - } - - impl fmt::Debug for $PdfOperatorAndOperands { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::$Unknown { - operands, - operator, - } => f.debug_struct("Unknown").field("operator", operator).field("operands", operands).finish(), - $(Self::$Variant($VariantStruct { - $pos, - $($field,)* - }) => f.debug_struct(stringify!($Variant)).field("pos", $pos)$(.field(stringify!($field), $field))*.finish(),)* - } - } - } - - impl PdfRenderOperator for $PdfOperatorAndOperands { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - match self { - Self::$Unknown { - operands, - operator, - } => state.handle_unknown_operator(operator, operands), - $(Self::$Variant(v) => <$VariantStruct as PdfRenderOperator>::render(v, state),)* - } - } - } - }; - ( - @impl_variant_parse - $enum_vis:vis enum; - struct $VariantStruct:ident { - $pos:ident: PdfInputPositionNoCompare, - $( - #[$field_parse:ident($($parse_args:ident),* $(,)?)] - $(#[$($field_meta:tt)*])* - $field:ident: $field_ty:ty, - )* - } - ) => { - impl $VariantStruct { - $enum_vis fn parse(pos: impl Into, operands: impl IntoIterator) -> Result { - let pos = pos.into(); - let mut operands = operands.into_iter(); - $($(let Some($parse_args) = operands.next() else { - return Err(PdfParseError::OperatorHasTooFewOperands { operator: Self::operator_from_pos(pos) }); - };)*)* - if operands.next().is_some() { - return Err(PdfParseError::OperatorHasTooManyOperands { operator: Self::operator_from_pos(pos) }); - } - Ok(Self { - pos, - $($field: <$field_ty>::$field_parse($($parse_args),*)?,)* - }) - } - } - }; - ( - @impl_variant_parse - $enum_vis:vis enum; - struct $VariantStruct:ident { - $pos:ident: PdfInputPositionNoCompare, - #[$field_parse:ident(...)] - $(#[$($field_meta:tt)*])* - $field:ident: $field_ty:ty, - } - ) => { - impl $VariantStruct { - $enum_vis fn parse(pos: impl Into, operands: impl IntoIterator) -> Result { - let pos = pos.into(); - let operands = operands.into_iter(); - Ok(Self { - pos, - $field: <$field_ty>::$field_parse(operands)?, - }) - } - } - }; -} - -make_pdf_operator_enum! { - #[derive(Clone)] - pub enum PdfOperator; - #[derive(Clone)] - pub enum PdfOperatorAndOperands { - Unknown { - operands: Arc<[PdfObjectDirect]>, - operator: PdfOperatorUnparsed, - }, - #[kw = b"b"] - CloseFillAndStrokePath(PdfOperatorCloseFillAndStrokePath { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"B"] - FillAndStrokePath(PdfOperatorFillAndStrokePath { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"b*"] - CloseFillAndStrokePathEvenOdd(PdfOperatorCloseFillAndStrokePathEvenOdd { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"B*"] - FillAndStrokePathEvenOdd(PdfOperatorFillAndStrokePathEvenOdd { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"BDC"] - BeginMarkedContentWithProperties(PdfOperatorBeginMarkedContentWithProperties { - pos: PdfInputPositionNoCompare, - #[parse(tag)] - tag: PdfName, - #[parse(properties)] - properties: NameOr, - }), - #[kw = b"BI"] - BeginInlineImage(PdfOperatorBeginInlineImage { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"BMC"] - BeginMarkedContent(PdfOperatorBeginMarkedContent { - pos: PdfInputPositionNoCompare, - #[parse(tag)] - tag: PdfName, - }), - #[kw = b"BT"] - BeginText(PdfOperatorBeginText { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"BX"] - BeginCompatibilitySection(PdfOperatorBeginCompatibilitySection { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"c"] - CurveTo(PdfOperatorCurveTo { - pos: PdfInputPositionNoCompare, - #[parse(x1, y1)] - p1: PdfVec2D, - #[parse(x2, y2)] - p2: PdfVec2D, - #[parse(x3, y3)] - p3: PdfVec2D, - }), - #[kw = b"cm"] - ConcatMatrix(PdfOperatorConcatMatrix { - pos: PdfInputPositionNoCompare, - #[parse_flat(a, b, c, d, e, f)] - matrix: PdfMatrix, - }), - #[kw = b"CS"] - SetStrokeColorSpace(PdfOperatorSetStrokeColorSpace { - pos: PdfInputPositionNoCompare, - #[parse(name)] - name: PdfName, - }), - #[kw = b"cs"] - SetNonStrokeColorSpace(PdfOperatorSetNonStrokeColorSpace { - pos: PdfInputPositionNoCompare, - #[parse(name)] - name: PdfName, - }), - #[kw = b"d"] - SetLineDashPattern(PdfOperatorSetLineDashPattern { - pos: PdfInputPositionNoCompare, - #[parse(dash_array)] - dash_array: PdfObject, // TODO: actually parse - #[parse(dash_phase)] - dash_phase: PdfObject, // TODO: actually parse - }), - #[kw = b"d0"] - FontType3SetWidth(PdfOperatorFontType3SetWidth { - pos: PdfInputPositionNoCompare, - #[parse(x, y)] - width: PdfVec2D, - }), - #[kw = b"d1"] - FontType3SetWidthAndBBox(PdfOperatorFontType3SetWidthAndBBox { - pos: PdfInputPositionNoCompare, - #[parse(width_x, width_y)] - width: PdfVec2D, - #[parse_flat(lower_left_x, lower_left_y, upper_right_x, upper_right_y)] - bbox: PdfRectangle, - }), - #[kw = b"Do"] - PaintXObject(PdfOperatorPaintXObject { - pos: PdfInputPositionNoCompare, - #[parse(name)] - name: PdfName, - }), - #[kw = b"DP"] - DesignateMarkedContentPointWithProperties(PdfOperatorDesignateMarkedContentPointWithProperties { - pos: PdfInputPositionNoCompare, - #[parse(tag)] - tag: PdfName, - #[parse(properties)] - properties: NameOr, - }), - #[kw = b"EI"] - EndInlineImage(PdfOperatorEndInlineImage { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"EMC"] - EndMarkedContent(PdfOperatorEndMarkedContent { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"ET"] - EndText(PdfOperatorEndText { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"EX"] - EndCompatibilitySection(PdfOperatorEndCompatibilitySection { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"f"] - FillPath(PdfOperatorFillPath { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"F"] - FillPathObsolete(PdfOperatorFillPathObsolete { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"f*"] - FillPathEvenOdd(PdfOperatorFillPathEvenOdd { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"G"] - SetStrokeGray(PdfOperatorSetStrokeGray { - pos: PdfInputPositionNoCompare, - #[parse(gray)] - gray: PdfColorDeviceGray, - }), - #[kw = b"g"] - SetNonStrokeGray(PdfOperatorSetNonStrokeGray { - pos: PdfInputPositionNoCompare, - #[parse(gray)] - gray: PdfColorDeviceGray, - }), - #[kw = b"gs"] - SetGraphicsState(PdfOperatorSetGraphicsState { - pos: PdfInputPositionNoCompare, - #[parse(dictionary_name)] - dictionary_name: PdfName, - }), - #[kw = b"h"] - CloseSubpath(PdfOperatorCloseSubpath { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"i"] - SetFlatnessTolerance(PdfOperatorSetFlatnessTolerance { - pos: PdfInputPositionNoCompare, - #[parse(flatness)] - flatness: f32, - }), - #[kw = b"ID"] - BeginInlineImageData(PdfOperatorBeginInlineImageData { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"j"] - SetLineJoinStyle(PdfOperatorSetLineJoinStyle { - pos: PdfInputPositionNoCompare, - #[parse(line_join_style)] - line_join_style: u8, // TODO parse - }), - #[kw = b"J"] - SetLineCapStyle(PdfOperatorSetLineCapStyle { - pos: PdfInputPositionNoCompare, - #[parse(line_cap_style)] - line_cap_style: u8, // TODO parse - }), - #[kw = b"K"] - SetStrokeCmyk(PdfOperatorSetStrokeCmyk { - pos: PdfInputPositionNoCompare, - #[parse(c)] - c: f32, - #[parse(m)] - m: f32, - #[parse(y)] - y: f32, - #[parse(k)] - k: f32, - }), - #[kw = b"k"] - SetNonStrokeCmyk(PdfOperatorSetNonStrokeCmyk { - pos: PdfInputPositionNoCompare, - #[parse(c)] - c: f32, - #[parse(m)] - m: f32, - #[parse(y)] - y: f32, - #[parse(k)] - k: f32, - }), - #[kw = b"l"] - LineTo(PdfOperatorLineTo { - pos: PdfInputPositionNoCompare, - #[parse(x, y)] - to: PdfVec2D, - }), - #[kw = b"m"] - MoveTo(PdfOperatorMoveTo { - pos: PdfInputPositionNoCompare, - #[parse(x, y)] - to: PdfVec2D, - }), - #[kw = b"M"] - SetMiterLimit(PdfOperatorSetMiterLimit { - pos: PdfInputPositionNoCompare, - #[parse(limit)] - limit: f32, - }), - #[kw = b"MP"] - DesignateMarkedContentPoint(PdfOperatorDesignateMarkedContentPoint { - pos: PdfInputPositionNoCompare, - #[parse(tag)] - tag: PdfName, - }), - #[kw = b"n"] - EndPath(PdfOperatorEndPath { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"q"] - SaveGraphicsState(PdfOperatorSaveGraphicsState { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"Q"] - RestoreGraphicsState(PdfOperatorRestoreGraphicsState { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"re"] - Rectangle(PdfOperatorRectangle { - pos: PdfInputPositionNoCompare, - #[parse(x, y)] - p: PdfVec2D, - #[parse(width, height)] - size: PdfVec2D, - }), - #[kw = b"RG"] - SetStrokeRgb(PdfOperatorSetStrokeRgb { - pos: PdfInputPositionNoCompare, - #[parse_flat(r, g, b)] - color: PdfColorDeviceRgb, - }), - #[kw = b"rg"] - SetNonStrokeRgb(PdfOperatorSetNonStrokeRgb { - pos: PdfInputPositionNoCompare, - #[parse_flat(r, g, b)] - color: PdfColorDeviceRgb, - }), - #[kw = b"ri"] - SetColorRenderingIntent(PdfOperatorSetColorRenderingIntent { - pos: PdfInputPositionNoCompare, - #[parse(intent)] - intent: PdfRenderingIntent, - }), - #[kw = b"s"] - CloseAndStrokePath(PdfOperatorCloseAndStrokePath { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"S"] - StrokePath(PdfOperatorStrokePath { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"SC"] - SetStrokeColor(PdfOperatorSetStrokeColor { - pos: PdfInputPositionNoCompare, - #[parse_iter(...)] - color: Arc<[f32]>, - }), - #[kw = b"sc"] - SetNonStrokeColor(PdfOperatorSetNonStrokeColor { - pos: PdfInputPositionNoCompare, - #[parse_iter(...)] - color: Arc<[f32]>, - }), - #[kw = b"SCN"] - SetStrokeColorWithName(PdfOperatorSetStrokeColorWithName { - pos: PdfInputPositionNoCompare, - #[parse_iter(...)] - color_and_name: Arc<[NameOr]>, - }), - #[kw = b"scn"] - SetNonStrokeColorWithName(PdfOperatorSetNonStrokeColorWithName { - pos: PdfInputPositionNoCompare, - #[parse_iter(...)] - color_and_name: Arc<[NameOr]>, - }), - #[kw = b"sh"] - Shade(PdfOperatorShade { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"T*"] - TextNextLine(PdfOperatorTextNextLine { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"Tc"] - SetCharacterSpacing(PdfOperatorSetCharacterSpacing { - pos: PdfInputPositionNoCompare, - #[parse(char_space)] - char_space: f32, - }), - #[kw = b"Td"] - TextNextLineWithOffset(PdfOperatorTextNextLineWithOffset { - pos: PdfInputPositionNoCompare, - #[parse(x, y)] - offset: PdfVec2D, - }), - #[kw = b"TD"] - TextNextLineWithOffsetAndLeading(PdfOperatorTextNextLineWithOffsetAndLeading { - pos: PdfInputPositionNoCompare, - #[parse(x, y)] - offset: PdfVec2D, - }), - #[kw = b"Tf"] - SetFontAndSize(PdfOperatorSetFontAndSize { - pos: PdfInputPositionNoCompare, - #[parse(font)] - font: PdfName, - #[parse(size)] - size: f32, - }), - #[kw = b"Tj"] - ShowText(PdfOperatorShowText { - pos: PdfInputPositionNoCompare, - #[parse(text)] - text: PdfString, - }), - #[kw = b"TJ"] - ShowTextWithGlyphPositioning(PdfOperatorShowTextWithGlyphPositioning { - pos: PdfInputPositionNoCompare, - #[parse(text_and_positioning)] - text_and_positioning: Arc<[PdfStringOrNumber]>, - }), - #[kw = b"TL"] - SetTextLeading(PdfOperatorSetTextLeading { - pos: PdfInputPositionNoCompare, - #[parse(leading)] - leading: f32, - }), - #[kw = b"Tm"] - SetTextMatrix(PdfOperatorSetTextMatrix { - pos: PdfInputPositionNoCompare, - #[parse_flat(a, b, c, d, e, f)] - matrix: PdfMatrix, - }), - #[kw = b"Tr"] - SetTextRenderingMode(PdfOperatorSetTextRenderingMode { - pos: PdfInputPositionNoCompare, - #[parse(rendering_mode)] - rendering_mode: u8, // TODO: parse - }), - #[kw = b"Ts"] - SetTextRise(PdfOperatorSetTextRise { - pos: PdfInputPositionNoCompare, - #[parse(rise)] - rise: f32, - }), - #[kw = b"Tw"] - SetWordSpacing(PdfOperatorSetWordSpacing { - pos: PdfInputPositionNoCompare, - #[parse(word_space)] - word_space: f32, - }), - #[kw = b"Tz"] - SetTextHorizontalScaling(PdfOperatorSetTextHorizontalScaling { - pos: PdfInputPositionNoCompare, - #[parse(scale_percent)] - scale_percent: f32, - }), - #[kw = b"v"] - CurveTo23(PdfOperatorCurveTo23 { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"w"] - SetLineWidth(PdfOperatorSetLineWidth { - pos: PdfInputPositionNoCompare, - #[parse(line_width)] - line_width: f32, - }), - #[kw = b"W"] - Clip(PdfOperatorClip { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"W*"] - ClipEvenOdd(PdfOperatorClipEvenOdd { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"y"] - CurveTo13(PdfOperatorCurveTo13 { - pos: PdfInputPositionNoCompare, - }), - #[kw = b"'"] - TextNextLineAndShow(PdfOperatorTextNextLineAndShow { - pos: PdfInputPositionNoCompare, - #[parse(text)] - text: PdfString, - }), - #[kw = b"\""] - SetSpacingThenTextNextLineAndShow(PdfOperatorSetSpacingThenTextNextLineAndShow { - pos: PdfInputPositionNoCompare, - #[parse(word_space)] - word_space: f32, - #[parse(char_space)] - char_space: f32, - #[parse(text)] - text: PdfString, - }), - } -} - -impl GetPdfInputPosition for PdfOperator { - fn get_pdf_input_position(&self) -> PdfInputPosition { - self.pos() - } -} - -impl GetPdfInputPosition for PdfOperatorAndOperands { - fn get_pdf_input_position(&self) -> PdfInputPosition { - self.pos() - } -} - -#[derive(Clone)] -pub struct PdfContentStreamData { - pub operators: Arc<[PdfOperatorAndOperands]>, -} - -impl fmt::Debug for PdfContentStreamData { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("PdfContentStreamData") - .field("operators", &self.operators) - .finish() - } -} - -impl PdfStreamContents for PdfContentStreamData { - fn parse( - data: &[u8], - stream_pos: PdfInputPosition, - objects: Arc, - ) -> Result { - let mut parser = PdfParser { - objects, - tokenizer: PdfTokenizer::new( - data, - PdfInputPositionKnown { - pos: 0, - containing_streams_pos: stream_pos.get().map(|v| v.pos), - }, - ), - }; - let mut operands = Vec::new(); - let mut operators = Vec::new(); - loop { - parser.skip_comments_and_whitespace(); - if parser.tokenizer.peek().is_none() { - break; - } - match parser.parse_object_or_operator()? { - PdfObjectOrStreamDictionaryOrOperator::StreamDictionary { - stream_kw_pos, .. - } => return Err(PdfParseError::StreamNotAllowedHere { pos: stream_kw_pos }), - PdfObjectOrStreamDictionaryOrOperator::Object(object) => operands.push(object), - PdfObjectOrStreamDictionaryOrOperator::Operator(operator) => { - operators.push(PdfOperator::from(operator).parse(operands.drain(..))?); - } - } - } - if operands.is_empty() { - Ok(Self { - operators: operators.into(), - }) - } else { - Err(PdfParseError::MissingOperator { - pos: parser.tokenizer.pos(), - }) - } - } -} - -pub type PdfContentStream = PdfStream; diff --git a/src/pdf/document_structure.rs b/src/pdf/document_structure.rs deleted file mode 100644 index 268d503..0000000 --- a/src/pdf/document_structure.rs +++ /dev/null @@ -1,743 +0,0 @@ -use crate::{ - pdf::{ - content_stream::PdfContentStream, - font::PdfFont, - object::{ - IsPdfNull, MaybeArray, PdfDate, PdfDictionary, PdfInteger, PdfName, PdfObject, - PdfObjectDirect, PdfObjectIndirect, PdfRectangle, PdfStream, PdfString, - }, - parse::{PdfParse, PdfParseError}, - pdf_parse, - render::{PdfRenderOperator, PdfRenderState}, - }, - util::DagDebugState, -}; -use rayon::iter::{FromParallelIterator, IntoParallelIterator, ParallelIterator}; -use std::{borrow::Cow, fmt, sync::Arc}; - -pdf_parse! { - #[pdf(name)] - #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)] - pub enum PdfDocumentCatalogType { - #[pdf(name = "Catalog")] - #[default] - Catalog, - } -} - -pdf_parse! { - #[pdf] - #[derive(Clone)] - pub struct PdfDocumentCatalog { - #[pdf(name = "Type")] - pub ty: PdfDocumentCatalogType, - #[pdf(name = "Version")] - pub version: Option, - #[pdf(name = "Extensions")] - pub extensions: Option, - #[pdf(name = "Pages")] - pub pages: PdfPageTree, - // TODO - #[pdf(flatten)] - pub rest: PdfDictionary, - } -} - -impl fmt::Debug for PdfDocumentCatalog { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - DagDebugState::scope(|_state| { - let Self { - ty, - version, - extensions, - pages, - rest, - } = self; - f.debug_struct("PdfDocumentCatalog") - .field("ty", ty) - .field("version", version) - .field("extensions", extensions) - .field("pages", pages) - .field("rest", rest) - .finish() - }) - } -} - -pdf_parse! { - #[pdf] - #[derive(Clone, Debug)] - pub struct PdfResourcesDictionary { - #[pdf(name = "Font")] - pub fonts: PdfDictionary, - #[pdf(flatten)] - pub rest: PdfDictionary, - } -} - -#[derive(Clone)] -pub struct PdfPageTree { - page_tree: PdfPageTreeNode, - pages: Arc<[PdfPage]>, -} - -impl PdfPageTree { - fn collect_leaves( - node: &PdfPageTreeNode, - leaves: &mut Vec, - ) -> Result<(), PdfParseError> { - for kid in node.kids.iter() { - match kid { - PdfPageTreeNodeOrLeaf::Node(node) => Self::collect_leaves(node, leaves)?, - PdfPageTreeNodeOrLeaf::Leaf(leaf) => { - leaves.push(leaf.clone()); - } - PdfPageTreeNodeOrLeaf::Other(v) => { - return Err(PdfParseError::InvalidType { - pos: v.pos(), - ty: "dictionary", - expected_ty: "PdfPageTreeNodeOrLeaf", - }); - } - } - } - Ok(()) - } - pub fn try_from_page_tree_root(mut page_tree: PdfPageTreeNode) -> Result { - page_tree.propagate_inheritable_data_to_leaves(); - let mut leaves = Vec::new(); - Self::collect_leaves(&page_tree, &mut leaves)?; - Ok(Self { - page_tree, - pages: Result::from_par_iter( - leaves - .into_par_iter() - .map(PdfPage::parse_after_propagating_inheritable_data) - .panic_fuse(), - )?, - }) - } - pub fn page_tree(&self) -> &PdfPageTreeNode { - &self.page_tree - } - pub fn pages(&self) -> &Arc<[PdfPage]> { - &self.pages - } -} - -impl fmt::Debug for PdfPageTree { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - DagDebugState::scope(|_state| { - let Self { - page_tree: _, - pages, - } = self; - f.debug_struct("PdfPageTree") - .field("pages", pages) - .finish_non_exhaustive() - }) - } -} - -impl IsPdfNull for PdfPageTree { - fn is_pdf_null(&self) -> bool { - self.page_tree.is_pdf_null() - } -} - -impl PdfParse for PdfPageTree { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed("PdfPageTree") - } - fn parse(object: PdfObject) -> Result { - Self::try_from_page_tree_root(PdfParse::parse(object)?) - } -} - -pdf_parse! { - #[pdf] - #[derive(Clone, Default, Debug)] - pub struct PdfPageInheritableData { - #[pdf(name = "Resources")] - pub resources: Option, - #[pdf(name = "MediaBox")] - pub media_box: Option, - #[pdf(name = "CropBox")] - pub crop_box: Option, - #[pdf(name = "Rotate")] - pub rotate: Option, - #[pdf(flatten)] - pub rest: PdfDictionary, - } -} - -impl PdfPageInheritableData { - pub fn propagate_to(&self, target: &mut Self) { - let Self { - resources, - media_box, - crop_box, - rotate, - rest: _, - } = self; - fn propagate_to(this: &Option, target: &mut Option) { - if let (Some(this), target @ None) = (this, target) { - *target = Some(this.clone()); - } - } - propagate_to(resources, &mut target.resources); - propagate_to(media_box, &mut target.media_box); - propagate_to(crop_box, &mut target.crop_box); - propagate_to(rotate, &mut target.rotate); - } -} - -pdf_parse! { - #[pdf(name)] - #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)] - pub enum PdfPageTreeNodeType { - #[pdf(name = "Pages")] - #[default] - Pages, - } -} - -pdf_parse! { - #[pdf] - #[derive(Clone)] - pub struct PdfPageTreeNode { - #[pdf(name = "Type")] - pub ty: PdfPageTreeNodeType, - #[pdf(name = "Parent")] - pub parent: Option, - #[pdf(name = "Kids")] - pub kids: Arc<[PdfPageTreeNodeOrLeaf]>, - #[pdf(name = "Count")] - pub count: usize, - // TODO - #[pdf(flatten)] - pub inheritable: PdfPageInheritableData, - } -} - -impl fmt::Debug for PdfPageTreeNode { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - DagDebugState::scope(|_state| { - let Self { - ty, - parent, - kids, - count, - inheritable, - } = self; - f.debug_struct("PdfPageTreeNode") - .field("ty", ty) - .field("parent", parent) - .field("kids", kids) - .field("count", count) - .field("inheritable", inheritable) - .finish() - }) - } -} - -impl PdfPageTreeNode { - pub fn propagate_inheritable_data_to_leaves(&mut self) { - for kid in Arc::make_mut(&mut self.kids) { - if let Some(target) = kid.inheritable_data_mut() { - self.inheritable.propagate_to(target); - } - kid.propagate_inheritable_data_to_leaves(); - } - } -} - -pdf_parse! { - #[pdf(name)] - #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)] - pub enum PdfPageType { - #[pdf(name = "Page")] - #[default] - Page, - } -} - -pdf_parse! { - #[pdf(name)] - #[derive(Clone, PartialEq, Eq, Hash, Debug)] - pub enum PdfPageAnnotationsTabOrder { - #[pdf(name = "R")] - RowOrder, - #[pdf(name = "C")] - ColumnOrder, - #[pdf(name = "S")] - StructureOrder, - #[pdf(other)] - Other(PdfName), - } -} - -pdf_parse! { - #[pdf] - #[derive(Clone)] - pub struct PdfPageTreeLeaf { - #[pdf(name = "Type")] - pub ty: PdfPageType, - #[pdf(name = "Parent")] - pub parent: PdfObjectIndirect, - #[pdf(name = "LastModified")] - pub last_modified: Option, - #[pdf(name = "BleedBox")] - pub bleed_box: Option, - #[pdf(name = "TrimBox")] - pub trim_box: Option, - #[pdf(name = "ArtBox")] - pub art_box: Option, - #[pdf(name = "BoxColorInfo")] - pub box_color_info: Option, - #[pdf(name = "Contents")] - pub contents: MaybeArray, - #[pdf(name = "Group")] - pub group: Option, - #[pdf(name = "Thumb")] - pub thumbnail: Option, - #[pdf(name = "B")] - pub beads: Option>, - #[pdf(name = "Dur")] - pub duration: Option, - #[pdf(name = "Trans")] - pub transition: Option, - #[pdf(name = "Annots")] - pub annotations: Option>, - #[pdf(name = "AA")] - pub additional_actions: Option, - #[pdf(name = "Metadata")] - pub metadata: Option, - #[pdf(name = "PieceInfo")] - pub piece_info: Option, - #[pdf(name = "StructParents")] - pub structural_parents: Option, - #[pdf(name = "ID")] - pub parent_web_capture_content_set_id: Option, - #[pdf(name = "PZ")] - pub preferred_zoom_factor: Option, - #[pdf(name = "SeparationInfo")] - pub separation_info: Option, - #[pdf(name = "Tabs")] - pub annotations_tab_order: Option, - #[pdf(name = "TemplateInstantiated")] - pub template_instantiated: Option, - #[pdf(name = "PresSteps")] - pub pres_steps: Option, - #[pdf(name = "UserUnit")] - pub user_unit: Option, - #[pdf(name = "VP")] - pub viewports: Option>, - #[pdf(flatten)] - pub inheritable: PdfPageInheritableData, - } -} - -impl fmt::Debug for PdfPageTreeLeaf { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - DagDebugState::scope(|_state| { - let Self { - ty, - parent, - last_modified, - bleed_box, - trim_box, - art_box, - box_color_info, - contents, - group, - thumbnail, - beads, - duration, - transition, - annotations, - additional_actions, - metadata, - piece_info, - structural_parents, - parent_web_capture_content_set_id, - preferred_zoom_factor, - separation_info, - annotations_tab_order, - template_instantiated, - pres_steps, - user_unit, - viewports, - inheritable, - } = self; - f.debug_struct("PdfPageTreeLeaf") - .field("ty", ty) - .field("parent", parent) - .field("last_modified", last_modified) - .field("bleed_box", bleed_box) - .field("trim_box", trim_box) - .field("art_box", art_box) - .field("box_color_info", box_color_info) - .field("contents", contents) - .field("group", group) - .field("thumbnail", thumbnail) - .field("beads", beads) - .field("duration", duration) - .field("transition", transition) - .field("annotations", annotations) - .field("additional_actions", additional_actions) - .field("metadata", metadata) - .field("piece_info", piece_info) - .field("structural_parents", structural_parents) - .field( - "parent_web_capture_content_set_id", - parent_web_capture_content_set_id, - ) - .field("preferred_zoom_factor", preferred_zoom_factor) - .field("separation_info", separation_info) - .field("annotations_tab_order", annotations_tab_order) - .field("template_instantiated", template_instantiated) - .field("pres_steps", pres_steps) - .field("user_unit", user_unit) - .field("viewports", viewports) - .field("inheritable", inheritable) - .finish() - }) - } -} - -pdf_parse! { - #[pdf(tag = "Type")] - #[derive(Clone)] - pub enum PdfPageTreeNodeOrLeaf { - #[pdf(tag_value = "Pages")] - Node(PdfPageTreeNode), - #[pdf(tag_value = "Page")] - Leaf(PdfPageTreeLeaf), - #[pdf(other)] - Other(PdfDictionary), - } -} - -impl PdfPageTreeNodeOrLeaf { - pub fn propagate_inheritable_data_to_leaves(&mut self) { - match self { - PdfPageTreeNodeOrLeaf::Node(v) => v.propagate_inheritable_data_to_leaves(), - PdfPageTreeNodeOrLeaf::Leaf(_) | PdfPageTreeNodeOrLeaf::Other(_) => {} - } - } - pub fn inheritable_data_mut(&mut self) -> Option<&mut PdfPageInheritableData> { - match self { - PdfPageTreeNodeOrLeaf::Node(v) => Some(&mut v.inheritable), - PdfPageTreeNodeOrLeaf::Leaf(v) => Some(&mut v.inheritable), - PdfPageTreeNodeOrLeaf::Other(_) => None, - } - } - pub fn inheritable_data(&self) -> Option<&PdfPageInheritableData> { - match self { - PdfPageTreeNodeOrLeaf::Node(v) => Some(&v.inheritable), - PdfPageTreeNodeOrLeaf::Leaf(v) => Some(&v.inheritable), - PdfPageTreeNodeOrLeaf::Other(_) => None, - } - } -} - -impl fmt::Debug for PdfPageTreeNodeOrLeaf { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Node(v) => v.fmt(f), - Self::Leaf(v) => v.fmt(f), - Self::Other(v) => v.fmt(f), - } - } -} - -/// the amount by which the page is rotated clockwise when displaying or printing, is always a multiple of 90 degrees. -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] -pub enum PdfPageRotation { - #[default] - NoRotation = 0, - ClockwiseBy90Degrees = 90, - By180Degrees = 180, - ClockwiseBy270Degrees = 270, -} - -impl PdfPageRotation { - pub fn from_clockwise_angle_in_degrees(angle: i32) -> Option { - match angle.rem_euclid(360) { - 0 => Some(Self::NoRotation), - 90 => Some(Self::ClockwiseBy90Degrees), - 180 => Some(Self::By180Degrees), - 270 => Some(Self::ClockwiseBy270Degrees), - _ => None, - } - } - pub fn from_clockwise_angle_in_degrees_i128(angle: i128) -> Option { - Self::from_clockwise_angle_in_degrees((angle % 360) as i32) - } -} - -impl From for i32 { - fn from(value: PdfPageRotation) -> Self { - value as i32 - } -} - -impl IsPdfNull for PdfPageRotation { - fn is_pdf_null(&self) -> bool { - false - } -} - -impl PdfParse for PdfPageRotation { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed("page rotation") - } - fn parse(object: PdfObject) -> Result { - let object = PdfObjectDirect::from(object); - let pos = object.pos(); - let angle = PdfInteger::parse(object.into())?; - Self::from_clockwise_angle_in_degrees_i128(angle.value()) - .ok_or(PdfParseError::IntegerOutOfRange { pos }) - } -} - -#[derive(Clone)] -pub struct PdfPage { - pub ty: PdfPageType, - pub parent: PdfObjectIndirect, - pub last_modified: Option, - pub resources: PdfResourcesDictionary, - pub media_box: PdfRectangle, - pub crop_box: PdfRectangle, - pub bleed_box: PdfRectangle, - pub trim_box: PdfRectangle, - pub art_box: PdfRectangle, - pub box_color_info: Option, - pub contents: Arc<[PdfContentStream]>, - pub rotate: PdfPageRotation, - pub group: Option, - pub thumbnail: Option, - pub beads: Option>, - pub duration: Option, - pub transition: Option, - pub annotations: Option>, - pub additional_actions: Option, - pub metadata: Option, - pub piece_info: Option, - pub structural_parents: Option, - pub parent_web_capture_content_set_id: Option, - pub preferred_zoom_factor: Option, - pub separation_info: Option, - pub annotations_tab_order: Option, - pub template_instantiated: Option, - pub pres_steps: Option, - pub user_unit: f32, - pub viewports: Option>, - pub rest: PdfDictionary, - rendered_objects: Option, -} - -impl PdfPage { - pub fn rendered_objects(&self) -> &PdfPageRenderedObjects { - let Some(retval) = &self.rendered_objects else { - unreachable!(); - }; - retval - } - pub fn parse_after_propagating_inheritable_data( - leaf: PdfPageTreeLeaf, - ) -> Result { - let PdfPageTreeLeaf { - ty, - parent, - last_modified, - bleed_box, - trim_box, - art_box, - box_color_info, - contents, - group, - thumbnail, - beads, - duration, - transition, - annotations, - additional_actions, - metadata, - piece_info, - structural_parents, - parent_web_capture_content_set_id, - preferred_zoom_factor, - separation_info, - annotations_tab_order, - template_instantiated, - pres_steps, - user_unit, - viewports, - inheritable: - PdfPageInheritableData { - resources, - media_box, - crop_box, - rotate, - rest, - }, - } = leaf; - let pos = rest.pos(); - let resources = resources.ok_or(PdfParseError::InvalidType { - pos, - ty: "null", - expected_ty: "page resources dictionary", - })?; - let media_box = media_box.ok_or(PdfParseError::InvalidType { - pos, - ty: "null", - expected_ty: "page MediaBox rectangle", - })?; - let crop_box = crop_box.unwrap_or(media_box); - let rotate = rotate.unwrap_or(PdfPageRotation::NoRotation); - let mut retval = Self { - ty, - parent, - last_modified, - resources, - media_box, - crop_box, - bleed_box: bleed_box.unwrap_or(crop_box), - trim_box: trim_box.unwrap_or(crop_box), - art_box: art_box.unwrap_or(crop_box), - box_color_info, - contents: contents.0, - rotate, - group, - thumbnail, - beads, - duration, - transition, - annotations, - additional_actions, - metadata, - piece_info, - structural_parents, - parent_web_capture_content_set_id, - preferred_zoom_factor, - separation_info, - annotations_tab_order, - template_instantiated, - pres_steps, - user_unit: user_unit.unwrap_or(1.0), - viewports, - rest, - rendered_objects: None, - }; - retval.rendered_objects = Some(PdfPageRenderedObjects::render_page(&retval)?); - Ok(retval) - } -} - -impl fmt::Debug for PdfPage { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - DagDebugState::scope(|_state| { - let Self { - ty, - parent, - last_modified, - resources, - media_box, - crop_box, - bleed_box, - trim_box, - art_box, - box_color_info, - contents, - rotate, - group, - thumbnail, - beads, - duration, - transition, - annotations, - additional_actions, - metadata, - piece_info, - structural_parents, - parent_web_capture_content_set_id, - preferred_zoom_factor, - separation_info, - annotations_tab_order, - template_instantiated, - pres_steps, - user_unit, - viewports, - rest, - rendered_objects, - } = self; - struct Unparsed; - impl fmt::Debug for Unparsed { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str("") - } - } - f.debug_struct("PdfPage") - .field("ty", ty) - .field("parent", parent) - .field("last_modified", last_modified) - .field("resources", resources) - .field("media_box", media_box) - .field("crop_box", crop_box) - .field("bleed_box", bleed_box) - .field("trim_box", trim_box) - .field("art_box", art_box) - .field("box_color_info", box_color_info) - .field("contents", contents) - .field("rotate", rotate) - .field("group", group) - .field("thumbnail", thumbnail) - .field("beads", beads) - .field("duration", duration) - .field("transition", transition) - .field("annotations", annotations) - .field("additional_actions", additional_actions) - .field("metadata", metadata) - .field("piece_info", piece_info) - .field("structural_parents", structural_parents) - .field( - "parent_web_capture_content_set_id", - parent_web_capture_content_set_id, - ) - .field("preferred_zoom_factor", preferred_zoom_factor) - .field("separation_info", separation_info) - .field("annotations_tab_order", annotations_tab_order) - .field("template_instantiated", template_instantiated) - .field("pres_steps", pres_steps) - .field("user_unit", user_unit) - .field("viewports", viewports) - .field("rest", rest) - .field( - "rendered_objects", - if let Some(rendered_objects) = rendered_objects { - rendered_objects - } else { - &Unparsed - }, - ) - .finish() - }) - } -} - -#[derive(Clone, Debug)] -pub struct PdfPageRenderedObjects {} - -impl PdfPageRenderedObjects { - fn render_page(page: &PdfPage) -> Result { - let mut state = PdfRenderState::new(page); - for content_stream in page.contents.iter() { - for op in content_stream.decoded_data().as_ref()?.operators.iter() { - op.render(&mut state)?; - } - } - Ok(Self {}) - } -} diff --git a/src/pdf/font.rs b/src/pdf/font.rs deleted file mode 100644 index 04b62f3..0000000 --- a/src/pdf/font.rs +++ /dev/null @@ -1,924 +0,0 @@ -use crate::{ - pdf::{ - object::{ - IsPdfNull, PdfArray, PdfDictionary, PdfMatrix, PdfName, PdfNameOrInteger, PdfObject, - PdfObjectDirect, PdfRectangle, PdfStream, PdfString, - }, - parse::{ - GetPdfInputPosition, PdfInputPosition, PdfInputPositionNoCompare, PdfParse, - PdfParseError, - }, - pdf_parse, - }, - util::{ArcOrRef, DagDebugState}, -}; -use std::{borrow::Cow, collections::BTreeMap, fmt, sync::Arc}; - -mod tables; -mod type_1_parse; - -pdf_parse! { - #[pdf(transparent)] - #[derive(Clone)] - // TODO: actually parse the stream - pub struct PdfFontToUnicode { - #[pdf] - stream: PdfStream, - } -} - -impl fmt::Debug for PdfFontToUnicode { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - DagDebugState::scope(|_state| { - let Self { stream } = self; - f.debug_struct("PdfFontToUnicode") - .field("stream", stream) - .finish() - }) - } -} - -pdf_parse! { - #[pdf(name)] - #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)] - pub enum PdfFontDescriptorType { - #[pdf(name = "FontDescriptor")] - #[default] - FontDescriptor, - } -} - -pdf_parse! { - #[pdf(name)] - #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] - pub enum PdfFontStretch { - #[pdf(name = "UltraCondensed")] - UltraCondensed, - #[pdf(name = "ExtraCondensed")] - ExtraCondensed, - #[pdf(name = "Condensed")] - Condensed, - #[pdf(name = "SemiCondensed")] - SemiCondensed, - #[pdf(name = "Normal")] - Normal, - #[pdf(name = "SemiExpanded")] - SemiExpanded, - #[pdf(name = "Expanded")] - Expanded, - #[pdf(name = "ExtraExpanded")] - ExtraExpanded, - #[pdf(name = "UltraExpanded")] - UltraExpanded, - } -} - -pdf_parse! { - #[pdf] - #[derive(Clone)] - pub struct PdfFontDescriptor { - #[pdf(name = "Type")] - pub ty: PdfFontDescriptorType, - #[pdf(name = "FontName")] - pub font_name: PdfName, - #[pdf(name = "FontFamily")] - pub font_family: Option, - #[pdf(name = "FontStretch")] - pub font_stretch: Option, - #[pdf(name = "FontWeight")] - pub font_weight: Option, - #[pdf(name = "Flags")] - pub flags: u32, - #[pdf(name = "FontBBox")] - pub font_bounding_box: Option, - #[pdf(name = "ItalicAngle")] - pub italic_angle: f32, - #[pdf(name = "Ascent")] - pub ascent: Option, - #[pdf(name = "Descent")] - pub descent: Option, - #[pdf(name = "Leading")] - pub leading: Option, - #[pdf(name = "CapHeight")] - pub cap_height: Option, - #[pdf(name = "XHeight")] - pub x_height: Option, - #[pdf(name = "StemV")] - pub stem_v: Option, - #[pdf(name = "StemH")] - pub stem_h: Option, - #[pdf(name = "AvgWidth")] - pub avg_width: Option, - #[pdf(name = "MaxWidth")] - pub max_width: Option, - #[pdf(name = "MissingWidth")] - pub missing_width: Option, - #[pdf(name = "FontFile")] - pub font_file: Option>, - #[pdf(name = "FontFile2")] - pub font_file2: Option, - #[pdf(name = "FontFile3")] - pub font_file3: Option, - #[pdf(name = "CharSet")] - pub char_set: Option, - #[pdf(flatten)] - pub rest: PdfDictionary, - } -} - -impl fmt::Debug for PdfFontDescriptor { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - DagDebugState::scope(|_state| { - let Self { - ty, - font_name, - font_family, - font_stretch, - font_weight, - flags, - font_bounding_box, - italic_angle, - ascent, - descent, - leading, - cap_height, - x_height, - stem_v, - stem_h, - avg_width, - max_width, - missing_width, - font_file, - font_file2, - font_file3, - char_set, - rest, - } = self; - f.debug_struct("PdfFontDescriptor") - .field("ty", ty) - .field("font_name", font_name) - .field("font_family", font_family) - .field("font_stretch", font_stretch) - .field("font_weight", font_weight) - .field("flags", flags) - .field("font_bounding_box", font_bounding_box) - .field("italic_angle", italic_angle) - .field("ascent", ascent) - .field("descent", descent) - .field("leading", leading) - .field("cap_height", cap_height) - .field("x_height", x_height) - .field("stem_v", stem_v) - .field("stem_h", stem_h) - .field("avg_width", avg_width) - .field("max_width", max_width) - .field("missing_width", missing_width) - .field("font_file", font_file) - .field("font_file2", font_file2) - .field("font_file3", font_file3) - .field("char_set", char_set) - .field("rest", rest) - .finish() - }) - } -} - -pdf_parse! { - #[pdf(name)] - #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)] - pub enum PdfFontType { - #[pdf(name = "Font")] - #[default] - Font, - } -} - -#[derive(Clone)] -pub enum PdfTodo {} - -impl fmt::Debug for PdfTodo { - fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result { - match *self {} - } -} - -impl IsPdfNull for PdfTodo { - fn is_pdf_null(&self) -> bool { - match *self {} - } -} - -impl PdfParse for PdfTodo { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed("PdfTodo") - } - #[track_caller] - fn parse(object: PdfObject) -> Result { - todo!("{object:?}") - } -} - -pdf_parse! { - #[pdf(tag = "Subtype")] - #[derive(Clone)] - pub enum PdfFont { - #[pdf(tag_value = "Type0")] - Type0(Arc), - #[pdf(tag_value = "Type1")] - Type1(PdfFontType1), - #[pdf(other)] - Other(Arc), - } -} - -impl fmt::Debug for PdfFont { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - DagDebugState::scope(|state| match self { - PdfFont::Type0(v) => state.debug_or_id(v, "PdfFontType0(...)").fmt(f), - PdfFont::Type1(v) => v.fmt(f), - PdfFont::Other(v) => match **v {}, - }) - } -} - -impl PdfFont { - pub(crate) fn is_vertical_writing_mode(&self) -> bool { - // TODO: - false - } -} - -pdf_parse! { - #[pdf(name)] - #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)] - pub enum PdfFontType0Subtype { - #[pdf(name = "Type0")] - #[default] - Type0, - } -} - -pdf_parse! { - #[pdf] - #[derive(Clone)] - pub struct PdfFontType0 { - #[pdf(name = "Type")] - pub ty: PdfFontType, - #[pdf(name = "Subtype")] - pub subtype: PdfFontType0Subtype, - #[pdf(name = "BaseFont")] - pub base_font: PdfName, - #[pdf(name = "Encoding")] - // TODO - pub encoding: PdfObjectDirect, - #[pdf(name = "DescendentFonts")] - // TODO - pub descendent_fonts: [PdfDictionary; 1], - #[pdf(name = "ToUnicode")] - pub to_unicode: Option, - #[pdf(flatten)] - pub rest: PdfDictionary, - } -} - -impl fmt::Debug for PdfFontType0 { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - DagDebugState::scope(|_state| { - let Self { - ty, - subtype, - base_font, - encoding, - descendent_fonts, - to_unicode, - rest, - } = self; - f.debug_struct("PdfFontType0") - .field("ty", ty) - .field("subtype", subtype) - .field("base_font", base_font) - .field("encoding", encoding) - .field("descendent_fonts", descendent_fonts) - .field("to_unicode", to_unicode) - .field("rest", rest) - .finish() - }) - } -} - -pdf_parse! { - #[pdf(name)] - #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)] - pub enum PdfFontType1Subtype { - #[pdf(name = "Type1")] - #[default] - Type1, - } -} - -pdf_parse! { - #[pdf(name)] - #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] - pub enum PdfStandardFontName { - #[pdf(name = "Times-Roman")] - TimesRoman, - #[pdf(name = "Helvetica")] - Helvetica, - #[pdf(name = "Courier")] - Courier, - #[pdf(name = "Symbol")] - Symbol, - #[pdf(name = "Times-Bold")] - TimesBold, - #[pdf(name = "Helvetica-Bold")] - HelveticaBold, - #[pdf(name = "Courier-Bold")] - CourierBold, - #[pdf(name = "ZapfDingbats")] - ZapfDingbats, - #[pdf(name = "Times-Italic")] - TimesItalic, - #[pdf(name = "Helvetica-Oblique")] - HelveticaOblique, - #[pdf(name = "Courier-Oblique")] - CourierOblique, - #[pdf(name = "Times-BoldItalic")] - TimesBoldItalic, - #[pdf(name = "Helvetica-BoldOblique")] - HelveticaBoldOblique, - #[pdf(name = "Courier-BoldOblique")] - CourierBoldOblique, - } -} - -#[derive(Clone)] -pub enum PdfFontType1 { - Standard(Arc), - Other(Arc), -} - -impl fmt::Debug for PdfFontType1 { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - DagDebugState::scope(|state| match self { - PdfFontType1::Standard(v) => state.debug_or_id(v, "PdfFontType1Standard(...)").fmt(f), - PdfFontType1::Other(v) => state.debug_or_id(v, "PdfFontType1Other(...)").fmt(f), - }) - } -} - -impl PdfFontType1 { - pub fn common(&self) -> PdfFontType1Common { - match self { - PdfFontType1::Standard(v) => v.common(), - PdfFontType1::Other(v) => v.common(), - } - } - pub fn name(&self) -> &Option { - match self { - Self::Standard(v) => &v.name, - Self::Other(v) => &v.name, - } - } - pub fn base_font(&self) -> PdfName { - match self { - Self::Standard(v) => v.base_font.into(), - Self::Other(v) => v.base_font.clone(), - } - } - pub fn first_char(&self) -> Option { - match self { - Self::Standard(v) => v.first_char, - Self::Other(v) => Some(v.first_char), - } - } - pub fn last_char(&self) -> Option { - match self { - Self::Standard(v) => v.last_char, - Self::Other(v) => Some(v.last_char), - } - } - pub fn widths(&self) -> Option<&Arc<[f32]>> { - match self { - Self::Standard(v) => v.widths.as_ref(), - Self::Other(v) => Some(&v.widths), - } - } - pub fn font_descriptor(&self) -> Option<&PdfFontDescriptor> { - match self { - Self::Standard(v) => v.font_descriptor.as_ref(), - Self::Other(v) => Some(&v.font_descriptor), - } - } - pub fn encoding(&self) -> &Option { - match self { - Self::Standard(v) => &v.encoding, - Self::Other(v) => &v.encoding, - } - } - pub fn to_unicode(&self) -> &Option { - match self { - Self::Standard(v) => &v.to_unicode, - Self::Other(v) => &v.to_unicode, - } - } - pub fn rest(&self) -> &PdfDictionary { - match self { - Self::Standard(v) => &v.rest, - Self::Other(v) => &v.rest, - } - } -} - -impl IsPdfNull for PdfFontType1 { - fn is_pdf_null(&self) -> bool { - false - } -} - -impl PdfParse for PdfFontType1 { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed("PdfFontType1") - } - fn parse(object: PdfObject) -> Result { - let object = object.into(); - let font = if let PdfObjectDirect::Dictionary(object) = object { - if let Ok(_) = PdfStandardFontName::parse(object.get_or_null(b"BaseFont".as_slice())) { - Self::Standard(PdfParse::parse(object.into())?) - } else { - Self::Other(PdfParse::parse(object.into())?) - } - } else { - Self::Other(PdfParse::parse(object.into())?) - }; - if let Some(font_file) = font.font_descriptor().and_then(|v| v.font_file.as_ref()) { - font_file.decoded_data().as_ref()?; - } - Ok(font) - } -} - -#[derive(Clone)] -pub struct PdfFontType1Common { - pub ty: PdfFontType, - pub subtype: PdfFontType1Subtype, - pub name: Option, - pub base_font: PdfName, - pub first_char: Option, - pub last_char: Option, - pub widths: Option>, - pub font_descriptor: Option, - pub encoding: Option, - pub to_unicode: Option, - pub rest: PdfDictionary, -} - -impl fmt::Debug for PdfFontType1Common { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - DagDebugState::scope(|_state| { - let Self { - ty, - subtype, - name, - base_font, - first_char, - last_char, - widths, - font_descriptor, - encoding, - to_unicode, - rest, - } = self; - f.debug_struct("PdfFontType1Common") - .field("ty", ty) - .field("subtype", subtype) - .field("name", name) - .field("base_font", base_font) - .field("first_char", first_char) - .field("last_char", last_char) - .field("widths", widths) - .field("font_descriptor", font_descriptor) - .field("encoding", encoding) - .field("to_unicode", to_unicode) - .field("rest", rest) - .finish() - }) - } -} - -pdf_parse! { - #[pdf] - #[derive(Clone)] - pub struct PdfFontType1Standard { - #[pdf(name = "Type")] - pub ty: PdfFontType, - #[pdf(name = "Subtype")] - pub subtype: PdfFontType1Subtype, - #[pdf(name = "Name")] - pub name: Option, - #[pdf(name = "BaseFont")] - pub base_font: PdfStandardFontName, - #[pdf(name = "FirstChar")] - pub first_char: Option, - #[pdf(name = "LastChar")] - pub last_char: Option, - #[pdf(name = "Widths")] - pub widths: Option>, - #[pdf(name = "FontDescriptor")] - pub font_descriptor: Option, - #[pdf(name = "Encoding")] - pub encoding: Option, - #[pdf(name = "ToUnicode")] - pub to_unicode: Option, - #[pdf(flatten)] - pub rest: PdfDictionary, - } -} - -impl fmt::Debug for PdfFontType1Standard { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - DagDebugState::scope(|_state| { - let Self { - ty, - subtype, - name, - base_font, - first_char, - last_char, - widths, - font_descriptor, - encoding, - to_unicode, - rest, - } = self; - f.debug_struct("PdfFontType1Standard") - .field("ty", ty) - .field("subtype", subtype) - .field("name", name) - .field("base_font", base_font) - .field("first_char", first_char) - .field("last_char", last_char) - .field("widths", widths) - .field("font_descriptor", font_descriptor) - .field("encoding", encoding) - .field("to_unicode", to_unicode) - .field("rest", rest) - .finish() - }) - } -} - -impl PdfFontType1Standard { - pub fn common(&self) -> PdfFontType1Common { - let Self { - ty, - subtype, - ref name, - base_font, - first_char, - last_char, - ref widths, - ref font_descriptor, - ref encoding, - ref to_unicode, - ref rest, - } = *self; - PdfFontType1Common { - ty, - subtype, - name: name.clone(), - base_font: base_font.into(), - first_char, - last_char, - widths: widths.clone(), - font_descriptor: font_descriptor.clone(), - encoding: encoding.clone(), - to_unicode: to_unicode.clone(), - rest: rest.clone(), - } - } -} - -pdf_parse! { - #[pdf] - #[derive(Clone)] - pub struct PdfFontType1Other { - #[pdf(name = "Type")] - pub ty: PdfFontType, - #[pdf(name = "Subtype")] - pub subtype: PdfFontType1Subtype, - #[pdf(name = "Name")] - pub name: Option, - #[pdf(name = "BaseFont")] - pub base_font: PdfName, - #[pdf(name = "FirstChar")] - pub first_char: u32, - #[pdf(name = "LastChar")] - pub last_char: u32, - #[pdf(name = "Widths")] - pub widths: Arc<[f32]>, - #[pdf(name = "FontDescriptor")] - pub font_descriptor: PdfFontDescriptor, - #[pdf(name = "Encoding")] - pub encoding: Option, - #[pdf(name = "ToUnicode")] - pub to_unicode: Option, - #[pdf(flatten)] - pub rest: PdfDictionary, - } -} - -impl fmt::Debug for PdfFontType1Other { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - DagDebugState::scope(|_state| { - let Self { - ty, - subtype, - name, - base_font, - first_char, - last_char, - widths, - font_descriptor, - encoding, - to_unicode, - rest, - } = self; - f.debug_struct("PdfFontType1Other") - .field("ty", ty) - .field("subtype", subtype) - .field("name", name) - .field("base_font", base_font) - .field("first_char", first_char) - .field("last_char", last_char) - .field("widths", widths) - .field("font_descriptor", font_descriptor) - .field("encoding", encoding) - .field("to_unicode", to_unicode) - .field("rest", rest) - .finish() - }) - } -} - -impl PdfFontType1Other { - pub fn common(&self) -> PdfFontType1Common { - let Self { - ty, - subtype, - ref name, - ref base_font, - first_char, - last_char, - ref widths, - ref font_descriptor, - ref encoding, - ref to_unicode, - ref rest, - } = *self; - PdfFontType1Common { - ty, - subtype, - name: name.clone(), - base_font: base_font.clone(), - first_char: Some(first_char), - last_char: Some(last_char), - widths: Some(widths.clone()), - font_descriptor: Some(font_descriptor.clone()), - encoding: encoding.clone(), - to_unicode: to_unicode.clone(), - rest: rest.clone(), - } - } -} - -pdf_parse! { - #[pdf(name)] - #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] - pub enum PdfSimpleFontEncodingPredefined { - #[pdf(name = "MacRomanEncoding")] - MacRomanEncoding, - #[pdf(name = "MacExpertEncoding")] - MacExpertEncoding, - #[pdf(name = "WinAnsiEncoding")] - WinAnsiEncoding, - } -} - -impl PdfSimpleFontEncodingPredefined { - pub const fn table(self) -> PdfSimpleFontEncodingTable { - match self { - Self::MacRomanEncoding => PdfSimpleFontEncodingTable::MAC_ROMAN, - Self::MacExpertEncoding => PdfSimpleFontEncodingTable::MAC_EXPERT, - Self::WinAnsiEncoding => PdfSimpleFontEncodingTable::WIN_ANSI, - } - } -} - -pdf_parse! { - #[pdf(name)] - #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Default)] - pub enum PdfSimpleFontEncodingDictionaryType { - #[pdf(name = "Encoding")] - #[default] - Encoding, - } -} - -pdf_parse! { - #[pdf] - #[derive(Clone, Debug)] - pub struct PdfSimpleFontEncodingDictionary { - #[pdf(name = "Type")] - pub ty: Option, - #[pdf(name = "BaseEncoding")] - pub base_encoding: Option, - #[pdf(name = "Differences")] - pub differences: Option, - #[pdf(flatten)] - pub rest: PdfDictionary, - } -} - -impl PdfSimpleFontEncodingDictionary { - pub fn table( - &self, - default_table: impl FnOnce() -> PdfSimpleFontEncodingTable, - ) -> PdfSimpleFontEncodingTable { - let Self { - ty: _, - base_encoding, - differences, - rest: _, - } = self; - let mut retval = base_encoding - .map(|v| v.table()) - .unwrap_or_else(default_table); - if let Some(differences) = differences { - retval = differences.table(retval); - } - retval - } -} - -#[derive(Clone, Debug)] -pub struct PdfSimpleFontEncodingDifferences { - pos: PdfInputPositionNoCompare, - map: Arc>, -} - -impl PdfSimpleFontEncodingDifferences { - pub fn new(pos: impl Into, map: Arc>) -> Self { - Self { - pos: pos.into(), - map, - } - } - pub fn pos(&self) -> PdfInputPosition { - self.pos.0 - } - pub fn map(&self) -> &Arc> { - &self.map - } - pub fn table(&self, base_table: PdfSimpleFontEncodingTable) -> PdfSimpleFontEncodingTable { - let mut retval = base_table; - let table: &mut [_; 0x100] = ArcOrRef::make_mut(&mut retval.table); - for (&byte, name) in self.map.iter() { - table[usize::from(byte)] = PdfSimpleFontEncodingTableEntry { - name: Some(name.clone()), - presumed_unicode: None, - }; - } - retval - } -} - -impl GetPdfInputPosition for PdfSimpleFontEncodingDifferences { - fn get_pdf_input_position(&self) -> PdfInputPosition { - self.pos.0 - } -} - -impl IsPdfNull for PdfSimpleFontEncodingDifferences { - fn is_pdf_null(&self) -> bool { - false - } -} - -impl PdfParse for PdfSimpleFontEncodingDifferences { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed("PdfSimpleFontEncodingDifferences") - } - fn parse(object: PdfObject) -> Result { - let array = PdfArray::parse(object)?; - let pos = array.pos(); - let mut map = BTreeMap::new(); - let mut next_byte = None::; - for i in array.iter() { - let i = PdfNameOrInteger::parse(i.clone())?; - match i { - PdfNameOrInteger::Name(name) => { - let pos = name.pos(); - let byte = next_byte.ok_or(PdfParseError::IntegerOutOfRange { pos })?; - next_byte = byte.checked_add(1); - map.insert(byte, name); - } - PdfNameOrInteger::Integer(v) => next_byte = Some(u8::parse(v.into())?), - } - } - Ok(Self { - pos: pos.into(), - map: Arc::new(map), - }) - } -} - -#[derive(Clone, Default, Debug)] -pub struct PdfSimpleFontEncodingTableEntry { - pub name: Option, - pub presumed_unicode: Option<&'static str>, -} - -impl PdfSimpleFontEncodingTableEntry { - pub const fn new_static( - name: Option<&'static [u8]>, - presumed_unicode: Option<&'static str>, - ) -> Self { - Self { - name: match name { - Some(name) => Some(PdfName::new_static(name)), - None => None, - }, - presumed_unicode, - } - } -} - -#[derive(Clone, Debug)] -pub struct PdfSimpleFontEncodingTable { - pub table: ArcOrRef<'static, [PdfSimpleFontEncodingTableEntry; 0x100]>, -} - -#[derive(Clone, Debug)] -pub enum PdfSimpleFontEncoding { - Predefined(PdfSimpleFontEncodingPredefined), - Dictionary(PdfSimpleFontEncodingDictionary), -} - -impl PdfSimpleFontEncoding { - pub fn table( - &self, - default_table: impl FnOnce() -> PdfSimpleFontEncodingTable, - ) -> PdfSimpleFontEncodingTable { - match self { - PdfSimpleFontEncoding::Predefined(v) => v.table(), - PdfSimpleFontEncoding::Dictionary(v) => v.table(default_table), - } - } -} - -impl IsPdfNull for PdfSimpleFontEncoding { - fn is_pdf_null(&self) -> bool { - false - } -} - -impl PdfParse for PdfSimpleFontEncoding { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed("PdfSimpleFontEncoding") - } - fn parse(object: PdfObject) -> Result { - let object = PdfObjectDirect::from(object); - match object { - PdfObjectDirect::Name(v) => Ok(Self::Predefined(PdfParse::parse(v.into())?)), - PdfObjectDirect::Dictionary(v) => Ok(Self::Dictionary(PdfParse::parse(v.into())?)), - _ => Err(PdfParseError::InvalidType { - pos: object.pos(), - ty: object.type_name(), - expected_ty: "PdfSimpleFontEncoding", - }), - } - } -} - -#[derive(Clone, Debug)] -#[non_exhaustive] -pub struct PdfFontType1Program { - pub encoding: Option]>>, - pub font_bbox: Option, - pub font_info: Option, - pub font_matrix: Option, - pub font_name: Option, -} - -#[derive(Clone, Debug)] -pub struct PdfFontType1FontInfo { - pub family_name: Option, - pub full_name: Option, - pub notice: Option, - pub weight: Option, - pub version: Option, - pub italic_angle: Option, - pub is_fixed_pitch: Option, - pub underline_position: Option, - pub underline_thickness: Option, -} diff --git a/src/pdf/font/tables.rs b/src/pdf/font/tables.rs deleted file mode 100644 index fcb8218..0000000 --- a/src/pdf/font/tables.rs +++ /dev/null @@ -1,1067 +0,0 @@ -use crate::{ - pdf::font::{PdfSimpleFontEncodingTable, PdfSimpleFontEncodingTableEntry}, - util::ArcOrRef, -}; - -macro_rules! opt_lit { - (None) => { - None - }; - ($v:literal) => { - Some($v) - }; -} - -macro_rules! array_from_fn_0x100 { - (|$i:ident| $value:expr) => { - array_from_fn_0x100!(@step1 |$i| $value; - [ - 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, - 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, - ]; - [ - 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, - 0x80, 0x90, 0xA0, 0xB0, 0xC0, 0xD0, 0xE0, 0xF0, - ] - ) - }; - (@step1 |$i:ident| $value:expr; [$($low_digits:literal,)*]; $high_digits:tt) => { - array_from_fn_0x100!(@step2 |$i| $value; [$($low_digits + $high_digits,)*]) - }; - (@step2 |$i:ident| $value:expr; [$($low_digits:literal + [$($high_digits:literal,)*],)*]) => { - [$($({ - let $i = $low_digits + $high_digits; - $value - },)*)*] - }; -} - -macro_rules! builtin_simple_font_encoding_table { - ( - $vis:vis const $const_name:ident; - $(#[default = ($default_name:literal, $default_presumed_unicode:literal)])? - [ - $(($byte:literal, $name:tt, $presumed_unicode:tt),)* - ] - ) => { - impl PdfSimpleFontEncodingTable { - $vis const $const_name: Self = { - const TABLE: [PdfSimpleFontEncodingTableEntry; 0x100] = { - #[allow(unused_mut, unused_assignments)] - const DEFAULT: (Option<&[u8]>, Option<&str>) = { - let mut value: (Option<&[u8]>, Option<&str>) = (None, None); - $(value = (Some($default_name), Some($default_presumed_unicode));)? - value - }; - let mut encoding = [DEFAULT; 0x100]; - $(encoding[$byte] = (opt_lit!($name), opt_lit!($presumed_unicode));)* - array_from_fn_0x100!(|i| PdfSimpleFontEncodingTableEntry::new_static(encoding[i].0, encoding[i].1)) - }; - Self { table: ArcOrRef::Ref(&TABLE)} - }; - } - }; -} - -builtin_simple_font_encoding_table! { - pub const MAC_ROMAN; - [ - (0o040, b"space", " "), - (0o041, b"exclam", "!"), - (0o042, b"quotedbl", "\""), - (0o043, b"numbersign", "#"), - (0o044, b"dollar", "$"), - (0o045, b"percent", "%"), - (0o046, b"ampersand", "&"), - (0o047, b"quotesingle", "\'"), - (0o050, b"parenleft", "("), - (0o051, b"parenright", ")"), - (0o052, b"asterisk", "*"), - (0o053, b"plus", "+"), - (0o054, b"comma", ","), - (0o055, b"hyphen", "-"), - (0o056, b"period", "."), - (0o057, b"slash", "/"), - (0o060, b"zero", "0"), - (0o061, b"one", "1"), - (0o062, b"two", "2"), - (0o063, b"three", "3"), - (0o064, b"four", "4"), - (0o065, b"five", "5"), - (0o066, b"six", "6"), - (0o067, b"seven", "7"), - (0o070, b"eight", "8"), - (0o071, b"nine", "9"), - (0o072, b"colon", ":"), - (0o073, b"semicolon", ";"), - (0o074, b"less", "<"), - (0o075, b"equal", "="), - (0o076, b"greater", ">"), - (0o077, b"question", "?"), - (0o100, b"at", "@"), - (0o101, b"A", "A"), - (0o102, b"B", "B"), - (0o103, b"C", "C"), - (0o104, b"D", "D"), - (0o105, b"E", "E"), - (0o106, b"F", "F"), - (0o107, b"G", "G"), - (0o110, b"H", "H"), - (0o111, b"I", "I"), - (0o112, b"J", "J"), - (0o113, b"K", "K"), - (0o114, b"L", "L"), - (0o115, b"M", "M"), - (0o116, b"N", "N"), - (0o117, b"O", "O"), - (0o120, b"P", "P"), - (0o121, b"Q", "Q"), - (0o122, b"R", "R"), - (0o123, b"S", "S"), - (0o124, b"T", "T"), - (0o125, b"U", "U"), - (0o126, b"V", "V"), - (0o127, b"W", "W"), - (0o130, b"X", "X"), - (0o131, b"Y", "Y"), - (0o132, b"Z", "Z"), - (0o133, b"bracketleft", "["), - (0o134, b"backslash", "\\"), - (0o135, b"bracketright", "]"), - (0o136, b"asciicircum", "^"), - (0o137, b"underscore", "_"), - (0o140, b"grave", "`"), - (0o141, b"a", "a"), - (0o142, b"b", "b"), - (0o143, b"c", "c"), - (0o144, b"d", "d"), - (0o145, b"e", "e"), - (0o146, b"f", "f"), - (0o147, b"g", "g"), - (0o150, b"h", "h"), - (0o151, b"i", "i"), - (0o152, b"j", "j"), - (0o153, b"k", "k"), - (0o154, b"l", "l"), - (0o155, b"m", "m"), - (0o156, b"n", "n"), - (0o157, b"o", "o"), - (0o160, b"p", "p"), - (0o161, b"q", "q"), - (0o162, b"r", "r"), - (0o163, b"s", "s"), - (0o164, b"t", "t"), - (0o165, b"u", "u"), - (0o166, b"v", "v"), - (0o167, b"w", "w"), - (0o170, b"x", "x"), - (0o171, b"y", "y"), - (0o172, b"z", "z"), - (0o173, b"braceleft", "{"), - (0o174, b"bar", "|"), - (0o175, b"braceright", "}"), - (0o176, b"asciitilde", "~"), - (0o200, b"Adieresis", "\u{c4}"), - (0o201, b"Aring", "\u{c5}"), - (0o202, b"Ccedilla", "\u{c7}"), - (0o203, b"Eacute", "\u{c9}"), - (0o204, b"Ntilde", "\u{d1}"), - (0o205, b"Odieresis", "\u{d6}"), - (0o206, b"Udieresis", "\u{dc}"), - (0o207, b"aacute", "\u{e1}"), - (0o210, b"agrave", "\u{e0}"), - (0o211, b"acircumflex", "\u{e2}"), - (0o212, b"adieresis", "\u{e4}"), - (0o213, b"atilde", "\u{e3}"), - (0o214, b"aring", "\u{e5}"), - (0o215, b"ccedilla", "\u{e7}"), - (0o216, b"eacute", "\u{e9}"), - (0o217, b"egrave", "\u{e8}"), - (0o220, b"ecircumflex", "\u{ea}"), - (0o221, b"edieresis", "\u{eb}"), - (0o222, b"iacute", "\u{ed}"), - (0o223, b"igrave", "\u{ec}"), - (0o224, b"icircumflex", "\u{ee}"), - (0o225, b"idieresis", "\u{ef}"), - (0o226, b"ntilde", "\u{f1}"), - (0o227, b"oacute", "\u{f3}"), - (0o230, b"ograve", "\u{f2}"), - (0o231, b"ocircumflex", "\u{f4}"), - (0o232, b"odieresis", "\u{f6}"), - (0o233, b"otilde", "\u{f5}"), - (0o234, b"uacute", "\u{fa}"), - (0o235, b"ugrave", "\u{f9}"), - (0o236, b"ucircumflex", "\u{fb}"), - (0o237, b"udieresis", "\u{fc}"), - (0o240, b"dagger", "\u{2020}"), - (0o241, b"degree", "\u{b0}"), - (0o242, b"cent", "\u{a2}"), - (0o243, b"sterling", "\u{a3}"), - (0o244, b"section", "\u{a7}"), - (0o245, b"bullet", "\u{2022}"), - (0o246, b"paragraph", "\u{b6}"), - (0o247, b"germandbls", "\u{df}"), - (0o250, b"registered", "\u{ae}"), - (0o251, b"copyright", "\u{a9}"), - (0o252, b"trademark", "\u{2122}"), - (0o253, b"acute", "\u{b4}"), - (0o254, b"dieresis", "\u{a8}"), - (0o256, b"AE", "\u{c6}"), - (0o257, b"Oslash", "\u{d8}"), - (0o261, b"plusminus", "\u{b1}"), - (0o264, b"yen", "\u{a5}"), - (0o265, b"mu", "\u{3bc}"), - (0o273, b"ordfeminine", "\u{aa}"), - (0o274, b"ordmasculine", "\u{ba}"), - (0o276, b"ae", "\u{e6}"), - (0o277, b"oslash", "\u{f8}"), - (0o300, b"questiondown", "\u{bf}"), - (0o301, b"exclamdown", "\u{a1}"), - (0o302, b"logicalnot", "\u{ac}"), - (0o304, b"florin", "\u{192}"), - (0o307, b"guillemotleft", "\u{ab}"), - (0o310, b"guillemotright", "\u{bb}"), - (0o311, b"ellipsis", "\u{2026}"), - (0o312, b"space", "\u{a0}"), - (0o313, b"Agrave", "\u{c0}"), - (0o314, b"Atilde", "\u{c3}"), - (0o315, b"Otilde", "\u{d5}"), - (0o316, b"OE", "\u{152}"), - (0o317, b"oe", "\u{153}"), - (0o320, b"endash", "\u{2013}"), - (0o321, b"emdash", "\u{2014}"), - (0o322, b"quotedblleft", "\u{201c}"), - (0o323, b"quotedblright", "\u{201d}"), - (0o324, b"quoteleft", "\u{2018}"), - (0o325, b"quoteright", "\u{2019}"), - (0o326, b"divide", "\u{f7}"), - (0o330, b"ydieresis", "\u{ff}"), - (0o331, b"Ydieresis", "\u{178}"), - (0o332, b"fraction", "\u{2044}"), - (0o333, b"currency", "\u{a4}"), - (0o334, b"guilsinglleft", "\u{2039}"), - (0o335, b"guilsinglright", "\u{203a}"), - (0o336, b"fi", "\u{fb01}"), - (0o337, b"fl", "\u{fb02}"), - (0o340, b"daggerdbl", "\u{2021}"), - (0o341, b"periodcentered", "\u{b7}"), - (0o342, b"quotesinglbase", "\u{201a}"), - (0o343, b"quotedblbase", "\u{201e}"), - (0o344, b"perthousand", "\u{2030}"), - (0o345, b"Acircumflex", "\u{c2}"), - (0o346, b"Ecircumflex", "\u{ca}"), - (0o347, b"Aacute", "\u{c1}"), - (0o350, b"Edieresis", "\u{cb}"), - (0o351, b"Egrave", "\u{c8}"), - (0o352, b"Iacute", "\u{cd}"), - (0o353, b"Icircumflex", "\u{ce}"), - (0o354, b"Idieresis", "\u{cf}"), - (0o355, b"Igrave", "\u{cc}"), - (0o356, b"Oacute", "\u{d3}"), - (0o357, b"Ocircumflex", "\u{d4}"), - (0o361, b"Ograve", "\u{d2}"), - (0o362, b"Uacute", "\u{da}"), - (0o363, b"Ucircumflex", "\u{db}"), - (0o364, b"Ugrave", "\u{d9}"), - (0o365, b"dotlessi", "\u{131}"), - (0o366, b"circumflex", "\u{2c6}"), - (0o367, b"tilde", "\u{2dc}"), - (0o370, b"macron", "\u{af}"), - (0o371, b"breve", "\u{2d8}"), - (0o372, b"dotaccent", "\u{2d9}"), - (0o373, b"ring", "\u{2da}"), - (0o374, b"cedilla", "\u{b8}"), - (0o375, b"hungarumlaut", "\u{2dd}"), - (0o376, b"ogonek", "\u{2db}"), - (0o377, b"caron", "\u{2c7}"), - ] -} - -builtin_simple_font_encoding_table! { - pub const STANDARD; - [ - (0o040, b"space", " "), - (0o041, b"exclam", "!"), - (0o042, b"quotedbl", "\""), - (0o043, b"numbersign", "#"), - (0o044, b"dollar", "$"), - (0o045, b"percent", "%"), - (0o046, b"ampersand", "&"), - (0o047, b"quoteright", "\u{2019}"), - (0o050, b"parenleft", "("), - (0o051, b"parenright", ")"), - (0o052, b"asterisk", "*"), - (0o053, b"plus", "+"), - (0o054, b"comma", ","), - (0o055, b"hyphen", "-"), - (0o056, b"period", "."), - (0o057, b"slash", "/"), - (0o060, b"zero", "0"), - (0o061, b"one", "1"), - (0o062, b"two", "2"), - (0o063, b"three", "3"), - (0o064, b"four", "4"), - (0o065, b"five", "5"), - (0o066, b"six", "6"), - (0o067, b"seven", "7"), - (0o070, b"eight", "8"), - (0o071, b"nine", "9"), - (0o072, b"colon", ":"), - (0o073, b"semicolon", ";"), - (0o074, b"less", "<"), - (0o075, b"equal", "="), - (0o076, b"greater", ">"), - (0o077, b"question", "?"), - (0o100, b"at", "@"), - (0o101, b"A", "A"), - (0o102, b"B", "B"), - (0o103, b"C", "C"), - (0o104, b"D", "D"), - (0o105, b"E", "E"), - (0o106, b"F", "F"), - (0o107, b"G", "G"), - (0o110, b"H", "H"), - (0o111, b"I", "I"), - (0o112, b"J", "J"), - (0o113, b"K", "K"), - (0o114, b"L", "L"), - (0o115, b"M", "M"), - (0o116, b"N", "N"), - (0o117, b"O", "O"), - (0o120, b"P", "P"), - (0o121, b"Q", "Q"), - (0o122, b"R", "R"), - (0o123, b"S", "S"), - (0o124, b"T", "T"), - (0o125, b"U", "U"), - (0o126, b"V", "V"), - (0o127, b"W", "W"), - (0o130, b"X", "X"), - (0o131, b"Y", "Y"), - (0o132, b"Z", "Z"), - (0o133, b"bracketleft", "["), - (0o134, b"backslash", "\\"), - (0o135, b"bracketright", "]"), - (0o136, b"asciicircum", "^"), - (0o137, b"underscore", "_"), - (0o140, b"quoteleft", "\u{2018}"), - (0o141, b"a", "a"), - (0o142, b"b", "b"), - (0o143, b"c", "c"), - (0o144, b"d", "d"), - (0o145, b"e", "e"), - (0o146, b"f", "f"), - (0o147, b"g", "g"), - (0o150, b"h", "h"), - (0o151, b"i", "i"), - (0o152, b"j", "j"), - (0o153, b"k", "k"), - (0o154, b"l", "l"), - (0o155, b"m", "m"), - (0o156, b"n", "n"), - (0o157, b"o", "o"), - (0o160, b"p", "p"), - (0o161, b"q", "q"), - (0o162, b"r", "r"), - (0o163, b"s", "s"), - (0o164, b"t", "t"), - (0o165, b"u", "u"), - (0o166, b"v", "v"), - (0o167, b"w", "w"), - (0o170, b"x", "x"), - (0o171, b"y", "y"), - (0o172, b"z", "z"), - (0o173, b"braceleft", "{"), - (0o174, b"bar", "|"), - (0o175, b"braceright", "}"), - (0o176, b"asciitilde", "~"), - (0o241, b"exclamdown", "\u{a1}"), - (0o242, b"cent", "\u{a2}"), - (0o243, b"sterling", "\u{a3}"), - (0o244, b"fraction", "\u{2044}"), - (0o245, b"yen", "\u{a5}"), - (0o246, b"florin", "\u{192}"), - (0o247, b"section", "\u{a7}"), - (0o250, b"currency", "\u{a4}"), - (0o251, b"quotesingle", "\'"), - (0o252, b"quotedblleft", "\u{201c}"), - (0o253, b"guillemotleft", "\u{ab}"), - (0o254, b"guilsinglleft", "\u{2039}"), - (0o255, b"guilsinglright", "\u{203a}"), - (0o256, b"fi", "\u{fb01}"), - (0o257, b"fl", "\u{fb02}"), - (0o261, b"endash", "\u{2013}"), - (0o262, b"dagger", "\u{2020}"), - (0o263, b"daggerdbl", "\u{2021}"), - (0o264, b"periodcentered", "\u{b7}"), - (0o266, b"paragraph", "\u{b6}"), - (0o267, b"bullet", "\u{2022}"), - (0o270, b"quotesinglbase", "\u{201a}"), - (0o271, b"quotedblbase", "\u{201e}"), - (0o272, b"quotedblright", "\u{201d}"), - (0o273, b"guillemotright", "\u{bb}"), - (0o274, b"ellipsis", "\u{2026}"), - (0o275, b"perthousand", "\u{2030}"), - (0o277, b"questiondown", "\u{bf}"), - (0o301, b"grave", "`"), - (0o302, b"acute", "\u{b4}"), - (0o303, b"circumflex", "\u{2c6}"), - (0o304, b"tilde", "\u{2dc}"), - (0o305, b"macron", "\u{af}"), - (0o306, b"breve", "\u{2d8}"), - (0o307, b"dotaccent", "\u{2d9}"), - (0o310, b"dieresis", "\u{a8}"), - (0o312, b"ring", "\u{2da}"), - (0o313, b"cedilla", "\u{b8}"), - (0o315, b"hungarumlaut", "\u{2dd}"), - (0o316, b"ogonek", "\u{2db}"), - (0o317, b"caron", "\u{2c7}"), - (0o320, b"emdash", "\u{2014}"), - (0o341, b"AE", "\u{c6}"), - (0o343, b"ordfeminine", "\u{aa}"), - (0o350, b"Lslash", "\u{141}"), - (0o351, b"Oslash", "\u{d8}"), - (0o352, b"OE", "\u{152}"), - (0o353, b"ordmasculine", "\u{ba}"), - (0o361, b"ae", "\u{e6}"), - (0o365, b"dotlessi", "\u{131}"), - (0o370, b"lslash", "\u{142}"), - (0o371, b"oslash", "\u{f8}"), - (0o372, b"oe", "\u{153}"), - (0o373, b"germandbls", "\u{df}"), - ] -} - -builtin_simple_font_encoding_table! { - pub const WIN_ANSI; - #[default = (b"bullet", "\u{2022}")] - [ - (0o040, b"space", " "), - (0o041, b"exclam", "!"), - (0o042, b"quotedbl", "\""), - (0o043, b"numbersign", "#"), - (0o044, b"dollar", "$"), - (0o045, b"percent", "%"), - (0o046, b"ampersand", "&"), - (0o047, b"quotesingle", "\'"), - (0o050, b"parenleft", "("), - (0o051, b"parenright", ")"), - (0o052, b"asterisk", "*"), - (0o053, b"plus", "+"), - (0o054, b"comma", ","), - (0o055, b"hyphen", "-"), - (0o056, b"period", "."), - (0o057, b"slash", "/"), - (0o060, b"zero", "0"), - (0o061, b"one", "1"), - (0o062, b"two", "2"), - (0o063, b"three", "3"), - (0o064, b"four", "4"), - (0o065, b"five", "5"), - (0o066, b"six", "6"), - (0o067, b"seven", "7"), - (0o070, b"eight", "8"), - (0o071, b"nine", "9"), - (0o072, b"colon", ":"), - (0o073, b"semicolon", ";"), - (0o074, b"less", "<"), - (0o075, b"equal", "="), - (0o076, b"greater", ">"), - (0o077, b"question", "?"), - (0o100, b"at", "@"), - (0o101, b"A", "A"), - (0o102, b"B", "B"), - (0o103, b"C", "C"), - (0o104, b"D", "D"), - (0o105, b"E", "E"), - (0o106, b"F", "F"), - (0o107, b"G", "G"), - (0o110, b"H", "H"), - (0o111, b"I", "I"), - (0o112, b"J", "J"), - (0o113, b"K", "K"), - (0o114, b"L", "L"), - (0o115, b"M", "M"), - (0o116, b"N", "N"), - (0o117, b"O", "O"), - (0o120, b"P", "P"), - (0o121, b"Q", "Q"), - (0o122, b"R", "R"), - (0o123, b"S", "S"), - (0o124, b"T", "T"), - (0o125, b"U", "U"), - (0o126, b"V", "V"), - (0o127, b"W", "W"), - (0o130, b"X", "X"), - (0o131, b"Y", "Y"), - (0o132, b"Z", "Z"), - (0o133, b"bracketleft", "["), - (0o134, b"backslash", "\\"), - (0o135, b"bracketright", "]"), - (0o136, b"asciicircum", "^"), - (0o137, b"underscore", "_"), - (0o140, b"grave", "`"), - (0o141, b"a", "a"), - (0o142, b"b", "b"), - (0o143, b"c", "c"), - (0o144, b"d", "d"), - (0o145, b"e", "e"), - (0o146, b"f", "f"), - (0o147, b"g", "g"), - (0o150, b"h", "h"), - (0o151, b"i", "i"), - (0o152, b"j", "j"), - (0o153, b"k", "k"), - (0o154, b"l", "l"), - (0o155, b"m", "m"), - (0o156, b"n", "n"), - (0o157, b"o", "o"), - (0o160, b"p", "p"), - (0o161, b"q", "q"), - (0o162, b"r", "r"), - (0o163, b"s", "s"), - (0o164, b"t", "t"), - (0o165, b"u", "u"), - (0o166, b"v", "v"), - (0o167, b"w", "w"), - (0o170, b"x", "x"), - (0o171, b"y", "y"), - (0o172, b"z", "z"), - (0o173, b"braceleft", "{"), - (0o174, b"bar", "|"), - (0o175, b"braceright", "}"), - (0o176, b"asciitilde", "~"), - (0o200, b"Euro", "\u{20ac}"), - (0o202, b"quotesinglbase", "\u{201a}"), - (0o203, b"florin", "\u{192}"), - (0o204, b"quotedblbase", "\u{201e}"), - (0o205, b"ellipsis", "\u{2026}"), - (0o206, b"dagger", "\u{2020}"), - (0o207, b"daggerdbl", "\u{2021}"), - (0o210, b"circumflex", "\u{2c6}"), - (0o211, b"perthousand", "\u{2030}"), - (0o212, b"Scaron", "\u{160}"), - (0o213, b"guilsinglleft", "\u{2039}"), - (0o214, b"OE", "\u{152}"), - (0o216, b"Zcaron", "\u{17d}"), - (0o221, b"quoteleft", "\u{2018}"), - (0o222, b"quoteright", "\u{2019}"), - (0o223, b"quotedblleft", "\u{201c}"), - (0o224, b"quotedblright", "\u{201d}"), - (0o225, b"bullet", "\u{2022}"), - (0o226, b"endash", "\u{2013}"), - (0o227, b"emdash", "\u{2014}"), - (0o230, b"tilde", "\u{2dc}"), - (0o231, b"trademark", "\u{2122}"), - (0o232, b"scaron", "\u{161}"), - (0o233, b"guilsinglright", "\u{203a}"), - (0o234, b"oe", "\u{153}"), - (0o236, b"zcaron", "\u{17e}"), - (0o237, b"Ydieresis", "\u{178}"), - (0o240, b"space", "\u{a0}"), - (0o241, b"exclamdown", "\u{a1}"), - (0o242, b"cent", "\u{a2}"), - (0o243, b"sterling", "\u{a3}"), - (0o244, b"currency", "\u{a4}"), - (0o245, b"yen", "\u{a5}"), - (0o246, b"brokenbar", "\u{a6}"), - (0o247, b"section", "\u{a7}"), - (0o250, b"dieresis", "\u{a8}"), - (0o251, b"copyright", "\u{a9}"), - (0o252, b"ordfeminine", "\u{aa}"), - (0o253, b"guillemotleft", "\u{ab}"), - (0o254, b"logicalnot", "\u{ac}"), - (0o255, b"hyphen", "\u{ad}"), - (0o256, b"registered", "\u{ae}"), - (0o257, b"macron", "\u{af}"), - (0o260, b"degree", "\u{b0}"), - (0o261, b"plusminus", "\u{b1}"), - (0o262, b"twosuperior", "\u{b2}"), - (0o263, b"threesuperior", "\u{b3}"), - (0o264, b"acute", "\u{b4}"), - (0o265, b"mu", "\u{3bc}"), - (0o266, b"paragraph", "\u{b6}"), - (0o267, b"periodcentered", "\u{b7}"), - (0o270, b"cedilla", "\u{b8}"), - (0o271, b"onesuperior", "\u{b9}"), - (0o272, b"ordmasculine", "\u{ba}"), - (0o273, b"guillemotright", "\u{bb}"), - (0o274, b"onequarter", "\u{bc}"), - (0o275, b"onehalf", "\u{bd}"), - (0o276, b"threequarters", "\u{be}"), - (0o277, b"questiondown", "\u{bf}"), - (0o300, b"Agrave", "\u{c0}"), - (0o301, b"Aacute", "\u{c1}"), - (0o302, b"Acircumflex", "\u{c2}"), - (0o303, b"Atilde", "\u{c3}"), - (0o304, b"Adieresis", "\u{c4}"), - (0o305, b"Aring", "\u{c5}"), - (0o306, b"AE", "\u{c6}"), - (0o307, b"Ccedilla", "\u{c7}"), - (0o310, b"Egrave", "\u{c8}"), - (0o311, b"Eacute", "\u{c9}"), - (0o312, b"Ecircumflex", "\u{ca}"), - (0o313, b"Edieresis", "\u{cb}"), - (0o314, b"Igrave", "\u{cc}"), - (0o315, b"Iacute", "\u{cd}"), - (0o316, b"Icircumflex", "\u{ce}"), - (0o317, b"Idieresis", "\u{cf}"), - (0o320, b"Eth", "\u{d0}"), - (0o321, b"Ntilde", "\u{d1}"), - (0o322, b"Ograve", "\u{d2}"), - (0o323, b"Oacute", "\u{d3}"), - (0o324, b"Ocircumflex", "\u{d4}"), - (0o325, b"Otilde", "\u{d5}"), - (0o326, b"Odieresis", "\u{d6}"), - (0o327, b"multiply", "\u{d7}"), - (0o330, b"Oslash", "\u{d8}"), - (0o331, b"Ugrave", "\u{d9}"), - (0o332, b"Uacute", "\u{da}"), - (0o333, b"Ucircumflex", "\u{db}"), - (0o334, b"Udieresis", "\u{dc}"), - (0o335, b"Yacute", "\u{dd}"), - (0o336, b"Thorn", "\u{de}"), - (0o337, b"germandbls", "\u{df}"), - (0o340, b"agrave", "\u{e0}"), - (0o341, b"aacute", "\u{e1}"), - (0o342, b"acircumflex", "\u{e2}"), - (0o343, b"atilde", "\u{e3}"), - (0o344, b"adieresis", "\u{e4}"), - (0o345, b"aring", "\u{e5}"), - (0o346, b"ae", "\u{e6}"), - (0o347, b"ccedilla", "\u{e7}"), - (0o350, b"egrave", "\u{e8}"), - (0o351, b"eacute", "\u{e9}"), - (0o352, b"ecircumflex", "\u{ea}"), - (0o353, b"edieresis", "\u{eb}"), - (0o354, b"igrave", "\u{ec}"), - (0o355, b"iacute", "\u{ed}"), - (0o356, b"icircumflex", "\u{ee}"), - (0o357, b"idieresis", "\u{ef}"), - (0o360, b"eth", "\u{f0}"), - (0o361, b"ntilde", "\u{f1}"), - (0o362, b"ograve", "\u{f2}"), - (0o363, b"oacute", "\u{f3}"), - (0o364, b"ocircumflex", "\u{f4}"), - (0o365, b"otilde", "\u{f5}"), - (0o366, b"odieresis", "\u{f6}"), - (0o367, b"divide", "\u{f7}"), - (0o370, b"oslash", "\u{f8}"), - (0o371, b"ugrave", "\u{f9}"), - (0o372, b"uacute", "\u{fa}"), - (0o373, b"ucircumflex", "\u{fb}"), - (0o374, b"udieresis", "\u{fc}"), - (0o375, b"yacute", "\u{fd}"), - (0o376, b"thorn", "\u{fe}"), - (0o377, b"ydieresis", "\u{ff}"), - ] -} - -builtin_simple_font_encoding_table! { - pub const PDF_DOC; - [ - (0o011, None, "\t"), - (0o012, None, "\n"), - (0o015, None, "\r"), - (0o030, b"breve", "\u{2d8}"), - (0o031, b"caron", "\u{2c7}"), - (0o032, b"circumflex", "\u{2c6}"), - (0o033, b"dotaccent", "\u{2d9}"), - (0o034, b"hungarumlaut", "\u{2dd}"), - (0o035, b"ogonek", "\u{2db}"), - (0o036, b"ring", "\u{2da}"), - (0o037, b"tilde", "\u{2dc}"), - (0o040, b"space", " "), - (0o041, b"exclam", "!"), - (0o042, b"quotedbl", "\""), - (0o043, b"numbersign", "#"), - (0o044, b"dollar", "$"), - (0o045, b"percent", "%"), - (0o046, b"ampersand", "&"), - (0o047, b"quotesingle", "\'"), - (0o050, b"parenleft", "("), - (0o051, b"parenright", ")"), - (0o052, b"asterisk", "*"), - (0o053, b"plus", "+"), - (0o054, b"comma", ","), - (0o055, b"hyphen", "-"), - (0o056, b"period", "."), - (0o057, b"slash", "/"), - (0o060, b"zero", "0"), - (0o061, b"one", "1"), - (0o062, b"two", "2"), - (0o063, b"three", "3"), - (0o064, b"four", "4"), - (0o065, b"five", "5"), - (0o066, b"six", "6"), - (0o067, b"seven", "7"), - (0o070, b"eight", "8"), - (0o071, b"nine", "9"), - (0o072, b"colon", ":"), - (0o073, b"semicolon", ";"), - (0o074, b"less", "<"), - (0o075, b"equal", "="), - (0o076, b"greater", ">"), - (0o077, b"question", "?"), - (0o100, b"at", "@"), - (0o101, b"A", "A"), - (0o102, b"B", "B"), - (0o103, b"C", "C"), - (0o104, b"D", "D"), - (0o105, b"E", "E"), - (0o106, b"F", "F"), - (0o107, b"G", "G"), - (0o110, b"H", "H"), - (0o111, b"I", "I"), - (0o112, b"J", "J"), - (0o113, b"K", "K"), - (0o114, b"L", "L"), - (0o115, b"M", "M"), - (0o116, b"N", "N"), - (0o117, b"O", "O"), - (0o120, b"P", "P"), - (0o121, b"Q", "Q"), - (0o122, b"R", "R"), - (0o123, b"S", "S"), - (0o124, b"T", "T"), - (0o125, b"U", "U"), - (0o126, b"V", "V"), - (0o127, b"W", "W"), - (0o130, b"X", "X"), - (0o131, b"Y", "Y"), - (0o132, b"Z", "Z"), - (0o133, b"bracketleft", "["), - (0o134, b"backslash", "\\"), - (0o135, b"bracketright", "]"), - (0o136, b"asciicircum", "^"), - (0o137, b"underscore", "_"), - (0o140, b"grave", "`"), - (0o141, b"a", "a"), - (0o142, b"b", "b"), - (0o143, b"c", "c"), - (0o144, b"d", "d"), - (0o145, b"e", "e"), - (0o146, b"f", "f"), - (0o147, b"g", "g"), - (0o150, b"h", "h"), - (0o151, b"i", "i"), - (0o152, b"j", "j"), - (0o153, b"k", "k"), - (0o154, b"l", "l"), - (0o155, b"m", "m"), - (0o156, b"n", "n"), - (0o157, b"o", "o"), - (0o160, b"p", "p"), - (0o161, b"q", "q"), - (0o162, b"r", "r"), - (0o163, b"s", "s"), - (0o164, b"t", "t"), - (0o165, b"u", "u"), - (0o166, b"v", "v"), - (0o167, b"w", "w"), - (0o170, b"x", "x"), - (0o171, b"y", "y"), - (0o172, b"z", "z"), - (0o173, b"braceleft", "{"), - (0o174, b"bar", "|"), - (0o175, b"braceright", "}"), - (0o176, b"asciitilde", "~"), - (0o200, b"bullet", "\u{2022}"), - (0o201, b"dagger", "\u{2020}"), - (0o202, b"daggerdbl", "\u{2021}"), - (0o203, b"ellipsis", "\u{2026}"), - (0o204, b"emdash", "\u{2014}"), - (0o205, b"endash", "\u{2013}"), - (0o206, b"florin", "\u{192}"), - (0o207, b"fraction", "\u{2044}"), - (0o210, b"guilsinglleft", "\u{2039}"), - (0o211, b"guilsinglright", "\u{203a}"), - (0o212, b"minus", "\u{2212}"), - (0o213, b"perthousand", "\u{2030}"), - (0o214, b"quotedblbase", "\u{201e}"), - (0o215, b"quotedblleft", "\u{201c}"), - (0o216, b"quotedblright", "\u{201d}"), - (0o217, b"quoteleft", "\u{2018}"), - (0o220, b"quoteright", "\u{2019}"), - (0o221, b"quotesinglbase", "\u{201a}"), - (0o222, b"trademark", "\u{2122}"), - (0o223, b"fi", "\u{fb01}"), - (0o224, b"fl", "\u{fb02}"), - (0o225, b"Lslash", "\u{141}"), - (0o226, b"OE", "\u{152}"), - (0o227, b"Scaron", "\u{160}"), - (0o230, b"Ydieresis", "\u{178}"), - (0o231, b"Zcaron", "\u{17d}"), - (0o232, b"dotlessi", "\u{131}"), - (0o233, b"lslash", "\u{142}"), - (0o234, b"oe", "\u{153}"), - (0o235, b"scaron", "\u{161}"), - (0o236, b"zcaron", "\u{17e}"), - (0o240, b"Euro", "\u{20ac}"), - (0o241, b"exclamdown", "\u{a1}"), - (0o242, b"cent", "\u{a2}"), - (0o243, b"sterling", "\u{a3}"), - (0o244, b"currency", "\u{a4}"), - (0o245, b"yen", "\u{a5}"), - (0o246, b"brokenbar", "\u{a6}"), - (0o247, b"section", "\u{a7}"), - (0o250, b"dieresis", "\u{a8}"), - (0o251, b"copyright", "\u{a9}"), - (0o252, b"ordfeminine", "\u{aa}"), - (0o253, b"guillemotleft", "\u{ab}"), - (0o254, b"logicalnot", "\u{ac}"), - (0o256, b"registered", "\u{ae}"), - (0o257, b"macron", "\u{af}"), - (0o260, b"degree", "\u{b0}"), - (0o261, b"plusminus", "\u{b1}"), - (0o262, b"twosuperior", "\u{b2}"), - (0o263, b"threesuperior", "\u{b3}"), - (0o264, b"acute", "\u{b4}"), - (0o265, b"mu", "\u{3bc}"), - (0o266, b"paragraph", "\u{b6}"), - (0o267, b"periodcentered", "\u{b7}"), - (0o270, b"cedilla", "\u{b8}"), - (0o271, b"onesuperior", "\u{b9}"), - (0o272, b"ordmasculine", "\u{ba}"), - (0o273, b"guillemotright", "\u{bb}"), - (0o274, b"onequarter", "\u{bc}"), - (0o275, b"onehalf", "\u{bd}"), - (0o276, b"threequarters", "\u{be}"), - (0o277, b"questiondown", "\u{bf}"), - (0o300, b"Agrave", "\u{c0}"), - (0o301, b"Aacute", "\u{c1}"), - (0o302, b"Acircumflex", "\u{c2}"), - (0o303, b"Atilde", "\u{c3}"), - (0o304, b"Adieresis", "\u{c4}"), - (0o305, b"Aring", "\u{c5}"), - (0o306, b"AE", "\u{c6}"), - (0o307, b"Ccedilla", "\u{c7}"), - (0o310, b"Egrave", "\u{c8}"), - (0o311, b"Eacute", "\u{c9}"), - (0o312, b"Ecircumflex", "\u{ca}"), - (0o313, b"Edieresis", "\u{cb}"), - (0o314, b"Igrave", "\u{cc}"), - (0o315, b"Iacute", "\u{cd}"), - (0o316, b"Icircumflex", "\u{ce}"), - (0o317, b"Idieresis", "\u{cf}"), - (0o320, b"Eth", "\u{d0}"), - (0o321, b"Ntilde", "\u{d1}"), - (0o322, b"Ograve", "\u{d2}"), - (0o323, b"Oacute", "\u{d3}"), - (0o324, b"Ocircumflex", "\u{d4}"), - (0o325, b"Otilde", "\u{d5}"), - (0o326, b"Odieresis", "\u{d6}"), - (0o327, b"multiply", "\u{d7}"), - (0o330, b"Oslash", "\u{d8}"), - (0o331, b"Ugrave", "\u{d9}"), - (0o332, b"Uacute", "\u{da}"), - (0o333, b"Ucircumflex", "\u{db}"), - (0o334, b"Udieresis", "\u{dc}"), - (0o335, b"Yacute", "\u{dd}"), - (0o336, b"Thorn", "\u{de}"), - (0o337, b"germandbls", "\u{df}"), - (0o340, b"agrave", "\u{e0}"), - (0o341, b"aacute", "\u{e1}"), - (0o342, b"acircumflex", "\u{e2}"), - (0o343, b"atilde", "\u{e3}"), - (0o344, b"adieresis", "\u{e4}"), - (0o345, b"aring", "\u{e5}"), - (0o346, b"ae", "\u{e6}"), - (0o347, b"ccedilla", "\u{e7}"), - (0o350, b"egrave", "\u{e8}"), - (0o351, b"eacute", "\u{e9}"), - (0o352, b"ecircumflex", "\u{ea}"), - (0o353, b"edieresis", "\u{eb}"), - (0o354, b"igrave", "\u{ec}"), - (0o355, b"iacute", "\u{ed}"), - (0o356, b"icircumflex", "\u{ee}"), - (0o357, b"idieresis", "\u{ef}"), - (0o360, b"eth", "\u{f0}"), - (0o361, b"ntilde", "\u{f1}"), - (0o362, b"ograve", "\u{f2}"), - (0o363, b"oacute", "\u{f3}"), - (0o364, b"ocircumflex", "\u{f4}"), - (0o365, b"otilde", "\u{f5}"), - (0o366, b"odieresis", "\u{f6}"), - (0o367, b"divide", "\u{f7}"), - (0o370, b"oslash", "\u{f8}"), - (0o371, b"ugrave", "\u{f9}"), - (0o372, b"uacute", "\u{fa}"), - (0o373, b"ucircumflex", "\u{fb}"), - (0o374, b"udieresis", "\u{fc}"), - (0o375, b"yacute", "\u{fd}"), - (0o376, b"thorn", "\u{fe}"), - (0o377, b"ydieresis", "\u{ff}"), - ] -} - -builtin_simple_font_encoding_table! { - pub const MAC_EXPERT; - [ - (0o040, b"space", " "), - (0o041, b"exclamsmall", "!"), - (0o042, b"Hungarumlautsmall", "\u{2dd}"), - (0o043, b"centoldstyle", "\u{a2}"), - (0o044, b"dollaroldstyle", "$"), - (0o045, b"dollarsuperior", "$"), - (0o046, b"ampersandsmall", "&"), - (0o047, b"Acutesmall", "\u{b4}"), - (0o050, b"parenleftsuperior", "\u{207d}"), - (0o051, b"parenrightsuperior", "\u{207e}"), - (0o052, b"twodotenleader", "\u{2025}"), - (0o053, b"onedotenleader", "\u{2024}"), - (0o054, b"comma", ","), - (0o055, b"hyphen", "-"), - (0o056, b"period", "."), - (0o057, b"fraction", "\u{2044}"), - (0o060, b"zerooldstyle", "0"), - (0o061, b"oneoldstyle", "1"), - (0o062, b"twooldstyle", "2"), - (0o063, b"threeoldstyle", "3"), - (0o064, b"fouroldstyle", "4"), - (0o065, b"fiveoldstyle", "5"), - (0o066, b"sixoldstyle", "6"), - (0o067, b"sevenoldstyle", "7"), - (0o070, b"eightoldstyle", "8"), - (0o071, b"nineoldstyle", "9"), - (0o072, b"colon", ":"), - (0o073, b"semicolon", ";"), - (0o075, b"threequartersemdash", "\u{2014}"), - (0o077, b"questionsmall", "?"), - (0o104, b"Ethsmall", "\u{f0}"), - (0o107, b"onequarter", "\u{bc}"), - (0o110, b"onehalf", "\u{bd}"), - (0o111, b"threequarters", "\u{be}"), - (0o112, b"oneeighth", "\u{215b}"), - (0o113, b"threeeighths", "\u{215c}"), - (0o114, b"fiveeighths", "\u{215d}"), - (0o115, b"seveneighths", "\u{215e}"), - (0o116, b"onethird", "\u{2153}"), - (0o117, b"twothirds", "\u{2154}"), - (0o126, b"ff", "\u{fb00}"), - (0o127, b"fi", "\u{fb01}"), - (0o130, b"fl", "\u{fb02}"), - (0o131, b"ffi", "\u{fb03}"), - (0o132, b"ffl", "\u{fb04}"), - (0o133, b"parenleftinferior", "\u{208d}"), - (0o135, b"parenrightinferior", "\u{208e}"), - (0o136, b"Circumflexsmall", "\u{2c6}"), - (0o137, b"hypheninferior", "-"), - (0o140, b"Gravesmall", "`"), - (0o141, b"Asmall", "a"), - (0o142, b"Bsmall", "b"), - (0o143, b"Csmall", "c"), - (0o144, b"Dsmall", "d"), - (0o145, b"Esmall", "e"), - (0o146, b"Fsmall", "f"), - (0o147, b"Gsmall", "g"), - (0o150, b"Hsmall", "h"), - (0o151, b"Ismall", "i"), - (0o152, b"Jsmall", "j"), - (0o153, b"Ksmall", "k"), - (0o154, b"Lsmall", "l"), - (0o155, b"Msmall", "m"), - (0o156, b"Nsmall", "n"), - (0o157, b"Osmall", "o"), - (0o160, b"Psmall", "p"), - (0o161, b"Qsmall", "q"), - (0o162, b"Rsmall", "r"), - (0o163, b"Ssmall", "s"), - (0o164, b"Tsmall", "t"), - (0o165, b"Usmall", "u"), - (0o166, b"Vsmall", "v"), - (0o167, b"Wsmall", "w"), - (0o170, b"Xsmall", "x"), - (0o171, b"Ysmall", "y"), - (0o172, b"Zsmall", "z"), - (0o173, b"colonmonetary", "\u{20a1}"), - (0o174, b"onefitted", "1"), - (0o175, b"rupiah", "Rp"), - (0o176, b"Tildesmall", "\u{2dc}"), - (0o201, b"asuperior", "a"), - (0o202, b"centsuperior", "\u{a2}"), - (0o207, b"Aacutesmall", "\u{e1}"), - (0o210, b"Agravesmall", "\u{e0}"), - (0o211, b"Acircumflexsmall", "\u{e2}"), - (0o212, b"Adieresissmall", "\u{e4}"), - (0o213, b"Atildesmall", "\u{e3}"), - (0o214, b"Aringsmall", "\u{e5}"), - (0o215, b"Ccedillasmall", "\u{e7}"), - (0o216, b"Eacutesmall", "\u{e9}"), - (0o217, b"Egravesmall", "\u{e8}"), - (0o220, b"Ecircumflexsmall", "\u{ea}"), - (0o221, b"Edieresissmall", "\u{eb}"), - (0o222, b"Iacutesmall", "\u{ed}"), - (0o223, b"Igravesmall", "\u{ec}"), - (0o224, b"Icircumflexsmall", "\u{ee}"), - (0o225, b"Idieresissmall", "\u{ef}"), - (0o226, b"Ntildesmall", "\u{f1}"), - (0o227, b"Oacutesmall", "\u{f3}"), - (0o230, b"Ogravesmall", "\u{f2}"), - (0o231, b"Ocircumflexsmall", "\u{f4}"), - (0o232, b"Odieresissmall", "\u{f6}"), - (0o233, b"Otildesmall", "\u{f5}"), - (0o234, b"Uacutesmall", "\u{fa}"), - (0o235, b"Ugravesmall", "\u{f9}"), - (0o236, b"Ucircumflexsmall", "\u{fb}"), - (0o237, b"Udieresissmall", "\u{fc}"), - (0o241, b"eightsuperior", "\u{2078}"), - (0o242, b"fourinferior", "\u{2084}"), - (0o243, b"threeinferior", "\u{2083}"), - (0o244, b"sixinferior", "\u{2086}"), - (0o245, b"eightinferior", "\u{2088}"), - (0o246, b"seveninferior", "\u{2087}"), - (0o247, b"Scaronsmall", "\u{161}"), - (0o251, b"centinferior", "\u{a2}"), - (0o252, b"twoinferior", "\u{2082}"), - (0o254, b"Dieresissmall", "\u{a8}"), - (0o256, b"Caronsmall", "\u{2c7}"), - (0o257, b"osuperior", "o"), - (0o260, b"fiveinferior", "\u{2085}"), - (0o262, b"commainferior", ","), - (0o263, b"periodinferior", "."), - (0o264, b"Yacutesmall", "\u{fd}"), - (0o266, b"dollarinferior", "$"), - (0o271, b"Thornsmall", "\u{fe}"), - (0o273, b"nineinferior", "\u{2089}"), - (0o274, b"zeroinferior", "\u{2080}"), - (0o275, b"Zcaronsmall", "\u{17e}"), - (0o276, b"AEsmall", "\u{e6}"), - (0o277, b"Oslashsmall", "\u{f8}"), - (0o300, b"questiondownsmall", "\u{bf}"), - (0o301, b"oneinferior", "\u{2081}"), - (0o302, b"Lslashsmall", "\u{142}"), - (0o311, b"Cedillasmall", "\u{b8}"), - (0o317, b"OEsmall", "\u{153}"), - (0o320, b"figuredash", "\u{2012}"), - (0o321, b"hyphensuperior", "-"), - (0o326, b"exclamdownsmall", "\u{a1}"), - (0o330, b"Ydieresissmall", "\u{ff}"), - (0o332, b"onesuperior", "\u{b9}"), - (0o333, b"twosuperior", "\u{b2}"), - (0o334, b"threesuperior", "\u{b3}"), - (0o335, b"foursuperior", "\u{2074}"), - (0o336, b"fivesuperior", "\u{2075}"), - (0o337, b"sixsuperior", "\u{2076}"), - (0o340, b"sevensuperior", "\u{2077}"), - (0o341, b"ninesuperior", "\u{2079}"), - (0o342, b"zerosuperior", "\u{2070}"), - (0o344, b"esuperior", "e"), - (0o345, b"rsuperior", "r"), - (0o346, b"tsuperior", "t"), - (0o351, b"isuperior", "i"), - (0o352, b"ssuperior", "s"), - (0o353, b"dsuperior", "d"), - (0o361, b"lsuperior", "l"), - (0o362, b"Ogoneksmall", "\u{2db}"), - (0o363, b"Brevesmall", "\u{2d8}"), - (0o364, b"Macronsmall", "\u{af}"), - (0o365, b"bsuperior", "b"), - (0o366, b"nsuperior", "\u{207f}"), - (0o367, b"msuperior", "m"), - (0o370, b"commasuperior", ","), - (0o371, b"periodsuperior", "."), - (0o372, b"Dotaccentsmall", "\u{2d9}"), - (0o373, b"Ringsmall", "\u{2da}"), - ] -} diff --git a/src/pdf/font/type_1_parse.rs b/src/pdf/font/type_1_parse.rs deleted file mode 100644 index c557d5a..0000000 --- a/src/pdf/font/type_1_parse.rs +++ /dev/null @@ -1,1423 +0,0 @@ -use crate::{ - pdf::{ - PdfObjects, - font::{PdfFontType1FontInfo, PdfFontType1Program}, - object::{PdfMatrix, PdfName, PdfRectangle, PdfStreamContents, PdfString, PdfVec2D}, - parse::{ - PdfInputPosition, PdfInputPositionKnown, PdfInputPositionNoCompare, PdfParseError, - }, - }, - util::ArcOrRef, -}; -use std::{ - cell::{Cell, RefCell}, - collections::BTreeMap, - fmt, - num::NonZero, - rc::Rc, - sync::Arc, -}; - -#[derive(Debug)] -enum PsBreakReason { - FoundEExec, - Error(PdfParseError), -} - -fn custom_err>(msg: impl ToString) -> Result { - Err(PdfParseError::Custom(msg.to_string()).into()) -} - -impl From for PsBreakReason { - fn from(value: PdfParseError) -> Self { - Self::Error(value) - } -} - -struct PsFileDecryptedSource { - source: Box>, - decoded: Vec, -} - -impl PsFileDecryptedSource { - fn get(&mut self, index: usize) -> Option { - loop { - if let Some(byte) = self.decoded.get(index) { - return Some(*byte); - } - self.decoded.push(self.source.next()?); - } - } -} - -#[derive(Clone)] -enum PsFileSource { - Bytes(Rc<[u8]>), - Decrypted(Rc>), -} - -impl PsFileSource { - fn get(&self, index: usize) -> Option { - match self { - PsFileSource::Bytes(bytes) => bytes.get(index).copied(), - PsFileSource::Decrypted(src) => src.borrow_mut().get(index), - } - } -} - -#[derive(Clone)] -struct PsFile { - id: u64, - source: PsFileSource, - pos: Rc>, -} - -impl PartialEq for PsFile { - fn eq(&self, other: &Self) -> bool { - self.id == other.id - } -} - -impl Eq for PsFile {} - -impl PartialOrd for PsFile { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for PsFile { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.id.cmp(&other.id) - } -} - -impl fmt::Debug for PsFile { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let Self { id, source: _, pos } = self; - f.debug_struct("PsFile") - .field("id", id) - .field("pos", pos) - .finish_non_exhaustive() - } -} - -fn is_whitespace_char(v: u8) -> bool { - matches!(v, b'\0' | b'\t' | b'\n' | b'\x0C' | b'\r' | b' ') -} - -fn is_special_char(v: u8) -> bool { - matches!( - v, - b'(' | b')' | b'<' | b'>' | b'[' | b']' | b'{' | b'}' | b'/' | b'%' - ) -} - -fn is_regular_char(v: u8) -> bool { - !(is_whitespace_char(v) || is_special_char(v)) -} - -struct NotALineEnd; - -#[derive(Clone)] -enum Token { - Integer(i128), - Real(f64), - ArrayStart, - ArrayEnd, - ProcedureStart, - ProcedureEnd, - ExecutableName(Vec), - LiteralName(Vec), - ImmediatelyEvaluatedName(Vec), - String(Vec), -} - -impl fmt::Debug for Token { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Integer(v) => f.debug_tuple("Integer").field(v).finish(), - Self::Real(v) => f.debug_tuple("Real").field(v).finish(), - Self::ArrayStart => write!(f, "ArrayStart"), - Self::ArrayEnd => write!(f, "ArrayEnd"), - Self::ProcedureStart => write!(f, "ProcedureStart"), - Self::ProcedureEnd => write!(f, "ProcedureEnd"), - Self::ExecutableName(name) => write!(f, "ExecutableName({})", name.escape_ascii()), - Self::LiteralName(name) => write!(f, "LiteralName({})", name.escape_ascii()), - Self::ImmediatelyEvaluatedName(name) => { - write!(f, "ImmediatelyEvaluatedName({})", name.escape_ascii()) - } - Self::String(contents) => { - write!(f, "String({})", contents.escape_ascii()) - } - } - } -} - -impl PsFile { - fn new(id: u64, source: PsFileSource, pos: usize, stream_pos: PdfInputPosition) -> Self { - Self { - id, - source, - pos: Rc::new(Cell::new(PdfInputPositionKnown { - pos, - containing_streams_pos: stream_pos.get().map(|v| v.pos), - })), - } - } - fn pos(&self) -> PdfInputPosition { - PdfInputPosition::new(Some(self.pos.get())) - } - fn peek_byte(&self) -> Option { - self.source.get(self.pos.get().pos) - } - fn next_byte(&mut self) -> Option { - if let Some(b) = self.source.get(self.pos.get().pos) { - self.pos.update(|mut pos| { - pos.pos += 1; - pos - }); - Some(b) - } else { - None - } - } - fn try_parse_line_end(&mut self) -> Result<(), NotALineEnd> { - match self.peek_byte().ok_or(NotALineEnd)? { - b'\r' => { - self.next_byte(); - if self.peek_byte() == Some(b'\n') { - self.next_byte(); - } - Ok(()) - } - b'\x0C' | b'\n' => Ok(()), - _ => Err(NotALineEnd), - } - } - fn skip_whitespace(&mut self) { - while let Some(b'\0' | b'\t' | b'\n' | b'\x0C' | b'\r' | b' ') = self.peek_byte() { - self.next_byte(); - } - } - fn skip_comments_and_whitespace(&mut self) { - loop { - self.skip_whitespace(); - let Some(b'%') = self.peek_byte() else { - break; - }; - while self.peek_byte().is_some() { - if let Ok(()) = self.try_parse_line_end() { - break; - } - self.next_byte(); - } - } - } - fn parse_number(mut text: &[u8]) -> Option { - let full_text = text; - let sign = match text { - [sign @ (b'-' | b'+'), rest @ ..] => { - text = rest; - Some(*sign) - } - _ => None, - }; - let mut radix = Some(0u32); - let mut any_digits = false; - while let [digit @ b'0'..=b'9', rest @ ..] = text { - text = rest; - any_digits = true; - radix = radix - .and_then(|v| v.checked_mul(10)) - .and_then(|v| v.checked_add((digit - b'0').into())); - } - if let (Some(radix @ (2..=36)), [b'#', rest @ ..]) = (radix, text) { - text = rest; - if sign.is_some() || text.is_empty() { - return None; - } - let mut value = 0i128; - for &digit in text { - let digit = (digit as char).to_digit(radix)?; - value = value.checked_mul(radix.into())?; - value = value.checked_add(digit.into())?; - } - return Some(Token::Integer(value)); - } - let mut is_real = false; - if let [b'.', rest @ ..] = text { - text = rest; - is_real = true; - while let [b'0'..=b'9', rest @ ..] = text { - text = rest; - any_digits = true; - } - } - if !any_digits { - return None; - } - if let [b'e' | b'E', rest @ ..] = text { - text = rest; - is_real = true; - if let [b'+' | b'-', rest @ ..] = text { - text = rest; - } - let [b'0'..=b'9', ..] = text else { - return None; - }; - while let [b'0'..=b'9', rest @ ..] = text { - text = rest; - } - } - let full_text = str::from_utf8(full_text).ok()?; - if is_real { - Some(Token::Real(full_text.parse().ok()?)) - } else { - Some(Token::Integer(full_text.parse().ok()?)) - } - } - fn parse_string_after_l_paren(&mut self) -> Result { - let mut contents = Vec::new(); - let mut paren_level = NonZero::new(1usize).expect("non-zero"); - while let Some(b) = self.next_byte() { - contents.push(match b { - b'(' => { - paren_level = paren_level.checked_add(1).expect("overflow"); - b - } - b')' => { - let Some(new_paren_level) = NonZero::new(paren_level.get() - 1) else { - return Ok(Token::String(contents)); - }; - paren_level = new_paren_level; - b - } - b'\r' if self.peek_byte() == Some(b'\n') => { - self.next_byte(); - b'\n' - } - b'\r' | b'\n' => b'\n', - b'\\' => { - let pos = self.pos(); - let Some(b) = self.next_byte() else { - return Err(PdfParseError::InvalidStringEscape { pos }); - }; - match b { - b'\r' if self.peek_byte() == Some(b'\n') => { - self.next_byte(); - continue; - } - b'\r' | b'\n' => continue, - b'n' => b'\n', - b'r' => b'\r', - b't' => b'\t', - b'b' => b'\x08', - b'f' => b'\x0C', - b'(' | b')' | b'\\' => b, - b'0'..=b'7' => { - const MAX_OCTAL_DIGITS: usize = 3; - let mut value = b - b'0'; - let mut len = 1; - while len < MAX_OCTAL_DIGITS { - let Some(b @ b'0'..=b'7') = self.peek_byte() else { - break; - }; - value <<= 3; - value |= b - b'0'; - len += 1; - self.next_byte(); - } - value - } - _ => { - return Err(PdfParseError::InvalidStringEscape { pos }); - } - } - } - _ => b, - }); - } - Err(PdfParseError::TruncatedFile { pos: self.pos() }) - } - fn next_token(&mut self) -> Result, PdfParseError> { - self.skip_comments_and_whitespace(); - let Some(first_byte) = self.peek_byte() else { - return Ok(None); - }; - match first_byte { - b'(' => { - self.next_byte(); - self.parse_string_after_l_paren().map(Some) - } - b')' => todo!(), - b'<' => { - todo!("encoded string"); - } - b'>' => todo!(), - b'[' => { - self.next_byte(); - Ok(Some(Token::ArrayStart)) - } - b']' => { - self.next_byte(); - Ok(Some(Token::ArrayEnd)) - } - b'{' => { - self.next_byte(); - Ok(Some(Token::ProcedureStart)) - } - b'}' => { - self.next_byte(); - Ok(Some(Token::ProcedureEnd)) - } - b'/' => { - self.next_byte(); - let is_immediately_evaluated_name = self.peek_byte() == Some(b'/'); - if is_immediately_evaluated_name { - self.next_byte(); - } - let mut name = Vec::new(); - while self.peek_byte().is_some_and(is_regular_char) { - name.extend(self.next_byte()); - } - Ok(Some(if is_immediately_evaluated_name { - Token::ImmediatelyEvaluatedName(name) - } else { - Token::LiteralName(name) - })) - } - _ => { - let mut name = Vec::new(); - name.extend(self.next_byte()); - while self.peek_byte().is_some_and(is_regular_char) { - name.extend(self.next_byte()); - } - if let Some(token) = Self::parse_number(&name) { - Ok(Some(token)) - } else { - Ok(Some(Token::ExecutableName(name))) - } - } - } - } - fn decrypt_for_eexec_helper( - mut self, - new_id: u64, - random_bytes: Option<[u8; 4]>, - next_byte: impl Fn(&mut Self) -> Option + 'static, - ) -> Result { - let read_first_4 = || -> Option<[u8; 4]> { - let b0 = next_byte(&mut self)?; - let b1 = next_byte(&mut self)?; - let b2 = next_byte(&mut self)?; - let b3 = next_byte(&mut self)?; - Some([b0, b1, b2, b3]) - }; - let random_bytes = random_bytes.or_else(read_first_4).ok_or_else(|| { - PdfParseError::Custom("postscript eexec operator: can't read the 4 random bytes".into()) - })?; - let mut r = 55665u16; - let c1 = 52845u16; - let c2 = 22719u16; - let mut decrypt_one = move |cipher: u8| -> u8 { - dbg!(cipher); - let plain = cipher ^ (r >> 8) as u8; - dbg!(plain); - r = (cipher as u16) - .wrapping_add(r) - .wrapping_mul(c1) - .wrapping_add(c2); - dbg!(r); - plain - }; - for b in random_bytes { - decrypt_one(b); - } - let stream_pos = self.pos(); - Ok(Self::new( - new_id, - PsFileSource::Decrypted(Rc::new(RefCell::new(PsFileDecryptedSource { - source: Box::new(std::iter::from_fn(move || { - dbg!(next_byte(&mut self)).map(decrypt_one) - })), - decoded: Vec::new(), - }))), - 0, - stream_pos, - )) - } - fn decrypt_for_eexec(mut self, new_id: u64) -> Result { - while let Some(b' ' | b'\t' | b'\r' | b'\n') = self.peek_byte() { - dbg!(self.next_byte()); - } - let start_pos = self.pos.get(); - let mut read_first_4_binary = || -> Option<[u8; 4]> { - let b0 = self.next_byte()?; - let b1 = self.next_byte()?; - let b2 = self.next_byte()?; - let b3 = self.next_byte()?; - let retval = [b0, b1, b2, b3]; - if retval.iter().all(u8::is_ascii_hexdigit) { - None - } else { - Some(retval) - } - }; - if let Some(random_bytes) = dbg!(read_first_4_binary()) { - self.decrypt_for_eexec_helper(new_id, Some(random_bytes), PsFile::next_byte) - } else { - self.pos.set(start_pos); - let next_byte = |this: &mut Self| { - let mut first_digit = None; - loop { - let byte = this.peek_byte()?; - if matches!(byte, b' ' | b'\t' | b'\r' | b'\n') { - this.next_byte(); - continue; - } - let digit = (byte as char).to_digit(0x10)?; - this.next_byte(); - if let Some(first_digit) = first_digit { - return Some(((first_digit << 4) | digit) as u8); - } else { - first_digit = Some(digit); - } - } - }; - self.decrypt_for_eexec_helper(new_id, None, next_byte) - } - } -} - -#[derive(Clone, Copy, Debug, Default)] -struct PsReal(f64); - -impl PartialOrd for PsReal { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Eq for PsReal {} - -impl PartialEq for PsReal { - fn eq(&self, other: &Self) -> bool { - self.cmp(other).is_eq() - } -} - -impl Ord for PsReal { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - let l = (!self.0.is_nan()).then_some(self.0); - let r = (!other.0.is_nan()).then_some(other.0); - l.partial_cmp(&r).expect("already checked for NaN") - } -} - -macro_rules! make_operator_enum { - ( - enum $enum_name:ident { - $($Variant:ident = $name:literal,)* - } - ) => { - #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug)] - enum $enum_name { - $($Variant,)* - } - - impl $enum_name { - const VARIANTS: &[Self] = &[$(Self::$Variant,)*]; - fn name(self) -> &'static str { - match self { - $(Self::$Variant => $name,)* - } - } - } - }; -} - -make_operator_enum! { - enum PsOperator { - Array = "array", - ArrayStart = "[", - ArrayEnd = "]", - Begin = "begin", - CurrentDict = "currentdict", - CurrentFile = "currentfile", - Def = "def", - Dict = "dict", - Dup = "dup", - FontDirectory = "FontDirectory", - For = "for", - EExec = "eexec", - End = "end", - Exch = "exch", - IfElse = "ifelse", - Index = "index", - Known = "known", - Put = "put", - ReadOnly = "readonly", - } -} - -impl PsOperator { - fn run(self, parser: &mut PsParser) -> Result<(), PsBreakReason> { - match self { - PsOperator::Array => { - let Some(len) = parser.operand_stack.pop().and_then(|v| v.to_int()) else { - return custom_err( - "postscript array operator is missing required integer operand", - ); - }; - let Ok(len) = len.try_into() else { - return custom_err("postscript array operator passed invalid length"); - }; - let array = PsArray::from_elements(parser, vec![PsObject::Null; len]); - parser.operand_stack.push(PsObject::Array(array)); - Ok(()) - } - PsOperator::ArrayStart => { - parser.operand_stack.push(PsObject::Mark); - Ok(()) - } - PsOperator::ArrayEnd => { - let mut elements = Vec::new(); - while let Some(object) = parser.operand_stack.pop() { - match object { - PsObject::Mark => { - elements.reverse(); - let array = PsArray::from_elements(parser, elements); - parser.operand_stack.push(PsObject::Array(array)); - return Ok(()); - } - _ => elements.push(object), - } - } - custom_err("postscript ] operator is missing required mark operand") - } - PsOperator::Begin => { - let Some(PsObject::Dictionary(dict)) = parser.operand_stack.pop() else { - return custom_err( - "postscript begin operator is missing required dictionary operand", - ); - }; - parser.dictionary_stack.push(dict); - Ok(()) - } - PsOperator::CurrentDict => { - let Some(dict) = parser.dictionary_stack.last().cloned() else { - unreachable!(); - }; - parser.operand_stack.push(PsObject::Dictionary(dict)); - Ok(()) - } - PsOperator::CurrentFile => { - parser - .operand_stack - .push(PsObject::File(parser.tokenizer.clone())); - Ok(()) - } - PsOperator::Def => { - let Some(value) = parser.operand_stack.pop() else { - return custom_err("postscript def operator is missing required operand"); - }; - let Some(key) = parser.operand_stack.pop() else { - return custom_err("postscript def operator is missing required operand"); - }; - let Some(dict) = parser.dictionary_stack.last_mut() else { - unreachable!(); - }; - dict.insert(key, value); - Ok(()) - } - PsOperator::Dict => { - let Some(_capacity) = parser.operand_stack.pop().and_then(|v| v.to_int()) else { - return custom_err( - "postscript dict operator is missing required integer operand", - ); - }; - let dict = PsDictionary::new(parser); - parser.operand_stack.push(PsObject::Dictionary(dict)); - Ok(()) - } - PsOperator::Dup => { - let Some(value) = parser.operand_stack.pop() else { - return custom_err("postscript dup operator is missing required operand"); - }; - parser.operand_stack.push(value.clone()); - parser.operand_stack.push(value); - Ok(()) - } - PsOperator::EExec => { - if parser.break_at_eexec { - return Err(PsBreakReason::FoundEExec); - } - let Some(source) = parser.operand_stack.pop() else { - return custom_err("postscript eexec operator is missing required operand"); - }; - let file = match source { - PsObject::String(string) => todo!(), - PsObject::File(file) => file, - _ => { - return custom_err("postscript eexec operator has invalid operand"); - } - }; - dbg!(&parser.dictionary_stack); - dbg!(&parser.operand_stack); - let file = file.decrypt_for_eexec(parser.next_file_id)?; - parser.next_file_id += 1; - struct PutBackTokenizerOnDrop<'a> { - parser: &'a mut PsParser, - old_tokenizer: PsFile, - } - impl Drop for PutBackTokenizerOnDrop<'_> { - fn drop(&mut self) { - self.parser.tokenizer = self.old_tokenizer.clone(); - } - } - let put_back_tokenizer_on_drop = PutBackTokenizerOnDrop { - old_tokenizer: std::mem::replace(&mut parser.tokenizer, file), - parser, - }; - put_back_tokenizer_on_drop.parser.parse_file() - } - PsOperator::End => { - if parser.dictionary_stack.len() <= PsParser::MIN_DICTIONARY_STACK_SIZE { - return custom_err("postscript end operator without corresponding begin"); - } - parser.dictionary_stack.pop(); - Ok(()) - } - PsOperator::Exch => { - let Some([a, b]) = parser.operand_stack.last_chunk_mut() else { - return custom_err("postscript exch operator is missing required operands"); - }; - std::mem::swap(a, b); - Ok(()) - } - PsOperator::FontDirectory => { - parser.operand_stack.push(PsObject::Dictionary( - parser.font_directory.clone().expect("set in PsParser::new"), - )); - Ok(()) - } - PsOperator::For => { - let Some(PsObject::Procedure(proc)) = parser.operand_stack.pop() else { - return custom_err( - "postscript for operator is missing required procedure operand", - ); - }; - let Some(limit) = parser.operand_stack.pop() else { - return custom_err("postscript for operator is missing required limit operand"); - }; - let Some(increment) = parser.operand_stack.pop() else { - return custom_err( - "postscript for operator is missing required increment operand", - ); - }; - let Some(initial) = parser.operand_stack.pop() else { - return custom_err( - "postscript for operator is missing required initial operand", - ); - }; - let PsObject::Integer(initial) = initial else { - todo!("{initial:?}"); - }; - let PsObject::Integer(increment @ (..=-1 | 1..)) = increment else { - todo!("{increment:?}"); - }; - let PsObject::Integer(limit) = limit else { - todo!("{limit:?} {:?}", parser.operand_stack); - }; - let mut counter = initial; - let proc = proc.into_vec(); - loop { - if increment < 0 { - if counter < limit { - break; - } - } else if counter > limit { - break; - } - parser.operand_stack.push(PsObject::Integer(counter)); - parser.run_procedure(&proc)?; - counter = counter.checked_add(increment).ok_or_else(|| { - PdfParseError::Custom("postscript arithmetic overflow".into()) - })?; - } - Ok(()) - } - PsOperator::IfElse => { - let Some(PsObject::Procedure(else_proc)) = parser.operand_stack.pop() else { - return custom_err( - "postscript ifelse operator is missing required procedure operand", - ); - }; - let Some(PsObject::Procedure(then_proc)) = parser.operand_stack.pop() else { - return custom_err( - "postscript ifelse operator is missing required procedure operand", - ); - }; - let Some(PsObject::Boolean(cond)) = parser.operand_stack.pop() else { - return custom_err( - "postscript ifelse operator is missing required bool operand", - ); - }; - if cond { - parser.run_procedure(&then_proc.into_vec())?; - } else { - parser.run_procedure(&else_proc.into_vec())?; - } - Ok(()) - } - PsOperator::Index => { - let Some(index) = parser.operand_stack.pop().and_then(|v| v.to_int()) else { - return custom_err( - "postscript index operator is missing required integer operand", - ); - }; - let Some(object) = index - .try_into() - .ok() - .and_then(|index| parser.operand_stack.iter().nth_back(index).cloned()) - else { - return custom_err("postscript index operator passed invalid integer"); - }; - parser.operand_stack.push(object); - Ok(()) - } - PsOperator::Known => { - let Some(key) = parser.operand_stack.pop() else { - return custom_err("postscript known operator is missing required key operand"); - }; - let Some(PsObject::Dictionary(dictionary)) = parser.operand_stack.pop() else { - return custom_err( - "postscript known operator is missing required dictionary operand", - ); - }; - parser - .operand_stack - .push(PsObject::Boolean(dictionary.get(key).is_some())); - Ok(()) - } - PsOperator::Put => { - let Some(value) = parser.operand_stack.pop() else { - return custom_err("postscript put operator is missing required value operand"); - }; - let Some(key_or_index) = parser.operand_stack.pop() else { - return custom_err( - "postscript put operator is missing required key/index operand", - ); - }; - let Some(container) = parser.operand_stack.pop() else { - return custom_err( - "postscript put operator is missing required container operand", - ); - }; - match container { - PsObject::Array(array) => { - let array = array.rc(); - let mut array = array.borrow_mut(); - let Some(target) = key_or_index - .to_int() - .and_then(|index| array.get_mut(usize::try_from(index).ok()?)) - else { - return custom_err("postscript put operator has invalid index operand"); - }; - *target = value; - Ok(()) - } - PsObject::Dictionary(mut dict) => { - dict.insert(key_or_index, value); - Ok(()) - } - PsObject::String(s) => todo!(), - _ => custom_err("postscript put operator was passed invalid container operand"), - } - } - PsOperator::ReadOnly => { - // TODO: implement permissions - Ok(()) - } - } - } -} - -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Default)] -struct PsDictionaryImpl { - named: BTreeMap, - other: BTreeMap, -} - -#[derive(Clone)] -struct PsDictionary { - id: usize, - weak: std::rc::Weak>, -} - -impl fmt::Debug for PsDictionary { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let Self { id, weak } = self; - if let Some(weak) = weak.upgrade() { - if let Ok(weak) = weak.try_borrow() { - write!(f, "#{id} ")?; - let PsDictionaryImpl { named, other } = &*weak; - return f.debug_map().entries(named).entries(other).finish(); - } - } - f.debug_struct("PsDictionary") - .field("id", id) - .field("weak", &weak) - .finish() - } -} - -impl Ord for PsDictionary { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.id.cmp(&other.id) - } -} - -impl PartialOrd for PsDictionary { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Eq for PsDictionary {} - -impl PartialEq for PsDictionary { - fn eq(&self, other: &Self) -> bool { - self.id == other.id - } -} - -impl PsDictionary { - fn new(parser: &mut PsParser) -> Self { - Self::from_impl(parser, PsDictionaryImpl::default()) - } - fn rc(&self) -> Rc> { - self.weak.upgrade().expect("still in parser scope") - } - fn from_impl(parser: &mut PsParser, impl_: PsDictionaryImpl) -> Self { - let dict = Rc::new(RefCell::new(impl_)); - let weak = Rc::downgrade(&dict); - let id = parser.dictionaries.len(); - parser.dictionaries.push(dict); - Self { id, weak } - } - fn from_name_value_pairs<'a>( - parser: &mut PsParser, - iter: impl IntoIterator, - ) -> Self { - Self::from_impl( - parser, - PsDictionaryImpl { - named: BTreeMap::from_iter( - iter.into_iter() - .map(|(k, v)| (PsName(k.as_bytes().into()), v)), - ), - other: BTreeMap::new(), - }, - ) - } - fn get_named(&self, key: &PsName) -> Option { - self.rc().borrow().named.get(key).cloned() - } - fn insert(&mut self, key: PsObject, value: PsObject) -> Option { - let this = self.rc(); - let mut this = this.borrow_mut(); - let PsDictionaryImpl { named, other } = &mut *this; - match key { - PsObject::String(s) => named.insert(PsName(s.0.borrow().clone()), value), - PsObject::Name(name) => named.insert(name, value), - _ => other.insert(key, value), - } - } - fn insert_named(&mut self, name: PsName, value: PsObject) -> Option { - self.rc().borrow_mut().named.insert(name, value) - } - fn into_impl(self) -> PsDictionaryImpl { - self.rc().borrow().clone() - } - fn get(&self, key: PsObject) -> Option { - let this = self.rc(); - let this = this.borrow(); - let PsDictionaryImpl { named, other } = &*this; - match key { - PsObject::String(s) => named.get(&PsName(s.0.borrow().clone())).cloned(), - PsObject::Name(name) => named.get(&name).cloned(), - _ => other.get(&key).cloned(), - } - } -} - -#[derive(Clone)] -struct PsArray { - id: usize, - weak: std::rc::Weak>>, -} - -impl fmt::Debug for PsArray { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let Self { id, weak } = self; - if let Some(weak) = weak.upgrade() { - if let Ok(weak) = weak.try_borrow() { - write!(f, "#{id} ")?; - return Vec::fmt(&weak, f); - } - } - f.debug_struct("PsArray") - .field("id", id) - .field("weak", &weak) - .finish() - } -} - -impl Ord for PsArray { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.id.cmp(&other.id) - } -} - -impl PartialOrd for PsArray { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Eq for PsArray {} - -impl PartialEq for PsArray { - fn eq(&self, other: &Self) -> bool { - self.id == other.id - } -} - -impl PsArray { - fn new(parser: &mut PsParser) -> Self { - Self::from_elements(parser, Vec::new()) - } - fn rc(&self) -> Rc>> { - self.weak.upgrade().expect("still in parser scope") - } - fn from_elements(parser: &mut PsParser, elements: Vec) -> Self { - let array = Rc::new(RefCell::new(elements)); - let weak = Rc::downgrade(&array); - let id = parser.arrays.len(); - parser.arrays.push(array); - Self { id, weak } - } - fn into_vec(self) -> Vec { - self.rc().borrow().clone() - } -} - -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone)] -struct PsName(Vec); - -impl fmt::Debug for PsName { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "PsName({})", self.0.escape_ascii()) - } -} - -impl From for PdfName { - fn from(value: PsName) -> Self { - PdfName::new(PdfInputPosition::empty(), ArcOrRef::Arc(value.0.into())) - } -} - -#[derive(PartialEq, Eq, PartialOrd, Ord, Clone)] -struct PsString(Rc>>); - -impl fmt::Debug for PsString { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "PsString({})", self.0.borrow().escape_ascii()) - } -} - -impl From for PdfString { - fn from(value: PsString) -> Self { - PdfString::new( - PdfInputPosition::empty(), - ArcOrRef::Arc(value.0.borrow().as_slice().into()), - ) - } -} - -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] -enum PsObject { - Array(PsArray), - Procedure(PsArray), - Dictionary(PsDictionary), - Integer(i128), - Mark, - Name(PsName), - Operator(PsOperator), - Real(PsReal), - String(PsString), - Boolean(bool), - Null, - ExecutableName(PsName), - File(PsFile), -} - -impl PsObject { - fn to_int(&self) -> Option { - match self { - PsObject::Integer(v) => Some(*v), - PsObject::Real(PsReal(v)) => Some(*v as i128), - _ => None, - } - } - fn to_f32(&self) -> Option { - match self { - PsObject::Integer(v) => Some(*v as f32), - PsObject::Real(PsReal(v)) => Some(*v as f32), - _ => None, - } - } -} - -struct PsParser { - tokenizer: PsFile, - operand_stack: Vec, - dictionary_stack: Vec, - dictionaries: Vec>>, - arrays: Vec>>>, - next_file_id: u64, - break_at_eexec: bool, - font_directory: Option, -} - -impl PsParser { - const MIN_DICTIONARY_STACK_SIZE: usize = 3; - fn new(tokenizer: PsFile) -> Self { - let mut retval = Self { - next_file_id: tokenizer.id + 1, - tokenizer, - operand_stack: Vec::with_capacity(16), - dictionary_stack: Vec::with_capacity(8), - dictionaries: Vec::with_capacity(16), - arrays: Vec::with_capacity(16), - break_at_eexec: false, - font_directory: None, - }; - let mut system_dict = PsDictionary::from_name_value_pairs( - &mut retval, - PsOperator::VARIANTS - .iter() - .map(|&op| (op.name(), PsObject::Operator(op))), - ); - system_dict.insert_named(PsName(b"false".into()), PsObject::Boolean(false)); - system_dict.insert_named(PsName(b"true".into()), PsObject::Boolean(true)); - retval.dictionary_stack.push(system_dict); - let dict = PsDictionary::new(&mut retval); - retval.dictionary_stack.push(dict); - let dict = PsDictionary::new(&mut retval); - retval.dictionary_stack.push(dict); - retval.font_directory = Some(PsDictionary::new(&mut retval)); - retval - } - - fn run_name(&mut self, name: &PsName) -> Result<(), PsBreakReason> { - let Some(value) = self - .dictionary_stack - .iter() - .rev() - .find_map(|dict| dict.get_named(name)) - else { - todo!("unimplemented PS operator {name:?}"); - }; - match value { - PsObject::Integer(_) | PsObject::Real(_) | PsObject::Boolean(_) => { - self.operand_stack.push(value); - Ok(()) - } - PsObject::Name(v) => todo!(), - PsObject::ExecutableName(v) => todo!(), - PsObject::Null => todo!(), - PsObject::String(v) => todo!(), - PsObject::Array(v) => todo!(), - PsObject::Dictionary(v) => todo!(), - PsObject::Operator(value) => value.run(self), - PsObject::Mark => todo!(), - PsObject::Procedure(v) => todo!(), - PsObject::File(v) => todo!(), - } - } - - fn parse_procedure(&mut self) -> Result { - self.tokenizer.skip_comments_and_whitespace(); - let mut objects = Vec::new(); - while let Some(token) = self.tokenizer.next_token()? { - objects.push(match token { - Token::Integer(v) => PsObject::Integer(v), - Token::Real(v) => PsObject::Real(PsReal(v)), - Token::ArrayStart => PsObject::ExecutableName(PsName(b"[".into())), - Token::ArrayEnd => PsObject::ExecutableName(PsName(b"]".into())), - Token::ProcedureStart => PsObject::Procedure(self.parse_procedure()?), - Token::ProcedureEnd => return Ok(PsArray::from_elements(self, objects)), - Token::ExecutableName(name) => PsObject::ExecutableName(PsName(name.into())), - Token::LiteralName(name) => PsObject::Name(PsName(name.into())), - Token::ImmediatelyEvaluatedName(_) => todo!("{token:?}"), - Token::String(v) => PsObject::String(PsString(Rc::new(RefCell::new(v)))), - }); - } - custom_err("postscript missing closing }") - } - fn parse_file(&mut self) -> Result<(), PsBreakReason> { - self.tokenizer.skip_comments_and_whitespace(); - while let Some(token) = self.tokenizer.next_token()? { - match token { - Token::Integer(v) => self.operand_stack.push(PsObject::Integer(v)), - Token::Real(v) => self.operand_stack.push(PsObject::Real(PsReal(v))), - Token::ArrayStart => self.run_name(&PsName(b"[".into()))?, - Token::ArrayEnd => self.run_name(&PsName(b"]".into()))?, - Token::ProcedureStart => { - let procedure = self.parse_procedure()?; - self.operand_stack.push(PsObject::Procedure(procedure)) - } - Token::ProcedureEnd => todo!(), - Token::ExecutableName(name) => { - let name = PsName(name.into()); - self.run_name(&name)? - } - Token::LiteralName(name) => { - self.operand_stack.push(PsObject::Name(PsName(name.into()))) - } - Token::ImmediatelyEvaluatedName(_) => todo!("{token:?}"), - Token::String(v) => self - .operand_stack - .push(PsObject::String(PsString(Rc::new(RefCell::new(v))))), - } - } - Ok(()) - } - fn parse_font_encoding( - &mut self, - value: PsArray, - ) -> Result]>, PdfParseError> { - let value = value.rc(); - let value = value.borrow(); - let mut vec = Vec::with_capacity(value.len()); - for entry in value.iter() { - match entry { - PsObject::Name(name) => { - if name.0 == b".notdef" { - vec.push(None); - } else { - vec.push(Some(PdfName::new( - self.tokenizer.pos(), - Arc::from(&*name.0), - ))); - } - } - _ => todo!("{entry:?}"), - } - } - Ok(Arc::from(vec)) - } - fn parse_font_bbox(&mut self, value: PsArray) -> Result { - let value = value.rc(); - let value = value.borrow(); - let mut vec = Vec::new(); - for entry in value.iter() { - let Some(v) = entry.to_f32() else { - return custom_err("postscript invalid FontBBox entry"); - }; - vec.push(v); - } - match <[f32; 4]>::try_from(vec) { - Ok([x1, y1, x2, y2]) => Ok(PdfRectangle::new( - PdfVec2D { - pos: PdfInputPositionNoCompare::empty(), - x: x1, - y: y1, - }, - PdfVec2D { - pos: PdfInputPositionNoCompare::empty(), - x: x2, - y: y2, - }, - )), - Err(_) => custom_err("postscript invalid FontBBox entry"), - } - } - fn parse_font_matrix(&mut self, value: PsArray) -> Result { - let value = value.rc(); - let value = value.borrow(); - let mut vec = Vec::new(); - for entry in value.iter() { - let Some(v) = entry.to_f32() else { - return custom_err("postscript invalid FontBBox entry"); - }; - vec.push(v); - } - match vec.try_into() { - Ok(elements) => Ok(PdfMatrix { - pos: PdfInputPositionNoCompare::empty(), - elements, - }), - Err(_) => custom_err("postscript invalid FontBBox entry"), - } - } - fn parse_font_info_dict( - &mut self, - font_info_dict: PsDictionary, - ) -> Result { - let PsDictionaryImpl { named, other: _ } = font_info_dict.into_impl(); - let mut family_name = None; - let mut full_name = None; - let mut notice = None; - let mut weight = None; - let mut version = None; - let mut italic_angle = None; - let mut is_fixed_pitch = None; - let mut underline_position = None; - let mut underline_thickness = None; - for (key, value) in named { - match (&*key.0, value) { - (b"FamilyName", PsObject::String(string)) => family_name = Some(string.into()), - (b"FullName", PsObject::String(string)) => full_name = Some(string.into()), - (b"Notice", PsObject::String(string)) => notice = Some(string.into()), - (b"Weight", PsObject::String(string)) => weight = Some(string.into()), - (b"version", PsObject::String(string)) => version = Some(string.into()), - (b"ItalicAngle", value) => { - if let Some(value) = value.to_f32() { - italic_angle = Some(value); - } else { - todo!("{value:?}") - } - } - (b"isFixedPitch", PsObject::Boolean(v)) => is_fixed_pitch = Some(v), - (b"UnderlinePosition", value) => { - if let Some(value) = value.to_f32() { - underline_position = Some(value); - } else { - todo!("{value:?}") - } - } - (b"UnderlineThickness", value) => { - if let Some(value) = value.to_f32() { - underline_thickness = Some(value); - } else { - todo!("{value:?}") - } - } - _ => {} - } - } - Ok(PdfFontType1FontInfo { - family_name, - full_name, - notice, - weight, - version, - italic_angle, - is_fixed_pitch, - underline_position, - underline_thickness, - }) - } - fn parse_font_dict( - &mut self, - dict: PsDictionary, - ) -> Result { - let PsDictionaryImpl { named, other } = dict.into_impl(); - let mut encoding = None; - let mut font_bbox = None; - let mut font_info = None; - let mut font_matrix = None; - let mut font_name = None; - for (key, value) in named { - match (&*key.0, value) { - (b"Encoding", PsObject::Array(value)) => { - encoding = Some(self.parse_font_encoding(value)?); - } - (b"FontBBox", PsObject::Array(value) | PsObject::Procedure(value)) => { - font_bbox = Some(self.parse_font_bbox(value)?); - } - (b"FontInfo", PsObject::Dictionary(value)) => { - font_info = Some(self.parse_font_info_dict(value)?); - } - (b"FontMatrix", PsObject::Array(value) | PsObject::Procedure(value)) => { - font_matrix = Some(self.parse_font_matrix(value)?); - } - (b"FontName", PsObject::Name(value)) => { - font_name = Some(value.into()); - } - (b"FontType", _) => { - // TODO - } - (b"PaintType", _) => { - // TODO - } - (_, value) => todo!("{key:?}: {value:?}"), - } - } - for (key, value) in other { - todo!("{key:?}: {value:?}"); - } - Ok(PdfFontType1Program { - encoding, - font_bbox, - font_info, - font_matrix, - font_name, - }) - } - fn parse(mut self) -> Result { - self.break_at_eexec = true; - match self.parse_file() { - Ok(()) => return custom_err("postscript eexec operator not found"), - Err(PsBreakReason::FoundEExec) => {} - Err(PsBreakReason::Error(e)) => return Err(e), - } - let Some(PsObject::File(_)) = self.operand_stack.pop() else { - return custom_err("postscript eexec operand not found"); - }; - let Some(PsObject::Dictionary(dict)) = self.operand_stack.pop() else { - todo!(); - }; - self.parse_font_dict(dict) - } - fn run_procedure(&mut self, proc: &[PsObject]) -> Result<(), PsBreakReason> { - for object in proc { - match object { - PsObject::Array(v) => todo!(), - PsObject::Procedure(v) => todo!(), - PsObject::Dictionary(v) => todo!(), - PsObject::Integer(_) => self.operand_stack.push(object.clone()), - PsObject::Mark => todo!(), - PsObject::Name(_) => self.operand_stack.push(object.clone()), - PsObject::ExecutableName(name) => self.run_name(name)?, - PsObject::Operator(v) => todo!(), - PsObject::Real(v) => todo!(), - PsObject::String(v) => todo!(), - PsObject::Boolean(v) => todo!(), - PsObject::Null => todo!(), - PsObject::File(v) => todo!(), - } - } - Ok(()) - } -} - -impl PdfStreamContents for PdfFontType1Program { - fn parse( - data: &[u8], - stream_pos: PdfInputPosition, - _objects: Arc, - ) -> Result { - PsParser::new(PsFile::new( - 0, - PsFileSource::Bytes(Rc::from(data)), - 0, - stream_pos, - )) - .parse() - } -} diff --git a/src/pdf/object.rs b/src/pdf/object.rs deleted file mode 100644 index bf91fac..0000000 --- a/src/pdf/object.rs +++ /dev/null @@ -1,2142 +0,0 @@ -use crate::{ - pdf::{ - PdfObjectAndParseCache, PdfObjects, - parse::{ - GetPdfInputPosition, PdfInputPosition, PdfInputPositionNoCompare, PdfParse, - PdfParseError, - }, - stream_filters::PdfStreamFilter, - }, - pdf_parse, - util::{ArcOrRef, DagDebugState}, -}; -use std::{ - any::TypeId, - borrow::{Borrow, Cow}, - collections::BTreeMap, - fmt::{self, Write}, - iter::FusedIterator, - num::NonZero, - sync::{Arc, OnceLock}, -}; - -#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord)] -pub struct PdfString { - pos: PdfInputPositionNoCompare, - bytes: ArcOrRef<'static, [u8]>, -} - -impl std::fmt::Debug for PdfString { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let Self { pos, bytes: _ } = self; - write!(f, "PdfString(at {pos}, {})", self.bytes_debug()) - } -} - -#[derive(Clone, Copy)] -pub struct PdfStringBytesDebug<'a>(pub &'a [u8]); - -impl<'a> fmt::Display for PdfStringBytesDebug<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "b\"{}\"", self.0.escape_ascii()) - } -} - -impl<'a> fmt::Debug for PdfStringBytesDebug<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(self, f) - } -} - -impl PdfString { - pub fn new(pos: impl Into, bytes: ArcOrRef<'static, [u8]>) -> Self { - Self { - pos: pos.into(), - bytes, - } - } - pub fn pos(&self) -> PdfInputPosition { - self.pos.0 - } - pub fn bytes(&self) -> &ArcOrRef<'static, [u8]> { - &self.bytes - } - pub fn bytes_debug(&self) -> PdfStringBytesDebug<'_> { - PdfStringBytesDebug(&self.bytes) - } -} - -impl GetPdfInputPosition for PdfString { - fn get_pdf_input_position(&self) -> PdfInputPosition { - self.pos.0 - } -} - -#[derive(Clone, PartialEq, Eq)] -pub struct PdfDate { - text: PdfString, -} - -impl fmt::Debug for PdfDate { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let Self { text } = self; - let pos = text.pos(); - write!(f, "PdfDate(at {pos}, {})", text.bytes_debug()) - } -} - -impl IsPdfNull for PdfDate { - fn is_pdf_null(&self) -> bool { - false - } -} - -impl PdfParse for PdfDate { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed("date") - } - fn parse(object: PdfObject) -> Result { - Self::try_new(PdfString::parse(object)?) - } -} - -impl PdfDate { - pub fn try_new(text: PdfString) -> Result { - // TODO: check syntax - Ok(Self { text }) - } - pub fn text(&self) -> &PdfString { - &self.text - } -} - -impl GetPdfInputPosition for PdfDate { - fn get_pdf_input_position(&self) -> PdfInputPosition { - self.text.pos() - } -} - -#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct PdfName { - pos: PdfInputPositionNoCompare, - bytes: ArcOrRef<'static, [u8]>, -} - -impl Borrow<[u8]> for PdfName { - fn borrow(&self) -> &[u8] { - &self.bytes - } -} - -impl PdfName { - pub fn try_new( - pos: impl Into, - bytes: impl Into>, - ) -> Option { - let bytes = bytes.into(); - if bytes.contains(&0) { - None - } else { - Some(Self { - pos: pos.into(), - bytes, - }) - } - } - #[track_caller] - pub const fn new_static(bytes: &'static [u8]) -> Self { - let mut i = 0; - while i < bytes.len() { - if bytes[i] == 0 { - panic!("shouldn't contain any nul bytes"); - } - i += 1; - } - Self { - pos: PdfInputPositionNoCompare::empty(), - bytes: ArcOrRef::Ref(bytes), - } - } - #[track_caller] - pub fn new( - pos: impl Into, - bytes: impl Into>, - ) -> Self { - Self::try_new(pos, bytes).expect("shouldn't contain any nul bytes") - } - pub fn as_bytes(&self) -> &ArcOrRef<'static, [u8]> { - &self.bytes - } - pub fn pos(&self) -> PdfInputPosition { - self.pos.0 - } -} - -impl GetPdfInputPosition for PdfName { - fn get_pdf_input_position(&self) -> PdfInputPosition { - self.pos.0 - } -} - -impl fmt::Debug for PdfName { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "PdfName(at {}: {self})", self.pos) - } -} - -impl fmt::Display for PdfName { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str("/")?; - for &b in self.bytes.iter() { - match b { - 0x21..=0x7E if b != b'#' => f.write_char(b.into())?, - _ => write!(f, "#{b:02X}")?, - } - } - Ok(()) - } -} - -#[derive(Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord, Default)] -pub struct PdfBoolean { - pos: PdfInputPositionNoCompare, - value: bool, -} - -impl fmt::Debug for PdfBoolean { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let Self { pos, value } = *self; - write!(f, "PdfBoolean(at {pos}, {value})") - } -} - -impl PdfBoolean { - pub fn new(pos: impl Into, value: bool) -> Self { - Self { - pos: pos.into(), - value, - } - } - pub fn pos(&self) -> PdfInputPosition { - self.pos.0 - } - pub fn value(&self) -> bool { - self.value - } -} - -impl GetPdfInputPosition for PdfBoolean { - fn get_pdf_input_position(&self) -> PdfInputPosition { - self.pos.0 - } -} - -#[derive(Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord, Default)] -pub struct PdfInteger { - pos: PdfInputPositionNoCompare, - value: i128, -} - -impl fmt::Debug for PdfInteger { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let Self { pos, value } = *self; - write!(f, "PdfInteger(at {pos}, {value})") - } -} - -impl PdfInteger { - pub fn new(pos: impl Into, value: i128) -> Self { - Self { - pos: pos.into(), - value, - } - } - pub fn pos(&self) -> PdfInputPosition { - self.pos.0 - } - pub fn value(&self) -> i128 { - self.value - } -} - -impl GetPdfInputPosition for PdfInteger { - fn get_pdf_input_position(&self) -> PdfInputPosition { - self.pos.0 - } -} - -#[derive(Clone, Copy, PartialEq, PartialOrd, Default)] -pub struct PdfReal { - pos: PdfInputPositionNoCompare, - value: f64, -} - -impl fmt::Debug for PdfReal { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let Self { pos, value } = *self; - write!(f, "PdfReal(at {pos}, {value})") - } -} - -impl PdfReal { - pub fn new(pos: impl Into, value: f64) -> Self { - Self { - pos: pos.into(), - value, - } - } - pub fn pos(&self) -> PdfInputPosition { - self.pos.0 - } - pub fn value(&self) -> f64 { - self.value - } -} - -impl GetPdfInputPosition for PdfReal { - fn get_pdf_input_position(&self) -> PdfInputPosition { - self.pos.0 - } -} - -#[derive(Clone, Copy)] -pub enum PdfNumber { - Integer(PdfInteger), - Real(PdfReal), -} - -impl fmt::Debug for PdfNumber { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Integer(v) => v.fmt(f), - Self::Real(v) => v.fmt(f), - } - } -} - -impl PdfNumber { - pub fn pos(self) -> PdfInputPosition { - match self { - Self::Integer(v) => v.pos(), - Self::Real(v) => v.pos(), - } - } - pub fn as_f64(self) -> f64 { - match self { - Self::Integer(v) => v.value as f64, - Self::Real(v) => v.value, - } - } - pub fn as_f32(self) -> f32 { - match self { - Self::Integer(v) => v.value as f32, - Self::Real(v) => v.value as f32, - } - } -} - -impl PartialOrd for PdfNumber { - fn partial_cmp(&self, other: &Self) -> Option { - match (self, other) { - (Self::Integer(this), Self::Integer(other)) => Some(this.cmp(other)), - _ => self.as_f64().partial_cmp(&other.as_f64()), - } - } -} - -impl PartialEq for PdfNumber { - fn eq(&self, other: &Self) -> bool { - self.partial_cmp(other).is_some_and(|v| v.is_eq()) - } -} - -impl Default for PdfNumber { - fn default() -> Self { - PdfNumber::Integer(PdfInteger::default()) - } -} - -impl PdfObjectDirect { - pub fn number(&self) -> Option { - match *self { - PdfObjectDirect::Integer(v) => Some(PdfNumber::Integer(v)), - PdfObjectDirect::Real(v) => Some(PdfNumber::Real(v)), - PdfObjectDirect::Boolean(_) - | PdfObjectDirect::String(_) - | PdfObjectDirect::Name(_) - | PdfObjectDirect::Array(_) - | PdfObjectDirect::Dictionary(_) - | PdfObjectDirect::Stream(_) - | PdfObjectDirect::Null(_) => None, - } - } -} - -impl PdfObjectNonNull { - pub fn number(&self) -> Option { - match *self { - PdfObjectNonNull::Integer(v) => Some(PdfNumber::Integer(v)), - PdfObjectNonNull::Real(v) => Some(PdfNumber::Real(v)), - PdfObjectNonNull::Boolean(_) - | PdfObjectNonNull::String(_) - | PdfObjectNonNull::Name(_) - | PdfObjectNonNull::Array(_) - | PdfObjectNonNull::Dictionary(_) - | PdfObjectNonNull::Stream(_) => None, - } - } -} - -impl IsPdfNull for PdfNumber { - fn is_pdf_null(&self) -> bool { - false - } -} - -impl PdfParse for PdfNumber { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed("number") - } - fn parse(object: PdfObject) -> Result { - let object = PdfObjectDirect::from(object); - object.number().ok_or(PdfParseError::InvalidType { - pos: object.pos(), - ty: object.type_name(), - expected_ty: "number", - }) - } -} - -#[derive(Clone)] -pub enum PdfStringOrNumber { - String(PdfString), - Number(PdfNumber), -} - -impl fmt::Debug for PdfStringOrNumber { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::String(v) => v.fmt(f), - Self::Number(v) => v.fmt(f), - } - } -} - -impl PdfStringOrNumber { - pub fn pos(self) -> PdfInputPosition { - match self { - Self::String(v) => v.pos(), - Self::Number(v) => v.pos(), - } - } -} - -impl PdfObjectDirect { - pub fn string_or_number(&self) -> Option { - match *self { - PdfObjectDirect::Integer(v) => Some(PdfStringOrNumber::Number(PdfNumber::Integer(v))), - PdfObjectDirect::Real(v) => Some(PdfStringOrNumber::Number(PdfNumber::Real(v))), - PdfObjectDirect::String(ref v) => Some(PdfStringOrNumber::String(v.clone())), - PdfObjectDirect::Boolean(_) - | PdfObjectDirect::Name(_) - | PdfObjectDirect::Array(_) - | PdfObjectDirect::Dictionary(_) - | PdfObjectDirect::Stream(_) - | PdfObjectDirect::Null(_) => None, - } - } -} - -impl PdfObjectNonNull { - pub fn string_or_number(&self) -> Option { - match *self { - PdfObjectNonNull::Integer(v) => Some(PdfStringOrNumber::Number(PdfNumber::Integer(v))), - PdfObjectNonNull::Real(v) => Some(PdfStringOrNumber::Number(PdfNumber::Real(v))), - PdfObjectNonNull::String(ref v) => Some(PdfStringOrNumber::String(v.clone())), - PdfObjectNonNull::Boolean(_) - | PdfObjectNonNull::Name(_) - | PdfObjectNonNull::Array(_) - | PdfObjectNonNull::Dictionary(_) - | PdfObjectNonNull::Stream(_) => None, - } - } -} - -impl IsPdfNull for PdfStringOrNumber { - fn is_pdf_null(&self) -> bool { - false - } -} - -impl PdfParse for PdfStringOrNumber { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed("string or number") - } - fn parse(object: PdfObject) -> Result { - let object = PdfObjectDirect::from(object); - object.string_or_number().ok_or(PdfParseError::InvalidType { - pos: object.pos(), - ty: object.type_name(), - expected_ty: "string or number", - }) - } -} - -#[derive(Clone)] -pub enum PdfNameOrInteger { - Name(PdfName), - Integer(PdfInteger), -} - -impl fmt::Debug for PdfNameOrInteger { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Name(v) => v.fmt(f), - Self::Integer(v) => v.fmt(f), - } - } -} - -impl PdfNameOrInteger { - pub fn pos(self) -> PdfInputPosition { - match self { - Self::Name(v) => v.pos(), - Self::Integer(v) => v.pos(), - } - } -} - -impl PdfObjectDirect { - pub fn name_or_integer(&self) -> Option { - match *self { - PdfObjectDirect::Name(ref v) => Some(PdfNameOrInteger::Name(v.clone())), - PdfObjectDirect::Integer(v) => Some(PdfNameOrInteger::Integer(v)), - PdfObjectDirect::Boolean(_) - | PdfObjectDirect::Real(_) - | PdfObjectDirect::String(_) - | PdfObjectDirect::Array(_) - | PdfObjectDirect::Dictionary(_) - | PdfObjectDirect::Stream(_) - | PdfObjectDirect::Null(_) => None, - } - } -} - -impl PdfObjectNonNull { - pub fn name_or_integer(&self) -> Option { - match *self { - PdfObjectNonNull::Name(ref v) => Some(PdfNameOrInteger::Name(v.clone())), - PdfObjectNonNull::Integer(v) => Some(PdfNameOrInteger::Integer(v)), - PdfObjectNonNull::Boolean(_) - | PdfObjectNonNull::Real(_) - | PdfObjectNonNull::String(_) - | PdfObjectNonNull::Array(_) - | PdfObjectNonNull::Dictionary(_) - | PdfObjectNonNull::Stream(_) => None, - } - } -} - -impl IsPdfNull for PdfNameOrInteger { - fn is_pdf_null(&self) -> bool { - false - } -} - -impl PdfParse for PdfNameOrInteger { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed("name or integer") - } - fn parse(object: PdfObject) -> Result { - let object = PdfObjectDirect::from(object); - object.name_or_integer().ok_or(PdfParseError::InvalidType { - pos: object.pos(), - ty: object.type_name(), - expected_ty: "name or integer", - }) - } -} - -macro_rules! make_pdf_object { - ( - $( - #[parse = $($parse:ident)?, type_name = $type_name:literal] - $Variant:ident($ty:ty), - )+ - ) => { - #[derive(Clone)] - pub enum PdfObjectNonNull { - $($Variant($ty),)* - } - - impl fmt::Debug for PdfObjectNonNull { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - $(Self::$Variant(v) => v.fmt(f),)* - } - } - } - - impl IsPdfNull for PdfObjectNonNull { - fn is_pdf_null(&self) -> bool { - false - } - } - - #[derive(Clone)] - pub enum PdfObjectDirect { - $($Variant($ty),)* - Null(PdfNull), - } - - impl IsPdfNull for PdfObjectDirect { - fn is_pdf_null(&self) -> bool { - self.is_null() - } - } - - impl fmt::Debug for PdfObjectDirect { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - $(Self::$Variant(v) => v.fmt(f),)* - Self::Null(v) => v.fmt(f), - } - } - } - - #[derive(Clone)] - pub enum PdfObject { - $($Variant($ty),)* - Null(PdfNull), - Indirect(PdfObjectIndirect), - } - - impl IsPdfNull for PdfObject { - fn is_pdf_null(&self) -> bool { - self.is_null() - } - } - - impl fmt::Debug for PdfObject { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - $(Self::$Variant(v) => v.fmt(f),)* - Self::Null(v) => v.fmt(f), - Self::Indirect(v) => v.fmt(f), - } - } - } - - $( - impl From<$ty> for PdfObjectNonNull { - fn from(value: $ty) -> Self { - Self::$Variant(value) - } - } - - impl From<$ty> for PdfObjectDirect { - fn from(value: $ty) -> Self { - Self::$Variant(value) - } - } - - impl From<$ty> for PdfObject { - fn from(value: $ty) -> Self { - Self::$Variant(value) - } - } - - impl From> for PdfObjectDirect { - fn from(value: Option<$ty>) -> Self { - match value { - Some(value) => Self::$Variant(value), - None => Self::Null(Default::default()), - } - } - } - - impl From> for PdfObject { - fn from(value: Option<$ty>) -> Self { - match value { - Some(value) => Self::$Variant(value), - None => Self::Null(Default::default()), - } - } - } - - $(impl IsPdfNull for $ty { - fn is_pdf_null(&self) -> bool { - false - } - } - - impl PdfParse for $ty { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed($type_name) - } - fn $parse(object: PdfObject) -> Result { - match PdfObjectDirect::from(object) { - PdfObjectDirect::$Variant(v) => Ok(v), - object => Err(PdfParseError::InvalidType { - pos: object.get_pdf_input_position(), - ty: object.type_name(), - expected_ty: $type_name, - }), - } - } - })? - )* - - impl From for PdfObjectDirect { - fn from(value: PdfObjectNonNull) -> Self { - match value { - $(PdfObjectNonNull::$Variant(v) => Self::$Variant(v),)* - } - } - } - - impl From for PdfObject { - fn from(value: PdfObjectNonNull) -> Self { - match value { - $(PdfObjectNonNull::$Variant(v) => Self::$Variant(v),)* - } - } - } - - impl From for PdfObject { - fn from(value: PdfObjectDirect) -> Self { - match value { - $(PdfObjectDirect::$Variant(v) => Self::$Variant(v),)* - PdfObjectDirect::Null(v) => Self::Null(v), - } - } - } - - impl From for PdfObjectDirect { - fn from(value: PdfObject) -> Self { - match value { - $(PdfObject::$Variant(v) => Self::$Variant(v),)* - PdfObject::Null(v) => Self::Null(v), - PdfObject::Indirect(v) => v.into(), - } - } - } - - impl PdfObjectNonNull { - pub fn type_name(&self) -> &'static str { - match self { - $(PdfObjectNonNull::$Variant(_) => $type_name,)* - } - } - pub fn pos(&self) -> PdfInputPosition { - self.get_pdf_input_position() - } - } - - impl GetPdfInputPosition for PdfObjectNonNull { - fn get_pdf_input_position(&self) -> PdfInputPosition { - match self { - $(PdfObjectNonNull::$Variant(v) => <$ty as GetPdfInputPosition>::get_pdf_input_position(v),)* - } - } - } - - impl From for Option { - fn from(value: PdfObjectDirect) -> Self { - match value { - $(PdfObjectDirect::$Variant(v) => Some(PdfObjectNonNull::$Variant(v)),)* - PdfObjectDirect::Null(_) => None, - } - } - } - - impl From for Option { - fn from(value: PdfObject) -> Self { - PdfObjectDirect::from(value).into() - } - } - - impl PdfObjectDirect { - pub fn is_null(&self) -> bool { - matches!(self, PdfObjectDirect::Null(_)) - } - pub fn type_name(&self) -> &'static str { - match self { - $(PdfObjectDirect::$Variant(_) => $type_name,)* - PdfObjectDirect::Null(_) => "null", - } - } - pub fn pos(&self) -> PdfInputPosition { - self.get_pdf_input_position() - } - } - - impl GetPdfInputPosition for PdfObjectDirect { - fn get_pdf_input_position(&self) -> PdfInputPosition { - match self { - $(PdfObjectDirect::$Variant(v) => <$ty as GetPdfInputPosition>::get_pdf_input_position(v),)* - PdfObjectDirect::Null(v) => ::get_pdf_input_position(v), - } - } - } - - impl PdfObject { - pub fn is_null(&self) -> bool { - matches!(self, PdfObject::Null(_)) - } - pub fn type_name(&self) -> &'static str { - match self { - $(PdfObject::$Variant(_) => $type_name,)* - PdfObject::Null(_) => "null", - PdfObject::Indirect(_) => "indirect object", - } - } - pub fn pos(&self) -> PdfInputPosition { - self.get_pdf_input_position() - } - } - - impl GetPdfInputPosition for PdfObject { - fn get_pdf_input_position(&self) -> PdfInputPosition { - match self { - $(PdfObject::$Variant(v) => <$ty as GetPdfInputPosition>::get_pdf_input_position(v),)* - PdfObject::Null(v) => ::get_pdf_input_position(v), - PdfObject::Indirect(v) => ::get_pdf_input_position(v), - } - } - } - - const _: () = { - fn _assert_parsable() {} - - $(let _ = _assert_parsable::<$ty>;)* - let _ = _assert_parsable::; - let _ = _assert_parsable::; - let _ = _assert_parsable::; - let _ = _assert_parsable::; - let _ = _assert_parsable::; - }; - }; -} - -make_pdf_object! { - #[parse = parse, type_name = "boolean"] - Boolean(PdfBoolean), - #[parse = parse, type_name = "integer"] - Integer(PdfInteger), - #[parse = parse, type_name = "real"] - Real(PdfReal), - #[parse = parse, type_name = "string"] - String(PdfString), - #[parse = parse, type_name = "name"] - Name(PdfName), - #[parse = parse, type_name = "array"] - Array(PdfArray), - #[parse =, type_name = "dictionary"] - Dictionary(PdfDictionary), - #[parse =, type_name = "stream"] - Stream(PdfStream), -} - -#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct PdfNull(PdfInputPositionNoCompare); - -impl fmt::Debug for PdfNull { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "PdfNull(at {})", self.0) - } -} - -impl PdfNull { - pub fn new(pos: impl Into) -> Self { - Self(pos.into()) - } -} - -impl GetPdfInputPosition for PdfNull { - fn get_pdf_input_position(&self) -> PdfInputPosition { - self.0.0 - } -} - -impl From for PdfObjectDirect { - fn from(v: PdfNull) -> Self { - Self::Null(v) - } -} - -impl Default for PdfObjectDirect { - fn default() -> Self { - Self::Null(PdfNull(PdfInputPositionNoCompare::empty())) - } -} - -impl From for PdfObject { - fn from(v: PdfNull) -> Self { - Self::Null(v) - } -} - -impl Default for PdfObject { - fn default() -> Self { - Self::Null(PdfNull(PdfInputPositionNoCompare::empty())) - } -} - -impl From for PdfObject { - fn from(v: PdfObjectIndirect) -> Self { - Self::Indirect(v) - } -} - -#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub struct PdfObjectIdentifier { - pub pos: PdfInputPositionNoCompare, - pub object_number: NonZero, - pub generation_number: u16, -} - -impl fmt::Debug for PdfObjectIdentifier { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let Self { - pos, - object_number, - generation_number, - } = *self; - write!( - f, - "PdfObjectIdentifier(at {pos}, {object_number}, {generation_number})" - ) - } -} - -impl GetPdfInputPosition for PdfObjectIdentifier { - fn get_pdf_input_position(&self) -> PdfInputPosition { - self.pos.0 - } -} - -#[derive(Clone)] -pub struct PdfObjectIndirect { - objects: std::sync::Weak, - id: PdfObjectIdentifier, - final_id: Arc>, -} - -impl fmt::Debug for PdfObjectIndirect { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let Self { - objects: _, - id: - PdfObjectIdentifier { - pos, - object_number, - generation_number, - }, - final_id: _, - } = *self; - write!( - f, - "PdfObjectIndirect(at {pos}, {object_number}, {generation_number})" - ) - } -} - -impl GetPdfInputPosition for PdfObjectIndirect { - fn get_pdf_input_position(&self) -> PdfInputPosition { - self.id.get_pdf_input_position() - } -} - -impl PartialEq for PdfObjectIndirect { - fn eq(&self, other: &Self) -> bool { - let Self { - objects, - id, - final_id: _, - } = self; - objects.ptr_eq(&other.objects) && *id == other.id - } -} - -impl PdfObjectIndirect { - pub fn new(objects: &Arc, id: PdfObjectIdentifier) -> Self { - Self { - objects: Arc::downgrade(objects), - id, - final_id: Arc::new(OnceLock::new()), - } - } - pub(crate) fn cache_parse( - &self, - parse_inner: impl FnOnce(PdfObjectDirect) -> Result, E>, - ) -> Result, E> { - self.get_object_and_parse_cache(|object, object_and_parse_cache| { - match object_and_parse_cache { - Some(object_and_parse_cache) => { - if let Some(retval) = object_and_parse_cache.parse_cache_get::() { - println!("cache reused for {object:?}"); - return Ok(retval); - } - parse_inner(object) - .map(|retval| object_and_parse_cache.parse_cache_get_or_insert::(retval)) - } - None => parse_inner(object), - } - }) - } - fn get_object_and_parse_cache_inner<'a>( - &self, - objects: &'a PdfObjects, - ) -> (PdfObjectDirect, Option<&'a PdfObjectAndParseCache>) { - if let Some(objects) = objects.inner.get() { - let final_id = self.final_id.get().copied(); - let limit = if final_id.is_some() { 1 } else { 1000usize }; - let mut id = final_id.unwrap_or(self.id); - for _ in 0..limit { - if let Some(object_and_parse_cache) = objects.objects.get(&self.id) { - let object = match &object_and_parse_cache.object { - PdfObject::Boolean(v) => PdfObjectDirect::Boolean(*v), - PdfObject::Integer(v) => PdfObjectDirect::Integer(*v), - PdfObject::Real(v) => PdfObjectDirect::Real(*v), - PdfObject::String(v) => PdfObjectDirect::String(v.clone()), - PdfObject::Name(v) => PdfObjectDirect::Name(v.clone()), - PdfObject::Array(v) => PdfObjectDirect::Array(v.clone()), - PdfObject::Dictionary(v) => PdfObjectDirect::Dictionary(v.clone()), - PdfObject::Stream(v) => PdfObjectDirect::Stream(v.clone()), - PdfObject::Null(v) => PdfObjectDirect::Null(*v), - PdfObject::Indirect(v) => { - id = v.id; - continue; - } - }; - // we could be racing with another thread, so set can fail but that's not a problem - let _ = self.final_id.set(id); - return (object, Some(object_and_parse_cache)); - } else { - return (PdfNull::new(id.pos).into(), None); - } - } - } - (PdfNull::new(self.pos()).into(), None) - } - fn get_object_and_parse_cache( - &self, - f: impl FnOnce(PdfObjectDirect, Option<&PdfObjectAndParseCache>) -> R, - ) -> R { - let Some(objects) = self.objects.upgrade() else { - panic!("PdfObjects is no longer available"); - }; - let (object, object_and_parse_cache) = self.get_object_and_parse_cache_inner(&objects); - f(object, object_and_parse_cache) - } - pub fn get(&self) -> PdfObjectDirect { - self.get_object_and_parse_cache(|object, _object_and_parse_cache| object) - } - pub fn id(&self) -> PdfObjectIdentifier { - self.id - } - pub fn pos(&self) -> PdfInputPosition { - self.id.pos.0 - } -} - -impl From for PdfObjectDirect { - fn from(value: PdfObjectIndirect) -> Self { - value.get() - } -} - -pub trait IsPdfNull { - fn is_pdf_null(&self) -> bool; -} - -impl IsPdfNull for Option { - fn is_pdf_null(&self) -> bool { - self.as_ref().is_none_or(IsPdfNull::is_pdf_null) - } -} - -pub struct PdfDictionary { - pos: PdfInputPositionNoCompare, - fields: Arc>, -} - -impl Clone for PdfDictionary { - fn clone(&self) -> Self { - Self { - pos: self.pos, - fields: self.fields.clone(), - } - } -} - -impl PdfDictionary { - pub fn new(pos: impl Into) -> Self { - Self { - pos: pos.into(), - fields: Arc::new(BTreeMap::new()), - } - } - pub fn from_fields( - pos: impl Into, - mut fields: Arc>, - ) -> Self - where - T: IsPdfNull + Clone, - { - if fields.values().any(T::is_pdf_null) { - Arc::make_mut(&mut fields).retain(|_k, v| !v.is_pdf_null()); - } - Self { - pos: pos.into(), - fields, - } - } - pub fn fields(&self) -> &Arc> { - &self.fields - } - pub fn into_fields(self) -> Arc> { - self.fields - } - pub fn iter(&self) -> std::collections::btree_map::Iter<'_, PdfName, T> { - self.fields.iter() - } - pub fn contains_key(&self, key: &Q) -> bool - where - PdfName: std::borrow::Borrow, - Q: Ord, - { - self.fields.contains_key(key) - } - pub fn get(&self, key: &Q) -> Option<&T> - where - PdfName: std::borrow::Borrow, - Q: Ord, - { - self.fields.get(key) - } - pub fn get_or_null(&self, key: &Q) -> T - where - PdfName: std::borrow::Borrow, - Q: Ord, - T: Clone + From, - { - self.get(key) - .cloned() - .unwrap_or_else(|| PdfNull(self.pos).into()) - } - pub fn pos(&self) -> PdfInputPosition { - self.pos.0 - } -} - -impl GetPdfInputPosition for PdfDictionary { - fn get_pdf_input_position(&self) -> PdfInputPosition { - self.pos.0 - } -} - -impl Default for PdfDictionary { - fn default() -> Self { - Self::new(PdfInputPosition::empty()) - } -} - -impl FromIterator<(PdfName, T)> for PdfDictionary { - fn from_iter>(iter: I) -> Self { - Self { - pos: PdfInputPositionNoCompare::empty(), - fields: Arc::new(BTreeMap::from_iter( - iter.into_iter() - .filter(|(_name, value)| !value.is_pdf_null()), - )), - } - } -} - -impl IntoIterator for PdfDictionary { - type Item = (PdfName, T); - type IntoIter = std::collections::btree_map::IntoIter; - - fn into_iter(self) -> Self::IntoIter { - Arc::unwrap_or_clone(self.fields).into_iter() - } -} - -impl<'a, T> IntoIterator for &'a PdfDictionary { - type Item = (&'a PdfName, &'a T); - type IntoIter = std::collections::btree_map::Iter<'a, PdfName, T>; - - fn into_iter(self) -> Self::IntoIter { - self.fields.iter() - } -} - -impl fmt::Debug for PdfDictionary { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - DagDebugState::scope(|state| { - state - .debug_or_id_with( - &self.fields, - |_, f| f.debug_map().entries(self).finish(), - |f| f.write_str("{...}"), - ) - .fmt(f) - }) - } -} - -impl IsPdfNull for PdfDictionary { - fn is_pdf_null(&self) -> bool { - false - } -} - -impl PdfParse for PdfDictionary { - fn type_name() -> Cow<'static, str> { - if TypeId::of::() == TypeId::of::() { - Cow::Borrowed("dictionary") - } else { - Cow::Owned(format!("PdfDictionary<{}>", T::type_name())) - } - } - fn parse(object: PdfObject) -> Result { - let object = PdfObjectDirect::from(object); - let PdfObjectDirect::Dictionary(object) = object else { - return Err(PdfParseError::InvalidType { - pos: object.pos(), - ty: object.type_name(), - expected_ty: "dictionary", - }); - }; - if let Some(retval) = ::downcast_ref::(&object) { - return Ok(retval.clone()); - } - let pos = object.pos; - let fields = Result::from_iter(object.fields.iter().filter_map(|(name, value)| { - match T::parse(value.clone()) { - Ok(value) => { - if value.is_pdf_null() { - None - } else { - Some(Ok((name.clone(), value))) - } - } - Err(e) => Some(Err(e)), - } - }))?; - Ok(Self { - pos, - fields: Arc::new(fields), - }) - } -} - -#[derive(Clone, Default)] -pub struct PdfArray { - pos: PdfInputPositionNoCompare, - elements: Arc<[PdfObject]>, -} - -impl PdfArray { - pub fn new(pos: impl Into) -> Self { - Self { - pos: pos.into(), - elements: Arc::default(), - } - } - pub fn from_elements( - pos: impl Into, - elements: Arc<[PdfObject]>, - ) -> Self { - Self { - pos: pos.into(), - elements, - } - } - pub fn pos(&self) -> PdfInputPosition { - self.pos.0 - } - pub fn elements(&self) -> &Arc<[PdfObject]> { - &self.elements - } - pub fn into_elements(self) -> Arc<[PdfObject]> { - self.elements - } - pub fn iter(&self) -> std::slice::Iter<'_, PdfObject> { - self.elements.iter() - } -} - -impl GetPdfInputPosition for PdfArray { - fn get_pdf_input_position(&self) -> PdfInputPosition { - self.pos.0 - } -} - -impl FromIterator for PdfArray { - fn from_iter>(iter: T) -> Self { - Self { - pos: PdfInputPositionNoCompare::empty(), - elements: Arc::from_iter(iter), - } - } -} - -#[derive(Clone)] -pub struct PdfArrayIntoIter { - indexes: std::ops::Range, - elements: Arc<[PdfObject]>, -} - -impl Iterator for PdfArrayIntoIter { - type Item = PdfObject; - - fn next(&mut self) -> Option { - self.indexes.next().map(|i| self.elements[i].clone()) - } - - fn size_hint(&self) -> (usize, Option) { - self.indexes.size_hint() - } - - fn nth(&mut self, n: usize) -> Option { - self.indexes.nth(n).map(|i| self.elements[i].clone()) - } - - fn last(self) -> Option { - self.indexes.last().map(|i| self.elements[i].clone()) - } - - fn fold(self, init: B, mut f: F) -> B - where - F: FnMut(B, Self::Item) -> B, - { - self.indexes - .fold(init, |init, i| f(init, self.elements[i].clone())) - } -} - -impl std::iter::FusedIterator for PdfArrayIntoIter {} - -impl DoubleEndedIterator for PdfArrayIntoIter { - fn next_back(&mut self) -> Option { - self.indexes.next_back().map(|i| self.elements[i].clone()) - } - fn nth_back(&mut self, n: usize) -> Option { - self.indexes.nth_back(n).map(|i| self.elements[i].clone()) - } - fn rfold(self, init: B, mut f: F) -> B - where - F: FnMut(B, Self::Item) -> B, - { - self.indexes - .rfold(init, |init, i| f(init, self.elements[i].clone())) - } -} - -impl ExactSizeIterator for PdfArrayIntoIter {} - -impl IntoIterator for PdfArray { - type Item = PdfObject; - type IntoIter = PdfArrayIntoIter; - - fn into_iter(self) -> Self::IntoIter { - PdfArrayIntoIter { - indexes: 0..self.elements.len(), - elements: self.elements, - } - } -} - -impl<'a> IntoIterator for &'a PdfArray { - type Item = &'a PdfObject; - type IntoIter = std::slice::Iter<'a, PdfObject>; - - fn into_iter(self) -> Self::IntoIter { - self.elements.iter() - } -} - -impl fmt::Debug for PdfArray { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.elements.fmt(f) - } -} - -#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct MaybeArray(pub Arc<[T]>); - -impl fmt::Debug for MaybeArray { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.0.fmt(f) - } -} - -impl std::ops::Deref for MaybeArray { - type Target = Arc<[T]>; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl std::ops::DerefMut for MaybeArray { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.0 - } -} - -impl Default for MaybeArray { - fn default() -> Self { - Self(Arc::default()) - } -} - -impl<'a, T> IntoIterator for &'a MaybeArray { - type Item = &'a T; - type IntoIter = std::slice::Iter<'a, T>; - - fn into_iter(self) -> Self::IntoIter { - self.iter() - } -} - -#[derive(Clone)] -pub enum NameOr { - Name(PdfName), - Value(T), -} - -impl NameOr { - pub fn into_resolved(self, resolve: impl FnOnce(PdfName) -> Result) -> Result { - match self { - Self::Name(name) => resolve(name), - Self::Value(v) => Ok(v), - } - } - pub fn replace_with_resolved( - &mut self, - resolve: impl FnOnce(&PdfName) -> Result, - ) -> Result<&mut T, E> { - match self { - Self::Name(name) => { - *self = Self::Value(resolve(name)?); - let Self::Value(v) = self else { - unreachable!(); - }; - Ok(v) - } - Self::Value(v) => Ok(v), - } - } -} - -impl fmt::Debug for NameOr { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Name(v) => v.fmt(f), - Self::Value(v) => v.fmt(f), - } - } -} - -impl GetPdfInputPosition for NameOr { - fn get_pdf_input_position(&self) -> PdfInputPosition { - match self { - Self::Name(v) => v.pos(), - Self::Value(v) => v.get_pdf_input_position(), - } - } -} - -impl IsPdfNull for NameOr { - fn is_pdf_null(&self) -> bool { - match self { - Self::Name(_) => false, - Self::Value(v) => v.is_pdf_null(), - } - } -} - -impl PdfParse for NameOr { - fn type_name() -> Cow<'static, str> { - Cow::Owned(format!("NameOr<{}>", T::type_name())) - } - fn parse(object: PdfObject) -> Result { - Ok(match PdfObjectDirect::from(object) { - PdfObjectDirect::Name(name) => Self::Name(name), - object => Self::Value(T::parse(object.into())?), - }) - } -} - -#[derive(Copy, Clone, PartialEq)] -pub struct PdfMatrix { - pub pos: PdfInputPositionNoCompare, - pub elements: [f32; 6], -} - -impl PdfMatrix { - pub fn identity(pos: impl Into) -> Self { - Self { - pos: pos.into(), - elements: [1.0, 0.0, 0.0, 1.0, 0.0, 0.0], - } - } - #[must_use] - pub fn mul(self, other: PdfMatrix, new_pos: impl Into) -> Self { - let [la, lb, lc, ld, le, lf] = self.elements; - let [ra, rb, rc, rd, re, rf] = other.elements; - Self { - pos: new_pos.into(), - elements: [ - lb * rc + la * ra, - lb * rd + la * rb, - ld * rc + lc * ra, - ld * rd + lc * rb, - re + lf * rc + le * ra, - rf + lf * rd + le * rb, - ], - } - } -} - -impl fmt::Debug for PdfMatrix { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let Self { pos, elements } = *self; - write!(f, "PdfMatrix(at {pos}, {elements:?})") - } -} - -impl IsPdfNull for PdfMatrix { - fn is_pdf_null(&self) -> bool { - false - } -} - -impl PdfParse for PdfMatrix { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed("matrix") - } - fn parse(object: PdfObject) -> Result { - Ok(Self { - pos: object.pos().into(), - elements: PdfParse::parse(object)?, - }) - } -} - -impl PdfMatrix { - pub fn parse_flat( - a: PdfObject, - b: PdfObject, - c: PdfObject, - d: PdfObject, - e: PdfObject, - f: PdfObject, - ) -> Result { - Ok(Self { - pos: a.pos().into(), - elements: [ - PdfParse::parse(a)?, - PdfParse::parse(b)?, - PdfParse::parse(c)?, - PdfParse::parse(d)?, - PdfParse::parse(e)?, - PdfParse::parse(f)?, - ], - }) - } -} - -impl GetPdfInputPosition for PdfMatrix { - fn get_pdf_input_position(&self) -> PdfInputPosition { - self.pos.0 - } -} - -#[derive(Copy, Clone, PartialEq)] -pub struct PdfVec2D { - pub pos: PdfInputPositionNoCompare, - pub x: f32, - pub y: f32, -} - -impl fmt::Debug for PdfVec2D { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let Self { pos, x, y } = *self; - write!(f, "PdfVec2D(at {pos}, {x}, {y})") - } -} - -impl PdfVec2D { - pub fn parse(x: PdfObject, y: PdfObject) -> Result { - Ok(Self { - pos: x.pos().into(), - x: PdfParse::parse(x)?, - y: PdfParse::parse(y)?, - }) - } -} - -impl GetPdfInputPosition for PdfVec2D { - fn get_pdf_input_position(&self) -> PdfInputPosition { - self.pos.0 - } -} - -#[derive(Copy, Clone, Debug)] -pub struct PdfRectangle { - /// the corner with the smaller x and y coordinates - smaller: PdfVec2D, - /// the corner with the larger x and y coordinates - larger: PdfVec2D, -} - -impl PdfRectangle { - pub fn new(mut smaller: PdfVec2D, mut larger: PdfVec2D) -> Self { - // `pos` follows the `x` coordinate - if smaller.x.is_nan() { - smaller.pos = larger.pos; - } else if larger.x.is_nan() { - larger.pos = smaller.pos; - } else if larger.x < smaller.x { - std::mem::swap(&mut smaller.pos, &mut larger.pos); - } - Self { - smaller: PdfVec2D { - pos: smaller.pos, - x: smaller.x.min(larger.x), - y: smaller.y.min(larger.y), - }, - larger: PdfVec2D { - pos: larger.pos, - x: smaller.x.max(larger.x), - y: smaller.y.max(larger.y), - }, - } - } - /// return the corner with the smaller x and y coordinates - pub fn smaller(&self) -> PdfVec2D { - self.smaller - } - /// return the corner with the larger x and y coordinates - pub fn larger(&self) -> PdfVec2D { - self.larger - } - pub fn parse_flat( - lower_left_x: PdfObject, - lower_left_y: PdfObject, - upper_right_x: PdfObject, - upper_right_y: PdfObject, - ) -> Result { - Ok(Self::new( - PdfVec2D::parse(lower_left_x, lower_left_y)?, - PdfVec2D::parse(upper_right_x, upper_right_y)?, - )) - } -} - -impl GetPdfInputPosition for PdfRectangle { - fn get_pdf_input_position(&self) -> PdfInputPosition { - self.smaller.get_pdf_input_position() - } -} - -impl IsPdfNull for PdfRectangle { - fn is_pdf_null(&self) -> bool { - false - } -} - -impl PdfParse for PdfRectangle { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed("rectangle") - } - - fn parse(object: PdfObject) -> Result { - let object = object.into(); - let PdfObjectDirect::Array(array) = &object else { - return Err(PdfParseError::InvalidType { - pos: object.pos(), - ty: object.type_name(), - expected_ty: "rectangle", - }); - }; - let [lower_left_x, lower_left_y, upper_right_x, upper_right_y] = &**array.elements() else { - return Err(PdfParseError::InvalidType { - pos: object.pos(), - ty: object.type_name(), - expected_ty: "rectangle", - }); - }; - Self::parse_flat( - lower_left_x.clone(), - lower_left_y.clone(), - upper_right_x.clone(), - upper_right_y.clone(), - ) - } -} - -#[derive(Clone)] -pub enum PdfFileSpecification { - String(PdfString), - Dictionary(PdfDictionary), -} - -impl fmt::Debug for PdfFileSpecification { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::String(v) => v.fmt(f), - Self::Dictionary(v) => v.fmt(f), - } - } -} - -impl IsPdfNull for PdfFileSpecification { - fn is_pdf_null(&self) -> bool { - false - } -} - -impl PdfParse for PdfFileSpecification { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed("file specification") - } - fn parse(object: PdfObject) -> Result { - match PdfObjectDirect::from(object) { - PdfObjectDirect::String(v) => Ok(Self::String(v)), - PdfObjectDirect::Dictionary(v) => Ok(Self::Dictionary(v)), - object => Err(PdfParseError::InvalidType { - pos: object.pos(), - ty: object.type_name(), - expected_ty: "PdfFileSpecification", - }), - } - } -} - -pdf_parse! { - #[pdf] - #[derive(Clone)] - pub struct PdfStreamDictionary { - #[pdf(name = "Length")] - pub len: usize, - #[pdf(name = "Filter")] - pub filters: MaybeArray, - #[pdf(name = "DecodeParms")] - pub decode_parms: MaybeArray>, - #[pdf(name = "F")] - pub file: Option, - #[pdf(name = "FFilter")] - pub file_filters: MaybeArray, - #[pdf(name = "FDecodeParms")] - pub file_decode_parms: MaybeArray>, - #[pdf(name = "DL")] - pub decoded_len: Option, - #[pdf(flatten)] - pub rest: Rest, - } -} - -impl fmt::Debug for PdfStreamDictionary { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - DagDebugState::scope(|_state| { - let Self { - len, - filters, - decode_parms, - file, - file_filters, - file_decode_parms, - decoded_len, - rest, - } = self; - f.debug_struct("PdfStreamDictionary") - .field("len", len) - .field("filters", filters) - .field("decode_parms", decode_parms) - .field("file", file) - .field("file_filters", file_filters) - .field("file_decode_parms", file_decode_parms) - .field("decoded_len", decoded_len) - .field("rest", rest) - .finish() - }) - } -} - -#[derive(Debug, Clone, Default)] -pub struct PdfStreamDictionaryFiltersAndParms<'a> { - filters: std::iter::Enumerate>, - decode_parms: &'a [Option], -} - -impl<'a> PdfStreamDictionaryFiltersAndParms<'a> { - fn item_helper( - filter: (usize, &'a PdfStreamFilter), - decode_parms: &'a [Option], - ) -> (&'a PdfStreamFilter, &'a PdfDictionary) { - static EMPTY_DICTIONARY: OnceLock = OnceLock::new(); - let (index, filter) = filter; - ( - filter, - match decode_parms.get(index) { - Some(Some(v)) => v, - _ => EMPTY_DICTIONARY.get_or_init(PdfDictionary::default), - }, - ) - } -} - -impl<'a> Iterator for PdfStreamDictionaryFiltersAndParms<'a> { - type Item = (&'a PdfStreamFilter, &'a PdfDictionary); - - fn next(&mut self) -> Option { - self.filters - .next() - .map(|filter| Self::item_helper(filter, self.decode_parms)) - } - - fn size_hint(&self) -> (usize, Option) { - self.filters.size_hint() - } - - fn nth(&mut self, n: usize) -> Option { - self.filters - .nth(n) - .map(|filter| Self::item_helper(filter, self.decode_parms)) - } - - fn fold(self, init: B, f: F) -> B - where - F: FnMut(B, Self::Item) -> B, - { - self.filters - .map(|filter| Self::item_helper(filter, self.decode_parms)) - .fold(init, f) - } -} - -impl<'a> FusedIterator for PdfStreamDictionaryFiltersAndParms<'a> {} - -impl<'a> ExactSizeIterator for PdfStreamDictionaryFiltersAndParms<'a> {} - -impl<'a> DoubleEndedIterator for PdfStreamDictionaryFiltersAndParms<'a> { - fn next_back(&mut self) -> Option { - self.filters - .next_back() - .map(|filter| Self::item_helper(filter, self.decode_parms)) - } - - fn nth_back(&mut self, n: usize) -> Option { - self.filters - .nth_back(n) - .map(|filter| Self::item_helper(filter, self.decode_parms)) - } - - fn rfold(self, init: B, f: F) -> B - where - F: FnMut(B, Self::Item) -> B, - { - self.filters - .map(|filter| Self::item_helper(filter, self.decode_parms)) - .rfold(init, f) - } -} - -impl PdfStreamDictionary { - pub fn filters_and_parms<'a>(&'a self) -> PdfStreamDictionaryFiltersAndParms<'a> { - PdfStreamDictionaryFiltersAndParms { - filters: self.filters.iter().enumerate(), - decode_parms: &self.decode_parms, - } - } - pub fn file_filters_and_parms<'a>(&'a self) -> PdfStreamDictionaryFiltersAndParms<'a> { - PdfStreamDictionaryFiltersAndParms { - filters: self.file_filters.iter().enumerate(), - decode_parms: &self.file_decode_parms, - } - } -} - -pub trait PdfStreamContents: Sized + fmt::Debug + 'static { - fn parse( - data: &[u8], - stream_pos: PdfInputPosition, - objects: Arc, - ) -> Result; - fn parse_arc( - data: Arc<[u8]>, - stream_pos: PdfInputPosition, - objects: Arc, - ) -> Result { - Self::parse(&*data, stream_pos, objects) - } -} - -impl PdfStreamContents for Arc<[u8]> { - fn parse( - data: &[u8], - _stream_pos: PdfInputPosition, - _objects: Arc, - ) -> Result { - Ok(Arc::from(data)) - } - fn parse_arc( - data: Arc<[u8]>, - _stream_pos: PdfInputPosition, - _objects: Arc, - ) -> Result { - Ok(data.clone()) - } -} - -#[derive(Clone)] -pub struct PdfStream> { - pos: PdfInputPositionNoCompare, - objects: std::sync::Weak, - dictionary: PdfStreamDictionary, - encoded_data: Arc<[u8]>, - decoded_data: Arc>>, -} - -struct DumpBytes<'a>(&'a [u8]); - -impl<'a> fmt::Debug for DumpBytes<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(self, f) - } -} - -impl fmt::Display for DumpBytes<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut first = true; - let mut fmt_chunk = |chunk: &[u8]| -> fmt::Result { - if first { - first = false; - } else { - f.write_str("\n")?; - } - write!(f, "\"{}\"", chunk.escape_ascii()) - }; - if self.0.is_empty() { - return fmt_chunk(self.0); - } - for chunk in self.0.chunks(32) { - fmt_chunk(chunk)?; - } - Ok(()) - } -} - -impl fmt::Debug for PdfStream { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - DagDebugState::scope(|state| { - state - .debug_or_id_with( - &self.decoded_data, - |_, f| { - let Self { - pos, - objects: _, - dictionary, - encoded_data, - decoded_data, - } = self; - let mut debug_struct = f.debug_struct("PdfStream"); - debug_struct.field("pos", pos); - debug_struct.field("dictionary", dictionary); - debug_struct.field("encoded_data", &DumpBytes(encoded_data)); - if let Some(decoded_data) = decoded_data.get() { - match decoded_data { - Ok(decoded_data) => { - if let Some(decoded_data) = - ::downcast_ref::>(decoded_data) - { - debug_struct - .field("decoded_data", &DumpBytes(&**decoded_data)) - } else { - debug_struct.field("decoded_data", decoded_data) - } - } - Err(e) => debug_struct.field("decoded_data", &Err::<(), _>(e)), - }; - } else { - debug_struct.field("decoded_data", &format_args!("")); - } - debug_struct.finish() - }, - |f| f.write_str("PdfStream(...)"), - ) - .fmt(f) - }) - } -} - -impl PdfStream { - pub fn new( - pos: impl Into, - objects: &Arc, - dictionary: PdfStreamDictionary, - encoded_data: Arc<[u8]>, - ) -> Self { - Self { - pos: pos.into(), - objects: Arc::downgrade(objects), - dictionary, - encoded_data, - decoded_data: Arc::new(OnceLock::new()), - } - } - pub fn dictionary(&self) -> &PdfStreamDictionary { - &self.dictionary - } - pub fn encoded_data(&self) -> &Arc<[u8]> { - &self.encoded_data - } - fn try_decode_data(&self) -> Result { - let Some(objects) = self.objects.upgrade() else { - panic!("PdfObjects is no longer available"); - }; - let dictionary = self.dictionary(); - let (data, filters) = if let Some(file) = &dictionary.file { - todo!() - } else { - (&self.encoded_data, dictionary.filters_and_parms()) - }; - if filters.len() == 0 { - return Data::parse_arc(data.clone(), self.pos.0, objects); - } - let mut data: &[u8] = data; - let mut buffer; - for (filter, filter_parms) in filters { - buffer = filter.decode_stream_data(filter_parms.clone(), self.pos.0, &data)?; - data = &buffer; - } - Data::parse(data, self.pos.0, objects) - } - pub fn decoded_data(&self) -> &Result { - self.decoded_data.get_or_init(|| self.try_decode_data()) - } -} - -impl GetPdfInputPosition for PdfStream { - fn get_pdf_input_position(&self) -> PdfInputPosition { - self.pos.0 - } -} - -impl IsPdfNull for PdfStream { - fn is_pdf_null(&self) -> bool { - false - } -} - -impl PdfParse for PdfStream { - fn type_name() -> Cow<'static, str> { - if TypeId::of::() == TypeId::of::() { - Cow::Borrowed("stream") - } else { - Cow::Owned(format!("PdfStream<{}>", Rest::type_name())) - } - } - fn parse(object: PdfObject) -> Result { - match PdfObjectDirect::from(object) { - PdfObjectDirect::Stream(stream) => { - Ok(PdfStream { - pos: stream.pos, - dictionary: { - let PdfStreamDictionary { - len, - filters, - decode_parms, - file, - file_filters, - file_decode_parms, - decoded_len, - rest, - } = stream.dictionary; - PdfStreamDictionary { - len, - filters, - decode_parms, - file, - file_filters, - file_decode_parms, - decoded_len, - rest: Rest::parse(rest.into())?, - } - }, - encoded_data: stream.encoded_data, - decoded_data: if let Some(decoded_data) = - ::downcast_ref(&stream.decoded_data) - { - Arc::clone(decoded_data) - } else { - let Some(objects) = stream.objects.upgrade() else { - panic!("PdfObjects is no longer available"); - }; - Arc::new( - stream - .decoded_data - .get() - .cloned() - .map(|data| { - OnceLock::from(data.and_then(|data| { - Data::parse_arc(data, stream.pos.0, objects) - })) - }) - .unwrap_or_default(), - ) - }, - objects: stream.objects, - }) - } - object => Err(PdfParseError::InvalidType { - pos: object.get_pdf_input_position(), - ty: object.type_name(), - expected_ty: "stream", - }), - } - } -} - -pdf_parse! { - #[pdf(name)] - #[derive(Clone, Copy, Debug, Hash, Default, PartialEq, Eq, PartialOrd, Ord)] - pub enum PdfObjectStreamType { - #[pdf(name = "ObjStm")] - #[default] - ObjStm, - } -} - -pdf_parse! { - #[pdf] - #[derive(Clone)] - pub struct PdfObjectStreamDictionary { - #[pdf(name = Self::TYPE_NAME)] - pub ty: PdfObjectStreamType, - #[pdf(name = "N")] - pub n: usize, - #[pdf(name = "First")] - pub first: usize, - #[pdf(name = "Extends")] - pub extends: Option, - #[pdf(flatten)] - pub rest: PdfDictionary, - } -} - -impl PdfObjectStreamDictionary { - pub const TYPE_NAME: &str = "Type"; - pub(crate) fn parse_type_from_dictionary( - dictionary: &PdfDictionary, - ) -> Result { - PdfParse::parse(dictionary.get_or_null(Self::TYPE_NAME.as_bytes())) - } -} diff --git a/src/pdf/parse.rs b/src/pdf/parse.rs deleted file mode 100644 index 1d57f5e..0000000 --- a/src/pdf/parse.rs +++ /dev/null @@ -1,1313 +0,0 @@ -use crate::pdf::{ - content_stream::{PdfOperator, PdfOperatorUnparsed}, - object::{ - IsPdfNull, MaybeArray, PdfInteger, PdfName, PdfNull, PdfNumber, PdfObject, PdfObjectDirect, - PdfObjectIdentifier, PdfObjectIndirect, PdfObjectNonNull, - }, -}; -use std::{any::Any, borrow::Cow, fmt, mem, num::NonZero, sync::Arc}; - -#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub struct PdfInputPositionKnown { - pub pos: usize, - pub containing_streams_pos: Option, -} - -impl fmt::Debug for PdfInputPositionKnown { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(self, f) - } -} - -impl fmt::Display for PdfInputPositionKnown { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let Self { - pos, - containing_streams_pos, - } = *self; - if let Some(containing_streams_pos) = containing_streams_pos { - write!(f, "{pos:#x} in stream at {containing_streams_pos:#x}") - } else { - write!(f, "{pos:#x}") - } - } -} - -#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)] -pub struct PdfInputPosition(Option); - -impl fmt::Debug for PdfInputPosition { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "at {self}") - } -} - -impl fmt::Display for PdfInputPosition { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if let Some(pos) = self.0 { - pos.fmt(f) - } else { - f.write_str("") - } - } -} - -impl PdfInputPosition { - pub const fn new(pos: Option) -> Self { - Self(pos) - } - pub const fn empty() -> PdfInputPosition { - Self(None) - } - pub const fn get(self) -> Option { - self.0 - } -} - -pub trait GetPdfInputPosition { - fn get_pdf_input_position(&self) -> PdfInputPosition; -} - -impl GetPdfInputPosition for &'_ T { - fn get_pdf_input_position(&self) -> PdfInputPosition { - T::get_pdf_input_position(self) - } -} - -impl GetPdfInputPosition for &'_ mut T { - fn get_pdf_input_position(&self) -> PdfInputPosition { - T::get_pdf_input_position(self) - } -} - -impl GetPdfInputPosition for Box { - fn get_pdf_input_position(&self) -> PdfInputPosition { - T::get_pdf_input_position(self) - } -} - -impl GetPdfInputPosition for PdfInputPosition { - fn get_pdf_input_position(&self) -> PdfInputPosition { - *self - } -} - -impl GetPdfInputPosition for bool { - fn get_pdf_input_position(&self) -> PdfInputPosition { - PdfInputPosition::empty() - } -} - -impl GetPdfInputPosition for i128 { - fn get_pdf_input_position(&self) -> PdfInputPosition { - PdfInputPosition::empty() - } -} - -impl GetPdfInputPosition for f64 { - fn get_pdf_input_position(&self) -> PdfInputPosition { - PdfInputPosition::empty() - } -} - -#[derive(Clone, Copy, Default)] -pub struct PdfInputPositionNoCompare(pub PdfInputPosition); - -impl PdfInputPositionNoCompare { - pub const fn empty() -> Self { - Self(PdfInputPosition::empty()) - } - pub const fn new(pos: Option) -> Self { - Self(PdfInputPosition::new(pos)) - } -} - -impl GetPdfInputPosition for PdfInputPositionNoCompare { - fn get_pdf_input_position(&self) -> PdfInputPosition { - self.0 - } -} - -impl From for PdfInputPositionNoCompare { - fn from(value: PdfInputPosition) -> Self { - Self(value) - } -} - -impl fmt::Debug for PdfInputPositionNoCompare { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.0.fmt(f) - } -} - -impl fmt::Display for PdfInputPositionNoCompare { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.0.fmt(f) - } -} - -impl Ord for PdfInputPositionNoCompare { - fn cmp(&self, _other: &Self) -> std::cmp::Ordering { - std::cmp::Ordering::Equal - } -} - -impl PartialOrd for PdfInputPositionNoCompare { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl std::hash::Hash for PdfInputPositionNoCompare { - fn hash(&self, _state: &mut H) { - // don't hash anything since Self always compares equal - } -} - -impl Eq for PdfInputPositionNoCompare {} - -impl PartialEq for PdfInputPositionNoCompare { - fn eq(&self, _other: &Self) -> bool { - true - } -} - -#[derive(Debug, Clone)] -#[non_exhaustive] -pub enum PdfParseError { - Custom(String), - InvalidType { - pos: PdfInputPosition, - ty: &'static str, - expected_ty: &'static str, - }, - InvalidName { - pos: PdfInputPosition, - name: PdfName, - expected_ty: &'static str, - }, - NotAPdfFile, - TruncatedFile { - pos: PdfInputPosition, - }, - InvalidObjectNumber { - pos: PdfInputPosition, - }, - InvalidGenerationNumber { - pos: PdfInputPosition, - }, - InvalidNumber { - pos: PdfInputPosition, - }, - InvalidStringEscape { - pos: PdfInputPosition, - }, - InvalidHexStringDigit { - pos: PdfInputPosition, - }, - DuplicateIndirectObjectDefinition { - pos: PdfInputPosition, - id: PdfObjectIdentifier, - }, - MissingObj { - pos: PdfInputPosition, - }, - MissingEndObj { - pos: PdfInputPosition, - }, - InvalidDictionaryClosingDoubleRAngle { - pos: PdfInputPosition, - }, - DuplicateDictionaryKey { - pos: PdfInputPosition, - name: PdfName, - }, - InvalidNameEscape { - pos: PdfInputPosition, - }, - InvalidOrMissingEolAfterStreamKeyword { - pos: PdfInputPosition, - }, - MissingEndStreamKeyword { - pos: PdfInputPosition, - }, - IntegerOutOfRange { - pos: PdfInputPosition, - }, - MissingTrailer { - pos: PdfInputPosition, - }, - WrongArrayLength { - pos: PdfInputPosition, - len: usize, - expected_len: usize, - }, - MissingStartXRefKeyword { - pos: PdfInputPosition, - }, - MissingStartXRefValue { - pos: PdfInputPosition, - }, - MissingEofComment { - pos: PdfInputPosition, - }, - UnexpectedByte { - pos: PdfInputPosition, - byte: u8, - }, - InvalidStartXRefValue { - pos: PdfInputPosition, - start_xref: usize, - }, - UnknownStreamFilter { - pos: PdfInputPosition, - filter: PdfName, - }, - StreamFilterError { - pos: PdfInputPosition, - filter: PdfName, - error: String, - }, - StreamNotAllowedHere { - pos: PdfInputPosition, - }, - OperatorNotAllowedHere { - operator: PdfOperatorUnparsed, - }, - MissingOperator { - pos: PdfInputPosition, - }, - OperatorHasTooFewOperands { - operator: PdfOperator, - }, - OperatorHasTooManyOperands { - operator: PdfOperator, - }, - CantRestoreGraphicsStateWithEmptyStack { - pos: PdfInputPosition, - }, - FontResourceNotFound { - pos: PdfInputPosition, - font: PdfName, - }, - MissingBeginTextOperator { - pos: PdfInputPosition, - }, - MissingSetFontOperator { - pos: PdfInputPosition, - }, -} - -impl From for PdfParseError { - fn from(value: std::convert::Infallible) -> Self { - match value {} - } -} - -impl<'a> From<&'a Self> for PdfParseError { - fn from(value: &'a Self) -> Self { - value.clone() - } -} - -impl GetPdfInputPosition for PdfParseError { - fn get_pdf_input_position(&self) -> PdfInputPosition { - match *self { - PdfParseError::Custom(_) | PdfParseError::NotAPdfFile => PdfInputPosition::empty(), - PdfParseError::InvalidType { pos, .. } - | PdfParseError::InvalidName { pos, .. } - | PdfParseError::TruncatedFile { pos } - | PdfParseError::InvalidObjectNumber { pos } - | PdfParseError::InvalidGenerationNumber { pos } - | PdfParseError::InvalidNumber { pos } - | PdfParseError::InvalidStringEscape { pos } - | PdfParseError::InvalidHexStringDigit { pos } - | PdfParseError::DuplicateIndirectObjectDefinition { pos, .. } - | PdfParseError::MissingObj { pos } - | PdfParseError::MissingEndObj { pos } - | PdfParseError::InvalidDictionaryClosingDoubleRAngle { pos } - | PdfParseError::DuplicateDictionaryKey { pos, .. } - | PdfParseError::InvalidNameEscape { pos } - | PdfParseError::InvalidOrMissingEolAfterStreamKeyword { pos } - | PdfParseError::MissingEndStreamKeyword { pos } - | PdfParseError::IntegerOutOfRange { pos } - | PdfParseError::MissingTrailer { pos } - | PdfParseError::WrongArrayLength { pos, .. } - | PdfParseError::MissingStartXRefKeyword { pos } - | PdfParseError::MissingStartXRefValue { pos } - | PdfParseError::MissingEofComment { pos } - | PdfParseError::UnexpectedByte { pos, .. } - | PdfParseError::InvalidStartXRefValue { pos, .. } - | PdfParseError::UnknownStreamFilter { pos, .. } - | PdfParseError::StreamFilterError { pos, .. } - | PdfParseError::StreamNotAllowedHere { pos } - | PdfParseError::MissingOperator { pos } - | PdfParseError::CantRestoreGraphicsStateWithEmptyStack { pos } - | PdfParseError::FontResourceNotFound { pos, .. } - | PdfParseError::MissingBeginTextOperator { pos } - | PdfParseError::MissingSetFontOperator { pos } => pos, - PdfParseError::OperatorNotAllowedHere { ref operator } => operator.pos(), - PdfParseError::OperatorHasTooFewOperands { ref operator } - | PdfParseError::OperatorHasTooManyOperands { ref operator } => operator.pos(), - } - } -} - -impl fmt::Display for PdfParseError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match *self { - PdfParseError::Custom(ref v) => f.write_str(v), - PdfParseError::InvalidType { - pos, - ty, - expected_ty, - } => { - write!( - f, - "at {pos}: invalid type: expected {expected_ty}, got {ty}" - ) - } - PdfParseError::InvalidName { - pos, - ref name, - expected_ty, - } => { - write!( - f, - "at {pos}: invalid name: expected a(n) {expected_ty}, got {name}" - ) - } - PdfParseError::NotAPdfFile => f.write_str("Not a PDF file"), - PdfParseError::TruncatedFile { pos } => { - write!(f, "at {pos}: PDF file is truncated too early") - } - PdfParseError::InvalidObjectNumber { pos } => { - write!(f, "at {pos}: PDF object number is invalid") - } - PdfParseError::InvalidGenerationNumber { pos } => { - write!( - f, - "at {pos}: PDF object identifier's generation number is invalid" - ) - } - PdfParseError::InvalidNumber { pos } => { - write!(f, "at {pos}: invalid number") - } - PdfParseError::InvalidStringEscape { pos } => { - write!(f, "at {pos}: invalid string escape") - } - PdfParseError::InvalidHexStringDigit { pos } => { - write!(f, "at {pos}: invalid hex string digit") - } - PdfParseError::DuplicateIndirectObjectDefinition { pos, id } => { - write!(f, "at {pos}: duplicate indirect object definition: {id:?}") - } - PdfParseError::MissingObj { pos } => { - write!( - f, - "at {pos}: indirect object definition is missing `obj` keyword" - ) - } - PdfParseError::MissingEndObj { pos } => { - write!( - f, - "at {pos}: indirect object definition is missing `endobj` keyword" - ) - } - PdfParseError::InvalidDictionaryClosingDoubleRAngle { pos } => { - write!(f, "at {pos}: dictionary has an invalid closing `>>` symbol") - } - PdfParseError::DuplicateDictionaryKey { pos, ref name } => { - write!(f, "at {pos}: duplicate dictionary key: {name}") - } - PdfParseError::InvalidNameEscape { pos } => { - write!(f, "at {pos}: invalid name escape") - } - PdfParseError::InvalidOrMissingEolAfterStreamKeyword { pos } => { - write!( - f, - "at {pos}: invalid or missing end-of-line after `stream` keyword" - ) - } - PdfParseError::MissingEndStreamKeyword { pos } => { - write!(f, "at {pos}: missing `endstream` keyword") - } - PdfParseError::IntegerOutOfRange { pos } => { - write!(f, "at {pos}: integer out of range") - } - PdfParseError::MissingTrailer { pos } => { - write!(f, "at {pos}: missing `trailer` keyword") - } - PdfParseError::WrongArrayLength { - pos, - len, - expected_len, - } => { - write!( - f, - "at {pos}: wrong array length: expected {expected_len}, got {len}" - ) - } - PdfParseError::MissingStartXRefKeyword { pos } => { - write!(f, "at {pos}: missing `startxref` keyword") - } - PdfParseError::MissingStartXRefValue { pos } => { - write!(f, "at {pos}: missing `startxref` value") - } - PdfParseError::MissingEofComment { pos } => { - write!(f, "at {pos}: missing `%%EOF` comment") - } - PdfParseError::UnexpectedByte { pos, byte } => { - write!(f, "at {pos}: unexpected byte '{}'", byte.escape_ascii()) - } - PdfParseError::InvalidStartXRefValue { pos, start_xref } => { - write!( - f, - "at {pos}: invalid `startxref` value: {start_xref} ({start_xref:#x})" - ) - } - PdfParseError::UnknownStreamFilter { pos, ref filter } => { - write!(f, "at {pos}: unknown stream filter: {filter}") - } - PdfParseError::StreamFilterError { - pos, - ref filter, - ref error, - } => { - write!(f, "at {pos}: stream filter {filter} error: {error}") - } - PdfParseError::StreamNotAllowedHere { pos } => { - write!(f, "at {pos}: stream not allowed here") - } - PdfParseError::OperatorNotAllowedHere { ref operator } => { - write!( - f, - "at {}: operator not allowed here: {}", - operator.pos(), - operator.bytes_debug() - ) - } - PdfParseError::MissingOperator { pos } => { - write!(f, "at {pos}: stream not allowed here") - } - PdfParseError::OperatorHasTooFewOperands { ref operator } => { - write!( - f, - "at {}: operator has too few operands: {operator:?}", - operator.pos(), - ) - } - PdfParseError::OperatorHasTooManyOperands { ref operator } => { - write!( - f, - "at {}: operator has too many operands: {operator:?}", - operator.pos(), - ) - } - PdfParseError::CantRestoreGraphicsStateWithEmptyStack { pos } => { - write!( - f, - "at {pos}: can't restore graphics state when the graphics state stack is empty" - ) - } - PdfParseError::FontResourceNotFound { pos, ref font } => { - write!(f, "at {pos}: font resource not found: {font:?}") - } - PdfParseError::MissingBeginTextOperator { pos } => { - write!( - f, - "at {pos}: missing begin text `BT` operator before this text operator" - ) - } - PdfParseError::MissingSetFontOperator { pos } => { - write!( - f, - "at {pos}: missing set font `Tf` operator before this text showing operator" - ) - } - } - } -} - -impl std::error::Error for PdfParseError {} - -pub trait PdfParse: Sized + 'static + IsPdfNull { - fn type_name() -> Cow<'static, str>; - fn parse(object: PdfObject) -> Result; - fn parse_option(object: PdfObject) -> Result, PdfParseError> { - match object { - PdfObject::Null(_) => Ok(None), - PdfObject::Indirect(ref v) if v.get().is_null() => Ok(None), - PdfObject::Boolean(_) - | PdfObject::Integer(_) - | PdfObject::Real(_) - | PdfObject::String(_) - | PdfObject::Name(_) - | PdfObject::Array(_) - | PdfObject::Dictionary(_) - | PdfObject::Stream(_) - | PdfObject::Indirect(_) => Self::parse(object).map(Some), - } - } -} - -impl PdfParse for Option { - fn type_name() -> Cow<'static, str> { - T::type_name() - } - fn parse(object: PdfObject) -> Result { - T::parse_option(object) - } - fn parse_option(object: PdfObject) -> Result, PdfParseError> { - if matches!(object, PdfObject::Null(_)) { - Ok(None) - } else { - Self::parse(object).map(Some) - } - } -} - -macro_rules! impl_pdf_parse_prim_int { - ($ty:ident) => { - impl IsPdfNull for $ty { - fn is_pdf_null(&self) -> bool { - false - } - } - impl PdfParse for $ty { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed(stringify!($ty)) - } - fn parse(object: PdfObject) -> Result { - let v: PdfInteger = PdfParse::parse(object)?; - v.value() - .try_into() - .map_err(|_| PdfParseError::IntegerOutOfRange { pos: v.pos() }) - } - } - impl IsPdfNull for NonZero<$ty> { - fn is_pdf_null(&self) -> bool { - false - } - } - impl PdfParse for NonZero<$ty> { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed(concat!("NonZero<", stringify!($ty), ">")) - } - fn parse(object: PdfObject) -> Result { - let v: PdfInteger = PdfParse::parse(object)?; - v.value() - .try_into() - .ok() - .and_then(NonZero::new) - .ok_or(PdfParseError::IntegerOutOfRange { pos: v.pos() }) - } - } - }; -} - -impl_pdf_parse_prim_int!(u8); -impl_pdf_parse_prim_int!(i8); -impl_pdf_parse_prim_int!(u16); -impl_pdf_parse_prim_int!(i16); -impl_pdf_parse_prim_int!(u32); -impl_pdf_parse_prim_int!(i32); -impl_pdf_parse_prim_int!(u64); -impl_pdf_parse_prim_int!(i64); -impl_pdf_parse_prim_int!(u128); -impl_pdf_parse_prim_int!(usize); -impl_pdf_parse_prim_int!(isize); - -impl IsPdfNull for i128 { - fn is_pdf_null(&self) -> bool { - false - } -} - -impl PdfParse for i128 { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed("i128") - } - fn parse(object: PdfObject) -> Result { - let v: PdfInteger = PdfParse::parse(object)?; - Ok(v.value().into()) - } -} - -impl IsPdfNull for NonZero { - fn is_pdf_null(&self) -> bool { - false - } -} - -impl PdfParse for NonZero { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed("NonZero") - } - fn parse(object: PdfObject) -> Result { - let v: PdfInteger = PdfParse::parse(object)?; - NonZero::new(v.value().into()).ok_or(PdfParseError::IntegerOutOfRange { pos: v.pos() }) - } -} - -impl IsPdfNull for f64 { - fn is_pdf_null(&self) -> bool { - false - } -} - -impl PdfParse for f64 { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed("f64") - } - fn parse(object: PdfObject) -> Result { - Ok(::parse(object)?.as_f64()) - } -} - -impl IsPdfNull for f32 { - fn is_pdf_null(&self) -> bool { - false - } -} - -impl PdfParse for f32 { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed("f32") - } - fn parse(object: PdfObject) -> Result { - Ok(::parse(object)?.as_f32()) - } -} - -impl IsPdfNull for PdfNull { - fn is_pdf_null(&self) -> bool { - true - } -} - -impl PdfParse for PdfNull { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed("null") - } - fn parse(object: PdfObject) -> Result { - match PdfObjectDirect::from(object) { - PdfObjectDirect::Null(v) => Ok(v), - object => Err(PdfParseError::InvalidType { - pos: object.pos(), - ty: object.type_name(), - expected_ty: "null", - }), - } - } - fn parse_option(object: PdfObject) -> Result, PdfParseError> { - Self::parse(object).map(Some) - } -} - -impl PdfParse for PdfObjectNonNull { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed("non-null") - } - fn parse(object: PdfObject) -> Result { - Option::::from(object).ok_or(PdfParseError::InvalidType { - pos: PdfInputPosition::empty(), - ty: "null", - expected_ty: "non-null", - }) - } - fn parse_option(object: PdfObject) -> Result, PdfParseError> { - Ok(object.into()) - } -} - -impl PdfParse for PdfObjectDirect { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed("direct object") - } - fn parse(object: PdfObject) -> Result { - Ok(object.into()) - } -} - -impl PdfParse for PdfObject { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed("object") - } - fn parse(object: PdfObject) -> Result { - Ok(object) - } -} - -impl IsPdfNull for PdfObjectIndirect { - fn is_pdf_null(&self) -> bool { - self.get().is_pdf_null() - } -} - -impl PdfParse for PdfObjectIndirect { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed("indirect object") - } - fn parse(object: PdfObject) -> Result { - match object { - PdfObject::Indirect(v) => Ok(v), - _ => Err(PdfParseError::InvalidType { - pos: object.pos(), - ty: object.type_name(), - expected_ty: "indirect object", - }), - } - } - fn parse_option(object: PdfObject) -> Result, PdfParseError> { - match object { - PdfObject::Indirect(v) => Ok(Some(v)), - PdfObject::Null(_) => Ok(None), - _ => Err(PdfParseError::InvalidType { - pos: object.pos(), - ty: object.type_name(), - expected_ty: "indirect object", - }), - } - } -} - -impl IsPdfNull for [T; N] { - fn is_pdf_null(&self) -> bool { - false - } -} - -impl PdfParse for [T; N] { - fn type_name() -> Cow<'static, str> { - Cow::Owned(format!("[{}; {N}]", T::type_name())) - } - fn parse(object: PdfObject) -> Result { - match PdfObjectDirect::from(object) { - PdfObjectDirect::Array(array) => { - let array_pos = array.pos(); - let elements = array.into_elements(); - let mut elements: Arc<[PdfObject; N]> = - elements.try_into().map_err(|elements: Arc<[PdfObject]>| { - PdfParseError::WrongArrayLength { - pos: array_pos, - len: elements.len(), - expected_len: N, - } - })?; - let elements: Box<[T]> = if let Some(elements) = Arc::get_mut(&mut elements) { - Result::from_iter(elements.iter_mut().map(|v| T::parse(mem::take(v))))? - } else { - Result::from_iter(elements.iter().map(|v| T::parse(v.clone())))? - }; - Ok(*Box::<[T; N]>::try_from(elements) - .ok() - .expect("already checked length")) - } - object => Err(PdfParseError::InvalidType { - pos: object.pos(), - ty: object.type_name(), - expected_ty: "array", - }), - } - } -} - -impl IsPdfNull for Arc<[T]> { - fn is_pdf_null(&self) -> bool { - false - } -} - -impl PdfParse for Arc<[T]> { - fn type_name() -> Cow<'static, str> { - Cow::Owned(format!("Arc<[{}]>", T::type_name())) - } - fn parse(object: PdfObject) -> Result { - match PdfObjectDirect::from(object) { - PdfObjectDirect::Array(array) => { - let mut elements = array.into_elements(); - if let Some(retval) = ::downcast_ref::(&elements) { - return Ok(retval.clone()); - } - if let Some(elements) = Arc::get_mut(&mut elements) { - Result::from_iter(elements.iter_mut().map(|v| T::parse(mem::take(v)))) - } else { - Result::from_iter(elements.iter().map(|v| T::parse(v.clone()))) - } - } - PdfObjectDirect::Null(_) => Ok(Self::default()), - object => Err(PdfParseError::InvalidType { - pos: object.pos(), - ty: object.type_name(), - expected_ty: "array", - }), - } - } -} - -impl IsPdfNull for Arc { - fn is_pdf_null(&self) -> bool { - false - } -} - -impl PdfParse for Arc { - fn type_name() -> Cow<'static, str> { - T::type_name() - } - fn parse(object: PdfObject) -> Result { - if let PdfObject::Indirect(indirect) = object { - indirect.cache_parse(|object| T::parse(object.into()).map(Arc::new)) - } else { - T::parse(object).map(Arc::new) - } - } - fn parse_option(object: PdfObject) -> Result, PdfParseError> { - if let PdfObject::Indirect(indirect) = object { - match indirect.cache_parse(|object| match T::parse_option(object.into()) { - Ok(Some(v)) => Ok(Arc::new(v)), - Ok(None) => Err(None), - Err(e) => Err(Some(e)), - }) { - Ok(v) => Ok(Some(v)), - Err(None) => Ok(None), - Err(Some(e)) => Err(e), - } - } else { - Ok(T::parse_option(object)?.map(Arc::new)) - } - } -} - -impl IsPdfNull for MaybeArray { - fn is_pdf_null(&self) -> bool { - false - } -} - -impl PdfParse for MaybeArray { - fn type_name() -> Cow<'static, str> { - Cow::Owned(format!("MaybeArray<{}>", T::type_name())) - } - fn parse(object: PdfObject) -> Result { - match PdfObjectDirect::from(object) { - PdfObjectDirect::Null(_) => Ok(Self::default()), - PdfObjectDirect::Array(object) => Ok(Self(PdfParse::parse(object.into())?)), - object => Ok(Self(Arc::new([PdfParse::parse(object.into())?]))), - } - } -} - -#[macro_export] -macro_rules! pdf_parse { - ( - #[pdf $($struct_pdf_meta:tt)*] - $(#[$($struct_meta:tt)*])* - $struct_vis:vis struct $Struct:ident$(<$($StructParam:ident $(: $StructBound:tt)? $(= $StructParamDefault:ty)?),* $(,)?>)? { - $(#[pdf $($pdf_meta:tt)*] - $(#[$($field_meta:tt)*])* - $field_vis:vis $field_name:ident: $field_ty:ty,)* - } - ) => { - $(#[$($struct_meta)*])* - $struct_vis struct $Struct$(<$($StructParam $(: $StructBound)? $(= $StructParamDefault)?),*>)? { - $($(#[$($field_meta)*])* - $field_vis $field_name: $field_ty,)* - } - - $crate::pdf::parse::pdf_parse! { - @impl - #[pdf $($struct_pdf_meta)*] - struct $Struct$(<$($StructParam $(: $StructBound)?),*>)? { - $(#[pdf $($pdf_meta)*] - $(#[$($field_meta)*])* - $field_name: $field_ty,)* - } - } - }; - ( - @impl - #[pdf(transparent)] - struct $Struct:ident$(<$($StructParam:ident $(: $StructBound:tt)?),* $(,)?>)? { - #[pdf] - $(#[$($field_meta:tt)*])* - $field_name:ident: $field_ty:ty, - $(#[pdf] - $(#[$($phantom_meta:tt)*])* - $phantom_name:ident: PhantomData<$phantom_ty:ty>,)? - } - ) => { - impl$(<$($StructParam: $crate::pdf::parse::IsPdfNull $(+ $StructBound)?),*>)? $crate::pdf::object::IsPdfNull for $Struct$(<$($StructParam),*>)? { - fn is_pdf_null(&self) -> $crate::__std::primitive::bool { - <$field_ty as $crate::pdf::object::IsPdfNull>::is_pdf_null(&self.$field_name) - } - } - impl$(<$($StructParam: $crate::pdf::parse::PdfParse $(+ $StructBound)?),*>)? $crate::pdf::parse::PdfParse for $Struct$(<$($StructParam),*>)? { - fn type_name() -> $crate::__std::borrow::Cow<'static, $crate::__std::primitive::str> { - let args: &[$crate::__std::borrow::Cow<'static, $crate::__std::primitive::str>] = &[ - $($(<$StructParam as $crate::pdf::parse::PdfParse>::type_name()),*)? - ]; - if args.is_empty() { - $crate::__std::borrow::Cow::Borrowed($crate::__std::stringify!($Struct)) - } else { - let mut retval = $crate::__std::string::String::new(); - retval.push_str($crate::__std::stringify!($Struct)); - retval.push_str("<"); - let mut first = true; - for arg in args { - if first { - first = false; - } else { - retval.push_str(", "); - } - retval.push_str(arg); - } - retval.push_str(">"); - $crate::__std::borrow::Cow::Owned(retval) - } - } - fn parse(object: $crate::pdf::object::PdfObject) -> $crate::__std::result::Result { - $crate::__std::result::Result::Ok(Self { - $field_name: <$field_ty as $crate::pdf::parse::PdfParse>::parse(object)?, - $($phantom_name: $crate::__std::marker::PhantomData,)? - }) - } - } - }; - ( - @impl - #[pdf] - struct $Struct:ident$(<$($StructParam:ident $(: $StructBound:tt)?),* $(,)?>)? { - $($(#[$($field_meta:tt)*])* - $field_name:ident: $field_ty:ty,)* - } - ) => { - impl$(<$($StructParam $(: $StructBound)?),*>)? $crate::pdf::object::IsPdfNull for $Struct$(<$($StructParam),*>)? { - fn is_pdf_null(&self) -> $crate::__std::primitive::bool { - false - } - } - impl$(<$($StructParam: $crate::pdf::parse::PdfParse $(+ $StructBound)?),*>)? $crate::pdf::parse::PdfParse for $Struct$(<$($StructParam),*>)? { - fn type_name() -> $crate::__std::borrow::Cow<'static, $crate::__std::primitive::str> { - let args: &[$crate::__std::borrow::Cow<'static, $crate::__std::primitive::str>] = &[ - $($(<$StructParam as $crate::pdf::parse::PdfParse>::type_name()),*)? - ]; - if args.is_empty() { - $crate::__std::borrow::Cow::Borrowed($crate::__std::stringify!($Struct)) - } else { - let mut retval = $crate::__std::string::String::new(); - retval.push_str($crate::__std::stringify!($Struct)); - retval.push_str("<"); - let mut first = true; - for arg in args { - if first { - first = false; - } else { - retval.push_str(", "); - } - retval.push_str(arg); - } - retval.push_str(">"); - $crate::__std::borrow::Cow::Owned(retval) - } - } - fn parse(object: $crate::pdf::object::PdfObject) -> $crate::__std::result::Result { - let object = $crate::__std::convert::From::from(object); - let $crate::pdf::object::PdfObjectDirect::Dictionary(object) = object else { - return $crate::__std::result::Result::Err($crate::pdf::parse::PdfParseError::InvalidType { - pos: object.pos(), - ty: object.type_name(), - expected_ty: $crate::__std::stringify!($Struct), - }); - }; - let pos = object.pos(); - let mut object = object.into_fields(); - let object_mut = $crate::__std::sync::Arc::make_mut(&mut object); - let _ = object_mut; - $($crate::pdf::parse::pdf_parse! { - @impl_struct_field(pos, object, object_mut) - [] - $(#[$($field_meta)*])* - $field_name: $field_ty - })* - $crate::__std::result::Result::Ok(Self { - $($field_name,)* - }) - } - } - }; - ( - @impl_struct_field($pos:ident, $object:ident, $object_mut:ident) - [$(#[$($prev_field_meta:tt)*])*] - #[pdf $pdf_meta:tt] - $(#[$($field_meta:tt)*])* - $field_name:ident: $field_ty:ty - ) => { - $crate::pdf::parse::pdf_parse! { - @impl_struct_field($pos, $object, $object_mut, pdf $pdf_meta) - [$(#[$($prev_field_meta)*])*] - $(#[$($field_meta)*])* - $field_name: $field_ty - } - }; - ( - @impl_struct_field($pos:ident, $object:ident, $object_mut:ident $($pdf_meta:tt)*) - [$(#[$($prev_field_meta:tt)*])*] - #[$($next_field_meta:tt)*] - $(#[$($field_meta:tt)*])* - $field_name:ident: $field_ty:ty - ) => { - $crate::pdf::parse::pdf_parse! { - @impl_struct_field($pos, $object, $object_mut $($pdf_meta)*) - [$(#[$($prev_field_meta)*])* #[$($next_field_meta)*]] - $(#[$($field_meta)*])* - $field_name: $field_ty - } - }; - ( - @impl_struct_field($pos:ident, $object:ident, $object_mut:ident, pdf(flatten)) - [$(#[$($field_meta:tt)*])*] - $field_name:ident: $field_ty:ty - ) => { - let $field_name = <$field_ty as $crate::pdf::parse::PdfParse>::parse( - $crate::pdf::object::PdfObject::Dictionary( - $crate::pdf::object::PdfDictionary::from_fields($pos, $object), - ), - )?; - }; - ( - @impl_struct_field($pos:ident, $object:ident, $object_mut:ident, pdf(name = $name:expr)) - [$(#[$($field_meta:tt)*])*] - $field_name:ident: $field_ty:ty - ) => { - let $field_name = $crate::__std::convert::AsRef::<[$crate::__std::primitive::u8]>::as_ref($name); - let $field_name = <$field_ty as $crate::pdf::parse::PdfParse>::parse( - $object_mut - .remove($field_name) - .unwrap_or($crate::pdf::object::PdfObject::Null($crate::pdf::object::PdfNull::new($pos))), - )?; - }; - ( - #[pdf $($enum_pdf_meta:tt)*] - $(#[$($enum_meta:tt)*])* - $enum_vis:vis enum $Enum:ident { - $(#[pdf $($pdf_meta:tt)*] - $(#[$($variant_meta:tt)*])* - $VariantName:ident $(($($variant_paren_body:tt)*))? $({$($variant_brace_body:tt)*})?,)* - } - ) => { - $(#[$($enum_meta)*])* - $enum_vis enum $Enum { - $($(#[$($variant_meta)*])* - $VariantName $(($($variant_paren_body)*))? $({$($variant_brace_body)*})?,)* - } - - $crate::pdf::parse::pdf_parse! { - @impl - #[pdf $($enum_pdf_meta)*] - $(#[$($enum_meta)*])* - enum $Enum { - $(#[pdf $($pdf_meta)*] - $(#[$($variant_meta)*])* - $VariantName $(($($variant_paren_body)*))? $({$($variant_brace_body)*})?,)* - } - } - }; - ( - @impl - #[pdf(tag = $tag_name:expr)] - $(#[$($enum_meta:tt)*])* - enum $Enum:ident { - $(#[pdf(tag_value = $tag_value:expr)] - $(#[$($variant_meta:tt)*])* - $VariantName:ident($Body:ty),)* - #[pdf(other)] - $(#[$($variant_meta_other:tt)*])* - $VariantNameOther:ident($Other:ty), - } - ) => { - impl $crate::pdf::object::IsPdfNull for $Enum { - fn is_pdf_null(&self) -> $crate::__std::primitive::bool { - if let Self::$VariantNameOther(other) = self { - $crate::pdf::object::IsPdfNull::is_pdf_null(other) - } else { - false - } - } - } - impl $crate::pdf::parse::PdfParse for $Enum { - fn type_name() -> $crate::__std::borrow::Cow<'static, $crate::__std::primitive::str> { - $crate::__std::borrow::Cow::Borrowed($crate::__std::stringify!($Enum)) - } - fn parse(object: $crate::pdf::object::PdfObject) -> $crate::__std::result::Result { - let object = $crate::__std::convert::From::from(object); - let $crate::pdf::object::PdfObjectDirect::Dictionary(object) = object else { - return <$Other as $crate::pdf::parse::PdfParse>::parse( - $crate::__std::convert::From::from(object), - ).map($Enum::$VariantNameOther); - }; - 'a: { - let tag_name = $crate::__std::convert::AsRef::<[$crate::__std::primitive::u8]>::as_ref($tag_name); - let $crate::__std::option::Option::Some(tag_value) = object.get(tag_name).cloned() else { - break 'a; - }; - let tag_value = $crate::__std::convert::From::from(tag_value); - let $crate::pdf::object::PdfObjectDirect::Name(tag_value) = tag_value else { - break 'a; - }; - let _ = tag_value; - $(if tag_value == $crate::pdf::object::PdfName::new_static( - $crate::__std::convert::AsRef::<[u8]>::as_ref($tag_value), - ) { - return <$Body as $crate::pdf::parse::PdfParse>::parse( - $crate::pdf::object::PdfObject::Dictionary(object), - ).map($Enum::$VariantName); - })* - } - <$Other as $crate::pdf::parse::PdfParse>::parse( - $crate::pdf::object::PdfObject::Dictionary(object), - ).map($Enum::$VariantNameOther) - } - } - }; - ( - @impl - #[pdf(name)] - $(#[$($enum_meta:tt)*])* - enum $Enum:ident { - $(#[pdf(name = $name:expr)] - $(#[$($variant_meta:tt)*])* - $VariantName:ident,)* - $(#[pdf(other)] - $(#[$($variant_meta_other:tt)*])* - $VariantNameOther:ident($($PdfName:tt)*),)? - } - ) => { - impl $crate::__std::convert::From<$Enum> for $crate::pdf::object::PdfName { - fn from(value: $Enum) -> Self { - match value { - $($Enum::$VariantName => $crate::pdf::object::PdfName::new_static( - $crate::__std::convert::AsRef::<[u8]>::as_ref($name), - ),)* - $($Enum::$VariantNameOther(v) => $crate::__std::convert::Into::into(v),)? - } - } - } - - $crate::pdf::parse::pdf_parse! { - @impl_try_from - $(#[$($enum_meta)*])* - enum $Enum { - $(#[pdf(name = $name)] - $(#[$($variant_meta)*])* - $VariantName,)* - $(#[pdf(other)] - $(#[$($variant_meta_other)*])* - $VariantNameOther($($PdfName)*),)? - } - } - - impl $crate::pdf::object::IsPdfNull for $Enum { - fn is_pdf_null(&self) -> $crate::__std::primitive::bool { - false - } - } - - impl $crate::pdf::parse::PdfParse for $Enum { - fn type_name() -> $crate::__std::borrow::Cow<'static, $crate::__std::primitive::str> { - $crate::__std::borrow::Cow::Borrowed($crate::__std::stringify!($Enum)) - } - fn parse(object: $crate::pdf::object::PdfObject) -> $crate::__std::result::Result { - let object = $crate::__std::convert::From::from(object); - let $crate::pdf::object::PdfObjectDirect::Name(name) = object else { - return $crate::__std::result::Result::Err($crate::pdf::parse::PdfParseError::InvalidType { - pos: object.pos(), - ty: object.type_name(), - expected_ty: $crate::__std::stringify!($Enum), - }); - }; - $crate::__std::result::Result::Ok($crate::__std::convert::TryInto::<$Enum>::try_into(name)?) - } - } - }; - ( - @impl_try_from - $(#[$($enum_meta:tt)*])* - enum $Enum:ident { - $(#[pdf(name = $name:expr)] - $(#[$($variant_meta:tt)*])* - $VariantName:ident,)* - #[pdf(other)] - $(#[$($variant_meta_other:tt)*])* - $VariantNameOther:ident(PdfName), - } - ) => { - impl $crate::__std::convert::From<$crate::pdf::object::PdfName> for $Enum { - fn from(name: $crate::pdf::object::PdfName) -> Self { - $(if name == $crate::pdf::object::PdfName::new_static( - $crate::__std::convert::AsRef::<[u8]>::as_ref($name), - ) { - $Enum::$VariantName - } else)* { - $Enum::$VariantNameOther(name) - } - } - } - }; - ( - @impl_try_from - $(#[$($enum_meta:tt)*])* - enum $Enum:ident { - $(#[pdf(name = $name:expr)] - $(#[$($variant_meta:tt)*])* - $VariantName:ident,)* - #[pdf(other)] - $(#[$($variant_meta_other:tt)*])* - $VariantNameOther:ident($PdfName:ty), - } - ) => { - impl $crate::__std::convert::TryFrom<$crate::pdf::object::PdfName> for $Enum { - type Error = $crate::pdf::parse::PdfParseError; - - fn try_from(name: $crate::pdf::object::PdfName) -> $crate::__std::result::Result { - $(if name == $crate::pdf::object::PdfName::new_static( - $crate::__std::convert::AsRef::<[u8]>::as_ref($name), - ) { - $crate::__std::result::Result::Ok($Enum::$VariantName) - } else)* { - $crate::__std::result::Result::Ok($Enum::$VariantNameOther($crate::__std::convert::TryInto::<$PdfName>::try_into(name)?)) - } - } - } - }; - ( - @impl_try_from - $(#[$($enum_meta:tt)*])* - enum $Enum:ident { - $(#[pdf(name = $name:expr)] - $(#[$($variant_meta:tt)*])* - $VariantName:ident,)* - } - ) => { - impl $crate::__std::convert::TryFrom<$crate::pdf::object::PdfName> for $Enum { - type Error = $crate::pdf::parse::PdfParseError; - - fn try_from(name: $crate::pdf::object::PdfName) -> $crate::__std::result::Result { - $(if name == $crate::pdf::object::PdfName::new_static( - $crate::__std::convert::AsRef::<[u8]>::as_ref($name), - ) { - $crate::__std::result::Result::Ok($Enum::$VariantName) - } else)* { - $crate::__std::result::Result::Err($crate::pdf::parse::PdfParseError::InvalidName { - pos: name.pos(), - name, - expected_ty: $crate::__std::stringify!($Enum), - }) - } - } - } - }; -} - -pub use pdf_parse; diff --git a/src/pdf/render.rs b/src/pdf/render.rs deleted file mode 100644 index 8ffbcac..0000000 --- a/src/pdf/render.rs +++ /dev/null @@ -1,1064 +0,0 @@ -use crate::{ - pdf::{ - content_stream::{ - PdfOperatorAndOperands, PdfOperatorBeginCompatibilitySection, - PdfOperatorBeginInlineImage, PdfOperatorBeginInlineImageData, - PdfOperatorBeginMarkedContent, PdfOperatorBeginMarkedContentWithProperties, - PdfOperatorBeginText, PdfOperatorClip, PdfOperatorClipEvenOdd, - PdfOperatorCloseAndStrokePath, PdfOperatorCloseFillAndStrokePath, - PdfOperatorCloseFillAndStrokePathEvenOdd, PdfOperatorCloseSubpath, - PdfOperatorConcatMatrix, PdfOperatorCurveTo, PdfOperatorCurveTo13, - PdfOperatorCurveTo23, PdfOperatorDesignateMarkedContentPoint, - PdfOperatorDesignateMarkedContentPointWithProperties, - PdfOperatorEndCompatibilitySection, PdfOperatorEndInlineImage, - PdfOperatorEndMarkedContent, PdfOperatorEndPath, PdfOperatorEndText, - PdfOperatorFillAndStrokePath, PdfOperatorFillAndStrokePathEvenOdd, PdfOperatorFillPath, - PdfOperatorFillPathEvenOdd, PdfOperatorFillPathObsolete, PdfOperatorFontType3SetWidth, - PdfOperatorFontType3SetWidthAndBBox, PdfOperatorLineTo, PdfOperatorMoveTo, - PdfOperatorPaintXObject, PdfOperatorRectangle, PdfOperatorRestoreGraphicsState, - PdfOperatorSaveGraphicsState, PdfOperatorSetCharacterSpacing, - PdfOperatorSetColorRenderingIntent, PdfOperatorSetFlatnessTolerance, - PdfOperatorSetFontAndSize, PdfOperatorSetGraphicsState, PdfOperatorSetLineCapStyle, - PdfOperatorSetLineDashPattern, PdfOperatorSetLineJoinStyle, PdfOperatorSetLineWidth, - PdfOperatorSetMiterLimit, PdfOperatorSetNonStrokeCmyk, PdfOperatorSetNonStrokeColor, - PdfOperatorSetNonStrokeColorSpace, PdfOperatorSetNonStrokeColorWithName, - PdfOperatorSetNonStrokeGray, PdfOperatorSetNonStrokeRgb, - PdfOperatorSetSpacingThenTextNextLineAndShow, PdfOperatorSetStrokeCmyk, - PdfOperatorSetStrokeColor, PdfOperatorSetStrokeColorSpace, - PdfOperatorSetStrokeColorWithName, PdfOperatorSetStrokeGray, PdfOperatorSetStrokeRgb, - PdfOperatorSetTextHorizontalScaling, PdfOperatorSetTextLeading, - PdfOperatorSetTextMatrix, PdfOperatorSetTextRenderingMode, PdfOperatorSetTextRise, - PdfOperatorSetWordSpacing, PdfOperatorShade, PdfOperatorShowText, - PdfOperatorShowTextWithGlyphPositioning, PdfOperatorStrokePath, - PdfOperatorTextNextLine, PdfOperatorTextNextLineAndShow, - PdfOperatorTextNextLineWithOffset, PdfOperatorTextNextLineWithOffsetAndLeading, - PdfOperatorUnparsed, - }, - document_structure::{PdfPage, PdfResourcesDictionary}, - font::{PdfFont, PdfTodo}, - object::{ - IsPdfNull, PdfMatrix, PdfName, PdfNumber, PdfObject, PdfObjectDirect, - PdfStringOrNumber, PdfVec2D, - }, - parse::{PdfInputPosition, PdfInputPositionNoCompare, PdfParse, PdfParseError}, - }, - pdf_parse, -}; -use std::borrow::Cow; - -#[derive(Clone, Debug)] -#[non_exhaustive] -pub struct PdfPath {} - -pdf_parse! { - #[pdf(name)] - #[derive(Clone, Debug)] - pub enum PdfColorSpace { - #[pdf(name = "DeviceGray")] - DeviceGray, - #[pdf(name = "DeviceRGB")] - DeviceRgb, - // TODO: add others - #[pdf(other)] - Unknown(PdfName), - } -} - -pdf_parse! { - #[pdf(name)] - #[derive(Clone, Debug)] - pub enum PdfRenderingIntent { - #[pdf(name = "RelativeColorimetric")] - RelativeColorimetric, - // TODO: add others - #[pdf(other)] - Unknown(PdfName), - } -} - -pdf_parse! { - #[pdf(name)] - #[derive(Clone, Debug)] - pub enum PdfBlendMode { - #[pdf(name = "Normal")] - Normal, - // TODO: add others - #[pdf(other)] - Unknown(PdfName), - } -} - -#[derive(Clone, Copy, PartialEq, PartialOrd)] -pub struct PdfColorDeviceGray { - pos: PdfInputPositionNoCompare, - level: f32, -} - -impl PdfColorDeviceGray { - pub fn pos(self) -> PdfInputPosition { - self.pos.0 - } - pub fn level(self) -> f32 { - self.level - } -} - -impl IsPdfNull for PdfColorDeviceGray { - fn is_pdf_null(&self) -> bool { - false - } -} - -impl PdfParse for PdfColorDeviceGray { - fn type_name() -> Cow<'static, str> { - Cow::Borrowed("PdfColorDeviceGray") - } - fn parse(object: PdfObject) -> Result { - let number = PdfNumber::parse(object)?; - Ok(Self { - pos: number.pos().into(), - level: number.as_f32(), - }) - } -} - -impl std::fmt::Debug for PdfColorDeviceGray { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let Self { pos, level } = self; - write!(f, "PdfColorDeviceGray(at {pos}, {level})") - } -} - -#[derive(Clone, Copy, PartialEq, PartialOrd)] -pub struct PdfColorDeviceRgb { - pos: PdfInputPositionNoCompare, - r: f32, - g: f32, - b: f32, -} - -impl PdfColorDeviceRgb { - pub fn pos(self) -> PdfInputPosition { - self.pos.0 - } - pub fn r(self) -> f32 { - self.r - } - pub fn g(self) -> f32 { - self.g - } - pub fn b(self) -> f32 { - self.b - } - pub fn parse_flat(r: PdfObject, g: PdfObject, b: PdfObject) -> Result { - let r = PdfNumber::parse(r)?; - let g = f32::parse(g)?; - let b = f32::parse(b)?; - Ok(Self { - pos: r.pos().into(), - r: r.as_f32(), - g, - b, - }) - } -} - -impl IsPdfNull for PdfColorDeviceRgb { - fn is_pdf_null(&self) -> bool { - false - } -} - -impl std::fmt::Debug for PdfColorDeviceRgb { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let Self { pos, r, g, b } = self; - write!(f, "PdfColorDeviceRgb(at {pos}, {r}, {g}, {b})") - } -} - -#[derive(Clone, Debug)] -pub enum PdfColor { - DeviceGray(PdfColorDeviceGray), - DeviceRgb(PdfColorDeviceRgb), - // TODO -} - -#[derive(Clone, Debug)] -pub struct PdfTextObjectState { - pub text_matrix: PdfMatrix, - pub text_line_matrix: PdfMatrix, -} - -impl PdfTextObjectState { - fn require>( - v: Option, - pos: PdfInputPosition, - ) -> Result { - v.ok_or(PdfParseError::MissingBeginTextOperator { pos }) - } -} - -#[derive(Clone, Debug)] -pub struct PdfTextState { - pub char_spacing: f32, - pub word_spacing: f32, - pub horizontal_scaling_percent: f32, - pub leading: f32, - pub font: Option, - pub font_size: f32, - pub rendering_mode: u8, // TODO: replace with enum - pub rise: f32, - pub knockout: bool, - pub text_object: Option, -} - -#[derive(Clone, Debug)] -#[non_exhaustive] -pub struct PdfLineDashPattern { - // TODO -} - -impl PdfLineDashPattern { - pub fn solid() -> Self { - Self {} - } -} - -#[derive(Clone, Debug)] -pub struct PdfGraphicsState { - pub current_transformation_matrix: PdfMatrix, - pub clipping_path: PdfPath, - pub stroking_color: PdfColor, - pub non_stroking_color: PdfColor, - pub text_state: PdfTextState, - pub line_width: f32, - pub line_cap_style: u8, // TODO: replace with enum - pub line_join_style: u8, // TODO: replace with enum - pub miter_limit: f32, - pub line_dash_pattern: PdfLineDashPattern, - pub rendering_intent: PdfRenderingIntent, - pub automatic_stroke_adjustment: bool, - pub current_blend_mode: PdfBlendMode, - pub soft_mask: Option, // TODO: replace with struct - pub stroking_alpha_constant: f32, - pub non_stroking_alpha_constant: f32, - pub alpha_source: bool, -} - -impl PdfGraphicsState { - pub fn text_rendering_matrix(&self, pos: PdfInputPosition) -> Result { - let text_object = PdfTextObjectState::require(self.text_state.text_object.as_ref(), pos)?; - Ok(PdfMatrix { - pos: PdfInputPositionNoCompare::empty(), - elements: [ - self.text_state.font_size * self.text_state.horizontal_scaling_percent * 1e-2, - 0.0, - 0.0, - self.text_state.font_size, - 0.0, - self.text_state.rise, - ], - } - .mul(text_object.text_matrix, PdfInputPositionNoCompare::empty()) - .mul( - self.current_transformation_matrix, - text_object.text_matrix.pos, - )) - } - pub fn advance_text_matrix( - &mut self, - pos: PdfInputPosition, - glyph_displacement: PdfVec2D, - position_adjustment: f32, - has_char_spacing: bool, - has_word_spacing: bool, - ) -> Result<(), PdfParseError> { - let text_object = PdfTextObjectState::require(self.text_state.text_object.as_mut(), pos)?; - let (tx, ty) = if self - .text_state - .font - .as_ref() - .ok_or(PdfParseError::MissingSetFontOperator { pos })? - .is_vertical_writing_mode() - { - let mut ty = - (glyph_displacement.y - position_adjustment * 1e-3) * self.text_state.font_size; - if has_char_spacing { - ty += self.text_state.char_spacing; - } - if has_word_spacing { - ty += self.text_state.word_spacing; - } - (0.0, ty) - } else { - let mut tx = - (glyph_displacement.x - position_adjustment * 1e-3) * self.text_state.font_size; - if has_char_spacing { - tx += self.text_state.char_spacing; - } - if has_word_spacing { - tx += self.text_state.word_spacing; - } - (tx * self.text_state.horizontal_scaling_percent * 1e-2, 0.0) - }; - text_object.text_matrix = PdfMatrix { - pos: pos.into(), - elements: [1.0, 0.0, 0.0, 1.0, tx, ty], - } - .mul(text_object.text_matrix, pos); - Ok(()) - } -} - -#[derive(Debug)] -pub struct PdfRenderState<'a> { - pub graphics_state: PdfGraphicsState, - pub graphics_state_stack: Vec, - pub resources: &'a PdfResourcesDictionary, -} - -impl<'a> PdfRenderState<'a> { - pub fn new(page: &'a PdfPage) -> Self { - let pos = page.rest.pos().into(); - Self { - graphics_state: PdfGraphicsState { - current_transformation_matrix: PdfMatrix::identity(pos), - clipping_path: PdfPath {}, - stroking_color: PdfColor::DeviceGray(PdfColorDeviceGray { pos, level: 0.0 }), - non_stroking_color: PdfColor::DeviceGray(PdfColorDeviceGray { pos, level: 0.0 }), - text_state: PdfTextState { - char_spacing: 0.0, - word_spacing: 0.0, - horizontal_scaling_percent: 100.0, - leading: 0.0, - font: None, - font_size: 0.0, - rendering_mode: 0, - rise: 0.0, - knockout: true, - text_object: None, - }, - line_width: 1.0, - line_cap_style: 0, - line_join_style: 0, - miter_limit: 10.0, - line_dash_pattern: PdfLineDashPattern::solid(), - rendering_intent: PdfRenderingIntent::RelativeColorimetric, - automatic_stroke_adjustment: false, - current_blend_mode: PdfBlendMode::Normal, - soft_mask: None, - stroking_alpha_constant: 1.0, - non_stroking_alpha_constant: 1.0, - alpha_source: false, - }, - graphics_state_stack: Vec::with_capacity(3), - resources: &page.resources, - } - } - pub fn handle_unknown_operator( - &mut self, - operator: &PdfOperatorUnparsed, - operands: &[PdfObjectDirect], - ) -> Result<(), PdfParseError> { - todo!() - } -} - -pub trait PdfRenderOperator: Into { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError>; -} - -impl PdfRenderOperator for PdfOperatorCloseFillAndStrokePath { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorFillAndStrokePath { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorCloseFillAndStrokePathEvenOdd { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorFillAndStrokePathEvenOdd { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorBeginMarkedContentWithProperties { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { - pos, - tag, - properties, - } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorBeginInlineImage { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorBeginMarkedContent { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, tag } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorBeginText { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - let Self { pos } = *self; - state.graphics_state.text_state.text_object = Some(PdfTextObjectState { - text_matrix: PdfMatrix::identity(pos), - text_line_matrix: PdfMatrix::identity(pos), - }); - Ok(()) - } -} - -impl PdfRenderOperator for PdfOperatorBeginCompatibilitySection { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorCurveTo { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, p1, p2, p3 } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorConcatMatrix { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - let Self { pos, matrix } = *self; - state.graphics_state.current_transformation_matrix = state - .graphics_state - .current_transformation_matrix - .mul(matrix, pos); - Ok(()) - } -} - -impl PdfRenderOperator for PdfOperatorSetStrokeColorSpace { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, name } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorSetNonStrokeColorSpace { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, name } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorSetLineDashPattern { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { - pos, - dash_array, - dash_phase, - } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorFontType3SetWidth { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, width } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorFontType3SetWidthAndBBox { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, width, bbox } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorPaintXObject { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, name } = self; */ - let _ = state; - // TODO: implement - Ok(()) - } -} - -impl PdfRenderOperator for PdfOperatorDesignateMarkedContentPointWithProperties { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { - pos, - tag, - properties, - } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorEndInlineImage { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorEndMarkedContent { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorEndText { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - let Self { pos: _ } = self; - state.graphics_state.text_state.text_object = None; - Ok(()) - } -} - -impl PdfRenderOperator for PdfOperatorEndCompatibilitySection { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorFillPath { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorFillPathObsolete { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorFillPathEvenOdd { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorSetStrokeGray { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - let Self { pos: _, gray } = *self; - state.graphics_state.stroking_color = PdfColor::DeviceGray(gray); - Ok(()) - } -} - -impl PdfRenderOperator for PdfOperatorSetNonStrokeGray { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - let Self { pos: _, gray } = *self; - state.graphics_state.non_stroking_color = PdfColor::DeviceGray(gray); - Ok(()) - } -} - -impl PdfRenderOperator for PdfOperatorSetGraphicsState { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { - pos, - dictionary_name, - } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorCloseSubpath { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorSetFlatnessTolerance { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, flatness } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorBeginInlineImageData { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorSetLineJoinStyle { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { - pos, - line_join_style, - } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorSetLineCapStyle { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { - pos, - line_cap_style, - } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorSetStrokeCmyk { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, c, m, y, k } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorSetNonStrokeCmyk { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, c, m, y, k } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorLineTo { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, to } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorMoveTo { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, to } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorSetMiterLimit { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, limit } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorDesignateMarkedContentPoint { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, tag } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorEndPath { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorSaveGraphicsState { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - let Self { pos: _ } = self; - state - .graphics_state_stack - .push(state.graphics_state.clone()); - Ok(()) - } -} - -impl PdfRenderOperator for PdfOperatorRestoreGraphicsState { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - let Self { pos } = self; - state.graphics_state = state - .graphics_state_stack - .pop() - .ok_or(PdfParseError::CantRestoreGraphicsStateWithEmptyStack { pos: pos.0 })?; - Ok(()) - } -} - -impl PdfRenderOperator for PdfOperatorRectangle { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, p, size } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorSetStrokeRgb { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - let Self { pos: _, color } = *self; - state.graphics_state.stroking_color = PdfColor::DeviceRgb(color); - Ok(()) - } -} - -impl PdfRenderOperator for PdfOperatorSetNonStrokeRgb { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - let Self { pos: _, color } = *self; - state.graphics_state.non_stroking_color = PdfColor::DeviceRgb(color); - Ok(()) - } -} - -impl PdfRenderOperator for PdfOperatorSetColorRenderingIntent { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, intent } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorCloseAndStrokePath { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorStrokePath { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorSetStrokeColor { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, color } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorSetNonStrokeColor { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, color } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorSetStrokeColorWithName { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { - pos, - color_and_name, - } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorSetNonStrokeColorWithName { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { - pos, - color_and_name, - } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorShade { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorTextNextLine { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorSetCharacterSpacing { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, char_space } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorTextNextLineWithOffset { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - let Self { pos, offset } = *self; - let PdfTextObjectState { - text_matrix, - text_line_matrix, - } = PdfTextObjectState::require( - state.graphics_state.text_state.text_object.as_mut(), - pos.0, - )?; - let matrix = PdfMatrix { - pos, - elements: [1.0, 0.0, 0.0, 1.0, offset.x, offset.y], - } - .mul(*text_line_matrix, pos); - *text_line_matrix = matrix; - *text_matrix = matrix; - Ok(()) - } -} - -impl PdfRenderOperator for PdfOperatorTextNextLineWithOffsetAndLeading { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, offset } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorSetFontAndSize { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - let Self { - pos, - ref font, - size, - } = *self; - state.graphics_state.text_state.font = Some( - state - .resources - .fonts - .get(font) - .ok_or_else(|| PdfParseError::FontResourceNotFound { - pos: pos.0, - font: font.clone(), - })? - .clone(), - ); - state.graphics_state.text_state.font_size = size; - Ok(()) - } -} - -impl PdfRenderOperator for PdfOperatorShowText { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, text } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorShowTextWithGlyphPositioning { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - let Self { - pos, - ref text_and_positioning, - } = *self; - let font = state - .graphics_state - .text_state - .font - .as_ref() - .ok_or(PdfParseError::MissingSetFontOperator { pos: pos.0 })?; - let PdfFont::Type1(font) = font else { todo!() }; - let mut positioning = 0.0; - for text_or_positioning in text_and_positioning.iter() { - match text_or_positioning { - PdfStringOrNumber::String(s) => { - for glyph in s.bytes().iter() { - let positioning = std::mem::replace(&mut positioning, 0.0); - let Some(encoding) = font.encoding() else { - todo!(); - }; - let table = encoding.table(|| { - let Some(font_encoding) = font - .font_descriptor() - .and_then(|v| v.font_file.as_ref()) - .and_then(|v| v.decoded_data().as_ref().ok()) - .and_then(|v| v.encoding.as_ref()) - else { - todo!() - }; - todo!("{font_encoding:?}"); - }); - todo!("{table:?}"); - } - } - PdfStringOrNumber::Number(number) => positioning = number.as_f32(), - } - } - let _ = state; - todo!("{text_and_positioning:?}") - } -} - -impl PdfRenderOperator for PdfOperatorSetTextLeading { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, leading } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorSetTextMatrix { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, matrix } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorSetTextRenderingMode { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { - pos, - rendering_mode, - } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorSetTextRise { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, rise } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorSetWordSpacing { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, word_space } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorSetTextHorizontalScaling { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, scale_percent } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorCurveTo23 { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorSetLineWidth { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, line_width } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorClip { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorClipEvenOdd { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorCurveTo13 { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorTextNextLineAndShow { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { pos, text } = self; */ - let _ = state; - todo!() - } -} - -impl PdfRenderOperator for PdfOperatorSetSpacingThenTextNextLineAndShow { - fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> { - /* let Self { - pos, - word_space, - char_space, - text, - } = self; */ - let _ = state; - todo!() - } -} diff --git a/src/pdf/stream_filters.rs b/src/pdf/stream_filters.rs deleted file mode 100644 index baf6670..0000000 --- a/src/pdf/stream_filters.rs +++ /dev/null @@ -1,66 +0,0 @@ -use crate::pdf::{ - object::{PdfDictionary, PdfName}, - parse::{PdfInputPosition, PdfParse, PdfParseError}, - pdf_parse, -}; - -pub mod flate; - -pdf_parse! { - #[pdf(name)] - #[derive(Clone, Debug, PartialEq, Eq)] - #[non_exhaustive] - pub enum PdfStreamFilter { - #[pdf(name = "ASCIIHexDecode")] - AsciiHexDecode, - #[pdf(name = "ASCII85Decode")] - Ascii85Decode, - #[pdf(name = "LZWDecode")] - LzwDecode, - #[pdf(name = "FlateDecode")] - FlateDecode, - #[pdf(name = "RunLengthDecode")] - RunLengthDecode, - #[pdf(name = "CCITTFaxDecode")] - CcittFaxDecode, - #[pdf(name = "JBIG2Decode")] - Jbig2Decode, - #[pdf(name = "DCTDecode")] - DctDecode, - #[pdf(name = "JPXDecode")] - JpxDecode, - #[pdf(name = "Crypt")] - Crypt, - #[pdf(other)] - Unknown(PdfName), - } -} - -impl PdfStreamFilter { - pub fn decode_stream_data( - &self, - filter_parms: PdfDictionary, - stream_pos: PdfInputPosition, - encoded_data: &[u8], - ) -> Result, PdfParseError> { - match self { - PdfStreamFilter::AsciiHexDecode => todo!(), - PdfStreamFilter::Ascii85Decode => todo!(), - PdfStreamFilter::LzwDecode => todo!(), - PdfStreamFilter::FlateDecode => { - flate::PdfFilterParmsFlateDecode::parse(filter_parms.into())? - .decode_stream_data(stream_pos, encoded_data) - } - PdfStreamFilter::RunLengthDecode => todo!(), - PdfStreamFilter::CcittFaxDecode => todo!(), - PdfStreamFilter::Jbig2Decode => todo!(), - PdfStreamFilter::DctDecode => todo!(), - PdfStreamFilter::JpxDecode => todo!(), - PdfStreamFilter::Crypt => todo!(), - PdfStreamFilter::Unknown(filter) => Err(PdfParseError::UnknownStreamFilter { - pos: stream_pos, - filter: filter.clone(), - }), - } - } -} diff --git a/src/pdf/stream_filters/flate.rs b/src/pdf/stream_filters/flate.rs deleted file mode 100644 index b5d49e8..0000000 --- a/src/pdf/stream_filters/flate.rs +++ /dev/null @@ -1,74 +0,0 @@ -use crate::pdf::{ - object::PdfDictionary, - parse::{PdfInputPosition, PdfParseError}, - pdf_parse, - stream_filters::PdfStreamFilter, -}; -use std::{io::Read, num::NonZero}; - -pdf_parse! { - #[pdf] - #[derive(Clone, Debug, Default)] - pub struct PdfFilterParmsFlateDecode { - #[pdf(name = "Predictor")] - pub predictor: Option>, - #[pdf(name = "Colors")] - pub colors: Option>, - #[pdf(name = "BitsPerComponent")] - pub bits_per_component: Option>, - #[pdf(name = "Columns")] - pub columns: Option>, - #[pdf(flatten)] - pub rest: PdfDictionary, - } -} - -impl PdfFilterParmsFlateDecode { - pub const FILTER: PdfStreamFilter = PdfStreamFilter::FlateDecode; - pub const DEFAULT_PREDICTOR: NonZero = const { NonZero::new(1).unwrap() }; - pub const DEFAULT_COLORS: NonZero = const { NonZero::new(1).unwrap() }; - pub const DEFAULT_BITS_PER_COMPONENT: NonZero = const { NonZero::new(8).unwrap() }; - pub const DEFAULT_COLUMNS: NonZero = const { NonZero::new(1).unwrap() }; - pub fn predictor(&self) -> NonZero { - self.predictor.unwrap_or(Self::DEFAULT_PREDICTOR) - } - pub fn colors(&self) -> NonZero { - self.colors.unwrap_or(Self::DEFAULT_COLORS) - } - pub fn bits_per_component(&self) -> NonZero { - self.bits_per_component - .unwrap_or(Self::DEFAULT_BITS_PER_COMPONENT) - } - pub fn columns(&self) -> NonZero { - self.columns.unwrap_or(Self::DEFAULT_COLUMNS) - } - pub fn decode_stream_data( - &self, - stream_pos: PdfInputPosition, - encoded_data: &[u8], - ) -> Result, PdfParseError> { - let Self { - predictor: _, - colors: _, - bits_per_component: _, - columns: _, - rest: _, - } = self; - let mut decoded_data = vec![]; - flate2::bufread::ZlibDecoder::new(encoded_data) - .read_to_end(&mut decoded_data) - .map_err(|e| PdfParseError::StreamFilterError { - pos: stream_pos, - filter: Self::FILTER.into(), - error: e.to_string(), - })?; - let predictor = self.predictor(); - let colors = self.colors(); - let bits_per_component = self.bits_per_component(); - let columns = self.columns(); - match predictor { - Self::DEFAULT_PREDICTOR => Ok(decoded_data), - _ => todo!("{predictor}"), - } - } -} diff --git a/src/util.rs b/src/util.rs deleted file mode 100644 index 9d576f8..0000000 --- a/src/util.rs +++ /dev/null @@ -1,382 +0,0 @@ -use std::{ - any::{Any, TypeId}, - borrow::Borrow, - cell::Cell, - collections::HashMap, - fmt, - hash::{Hash, Hasher}, - sync::Arc, -}; - -pub enum ArcOrRef<'a, T: ?Sized> { - Arc(Arc), - Ref(&'a T), -} - -impl<'a, T: ?Sized> AsRef for ArcOrRef<'a, T> { - fn as_ref(&self) -> &T { - self - } -} - -impl<'a, T: ?Sized> Borrow for ArcOrRef<'a, T> { - fn borrow(&self) -> &T { - self - } -} - -impl<'a, T: ?Sized> From> for ArcOrRef<'a, T> { - fn from(value: Arc) -> Self { - Self::Arc(value) - } -} - -impl<'a, T: ?Sized> From<&'a T> for ArcOrRef<'a, T> { - fn from(value: &'a T) -> Self { - Self::Ref(value) - } -} - -impl<'a, T: ?Sized> Default for ArcOrRef<'a, T> -where - &'a T: Default, -{ - fn default() -> Self { - Self::Ref(Default::default()) - } -} - -impl Clone for ArcOrRef<'_, T> { - fn clone(&self) -> Self { - match self { - Self::Arc(v) => Self::Arc(v.clone()), - Self::Ref(v) => Self::Ref(v), - } - } -} - -impl Hash for ArcOrRef<'_, T> { - fn hash(&self, state: &mut H) { - T::hash(self, state) - } -} - -impl<'a, 'b, T: ?Sized + PartialEq, U: ?Sized> PartialEq> for ArcOrRef<'a, T> { - fn eq(&self, other: &ArcOrRef<'b, U>) -> bool { - T::eq(self, other) - } -} - -impl Eq for ArcOrRef<'_, T> {} - -impl<'a, 'b, T: ?Sized + PartialOrd, U: ?Sized> PartialOrd> for ArcOrRef<'a, T> { - fn partial_cmp(&self, other: &ArcOrRef<'b, U>) -> Option { - T::partial_cmp(self, other) - } -} - -impl Ord for ArcOrRef<'_, T> { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - T::cmp(self, other) - } -} - -impl std::ops::Deref for ArcOrRef<'_, T> { - type Target = T; - - fn deref(&self) -> &Self::Target { - match self { - ArcOrRef::Arc(v) => v, - ArcOrRef::Ref(v) => v, - } - } -} - -impl fmt::Debug for ArcOrRef<'_, T> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - T::fmt(self, f) - } -} - -impl fmt::Display for ArcOrRef<'_, T> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - T::fmt(self, f) - } -} - -/// a stable alternative to `CloneToUninit` for `Arc` -pub trait ArcFromRef { - /// like `Arc::new(Self::clone(self))` but works for unsized types too - fn arc_from_ref(&self) -> Arc; - /// generic version of `Arc::make_mut` - fn make_mut(this: &mut Arc) -> &mut Self; -} - -impl ArcFromRef for T { - fn arc_from_ref(&self) -> Arc { - Arc::new(Self::clone(self)) - } - fn make_mut(this: &mut Arc) -> &mut Self { - Arc::make_mut(this) - } -} - -impl ArcFromRef for [T] { - fn arc_from_ref(&self) -> Arc { - Arc::from(self) - } - fn make_mut(this: &mut Arc) -> &mut Self { - Arc::make_mut(this) - } -} - -impl ArcFromRef for str { - fn arc_from_ref(&self) -> Arc { - Arc::from(self) - } - fn make_mut(this: &mut Arc) -> &mut Self { - Arc::make_mut(this) - } -} - -impl<'a, T: ?Sized + ArcFromRef> ArcOrRef<'a, T> { - pub fn into_arc(this: Self) -> Arc { - match this { - ArcOrRef::Arc(v) => v, - ArcOrRef::Ref(v) => T::arc_from_ref(v), - } - } - pub fn make_arc(this: &mut Self) -> &mut Arc { - match this { - ArcOrRef::Arc(v) => v, - ArcOrRef::Ref(v) => { - *this = ArcOrRef::Arc(T::arc_from_ref(v)); - let ArcOrRef::Arc(v) = this else { - unreachable!(); - }; - v - } - } - } - pub fn make_mut(this: &mut Self) -> &mut T { - T::make_mut(Self::make_arc(this)) - } -} - -trait DagDebugStateSealed {} - -#[expect(private_bounds)] -pub trait SupportsDagDebugState: DagDebugStateSealed + 'static + Clone { - type Key: Clone + Hash + Eq + 'static; - fn key(this: &Self) -> Self::Key; -} - -impl DagDebugStateSealed for Arc {} - -impl SupportsDagDebugState for Arc { - type Key = *const T; - - fn key(this: &Self) -> Self::Key { - Arc::as_ptr(this) - } -} - -impl DagDebugStateSealed for Arc<[T]> {} - -impl SupportsDagDebugState for Arc<[T]> { - type Key = *const [T]; - - fn key(this: &Self) -> Self::Key { - Arc::as_ptr(this) - } -} - -impl DagDebugStateSealed for Arc {} - -impl SupportsDagDebugState for Arc { - type Key = *const str; - - fn key(this: &Self) -> Self::Key { - Arc::as_ptr(this) - } -} - -trait DagDebugStatePartTrait: 'static { - fn reset(&mut self); - fn as_any_mut(&mut self) -> &mut dyn Any; -} - -struct DagDebugStatePart { - table: HashMap, - next_id: u64, -} - -impl DagDebugStatePartTrait for DagDebugStatePart { - fn reset(&mut self) { - let Self { table, next_id } = self; - table.clear(); - *next_id = 0; - } - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } -} - -impl DagDebugStatePart { - fn insert(&mut self, value: &T) -> DagDebugStateInsertResult { - use std::collections::hash_map::Entry; - match self.table.entry(T::key(value)) { - Entry::Occupied(entry) => DagDebugStateInsertResult::Old { id: entry.get().0 }, - Entry::Vacant(entry) => { - let value = T::clone(value); - let id = self.next_id; - self.next_id += 1; - entry.insert((id, value)); - DagDebugStateInsertResult::New { id } - } - } - } -} - -impl Default for DagDebugStatePart { - fn default() -> Self { - Self { - table: HashMap::default(), - next_id: 0, - } - } -} - -pub struct DagDebugState { - parts: std::cell::RefCell>>, - ref_count: Cell, -} - -#[derive(Clone, Copy, Debug)] -pub enum DagDebugStateInsertResult { - New { id: u64 }, - Old { id: u64 }, -} - -impl DagDebugStateInsertResult { - pub fn id(self) -> u64 { - match self { - Self::New { id } | Self::Old { id } => id, - } - } -} - -impl DagDebugState { - fn with_part( - &self, - f: impl FnOnce(&mut DagDebugStatePart) -> R, - ) -> R { - let mut parts = self.parts.borrow_mut(); - let Some(part) = parts - .entry(TypeId::of::>()) - .or_insert_with(|| Box::new(DagDebugStatePart::::default())) - .as_any_mut() - .downcast_mut::>() - else { - unreachable!() - }; - f(part) - } - pub fn insert(&self, value: &T) -> DagDebugStateInsertResult { - self.with_part(|part: &mut DagDebugStatePart| part.insert(value)) - } - pub fn debug_or_id<'a, T: SupportsDagDebugState + fmt::Debug, Abbreviated: fmt::Display>( - &self, - value: &'a T, - abbreviated: Abbreviated, - ) -> impl fmt::Debug + fmt::Display + use<'a, T, Abbreviated> { - self.debug_or_id_with(value, fmt::Debug::fmt, move |f| abbreviated.fmt(f)) - } - pub fn debug_or_id_with< - 'a, - T: SupportsDagDebugState, - DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result, - DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result, - >( - &self, - value: &'a T, - debug_value: DebugValue, - debug_abbreviated: DebugAbbreviated, - ) -> impl fmt::Debug + fmt::Display + use<'a, T, DebugValue, DebugAbbreviated> { - struct DebugOrIdWith< - 'a, - T: SupportsDagDebugState, - DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result, - DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result, - > { - insert_result: DagDebugStateInsertResult, - value: &'a T, - debug_value: DebugValue, - debug_abbreviated: DebugAbbreviated, - } - impl< - 'a, - T: SupportsDagDebugState, - DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result, - DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result, - > fmt::Debug for DebugOrIdWith<'a, T, DebugValue, DebugAbbreviated> - { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(self, f) - } - } - impl< - 'a, - T: SupportsDagDebugState, - DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result, - DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result, - > fmt::Display for DebugOrIdWith<'a, T, DebugValue, DebugAbbreviated> - { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "#{} ", self.insert_result.id())?; - match self.insert_result { - DagDebugStateInsertResult::New { id: _ } => (self.debug_value)(self.value, f), - DagDebugStateInsertResult::Old { id: _ } => (self.debug_abbreviated)(f), - } - } - } - DebugOrIdWith { - insert_result: self.insert(value), - value, - debug_value, - debug_abbreviated, - } - } - #[must_use] - fn inc_ref_count_scope(&self) -> impl Sized { - struct DecRefCountOnDrop<'a>(&'a DagDebugState); - impl Drop for DecRefCountOnDrop<'_> { - fn drop(&mut self) { - self.0.ref_count.set(self.0.ref_count.get() - 1); - if self.0.ref_count.get() == 0 { - self.0 - .parts - .borrow_mut() - .values_mut() - .for_each(|v| v.reset()); - } - } - } - self.ref_count.set( - self.ref_count - .get() - .checked_add(1) - .expect("too many nested calls"), - ); - DecRefCountOnDrop(self) - } - pub fn scope(f: impl FnOnce(&Self) -> R) -> R { - thread_local! { - static STATE: DagDebugState = DagDebugState { parts: Default::default(), ref_count: Cell::new(0) }; - } - STATE.with(|state| { - let _scope = state.inc_ref_count_scope(); - f(state) - }) - } -}