use crate::{
    pdf::{
        object::{
            MaybeArray, PdfArray, PdfBoolean, PdfDictionary, PdfInteger, PdfName, PdfNull,
            PdfObject, PdfObjectIdentifier, PdfObjectIndirect, PdfReal, PdfStream,
            PdfStreamDictionary, PdfString, UnparsedPdfStreamDictionary,
        },
        parse::{PdfInputPosition, PdfParse, PdfParseError},
    },
    pdf_parse,
    util::ArcOrRef,
};
use std::{
    collections::BTreeMap,
    convert::Infallible,
    fmt,
    num::NonZero,
    str::FromStr,
    sync::{Arc, OnceLock},
};

pub mod object;
pub mod parse;

pub struct PdfObjects {
    objects: OnceLock<BTreeMap<PdfObjectIdentifier, PdfObject>>,
}

#[derive(Copy, Clone, Debug)]
pub struct PdfHeader {
    pub major: NonZero<u16>,
    pub minor: u16,
}

impl PdfHeader {
    pub const PREFIX: &str = "%PDF-";
}

pdf_parse! {
    #[derive(Clone, Debug)]
    pub struct PdfTrailerDictionary {
        #[pdf(name = "Size")]
        pub size: usize,
        #[pdf(name = "Prev")]
        pub prev: Option<usize>,
        #[pdf(name = "Root")]
        pub root: PdfDictionary,
        #[pdf(name = "Encrypt")]
        pub encrypt: Option<PdfDictionary>,
        #[pdf(name = "Info")]
        pub info: Option<PdfDictionary>,
        #[pdf(name = "ID")]
        pub id: Option<[PdfString; 2]>,
        #[pdf(flatten)]
        pub rest: PdfDictionary,
    }
}

pdf_parse! {
    #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
    pub enum PdfXRefName {
        #[pdf(name = "XRef")]
        #[default]
        XRef,
    }
}

pdf_parse! {
    #[derive(Clone, Debug)]
    pub struct PdfXRefStreamDictionaryRest {
        #[pdf(name = "Type")]
        pub ty: PdfXRefName,
        #[pdf(name = "Size")]
        pub size: usize,
        #[pdf(name = "Index")]
        pub index: Option<MaybeArray<usize>>,
        #[pdf(name = "Prev")]
        pub prev: Option<usize>,
        #[pdf(name = "W")]
        pub w: Option<MaybeArray<usize>>,
        #[pdf(name = "Root")]
        pub root: PdfDictionary,
        #[pdf(name = "Encrypt")]
        pub encrypt: Option<PdfDictionary>,
        #[pdf(name = "Info")]
        pub info: Option<PdfDictionary>,
        #[pdf(name = "ID")]
        pub id: Option<[PdfString; 2]>,
        #[pdf(flatten)]
        pub rest: PdfDictionary,
    }
}

#[derive(Clone, Debug)]
pub enum PdfTrailer {
    Trailer {
        trailer_dictionary: PdfTrailerDictionary,
        start_xref: usize,
    },
    Stream {
        xref_stream: PdfStream,
        start_xref: usize,
    },
}

pub struct Pdf {
    pub header: PdfHeader,
    pub objects: Arc<PdfObjects>,
    pub trailer: PdfTrailer,
}

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum PdfCharCategory {
    Regular,
    Whitespace,
    LParen,
    RParen,
    LAngle,
    RAngle,
    LBracket,
    RBracket,
    LBrace,
    RBrace,
    FSlash,
    Percent,
}

impl PdfCharCategory {
    fn new(b: u8) -> Self {
        match b {
            b'\0' | b'\t' | b'\n' | b'\x0C' | b'\r' | b' ' => Self::Whitespace,
            b'(' => Self::LParen,
            b')' => Self::RParen,
            b'<' => Self::LAngle,
            b'>' => Self::RAngle,
            b'[' => Self::LBracket,
            b']' => Self::RBracket,
            b'{' => Self::LBrace,
            b'}' => Self::RBrace,
            b'/' => Self::FSlash,
            b'%' => Self::Percent,
            _ => Self::Regular,
        }
    }
}

#[derive(Clone, Copy, PartialEq)]
enum PdfToken<'a> {
    Regular(&'a [u8]),
    LParen,
    RParen,
    LAngle,
    RAngle,
    LBracket,
    RBracket,
    LBrace,
    RBrace,
    FSlash,
    Comment(&'a [u8]),
}

impl<'a> fmt::Debug for PdfToken<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Regular(contents) => {
                // the two arms look identical, but the shadowed `contents` is a
                // `&str` in the first and a `&[u8]` in the second, so the Debug
                // output differs
                if let Ok(contents) = str::from_utf8(contents) {
                    write!(f, "Regular({contents:?})")
                } else {
                    write!(f, "Regular({contents:?})")
                }
            }
            Self::LParen => write!(f, "LParen"),
            Self::RParen => write!(f, "RParen"),
            Self::LAngle => write!(f, "LAngle"),
            Self::RAngle => write!(f, "RAngle"),
            Self::LBracket => write!(f, "LBracket"),
            Self::RBracket => write!(f, "RBracket"),
            Self::LBrace => write!(f, "LBrace"),
            Self::RBrace => write!(f, "RBrace"),
            Self::FSlash => write!(f, "FSlash"),
            Self::Comment(contents) => {
                if let Ok(contents) = str::from_utf8(contents) {
                    write!(f, "Comment({contents:?})")
                } else {
                    write!(f, "Comment({contents:?})")
                }
            }
        }
    }
}
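/// Cached result of looking one token ahead, so a `peek` followed by `next`
/// does not tokenize the same bytes twice.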
#[derive(Clone)]
struct PdfTokenizerPeek<'a> {
    token: PdfToken<'a>,
    pos_after_token: usize,
}

#[derive(Clone)]
struct PdfTokenizer<'a> {
    bytes: &'a [u8],
    pos: usize,
    peek_cache: Option<PdfTokenizerPeek<'a>>,
}

impl<'a> PdfTokenizer<'a> {
    fn new(bytes: &'a [u8], pos: usize) -> Self {
        Self {
            bytes,
            pos,
            peek_cache: None,
        }
    }
    fn pos(&self) -> PdfInputPosition {
        PdfInputPosition::new(self.pos)
    }
    fn peek_byte(&mut self) -> Option<u8> {
        self.bytes.get(self.pos).copied()
    }
    fn next_byte(&mut self) -> Option<u8> {
        let b = self.bytes.get(self.pos)?;
        self.pos += 1;
        self.peek_cache = None;
        Some(*b)
    }
    fn skip_whitespace(&mut self) {
        while let Some(PdfCharCategory::Whitespace) = self.peek_byte().map(PdfCharCategory::new) {
            self.next_byte();
        }
    }
    fn peek(&mut self) -> Option<PdfToken<'a>> {
        if let Some(PdfTokenizerPeek { token, .. }) = self.peek_cache {
            return Some(token);
        }
        let mut tokenizer = self.clone();
        let token = tokenizer.next()?;
        self.peek_cache = Some(PdfTokenizerPeek {
            token,
            pos_after_token: tokenizer.pos,
        });
        Some(token)
    }
    fn read_bytes(&mut self, len: usize) -> Option<&'a [u8]> {
        let retval = self.bytes.get(self.pos..self.pos.saturating_add(len))?;
        self.peek_cache = None;
        self.pos += len;
        Some(retval)
    }
}

impl<'a> Iterator for PdfTokenizer<'a> {
    type Item = PdfToken<'a>;
    fn next(&mut self) -> Option<Self::Item> {
        if let Some(PdfTokenizerPeek {
            token,
            pos_after_token,
        }) = self.peek_cache.take()
        {
            self.pos = pos_after_token;
            return Some(token);
        }
        loop {
            let start_pos = self.pos;
            break match PdfCharCategory::new(self.next_byte()?) {
                PdfCharCategory::Whitespace => continue,
                PdfCharCategory::LParen => Some(PdfToken::LParen),
                PdfCharCategory::RParen => Some(PdfToken::RParen),
                PdfCharCategory::LAngle => Some(PdfToken::LAngle),
                PdfCharCategory::RAngle => Some(PdfToken::RAngle),
                PdfCharCategory::LBracket => Some(PdfToken::LBracket),
                PdfCharCategory::RBracket => Some(PdfToken::RBracket),
                PdfCharCategory::LBrace => Some(PdfToken::LBrace),
                PdfCharCategory::RBrace => Some(PdfToken::RBrace),
                PdfCharCategory::FSlash => Some(PdfToken::FSlash),
                PdfCharCategory::Percent => {
                    // consume the comment through its end-of-line marker
                    loop {
                        match self.next_byte() {
                            None | Some(b'\n') => break,
                            Some(b'\r') => {
                                if let Some(b'\n') = self.peek_byte() {
                                    self.pos += 1;
                                }
                                break;
                            }
                            Some(_) => continue,
                        }
                    }
                    Some(PdfToken::Comment(&self.bytes[start_pos..self.pos]))
                }
                PdfCharCategory::Regular => {
                    while let Some(PdfCharCategory::Regular) =
                        self.peek_byte().map(PdfCharCategory::new)
                    {
                        self.pos += 1;
                    }
                    Some(PdfToken::Regular(&self.bytes[start_pos..self.pos]))
                }
            };
        }
    }
}

struct PdfParser<'a> {
    objects_arc: Arc<PdfObjects>,
    objects_map: BTreeMap<PdfObjectIdentifier, PdfObject>,
    unparsed_stream_dictionaries: Vec<Arc<UnparsedPdfStreamDictionary>>,
    tokenizer: PdfTokenizer<'a>,
}

impl<'a> PdfParser<'a> {
    fn parse_header(&mut self) -> Result<PdfHeader, PdfParseError> {
        let Some(b'%') = self.tokenizer.bytes.first() else {
            return Err(PdfParseError::NotAPdfFile);
        };
        let Some(PdfToken::Comment(header)) = self.tokenizer.next() else {
            unreachable!()
        };
        let Ok(header) = str::from_utf8(header) else {
            return Err(PdfParseError::NotAPdfFile);
        };
        let header = header.trim_end_matches(['\n', '\r']);
        let Some(version) = header.strip_prefix(PdfHeader::PREFIX) else {
            return Err(PdfParseError::NotAPdfFile);
        };
        let Some((major_str, minor_str)) = version.split_once('.') else {
            return Err(PdfParseError::NotAPdfFile);
        };
        let (Ok(major), Ok(minor)) = (major_str.parse(), minor_str.parse()) else {
            return Err(PdfParseError::NotAPdfFile);
        };
        Ok(PdfHeader { major, minor })
    }
    fn skip_comments_and_whitespace(&mut self) {
        self.tokenizer.skip_whitespace();
        while let Some(PdfToken::Comment(_)) = self.tokenizer.peek() {
            self.tokenizer.next();
            self.tokenizer.skip_whitespace();
        }
    }
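    /// Reads a run of ASCII digits as a number, backtracking if the next
    /// token is not all digits. `on_parse_failed` decides what happens when
    /// the digits don't fit the target type: return an error, or `Ok(None)`
    /// to report "no number here" (its `Option<Infallible>` result can only
    /// ever be `None`).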
    fn parse_digits<T: FromStr>(
        &mut self,
        on_parse_failed: impl FnOnce(PdfInputPosition) -> Result<Option<Infallible>, PdfParseError>,
    ) -> Result<Option<(PdfInputPosition, T)>, PdfParseError> {
        self.skip_comments_and_whitespace();
        let old_tokenizer = self.tokenizer.clone();
        let pos = self.tokenizer.pos();
        let Some(PdfToken::Regular(number)) = self.tokenizer.next() else {
            self.tokenizer = old_tokenizer;
            return Ok(None);
        };
        if !number.iter().all(|b| b.is_ascii_digit()) {
            self.tokenizer = old_tokenizer;
            return Ok(None);
        }
        let Some(number) = str::from_utf8(number).ok().and_then(|v| v.parse().ok()) else {
            self.tokenizer = old_tokenizer;
            return Ok(match on_parse_failed(pos)? {
                None => None,
            });
        };
        Ok(Some((pos, number)))
    }
    fn parse_object_identifier(
        &mut self,
        return_none_for_out_of_range: bool,
    ) -> Result<Option<PdfObjectIdentifier>, PdfParseError> {
        let old_tokenizer = self.tokenizer.clone();
        let Some((pos, object_number)) = self.parse_digits(|pos| {
            if return_none_for_out_of_range {
                Ok(None)
            } else {
                Err(PdfParseError::InvalidObjectNumber { pos })
            }
        })?
        else {
            self.tokenizer = old_tokenizer;
            return Ok(None);
        };
        let Some((_pos, generation_number)) = self.parse_digits(|pos| {
            if return_none_for_out_of_range {
                Ok(None)
            } else {
                Err(PdfParseError::InvalidGenerationNumber { pos })
            }
        })?
        else {
            self.tokenizer = old_tokenizer;
            return Ok(None);
        };
        Ok(Some(PdfObjectIdentifier {
            pos: pos.into(),
            object_number,
            generation_number,
        }))
    }
    fn parse_indirect_object(&mut self) -> Result<Option<PdfObjectIndirect>, PdfParseError> {
        let old_tokenizer = self.tokenizer.clone();
        let Some(id) = self.parse_object_identifier(true)? else {
            self.tokenizer = old_tokenizer;
            return Ok(None);
        };
        if let Some(PdfToken::Regular(b"R")) = self.tokenizer.next() {
            Ok(Some(PdfObjectIndirect::new(&self.objects_arc, id)))
        } else {
            self.tokenizer = old_tokenizer;
            Ok(None)
        }
    }
    fn parse_string_after_l_paren(&mut self) -> Result<PdfString, PdfParseError> {
        let mut contents = Vec::new();
        let mut paren_level = NonZero::new(1usize).expect("non-zero");
        let string_pos = self.tokenizer.pos();
        while let Some(b) = self.tokenizer.next_byte() {
            contents.push(match b {
                b'(' => {
                    paren_level = paren_level.checked_add(1).expect("overflow");
                    b
                }
                b')' => {
                    let Some(new_paren_level) = NonZero::new(paren_level.get() - 1) else {
                        return Ok(PdfString::new(
                            string_pos,
                            ArcOrRef::Arc(Arc::from(contents)),
                        ));
                    };
                    paren_level = new_paren_level;
                    b
                }
                b'\r' if self.tokenizer.peek_byte() == Some(b'\n') => {
                    self.tokenizer.next_byte();
                    b'\n'
                }
                b'\r' | b'\n' => b'\n',
                b'\\' => {
                    let pos = self.tokenizer.pos();
                    let Some(b) = self.tokenizer.next_byte() else {
                        return Err(PdfParseError::InvalidStringEscape { pos });
                    };
                    match b {
                        b'\r' if self.tokenizer.peek_byte() == Some(b'\n') => {
                            self.tokenizer.next_byte();
                            continue;
                        }
                        b'\r' | b'\n' => continue,
                        b'n' => b'\n',
                        b'r' => b'\r',
                        b't' => b'\t',
                        b'b' => b'\x08',
                        b'f' => b'\x0C',
                        b'(' | b')' | b'\\' => b,
                        b'0'..=b'7' => {
                            const MAX_OCTAL_DIGITS: usize = 3;
                            let mut value = b - b'0';
                            let mut len = 1;
                            while len < MAX_OCTAL_DIGITS {
                                let Some(b @ b'0'..=b'7') = self.tokenizer.peek_byte() else {
                                    break;
                                };
                                value <<= 3;
                                value |= b - b'0';
                                len += 1;
                                self.tokenizer.next_byte();
                            }
                            value
                        }
                        _ => {
                            return Err(PdfParseError::InvalidStringEscape { pos });
                        }
                    }
                }
                _ => b,
            });
        }
        Err(PdfParseError::TruncatedFile {
            pos: self.tokenizer.pos(),
        })
    }
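    /// Parses a hex string after the opening `<`: `48656C6C6F>` decodes to
    /// `Hello`, whitespace between digits is ignored, and an odd number of
    /// digits behaves as if a trailing `0` were appended.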
    fn parse_string_after_l_angle(&mut self) -> Result<PdfString, PdfParseError> {
        let mut contents = Vec::new();
        let mut high_digit_value = None;
        let mut push_digit_value = |value: u8| {
            high_digit_value = match high_digit_value {
                Some(high_digit_value) => {
                    contents.push((high_digit_value << 4) | value);
                    None
                }
                None => Some(value),
            };
        };
        let string_pos = self.tokenizer.pos();
        loop {
            let pos = self.tokenizer.pos();
            match self.tokenizer.next_byte() {
                None => {
                    return Err(PdfParseError::TruncatedFile { pos });
                }
                Some(b) if PdfCharCategory::new(b) == PdfCharCategory::Whitespace => {}
                Some(b'>') => {
                    // if we have an odd trailing digit, add the final digit;
                    // otherwise this doesn't modify `contents`
                    push_digit_value(0);
                    return Ok(PdfString::new(
                        string_pos,
                        Arc::<[u8]>::from(contents).into(),
                    ));
                }
                Some(b) => {
                    let Some(value) = (b as char).to_digit(0x10) else {
                        return Err(PdfParseError::InvalidHexStringDigit { pos });
                    };
                    push_digit_value(value as u8);
                }
            }
        }
    }
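    /// Parses a name after the leading `/`, decoding `#xx` hex escapes:
    /// `/A#20B` becomes the three bytes `A`, space, `B`.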
    fn parse_name_after_f_slash(&mut self) -> Result<PdfName, PdfParseError> {
        let mut name = vec![];
        let name_pos = self.tokenizer.pos();
        loop {
            let Some(PdfCharCategory::Regular) =
                self.tokenizer.peek_byte().map(PdfCharCategory::new)
            else {
                return Ok(PdfName::new(name_pos, ArcOrRef::Arc(Arc::from(name))));
            };
            let pos = self.tokenizer.pos();
            match self
                .tokenizer
                .next_byte()
                .expect("just checked that it's not None")
            {
                b'#' => {
                    let mut value = 0u8;
                    for _ in 0..2 {
                        let Some(digit) = self
                            .tokenizer
                            .next_byte()
                            .and_then(|b| (b as char).to_digit(0x10))
                        else {
                            return Err(PdfParseError::InvalidNameEscape { pos });
                        };
                        value <<= 4;
                        value |= digit as u8;
                    }
                    name.push(value);
                }
                b => name.push(b),
            }
        }
    }
    fn parse_array_after_l_bracket(&mut self) -> Result<PdfArray, PdfParseError> {
        let array_pos = self.tokenizer.pos();
        let mut contents: Vec<PdfObject> = Vec::new();
        loop {
            self.skip_comments_and_whitespace();
            if let Some(PdfToken::RBracket) = self.tokenizer.peek() {
                self.tokenizer.next();
                return Ok(PdfArray::from_elements(array_pos, Arc::from(contents)));
            }
            contents.push(self.parse_object()?);
        }
    }
    /// assumes `self.tokenizer.peek_byte() == Some(b'<')`
    fn parse_dictionary_after_one_l_angle(&mut self) -> Result<PdfDictionary, PdfParseError> {
        let l_angle = self.tokenizer.next_byte();
        assert_eq!(l_angle, Some(b'<'));
        let dictionary_pos = self.tokenizer.pos();
        let mut contents: BTreeMap<PdfName, PdfObject> = BTreeMap::new();
        loop {
            self.skip_comments_and_whitespace();
            if let Some(PdfToken::RAngle) = self.tokenizer.peek() {
                self.tokenizer.next();
                let pos = self.tokenizer.pos();
                let b'>' = self
                    .tokenizer
                    .next_byte()
                    .ok_or(PdfParseError::TruncatedFile { pos })?
                else {
                    return Err(PdfParseError::InvalidDictionaryClosingDoubleRAngle { pos });
                };
                return Ok(PdfDictionary::from_fields(
                    dictionary_pos,
                    Arc::new(contents),
                ));
            }
            let name = PdfName::parse(self.parse_object()?.into())?;
            let name_pos = name.pos();
            match contents.entry(name) {
                std::collections::btree_map::Entry::Vacant(entry) => {
                    entry.insert(self.parse_object()?.into());
                }
                std::collections::btree_map::Entry::Occupied(entry) => {
                    return Err(PdfParseError::DuplicateDictionaryKey {
                        pos: name_pos,
                        name: entry.key().clone(),
                    });
                }
            }
        }
    }
    /// assumes `self.tokenizer.peek() == Some(PdfToken::Regular(b"stream"))`
    fn parse_stream_after_dictionary(
        &mut self,
        dictionary: PdfDictionary,
    ) -> Result<PdfStream, PdfParseError> {
        self.tokenizer.skip_whitespace();
        let stream_pos = self.tokenizer.pos();
        let stream = self.tokenizer.next();
        assert_eq!(stream, Some(PdfToken::Regular(b"stream")));
        let len = PdfStreamDictionary::parse_len_from_dictionary(&dictionary)?;
        let eol_pos = self.tokenizer.pos();
        match self.tokenizer.next_byte() {
            None => return Err(PdfParseError::TruncatedFile { pos: eol_pos }),
            Some(b'\r') => {
                let Some(b'\n') = self.tokenizer.next_byte() else {
                    return Err(PdfParseError::InvalidOrMissingEolAfterStreamKeyword {
                        pos: eol_pos,
                    });
                };
            }
            Some(b'\n') => {}
            _ => return Err(PdfParseError::InvalidOrMissingEolAfterStreamKeyword { pos: eol_pos }),
        }
        let Some(data) = self.tokenizer.read_bytes(len) else {
            return Err(PdfParseError::TruncatedFile {
                pos: PdfInputPosition::new(self.tokenizer.bytes.len()),
            });
        };
        let (stream, unparsed) = PdfStream::new_unparsed(stream_pos, dictionary, Arc::from(data));
        self.unparsed_stream_dictionaries.push(unparsed);
        self.skip_comments_and_whitespace();
        let pos = self.tokenizer.pos();
        if let Some(PdfToken::Regular(b"endstream")) = self.tokenizer.next() {
            Ok(stream)
        } else {
            Err(PdfParseError::MissingEndStreamKeyword { pos })
        }
    }
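    /// Parses a single object at the current position: an indirect reference
    /// (`N G R`), `true`/`false`/`null`, a number, a `(...)` or `<...>`
    /// string, a `/Name`, a `[...]` array, or a `<<...>>` dictionary
    /// (optionally followed by a stream).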
    fn parse_object(&mut self) -> Result<PdfObject, PdfParseError> {
        self.skip_comments_and_whitespace();
        if let Some(indirect) = self.parse_indirect_object()? {
            return Ok(indirect.into());
        }
        let pos = self.tokenizer.pos();
        match self
            .tokenizer
            .next()
            .ok_or(PdfParseError::TruncatedFile { pos })?
        {
            PdfToken::Regular(b"true") => Ok(PdfObject::Boolean(PdfBoolean::new(pos, true))),
            PdfToken::Regular(b"false") => Ok(PdfObject::Boolean(PdfBoolean::new(pos, false))),
            PdfToken::Regular(b"null") => Ok(PdfObject::Null(PdfNull::new(pos))),
            PdfToken::Regular(
                number @ ([b'+' | b'-', b'0'..=b'9' | b'.', ..] | [b'0'..=b'9' | b'.', ..]),
            ) => {
                // parse number
                let Ok(number) = str::from_utf8(number) else {
                    return Err(PdfParseError::InvalidNumber { pos });
                };
                let mut parts = number
                    .strip_prefix(&['+', '-'])
                    .unwrap_or(number)
                    .split('.');
                let integer_part = parts
                    .next()
                    .expect("split always returns at least one part");
                let fraction_part = parts.next();
                if parts.next().is_some() {
                    return Err(PdfParseError::InvalidNumber { pos });
                }
                if integer_part.is_empty() && fraction_part.is_none_or(|v| v.is_empty()) {
                    return Err(PdfParseError::InvalidNumber { pos });
                }
                if !integer_part.bytes().all(|v| v.is_ascii_digit()) {
                    return Err(PdfParseError::InvalidNumber { pos });
                }
                if let Some(fraction_part) = fraction_part {
                    if !fraction_part.bytes().all(|v| v.is_ascii_digit()) {
                        return Err(PdfParseError::InvalidNumber { pos });
                    }
                    Ok(PdfObject::Real(PdfReal::new(
                        pos,
                        number
                            .parse()
                            .map_err(|_| PdfParseError::InvalidNumber { pos })?,
                    )))
                } else {
                    Ok(PdfObject::Integer(PdfInteger::new(
                        pos,
                        number
                            .parse()
                            .map_err(|_| PdfParseError::InvalidNumber { pos })?,
                    )))
                }
            }
            PdfToken::Regular(items) => todo!("{:?}", str::from_utf8(items)),
            PdfToken::LParen => self.parse_string_after_l_paren().map(PdfObject::String),
            PdfToken::RParen => todo!(),
            PdfToken::LAngle => {
                if self.tokenizer.peek_byte() == Some(b'<') {
                    let dictionary = self.parse_dictionary_after_one_l_angle()?;
                    self.skip_comments_and_whitespace();
                    if let Some(PdfToken::Regular(b"stream")) = self.tokenizer.peek() {
                        self.parse_stream_after_dictionary(dictionary)
                            .map(PdfObject::Stream)
                    } else {
                        Ok(dictionary.into())
                    }
                } else {
                    self.parse_string_after_l_angle().map(PdfObject::String)
                }
            }
            PdfToken::RAngle => todo!(),
            PdfToken::LBracket => self.parse_array_after_l_bracket().map(PdfObject::Array),
            PdfToken::RBracket => todo!(),
            PdfToken::LBrace => todo!(),
            PdfToken::RBrace => todo!(),
            PdfToken::FSlash => self.parse_name_after_f_slash().map(PdfObject::Name),
            PdfToken::Comment(_) => unreachable!(),
        }
    }
    fn parse_indirect_object_definition(&mut self) -> Result<Option<()>, PdfParseError> {
        self.skip_comments_and_whitespace();
        let Some(id) = self.parse_object_identifier(false)? else {
            return Ok(None);
        };
        self.skip_comments_and_whitespace();
        let obj_pos = self.tokenizer.pos();
        let Some(PdfToken::Regular(b"obj")) = self.tokenizer.next() else {
            return Err(PdfParseError::MissingObj { pos: obj_pos });
        };
        let object = self.parse_object()?;
        self.skip_comments_and_whitespace();
        let end_obj_pos = self.tokenizer.pos();
        let Some(PdfToken::Regular(b"endobj")) = self.tokenizer.next() else {
            return Err(PdfParseError::MissingEndObj { pos: end_obj_pos });
        };
        if self.objects_map.insert(id, object).is_some() {
            Err(PdfParseError::DuplicateIndirectObjectDefinition { pos: id.pos.0, id })
        } else {
            Ok(Some(()))
        }
    }
    fn parse_body(&mut self) -> Result<(), PdfParseError> {
        while let Some(()) = self.parse_indirect_object_definition()? {}
        let Ok(()) = self
            .objects_arc
            .objects
            .set(std::mem::take(&mut self.objects_map))
        else {
            unreachable!();
        };
        self.unparsed_stream_dictionaries
            .drain(..)
            .try_for_each(|v| v.finish_parsing())
    }
    fn parse_xref_table(&mut self) -> Result<(), PdfParseError> {
        self.skip_comments_and_whitespace();
        let xref_pos = self.tokenizer.pos();
        let Some(PdfToken::Regular(b"xref")) = self.tokenizer.peek() else {
            return Ok(());
        };
        todo!("{xref_pos}")
    }
    fn parse_trailer(&mut self) -> Result<PdfTrailer, PdfParseError> {
        self.skip_comments_and_whitespace();
        let trailer_pos = self.tokenizer.pos();
        let trailer_dictionary = match self.tokenizer.peek() {
            Some(PdfToken::Regular(b"trailer")) => {
                self.tokenizer.next();
                Some(PdfTrailerDictionary::parse(self.parse_object()?)?)
            }
            Some(PdfToken::Regular(b"startxref")) => None,
            _ => {
                return Err(PdfParseError::MissingTrailer { pos: trailer_pos });
            }
        };
        self.skip_comments_and_whitespace();
        let start_xref_kw_pos = self.tokenizer.pos();
        let Some(PdfToken::Regular(b"startxref")) = self.tokenizer.next() else {
            return Err(PdfParseError::MissingStartXRefKeyword {
                pos: start_xref_kw_pos,
            });
        };
        let start_xref_pos = self.tokenizer.pos();
        let Some((start_xref_pos, start_xref)) =
            self.parse_digits(|pos| Err(PdfParseError::IntegerOutOfRange { pos }))?
        else {
            return Err(PdfParseError::MissingStartXRefValue {
                pos: start_xref_pos,
            });
        };
        self.tokenizer.skip_whitespace();
        let eof_comment_pos = self.tokenizer.pos();
        let Some(PdfToken::Comment(b"%%EOF" | b"%%EOF\r" | b"%%EOF\r\n" | b"%%EOF\n")) =
            self.tokenizer.next()
        else {
            return Err(PdfParseError::MissingEofComment {
                pos: eof_comment_pos,
            });
        };
        self.tokenizer.skip_whitespace();
        if let Some(byte) = self.tokenizer.peek_byte() {
            return Err(PdfParseError::UnexpectedByte {
                pos: self.tokenizer.pos(),
                byte,
            });
        }
        if let Some(trailer_dictionary) = trailer_dictionary {
            return Ok(PdfTrailer::Trailer {
                trailer_dictionary,
                start_xref,
            });
        }
        let old_tokenizer = self.tokenizer.clone();
        self.tokenizer = PdfTokenizer::new(self.tokenizer.bytes, start_xref);
        let id = self.parse_object_identifier(false);
        self.tokenizer = old_tokenizer;
        let Some(id) = id? else {
            return Err(PdfParseError::InvalidStartXRefValue {
                pos: start_xref_pos,
                start_xref,
            });
        };
        let xref_stream =
            PdfStream::parse(PdfObjectIndirect::new(&self.objects_arc, id).get().into())?;
        Ok(PdfTrailer::Stream {
            xref_stream,
            start_xref,
        })
    }
    fn parse_file(mut self) -> Result<Pdf, PdfParseError> {
        let header = self.parse_header()?;
        self.parse_body()?;
        self.parse_xref_table()?;
        let trailer = self.parse_trailer()?;
        Ok(Pdf {
            header,
            objects: self.objects_arc,
            trailer,
        })
    }
}

impl Pdf {
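    /// Parses a complete PDF file held in memory.
    ///
    /// A minimal sketch of the intended call pattern (the file name is a
    /// placeholder):
    ///
    /// ```ignore
    /// let bytes = std::fs::read("example.pdf")?;
    /// let pdf = Pdf::parse(bytes)?;
    /// println!("PDF version {}.{}", pdf.header.major, pdf.header.minor);
    /// ```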
    pub fn parse(bytes: impl AsRef<[u8]>) -> Result<Self, PdfParseError> {
        PdfParser {
            objects_arc: Arc::new(PdfObjects {
                objects: OnceLock::new(),
            }),
            objects_map: BTreeMap::new(),
            unparsed_stream_dictionaries: vec![],
            tokenizer: PdfTokenizer::new(bytes.as_ref(), 0),
        }
        .parse_file()
    }
}

#[cfg(test)]
mod tests {
    use crate::{
        pdf::{
            object::{
                PdfBoolean, PdfDictionary, PdfInteger, PdfName, PdfNull, PdfObject, PdfString,
            },
            parse::{PdfInputPosition, PdfParse, PdfParseError},
        },
        util::ArcOrRef,
    };
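    // The two tests below are additions sketching the expected behavior of
    // the private tokenizer and header parser on fabricated input fragments
    // (not taken from a real PDF file).
    #[test]
    fn test_tokenizer_smoke() {
        use super::{PdfToken, PdfTokenizer};
        let mut tokenizer = PdfTokenizer::new(b"1 0 obj % comment\n<</Key true>>", 0);
        assert_eq!(tokenizer.next(), Some(PdfToken::Regular(b"1")));
        assert_eq!(tokenizer.next(), Some(PdfToken::Regular(b"0")));
        assert_eq!(tokenizer.next(), Some(PdfToken::Regular(b"obj")));
        // comment tokens keep their `%` prefix and end-of-line marker
        assert_eq!(tokenizer.next(), Some(PdfToken::Comment(b"% comment\n")));
        assert_eq!(tokenizer.next(), Some(PdfToken::LAngle));
        assert_eq!(tokenizer.next(), Some(PdfToken::LAngle));
        assert_eq!(tokenizer.next(), Some(PdfToken::FSlash));
        assert_eq!(tokenizer.next(), Some(PdfToken::Regular(b"Key")));
        assert_eq!(tokenizer.next(), Some(PdfToken::Regular(b"true")));
        assert_eq!(tokenizer.next(), Some(PdfToken::RAngle));
        assert_eq!(tokenizer.next(), Some(PdfToken::RAngle));
        assert_eq!(tokenizer.next(), None);
    }

    #[test]
    fn test_parse_header() -> Result<(), PdfParseError> {
        use super::{PdfObjects, PdfParser, PdfTokenizer};
        use std::{
            collections::BTreeMap,
            sync::{Arc, OnceLock},
        };
        let mut parser = PdfParser {
            objects_arc: Arc::new(PdfObjects {
                objects: OnceLock::new(),
            }),
            objects_map: BTreeMap::new(),
            unparsed_stream_dictionaries: vec![],
            tokenizer: PdfTokenizer::new(b"%PDF-1.7\n", 0),
        };
        let header = parser.parse_header()?;
        assert_eq!(header.major.get(), 1);
        assert_eq!(header.minor, 7);
        Ok(())
    }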
    #[test]
    fn test_deserialize_dict() -> Result<(), PdfParseError> {
        crate::pdf::parse::pdf_parse! {
            #[derive(Debug)]
            #[allow(dead_code)]
            struct TestStruct {
                #[pdf(name = "a")]
                a: i32,
                #[pdf(name = "c")]
                c: i32,
                #[pdf(name = "b")]
                b: i32,
                #[pdf(flatten)]
                rest: PdfDictionary,
            }
        }
        let v: TestStruct = PdfParse::parse(PdfObject::from(PdfDictionary::from_iter([
            (
                PdfName::new_static(b"a"),
                PdfInteger::new(PdfInputPosition::empty(), 1).into(),
            ),
            (
                PdfName::new_static(b"c"),
                PdfInteger::new(PdfInputPosition::empty(), 7).into(),
            ),
            (
                PdfName::new_static(b"b"),
                PdfInteger::new(PdfInputPosition::empty(), 5).into(),
            ),
            (
                PdfName::new_static(b"d"),
                PdfBoolean::new(PdfInputPosition::empty(), false).into(),
            ),
            (
                PdfName::new_static(b"e"),
                PdfNull::new(PdfInputPosition::empty()).into(),
            ),
            (
                PdfName::new_static(b"f"),
                PdfString::new(PdfInputPosition::empty(), ArcOrRef::Ref(b"test")).into(),
            ),
        ])))?;
        let expected = TestStruct {
            a: 1,
            c: 7,
            b: 5,
            rest: PdfDictionary::from_iter([
                (
                    PdfName::new_static(b"d"),
                    PdfBoolean::new(PdfInputPosition::empty(), false).into(),
                ),
                (
                    PdfName::new_static(b"f"),
                    PdfString::new(PdfInputPosition::empty(), ArcOrRef::Ref(b"test")).into(),
                ),
            ]),
        };
        assert_eq!(format!("{v:?}"), format!("{expected:?}"));
        Ok(())
    }
}