//! PDF file parsing: low-level tokenizer, object parser, and whole-file
//! structure parser (header, body, cross-reference data, trailer).
use crate::{
|
|
pdf::{
|
|
content_stream::PdfOperatorUnparsed,
|
|
document_structure::PdfDocumentCatalog,
|
|
object::{
|
|
PdfArray, PdfBoolean, PdfDictionary, PdfInteger, PdfName, PdfNull, PdfObject,
|
|
PdfObjectIdentifier, PdfObjectIndirect, PdfObjectStreamDictionary, PdfReal, PdfStream,
|
|
PdfStreamDictionary, PdfString,
|
|
},
|
|
parse::{
|
|
GetPdfInputPosition, PdfInputPosition, PdfInputPositionKnown, PdfParse, PdfParseError,
|
|
},
|
|
},
|
|
pdf_parse,
|
|
util::{ArcOrRef, DagDebugState},
|
|
};
|
|
use std::{
|
|
any::{Any, TypeId},
|
|
collections::{BTreeMap, HashMap},
|
|
convert::Infallible,
|
|
fmt,
|
|
num::NonZero,
|
|
str::FromStr,
|
|
sync::{Arc, Mutex, OnceLock},
|
|
};
|
|
|
|
pub mod content_stream;
|
|
pub mod document_structure;
|
|
pub mod font;
|
|
pub mod object;
|
|
pub mod parse;
|
|
pub mod render;
|
|
pub mod stream_filters;
|
|
|
|
/// Per-object cache of typed parse results, keyed by the result's `TypeId`.
///
/// Entries are held through `Weak` pointers, so the cache never keeps a parse
/// result alive on its own; dead entries are swept lazily by `gc()`.
struct ParseCache {
    // One slot per result type; the `Weak` may be dangling until the next gc.
    parse_results: HashMap<TypeId, std::sync::Weak<dyn Any + Send + Sync>>,
    // Countdown until the next sweep of dead entries (see `ParseCache::gc`).
    steps_till_next_gc: usize,
}
|
|
|
|
impl Default for ParseCache {
|
|
fn default() -> Self {
|
|
Self {
|
|
parse_results: HashMap::new(),
|
|
steps_till_next_gc: 1,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl ParseCache {
    /// Amortized garbage collection: when the countdown reaches zero, drop
    /// entries whose `Weak` no longer upgrades, then reschedule the next
    /// sweep proportionally to the surviving entry count (at least 10,
    /// times 20) so sweep cost stays amortized O(1) per cache operation.
    fn gc(&mut self) {
        if self.steps_till_next_gc == 0 {
            self.parse_results.retain(|_k, v| v.strong_count() > 0);
            let mut adjusted_len = self.parse_results.len();
            if adjusted_len < 10 {
                adjusted_len = 10;
            }
            self.steps_till_next_gc = adjusted_len.saturating_mul(20);
        } else {
            self.steps_till_next_gc -= 1;
        }
    }
    /// Returns the cached result of type `T`, if an entry exists and its
    /// `Arc` is still alive.
    fn get<T: 'static + Send + Sync>(&mut self) -> Option<Arc<T>> {
        self.gc();
        let Ok(retval) = self
            .parse_results
            .get(&TypeId::of::<T>())?
            .upgrade()?
            .downcast()
        else {
            // Entries are keyed by `TypeId::of::<T>()`, so the downcast to
            // `T` cannot fail.
            unreachable!();
        };
        Some(retval)
    }
    /// Returns the live cached result for `T`, or installs `value` (stored
    /// only as a `Weak`, so the cache does not extend its lifetime).
    ///
    /// The second tuple element is the superseded `Arc` (if any); it is
    /// returned so the caller can drop it *after* releasing any surrounding
    /// lock, keeping arbitrary `Drop` impls from running under the lock.
    fn get_or_insert<T: 'static + Send + Sync>(
        &mut self,
        value: Arc<T>,
    ) -> (Arc<T>, impl Sized + use<T>) {
        use std::collections::hash_map::Entry;
        self.gc();
        match self.parse_results.entry(TypeId::of::<T>()) {
            Entry::Occupied(mut entry) => {
                if let Some(retval) = entry.get().upgrade() {
                    let Ok(retval) = retval.downcast::<T>() else {
                        // Keyed by TypeId, so the downcast cannot fail.
                        unreachable!();
                    };
                    // A live entry already exists: keep it canonical and hand
                    // the caller's `value` back for deferred dropping.
                    (retval, Some(value))
                } else {
                    entry.insert(Arc::<T>::downgrade(&value));
                    (value, None)
                }
            }
            Entry::Vacant(entry) => {
                entry.insert(Arc::<T>::downgrade(&value));
                (value, None)
            }
        }
    }
}
|
|
|
|
/// A parsed PDF object bundled with its own typed parse-result cache.
struct PdfObjectAndParseCache {
    object: PdfObject,
    // Mutex-protected so results can be shared across threads.
    parse_cache: Mutex<ParseCache>,
}
|
|
|
|
impl PdfObjectAndParseCache {
|
|
fn new(object: PdfObject) -> Self {
|
|
Self {
|
|
object,
|
|
parse_cache: Mutex::default(),
|
|
}
|
|
}
|
|
fn parse_cache_get<T: 'static + Send + Sync>(&self) -> Option<Arc<T>> {
|
|
self.parse_cache.lock().expect("not poisoned").get()
|
|
}
|
|
fn parse_cache_get_or_insert<T: 'static + Send + Sync>(&self, value: Arc<T>) -> Arc<T> {
|
|
let mut parse_cache = self.parse_cache.lock().expect("not poisoned");
|
|
let (retval, to_drop_after_unlock) = parse_cache.get_or_insert(value);
|
|
drop(parse_cache);
|
|
drop(to_drop_after_unlock);
|
|
retval
|
|
}
|
|
}
|
|
|
|
/// The fully-parsed object table of a PDF file.
struct PdfObjectsInner {
    // All indirect objects, including those unpacked from object streams.
    objects: BTreeMap<PdfObjectIdentifier, PdfObjectAndParseCache>,
    // Object streams found in the body; retained for ownership/debugging.
    #[allow(dead_code)]
    object_streams: Vec<PdfStream<PdfObjectStreamDictionary>>,
}
|
|
|
|
/// Shared handle to a PDF file's object table.
///
/// Created empty before parsing so indirect references can point at it, and
/// populated exactly once (via `OnceLock::set`) when the body is parsed.
pub struct PdfObjects {
    inner: OnceLock<PdfObjectsInner>,
}
|
|
|
|
/// The PDF version from the `%PDF-M.m` header comment.
#[derive(Copy, Clone, Debug)]
pub struct PdfHeader {
    // Major version; `NonZero` since "%PDF-0.x" is not a valid header.
    pub major: NonZero<u16>,
    pub minor: u16,
}
|
|
|
|
impl PdfHeader {
    /// Magic prefix of the first-line header comment, e.g. `%PDF-1.7`.
    pub const PREFIX: &str = "%PDF-";
}
|
|
|
|
pdf_parse! {
    // The file trailer dictionary (keys per the PDF trailer: Size, Prev,
    // Root, Encrypt, Info, ID); unknown keys are collected into `rest`.
    #[pdf]
    #[derive(Clone)]
    pub struct PdfTrailerDictionary {
        // Total number of entries in the cross-reference table.
        #[pdf(name = "Size")]
        pub size: usize,
        // Byte offset of the previous cross-reference section, if any.
        #[pdf(name = "Prev")]
        pub prev: Option<usize>,
        // The document catalog — the root of the document's object graph.
        #[pdf(name = "Root")]
        pub root: PdfDocumentCatalog,
        // Encryption dictionary; kept unparsed here.
        #[pdf(name = "Encrypt")]
        pub encrypt: Option<PdfDictionary>,
        // Document information dictionary; kept unparsed here.
        #[pdf(name = "Info")]
        pub info: Option<PdfDictionary>,
        // Pair of file identifiers.
        #[pdf(name = "ID")]
        pub id: Option<[PdfString; 2]>,
        // Any remaining, unrecognized trailer keys.
        #[pdf(flatten)]
        pub rest: PdfDictionary,
    }
}
|
|
|
|
impl fmt::Debug for PdfTrailerDictionary {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
DagDebugState::scope(|_state| {
|
|
let Self {
|
|
size,
|
|
prev,
|
|
root,
|
|
encrypt,
|
|
info,
|
|
id,
|
|
rest,
|
|
} = self;
|
|
f.debug_struct("PdfTrailerDictionary")
|
|
.field("size", size)
|
|
.field("prev", prev)
|
|
.field("root", root)
|
|
.field("encrypt", encrypt)
|
|
.field("info", info)
|
|
.field("id", id)
|
|
.field("rest", rest)
|
|
.finish()
|
|
})
|
|
}
|
|
}
|
|
|
|
pdf_parse! {
    // Marker name for the /Type entry of a cross-reference stream; only the
    // literal name "XRef" is accepted.
    #[pdf(name)]
    #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
    pub enum PdfXRefName {
        #[pdf(name = "XRef")]
        #[default]
        XRef,
    }
}
|
|
|
|
pdf_parse! {
    // Dictionary of a cross-reference stream: /Type /XRef plus the
    // xref-specific Index and W arrays, with the ordinary trailer keys
    // flattened into `rest`.
    #[pdf]
    #[derive(Clone)]
    pub struct PdfXRefStreamDictionaryRest {
        #[pdf(name = "Type")]
        pub ty: PdfXRefName,
        // Pairs of (first object number, count) describing the subsections.
        #[pdf(name = "Index")]
        pub index: Option<Arc<[usize]>>,
        // Field widths (in bytes) of each xref entry column.
        #[pdf(name = "W")]
        pub w: Option<Arc<[usize]>>,
        // The trailer dictionary proper (Size, Root, ...).
        #[pdf(flatten)]
        pub rest: PdfTrailerDictionary,
    }
}
|
|
|
|
impl fmt::Debug for PdfXRefStreamDictionaryRest {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
DagDebugState::scope(|_state| {
|
|
let Self { ty, index, w, rest } = self;
|
|
f.debug_struct("PdfXRefStreamDictionaryRest")
|
|
.field("ty", ty)
|
|
.field("index", index)
|
|
.field("w", w)
|
|
.field("rest", rest)
|
|
.finish()
|
|
})
|
|
}
|
|
}
|
|
|
|
/// How the file's trailer was encoded: a classic `trailer` dictionary, or a
/// cross-reference stream that embeds the trailer keys in its dictionary.
#[derive(Clone)]
pub enum PdfTrailer {
    Trailer {
        trailer_dictionary: PdfTrailerDictionary,
        // Byte offset given after the `startxref` keyword.
        start_xref: usize,
    },
    Stream {
        xref_stream: PdfStream<PdfXRefStreamDictionaryRest>,
        // Byte offset given after the `startxref` keyword.
        start_xref: usize,
    },
}
|
|
|
|
impl fmt::Debug for PdfTrailer {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
DagDebugState::scope(|_state| match self {
|
|
Self::Trailer {
|
|
trailer_dictionary,
|
|
start_xref,
|
|
} => f
|
|
.debug_struct("Trailer")
|
|
.field("trailer_dictionary", trailer_dictionary)
|
|
.field("start_xref", start_xref)
|
|
.finish(),
|
|
Self::Stream {
|
|
xref_stream,
|
|
start_xref,
|
|
} => f
|
|
.debug_struct("Stream")
|
|
.field("xref_stream", xref_stream)
|
|
.field("start_xref", start_xref)
|
|
.finish(),
|
|
})
|
|
}
|
|
}
|
|
|
|
impl PdfTrailer {
|
|
pub fn trailer_dictionary(&self) -> &PdfTrailerDictionary {
|
|
match self {
|
|
PdfTrailer::Trailer {
|
|
trailer_dictionary, ..
|
|
} => trailer_dictionary,
|
|
PdfTrailer::Stream { xref_stream, .. } => &xref_stream.dictionary().rest.rest,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// A fully-parsed PDF file: version header, the shared object table, and the
/// trailer.
pub struct Pdf {
    pub header: PdfHeader,
    pub objects: Arc<PdfObjects>,
    pub trailer: PdfTrailer,
}
|
|
|
|
/// Lexical class of a single byte: PDF distinguishes whitespace bytes,
/// delimiter bytes, and "regular" bytes that form tokens.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum PdfCharCategory {
    Regular,
    Whitespace,
    LParen,
    RParen,
    LAngle,
    RAngle,
    LBracket,
    RBracket,
    LBrace,
    RBrace,
    FSlash,
    Percent,
}
|
|
|
|
impl PdfCharCategory {
|
|
fn new(b: u8) -> Self {
|
|
match b {
|
|
b'\0' | b'\t' | b'\n' | b'\x0C' | b'\r' | b' ' => Self::Whitespace,
|
|
b'(' => Self::LParen,
|
|
b')' => Self::RParen,
|
|
b'<' => Self::LAngle,
|
|
b'>' => Self::RAngle,
|
|
b'[' => Self::LBracket,
|
|
b']' => Self::RBracket,
|
|
b'{' => Self::LBrace,
|
|
b'}' => Self::RBrace,
|
|
b'/' => Self::FSlash,
|
|
b'%' => Self::Percent,
|
|
_ => Self::Regular,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// A lexical token borrowed from the input buffer. `Regular` carries a
/// maximal run of regular bytes; `Comment` carries the full comment
/// including the leading `%` and the consumed end-of-line bytes.
#[derive(Clone, Copy, PartialEq)]
enum PdfToken<'a> {
    Regular(&'a [u8]),
    LParen,
    RParen,
    LAngle,
    RAngle,
    LBracket,
    RBracket,
    LBrace,
    RBrace,
    FSlash,
    Comment(&'a [u8]),
}
|
|
|
|
impl<'a> fmt::Debug for PdfToken<'a> {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
match self {
|
|
Self::Regular(contents) => {
|
|
if let Ok(contents) = str::from_utf8(contents) {
|
|
write!(f, "Regular({contents:?})")
|
|
} else {
|
|
write!(f, "Regular({contents:?})")
|
|
}
|
|
}
|
|
Self::LParen => write!(f, "LParen"),
|
|
Self::RParen => write!(f, "RParen"),
|
|
Self::LAngle => write!(f, "LAngle"),
|
|
Self::RAngle => write!(f, "RAngle"),
|
|
Self::LBracket => write!(f, "LBracket"),
|
|
Self::RBracket => write!(f, "RBracket"),
|
|
Self::LBrace => write!(f, "LBrace"),
|
|
Self::RBrace => write!(f, "RBrace"),
|
|
Self::FSlash => write!(f, "FSlash"),
|
|
Self::Comment(contents) => {
|
|
if let Ok(contents) = str::from_utf8(contents) {
|
|
write!(f, "Comment({contents:?})")
|
|
} else {
|
|
write!(f, "Comment({contents:?})")
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Cached result of `PdfTokenizer::peek`: the token plus the input position
/// immediately after it, so a subsequent `next()` can replay it cheaply.
#[derive(Clone)]
struct PdfTokenizerPeek<'a> {
    token: PdfToken<'a>,
    pos_after_token: usize,
}
|
|
|
|
/// Byte-level tokenizer over a PDF input buffer.
///
/// `Clone` is cheap (slice + position), which the parser exploits for
/// backtracking: clone, try, restore on failure.
#[derive(Clone)]
struct PdfTokenizer<'a> {
    bytes: &'a [u8],
    // Current offset plus provenance (e.g. containing-stream position).
    pos: PdfInputPositionKnown,
    // One-token lookahead; invalidated by any byte-level consumption.
    peek_cache: Option<PdfTokenizerPeek<'a>>,
}
|
|
|
|
impl<'a> PdfTokenizer<'a> {
    /// Creates a tokenizer over `bytes`, starting at `pos`.
    fn new(bytes: &'a [u8], pos: PdfInputPositionKnown) -> Self {
        Self {
            bytes,
            pos,
            peek_cache: None,
        }
    }
    /// Current position wrapped as a known `PdfInputPosition`.
    fn pos(&self) -> PdfInputPosition {
        PdfInputPosition::new(Some(self.pos))
    }
    /// Returns the byte at the current position without consuming it.
    fn peek_byte(&mut self) -> Option<u8> {
        self.bytes.get(self.pos.pos).copied()
    }
    /// Consumes and returns the next byte. Invalidates the token peek
    /// cache, since token boundaries may have changed.
    fn next_byte(&mut self) -> Option<u8> {
        let b = self.bytes.get(self.pos.pos)?;
        self.pos.pos += 1;
        self.peek_cache = None;
        Some(*b)
    }
    /// Consumes the maximal run of PDF whitespace bytes.
    fn skip_whitespace(&mut self) {
        while let Some(PdfCharCategory::Whitespace) = self.peek_byte().map(PdfCharCategory::new) {
            self.next_byte();
        }
    }
    /// Returns the next token without consuming it. The token and its end
    /// position are cached so the following `next()` is O(1).
    fn peek(&mut self) -> Option<PdfToken<'a>> {
        if let Some(PdfTokenizerPeek { token, .. }) = self.peek_cache {
            return Some(token);
        }
        // Tokenize on a clone so `self`'s position stays untouched.
        let mut tokenizer = self.clone();
        let token = tokenizer.next()?;
        self.peek_cache = Some(PdfTokenizerPeek {
            token,
            pos_after_token: tokenizer.pos.pos,
        });
        Some(token)
    }
    /// Consumes exactly `len` raw bytes (used for stream data); returns
    /// `None` without consuming anything if fewer than `len` remain.
    /// Invalidates the token peek cache.
    fn read_bytes(&mut self, len: usize) -> Option<&'a [u8]> {
        let retval = self
            .bytes
            .get(self.pos.pos..self.pos.pos.saturating_add(len))?;
        self.peek_cache = None;
        self.pos.pos += len;
        Some(retval)
    }
}
|
|
|
|
impl<'a> Iterator for PdfTokenizer<'a> {
    type Item = PdfToken<'a>;

    /// Produces the next token. A pending `peek()` result is replayed
    /// first; otherwise whitespace is skipped and the next byte's category
    /// decides the token. Comments are returned as tokens (including the
    /// leading `%` and the consumed end-of-line bytes).
    fn next(&mut self) -> Option<Self::Item> {
        if let Some(PdfTokenizerPeek {
            token,
            pos_after_token,
        }) = self.peek_cache.take()
        {
            // Replay the token computed by `peek()` and jump straight to
            // its recorded end position.
            self.pos.pos = pos_after_token;
            return Some(token);
        }
        loop {
            let start_pos = self.pos.pos;
            // `continue` skips whitespace; every other arm breaks with a token.
            break match PdfCharCategory::new(self.next_byte()?) {
                PdfCharCategory::Whitespace => continue,
                PdfCharCategory::LParen => Some(PdfToken::LParen),
                PdfCharCategory::RParen => Some(PdfToken::RParen),
                PdfCharCategory::LAngle => Some(PdfToken::LAngle),
                PdfCharCategory::RAngle => Some(PdfToken::RAngle),
                PdfCharCategory::LBracket => Some(PdfToken::LBracket),
                PdfCharCategory::RBracket => Some(PdfToken::RBracket),
                PdfCharCategory::LBrace => Some(PdfToken::LBrace),
                PdfCharCategory::RBrace => Some(PdfToken::RBrace),
                PdfCharCategory::FSlash => Some(PdfToken::FSlash),
                PdfCharCategory::Percent => {
                    // Consume to end of line; \n, \r, and \r\n all
                    // terminate, and the terminator bytes remain part of
                    // the comment slice.
                    loop {
                        match self.next_byte() {
                            None | Some(b'\n') => break,
                            Some(b'\r') => {
                                if let Some(b'\n') = self.peek_byte() {
                                    self.pos.pos += 1;
                                }
                                break;
                            }
                            Some(_) => continue,
                        }
                    }
                    Some(PdfToken::Comment(&self.bytes[start_pos..self.pos.pos]))
                }
                PdfCharCategory::Regular => {
                    // Greedily extend over the maximal run of regular bytes.
                    while let Some(PdfCharCategory::Regular) =
                        self.peek_byte().map(PdfCharCategory::new)
                    {
                        self.pos.pos += 1;
                    }
                    Some(PdfToken::Regular(&self.bytes[start_pos..self.pos.pos]))
                }
            };
        }
    }
}
|
|
|
|
/// Object-level parser: a tokenizer plus the shared object table that
/// indirect references resolve against.
struct PdfParser<'a> {
    objects: Arc<PdfObjects>,
    tokenizer: PdfTokenizer<'a>,
}
|
|
|
|
/// Outcome of `parse_object_or_operator`: a complete object, a dictionary
/// followed by the `stream` keyword (whose data the caller must consume),
/// or a content-stream operator keyword.
enum PdfObjectOrStreamDictionaryOrOperator {
    StreamDictionary {
        dictionary: PdfDictionary,
        // Position of the `stream` keyword, for error reporting.
        stream_kw_pos: PdfInputPosition,
    },
    Object(PdfObject),
    Operator(PdfOperatorUnparsed),
}
|
|
|
|
impl PdfObjectOrStreamDictionaryOrOperator {
|
|
fn error_on_stream_or_operator(self) -> Result<PdfObject, PdfParseError> {
|
|
match self {
|
|
PdfObjectOrStreamDictionaryOrOperator::StreamDictionary {
|
|
dictionary: _,
|
|
stream_kw_pos,
|
|
} => Err(PdfParseError::StreamNotAllowedHere { pos: stream_kw_pos }),
|
|
PdfObjectOrStreamDictionaryOrOperator::Object(object) => Ok(object),
|
|
PdfObjectOrStreamDictionaryOrOperator::Operator(operator) => {
|
|
Err(PdfParseError::OperatorNotAllowedHere { operator })
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'a> PdfParser<'a> {
    /// Skips any interleaving of whitespace and comment tokens.
    fn skip_comments_and_whitespace(&mut self) {
        self.tokenizer.skip_whitespace();
        while let Some(PdfToken::Comment(_)) = self.tokenizer.peek() {
            self.tokenizer.next();
            self.tokenizer.skip_whitespace();
        }
    }
    /// Tries to read an all-digit token and parse it as `T`.
    ///
    /// Returns `Ok(None)` (with the tokenizer restored) when the next token
    /// is not all digits. When the token *is* all digits but `T::from_str`
    /// fails (e.g. out of range), `on_parse_failed` decides between
    /// `Ok(None)` and an error.
    fn parse_digits<T: FromStr>(
        &mut self,
        on_parse_failed: impl FnOnce(PdfInputPosition) -> Result<Option<Infallible>, PdfParseError>,
    ) -> Result<Option<(PdfInputPosition, T)>, PdfParseError> {
        self.skip_comments_and_whitespace();
        // Snapshot for backtracking; tokenizer clones are cheap.
        let old_tokenizer = self.tokenizer.clone();
        let pos = self.tokenizer.pos();
        let Some(PdfToken::Regular(number)) = self.tokenizer.next() else {
            self.tokenizer = old_tokenizer;
            return Ok(None);
        };
        if !number.iter().all(|b| b.is_ascii_digit()) {
            self.tokenizer = old_tokenizer;
            return Ok(None);
        }
        let Some(number) = str::from_utf8(number).ok().and_then(|v| v.parse().ok()) else {
            self.tokenizer = old_tokenizer;
            // `Infallible` has no values, so a successful callback can only
            // have returned `None`; the match is exhaustive.
            return Ok(match on_parse_failed(pos)? {
                None => None,
            });
        };
        Ok(Some((pos, number)))
    }
    /// Tries to parse `<object number> <generation number>` as an object
    /// identifier, backtracking fully on failure. With
    /// `return_none_for_out_of_range`, numeric-range failures also yield
    /// `Ok(None)` instead of an error.
    fn parse_object_identifier(
        &mut self,
        return_none_for_out_of_range: bool,
    ) -> Result<Option<PdfObjectIdentifier>, PdfParseError> {
        let old_tokenizer = self.tokenizer.clone();
        let Some((pos, object_number)) = self.parse_digits(|pos| {
            if return_none_for_out_of_range {
                Ok(None)
            } else {
                Err(PdfParseError::InvalidObjectNumber { pos })
            }
        })?
        else {
            self.tokenizer = old_tokenizer;
            return Ok(None);
        };
        let Some((_pos, generation_number)) = self.parse_digits(|pos| {
            if return_none_for_out_of_range {
                Ok(None)
            } else {
                Err(PdfParseError::InvalidGenerationNumber { pos })
            }
        })?
        else {
            self.tokenizer = old_tokenizer;
            return Ok(None);
        };
        Ok(Some(PdfObjectIdentifier {
            pos: pos.into(),
            object_number,
            generation_number,
        }))
    }
    /// Tries to parse an indirect reference `N G R`, backtracking fully if
    /// the trailing `R` keyword is absent.
    fn parse_indirect_object(&mut self) -> Result<Option<PdfObjectIndirect>, PdfParseError> {
        let old_tokenizer = self.tokenizer.clone();
        let Some(id) = self.parse_object_identifier(true)? else {
            self.tokenizer = old_tokenizer;
            return Ok(None);
        };
        if let Some(PdfToken::Regular(b"R")) = self.tokenizer.next() {
            Ok(Some(PdfObjectIndirect::new(&self.objects, id)))
        } else {
            self.tokenizer = old_tokenizer;
            Ok(None)
        }
    }
    /// Parses a literal string, assuming the opening `(` was already
    /// consumed. Handles nested balanced parentheses, EOL normalization
    /// (\r and \r\n become \n), backslash escapes, and 1-3 digit octal
    /// escapes.
    fn parse_string_after_l_paren(&mut self) -> Result<PdfString, PdfParseError> {
        let mut contents = Vec::new();
        // Depth of unmatched `(`; the string ends when it would reach 0.
        let mut paren_level = NonZero::new(1usize).expect("non-zero");
        let string_pos = self.tokenizer.pos();
        while let Some(b) = self.tokenizer.next_byte() {
            contents.push(match b {
                b'(' => {
                    paren_level = paren_level.checked_add(1).expect("overflow");
                    b
                }
                b')' => {
                    let Some(new_paren_level) = NonZero::new(paren_level.get() - 1) else {
                        // Matching close of the outermost `(`: done.
                        return Ok(PdfString::new(
                            string_pos,
                            ArcOrRef::Arc(Arc::from(contents)),
                        ));
                    };
                    paren_level = new_paren_level;
                    b
                }
                // Unescaped EOLs are normalized to a single \n.
                b'\r' if self.tokenizer.peek_byte() == Some(b'\n') => {
                    self.tokenizer.next_byte();
                    b'\n'
                }
                b'\r' | b'\n' => b'\n',
                b'\\' => {
                    let pos = self.tokenizer.pos();
                    let Some(b) = self.tokenizer.next_byte() else {
                        return Err(PdfParseError::InvalidStringEscape { pos });
                    };
                    match b {
                        // Backslash-EOL is a line continuation: emit nothing.
                        b'\r' if self.tokenizer.peek_byte() == Some(b'\n') => {
                            self.tokenizer.next_byte();
                            continue;
                        }
                        b'\r' | b'\n' => continue,
                        b'n' => b'\n',
                        b'r' => b'\r',
                        b't' => b'\t',
                        b'b' => b'\x08',
                        b'f' => b'\x0C',
                        b'(' | b')' | b'\\' => b,
                        b'0'..=b'7' => {
                            // Octal escape: up to three octal digits.
                            const MAX_OCTAL_DIGITS: usize = 3;
                            let mut value = b - b'0';
                            let mut len = 1;
                            while len < MAX_OCTAL_DIGITS {
                                let Some(b @ b'0'..=b'7') = self.tokenizer.peek_byte() else {
                                    break;
                                };
                                value <<= 3;
                                value |= b - b'0';
                                len += 1;
                                self.tokenizer.next_byte();
                            }
                            value
                        }
                        _ => {
                            return Err(PdfParseError::InvalidStringEscape { pos });
                        }
                    }
                }
                _ => b,
            });
        }
        // Input ended before the closing `)`.
        Err(PdfParseError::TruncatedFile {
            pos: self.tokenizer.pos(),
        })
    }
    /// Parses a hex string, assuming the opening `<` was already consumed.
    /// Whitespace between digits is ignored; an odd trailing digit is
    /// padded with a low 0 nibble.
    fn parse_string_after_l_angle(&mut self) -> Result<PdfString, PdfParseError> {
        let mut contents = Vec::new();
        // Holds the high nibble until its partner arrives.
        let mut high_digit_value = None;
        let mut push_digit_value = |value: u8| {
            high_digit_value = match high_digit_value {
                Some(high_digit_value) => {
                    contents.push((high_digit_value << 4) | value);
                    None
                }
                None => Some(value),
            };
        };
        let string_pos = self.tokenizer.pos();
        loop {
            let pos = self.tokenizer.pos();
            match self.tokenizer.next_byte() {
                None => {
                    return Err(PdfParseError::TruncatedFile { pos });
                }
                Some(b) if PdfCharCategory::new(b) == PdfCharCategory::Whitespace => {}
                Some(b'>') => {
                    // if we have an odd trailing digit, add the final digit, otherwise doesn't modify contents
                    push_digit_value(0);
                    return Ok(PdfString::new(
                        string_pos,
                        Arc::<[u8]>::from(contents).into(),
                    ));
                }
                Some(b) => {
                    let Some(value) = (b as char).to_digit(0x10) else {
                        return Err(PdfParseError::InvalidHexStringDigit { pos });
                    };
                    push_digit_value(value as u8);
                }
            }
        }
    }
    /// Parses a name, assuming the `/` was already consumed. The name is the
    /// run of regular bytes that follows, with `#xx` two-hex-digit escapes
    /// decoded.
    fn parse_name_after_f_slash(&mut self) -> Result<PdfName, PdfParseError> {
        let mut name = vec![];
        let name_pos = self.tokenizer.pos();
        loop {
            let Some(PdfCharCategory::Regular) =
                self.tokenizer.peek_byte().map(PdfCharCategory::new)
            else {
                // Any delimiter/whitespace (or EOF) ends the name.
                return Ok(PdfName::new(name_pos, ArcOrRef::Arc(Arc::from(name))));
            };
            let pos = self.tokenizer.pos();
            match self
                .tokenizer
                .next_byte()
                .expect("just checked that it's not None")
            {
                b'#' => {
                    // `#xx` escape: exactly two hex digits.
                    let mut value = 0u8;
                    for _ in 0..2 {
                        let Some(digit) = self
                            .tokenizer
                            .next_byte()
                            .and_then(|b| (b as char).to_digit(0x10))
                        else {
                            return Err(PdfParseError::InvalidNameEscape { pos });
                        };
                        value <<= 4;
                        value |= digit as u8;
                    }
                    name.push(value);
                }
                b => name.push(b),
            }
        }
    }
    /// Parses an array, assuming the opening `[` was already consumed.
    /// Elements may be any object, but not streams or operators.
    fn parse_array_after_l_bracket(&mut self) -> Result<PdfArray, PdfParseError> {
        let array_pos = self.tokenizer.pos();
        let mut contents: Vec<PdfObject> = Vec::new();
        loop {
            self.skip_comments_and_whitespace();
            if let Some(PdfToken::RBracket) = self.tokenizer.peek() {
                self.tokenizer.next();
                return Ok(PdfArray::from_elements(array_pos, Arc::from(contents)));
            }
            contents.push(
                self.parse_object_or_operator()?
                    .error_on_stream_or_operator()?,
            );
        }
    }
    /// assumes `self.tokenizer.peek_byte() == Some(b'<')`
    ///
    /// Parses a dictionary `<< /Key value ... >>` given that the *first*
    /// `<` was already consumed by the caller; duplicate keys are an error.
    fn parse_dictionary_after_one_l_angle(&mut self) -> Result<PdfDictionary, PdfParseError> {
        let l_angle = self.tokenizer.next_byte();
        assert_eq!(l_angle, Some(b'<'));
        let dictionary_pos = self.tokenizer.pos();
        let mut contents: BTreeMap<PdfName, PdfObject> = BTreeMap::new();
        loop {
            self.skip_comments_and_whitespace();
            if let Some(PdfToken::RAngle) = self.tokenizer.peek() {
                self.tokenizer.next();
                let pos = self.tokenizer.pos();
                // The closing delimiter must be exactly `>>`.
                let b'>' = self
                    .tokenizer
                    .next_byte()
                    .ok_or(PdfParseError::TruncatedFile { pos })?
                else {
                    return Err(PdfParseError::InvalidDictionaryClosingDoubleRAngle { pos });
                };
                return Ok(PdfDictionary::from_fields(
                    dictionary_pos,
                    Arc::new(contents),
                ));
            }
            let name = PdfName::parse(
                self.parse_object_or_operator()?
                    .error_on_stream_or_operator()?,
            )?;
            let name_pos = name.pos();
            match contents.entry(name) {
                std::collections::btree_map::Entry::Vacant(entry) => {
                    entry.insert(
                        self.parse_object_or_operator()?
                            .error_on_stream_or_operator()?,
                    );
                }
                std::collections::btree_map::Entry::Occupied(entry) => {
                    return Err(PdfParseError::DuplicateDictionaryKey {
                        pos: name_pos,
                        name: entry.key().clone(),
                    });
                }
            }
        }
    }
    /// Parses one object (boolean, null, number, string, name, array,
    /// dictionary, or indirect reference), or a dictionary followed by the
    /// `stream` keyword, or an operator keyword. Tries the `N G R` indirect
    /// form first because it would otherwise lex as two integers.
    fn parse_object_or_operator(
        &mut self,
    ) -> Result<PdfObjectOrStreamDictionaryOrOperator, PdfParseError> {
        self.skip_comments_and_whitespace();
        if let Some(indirect) = self.parse_indirect_object()? {
            return Ok(PdfObjectOrStreamDictionaryOrOperator::Object(
                indirect.into(),
            ));
        }
        let pos = self.tokenizer.pos();
        Ok(PdfObjectOrStreamDictionaryOrOperator::Object(
            match self
                .tokenizer
                .next()
                .ok_or(PdfParseError::TruncatedFile { pos })?
            {
                PdfToken::Regular(b"true") => PdfObject::Boolean(PdfBoolean::new(pos, true)),
                PdfToken::Regular(b"false") => PdfObject::Boolean(PdfBoolean::new(pos, false)),
                PdfToken::Regular(b"null") => PdfObject::Null(PdfNull::new(pos)),
                // A token that starts like a number: optional sign, then a
                // digit or decimal point.
                PdfToken::Regular(
                    number @ ([b'+' | b'-', b'0'..=b'9' | b'.', ..] | [b'0'..=b'9' | b'.', ..]),
                ) => {
                    // parse number
                    let Ok(number) = str::from_utf8(number) else {
                        return Err(PdfParseError::InvalidNumber { pos });
                    };
                    let mut parts = number
                        .strip_prefix(&['+', '-'])
                        .unwrap_or(number)
                        .split('.');
                    let integer_part = parts
                        .next()
                        .expect("split always returns at least one part");
                    let fraction_part = parts.next();
                    // More than one '.' is malformed.
                    if parts.next().is_some() {
                        return Err(PdfParseError::InvalidNumber { pos });
                    }
                    // A bare sign or a lone '.' is malformed.
                    if integer_part.is_empty() && fraction_part.is_none_or(|v| v.is_empty()) {
                        return Err(PdfParseError::InvalidNumber { pos });
                    }
                    if !integer_part.bytes().all(|v| v.is_ascii_digit()) {
                        return Err(PdfParseError::InvalidNumber { pos });
                    }
                    if let Some(fraction_part) = fraction_part {
                        if !fraction_part.bytes().all(|v| v.is_ascii_digit()) {
                            return Err(PdfParseError::InvalidNumber { pos });
                        }
                        // Has a decimal point: real number.
                        PdfObject::Real(PdfReal::new(
                            pos,
                            number
                                .parse()
                                .map_err(|_| PdfParseError::InvalidNumber { pos })?,
                        ))
                    } else {
                        PdfObject::Integer(PdfInteger::new(
                            pos,
                            number
                                .parse()
                                .map_err(|_| PdfParseError::InvalidNumber { pos })?,
                        ))
                    }
                }
                // Any other bare keyword is a (content-stream) operator.
                PdfToken::Regular(name) => {
                    return Ok(PdfObjectOrStreamDictionaryOrOperator::Operator(
                        PdfOperatorUnparsed::new(pos, ArcOrRef::Arc(name.into())),
                    ));
                }
                PdfToken::LParen => PdfObject::String(self.parse_string_after_l_paren()?),
                PdfToken::RParen => todo!(),
                PdfToken::LAngle => {
                    if self.tokenizer.peek_byte() == Some(b'<') {
                        // `<<` — dictionary; a following `stream` keyword
                        // makes it a stream dictionary for the caller.
                        let dictionary = self.parse_dictionary_after_one_l_angle()?;
                        self.skip_comments_and_whitespace();
                        if let Some(PdfToken::Regular(b"stream")) = self.tokenizer.peek() {
                            return Ok(PdfObjectOrStreamDictionaryOrOperator::StreamDictionary {
                                dictionary,
                                stream_kw_pos: self.tokenizer.pos(),
                            });
                        } else {
                            dictionary.into()
                        }
                    } else {
                        // Single `<` — hex string.
                        self.parse_string_after_l_angle()?.into()
                    }
                }
                PdfToken::RAngle => todo!(),
                PdfToken::LBracket => self.parse_array_after_l_bracket()?.into(),
                PdfToken::RBracket => todo!(),
                PdfToken::LBrace => todo!(),
                PdfToken::RBrace => todo!(),
                PdfToken::FSlash => self.parse_name_after_f_slash()?.into(),
                // Comments were consumed by skip_comments_and_whitespace.
                PdfToken::Comment(_) => unreachable!(),
            },
        ))
    }
}
|
|
|
|
/// Whole-file parser: wraps the object parser and accumulates indirect
/// object definitions before publishing them into the shared `PdfObjects`.
struct PdfFileParser<'a> {
    parser: PdfParser<'a>,
    objects_map: BTreeMap<PdfObjectIdentifier, PdfObjectAndParseCache>,
}
|
|
|
|
impl<'a> PdfFileParser<'a> {
    /// Parses the `%PDF-M.m` header comment on the first line.
    fn parse_header(&mut self) -> Result<PdfHeader, PdfParseError> {
        let Some(b'%') = self.parser.tokenizer.bytes.first() else {
            return Err(PdfParseError::NotAPdfFile);
        };
        // First byte is '%', so the tokenizer must yield a comment token.
        let Some(PdfToken::Comment(header)) = self.parser.tokenizer.next() else {
            unreachable!()
        };
        let Ok(header) = str::from_utf8(header) else {
            return Err(PdfParseError::NotAPdfFile);
        };
        // The comment token includes the trailing EOL bytes; drop them.
        let header = header.trim_end_matches(['\n', '\r']);
        let Some(version) = header.strip_prefix(PdfHeader::PREFIX) else {
            return Err(PdfParseError::NotAPdfFile);
        };
        let Some((major_str, minor_str)) = version.split_once('.') else {
            return Err(PdfParseError::NotAPdfFile);
        };
        let (Ok(major), Ok(minor)) = (major_str.parse(), minor_str.parse()) else {
            return Err(PdfParseError::NotAPdfFile);
        };
        Ok(PdfHeader { major, minor })
    }
    /// assumes `self.tokenizer.peek() == Some(PdfToken::Regular(b"stream"))`
    ///
    /// Consumes the `stream` keyword, the required EOL, exactly
    /// `/Length` bytes of raw data, and the `endstream` keyword.
    fn parse_stream_after_dictionary(
        &mut self,
        dictionary: PdfDictionary,
    ) -> Result<PdfStream, PdfParseError> {
        self.parser.tokenizer.skip_whitespace();
        let stream_pos = self.parser.tokenizer.pos();
        let stream = self.parser.tokenizer.next();
        assert_eq!(stream, Some(PdfToken::Regular(b"stream")));
        // Need the typed dictionary for its `len` (/Length) field.
        let dictionary = PdfStreamDictionary::parse(dictionary.into())?;
        let eol_pos = self.parser.tokenizer.pos();
        // After `stream`, only `\r\n` or a bare `\n` is accepted.
        match self.parser.tokenizer.next_byte() {
            None => return Err(PdfParseError::TruncatedFile { pos: eol_pos }),
            Some(b'\r') => {
                let Some(b'\n') = self.parser.tokenizer.next_byte() else {
                    return Err(PdfParseError::InvalidOrMissingEolAfterStreamKeyword {
                        pos: eol_pos,
                    });
                };
            }
            Some(b'\n') => {}
            _ => return Err(PdfParseError::InvalidOrMissingEolAfterStreamKeyword { pos: eol_pos }),
        }
        let Some(data) = self.parser.tokenizer.read_bytes(dictionary.len) else {
            // Report the truncation at the end of the input buffer.
            return Err(PdfParseError::TruncatedFile {
                pos: PdfInputPosition::new(Some(PdfInputPositionKnown {
                    pos: self.parser.tokenizer.bytes.len(),
                    ..self.parser.tokenizer.pos
                })),
            });
        };
        let stream = PdfStream::new(
            stream_pos,
            &self.parser.objects,
            dictionary,
            Arc::from(data),
        );
        self.parser.skip_comments_and_whitespace();
        let pos = self.parser.tokenizer.pos();
        if let Some(PdfToken::Regular(b"endstream")) = self.parser.tokenizer.next() {
            Ok(stream)
        } else {
            Err(PdfParseError::MissingEndStreamKeyword { pos })
        }
    }
    /// Parses one object, allowing (and completing) stream objects;
    /// operators are an error here.
    fn parse_object(&mut self) -> Result<PdfObject, PdfParseError> {
        match self.parser.parse_object_or_operator()? {
            PdfObjectOrStreamDictionaryOrOperator::StreamDictionary {
                dictionary,
                stream_kw_pos: _,
            } => Ok(PdfObject::Stream(
                self.parse_stream_after_dictionary(dictionary)?,
            )),
            PdfObjectOrStreamDictionaryOrOperator::Object(object) => Ok(object),
            PdfObjectOrStreamDictionaryOrOperator::Operator(operator) => {
                Err(PdfParseError::OperatorNotAllowedHere { operator })
            }
        }
    }
    /// Parses one `N G obj ... endobj` definition and records it in
    /// `objects_map`. Returns `Ok(None)` when the next input is not an
    /// object identifier (i.e. the body has ended).
    fn parse_indirect_object_definition(&mut self) -> Result<Option<()>, PdfParseError> {
        self.parser.skip_comments_and_whitespace();
        let Some(id) = self.parser.parse_object_identifier(false)? else {
            return Ok(None);
        };
        self.parser.skip_comments_and_whitespace();
        let obj_pos = self.parser.tokenizer.pos();
        let Some(PdfToken::Regular(b"obj")) = self.parser.tokenizer.next() else {
            return Err(PdfParseError::MissingObj { pos: obj_pos });
        };
        let object = self.parse_object()?;
        self.parser.skip_comments_and_whitespace();
        let end_obj_pos = self.parser.tokenizer.pos();
        let Some(PdfToken::Regular(b"endobj")) = self.parser.tokenizer.next() else {
            return Err(PdfParseError::MissingEndObj { pos: end_obj_pos });
        };
        if self
            .objects_map
            .insert(id, PdfObjectAndParseCache::new(object))
            .is_some()
        {
            Err(PdfParseError::DuplicateIndirectObjectDefinition { pos: id.pos.0, id })
        } else {
            Ok(Some(()))
        }
    }
    /// Unpacks the objects contained in one object stream (/Type /ObjStm)
    /// into `objects_map`. The stream's header lists N pairs of
    /// (object number, byte offset), followed by the objects themselves.
    fn parse_object_stream(
        &mut self,
        object_stream: &PdfStream<PdfObjectStreamDictionary>,
    ) -> Result<(), PdfParseError> {
        let data = object_stream.decoded_data().as_ref()?;
        // A nested parser over the decoded stream data; positions record the
        // containing stream so errors can point back into the file.
        let mut parser = PdfParser {
            tokenizer: PdfTokenizer::new(
                data,
                PdfInputPositionKnown {
                    pos: 0,
                    containing_streams_pos: Some(
                        object_stream
                            .get_pdf_input_position()
                            .get()
                            .expect("known to be set")
                            .pos,
                    ),
                },
            ),
            objects: self.parser.objects.clone(),
        };
        let mut object_ids_and_byte_positions =
            Vec::<(PdfObjectIdentifier, usize)>::with_capacity(object_stream.dictionary().rest.n);
        for _ in 0..object_stream.dictionary().rest.n {
            parser.skip_comments_and_whitespace();
            let Some((pos, object_number)) =
                parser.parse_digits(|pos| Err(PdfParseError::InvalidObjectNumber { pos }))?
            else {
                return Err(PdfParseError::InvalidObjectNumber {
                    pos: parser.tokenizer.pos(),
                });
            };
            parser.skip_comments_and_whitespace();
            let Some((_, byte_position)) =
                parser.parse_digits(|pos| Err(PdfParseError::InvalidNumber { pos }))?
            else {
                return Err(PdfParseError::InvalidNumber {
                    pos: parser.tokenizer.pos(),
                });
            };
            object_ids_and_byte_positions.push((
                PdfObjectIdentifier {
                    pos: pos.into(),
                    object_number,
                    // Objects inside an object stream always have
                    // generation 0.
                    generation_number: 0,
                },
                byte_position,
            ));
        }
        // NOTE(review): the recorded byte offsets are ignored and objects
        // are parsed sequentially in listed order — confirm this is
        // acceptable for writers that pad between objects.
        for (id, _byte_position) in object_ids_and_byte_positions {
            let object = parser
                .parse_object_or_operator()?
                .error_on_stream_or_operator()?;
            if self
                .objects_map
                .insert(id, PdfObjectAndParseCache::new(object))
                .is_some()
            {
                return Err(PdfParseError::DuplicateIndirectObjectDefinition { pos: id.pos.0, id });
            }
        }
        Ok(())
    }
    /// Parses all top-level indirect object definitions, unpacks any object
    /// streams among them, and publishes the final table into the shared
    /// `PdfObjects` (exactly once).
    fn parse_body(&mut self) -> Result<(), PdfParseError> {
        while let Some(()) = self.parse_indirect_object_definition()? {}
        let mut object_streams: Vec<PdfStream<PdfObjectStreamDictionary>> = Vec::new();
        for object in self.objects_map.values_mut() {
            // Only stream objects can be object streams; exhaustive match so
            // new variants must be considered here.
            let stream = match &object.object {
                PdfObject::Stream(stream) => stream,
                PdfObject::Boolean(_)
                | PdfObject::Integer(_)
                | PdfObject::Real(_)
                | PdfObject::String(_)
                | PdfObject::Name(_)
                | PdfObject::Array(_)
                | PdfObject::Dictionary(_)
                | PdfObject::Null(_)
                | PdfObject::Indirect(_) => continue,
            };
            if PdfObjectStreamDictionary::parse_type_from_dictionary(&stream.dictionary().rest)
                .is_ok()
            {
                object_streams.push(PdfStream::parse(object.object.clone())?);
            }
        }
        for object_stream in &object_streams {
            self.parse_object_stream(object_stream)?;
        }
        let Ok(()) = self.parser.objects.inner.set(PdfObjectsInner {
            objects: std::mem::take(&mut self.objects_map),
            object_streams,
        }) else {
            // `inner` is only ever set here, once per parse.
            unreachable!();
        };
        Ok(())
    }
    /// Classic `xref` tables are not implemented yet; files without one
    /// (xref-stream files) pass through unchanged.
    fn parse_xref_table(&mut self) -> Result<(), PdfParseError> {
        self.parser.skip_comments_and_whitespace();
        let xref_pos = self.parser.tokenizer.pos();
        let Some(PdfToken::Regular(b"xref")) = self.parser.tokenizer.peek() else {
            return Ok(());
        };
        todo!("{xref_pos}")
    }
    /// Parses the trailer section: either `trailer <<...>>` or (for
    /// xref-stream files) nothing before `startxref`, then the `startxref`
    /// offset and the `%%EOF` marker. For xref-stream files the offset is
    /// followed to the cross-reference stream object.
    fn parse_trailer(&mut self) -> Result<PdfTrailer, PdfParseError> {
        self.parser.skip_comments_and_whitespace();
        let trailer_pos = self.parser.tokenizer.pos();
        let trailer_dictionary = match self.parser.tokenizer.peek() {
            Some(PdfToken::Regular(b"trailer")) => {
                self.parser.tokenizer.next();
                Some(PdfTrailerDictionary::parse(self.parse_object()?)?)
            }
            // No `trailer` keyword: expect an xref stream instead.
            Some(PdfToken::Regular(b"startxref")) => None,
            _ => {
                return Err(PdfParseError::MissingTrailer { pos: trailer_pos });
            }
        };
        self.parser.skip_comments_and_whitespace();
        let start_xref_kw_pos = self.parser.tokenizer.pos();
        let Some(PdfToken::Regular(b"startxref")) = self.parser.tokenizer.next() else {
            return Err(PdfParseError::MissingStartXRefKeyword {
                pos: start_xref_kw_pos,
            });
        };
        let start_xref_pos = self.parser.tokenizer.pos();
        // Shadowing: on success, `start_xref_pos` becomes the digits' own
        // position rather than the pre-parse position.
        let Some((start_xref_pos, start_xref)) = self
            .parser
            .parse_digits(|pos| Err(PdfParseError::IntegerOutOfRange { pos }))?
        else {
            return Err(PdfParseError::MissingStartXRefValue {
                pos: start_xref_pos,
            });
        };
        self.parser.tokenizer.skip_whitespace();
        let eof_comment_pos = self.parser.tokenizer.pos();
        // Comment tokens include their EOL bytes, hence the variants.
        let Some(PdfToken::Comment(b"%%EOF" | b"%%EOF\r" | b"%%EOF\r\n" | b"%%EOF\n")) =
            self.parser.tokenizer.next()
        else {
            return Err(PdfParseError::MissingEofComment {
                pos: eof_comment_pos,
            });
        };
        self.parser.tokenizer.skip_whitespace();
        // Nothing but whitespace may follow %%EOF.
        if let Some(byte) = self.parser.tokenizer.peek_byte() {
            return Err(PdfParseError::UnexpectedByte {
                pos: self.parser.tokenizer.pos(),
                byte,
            });
        }
        if let Some(trailer_dictionary) = trailer_dictionary {
            return Ok(PdfTrailer::Trailer {
                trailer_dictionary,
                start_xref,
            });
        }
        // Xref-stream file: `start_xref` is the byte offset of the xref
        // stream's `N G obj` definition; parse the identifier there.
        let id = PdfParser {
            tokenizer: PdfTokenizer::new(
                self.parser.tokenizer.bytes,
                PdfInputPositionKnown {
                    pos: start_xref,
                    containing_streams_pos: None,
                },
            ),
            objects: self.parser.objects.clone(),
        }
        .parse_object_identifier(false);
        let Some(id) = id? else {
            return Err(PdfParseError::InvalidStartXRefValue {
                pos: start_xref_pos,
                start_xref,
            });
        };
        // Resolve the identifier through the (already-populated) object
        // table and require it to be a stream.
        let xref_stream = PdfStream::parse(
            PdfObjectIndirect::new(&self.parser.objects, id)
                .get()
                .into(),
        )?;
        Ok(PdfTrailer::Stream {
            xref_stream,
            start_xref,
        })
    }
    /// Parses the whole file, in order: header, body, xref table, trailer.
    fn parse_file(mut self) -> Result<Pdf, PdfParseError> {
        let header = self.parse_header()?;
        self.parse_body()?;
        self.parse_xref_table()?;
        let trailer = self.parse_trailer()?;
        Ok(Pdf {
            header,
            objects: self.parser.objects,
            trailer,
        })
    }
}
|
|
|
|
impl Pdf {
|
|
pub fn parse(bytes: impl AsRef<[u8]>) -> Result<Pdf, PdfParseError> {
|
|
PdfFileParser {
|
|
parser: PdfParser {
|
|
objects: Arc::new(PdfObjects {
|
|
inner: OnceLock::new(),
|
|
}),
|
|
tokenizer: PdfTokenizer::new(
|
|
bytes.as_ref(),
|
|
PdfInputPositionKnown {
|
|
pos: 0,
|
|
containing_streams_pos: None,
|
|
},
|
|
),
|
|
},
|
|
objects_map: BTreeMap::new(),
|
|
}
|
|
.parse_file()
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use crate::{
        pdf::{
            object::{
                PdfBoolean, PdfDictionary, PdfInteger, PdfName, PdfNull, PdfObject, PdfString,
            },
            parse::{PdfInputPosition, PdfParse, PdfParseError},
        },
        util::ArcOrRef,
    };

    // Checks that `pdf_parse!`-derived dictionary parsing maps named keys
    // to fields and collects all unrecognized keys into the `flatten` field.
    #[test]
    fn test_deserialize_dict() -> Result<(), PdfParseError> {
        crate::pdf::parse::pdf_parse! {
            #[pdf]
            #[derive(Debug)]
            #[allow(dead_code)]
            struct TestStruct {
                #[pdf(name = "a")]
                a: i32,
                #[pdf(name = "c")]
                c: i32,
                #[pdf(name = "b")]
                b: i32,
                #[pdf(flatten)]
                rest: PdfDictionary,
            }
        }

        // Dictionary mixing recognized keys (a, b, c) with extras (d, e, f).
        let v: TestStruct = PdfParse::parse(PdfObject::from(PdfDictionary::from_iter([
            (
                PdfName::new_static(b"a"),
                PdfInteger::new(PdfInputPosition::empty(), 1).into(),
            ),
            (
                PdfName::new_static(b"c"),
                PdfInteger::new(PdfInputPosition::empty(), 7).into(),
            ),
            (
                PdfName::new_static(b"b"),
                PdfInteger::new(PdfInputPosition::empty(), 5).into(),
            ),
            (
                PdfName::new_static(b"d"),
                PdfBoolean::new(PdfInputPosition::empty(), false).into(),
            ),
            (
                PdfName::new_static(b"e"),
                PdfNull::new(PdfInputPosition::empty()).into(),
            ),
            (
                PdfName::new_static(b"f"),
                PdfString::new(PdfInputPosition::empty(), ArcOrRef::Ref(b"test")).into(),
            ),
        ])))?;
        // Note: "e" (null) is expected to be dropped, not flattened.
        let expected = TestStruct {
            a: 1,
            c: 7,
            b: 5,
            rest: PdfDictionary::from_iter([
                (
                    PdfName::new_static(b"d"),
                    PdfBoolean::new(PdfInputPosition::empty(), false).into(),
                ),
                (
                    PdfName::new_static(b"f"),
                    PdfString::new(PdfInputPosition::empty(), ArcOrRef::Ref(b"test")).into(),
                ),
            ]),
        };
        // Compare Debug renderings since TestStruct does not derive
        // PartialEq.
        assert_eq!(format!("{v:?}"), format!("{expected:?}"));
        Ok(())
    }
}
|