parse content streams into a list of operators
This commit is contained in:
parent
13dcea1dab
commit
aba6368948
5 changed files with 1541 additions and 408 deletions
557
src/pdf.rs
557
src/pdf.rs
|
|
@ -1,5 +1,6 @@
|
||||||
use crate::{
|
use crate::{
|
||||||
pdf::{
|
pdf::{
|
||||||
|
content_stream::PdfOperatorUnparsed,
|
||||||
document_structure::PdfDocumentCatalog,
|
document_structure::PdfDocumentCatalog,
|
||||||
object::{
|
object::{
|
||||||
PdfArray, PdfBoolean, PdfDictionary, PdfInteger, PdfName, PdfNull, PdfObject,
|
PdfArray, PdfBoolean, PdfDictionary, PdfInteger, PdfName, PdfNull, PdfObject,
|
||||||
|
|
@ -318,67 +319,35 @@ impl<'a> Iterator for PdfTokenizer<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
struct PdfParser<'a> {
|
struct PdfParser<'a> {
|
||||||
objects_arc: Arc<PdfObjects>,
|
objects: Arc<PdfObjects>,
|
||||||
objects_map: BTreeMap<PdfObjectIdentifier, PdfObject>,
|
|
||||||
unparsed_stream_dictionaries: Vec<UnparsedPdfStreamDictionary<PdfDictionary>>,
|
|
||||||
tokenizer: PdfTokenizer<'a>,
|
tokenizer: PdfTokenizer<'a>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
enum PdfObjectOrStreamDictionaryOrOperator {
|
||||||
|
StreamDictionary {
|
||||||
|
dictionary: PdfDictionary,
|
||||||
|
stream_kw_pos: PdfInputPosition,
|
||||||
|
},
|
||||||
|
Object(PdfObject),
|
||||||
|
Operator(PdfOperatorUnparsed),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PdfObjectOrStreamDictionaryOrOperator {
|
||||||
|
fn error_on_stream_or_operator(self) -> Result<PdfObject, PdfParseError> {
|
||||||
|
match self {
|
||||||
|
PdfObjectOrStreamDictionaryOrOperator::StreamDictionary {
|
||||||
|
dictionary: _,
|
||||||
|
stream_kw_pos,
|
||||||
|
} => Err(PdfParseError::StreamNotAllowedHere { pos: stream_kw_pos }),
|
||||||
|
PdfObjectOrStreamDictionaryOrOperator::Object(object) => Ok(object),
|
||||||
|
PdfObjectOrStreamDictionaryOrOperator::Operator(operator) => {
|
||||||
|
Err(PdfParseError::OperatorNotAllowedHere { operator })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl<'a> PdfParser<'a> {
|
impl<'a> PdfParser<'a> {
|
||||||
fn with_tokenizer<'b, R>(
|
|
||||||
&mut self,
|
|
||||||
tokenizer: PdfTokenizer<'b>,
|
|
||||||
f: impl FnOnce(&mut PdfParser<'b>) -> R,
|
|
||||||
) -> R {
|
|
||||||
let PdfParser {
|
|
||||||
objects_arc,
|
|
||||||
objects_map,
|
|
||||||
unparsed_stream_dictionaries,
|
|
||||||
tokenizer: _,
|
|
||||||
} = self;
|
|
||||||
let objects_arc = objects_arc.clone();
|
|
||||||
let objects_map = std::mem::take(objects_map);
|
|
||||||
let unparsed_stream_dictionaries = std::mem::take(unparsed_stream_dictionaries);
|
|
||||||
let mut new_parser = PdfParser {
|
|
||||||
objects_arc,
|
|
||||||
objects_map,
|
|
||||||
unparsed_stream_dictionaries,
|
|
||||||
tokenizer,
|
|
||||||
};
|
|
||||||
let retval = f(&mut new_parser);
|
|
||||||
let PdfParser {
|
|
||||||
objects_arc,
|
|
||||||
objects_map,
|
|
||||||
unparsed_stream_dictionaries,
|
|
||||||
tokenizer: _,
|
|
||||||
} = new_parser;
|
|
||||||
self.objects_arc = objects_arc;
|
|
||||||
self.objects_map = objects_map;
|
|
||||||
self.unparsed_stream_dictionaries = unparsed_stream_dictionaries;
|
|
||||||
retval
|
|
||||||
}
|
|
||||||
fn parse_header(&mut self) -> Result<PdfHeader, PdfParseError> {
|
|
||||||
let Some(b'%') = self.tokenizer.bytes.first() else {
|
|
||||||
return Err(PdfParseError::NotAPdfFile);
|
|
||||||
};
|
|
||||||
let Some(PdfToken::Comment(header)) = self.tokenizer.next() else {
|
|
||||||
unreachable!()
|
|
||||||
};
|
|
||||||
let Ok(header) = str::from_utf8(header) else {
|
|
||||||
return Err(PdfParseError::NotAPdfFile);
|
|
||||||
};
|
|
||||||
let header = header.trim_end_matches(['\n', '\r']);
|
|
||||||
let Some(version) = header.strip_prefix(PdfHeader::PREFIX) else {
|
|
||||||
return Err(PdfParseError::NotAPdfFile);
|
|
||||||
};
|
|
||||||
let Some((major_str, minor_str)) = version.split_once('.') else {
|
|
||||||
return Err(PdfParseError::NotAPdfFile);
|
|
||||||
};
|
|
||||||
let (Ok(major), Ok(minor)) = (major_str.parse(), minor_str.parse()) else {
|
|
||||||
return Err(PdfParseError::NotAPdfFile);
|
|
||||||
};
|
|
||||||
Ok(PdfHeader { major, minor })
|
|
||||||
}
|
|
||||||
fn skip_comments_and_whitespace(&mut self) {
|
fn skip_comments_and_whitespace(&mut self) {
|
||||||
self.tokenizer.skip_whitespace();
|
self.tokenizer.skip_whitespace();
|
||||||
while let Some(PdfToken::Comment(_)) = self.tokenizer.peek() {
|
while let Some(PdfToken::Comment(_)) = self.tokenizer.peek() {
|
||||||
|
|
@ -449,7 +418,7 @@ impl<'a> PdfParser<'a> {
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
};
|
};
|
||||||
if let Some(PdfToken::Regular(b"R")) = self.tokenizer.next() {
|
if let Some(PdfToken::Regular(b"R")) = self.tokenizer.next() {
|
||||||
Ok(Some(PdfObjectIndirect::new(&self.objects_arc, id)))
|
Ok(Some(PdfObjectIndirect::new(&self.objects, id)))
|
||||||
} else {
|
} else {
|
||||||
self.tokenizer = old_tokenizer;
|
self.tokenizer = old_tokenizer;
|
||||||
Ok(None)
|
Ok(None)
|
||||||
|
|
@ -604,7 +573,10 @@ impl<'a> PdfParser<'a> {
|
||||||
self.tokenizer.next();
|
self.tokenizer.next();
|
||||||
return Ok(PdfArray::from_elements(array_pos, Arc::from(contents)));
|
return Ok(PdfArray::from_elements(array_pos, Arc::from(contents)));
|
||||||
}
|
}
|
||||||
contents.push(self.parse_object()?);
|
contents.push(
|
||||||
|
self.parse_object_or_operator()?
|
||||||
|
.error_on_stream_or_operator()?,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/// assumes `self.tokenizer.peek_byte() == Some(b'<')`
|
/// assumes `self.tokenizer.peek_byte() == Some(b'<')`
|
||||||
|
|
@ -630,11 +602,17 @@ impl<'a> PdfParser<'a> {
|
||||||
Arc::new(contents),
|
Arc::new(contents),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
let name = PdfName::parse(self.parse_object()?.into())?;
|
let name = PdfName::parse(
|
||||||
|
self.parse_object_or_operator()?
|
||||||
|
.error_on_stream_or_operator()?,
|
||||||
|
)?;
|
||||||
let name_pos = name.pos();
|
let name_pos = name.pos();
|
||||||
match contents.entry(name) {
|
match contents.entry(name) {
|
||||||
std::collections::btree_map::Entry::Vacant(entry) => {
|
std::collections::btree_map::Entry::Vacant(entry) => {
|
||||||
entry.insert(self.parse_object()?.into());
|
entry.insert(
|
||||||
|
self.parse_object_or_operator()?
|
||||||
|
.error_on_stream_or_operator()?,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
std::collections::btree_map::Entry::Occupied(entry) => {
|
std::collections::btree_map::Entry::Occupied(entry) => {
|
||||||
return Err(PdfParseError::DuplicateDictionaryKey {
|
return Err(PdfParseError::DuplicateDictionaryKey {
|
||||||
|
|
@ -645,21 +623,146 @@ impl<'a> PdfParser<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
fn parse_object_or_operator(
|
||||||
|
&mut self,
|
||||||
|
) -> Result<PdfObjectOrStreamDictionaryOrOperator, PdfParseError> {
|
||||||
|
self.skip_comments_and_whitespace();
|
||||||
|
if let Some(indirect) = self.parse_indirect_object()? {
|
||||||
|
return Ok(PdfObjectOrStreamDictionaryOrOperator::Object(
|
||||||
|
indirect.into(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
let pos = self.tokenizer.pos();
|
||||||
|
Ok(PdfObjectOrStreamDictionaryOrOperator::Object(
|
||||||
|
match self
|
||||||
|
.tokenizer
|
||||||
|
.next()
|
||||||
|
.ok_or(PdfParseError::TruncatedFile { pos })?
|
||||||
|
{
|
||||||
|
PdfToken::Regular(b"true") => PdfObject::Boolean(PdfBoolean::new(pos, true)),
|
||||||
|
PdfToken::Regular(b"false") => PdfObject::Boolean(PdfBoolean::new(pos, false)),
|
||||||
|
PdfToken::Regular(b"null") => PdfObject::Null(PdfNull::new(pos)),
|
||||||
|
PdfToken::Regular(
|
||||||
|
number @ ([b'+' | b'-', b'0'..=b'9' | b'.', ..] | [b'0'..=b'9' | b'.', ..]),
|
||||||
|
) => {
|
||||||
|
// parse number
|
||||||
|
let Ok(number) = str::from_utf8(number) else {
|
||||||
|
return Err(PdfParseError::InvalidNumber { pos });
|
||||||
|
};
|
||||||
|
let mut parts = number
|
||||||
|
.strip_prefix(&['+', '-'])
|
||||||
|
.unwrap_or(number)
|
||||||
|
.split('.');
|
||||||
|
let integer_part = parts
|
||||||
|
.next()
|
||||||
|
.expect("split always returns at least one part");
|
||||||
|
let fraction_part = parts.next();
|
||||||
|
if parts.next().is_some() {
|
||||||
|
return Err(PdfParseError::InvalidNumber { pos });
|
||||||
|
}
|
||||||
|
if integer_part.is_empty() && fraction_part.is_none_or(|v| v.is_empty()) {
|
||||||
|
return Err(PdfParseError::InvalidNumber { pos });
|
||||||
|
}
|
||||||
|
if !integer_part.bytes().all(|v| v.is_ascii_digit()) {
|
||||||
|
return Err(PdfParseError::InvalidNumber { pos });
|
||||||
|
}
|
||||||
|
if let Some(fraction_part) = fraction_part {
|
||||||
|
if !fraction_part.bytes().all(|v| v.is_ascii_digit()) {
|
||||||
|
return Err(PdfParseError::InvalidNumber { pos });
|
||||||
|
}
|
||||||
|
PdfObject::Real(PdfReal::new(
|
||||||
|
pos,
|
||||||
|
number
|
||||||
|
.parse()
|
||||||
|
.map_err(|_| PdfParseError::InvalidNumber { pos })?,
|
||||||
|
))
|
||||||
|
} else {
|
||||||
|
PdfObject::Integer(PdfInteger::new(
|
||||||
|
pos,
|
||||||
|
number
|
||||||
|
.parse()
|
||||||
|
.map_err(|_| PdfParseError::InvalidNumber { pos })?,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
PdfToken::Regular(name) => {
|
||||||
|
return Ok(PdfObjectOrStreamDictionaryOrOperator::Operator(
|
||||||
|
PdfOperatorUnparsed::new(pos, ArcOrRef::Arc(name.into())),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
PdfToken::LParen => PdfObject::String(self.parse_string_after_l_paren()?),
|
||||||
|
PdfToken::RParen => todo!(),
|
||||||
|
PdfToken::LAngle => {
|
||||||
|
if self.tokenizer.peek_byte() == Some(b'<') {
|
||||||
|
let dictionary = self.parse_dictionary_after_one_l_angle()?;
|
||||||
|
self.skip_comments_and_whitespace();
|
||||||
|
if let Some(PdfToken::Regular(b"stream")) = self.tokenizer.peek() {
|
||||||
|
return Ok(PdfObjectOrStreamDictionaryOrOperator::StreamDictionary {
|
||||||
|
dictionary,
|
||||||
|
stream_kw_pos: self.tokenizer.pos(),
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
dictionary.into()
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
self.parse_string_after_l_angle()?.into()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
PdfToken::RAngle => todo!(),
|
||||||
|
PdfToken::LBracket => self.parse_array_after_l_bracket()?.into(),
|
||||||
|
PdfToken::RBracket => todo!(),
|
||||||
|
PdfToken::LBrace => todo!(),
|
||||||
|
PdfToken::RBrace => todo!(),
|
||||||
|
PdfToken::FSlash => self.parse_name_after_f_slash()?.into(),
|
||||||
|
PdfToken::Comment(_) => unreachable!(),
|
||||||
|
},
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct PdfFileParser<'a> {
|
||||||
|
parser: PdfParser<'a>,
|
||||||
|
objects_map: BTreeMap<PdfObjectIdentifier, PdfObject>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> PdfFileParser<'a> {
|
||||||
|
fn parse_header(&mut self) -> Result<PdfHeader, PdfParseError> {
|
||||||
|
let Some(b'%') = self.parser.tokenizer.bytes.first() else {
|
||||||
|
return Err(PdfParseError::NotAPdfFile);
|
||||||
|
};
|
||||||
|
let Some(PdfToken::Comment(header)) = self.parser.tokenizer.next() else {
|
||||||
|
unreachable!()
|
||||||
|
};
|
||||||
|
let Ok(header) = str::from_utf8(header) else {
|
||||||
|
return Err(PdfParseError::NotAPdfFile);
|
||||||
|
};
|
||||||
|
let header = header.trim_end_matches(['\n', '\r']);
|
||||||
|
let Some(version) = header.strip_prefix(PdfHeader::PREFIX) else {
|
||||||
|
return Err(PdfParseError::NotAPdfFile);
|
||||||
|
};
|
||||||
|
let Some((major_str, minor_str)) = version.split_once('.') else {
|
||||||
|
return Err(PdfParseError::NotAPdfFile);
|
||||||
|
};
|
||||||
|
let (Ok(major), Ok(minor)) = (major_str.parse(), minor_str.parse()) else {
|
||||||
|
return Err(PdfParseError::NotAPdfFile);
|
||||||
|
};
|
||||||
|
Ok(PdfHeader { major, minor })
|
||||||
|
}
|
||||||
/// assumes `self.tokenizer.peek() == Some(PdfToken::Regular(b"stream"))`
|
/// assumes `self.tokenizer.peek() == Some(PdfToken::Regular(b"stream"))`
|
||||||
fn parse_stream_after_dictionary(
|
fn parse_stream_after_dictionary(
|
||||||
&mut self,
|
&mut self,
|
||||||
dictionary: PdfDictionary,
|
dictionary: PdfDictionary,
|
||||||
) -> Result<PdfStream, PdfParseError> {
|
) -> Result<PdfStream, PdfParseError> {
|
||||||
self.tokenizer.skip_whitespace();
|
self.parser.tokenizer.skip_whitespace();
|
||||||
let stream_pos = self.tokenizer.pos();
|
let stream_pos = self.parser.tokenizer.pos();
|
||||||
let stream = self.tokenizer.next();
|
let stream = self.parser.tokenizer.next();
|
||||||
assert_eq!(stream, Some(PdfToken::Regular(b"stream")));
|
assert_eq!(stream, Some(PdfToken::Regular(b"stream")));
|
||||||
let len = PdfStreamDictionary::parse_len_from_dictionary(&dictionary)?;
|
let dictionary = PdfStreamDictionary::parse(dictionary.into())?;
|
||||||
let eol_pos = self.tokenizer.pos();
|
let eol_pos = self.parser.tokenizer.pos();
|
||||||
match self.tokenizer.next_byte() {
|
match self.parser.tokenizer.next_byte() {
|
||||||
None => return Err(PdfParseError::TruncatedFile { pos: eol_pos }),
|
None => return Err(PdfParseError::TruncatedFile { pos: eol_pos }),
|
||||||
Some(b'\r') => {
|
Some(b'\r') => {
|
||||||
let Some(b'\n') = self.tokenizer.next_byte() else {
|
let Some(b'\n') = self.parser.tokenizer.next_byte() else {
|
||||||
return Err(PdfParseError::InvalidOrMissingEolAfterStreamKeyword {
|
return Err(PdfParseError::InvalidOrMissingEolAfterStreamKeyword {
|
||||||
pos: eol_pos,
|
pos: eol_pos,
|
||||||
});
|
});
|
||||||
|
|
@ -668,121 +771,56 @@ impl<'a> PdfParser<'a> {
|
||||||
Some(b'\n') => {}
|
Some(b'\n') => {}
|
||||||
_ => return Err(PdfParseError::InvalidOrMissingEolAfterStreamKeyword { pos: eol_pos }),
|
_ => return Err(PdfParseError::InvalidOrMissingEolAfterStreamKeyword { pos: eol_pos }),
|
||||||
}
|
}
|
||||||
let Some(data) = self.tokenizer.read_bytes(len) else {
|
let Some(data) = self.parser.tokenizer.read_bytes(dictionary.len) else {
|
||||||
return Err(PdfParseError::TruncatedFile {
|
return Err(PdfParseError::TruncatedFile {
|
||||||
pos: PdfInputPosition::new(Some(PdfInputPositionKnown {
|
pos: PdfInputPosition::new(Some(PdfInputPositionKnown {
|
||||||
pos: self.tokenizer.bytes.len(),
|
pos: self.parser.tokenizer.bytes.len(),
|
||||||
..self.tokenizer.pos
|
..self.parser.tokenizer.pos
|
||||||
})),
|
})),
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
let (stream, unparsed) = PdfStream::new_unparsed(stream_pos, dictionary, Arc::from(data));
|
let stream = PdfStream::new(
|
||||||
self.unparsed_stream_dictionaries.push(unparsed);
|
stream_pos,
|
||||||
self.skip_comments_and_whitespace();
|
&self.parser.objects,
|
||||||
let pos = self.tokenizer.pos();
|
dictionary,
|
||||||
if let Some(PdfToken::Regular(b"endstream")) = self.tokenizer.next() {
|
Arc::from(data),
|
||||||
|
);
|
||||||
|
self.parser.skip_comments_and_whitespace();
|
||||||
|
let pos = self.parser.tokenizer.pos();
|
||||||
|
if let Some(PdfToken::Regular(b"endstream")) = self.parser.tokenizer.next() {
|
||||||
Ok(stream)
|
Ok(stream)
|
||||||
} else {
|
} else {
|
||||||
Err(PdfParseError::MissingEndStreamKeyword { pos })
|
Err(PdfParseError::MissingEndStreamKeyword { pos })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn parse_object(&mut self) -> Result<PdfObject, PdfParseError> {
|
fn parse_object(&mut self) -> Result<PdfObject, PdfParseError> {
|
||||||
self.skip_comments_and_whitespace();
|
match self.parser.parse_object_or_operator()? {
|
||||||
if let Some(indirect) = self.parse_indirect_object()? {
|
PdfObjectOrStreamDictionaryOrOperator::StreamDictionary {
|
||||||
return Ok(indirect.into());
|
dictionary,
|
||||||
}
|
stream_kw_pos: _,
|
||||||
let pos = self.tokenizer.pos();
|
} => Ok(PdfObject::Stream(
|
||||||
match self
|
self.parse_stream_after_dictionary(dictionary)?,
|
||||||
.tokenizer
|
)),
|
||||||
.next()
|
PdfObjectOrStreamDictionaryOrOperator::Object(object) => Ok(object),
|
||||||
.ok_or(PdfParseError::TruncatedFile { pos })?
|
PdfObjectOrStreamDictionaryOrOperator::Operator(operator) => {
|
||||||
{
|
Err(PdfParseError::OperatorNotAllowedHere { operator })
|
||||||
PdfToken::Regular(b"true") => Ok(PdfObject::Boolean(PdfBoolean::new(pos, true))),
|
|
||||||
PdfToken::Regular(b"false") => Ok(PdfObject::Boolean(PdfBoolean::new(pos, false))),
|
|
||||||
PdfToken::Regular(b"null") => Ok(PdfObject::Null(PdfNull::new(pos))),
|
|
||||||
PdfToken::Regular(
|
|
||||||
number @ ([b'+' | b'-', b'0'..=b'9' | b'.', ..] | [b'0'..=b'9' | b'.', ..]),
|
|
||||||
) => {
|
|
||||||
// parse number
|
|
||||||
let Ok(number) = str::from_utf8(number) else {
|
|
||||||
return Err(PdfParseError::InvalidNumber { pos });
|
|
||||||
};
|
|
||||||
let mut parts = number
|
|
||||||
.strip_prefix(&['+', '-'])
|
|
||||||
.unwrap_or(number)
|
|
||||||
.split('.');
|
|
||||||
let integer_part = parts
|
|
||||||
.next()
|
|
||||||
.expect("split always returns at least one part");
|
|
||||||
let fraction_part = parts.next();
|
|
||||||
if parts.next().is_some() {
|
|
||||||
return Err(PdfParseError::InvalidNumber { pos });
|
|
||||||
}
|
|
||||||
if integer_part.is_empty() && fraction_part.is_none_or(|v| v.is_empty()) {
|
|
||||||
return Err(PdfParseError::InvalidNumber { pos });
|
|
||||||
}
|
|
||||||
if !integer_part.bytes().all(|v| v.is_ascii_digit()) {
|
|
||||||
return Err(PdfParseError::InvalidNumber { pos });
|
|
||||||
}
|
|
||||||
if let Some(fraction_part) = fraction_part {
|
|
||||||
if !fraction_part.bytes().all(|v| v.is_ascii_digit()) {
|
|
||||||
return Err(PdfParseError::InvalidNumber { pos });
|
|
||||||
}
|
|
||||||
Ok(PdfObject::Real(PdfReal::new(
|
|
||||||
pos,
|
|
||||||
number
|
|
||||||
.parse()
|
|
||||||
.map_err(|_| PdfParseError::InvalidNumber { pos })?,
|
|
||||||
)))
|
|
||||||
} else {
|
|
||||||
Ok(PdfObject::Integer(PdfInteger::new(
|
|
||||||
pos,
|
|
||||||
number
|
|
||||||
.parse()
|
|
||||||
.map_err(|_| PdfParseError::InvalidNumber { pos })?,
|
|
||||||
)))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
PdfToken::Regular(items) => todo!("{:?}", str::from_utf8(items)),
|
|
||||||
PdfToken::LParen => self.parse_string_after_l_paren().map(PdfObject::String),
|
|
||||||
PdfToken::RParen => todo!(),
|
|
||||||
PdfToken::LAngle => {
|
|
||||||
if self.tokenizer.peek_byte() == Some(b'<') {
|
|
||||||
let dictionary = self.parse_dictionary_after_one_l_angle()?;
|
|
||||||
self.skip_comments_and_whitespace();
|
|
||||||
if let Some(PdfToken::Regular(b"stream")) = self.tokenizer.peek() {
|
|
||||||
self.parse_stream_after_dictionary(dictionary)
|
|
||||||
.map(PdfObject::Stream)
|
|
||||||
} else {
|
|
||||||
Ok(dictionary.into())
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
self.parse_string_after_l_angle().map(PdfObject::String)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
PdfToken::RAngle => todo!(),
|
|
||||||
PdfToken::LBracket => self.parse_array_after_l_bracket().map(PdfObject::Array),
|
|
||||||
PdfToken::RBracket => todo!(),
|
|
||||||
PdfToken::LBrace => todo!(),
|
|
||||||
PdfToken::RBrace => todo!(),
|
|
||||||
PdfToken::FSlash => self.parse_name_after_f_slash().map(PdfObject::Name),
|
|
||||||
PdfToken::Comment(_) => unreachable!(),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn parse_indirect_object_definition(&mut self) -> Result<Option<()>, PdfParseError> {
|
fn parse_indirect_object_definition(&mut self) -> Result<Option<()>, PdfParseError> {
|
||||||
self.skip_comments_and_whitespace();
|
self.parser.skip_comments_and_whitespace();
|
||||||
let Some(id) = self.parse_object_identifier(false)? else {
|
let Some(id) = self.parser.parse_object_identifier(false)? else {
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
};
|
};
|
||||||
self.skip_comments_and_whitespace();
|
self.parser.skip_comments_and_whitespace();
|
||||||
let obj_pos = self.tokenizer.pos();
|
let obj_pos = self.parser.tokenizer.pos();
|
||||||
let Some(PdfToken::Regular(b"obj")) = self.tokenizer.next() else {
|
let Some(PdfToken::Regular(b"obj")) = self.parser.tokenizer.next() else {
|
||||||
return Err(PdfParseError::MissingObj { pos: obj_pos });
|
return Err(PdfParseError::MissingObj { pos: obj_pos });
|
||||||
};
|
};
|
||||||
let object = self.parse_object()?;
|
let object = self.parse_object()?;
|
||||||
self.skip_comments_and_whitespace();
|
self.parser.skip_comments_and_whitespace();
|
||||||
let end_obj_pos = self.tokenizer.pos();
|
let end_obj_pos = self.parser.tokenizer.pos();
|
||||||
let Some(PdfToken::Regular(b"endobj")) = self.tokenizer.next() else {
|
let Some(PdfToken::Regular(b"endobj")) = self.parser.tokenizer.next() else {
|
||||||
return Err(PdfParseError::MissingEndObj { pos: end_obj_pos });
|
return Err(PdfParseError::MissingEndObj { pos: end_obj_pos });
|
||||||
};
|
};
|
||||||
if self.objects_map.insert(id, object).is_some() {
|
if self.objects_map.insert(id, object).is_some() {
|
||||||
|
|
@ -791,53 +829,13 @@ impl<'a> PdfParser<'a> {
|
||||||
Ok(Some(()))
|
Ok(Some(()))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn parse_object_stream_inner(
|
|
||||||
&mut self,
|
|
||||||
object_stream: &PdfStream<PdfObjectStreamDictionary>,
|
|
||||||
) -> Result<(), PdfParseError> {
|
|
||||||
let mut object_ids_and_byte_positions =
|
|
||||||
Vec::<(PdfObjectIdentifier, usize)>::with_capacity(object_stream.dictionary().rest.n);
|
|
||||||
for _ in 0..object_stream.dictionary().rest.n {
|
|
||||||
self.skip_comments_and_whitespace();
|
|
||||||
let Some((pos, object_number)) =
|
|
||||||
self.parse_digits(|pos| Err(PdfParseError::InvalidObjectNumber { pos }))?
|
|
||||||
else {
|
|
||||||
return Err(PdfParseError::InvalidObjectNumber {
|
|
||||||
pos: self.tokenizer.pos(),
|
|
||||||
});
|
|
||||||
};
|
|
||||||
self.skip_comments_and_whitespace();
|
|
||||||
let Some((_, byte_position)) =
|
|
||||||
self.parse_digits(|pos| Err(PdfParseError::InvalidNumber { pos }))?
|
|
||||||
else {
|
|
||||||
return Err(PdfParseError::InvalidNumber {
|
|
||||||
pos: self.tokenizer.pos(),
|
|
||||||
});
|
|
||||||
};
|
|
||||||
object_ids_and_byte_positions.push((
|
|
||||||
PdfObjectIdentifier {
|
|
||||||
pos: pos.into(),
|
|
||||||
object_number,
|
|
||||||
generation_number: 0,
|
|
||||||
},
|
|
||||||
byte_position,
|
|
||||||
));
|
|
||||||
}
|
|
||||||
for (id, _byte_position) in object_ids_and_byte_positions {
|
|
||||||
let object = self.parse_object()?;
|
|
||||||
if self.objects_map.insert(id, object).is_some() {
|
|
||||||
return Err(PdfParseError::DuplicateIndirectObjectDefinition { pos: id.pos.0, id });
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
fn parse_object_stream(
|
fn parse_object_stream(
|
||||||
&mut self,
|
&mut self,
|
||||||
object_stream: &PdfStream<PdfObjectStreamDictionary>,
|
object_stream: &PdfStream<PdfObjectStreamDictionary>,
|
||||||
) -> Result<(), PdfParseError> {
|
) -> Result<(), PdfParseError> {
|
||||||
let data = object_stream.decoded_data().as_ref()?;
|
let data = object_stream.decoded_data().as_ref()?;
|
||||||
self.with_tokenizer(
|
let mut parser = PdfParser {
|
||||||
PdfTokenizer::new(
|
tokenizer: PdfTokenizer::new(
|
||||||
data,
|
data,
|
||||||
PdfInputPositionKnown {
|
PdfInputPositionKnown {
|
||||||
pos: 0,
|
pos: 0,
|
||||||
|
|
@ -850,18 +848,48 @@ impl<'a> PdfParser<'a> {
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
|parser| parser.parse_object_stream_inner(object_stream),
|
objects: self.parser.objects.clone(),
|
||||||
)
|
};
|
||||||
.map_err(|e| PdfParseError::ObjectStreamParseError {
|
let mut object_ids_and_byte_positions =
|
||||||
stream_pos: object_stream.get_pdf_input_position(),
|
Vec::<(PdfObjectIdentifier, usize)>::with_capacity(object_stream.dictionary().rest.n);
|
||||||
error: Arc::new(e),
|
for _ in 0..object_stream.dictionary().rest.n {
|
||||||
})
|
parser.skip_comments_and_whitespace();
|
||||||
|
let Some((pos, object_number)) =
|
||||||
|
parser.parse_digits(|pos| Err(PdfParseError::InvalidObjectNumber { pos }))?
|
||||||
|
else {
|
||||||
|
return Err(PdfParseError::InvalidObjectNumber {
|
||||||
|
pos: parser.tokenizer.pos(),
|
||||||
|
});
|
||||||
|
};
|
||||||
|
parser.skip_comments_and_whitespace();
|
||||||
|
let Some((_, byte_position)) =
|
||||||
|
parser.parse_digits(|pos| Err(PdfParseError::InvalidNumber { pos }))?
|
||||||
|
else {
|
||||||
|
return Err(PdfParseError::InvalidNumber {
|
||||||
|
pos: parser.tokenizer.pos(),
|
||||||
|
});
|
||||||
|
};
|
||||||
|
object_ids_and_byte_positions.push((
|
||||||
|
PdfObjectIdentifier {
|
||||||
|
pos: pos.into(),
|
||||||
|
object_number,
|
||||||
|
generation_number: 0,
|
||||||
|
},
|
||||||
|
byte_position,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
for (id, _byte_position) in object_ids_and_byte_positions {
|
||||||
|
let object = parser
|
||||||
|
.parse_object_or_operator()?
|
||||||
|
.error_on_stream_or_operator()?;
|
||||||
|
if self.objects_map.insert(id, object).is_some() {
|
||||||
|
return Err(PdfParseError::DuplicateIndirectObjectDefinition { pos: id.pos.0, id });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
fn parse_body(&mut self) -> Result<(), PdfParseError> {
|
fn parse_body(&mut self) -> Result<(), PdfParseError> {
|
||||||
while let Some(()) = self.parse_indirect_object_definition()? {}
|
while let Some(()) = self.parse_indirect_object_definition()? {}
|
||||||
self.unparsed_stream_dictionaries
|
|
||||||
.drain(..)
|
|
||||||
.try_for_each(|v| v.finish_parsing())?;
|
|
||||||
let mut object_streams: Vec<PdfStream<PdfObjectStreamDictionary>> = Vec::new();
|
let mut object_streams: Vec<PdfStream<PdfObjectStreamDictionary>> = Vec::new();
|
||||||
for object in self.objects_map.values_mut() {
|
for object in self.objects_map.values_mut() {
|
||||||
let stream = match object {
|
let stream = match object {
|
||||||
|
|
@ -885,7 +913,7 @@ impl<'a> PdfParser<'a> {
|
||||||
for object_stream in &object_streams {
|
for object_stream in &object_streams {
|
||||||
self.parse_object_stream(object_stream)?;
|
self.parse_object_stream(object_stream)?;
|
||||||
}
|
}
|
||||||
let Ok(()) = self.objects_arc.inner.set(PdfObjectsInner {
|
let Ok(()) = self.parser.objects.inner.set(PdfObjectsInner {
|
||||||
objects: std::mem::take(&mut self.objects_map),
|
objects: std::mem::take(&mut self.objects_map),
|
||||||
object_streams,
|
object_streams,
|
||||||
}) else {
|
}) else {
|
||||||
|
|
@ -894,19 +922,19 @@ impl<'a> PdfParser<'a> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
fn parse_xref_table(&mut self) -> Result<(), PdfParseError> {
|
fn parse_xref_table(&mut self) -> Result<(), PdfParseError> {
|
||||||
self.skip_comments_and_whitespace();
|
self.parser.skip_comments_and_whitespace();
|
||||||
let xref_pos = self.tokenizer.pos();
|
let xref_pos = self.parser.tokenizer.pos();
|
||||||
let Some(PdfToken::Regular(b"xref")) = self.tokenizer.peek() else {
|
let Some(PdfToken::Regular(b"xref")) = self.parser.tokenizer.peek() else {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
};
|
};
|
||||||
todo!("{xref_pos}")
|
todo!("{xref_pos}")
|
||||||
}
|
}
|
||||||
fn parse_trailer(&mut self) -> Result<PdfTrailer, PdfParseError> {
|
fn parse_trailer(&mut self) -> Result<PdfTrailer, PdfParseError> {
|
||||||
self.skip_comments_and_whitespace();
|
self.parser.skip_comments_and_whitespace();
|
||||||
let trailer_pos = self.tokenizer.pos();
|
let trailer_pos = self.parser.tokenizer.pos();
|
||||||
let trailer_dictionary = match self.tokenizer.peek() {
|
let trailer_dictionary = match self.parser.tokenizer.peek() {
|
||||||
Some(PdfToken::Regular(b"trailer")) => {
|
Some(PdfToken::Regular(b"trailer")) => {
|
||||||
self.tokenizer.next();
|
self.parser.tokenizer.next();
|
||||||
Some(PdfTrailerDictionary::parse(self.parse_object()?)?)
|
Some(PdfTrailerDictionary::parse(self.parse_object()?)?)
|
||||||
}
|
}
|
||||||
Some(PdfToken::Regular(b"startxref")) => None,
|
Some(PdfToken::Regular(b"startxref")) => None,
|
||||||
|
|
@ -914,34 +942,35 @@ impl<'a> PdfParser<'a> {
|
||||||
return Err(PdfParseError::MissingTrailer { pos: trailer_pos });
|
return Err(PdfParseError::MissingTrailer { pos: trailer_pos });
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
self.skip_comments_and_whitespace();
|
self.parser.skip_comments_and_whitespace();
|
||||||
let start_xref_kw_pos = self.tokenizer.pos();
|
let start_xref_kw_pos = self.parser.tokenizer.pos();
|
||||||
let Some(PdfToken::Regular(b"startxref")) = self.tokenizer.next() else {
|
let Some(PdfToken::Regular(b"startxref")) = self.parser.tokenizer.next() else {
|
||||||
return Err(PdfParseError::MissingStartXRefKeyword {
|
return Err(PdfParseError::MissingStartXRefKeyword {
|
||||||
pos: start_xref_kw_pos,
|
pos: start_xref_kw_pos,
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
let start_xref_pos = self.tokenizer.pos();
|
let start_xref_pos = self.parser.tokenizer.pos();
|
||||||
let Some((start_xref_pos, start_xref)) =
|
let Some((start_xref_pos, start_xref)) = self
|
||||||
self.parse_digits(|pos| Err(PdfParseError::IntegerOutOfRange { pos }))?
|
.parser
|
||||||
|
.parse_digits(|pos| Err(PdfParseError::IntegerOutOfRange { pos }))?
|
||||||
else {
|
else {
|
||||||
return Err(PdfParseError::MissingStartXRefValue {
|
return Err(PdfParseError::MissingStartXRefValue {
|
||||||
pos: start_xref_pos,
|
pos: start_xref_pos,
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
self.tokenizer.skip_whitespace();
|
self.parser.tokenizer.skip_whitespace();
|
||||||
let eof_comment_pos = self.tokenizer.pos();
|
let eof_comment_pos = self.parser.tokenizer.pos();
|
||||||
let Some(PdfToken::Comment(b"%%EOF" | b"%%EOF\r" | b"%%EOF\r\n" | b"%%EOF\n")) =
|
let Some(PdfToken::Comment(b"%%EOF" | b"%%EOF\r" | b"%%EOF\r\n" | b"%%EOF\n")) =
|
||||||
self.tokenizer.next()
|
self.parser.tokenizer.next()
|
||||||
else {
|
else {
|
||||||
return Err(PdfParseError::MissingEofComment {
|
return Err(PdfParseError::MissingEofComment {
|
||||||
pos: eof_comment_pos,
|
pos: eof_comment_pos,
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
self.tokenizer.skip_whitespace();
|
self.parser.tokenizer.skip_whitespace();
|
||||||
if let Some(byte) = self.tokenizer.peek_byte() {
|
if let Some(byte) = self.parser.tokenizer.peek_byte() {
|
||||||
return Err(PdfParseError::UnexpectedByte {
|
return Err(PdfParseError::UnexpectedByte {
|
||||||
pos: self.tokenizer.pos(),
|
pos: self.parser.tokenizer.pos(),
|
||||||
byte,
|
byte,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
@ -951,24 +980,28 @@ impl<'a> PdfParser<'a> {
|
||||||
start_xref,
|
start_xref,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
let old_tokenizer = self.tokenizer.clone();
|
let id = PdfParser {
|
||||||
self.tokenizer = PdfTokenizer::new(
|
tokenizer: PdfTokenizer::new(
|
||||||
self.tokenizer.bytes,
|
self.parser.tokenizer.bytes,
|
||||||
PdfInputPositionKnown {
|
PdfInputPositionKnown {
|
||||||
pos: start_xref,
|
pos: start_xref,
|
||||||
containing_streams_pos: None,
|
containing_streams_pos: None,
|
||||||
},
|
},
|
||||||
);
|
),
|
||||||
let id = self.parse_object_identifier(false);
|
objects: self.parser.objects.clone(),
|
||||||
self.tokenizer = old_tokenizer;
|
}
|
||||||
|
.parse_object_identifier(false);
|
||||||
let Some(id) = id? else {
|
let Some(id) = id? else {
|
||||||
return Err(PdfParseError::InvalidStartXRefValue {
|
return Err(PdfParseError::InvalidStartXRefValue {
|
||||||
pos: start_xref_pos,
|
pos: start_xref_pos,
|
||||||
start_xref,
|
start_xref,
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
let xref_stream =
|
let xref_stream = PdfStream::parse(
|
||||||
PdfStream::parse(PdfObjectIndirect::new(&self.objects_arc, id).get().into())?;
|
PdfObjectIndirect::new(&self.parser.objects, id)
|
||||||
|
.get()
|
||||||
|
.into(),
|
||||||
|
)?;
|
||||||
Ok(PdfTrailer::Stream {
|
Ok(PdfTrailer::Stream {
|
||||||
xref_stream,
|
xref_stream,
|
||||||
start_xref,
|
start_xref,
|
||||||
|
|
@ -979,9 +1012,14 @@ impl<'a> PdfParser<'a> {
|
||||||
self.parse_body()?;
|
self.parse_body()?;
|
||||||
self.parse_xref_table()?;
|
self.parse_xref_table()?;
|
||||||
let trailer = self.parse_trailer()?;
|
let trailer = self.parse_trailer()?;
|
||||||
|
for page in trailer.trailer_dictionary().root.pages.pages().iter() {
|
||||||
|
for content in page.contents.iter() {
|
||||||
|
content.decoded_data().as_ref()?;
|
||||||
|
}
|
||||||
|
}
|
||||||
Ok(Pdf {
|
Ok(Pdf {
|
||||||
header,
|
header,
|
||||||
objects: self.objects_arc,
|
objects: self.parser.objects,
|
||||||
trailer,
|
trailer,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
@ -989,19 +1027,20 @@ impl<'a> PdfParser<'a> {
|
||||||
|
|
||||||
impl Pdf {
|
impl Pdf {
|
||||||
pub fn parse(bytes: impl AsRef<[u8]>) -> Result<Pdf, PdfParseError> {
|
pub fn parse(bytes: impl AsRef<[u8]>) -> Result<Pdf, PdfParseError> {
|
||||||
PdfParser {
|
PdfFileParser {
|
||||||
objects_arc: Arc::new(PdfObjects {
|
parser: PdfParser {
|
||||||
inner: OnceLock::new(),
|
objects: Arc::new(PdfObjects {
|
||||||
}),
|
inner: OnceLock::new(),
|
||||||
|
}),
|
||||||
|
tokenizer: PdfTokenizer::new(
|
||||||
|
bytes.as_ref(),
|
||||||
|
PdfInputPositionKnown {
|
||||||
|
pos: 0,
|
||||||
|
containing_streams_pos: None,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
},
|
||||||
objects_map: BTreeMap::new(),
|
objects_map: BTreeMap::new(),
|
||||||
unparsed_stream_dictionaries: vec![],
|
|
||||||
tokenizer: PdfTokenizer::new(
|
|
||||||
bytes.as_ref(),
|
|
||||||
PdfInputPositionKnown {
|
|
||||||
pos: 0,
|
|
||||||
containing_streams_pos: None,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
}
|
}
|
||||||
.parse_file()
|
.parse_file()
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,813 @@
|
||||||
use crate::pdf::object::PdfStream;
|
use crate::{
|
||||||
|
pdf::{
|
||||||
|
PdfObjectOrStreamDictionaryOrOperator, PdfObjects, PdfParser, PdfTokenizer,
|
||||||
|
object::{
|
||||||
|
NameOr, PdfDictionary, PdfMatrix, PdfName, PdfObject, PdfObjectDirect, PdfRectangle,
|
||||||
|
PdfStream, PdfStreamContents, PdfString, PdfStringBytesDebug, PdfStringOrNumber,
|
||||||
|
PdfVec2D,
|
||||||
|
},
|
||||||
|
parse::{
|
||||||
|
GetPdfInputPosition, PdfInputPosition, PdfInputPositionKnown,
|
||||||
|
PdfInputPositionNoCompare, PdfParse, PdfParseError,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
util::ArcOrRef,
|
||||||
|
};
|
||||||
|
use std::{fmt, sync::Arc};
|
||||||
|
|
||||||
pub struct PdfContentStream {
|
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
|
||||||
stream: PdfStream,
|
pub struct PdfOperatorUnparsed {
|
||||||
// TODO
|
pos: PdfInputPositionNoCompare,
|
||||||
|
bytes: ArcOrRef<'static, [u8]>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl GetPdfInputPosition for PdfOperatorUnparsed {
|
||||||
|
fn get_pdf_input_position(&self) -> PdfInputPosition {
|
||||||
|
self.pos()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Debug for PdfOperatorUnparsed {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
Self::debug_with_name("PdfOperatorUnparsed", &self.bytes, self.pos.0, f)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
trait PdfParseIter: Sized {
|
||||||
|
fn parse_iter(iter: impl IntoIterator<Item = PdfObject>) -> Result<Self, PdfParseError>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: PdfParse> PdfParseIter for Arc<[T]> {
|
||||||
|
fn parse_iter(iter: impl IntoIterator<Item = PdfObject>) -> Result<Self, PdfParseError> {
|
||||||
|
FromIterator::from_iter(iter.into_iter().map(T::parse))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PdfOperatorUnparsed {
|
||||||
|
pub fn new(
|
||||||
|
pos: impl Into<PdfInputPositionNoCompare>,
|
||||||
|
bytes: impl Into<ArcOrRef<'static, [u8]>>,
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
pos: pos.into(),
|
||||||
|
bytes: bytes.into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub const fn new_static(bytes: &'static [u8]) -> Self {
|
||||||
|
Self {
|
||||||
|
pos: PdfInputPositionNoCompare::empty(),
|
||||||
|
bytes: ArcOrRef::Ref(bytes),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub fn pos(&self) -> PdfInputPosition {
|
||||||
|
self.pos.0
|
||||||
|
}
|
||||||
|
pub fn bytes(&self) -> &ArcOrRef<'static, [u8]> {
|
||||||
|
&self.bytes
|
||||||
|
}
|
||||||
|
fn debug_with_name(
|
||||||
|
name: &str,
|
||||||
|
pdf_name: &[u8],
|
||||||
|
pos: PdfInputPosition,
|
||||||
|
f: &mut fmt::Formatter<'_>,
|
||||||
|
) -> fmt::Result {
|
||||||
|
write!(f, "{name}(at {pos}, {})", PdfStringBytesDebug(pdf_name))
|
||||||
|
}
|
||||||
|
pub fn bytes_debug(&self) -> PdfStringBytesDebug<'_> {
|
||||||
|
PdfStringBytesDebug(&self.bytes)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Generates the content-stream operator types from one table of operators.
///
/// A single invocation of the first arm emits:
/// - the bare operator enum (`$PdfOperator`): the unknown variant plus one variant per
///   `#[kw = b"..."]` entry, each holding only its input position;
/// - one struct per operator (`$VariantStruct`) holding the position and the parsed operands;
/// - the operator-with-operands enum (`$PdfOperatorAndOperands`) wrapping those structs;
/// - conversions between `$PdfOperator` and `PdfOperatorUnparsed`, `Debug` impls, and
///   position accessors.
///
/// Every operand field carries a `#[method(args...)]` attribute. Each name in `args` takes one
/// operand, in order, and the field is built by `<FieldType>::method(args...)`. The special form
/// `#[method(...)]` (literal dots) hands the whole operand iterator to `<FieldType>::method`.
///
/// The expansion refers to the unknown variant, its fields and the position field only through
/// the names captured by the matcher (`$Unknown`, `$unknown_operands`, `$unknown_operator`,
/// `$pos`), so an invocation may name them freely.
macro_rules! make_pdf_operator_enum {
    (
        $(#[$($operator_meta:tt)*])*
        $operator_enum_vis:vis enum $PdfOperator:ident;

        $(#[$($operator_and_operands_meta:tt)*])*
        $enum_vis:vis enum $PdfOperatorAndOperands:ident {
            $(#[$($unknown_variant_meta:tt)*])*
            $Unknown:ident {
                $(#[$($unknown_operands_meta:tt)*])*
                $unknown_operands:ident: $unknown_operands_ty:ty,
                $(#[$($unknown_operator_meta:tt)*])*
                $unknown_operator:ident: $unknown_operator_ty:ty,
            },
            $(
                #[kw = $kw:literal]
                $(#[$($variant_meta:tt)*])*
                $Variant:ident($VariantStruct:ident {
                    $pos:ident: PdfInputPositionNoCompare,
                    $(
                        #[$field_parse:ident($($parse_args:tt)*)]
                        $(#[$($field_meta:tt)*])*
                        $field:ident: $field_ty:ty,
                    )*
                }),
            )*
        }
    ) => {
        $(#[$($operator_meta)*])*
        $operator_enum_vis enum $PdfOperator {
            $(#[$($unknown_variant_meta)*])*
            $Unknown($unknown_operator_ty),
            $(
                $(#[$($variant_meta)*])*
                $Variant(PdfInputPositionNoCompare),
            )*
        }

        impl $PdfOperator {
            /// Combines this operator with its operands, parsing them into the operator's struct.
            /// Unknown operators keep their operands unparsed.
            $operator_enum_vis fn parse(
                self,
                operands: impl IntoIterator<Item = PdfObject>,
            ) -> Result<$PdfOperatorAndOperands, PdfParseError> {
                let operands = operands.into_iter();
                Ok(match self {
                    Self::$Unknown(operator) => $PdfOperatorAndOperands::$Unknown {
                        $unknown_operands: FromIterator::from_iter(operands.map(Into::into)),
                        $unknown_operator: operator,
                    },
                    $(Self::$Variant(pos) => $VariantStruct::parse(pos, operands)?.into(),)*
                })
            }
            /// The input position of this operator.
            $operator_enum_vis fn pos(&self) -> PdfInputPosition {
                match *self {
                    Self::$Unknown(ref operator) => operator.pos(),
                    $(Self::$Variant(pos) => pos.0,)*
                }
            }
        }

        impl fmt::Debug for $PdfOperator {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                match self {
                    Self::$Unknown(operator) => {
                        PdfOperatorUnparsed::debug_with_name("Unknown", &operator.bytes, operator.pos.0, f)
                    }
                    $(Self::$Variant(pos) => {
                        PdfOperatorUnparsed::debug_with_name(stringify!($Variant), $kw, pos.0, f)
                    })*
                }
            }
        }

        impl From<$PdfOperator> for PdfOperatorUnparsed {
            fn from(v: $PdfOperator) -> PdfOperatorUnparsed {
                match v {
                    $PdfOperator::$Unknown(operator) => operator,
                    $($PdfOperator::$Variant(pos) => PdfOperatorUnparsed { pos, bytes: ArcOrRef::Ref($kw) },)*
                }
            }
        }

        impl From<PdfOperatorUnparsed> for $PdfOperator {
            fn from(v: PdfOperatorUnparsed) -> $PdfOperator {
                // Keywords are matched byte-for-byte; anything unrecognised is kept as unknown.
                match &**v.bytes() {
                    $($kw => Self::$Variant(v.pos),)*
                    _ => Self::$Unknown(v),
                }
            }
        }

        $(#[derive(Clone)]
        $(#[$($variant_meta)*])*
        $enum_vis struct $VariantStruct {
            $enum_vis $pos: PdfInputPositionNoCompare,
            $(
                $(#[$($field_meta)*])*
                $enum_vis $field: $field_ty,
            )*
        }

        impl fmt::Debug for $VariantStruct {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                f.debug_struct(stringify!($VariantStruct))
                    .field("pos", &self.$pos)
                    $(.field(stringify!($field), &self.$field))*
                    .finish()
            }
        }

        impl GetPdfInputPosition for $VariantStruct {
            fn get_pdf_input_position(&self) -> PdfInputPosition {
                self.pos()
            }
        }

        impl From<$VariantStruct> for $PdfOperatorAndOperands {
            fn from(v: $VariantStruct) -> Self {
                Self::$Variant(v)
            }
        }

        impl $VariantStruct {
            /// The bare operator for this struct, located at `pos`.
            $enum_vis fn operator_from_pos(pos: impl Into<PdfInputPositionNoCompare>) -> $PdfOperator {
                $PdfOperator::$Variant(pos.into())
            }
            /// The bare operator (without operands) this struct was parsed from.
            $enum_vis fn operator(&self) -> $PdfOperator {
                $PdfOperator::$Variant(self.$pos)
            }
            /// The input position of the operator.
            $enum_vis fn pos(&self) -> PdfInputPosition {
                self.$pos.0
            }
        }

        make_pdf_operator_enum! {
            @impl_variant_parse
            $enum_vis enum;
            struct $VariantStruct {
                $pos: PdfInputPositionNoCompare,
                $(
                    #[$field_parse($($parse_args)*)]
                    $(#[$($field_meta)*])*
                    $field: $field_ty,
                )*
            }
        })*

        $(#[$($operator_and_operands_meta)*])*
        $enum_vis enum $PdfOperatorAndOperands {
            $(#[$($unknown_variant_meta)*])*
            $Unknown {
                $(#[$($unknown_operands_meta)*])*
                $unknown_operands: $unknown_operands_ty,
                $(#[$($unknown_operator_meta)*])*
                $unknown_operator: $unknown_operator_ty,
            },
            $(
                $(#[$($variant_meta)*])*
                $Variant($VariantStruct),
            )*
        }

        impl $PdfOperatorAndOperands {
            /// The bare operator, dropping the operands.
            $enum_vis fn operator(&self) -> $PdfOperator {
                match self {
                    Self::$Unknown { $unknown_operator: operator, .. } => $PdfOperator::$Unknown(operator.clone()),
                    $(Self::$Variant(v) => v.operator(),)*
                }
            }
            /// The input position of the operator.
            $enum_vis fn pos(&self) -> PdfInputPosition {
                match self {
                    Self::$Unknown { $unknown_operator: operator, .. } => operator.pos(),
                    $(Self::$Variant(v) => v.pos(),)*
                }
            }
        }

        impl fmt::Debug for $PdfOperatorAndOperands {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                match self {
                    Self::$Unknown {
                        $unknown_operands: operands,
                        $unknown_operator: operator,
                    } => f
                        .debug_struct("Unknown")
                        .field("operator", operator)
                        .field("operands", operands)
                        .finish(),
                    $(Self::$Variant($VariantStruct {
                        $pos,
                        $($field,)*
                    }) => f
                        .debug_struct(stringify!($Variant))
                        .field("pos", $pos)
                        $(.field(stringify!($field), $field))*
                        .finish(),)*
                }
            }
        }
    };
    // Fixed-arity operators: every name listed in a field's attribute consumes one operand.
    (
        @impl_variant_parse
        $enum_vis:vis enum;
        struct $VariantStruct:ident {
            $pos:ident: PdfInputPositionNoCompare,
            $(
                #[$field_parse:ident($($parse_args:ident),* $(,)?)]
                $(#[$($field_meta:tt)*])*
                $field:ident: $field_ty:ty,
            )*
        }
    ) => {
        impl $VariantStruct {
            /// Parses the operands of this operator; errors if there are too few or too many.
            $enum_vis fn parse(
                pos: impl Into<PdfInputPositionNoCompare>,
                operands: impl IntoIterator<Item = PdfObject>,
            ) -> Result<Self, PdfParseError> {
                let pos = pos.into();
                let mut operands = operands.into_iter();
                $($(let Some($parse_args) = operands.next() else {
                    return Err(PdfParseError::OperatorHasTooFewOperands { operator: Self::operator_from_pos(pos) });
                };)*)*
                if operands.next().is_some() {
                    return Err(PdfParseError::OperatorHasTooManyOperands { operator: Self::operator_from_pos(pos) });
                }
                Ok(Self {
                    $pos: pos,
                    $($field: <$field_ty>::$field_parse($($parse_args),*)?,)*
                })
            }
        }
    };
    // Variable-arity operators: `#[method(...)]` passes the whole operand iterator to one field.
    (
        @impl_variant_parse
        $enum_vis:vis enum;
        struct $VariantStruct:ident {
            $pos:ident: PdfInputPositionNoCompare,
            #[$field_parse:ident(...)]
            $(#[$($field_meta:tt)*])*
            $field:ident: $field_ty:ty,
        }
    ) => {
        impl $VariantStruct {
            /// Parses all operands of this operator into its single field.
            $enum_vis fn parse(
                pos: impl Into<PdfInputPositionNoCompare>,
                operands: impl IntoIterator<Item = PdfObject>,
            ) -> Result<Self, PdfParseError> {
                let pos = pos.into();
                let operands = operands.into_iter();
                Ok(Self {
                    $pos: pos,
                    $field: <$field_ty>::$field_parse(operands)?,
                })
            }
        }
    };
}
|
||||||
|
|
||||||
|
// Table of content-stream operators.
//
// Each entry maps a keyword (`#[kw = b"..."]`) to a variant and a struct holding the operator's
// position and operands. The names in a field's `#[parse(..)]` / `#[parse_flat(..)]` attribute
// each consume one operand, in the order the operands appear before the keyword.
// `#[parse_iter(...)]` gives every operand to that one field.
//
// `v`, `y` and `sh` take operands (`x2 y2 x3 y3 v`, `x1 y1 x3 y3 y`, `name sh`). They were
// previously declared with none, so any stream using them failed with
// `OperatorHasTooManyOperands`.
make_pdf_operator_enum! {
    #[derive(Clone)]
    pub enum PdfOperator;
    #[derive(Clone)]
    pub enum PdfOperatorAndOperands {
        Unknown {
            operands: Arc<[PdfObjectDirect]>,
            operator: PdfOperatorUnparsed,
        },
        #[kw = b"b"]
        CloseFillAndStrokePath(PdfOperatorCloseFillAndStrokePath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"B"]
        FillAndStrokePath(PdfOperatorFillAndStrokePath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"b*"]
        CloseFillAndStrokePathEvenOdd(PdfOperatorCloseFillAndStrokePathEvenOdd {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"B*"]
        FillAndStrokePathEvenOdd(PdfOperatorFillAndStrokePathEvenOdd {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"BDC"]
        BeginMarkedContentWithProperties(PdfOperatorBeginMarkedContentWithProperties {
            pos: PdfInputPositionNoCompare,
            #[parse(tag)]
            tag: PdfName,
            #[parse(properties)]
            properties: NameOr<PdfDictionary>,
        }),
        // NOTE(review): in the PDF format the bytes between `ID` and `EI` are raw image data;
        // confirm the tokenizer copes with that before relying on BI / ID / EI.
        #[kw = b"BI"]
        BeginInlineImage(PdfOperatorBeginInlineImage {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"BMC"]
        BeginMarkedContent(PdfOperatorBeginMarkedContent {
            pos: PdfInputPositionNoCompare,
            #[parse(tag)]
            tag: PdfName,
        }),
        #[kw = b"BT"]
        BeginText(PdfOperatorBeginText {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"BX"]
        BeginCompatibilitySection(PdfOperatorBeginCompatibilitySection {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"c"]
        CurveTo(PdfOperatorCurveTo {
            pos: PdfInputPositionNoCompare,
            #[parse(x1, y1)]
            p1: PdfVec2D,
            #[parse(x2, y2)]
            p2: PdfVec2D,
            #[parse(x3, y3)]
            p3: PdfVec2D,
        }),
        #[kw = b"cm"]
        ConcatMatrix(PdfOperatorConcatMatrix {
            pos: PdfInputPositionNoCompare,
            #[parse_flat(a, b, c, d, e, f)]
            matrix: PdfMatrix,
        }),
        #[kw = b"CS"]
        SetStrokeColorSpace(PdfOperatorSetStrokeColorSpace {
            pos: PdfInputPositionNoCompare,
            #[parse(name)]
            name: PdfName,
        }),
        #[kw = b"cs"]
        SetNonStrokeColorSpace(PdfOperatorSetNonStrokeColorSpace {
            pos: PdfInputPositionNoCompare,
            #[parse(name)]
            name: PdfName,
        }),
        #[kw = b"d"]
        SetLineDashPattern(PdfOperatorSetLineDashPattern {
            pos: PdfInputPositionNoCompare,
            #[parse(dash_array)]
            dash_array: PdfObject, // TODO: actually parse
            #[parse(dash_phase)]
            dash_phase: PdfObject, // TODO: actually parse
        }),
        #[kw = b"d0"]
        FontType3SetWidth(PdfOperatorFontType3SetWidth {
            pos: PdfInputPositionNoCompare,
            #[parse(x, y)]
            width: PdfVec2D,
        }),
        #[kw = b"d1"]
        FontType3SetWidthAndBBox(PdfOperatorFontType3SetWidthAndBBox {
            pos: PdfInputPositionNoCompare,
            #[parse(width_x, width_y)]
            width: PdfVec2D,
            #[parse_flat(lower_left_x, lower_left_y, upper_right_x, upper_right_y)]
            bbox: PdfRectangle,
        }),
        #[kw = b"Do"]
        PaintXObject(PdfOperatorPaintXObject {
            pos: PdfInputPositionNoCompare,
            #[parse(name)]
            name: PdfName,
        }),
        #[kw = b"DP"]
        DesignateMarkedContentPointWithProperties(PdfOperatorDesignateMarkedContentPointWithProperties {
            pos: PdfInputPositionNoCompare,
            #[parse(tag)]
            tag: PdfName,
            #[parse(properties)]
            properties: NameOr<PdfDictionary>,
        }),
        #[kw = b"EI"]
        EndInlineImage(PdfOperatorEndInlineImage {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"EMC"]
        EndMarkedContent(PdfOperatorEndMarkedContent {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"ET"]
        EndText(PdfOperatorEndText {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"EX"]
        EndCompatibilitySection(PdfOperatorEndCompatibilitySection {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"f"]
        FillPath(PdfOperatorFillPath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"F"]
        FillPathObsolete(PdfOperatorFillPathObsolete {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"f*"]
        FillPathEvenOdd(PdfOperatorFillPathEvenOdd {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"G"]
        SetStrokeGray(PdfOperatorSetStrokeGray {
            pos: PdfInputPositionNoCompare,
            #[parse(gray)]
            gray: f32,
        }),
        #[kw = b"g"]
        SetNonStrokeGray(PdfOperatorSetNonStrokeGray {
            pos: PdfInputPositionNoCompare,
            #[parse(gray)]
            gray: f32,
        }),
        #[kw = b"gs"]
        SetGraphicsState(PdfOperatorSetGraphicsState {
            pos: PdfInputPositionNoCompare,
            #[parse(dictionary_name)]
            dictionary_name: PdfName,
        }),
        #[kw = b"h"]
        CloseSubpath(PdfOperatorCloseSubpath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"i"]
        SetFlatnessTolerance(PdfOperatorSetFlatnessTolerance {
            pos: PdfInputPositionNoCompare,
            #[parse(flatness)]
            flatness: f32,
        }),
        #[kw = b"ID"]
        BeginInlineImageData(PdfOperatorBeginInlineImageData {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"j"]
        SetLineJoinStyle(PdfOperatorSetLineJoinStyle {
            pos: PdfInputPositionNoCompare,
            #[parse(line_join_style)]
            line_join_style: u8, // TODO parse
        }),
        #[kw = b"J"]
        SetLineCapStyle(PdfOperatorSetLineCapStyle {
            pos: PdfInputPositionNoCompare,
            #[parse(line_cap_style)]
            line_cap_style: u8, // TODO parse
        }),
        #[kw = b"K"]
        SetStrokeCmyk(PdfOperatorSetStrokeCmyk {
            pos: PdfInputPositionNoCompare,
            #[parse(c)]
            c: f32,
            #[parse(m)]
            m: f32,
            #[parse(y)]
            y: f32,
            #[parse(k)]
            k: f32,
        }),
        #[kw = b"k"]
        SetNonStrokeCmyk(PdfOperatorSetNonStrokeCmyk {
            pos: PdfInputPositionNoCompare,
            #[parse(c)]
            c: f32,
            #[parse(m)]
            m: f32,
            #[parse(y)]
            y: f32,
            #[parse(k)]
            k: f32,
        }),
        #[kw = b"l"]
        LineTo(PdfOperatorLineTo {
            pos: PdfInputPositionNoCompare,
            #[parse(x, y)]
            to: PdfVec2D,
        }),
        #[kw = b"m"]
        MoveTo(PdfOperatorMoveTo {
            pos: PdfInputPositionNoCompare,
            #[parse(x, y)]
            to: PdfVec2D,
        }),
        #[kw = b"M"]
        SetMiterLimit(PdfOperatorSetMiterLimit {
            pos: PdfInputPositionNoCompare,
            #[parse(limit)]
            limit: f32,
        }),
        #[kw = b"MP"]
        DesignateMarkedContentPoint(PdfOperatorDesignateMarkedContentPoint {
            pos: PdfInputPositionNoCompare,
            #[parse(tag)]
            tag: PdfName,
        }),
        #[kw = b"n"]
        EndPath(PdfOperatorEndPath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"q"]
        SaveGraphicsState(PdfOperatorSaveGraphicsState {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"Q"]
        RestoreGraphicsState(PdfOperatorRestoreGraphicsState {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"re"]
        Rectangle(PdfOperatorRectangle {
            pos: PdfInputPositionNoCompare,
            #[parse(x, y)]
            p: PdfVec2D,
            #[parse(width, height)]
            size: PdfVec2D,
        }),
        #[kw = b"RG"]
        SetStrokeRgb(PdfOperatorSetStrokeRgb {
            pos: PdfInputPositionNoCompare,
            #[parse(r)]
            r: f32,
            #[parse(g)]
            g: f32,
            #[parse(b)]
            b: f32,
        }),
        #[kw = b"rg"]
        SetNonStrokeRgb(PdfOperatorSetNonStrokeRgb {
            pos: PdfInputPositionNoCompare,
            #[parse(r)]
            r: f32,
            #[parse(g)]
            g: f32,
            #[parse(b)]
            b: f32,
        }),
        #[kw = b"ri"]
        SetColorRenderingIntent(PdfOperatorSetColorRenderingIntent {
            pos: PdfInputPositionNoCompare,
            #[parse(intent)]
            intent: PdfName,
        }),
        #[kw = b"s"]
        CloseAndStrokePath(PdfOperatorCloseAndStrokePath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"S"]
        StrokePath(PdfOperatorStrokePath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"SC"]
        SetStrokeColor(PdfOperatorSetStrokeColor {
            pos: PdfInputPositionNoCompare,
            #[parse_iter(...)]
            color: Arc<[f32]>,
        }),
        #[kw = b"sc"]
        SetNonStrokeColor(PdfOperatorSetNonStrokeColor {
            pos: PdfInputPositionNoCompare,
            #[parse_iter(...)]
            color: Arc<[f32]>,
        }),
        #[kw = b"SCN"]
        SetStrokeColorWithName(PdfOperatorSetStrokeColorWithName {
            pos: PdfInputPositionNoCompare,
            #[parse_iter(...)]
            color_and_name: Arc<[NameOr<f32>]>,
        }),
        #[kw = b"scn"]
        SetNonStrokeColorWithName(PdfOperatorSetNonStrokeColorWithName {
            pos: PdfInputPositionNoCompare,
            #[parse_iter(...)]
            color_and_name: Arc<[NameOr<f32>]>,
        }),
        // `name sh`: the operand names the shading to paint.
        #[kw = b"sh"]
        Shade(PdfOperatorShade {
            pos: PdfInputPositionNoCompare,
            #[parse(name)]
            name: PdfName,
        }),
        #[kw = b"T*"]
        TextNextLine(PdfOperatorTextNextLine {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"Tc"]
        SetCharacterSpacing(PdfOperatorSetCharacterSpacing {
            pos: PdfInputPositionNoCompare,
            #[parse(char_space)]
            char_space: f32,
        }),
        #[kw = b"Td"]
        TextNextLineWithOffset(PdfOperatorTextNextLineWithOffset {
            pos: PdfInputPositionNoCompare,
            #[parse(x, y)]
            offset: PdfVec2D,
        }),
        #[kw = b"TD"]
        TextNextLineWithOffsetAndLeading(PdfOperatorTextNextLineWithOffsetAndLeading {
            pos: PdfInputPositionNoCompare,
            #[parse(x, y)]
            offset: PdfVec2D,
        }),
        #[kw = b"Tf"]
        SetFontAndSize(PdfOperatorSetFontAndSize {
            pos: PdfInputPositionNoCompare,
            #[parse(font)]
            font: PdfName,
            #[parse(size)]
            size: f32,
        }),
        #[kw = b"Tj"]
        ShowText(PdfOperatorShowText {
            pos: PdfInputPositionNoCompare,
            #[parse(text)]
            text: PdfString,
        }),
        #[kw = b"TJ"]
        ShowTextWithGlyphPositioning(PdfOperatorShowTextWithGlyphPositioning {
            pos: PdfInputPositionNoCompare,
            #[parse(text_and_positioning)]
            text_and_positioning: Arc<[PdfStringOrNumber]>,
        }),
        #[kw = b"TL"]
        SetTextLeading(PdfOperatorSetTextLeading {
            pos: PdfInputPositionNoCompare,
            #[parse(leading)]
            leading: f32,
        }),
        #[kw = b"Tm"]
        SetTextMatrix(PdfOperatorSetTextMatrix {
            pos: PdfInputPositionNoCompare,
            #[parse_flat(a, b, c, d, e, f)]
            matrix: PdfMatrix,
        }),
        #[kw = b"Tr"]
        SetTextRenderingMode(PdfOperatorSetTextRenderingMode {
            pos: PdfInputPositionNoCompare,
            #[parse(rendering_mode)]
            rendering_mode: u8, // TODO: parse
        }),
        #[kw = b"Ts"]
        SetTextRise(PdfOperatorSetTextRise {
            pos: PdfInputPositionNoCompare,
            #[parse(rise)]
            rise: f32,
        }),
        #[kw = b"Tw"]
        SetWordSpacing(PdfOperatorSetWordSpacing {
            pos: PdfInputPositionNoCompare,
            #[parse(word_space)]
            word_space: f32,
        }),
        #[kw = b"Tz"]
        SetTextHorizontalScaling(PdfOperatorSetTextHorizontalScaling {
            pos: PdfInputPositionNoCompare,
            #[parse(scale_percent)]
            scale_percent: f32,
        }),
        // `x2 y2 x3 y3 v`: only the second and third points are given as operands.
        #[kw = b"v"]
        CurveTo23(PdfOperatorCurveTo23 {
            pos: PdfInputPositionNoCompare,
            #[parse(x2, y2)]
            p2: PdfVec2D,
            #[parse(x3, y3)]
            p3: PdfVec2D,
        }),
        #[kw = b"w"]
        SetLineWidth(PdfOperatorSetLineWidth {
            pos: PdfInputPositionNoCompare,
            #[parse(line_width)]
            line_width: f32,
        }),
        #[kw = b"W"]
        Clip(PdfOperatorClip {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"W*"]
        ClipEvenOdd(PdfOperatorClipEvenOdd {
            pos: PdfInputPositionNoCompare,
        }),
        // `x1 y1 x3 y3 y`: only the first and third points are given as operands.
        #[kw = b"y"]
        CurveTo13(PdfOperatorCurveTo13 {
            pos: PdfInputPositionNoCompare,
            #[parse(x1, y1)]
            p1: PdfVec2D,
            #[parse(x3, y3)]
            p3: PdfVec2D,
        }),
        #[kw = b"'"]
        TextNextLineAndShow(PdfOperatorTextNextLineAndShow {
            pos: PdfInputPositionNoCompare,
            #[parse(text)]
            text: PdfString,
        }),
        #[kw = b"\""]
        SetSpacingThenTextNextLineAndShow(PdfOperatorSetSpacingThenTextNextLineAndShow {
            pos: PdfInputPositionNoCompare,
            #[parse(word_space)]
            word_space: f32,
            #[parse(char_space)]
            char_space: f32,
            #[parse(text)]
            text: PdfString,
        }),
    }
}
|
||||||
|
|
||||||
|
impl GetPdfInputPosition for PdfOperator {
|
||||||
|
fn get_pdf_input_position(&self) -> PdfInputPosition {
|
||||||
|
self.pos()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl GetPdfInputPosition for PdfOperatorAndOperands {
|
||||||
|
fn get_pdf_input_position(&self) -> PdfInputPosition {
|
||||||
|
self.pos()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct PdfContentStreamData {
|
||||||
|
pub operators: Arc<[PdfOperatorAndOperands]>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PdfStreamContents for PdfContentStreamData {
|
||||||
|
fn parse(
|
||||||
|
data: &[u8],
|
||||||
|
stream_pos: PdfInputPosition,
|
||||||
|
objects: Arc<PdfObjects>,
|
||||||
|
) -> Result<Self, PdfParseError> {
|
||||||
|
let mut parser = PdfParser {
|
||||||
|
objects,
|
||||||
|
tokenizer: PdfTokenizer::new(
|
||||||
|
data,
|
||||||
|
PdfInputPositionKnown {
|
||||||
|
pos: 0,
|
||||||
|
containing_streams_pos: stream_pos.get().map(|v| v.pos),
|
||||||
|
},
|
||||||
|
),
|
||||||
|
};
|
||||||
|
let mut operands = Vec::new();
|
||||||
|
let mut operators = Vec::new();
|
||||||
|
loop {
|
||||||
|
parser.skip_comments_and_whitespace();
|
||||||
|
if parser.tokenizer.peek().is_none() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
match parser.parse_object_or_operator()? {
|
||||||
|
PdfObjectOrStreamDictionaryOrOperator::StreamDictionary {
|
||||||
|
stream_kw_pos, ..
|
||||||
|
} => return Err(PdfParseError::StreamNotAllowedHere { pos: stream_kw_pos }),
|
||||||
|
PdfObjectOrStreamDictionaryOrOperator::Object(object) => operands.push(object),
|
||||||
|
PdfObjectOrStreamDictionaryOrOperator::Operator(operator) => {
|
||||||
|
operators.push(PdfOperator::from(operator).parse(operands.drain(..))?);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if operands.is_empty() {
|
||||||
|
Ok(Self {
|
||||||
|
operators: operators.into(),
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
Err(PdfParseError::MissingOperator {
|
||||||
|
pos: parser.tokenizer.pos(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A PDF stream with a plain dictionary whose data is parsed as `PdfContentStreamData`.
pub type PdfContentStream = PdfStream<PdfDictionary, PdfContentStreamData>;
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ use core::fmt;
|
||||||
use std::{borrow::Cow, sync::Arc};
|
use std::{borrow::Cow, sync::Arc};
|
||||||
|
|
||||||
use crate::pdf::{
|
use crate::pdf::{
|
||||||
|
content_stream::PdfContentStream,
|
||||||
font::PdfFont,
|
font::PdfFont,
|
||||||
object::{
|
object::{
|
||||||
IsPdfNull, MaybeArray, PdfDate, PdfDictionary, PdfInteger, PdfName, PdfObject,
|
IsPdfNull, MaybeArray, PdfDate, PdfDictionary, PdfInteger, PdfName, PdfObject,
|
||||||
|
|
@ -238,7 +239,7 @@ pdf_parse! {
|
||||||
#[pdf(name = "BoxColorInfo")]
|
#[pdf(name = "BoxColorInfo")]
|
||||||
pub box_color_info: Option<PdfDictionary>,
|
pub box_color_info: Option<PdfDictionary>,
|
||||||
#[pdf(name = "Contents")]
|
#[pdf(name = "Contents")]
|
||||||
pub contents: MaybeArray<PdfStream>,
|
pub contents: MaybeArray<PdfContentStream>,
|
||||||
#[pdf(name = "Group")]
|
#[pdf(name = "Group")]
|
||||||
pub group: Option<PdfDictionary>,
|
pub group: Option<PdfDictionary>,
|
||||||
#[pdf(name = "Thumb")]
|
#[pdf(name = "Thumb")]
|
||||||
|
|
@ -388,7 +389,7 @@ pub struct PdfPage {
|
||||||
pub trim_box: PdfRectangle,
|
pub trim_box: PdfRectangle,
|
||||||
pub art_box: PdfRectangle,
|
pub art_box: PdfRectangle,
|
||||||
pub box_color_info: Option<PdfDictionary>,
|
pub box_color_info: Option<PdfDictionary>,
|
||||||
pub contents: Arc<[PdfStream]>,
|
pub contents: Arc<[PdfContentStream]>,
|
||||||
pub rotate: PdfPageRotation,
|
pub rotate: PdfPageRotation,
|
||||||
pub group: Option<PdfDictionary>,
|
pub group: Option<PdfDictionary>,
|
||||||
pub thumbnail: Option<PdfStream>,
|
pub thumbnail: Option<PdfStream>,
|
||||||
|
|
|
||||||
|
|
@ -34,7 +34,7 @@ impl std::fmt::Debug for PdfString {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
pub struct PdfStringBytesDebug<'a>(&'a [u8]);
|
pub struct PdfStringBytesDebug<'a>(pub &'a [u8]);
|
||||||
|
|
||||||
impl<'a> fmt::Display for PdfStringBytesDebug<'a> {
|
impl<'a> fmt::Display for PdfStringBytesDebug<'a> {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
|
@ -407,6 +407,81 @@ impl PdfParse for PdfNumber {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub enum PdfStringOrNumber {
|
||||||
|
String(PdfString),
|
||||||
|
Number(PdfNumber),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Debug for PdfStringOrNumber {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
Self::String(v) => v.fmt(f),
|
||||||
|
Self::Number(v) => v.fmt(f),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PdfStringOrNumber {
|
||||||
|
pub fn pos(self) -> PdfInputPosition {
|
||||||
|
match self {
|
||||||
|
Self::String(v) => v.pos(),
|
||||||
|
Self::Number(v) => v.pos(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PdfObjectDirect {
|
||||||
|
pub fn string_or_number(&self) -> Option<PdfStringOrNumber> {
|
||||||
|
match *self {
|
||||||
|
PdfObjectDirect::Integer(v) => Some(PdfStringOrNumber::Number(PdfNumber::Integer(v))),
|
||||||
|
PdfObjectDirect::Real(v) => Some(PdfStringOrNumber::Number(PdfNumber::Real(v))),
|
||||||
|
PdfObjectDirect::String(ref v) => Some(PdfStringOrNumber::String(v.clone())),
|
||||||
|
PdfObjectDirect::Boolean(_)
|
||||||
|
| PdfObjectDirect::Name(_)
|
||||||
|
| PdfObjectDirect::Array(_)
|
||||||
|
| PdfObjectDirect::Dictionary(_)
|
||||||
|
| PdfObjectDirect::Stream(_)
|
||||||
|
| PdfObjectDirect::Null(_) => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PdfObjectNonNull {
|
||||||
|
pub fn string_or_number(&self) -> Option<PdfStringOrNumber> {
|
||||||
|
match *self {
|
||||||
|
PdfObjectNonNull::Integer(v) => Some(PdfStringOrNumber::Number(PdfNumber::Integer(v))),
|
||||||
|
PdfObjectNonNull::Real(v) => Some(PdfStringOrNumber::Number(PdfNumber::Real(v))),
|
||||||
|
PdfObjectNonNull::String(ref v) => Some(PdfStringOrNumber::String(v.clone())),
|
||||||
|
PdfObjectNonNull::Boolean(_)
|
||||||
|
| PdfObjectNonNull::Name(_)
|
||||||
|
| PdfObjectNonNull::Array(_)
|
||||||
|
| PdfObjectNonNull::Dictionary(_)
|
||||||
|
| PdfObjectNonNull::Stream(_) => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl IsPdfNull for PdfStringOrNumber {
    /// Always `false`: both variants hold an actual string or number.
    fn is_pdf_null(&self) -> bool {
        false
    }
}
|
||||||
|
|
||||||
|
impl PdfParse for PdfStringOrNumber {
|
||||||
|
fn type_name() -> Cow<'static, str> {
|
||||||
|
Cow::Borrowed("string or number")
|
||||||
|
}
|
||||||
|
fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
|
||||||
|
let object = PdfObjectDirect::from(object);
|
||||||
|
object.string_or_number().ok_or(PdfParseError::InvalidType {
|
||||||
|
pos: object.pos(),
|
||||||
|
ty: object.type_name(),
|
||||||
|
expected_ty: "string or number",
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
macro_rules! make_pdf_object {
|
macro_rules! make_pdf_object {
|
||||||
(
|
(
|
||||||
$(
|
$(
|
||||||
|
|
@ -818,34 +893,35 @@ impl PdfObjectIndirect {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn get(&self) -> PdfObjectDirect {
|
pub fn get(&self) -> PdfObjectDirect {
|
||||||
if let Some(objects) = self.objects.upgrade() {
|
let Some(objects) = self.objects.upgrade() else {
|
||||||
if let Some(objects) = objects.inner.get() {
|
panic!("PdfObjects is no longer available");
|
||||||
let final_id = self.final_id.get().copied();
|
};
|
||||||
let limit = if final_id.is_some() { 1 } else { 1000usize };
|
if let Some(objects) = objects.inner.get() {
|
||||||
let mut id = final_id.unwrap_or(self.id);
|
let final_id = self.final_id.get().copied();
|
||||||
for _ in 0..limit {
|
let limit = if final_id.is_some() { 1 } else { 1000usize };
|
||||||
if let Some(object) = objects.objects.get(&self.id) {
|
let mut id = final_id.unwrap_or(self.id);
|
||||||
let retval = match object {
|
for _ in 0..limit {
|
||||||
PdfObject::Boolean(v) => PdfObjectDirect::Boolean(*v),
|
if let Some(object) = objects.objects.get(&self.id) {
|
||||||
PdfObject::Integer(v) => PdfObjectDirect::Integer(*v),
|
let retval = match object {
|
||||||
PdfObject::Real(v) => PdfObjectDirect::Real(*v),
|
PdfObject::Boolean(v) => PdfObjectDirect::Boolean(*v),
|
||||||
PdfObject::String(v) => PdfObjectDirect::String(v.clone()),
|
PdfObject::Integer(v) => PdfObjectDirect::Integer(*v),
|
||||||
PdfObject::Name(v) => PdfObjectDirect::Name(v.clone()),
|
PdfObject::Real(v) => PdfObjectDirect::Real(*v),
|
||||||
PdfObject::Array(v) => PdfObjectDirect::Array(v.clone()),
|
PdfObject::String(v) => PdfObjectDirect::String(v.clone()),
|
||||||
PdfObject::Dictionary(v) => PdfObjectDirect::Dictionary(v.clone()),
|
PdfObject::Name(v) => PdfObjectDirect::Name(v.clone()),
|
||||||
PdfObject::Stream(v) => PdfObjectDirect::Stream(v.clone()),
|
PdfObject::Array(v) => PdfObjectDirect::Array(v.clone()),
|
||||||
PdfObject::Null(v) => PdfObjectDirect::Null(*v),
|
PdfObject::Dictionary(v) => PdfObjectDirect::Dictionary(v.clone()),
|
||||||
PdfObject::Indirect(v) => {
|
PdfObject::Stream(v) => PdfObjectDirect::Stream(v.clone()),
|
||||||
id = v.id;
|
PdfObject::Null(v) => PdfObjectDirect::Null(*v),
|
||||||
continue;
|
PdfObject::Indirect(v) => {
|
||||||
}
|
id = v.id;
|
||||||
};
|
continue;
|
||||||
// we could be racing with another thread, so set can fail but that's not a problem
|
}
|
||||||
let _ = self.final_id.set(id);
|
};
|
||||||
return retval;
|
// we could be racing with another thread, so set can fail but that's not a problem
|
||||||
} else {
|
let _ = self.final_id.set(id);
|
||||||
return PdfObjectDirect::Null(PdfNull::new(id.pos));
|
return retval;
|
||||||
}
|
} else {
|
||||||
|
return PdfObjectDirect::Null(PdfNull::new(id.pos));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -1213,21 +1289,150 @@ impl<'a, T> IntoIterator for &'a MaybeArray<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub enum NameOr<T> {
|
||||||
|
Name(PdfName),
|
||||||
|
Value(T),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T> NameOr<T> {
|
||||||
|
pub fn into_resolved<E>(self, resolve: impl FnOnce(PdfName) -> Result<T, E>) -> Result<T, E> {
|
||||||
|
match self {
|
||||||
|
Self::Name(name) => resolve(name),
|
||||||
|
Self::Value(v) => Ok(v),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub fn replace_with_resolved<E>(
|
||||||
|
&mut self,
|
||||||
|
resolve: impl FnOnce(&PdfName) -> Result<T, E>,
|
||||||
|
) -> Result<&mut T, E> {
|
||||||
|
match self {
|
||||||
|
Self::Name(name) => {
|
||||||
|
*self = Self::Value(resolve(name)?);
|
||||||
|
let Self::Value(v) = self else {
|
||||||
|
unreachable!();
|
||||||
|
};
|
||||||
|
Ok(v)
|
||||||
|
}
|
||||||
|
Self::Value(v) => Ok(v),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: fmt::Debug> fmt::Debug for NameOr<T> {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
Self::Name(v) => v.fmt(f),
|
||||||
|
Self::Value(v) => v.fmt(f),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: GetPdfInputPosition> GetPdfInputPosition for NameOr<T> {
|
||||||
|
fn get_pdf_input_position(&self) -> PdfInputPosition {
|
||||||
|
match self {
|
||||||
|
Self::Name(v) => v.pos(),
|
||||||
|
Self::Value(v) => v.get_pdf_input_position(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: IsPdfNull> IsPdfNull for NameOr<T> {
    /// A name is never null; a value is null exactly when the value says so.
    fn is_pdf_null(&self) -> bool {
        matches!(self, Self::Value(value) if value.is_pdf_null())
    }
}
|
||||||
|
|
||||||
|
impl<T: PdfParse> PdfParse for NameOr<T> {
|
||||||
|
fn type_name() -> Cow<'static, str> {
|
||||||
|
Cow::Owned(format!("NameOr<{}>", T::type_name()))
|
||||||
|
}
|
||||||
|
fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
|
||||||
|
Ok(match PdfObjectDirect::from(object) {
|
||||||
|
PdfObjectDirect::Name(name) => Self::Name(name),
|
||||||
|
object => Self::Value(T::parse(object.into())?),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Copy, Clone, PartialEq)]
|
#[derive(Copy, Clone, PartialEq)]
|
||||||
pub struct PdfPoint {
|
pub struct PdfMatrix {
|
||||||
|
pub pos: PdfInputPositionNoCompare,
|
||||||
|
pub elements: [f32; 6],
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Debug for PdfMatrix {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
let Self { pos, elements } = *self;
|
||||||
|
write!(f, "PdfMatrix(at {pos}, {elements:?})")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl IsPdfNull for PdfMatrix {
    /// Always `false`: a matrix is never the PDF null object.
    fn is_pdf_null(&self) -> bool {
        false
    }
}
|
||||||
|
|
||||||
|
impl PdfParse for PdfMatrix {
|
||||||
|
fn type_name() -> Cow<'static, str> {
|
||||||
|
Cow::Borrowed("matrix")
|
||||||
|
}
|
||||||
|
fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
|
||||||
|
Ok(Self {
|
||||||
|
pos: object.pos().into(),
|
||||||
|
elements: PdfParse::parse(object)?,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PdfMatrix {
|
||||||
|
pub fn parse_flat(
|
||||||
|
a: PdfObject,
|
||||||
|
b: PdfObject,
|
||||||
|
c: PdfObject,
|
||||||
|
d: PdfObject,
|
||||||
|
e: PdfObject,
|
||||||
|
f: PdfObject,
|
||||||
|
) -> Result<Self, PdfParseError> {
|
||||||
|
Ok(Self {
|
||||||
|
pos: a.pos().into(),
|
||||||
|
elements: [
|
||||||
|
PdfParse::parse(a)?,
|
||||||
|
PdfParse::parse(b)?,
|
||||||
|
PdfParse::parse(c)?,
|
||||||
|
PdfParse::parse(d)?,
|
||||||
|
PdfParse::parse(e)?,
|
||||||
|
PdfParse::parse(f)?,
|
||||||
|
],
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl GetPdfInputPosition for PdfMatrix {
|
||||||
|
fn get_pdf_input_position(&self) -> PdfInputPosition {
|
||||||
|
self.pos.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, PartialEq)]
|
||||||
|
pub struct PdfVec2D {
|
||||||
pub pos: PdfInputPositionNoCompare,
|
pub pos: PdfInputPositionNoCompare,
|
||||||
pub x: f32,
|
pub x: f32,
|
||||||
pub y: f32,
|
pub y: f32,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Debug for PdfPoint {
|
impl fmt::Debug for PdfVec2D {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
let Self { pos, x, y } = *self;
|
let Self { pos, x, y } = *self;
|
||||||
write!(f, "PdfPoint(at {pos}, {x}, {y})")
|
write!(f, "PdfVec2D(at {pos}, {x}, {y})")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PdfPoint {
|
impl PdfVec2D {
|
||||||
pub fn parse(x: PdfObject, y: PdfObject) -> Result<Self, PdfParseError> {
|
pub fn parse(x: PdfObject, y: PdfObject) -> Result<Self, PdfParseError> {
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
pos: x.pos().into(),
|
pos: x.pos().into(),
|
||||||
|
|
@ -1237,7 +1442,7 @@ impl PdfPoint {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl GetPdfInputPosition for PdfPoint {
|
impl GetPdfInputPosition for PdfVec2D {
|
||||||
fn get_pdf_input_position(&self) -> PdfInputPosition {
|
fn get_pdf_input_position(&self) -> PdfInputPosition {
|
||||||
self.pos.0
|
self.pos.0
|
||||||
}
|
}
|
||||||
|
|
@ -1246,13 +1451,13 @@ impl GetPdfInputPosition for PdfPoint {
|
||||||
#[derive(Copy, Clone, Debug)]
|
#[derive(Copy, Clone, Debug)]
|
||||||
pub struct PdfRectangle {
|
pub struct PdfRectangle {
|
||||||
/// the corner with the smaller x and y coordinates
|
/// the corner with the smaller x and y coordinates
|
||||||
smaller: PdfPoint,
|
smaller: PdfVec2D,
|
||||||
/// the corner with the larger x and y coordinates
|
/// the corner with the larger x and y coordinates
|
||||||
larger: PdfPoint,
|
larger: PdfVec2D,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PdfRectangle {
|
impl PdfRectangle {
|
||||||
pub fn new(mut smaller: PdfPoint, mut larger: PdfPoint) -> Self {
|
pub fn new(mut smaller: PdfVec2D, mut larger: PdfVec2D) -> Self {
|
||||||
// `pos` follows the `x` coordinate
|
// `pos` follows the `x` coordinate
|
||||||
if smaller.x.is_nan() {
|
if smaller.x.is_nan() {
|
||||||
smaller.pos = larger.pos;
|
smaller.pos = larger.pos;
|
||||||
|
|
@ -1262,12 +1467,12 @@ impl PdfRectangle {
|
||||||
std::mem::swap(&mut smaller.pos, &mut larger.pos);
|
std::mem::swap(&mut smaller.pos, &mut larger.pos);
|
||||||
}
|
}
|
||||||
Self {
|
Self {
|
||||||
smaller: PdfPoint {
|
smaller: PdfVec2D {
|
||||||
pos: smaller.pos,
|
pos: smaller.pos,
|
||||||
x: smaller.x.min(larger.x),
|
x: smaller.x.min(larger.x),
|
||||||
y: smaller.y.min(larger.y),
|
y: smaller.y.min(larger.y),
|
||||||
},
|
},
|
||||||
larger: PdfPoint {
|
larger: PdfVec2D {
|
||||||
pos: larger.pos,
|
pos: larger.pos,
|
||||||
x: smaller.x.max(larger.x),
|
x: smaller.x.max(larger.x),
|
||||||
y: smaller.y.max(larger.y),
|
y: smaller.y.max(larger.y),
|
||||||
|
|
@ -1275,13 +1480,24 @@ impl PdfRectangle {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/// return the corner with the smaller x and y coordinates
|
/// return the corner with the smaller x and y coordinates
|
||||||
pub fn smaller(&self) -> PdfPoint {
|
pub fn smaller(&self) -> PdfVec2D {
|
||||||
self.smaller
|
self.smaller
|
||||||
}
|
}
|
||||||
/// return the corner with the larger x and y coordinates
|
/// return the corner with the larger x and y coordinates
|
||||||
pub fn larger(&self) -> PdfPoint {
|
pub fn larger(&self) -> PdfVec2D {
|
||||||
self.larger
|
self.larger
|
||||||
}
|
}
|
||||||
|
pub fn parse_flat(
|
||||||
|
lower_left_x: PdfObject,
|
||||||
|
lower_left_y: PdfObject,
|
||||||
|
upper_right_x: PdfObject,
|
||||||
|
upper_right_y: PdfObject,
|
||||||
|
) -> Result<Self, PdfParseError> {
|
||||||
|
Ok(Self::new(
|
||||||
|
PdfVec2D::parse(lower_left_x, lower_left_y)?,
|
||||||
|
PdfVec2D::parse(upper_right_x, upper_right_y)?,
|
||||||
|
))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl GetPdfInputPosition for PdfRectangle {
|
impl GetPdfInputPosition for PdfRectangle {
|
||||||
|
|
@ -1317,10 +1533,12 @@ impl PdfParse for PdfRectangle {
|
||||||
expected_ty: "rectangle",
|
expected_ty: "rectangle",
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
Ok(Self::new(
|
Self::parse_flat(
|
||||||
PdfPoint::parse(lower_left_x.clone(), lower_left_y.clone())?,
|
lower_left_x.clone(),
|
||||||
PdfPoint::parse(upper_right_x.clone(), upper_right_y.clone())?,
|
lower_left_y.clone(),
|
||||||
))
|
upper_right_x.clone(),
|
||||||
|
upper_right_y.clone(),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1366,7 +1584,7 @@ pdf_parse! {
|
||||||
#[pdf]
|
#[pdf]
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct PdfStreamDictionary<Rest = PdfDictionary> {
|
pub struct PdfStreamDictionary<Rest = PdfDictionary> {
|
||||||
#[pdf(name = PdfStreamDictionary::LENGTH_NAME)]
|
#[pdf(name = "Length")]
|
||||||
pub len: usize,
|
pub len: usize,
|
||||||
#[pdf(name = "Filter")]
|
#[pdf(name = "Filter")]
|
||||||
pub filters: MaybeArray<PdfStreamFilter>,
|
pub filters: MaybeArray<PdfStreamFilter>,
|
||||||
|
|
@ -1385,15 +1603,6 @@ pdf_parse! {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PdfStreamDictionary {
|
|
||||||
pub const LENGTH_NAME: &str = "Length";
|
|
||||||
pub(crate) fn parse_len_from_dictionary(
|
|
||||||
dictionary: &PdfDictionary,
|
|
||||||
) -> Result<usize, PdfParseError> {
|
|
||||||
PdfParse::parse(dictionary.get_or_null(Self::LENGTH_NAME.as_bytes()))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Default)]
|
#[derive(Debug, Clone, Default)]
|
||||||
pub struct PdfStreamDictionaryFiltersAndParms<'a> {
|
pub struct PdfStreamDictionaryFiltersAndParms<'a> {
|
||||||
filters: std::iter::Enumerate<std::slice::Iter<'a, PdfStreamFilter>>,
|
filters: std::iter::Enumerate<std::slice::Iter<'a, PdfStreamFilter>>,
|
||||||
|
|
@ -1505,12 +1714,45 @@ impl<Rest: PdfParse> UnparsedPdfStreamDictionary<Rest> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub trait PdfStreamContents: Sized + fmt::Debug + 'static {
|
||||||
|
fn parse(
|
||||||
|
data: &[u8],
|
||||||
|
stream_pos: PdfInputPosition,
|
||||||
|
objects: Arc<PdfObjects>,
|
||||||
|
) -> Result<Self, PdfParseError>;
|
||||||
|
fn parse_arc(
|
||||||
|
data: Arc<[u8]>,
|
||||||
|
stream_pos: PdfInputPosition,
|
||||||
|
objects: Arc<PdfObjects>,
|
||||||
|
) -> Result<Self, PdfParseError> {
|
||||||
|
Self::parse(&*data, stream_pos, objects)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PdfStreamContents for Arc<[u8]> {
|
||||||
|
fn parse(
|
||||||
|
data: &[u8],
|
||||||
|
_stream_pos: PdfInputPosition,
|
||||||
|
_objects: Arc<PdfObjects>,
|
||||||
|
) -> Result<Self, PdfParseError> {
|
||||||
|
Ok(Arc::from(data))
|
||||||
|
}
|
||||||
|
fn parse_arc(
|
||||||
|
data: Arc<[u8]>,
|
||||||
|
_stream_pos: PdfInputPosition,
|
||||||
|
_objects: Arc<PdfObjects>,
|
||||||
|
) -> Result<Self, PdfParseError> {
|
||||||
|
Ok(data.clone())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct PdfStream<Rest = PdfDictionary> {
|
pub struct PdfStream<Rest = PdfDictionary, Data: PdfStreamContents = Arc<[u8]>> {
|
||||||
pos: PdfInputPositionNoCompare,
|
pos: PdfInputPositionNoCompare,
|
||||||
dictionary: Arc<OnceLock<PdfStreamDictionary<Rest>>>,
|
objects: std::sync::Weak<PdfObjects>,
|
||||||
|
dictionary: PdfStreamDictionary<Rest>,
|
||||||
encoded_data: Arc<[u8]>,
|
encoded_data: Arc<[u8]>,
|
||||||
decoded_data: Arc<OnceLock<Result<Arc<[u8]>, PdfParseError>>>,
|
decoded_data: Arc<OnceLock<Result<Data, PdfParseError>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct DumpBytes<'a>(&'a [u8]);
|
struct DumpBytes<'a>(&'a [u8]);
|
||||||
|
|
@ -1542,25 +1784,30 @@ impl fmt::Display for DumpBytes<'_> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<Rest: fmt::Debug> fmt::Debug for PdfStream<Rest> {
|
impl<Rest: fmt::Debug, Data: PdfStreamContents> fmt::Debug for PdfStream<Rest, Data> {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
let Self {
|
let Self {
|
||||||
pos,
|
pos,
|
||||||
|
objects: _,
|
||||||
dictionary,
|
dictionary,
|
||||||
encoded_data,
|
encoded_data,
|
||||||
decoded_data,
|
decoded_data,
|
||||||
} = self;
|
} = self;
|
||||||
let mut debug_struct = f.debug_struct("PdfStream");
|
let mut debug_struct = f.debug_struct("PdfStream");
|
||||||
debug_struct.field("pos", pos);
|
debug_struct.field("pos", pos);
|
||||||
if let Some(dictionary) = dictionary.get() {
|
debug_struct.field("dictionary", dictionary);
|
||||||
debug_struct.field("dictionary", dictionary);
|
|
||||||
} else {
|
|
||||||
debug_struct.field("dictionary", &format_args!("<not-yet-parsed>"));
|
|
||||||
}
|
|
||||||
debug_struct.field("encoded_data", &DumpBytes(encoded_data));
|
debug_struct.field("encoded_data", &DumpBytes(encoded_data));
|
||||||
if let Some(decoded_data) = decoded_data.get() {
|
if let Some(decoded_data) = decoded_data.get() {
|
||||||
match decoded_data {
|
match decoded_data {
|
||||||
Ok(decoded_data) => debug_struct.field("decoded_data", &DumpBytes(decoded_data)),
|
Ok(decoded_data) => {
|
||||||
|
if let Some(decoded_data) =
|
||||||
|
<dyn std::any::Any>::downcast_ref::<Arc<[u8]>>(decoded_data)
|
||||||
|
{
|
||||||
|
debug_struct.field("decoded_data", &DumpBytes(&**decoded_data))
|
||||||
|
} else {
|
||||||
|
debug_struct.field("decoded_data", decoded_data)
|
||||||
|
}
|
||||||
|
}
|
||||||
Err(e) => debug_struct.field("decoded_data", &Err::<(), _>(e)),
|
Err(e) => debug_struct.field("decoded_data", &Err::<(), _>(e)),
|
||||||
};
|
};
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -1570,47 +1817,31 @@ impl<Rest: fmt::Debug> fmt::Debug for PdfStream<Rest> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<Rest> PdfStream<Rest> {
|
impl<Rest, Data: PdfStreamContents> PdfStream<Rest, Data> {
|
||||||
pub fn new(
|
pub fn new(
|
||||||
pos: impl Into<PdfInputPositionNoCompare>,
|
pos: impl Into<PdfInputPositionNoCompare>,
|
||||||
|
objects: &Arc<PdfObjects>,
|
||||||
dictionary: PdfStreamDictionary<Rest>,
|
dictionary: PdfStreamDictionary<Rest>,
|
||||||
encoded_data: Arc<[u8]>,
|
encoded_data: Arc<[u8]>,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
pos: pos.into(),
|
pos: pos.into(),
|
||||||
dictionary: Arc::new(OnceLock::from(dictionary)),
|
objects: Arc::downgrade(objects),
|
||||||
|
dictionary,
|
||||||
encoded_data,
|
encoded_data,
|
||||||
decoded_data: Arc::new(OnceLock::new()),
|
decoded_data: Arc::new(OnceLock::new()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub(crate) fn new_unparsed(
|
|
||||||
pos: impl Into<PdfInputPositionNoCompare>,
|
|
||||||
unparsed_dictionary: PdfDictionary,
|
|
||||||
encoded_data: Arc<[u8]>,
|
|
||||||
) -> (Self, UnparsedPdfStreamDictionary<Rest>) {
|
|
||||||
let dictionary = Arc::new(OnceLock::new());
|
|
||||||
(
|
|
||||||
Self {
|
|
||||||
pos: pos.into(),
|
|
||||||
dictionary: dictionary.clone(),
|
|
||||||
encoded_data,
|
|
||||||
decoded_data: Arc::new(OnceLock::new()),
|
|
||||||
},
|
|
||||||
UnparsedPdfStreamDictionary {
|
|
||||||
unparsed_dictionary,
|
|
||||||
dictionary,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
}
|
|
||||||
pub fn dictionary(&self) -> &PdfStreamDictionary<Rest> {
|
pub fn dictionary(&self) -> &PdfStreamDictionary<Rest> {
|
||||||
self.dictionary
|
&self.dictionary
|
||||||
.get()
|
|
||||||
.expect("haven't finished parsing all pdf object definitions yet")
|
|
||||||
}
|
}
|
||||||
pub fn encoded_data(&self) -> &Arc<[u8]> {
|
pub fn encoded_data(&self) -> &Arc<[u8]> {
|
||||||
&self.encoded_data
|
&self.encoded_data
|
||||||
}
|
}
|
||||||
fn try_decode_data(&self) -> Result<Arc<[u8]>, PdfParseError> {
|
fn try_decode_data(&self) -> Result<Data, PdfParseError> {
|
||||||
|
let Some(objects) = self.objects.upgrade() else {
|
||||||
|
panic!("PdfObjects is no longer available");
|
||||||
|
};
|
||||||
let dictionary = self.dictionary();
|
let dictionary = self.dictionary();
|
||||||
let (data, filters) = if let Some(file) = &dictionary.file {
|
let (data, filters) = if let Some(file) = &dictionary.file {
|
||||||
todo!()
|
todo!()
|
||||||
|
|
@ -1618,7 +1849,7 @@ impl<Rest> PdfStream<Rest> {
|
||||||
(&self.encoded_data, dictionary.filters_and_parms())
|
(&self.encoded_data, dictionary.filters_and_parms())
|
||||||
};
|
};
|
||||||
if filters.len() == 0 {
|
if filters.len() == 0 {
|
||||||
return Ok(data.clone());
|
return Data::parse_arc(data.clone(), self.pos.0, objects);
|
||||||
}
|
}
|
||||||
let mut data: &[u8] = data;
|
let mut data: &[u8] = data;
|
||||||
let mut buffer;
|
let mut buffer;
|
||||||
|
|
@ -1626,26 +1857,26 @@ impl<Rest> PdfStream<Rest> {
|
||||||
buffer = filter.decode_stream_data(filter_parms.clone(), self.pos.0, &data)?;
|
buffer = filter.decode_stream_data(filter_parms.clone(), self.pos.0, &data)?;
|
||||||
data = &buffer;
|
data = &buffer;
|
||||||
}
|
}
|
||||||
Ok(Arc::from(data))
|
Data::parse(data, self.pos.0, objects)
|
||||||
}
|
}
|
||||||
pub fn decoded_data(&self) -> &Result<Arc<[u8]>, PdfParseError> {
|
pub fn decoded_data(&self) -> &Result<Data, PdfParseError> {
|
||||||
self.decoded_data.get_or_init(|| self.try_decode_data())
|
self.decoded_data.get_or_init(|| self.try_decode_data())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<Rest> GetPdfInputPosition for PdfStream<Rest> {
|
impl<Rest, Data: PdfStreamContents> GetPdfInputPosition for PdfStream<Rest, Data> {
|
||||||
fn get_pdf_input_position(&self) -> PdfInputPosition {
|
fn get_pdf_input_position(&self) -> PdfInputPosition {
|
||||||
self.pos.0
|
self.pos.0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<Rest> IsPdfNull for PdfStream<Rest> {
|
impl<Rest, Data: PdfStreamContents> IsPdfNull for PdfStream<Rest, Data> {
|
||||||
fn is_pdf_null(&self) -> bool {
|
fn is_pdf_null(&self) -> bool {
|
||||||
false
|
false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<Rest: PdfParse> PdfParse for PdfStream<Rest> {
|
impl<Rest: PdfParse, Data: PdfStreamContents> PdfParse for PdfStream<Rest, Data> {
|
||||||
fn type_name() -> Cow<'static, str> {
|
fn type_name() -> Cow<'static, str> {
|
||||||
if TypeId::of::<Rest>() == TypeId::of::<PdfDictionary>() {
|
if TypeId::of::<Rest>() == TypeId::of::<PdfDictionary>() {
|
||||||
Cow::Borrowed("stream")
|
Cow::Borrowed("stream")
|
||||||
|
|
@ -1655,38 +1886,56 @@ impl<Rest: PdfParse> PdfParse for PdfStream<Rest> {
|
||||||
}
|
}
|
||||||
fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
|
fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
|
||||||
match PdfObjectDirect::from(object) {
|
match PdfObjectDirect::from(object) {
|
||||||
PdfObjectDirect::Stream(stream) => Ok(PdfStream {
|
PdfObjectDirect::Stream(stream) => {
|
||||||
pos: stream.pos,
|
Ok(PdfStream {
|
||||||
dictionary: if let Some(dictionary) = <dyn std::any::Any>::downcast_ref::<
|
pos: stream.pos,
|
||||||
Arc<OnceLock<PdfStreamDictionary<Rest>>>,
|
dictionary: {
|
||||||
>(&stream.dictionary)
|
let PdfStreamDictionary {
|
||||||
{
|
len,
|
||||||
dictionary.clone()
|
filters,
|
||||||
} else {
|
decode_parms,
|
||||||
let PdfStreamDictionary {
|
file,
|
||||||
len,
|
file_filters,
|
||||||
filters,
|
file_decode_parms,
|
||||||
decode_parms,
|
decoded_len,
|
||||||
file,
|
rest,
|
||||||
file_filters,
|
} = stream.dictionary;
|
||||||
file_decode_parms,
|
PdfStreamDictionary {
|
||||||
decoded_len,
|
len,
|
||||||
rest,
|
filters,
|
||||||
} = stream.dictionary();
|
decode_parms,
|
||||||
Arc::new(OnceLock::from(PdfStreamDictionary {
|
file,
|
||||||
len: *len,
|
file_filters,
|
||||||
filters: filters.clone(),
|
file_decode_parms,
|
||||||
decode_parms: decode_parms.clone(),
|
decoded_len,
|
||||||
file: file.clone(),
|
rest: Rest::parse(rest.into())?,
|
||||||
file_filters: file_filters.clone(),
|
}
|
||||||
file_decode_parms: file_decode_parms.clone(),
|
},
|
||||||
decoded_len: *decoded_len,
|
encoded_data: stream.encoded_data,
|
||||||
rest: Rest::parse(rest.clone().into())?,
|
decoded_data: if let Some(decoded_data) =
|
||||||
}))
|
<dyn std::any::Any>::downcast_ref(&stream.decoded_data)
|
||||||
},
|
{
|
||||||
encoded_data: stream.encoded_data,
|
Arc::clone(decoded_data)
|
||||||
decoded_data: stream.decoded_data,
|
} else {
|
||||||
}),
|
let Some(objects) = stream.objects.upgrade() else {
|
||||||
|
panic!("PdfObjects is no longer available");
|
||||||
|
};
|
||||||
|
Arc::new(
|
||||||
|
stream
|
||||||
|
.decoded_data
|
||||||
|
.get()
|
||||||
|
.cloned()
|
||||||
|
.map(|data| {
|
||||||
|
OnceLock::from(data.and_then(|data| {
|
||||||
|
Data::parse_arc(data, stream.pos.0, objects)
|
||||||
|
}))
|
||||||
|
})
|
||||||
|
.unwrap_or_default(),
|
||||||
|
)
|
||||||
|
},
|
||||||
|
objects: stream.objects,
|
||||||
|
})
|
||||||
|
}
|
||||||
object => Err(PdfParseError::InvalidType {
|
object => Err(PdfParseError::InvalidType {
|
||||||
pos: object.get_pdf_input_position(),
|
pos: object.get_pdf_input_position(),
|
||||||
ty: object.type_name(),
|
ty: object.type_name(),
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,9 @@
|
||||||
use crate::pdf::object::{
|
use crate::pdf::{
|
||||||
IsPdfNull, MaybeArray, PdfInteger, PdfName, PdfNull, PdfNumber, PdfObject, PdfObjectDirect,
|
content_stream::{PdfOperator, PdfOperatorUnparsed},
|
||||||
PdfObjectIdentifier, PdfObjectIndirect, PdfObjectNonNull,
|
object::{
|
||||||
|
IsPdfNull, MaybeArray, PdfInteger, PdfName, PdfNull, PdfNumber, PdfObject, PdfObjectDirect,
|
||||||
|
PdfObjectIdentifier, PdfObjectIndirect, PdfObjectNonNull,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
use std::{any::Any, borrow::Cow, fmt, mem, num::NonZero, sync::Arc};
|
use std::{any::Any, borrow::Cow, fmt, mem, num::NonZero, sync::Arc};
|
||||||
|
|
||||||
|
|
@ -265,9 +268,20 @@ pub enum PdfParseError {
|
||||||
filter: PdfName,
|
filter: PdfName,
|
||||||
error: String,
|
error: String,
|
||||||
},
|
},
|
||||||
ObjectStreamParseError {
|
StreamNotAllowedHere {
|
||||||
stream_pos: PdfInputPosition,
|
pos: PdfInputPosition,
|
||||||
error: Arc<PdfParseError>,
|
},
|
||||||
|
OperatorNotAllowedHere {
|
||||||
|
operator: PdfOperatorUnparsed,
|
||||||
|
},
|
||||||
|
MissingOperator {
|
||||||
|
pos: PdfInputPosition,
|
||||||
|
},
|
||||||
|
OperatorHasTooFewOperands {
|
||||||
|
operator: PdfOperator,
|
||||||
|
},
|
||||||
|
OperatorHasTooManyOperands {
|
||||||
|
operator: PdfOperator,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -313,9 +327,11 @@ impl GetPdfInputPosition for PdfParseError {
|
||||||
| PdfParseError::InvalidStartXRefValue { pos, .. }
|
| PdfParseError::InvalidStartXRefValue { pos, .. }
|
||||||
| PdfParseError::UnknownStreamFilter { pos, .. }
|
| PdfParseError::UnknownStreamFilter { pos, .. }
|
||||||
| PdfParseError::StreamFilterError { pos, .. }
|
| PdfParseError::StreamFilterError { pos, .. }
|
||||||
| PdfParseError::ObjectStreamParseError {
|
| PdfParseError::StreamNotAllowedHere { pos }
|
||||||
stream_pos: pos, ..
|
| PdfParseError::MissingOperator { pos } => pos,
|
||||||
} => pos,
|
PdfParseError::OperatorNotAllowedHere { ref operator } => operator.pos(),
|
||||||
|
PdfParseError::OperatorHasTooFewOperands { ref operator }
|
||||||
|
| PdfParseError::OperatorHasTooManyOperands { ref operator } => operator.pos(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -443,12 +459,33 @@ impl fmt::Display for PdfParseError {
|
||||||
} => {
|
} => {
|
||||||
write!(f, "at {pos}: stream filter {filter} error: {error}")
|
write!(f, "at {pos}: stream filter {filter} error: {error}")
|
||||||
}
|
}
|
||||||
PdfParseError::ObjectStreamParseError {
|
PdfParseError::StreamNotAllowedHere { pos } => {
|
||||||
stream_pos,
|
write!(f, "at {pos}: stream not allowed here")
|
||||||
ref error,
|
}
|
||||||
} => {
|
PdfParseError::OperatorNotAllowedHere { ref operator } => {
|
||||||
write!(f, "at {stream_pos}: object stream error: ")?;
|
write!(
|
||||||
error.fmt(f)
|
f,
|
||||||
|
"at {}: operator not allowed here: {}",
|
||||||
|
operator.pos(),
|
||||||
|
operator.bytes_debug()
|
||||||
|
)
|
||||||
|
}
|
||||||
|
// Report the condition this variant stands for; "stream not allowed here"
// belongs to `StreamNotAllowedHere` only.
PdfParseError::MissingOperator { pos } => {
    write!(f, "at {pos}: missing operator")
}
|
||||||
|
PdfParseError::OperatorHasTooFewOperands { ref operator } => {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"at {}: operator has too few operands: {operator:?}",
|
||||||
|
operator.pos(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
PdfParseError::OperatorHasTooManyOperands { ref operator } => {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"at {}: operator has too many operands: {operator:?}",
|
||||||
|
operator.pos(),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue