parses root successfully
This commit is contained in:
parent
5fbfaa8053
commit
83631cc4c6
7 changed files with 623 additions and 118 deletions
154
src/pdf.rs
154
src/pdf.rs
|
|
@ -1,11 +1,11 @@
|
|||
use crate::{
|
||||
pdf::{
|
||||
object::{
|
||||
MaybeArray, PdfArray, PdfBoolean, PdfDictionary, PdfInteger, PdfName, PdfNull,
|
||||
PdfObject, PdfObjectIdentifier, PdfObjectIndirect, PdfReal, PdfStream,
|
||||
PdfArray, PdfBoolean, PdfDictionary, PdfInteger, PdfName, PdfNull, PdfObject,
|
||||
PdfObjectIdentifier, PdfObjectIndirect, PdfObjectStreamDictionary, PdfReal, PdfStream,
|
||||
PdfStreamDictionary, PdfString, UnparsedPdfStreamDictionary,
|
||||
},
|
||||
parse::{PdfInputPosition, PdfParse, PdfParseError},
|
||||
parse::{GetPdfInputPosition, PdfInputPosition, PdfParse, PdfParseError},
|
||||
},
|
||||
pdf_parse,
|
||||
util::ArcOrRef,
|
||||
|
|
@ -21,9 +21,15 @@ use std::{
|
|||
|
||||
pub mod object;
|
||||
pub mod parse;
|
||||
pub mod stream_filters;
|
||||
|
||||
/// Object state that `parse_body` stores into `PdfObjects::inner` once the
/// file body and all object streams have been parsed.
struct PdfObjectsInner {
|
||||
/// Parsed objects keyed by their object identifier.
objects: BTreeMap<PdfObjectIdentifier, PdfObject>,
|
||||
/// Streams from the file body that were recognized as object streams.
object_streams: Vec<PdfStream<PdfObjectStreamDictionary>>,
|
||||
}
|
||||
|
||||
/// Write-once container for the objects of a parsed PDF; the parser holds it
/// behind an `Arc` and fills it through a `OnceLock`.
// NOTE(review): this span is a rendered diff. The `objects` field below looks
// like the removed side and `inner` like the added side (the visible `parse`
// initializer sets both `objects:` and `inner:` for the same reason) — confirm
// against the real file before relying on either field.
pub struct PdfObjects {
|
||||
objects: OnceLock<BTreeMap<PdfObjectIdentifier, PdfObject>>,
|
||||
// Set once at the end of `parse_body` with the full object map and the list
// of object streams.
inner: OnceLock<PdfObjectsInner>,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug)]
|
||||
|
|
@ -70,24 +76,12 @@ pdf_parse! {
|
|||
// Entries of a cross-reference stream dictionary, declared through the
// `pdf_parse!` macro; each `#[pdf(name = "...")]` attribute names the
// dictionary key the field is read from.
// NOTE(review): this span is a rendered diff with removed and added lines
// interleaved. Judging by the hunk header (24 lines before, 12 after), the
// `Size`, `Prev`, `Root`, `Encrypt`, `Info` and `ID` fields and the
// `rest: PdfDictionary` line appear to be the removed side, presumably now
// provided by the flattened `PdfTrailerDictionary` — confirm against the
// real file.
pub struct PdfXRefStreamDictionaryRest {
|
||||
#[pdf(name = "Type")]
|
||||
pub ty: PdfXRefName,
|
||||
#[pdf(name = "Size")]
|
||||
pub size: usize,
|
||||
#[pdf(name = "Index")]
|
||||
pub index: Option<Arc<[usize]>>,
|
||||
#[pdf(name = "Prev")]
|
||||
pub prev: Option<usize>,
|
||||
#[pdf(name = "W")]
|
||||
pub w: Option<Arc<[usize]>>,
|
||||
#[pdf(name = "Root")]
|
||||
pub root: PdfDictionary,
|
||||
#[pdf(name = "Encrypt")]
|
||||
pub encrypt: Option<PdfDictionary>,
|
||||
#[pdf(name = "Info")]
|
||||
pub info: Option<PdfDictionary>,
|
||||
#[pdf(name = "ID")]
|
||||
pub id: Option<[PdfString; 2]>,
|
||||
// `flatten`: presumably the remaining keys of the same dictionary are parsed
// into `rest` rather than read from a nested entry — TODO confirm against the
// `pdf_parse!` macro definition.
#[pdf(flatten)]
|
||||
pub rest: PdfDictionary,
|
||||
pub rest: PdfTrailerDictionary,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -308,6 +302,38 @@ struct PdfParser<'a> {
|
|||
}
|
||||
|
||||
impl<'a> PdfParser<'a> {
|
||||
/// Runs `f` on a temporary parser that reads from `tokenizer` while carrying
/// this parser's object state, then moves that state back into `self`.
///
/// The temporary parser may borrow its input for a different lifetime (`'b`)
/// than `self` does, which is why a fresh `PdfParser<'b>` is built instead of
/// swapping the tokenizer in place. `self.tokenizer` is never touched.
///
/// Returns whatever `f` returns.
fn with_tokenizer<'b, R>(
|
||||
&mut self,
|
||||
tokenizer: PdfTokenizer<'b>,
|
||||
f: impl FnOnce(&mut PdfParser<'b>) -> R,
|
||||
) -> R {
|
||||
// Destructure exhaustively so that adding a field to `PdfParser` forces this
// function to be revisited; the current tokenizer is deliberately ignored.
let PdfParser {
|
||||
objects_arc,
|
||||
objects_map,
|
||||
unparsed_stream_dictionaries,
|
||||
tokenizer: _,
|
||||
} = self;
|
||||
let objects_arc = objects_arc.clone();
|
||||
// Move the accumulated state out of `self`, leaving default (empty) values
// behind until it is restored below.
let objects_map = std::mem::take(objects_map);
|
||||
let unparsed_stream_dictionaries = std::mem::take(unparsed_stream_dictionaries);
|
||||
let mut new_parser = PdfParser {
|
||||
objects_arc,
|
||||
objects_map,
|
||||
unparsed_stream_dictionaries,
|
||||
tokenizer,
|
||||
};
|
||||
// NOTE(review): if `f` panics, the restore below is skipped and `self` keeps
// the empty placeholders.
let retval = f(&mut new_parser);
|
||||
// Take the (possibly modified) state back and drop the temporary tokenizer.
let PdfParser {
|
||||
objects_arc,
|
||||
objects_map,
|
||||
unparsed_stream_dictionaries,
|
||||
tokenizer: _,
|
||||
} = new_parser;
|
||||
self.objects_arc = objects_arc;
|
||||
self.objects_map = objects_map;
|
||||
self.unparsed_stream_dictionaries = unparsed_stream_dictionaries;
|
||||
retval
|
||||
}
|
||||
fn parse_header(&mut self) -> Result<PdfHeader, PdfParseError> {
|
||||
let Some(b'%') = self.tokenizer.bytes.first() else {
|
||||
return Err(PdfParseError::NotAPdfFile);
|
||||
|
|
@ -739,18 +765,94 @@ impl<'a> PdfParser<'a> {
|
|||
Ok(Some(()))
|
||||
}
|
||||
}
|
||||
fn parse_object_stream_inner(
|
||||
&mut self,
|
||||
object_stream: &PdfStream<PdfObjectStreamDictionary>,
|
||||
) -> Result<(), PdfParseError> {
|
||||
let mut object_ids_and_byte_positions =
|
||||
Vec::<(PdfObjectIdentifier, usize)>::with_capacity(object_stream.dictionary().rest.n);
|
||||
for _ in 0..object_stream.dictionary().rest.n {
|
||||
self.skip_comments_and_whitespace();
|
||||
let Some((pos, object_number)) =
|
||||
self.parse_digits(|pos| Err(PdfParseError::InvalidObjectNumber { pos }))?
|
||||
else {
|
||||
return Err(PdfParseError::InvalidObjectNumber {
|
||||
pos: self.tokenizer.pos(),
|
||||
});
|
||||
};
|
||||
self.skip_comments_and_whitespace();
|
||||
let Some((_, byte_position)) =
|
||||
self.parse_digits(|pos| Err(PdfParseError::InvalidNumber { pos }))?
|
||||
else {
|
||||
return Err(PdfParseError::InvalidNumber {
|
||||
pos: self.tokenizer.pos(),
|
||||
});
|
||||
};
|
||||
object_ids_and_byte_positions.push((
|
||||
PdfObjectIdentifier {
|
||||
pos: pos.into(),
|
||||
object_number,
|
||||
generation_number: 0,
|
||||
},
|
||||
byte_position,
|
||||
));
|
||||
}
|
||||
for (id, _byte_position) in object_ids_and_byte_positions {
|
||||
let object = self.parse_object()?;
|
||||
if self.objects_map.insert(id, object).is_some() {
|
||||
return Err(PdfParseError::DuplicateIndirectObjectDefinition { pos: id.pos.0, id });
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
/// Parses all objects stored inside `object_stream` and adds them to this
/// parser's object map.
///
/// The stream's decoded data is tokenized with a fresh `PdfTokenizer` and
/// handed to `parse_object_stream_inner` through `with_tokenizer`, so the
/// parser's own tokenizer is left where it was.
///
/// # Errors
///
/// A failure to obtain the decoded data is propagated unchanged by `?`.
/// Errors raised while parsing the decoded bytes are wrapped in
/// `PdfParseError::ObjectStreamParseError` together with the position of the
/// stream.
fn parse_object_stream(
|
||||
&mut self,
|
||||
object_stream: &PdfStream<PdfObjectStreamDictionary>,
|
||||
) -> Result<(), PdfParseError> {
|
||||
let data = object_stream.decoded_data().as_ref()?;
|
||||
// NOTE(review): the `0` is presumably the starting offset within `data` —
// confirm against `PdfTokenizer::new`.
self.with_tokenizer(PdfTokenizer::new(data, 0), |parser| {
|
||||
parser.parse_object_stream_inner(object_stream)
|
||||
})
|
||||
// Positions inside the error refer to the decoded stream data, so the outer
// error records where the stream itself is located.
.map_err(|e| PdfParseError::ObjectStreamParseError {
|
||||
stream_pos: object_stream.get_pdf_input_position(),
|
||||
error: Arc::new(e),
|
||||
})
|
||||
}
|
||||
/// Parses the body of the file: every indirect object definition, the stream
/// dictionaries whose parsing was deferred, and the contents of all object
/// streams. The resulting object map and the list of object streams are then
/// stored into `self.objects_arc`.
///
/// # Errors
///
/// Propagates any `PdfParseError` from parsing an object definition, from
/// finishing a deferred stream dictionary, or from parsing an object stream.
// NOTE(review): this span is a rendered diff with removed and added lines
// interleaved. The `objects.set(...)` statement and the first `try_for_each`
// line (the one without `?`) look like the removed side — confirm against the
// real file.
fn parse_body(&mut self) -> Result<(), PdfParseError> {
|
||||
// Keep parsing definitions until the helper reports that none is left.
while let Some(()) = self.parse_indirect_object_definition()? {}
|
||||
let Ok(()) = self
|
||||
.objects_arc
|
||||
.objects
|
||||
.set(std::mem::take(&mut self.objects_map))
|
||||
else {
|
||||
unreachable!();
|
||||
};
|
||||
self.unparsed_stream_dictionaries
|
||||
.drain(..)
|
||||
.try_for_each(|v| v.finish_parsing())
|
||||
.try_for_each(|v| v.finish_parsing())?;
|
||||
// Collect the object streams first; their contents are parsed in a second
// pass because that pass inserts into `self.objects_map`.
let mut object_streams: Vec<PdfStream<PdfObjectStreamDictionary>> = Vec::new();
|
||||
for object in self.objects_map.values_mut() {
|
||||
// Every variant is listed explicitly (no `_` arm), so adding a `PdfObject`
// variant produces a compile error here.
let stream = match object {
|
||||
PdfObject::Stream(stream) => stream,
|
||||
PdfObject::Boolean(_)
|
||||
| PdfObject::Integer(_)
|
||||
| PdfObject::Real(_)
|
||||
| PdfObject::String(_)
|
||||
| PdfObject::Name(_)
|
||||
| PdfObject::Array(_)
|
||||
| PdfObject::Dictionary(_)
|
||||
| PdfObject::Null(_)
|
||||
| PdfObject::Indirect(_) => continue,
|
||||
};
|
||||
// The type probe only selects candidates: a stream that passes it but fails
// the full `PdfStream::parse` aborts parsing with that error.
if PdfObjectStreamDictionary::parse_type_from_dictionary(&stream.dictionary().rest)
|
||||
.is_ok()
|
||||
{
|
||||
object_streams.push(PdfStream::parse(object.clone())?);
|
||||
}
|
||||
}
|
||||
for object_stream in &object_streams {
|
||||
self.parse_object_stream(object_stream)?;
|
||||
}
|
||||
// `set` fails only if `inner` was already initialized; presumably nothing
// else sets it before this point, hence `unreachable!()` — TODO confirm.
let Ok(()) = self.objects_arc.inner.set(PdfObjectsInner {
|
||||
objects: std::mem::take(&mut self.objects_map),
|
||||
object_streams,
|
||||
}) else {
|
||||
unreachable!();
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
fn parse_xref_table(&mut self) -> Result<(), PdfParseError> {
|
||||
self.skip_comments_and_whitespace();
|
||||
|
|
@ -844,7 +946,7 @@ impl Pdf {
|
|||
pub fn parse(bytes: impl AsRef<[u8]>) -> Result<Pdf, PdfParseError> {
|
||||
PdfParser {
|
||||
objects_arc: Arc::new(PdfObjects {
|
||||
objects: OnceLock::new(),
|
||||
inner: OnceLock::new(),
|
||||
}),
|
||||
objects_map: BTreeMap::new(),
|
||||
unparsed_stream_dictionaries: vec![],
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue