parses root successfully
This commit is contained in:
parent
5fbfaa8053
commit
83631cc4c6
7 changed files with 623 additions and 118 deletions
|
|
@ -5,15 +5,17 @@ use crate::{
|
|||
GetPdfInputPosition, PdfInputPosition, PdfInputPositionNoCompare, PdfParse,
|
||||
PdfParseError,
|
||||
},
|
||||
stream_filters::PdfStreamFilter,
|
||||
},
|
||||
pdf_parse,
|
||||
util::ArcOrRef,
|
||||
};
|
||||
use std::{
|
||||
any::TypeId,
|
||||
borrow::Cow,
|
||||
borrow::{Borrow, Cow},
|
||||
collections::BTreeMap,
|
||||
fmt::{self, Write},
|
||||
iter::FusedIterator,
|
||||
num::NonZero,
|
||||
sync::{Arc, OnceLock},
|
||||
};
|
||||
|
|
@ -61,6 +63,12 @@ pub struct PdfName {
|
|||
bytes: ArcOrRef<'static, [u8]>,
|
||||
}
|
||||
|
||||
impl Borrow<[u8]> for PdfName {
|
||||
fn borrow(&self) -> &[u8] {
|
||||
&self.bytes
|
||||
}
|
||||
}
|
||||
|
||||
impl PdfName {
|
||||
pub fn try_new(
|
||||
pos: impl Into<PdfInputPositionNoCompare>,
|
||||
|
|
@ -218,24 +226,51 @@ macro_rules! make_pdf_object {
|
|||
$Variant:ident($ty:ty),
|
||||
)+
|
||||
) => {
|
||||
#[derive(Clone, Debug)]
|
||||
#[derive(Clone)]
|
||||
pub enum PdfObjectNonNull {
|
||||
$($Variant($ty),)*
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
impl fmt::Debug for PdfObjectNonNull {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
$(Self::$Variant(v) => v.fmt(f),)*
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum PdfObjectDirect {
|
||||
$($Variant($ty),)*
|
||||
Null(PdfNull),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
impl fmt::Debug for PdfObjectDirect {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
$(Self::$Variant(v) => v.fmt(f),)*
|
||||
Self::Null(v) => v.fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum PdfObject {
|
||||
$($Variant($ty),)*
|
||||
Null(PdfNull),
|
||||
Indirect(PdfObjectIndirect),
|
||||
}
|
||||
|
||||
impl fmt::Debug for PdfObject {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
$(Self::$Variant(v) => v.fmt(f),)*
|
||||
Self::Null(v) => v.fmt(f),
|
||||
Self::Indirect(v) => v.fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$(
|
||||
impl From<$ty> for PdfObjectNonNull {
|
||||
fn from(value: $ty) -> Self {
|
||||
|
|
@ -546,12 +581,12 @@ impl PdfObjectIndirect {
|
|||
}
|
||||
pub fn get(&self) -> PdfObjectDirect {
|
||||
if let Some(objects) = self.objects.upgrade() {
|
||||
if let Some(objects) = objects.objects.get() {
|
||||
if let Some(objects) = objects.inner.get() {
|
||||
let final_id = self.final_id.get().copied();
|
||||
let limit = if final_id.is_some() { 1 } else { 1000usize };
|
||||
let mut id = final_id.unwrap_or(self.id);
|
||||
for _ in 0..limit {
|
||||
if let Some(object) = objects.get(&self.id) {
|
||||
if let Some(object) = objects.objects.get(&self.id) {
|
||||
let retval = match object {
|
||||
PdfObject::Boolean(v) => PdfObjectDirect::Boolean(*v),
|
||||
PdfObject::Integer(v) => PdfObjectDirect::Integer(*v),
|
||||
|
|
@ -628,18 +663,27 @@ impl PdfDictionary {
|
|||
}
|
||||
pub fn contains_key<Q: ?Sized>(&self, key: &Q) -> bool
|
||||
where
|
||||
PdfName: std::borrow::Borrow<Q> + Ord,
|
||||
PdfName: std::borrow::Borrow<Q>,
|
||||
Q: Ord,
|
||||
{
|
||||
self.fields.contains_key(key)
|
||||
}
|
||||
pub fn get<Q: ?Sized>(&self, key: &Q) -> Option<&PdfObject>
|
||||
where
|
||||
PdfName: std::borrow::Borrow<Q> + Ord,
|
||||
PdfName: std::borrow::Borrow<Q>,
|
||||
Q: Ord,
|
||||
{
|
||||
self.fields.get(key)
|
||||
}
|
||||
pub fn get_or_null<Q: ?Sized>(&self, key: &Q) -> PdfObject
|
||||
where
|
||||
PdfName: std::borrow::Borrow<Q>,
|
||||
Q: Ord,
|
||||
{
|
||||
self.get(key)
|
||||
.cloned()
|
||||
.unwrap_or(PdfObject::Null(PdfNull(self.pos)))
|
||||
}
|
||||
/// Returns the input position recorded for this dictionary (the value wrapped
/// by the `pos` field).
pub fn pos(&self) -> PdfInputPosition {
    self.pos.0
}
|
||||
|
|
@ -842,35 +886,6 @@ impl<T> std::ops::DerefMut for MaybeArray<T> {
|
|||
}
|
||||
}
|
||||
|
||||
// Stream filter kinds. The string in each `#[pdf(name = ...)]` is presumably the
// PDF name the variant is parsed from, with `#[pdf(other)]` catching any other
// name — confirm against the `pdf_parse!` macro.
// NOTE(review): this file also imports `stream_filters::PdfStreamFilter`, so this
// local definition looks superseded — confirm before relying on it.
pdf_parse! {
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
#[non_exhaustive]
|
||||
pub enum PdfStreamFilter {
|
||||
#[pdf(name = "ASCIIHexDecode")]
|
||||
AsciiHexDecode,
|
||||
#[pdf(name = "ASCII85Decode")]
|
||||
Ascii85Decode,
|
||||
#[pdf(name = "LZWDecode")]
|
||||
LzwDecode,
|
||||
#[pdf(name = "FlateDecode")]
|
||||
FlateDecode,
|
||||
#[pdf(name = "RunLengthDecode")]
|
||||
RunLengthDecode,
|
||||
#[pdf(name = "CCITTFaxDecode")]
|
||||
CcittFaxDecode,
|
||||
#[pdf(name = "JBIG2Decode")]
|
||||
Jbig2Decode,
|
||||
#[pdf(name = "DCTDecode")]
|
||||
DctDecode,
|
||||
#[pdf(name = "JPXDecode")]
|
||||
JpxDecode,
|
||||
#[pdf(name = "Crypt")]
|
||||
Crypt,
|
||||
#[pdf(other)]
|
||||
Unknown(PdfName),
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Default for MaybeArray<T> {
|
||||
fn default() -> Self {
|
||||
Self(Arc::default())
|
||||
|
|
@ -936,47 +951,101 @@ impl PdfStreamDictionary {
|
|||
pub(crate) fn parse_len_from_dictionary(
|
||||
dictionary: &PdfDictionary,
|
||||
) -> Result<usize, PdfParseError> {
|
||||
PdfParse::parse(
|
||||
dictionary
|
||||
.get(&PdfName::new_static(Self::LENGTH_NAME.as_bytes()))
|
||||
.cloned()
|
||||
.unwrap_or_default(),
|
||||
PdfParse::parse(dictionary.get_or_null(Self::LENGTH_NAME.as_bytes()))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct PdfStreamDictionaryFiltersAndParms<'a> {
|
||||
filters: std::iter::Enumerate<std::slice::Iter<'a, PdfStreamFilter>>,
|
||||
decode_parms: &'a [Option<PdfDictionary>],
|
||||
}
|
||||
|
||||
impl<'a> PdfStreamDictionaryFiltersAndParms<'a> {
|
||||
fn item_helper(
|
||||
filter: (usize, &'a PdfStreamFilter),
|
||||
decode_parms: &'a [Option<PdfDictionary>],
|
||||
) -> (&'a PdfStreamFilter, &'a PdfDictionary) {
|
||||
static EMPTY_DICTIONARY: OnceLock<PdfDictionary> = OnceLock::new();
|
||||
let (index, filter) = filter;
|
||||
(
|
||||
filter,
|
||||
match decode_parms.get(index) {
|
||||
Some(Some(v)) => v,
|
||||
_ => EMPTY_DICTIONARY.get_or_init(PdfDictionary::default),
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl<Rest> PdfStreamDictionary<Rest> {
|
||||
pub fn filters_and_parms(
|
||||
&self,
|
||||
) -> impl Clone + ExactSizeIterator + DoubleEndedIterator<Item = (PdfStreamFilter, PdfDictionary)>
|
||||
{
|
||||
self.filters.iter().enumerate().map(|(index, filter)| {
|
||||
(
|
||||
filter.clone(),
|
||||
self.decode_parms
|
||||
.0
|
||||
.get(index)
|
||||
.cloned()
|
||||
.flatten()
|
||||
.unwrap_or_default(),
|
||||
)
|
||||
})
|
||||
impl<'a> Iterator for PdfStreamDictionaryFiltersAndParms<'a> {
|
||||
type Item = (&'a PdfStreamFilter, &'a PdfDictionary);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.filters
|
||||
.next()
|
||||
.map(|filter| Self::item_helper(filter, self.decode_parms))
|
||||
}
|
||||
pub fn file_filters_and_parms(
|
||||
&self,
|
||||
) -> impl Clone + ExactSizeIterator + DoubleEndedIterator<Item = (PdfStreamFilter, PdfDictionary)>
|
||||
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
self.filters.size_hint()
|
||||
}
|
||||
|
||||
fn nth(&mut self, n: usize) -> Option<Self::Item> {
|
||||
self.filters
|
||||
.nth(n)
|
||||
.map(|filter| Self::item_helper(filter, self.decode_parms))
|
||||
}
|
||||
|
||||
fn fold<B, F>(self, init: B, f: F) -> B
|
||||
where
|
||||
F: FnMut(B, Self::Item) -> B,
|
||||
{
|
||||
self.file_filters.iter().enumerate().map(|(index, filter)| {
|
||||
(
|
||||
filter.clone(),
|
||||
self.file_decode_parms
|
||||
.0
|
||||
.get(index)
|
||||
.cloned()
|
||||
.flatten()
|
||||
.unwrap_or_default(),
|
||||
)
|
||||
})
|
||||
self.filters
|
||||
.map(|filter| Self::item_helper(filter, self.decode_parms))
|
||||
.fold(init, f)
|
||||
}
|
||||
}
|
||||
|
||||
// Marker impl: `next` forwards to the enumerated slice iterator held in `filters`.
impl<'a> FusedIterator for PdfStreamDictionaryFiltersAndParms<'a> {}
|
||||
|
||||
// Marker impl: `size_hint` is forwarded unchanged from the enumerated slice iterator.
impl<'a> ExactSizeIterator for PdfStreamDictionaryFiltersAndParms<'a> {}
|
||||
|
||||
impl<'a> DoubleEndedIterator for PdfStreamDictionaryFiltersAndParms<'a> {
|
||||
fn next_back(&mut self) -> Option<Self::Item> {
|
||||
self.filters
|
||||
.next_back()
|
||||
.map(|filter| Self::item_helper(filter, self.decode_parms))
|
||||
}
|
||||
|
||||
fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
|
||||
self.filters
|
||||
.nth_back(n)
|
||||
.map(|filter| Self::item_helper(filter, self.decode_parms))
|
||||
}
|
||||
|
||||
fn rfold<B, F>(self, init: B, f: F) -> B
|
||||
where
|
||||
F: FnMut(B, Self::Item) -> B,
|
||||
{
|
||||
self.filters
|
||||
.map(|filter| Self::item_helper(filter, self.decode_parms))
|
||||
.rfold(init, f)
|
||||
}
|
||||
}
|
||||
|
||||
impl<Rest> PdfStreamDictionary<Rest> {
|
||||
pub fn filters_and_parms<'a>(&'a self) -> PdfStreamDictionaryFiltersAndParms<'a> {
|
||||
PdfStreamDictionaryFiltersAndParms {
|
||||
filters: self.filters.iter().enumerate(),
|
||||
decode_parms: &self.decode_parms,
|
||||
}
|
||||
}
|
||||
pub fn file_filters_and_parms<'a>(&'a self) -> PdfStreamDictionaryFiltersAndParms<'a> {
|
||||
PdfStreamDictionaryFiltersAndParms {
|
||||
filters: self.file_filters.iter().enumerate(),
|
||||
decode_parms: &self.file_decode_parms,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1001,16 +1070,64 @@ impl<Rest: PdfParse> UnparsedPdfStreamDictionary<Rest> {
|
|||
pub struct PdfStream<Rest = PdfDictionary> {
|
||||
pos: PdfInputPositionNoCompare,
|
||||
dictionary: Arc<OnceLock<PdfStreamDictionary<Rest>>>,
|
||||
data: Arc<[u8]>,
|
||||
encoded_data: Arc<[u8]>,
|
||||
decoded_data: Arc<OnceLock<Result<Arc<[u8]>, PdfParseError>>>,
|
||||
}
|
||||
|
||||
/// Borrowed byte slice that formats as quoted, ASCII-escaped text.
///
/// The bytes are printed 32 per line; each line is wrapped in double quotes and
/// lines are separated by `\n`. An empty slice prints as `""`.
struct DumpBytes<'a>(&'a [u8]);

/// `Debug` output is identical to the `Display` output.
impl<'a> fmt::Debug for DumpBytes<'a> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(self, f)
    }
}

impl fmt::Display for DumpBytes<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `chunks` yields nothing for an empty slice, so emit the empty literal here.
        if self.0.is_empty() {
            return f.write_str("\"\"");
        }
        for (index, chunk) in self.0.chunks(32).enumerate() {
            // A newline goes between chunks, never before the first one.
            if index != 0 {
                f.write_str("\n")?;
            }
            write!(f, "\"{}\"", chunk.escape_ascii())?;
        }
        Ok(())
    }
}
|
||||
|
||||
impl<Rest: fmt::Debug> fmt::Debug for PdfStream<Rest> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("PdfStream")
|
||||
.field("pos", &self.pos)
|
||||
.field("dictionary", &self.dictionary)
|
||||
.field("data", &format_args!("{:02x?}", self.data))
|
||||
.finish()
|
||||
let Self {
|
||||
pos,
|
||||
dictionary,
|
||||
encoded_data,
|
||||
decoded_data,
|
||||
} = self;
|
||||
let mut debug_struct = f.debug_struct("PdfStream");
|
||||
debug_struct.field("pos", pos);
|
||||
if let Some(dictionary) = dictionary.get() {
|
||||
debug_struct.field("dictionary", dictionary);
|
||||
} else {
|
||||
debug_struct.field("dictionary", &format_args!("<not-yet-parsed>"));
|
||||
}
|
||||
debug_struct.field("encoded_data", &DumpBytes(encoded_data));
|
||||
if let Some(decoded_data) = decoded_data.get() {
|
||||
match decoded_data {
|
||||
Ok(decoded_data) => debug_struct.field("decoded_data", &DumpBytes(decoded_data)),
|
||||
Err(e) => debug_struct.field("decoded_data", &Err::<(), _>(e)),
|
||||
};
|
||||
} else {
|
||||
debug_struct.field("decoded_data", &format_args!("<not-yet-decoded>"));
|
||||
}
|
||||
debug_struct.finish()
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1018,25 +1135,27 @@ impl<Rest> PdfStream<Rest> {
|
|||
pub fn new(
|
||||
pos: impl Into<PdfInputPositionNoCompare>,
|
||||
dictionary: PdfStreamDictionary<Rest>,
|
||||
data: Arc<[u8]>,
|
||||
encoded_data: Arc<[u8]>,
|
||||
) -> Self {
|
||||
Self {
|
||||
pos: pos.into(),
|
||||
dictionary: Arc::new(OnceLock::from(dictionary)),
|
||||
data,
|
||||
encoded_data,
|
||||
decoded_data: Arc::new(OnceLock::new()),
|
||||
}
|
||||
}
|
||||
pub(crate) fn new_unparsed(
|
||||
pos: impl Into<PdfInputPositionNoCompare>,
|
||||
unparsed_dictionary: PdfDictionary,
|
||||
data: Arc<[u8]>,
|
||||
encoded_data: Arc<[u8]>,
|
||||
) -> (Self, UnparsedPdfStreamDictionary<Rest>) {
|
||||
let dictionary = Arc::new(OnceLock::new());
|
||||
(
|
||||
Self {
|
||||
pos: pos.into(),
|
||||
dictionary: dictionary.clone(),
|
||||
data,
|
||||
encoded_data,
|
||||
decoded_data: Arc::new(OnceLock::new()),
|
||||
},
|
||||
UnparsedPdfStreamDictionary {
|
||||
unparsed_dictionary,
|
||||
|
|
@ -1049,8 +1168,29 @@ impl<Rest> PdfStream<Rest> {
|
|||
.get()
|
||||
.expect("haven't finished parsing all pdf object definitions yet")
|
||||
}
|
||||
pub fn data(&self) -> &Arc<[u8]> {
|
||||
&self.data
|
||||
pub fn encoded_data(&self) -> &Arc<[u8]> {
|
||||
&self.encoded_data
|
||||
}
|
||||
/// Runs the stream's filter chain over the encoded bytes.
///
/// With no filters the encoded buffer is returned as-is (an `Arc` clone, no
/// copy). Otherwise each filter is applied in order, feeding its output to the
/// next one, and the final output is copied into a fresh `Arc<[u8]>`.
///
/// # Errors
/// Propagates the first error returned by a filter's `decode_stream_data`.
///
/// # Panics
/// Panics via `todo!()` when the dictionary's `file` entry is set — decoding
/// from an external file is not implemented yet. Also panics if `dictionary()`
/// does (presumably when the dictionary has not been parsed yet — confirm).
fn try_decode_data(&self) -> Result<Arc<[u8]>, PdfParseError> {
    let dictionary = self.dictionary();
    let (data, filters) = if let Some(file) = &dictionary.file {
        todo!()
    } else {
        (&self.encoded_data, dictionary.filters_and_parms())
    };
    if filters.len() == 0 {
        return Ok(data.clone());
    }
    let mut data: &[u8] = data;
    // Owns the most recent filter output so that `data` can borrow from it.
    let mut buffer;
    for (filter, filter_parms) in filters {
        buffer = filter.decode_stream_data(filter_parms.clone(), self.pos.0, &data)?;
        data = &buffer;
    }
    Ok(Arc::from(data))
}
|
||||
/// Returns the decoded stream bytes, decoding on the first call.
///
/// The outcome of that first decode — success or error — is stored in the
/// `decoded_data` cell and handed back by reference on every later call.
pub fn decoded_data(&self) -> &Result<Arc<[u8]>, PdfParseError> {
    self.decoded_data.get_or_init(|| self.try_decode_data())
}
|
||||
}
|
||||
|
||||
|
|
@ -1099,7 +1239,8 @@ impl<Rest: PdfParse> PdfParse for PdfStream<Rest> {
|
|||
rest: Rest::parse(rest.clone().into())?,
|
||||
}))
|
||||
},
|
||||
data: stream.data,
|
||||
encoded_data: stream.encoded_data,
|
||||
decoded_data: stream.decoded_data,
|
||||
}),
|
||||
object => Err(PdfParseError::InvalidType {
|
||||
pos: object.get_pdf_input_position(),
|
||||
|
|
@ -1109,3 +1250,37 @@ impl<Rest: PdfParse> PdfParse for PdfStream<Rest> {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Value of an object stream's type entry. `ObjStm` is the only variant and
// also the `Default`.
// NOTE(review): presumably any other name fails to parse, since there is no
// `#[pdf(other)]` variant — confirm against the `pdf_parse!` macro.
pdf_parse! {
    #[derive(Clone, Copy, Debug, Hash, Default, PartialEq, Eq, PartialOrd, Ord)]
    pub enum PdfObjectStreamType {
        #[pdf(name = "ObjStm")]
        #[default]
        ObjStm,
    }
}
|
||||
|
||||
// Dictionary of an object stream: the type entry (keyed by `Self::TYPE_NAME`),
// the `N`, `First` and optional `Extends` entries, and a flattened `rest`
// dictionary.
// NOTE(review): `rest` presumably receives the entries not claimed by the named
// fields — confirm against the `pdf_parse!` macro's `#[pdf(flatten)]` handling.
pdf_parse! {
    #[derive(Clone, Debug)]
    pub struct PdfObjectStreamDictionary {
        #[pdf(name = Self::TYPE_NAME)]
        pub ty: PdfObjectStreamType,
        #[pdf(name = "N")]
        pub n: usize,
        #[pdf(name = "First")]
        pub first: usize,
        #[pdf(name = "Extends")]
        pub extends: Option<PdfObjectIndirect>,
        #[pdf(flatten)]
        pub rest: PdfDictionary,
    }
}
|
||||
|
||||
impl PdfObjectStreamDictionary {
|
||||
pub const TYPE_NAME: &str = "Type";
|
||||
pub(crate) fn parse_type_from_dictionary(
|
||||
dictionary: &PdfDictionary,
|
||||
) -> Result<PdfObjectStreamType, PdfParseError> {
|
||||
PdfParse::parse(dictionary.get_or_null(Self::TYPE_NAME.as_bytes()))
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue