parse_powerisa_pdf/src/pdf/object.rs
2025-12-24 07:12:48 -08:00

1111 lines
31 KiB
Rust

use crate::{
pdf::{
PdfObjects,
parse::{
GetPdfInputPosition, PdfInputPosition, PdfInputPositionNoCompare, PdfParse,
PdfParseError,
},
},
pdf_parse,
util::ArcOrRef,
};
use std::{
any::TypeId,
borrow::Cow,
collections::BTreeMap,
fmt::{self, Write},
num::NonZero,
sync::{Arc, OnceLock},
};
/// A PDF string object: the raw bytes plus the input position recorded for it.
///
/// The position is stored in a `PdfInputPositionNoCompare` wrapper (presumably so that it
/// does not take part in the derived comparisons -- confirm against that type's definition).
#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord)]
pub struct PdfString {
    pos: PdfInputPositionNoCompare,
    bytes: ArcOrRef<'static, [u8]>,
}

impl std::fmt::Debug for PdfString {
    /// Formats as a struct whose `bytes` field is shown as an escaped `b"..."` literal.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let escaped = self.bytes.escape_ascii();
        let mut out = f.debug_struct("PdfString");
        out.field("pos", &self.pos);
        out.field("bytes", &format_args!("b\"{escaped}\""));
        out.finish()
    }
}
impl PdfString {
    /// Builds a string object from its input position and raw bytes.
    pub fn new(pos: impl Into<PdfInputPositionNoCompare>, bytes: ArcOrRef<'static, [u8]>) -> Self {
        let pos = pos.into();
        Self { pos, bytes }
    }

    /// Returns the input position recorded for this string.
    pub fn pos(&self) -> PdfInputPosition {
        let Self { pos, .. } = self;
        pos.0
    }

    /// Borrows the raw bytes of the string.
    pub fn bytes(&self) -> &ArcOrRef<'static, [u8]> {
        let Self { bytes, .. } = self;
        bytes
    }
}

impl GetPdfInputPosition for PdfString {
    /// Same value as [`PdfString::pos`].
    fn get_pdf_input_position(&self) -> PdfInputPosition {
        self.pos()
    }
}
/// A PDF name object: the name's bytes plus the input position recorded for it.
///
/// The constructors in this file (`try_new`, `new`, `new_static`) reject byte strings
/// that contain a NUL (`0`) byte.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct PdfName {
// Input position; wrapped in `PdfInputPositionNoCompare`
// (presumably ignored by the derived comparisons -- confirm against that type).
pos: PdfInputPositionNoCompare,
// The name's bytes, without the leading `/` that `Display` prints.
bytes: ArcOrRef<'static, [u8]>,
}
impl PdfName {
    /// Builds a name, returning `None` when `bytes` contains a NUL byte.
    pub fn try_new(
        pos: impl Into<PdfInputPositionNoCompare>,
        bytes: impl Into<ArcOrRef<'static, [u8]>>,
    ) -> Option<Self> {
        let bytes = bytes.into();
        if bytes.contains(&0) {
            return None;
        }
        let pos = pos.into();
        Some(Self { pos, bytes })
    }

    /// Builds a name from a static byte string, usable in `const` contexts.
    ///
    /// The position is set to `PdfInputPositionNoCompare::empty()`.
    ///
    /// # Panics
    ///
    /// Panics if `bytes` contains a NUL byte.
    #[track_caller]
    pub const fn new_static(bytes: &'static [u8]) -> Self {
        // Iterators are unavailable in `const fn`, so walk the slice by peeling off its head.
        let mut remaining = bytes;
        while let [first, rest @ ..] = remaining {
            if *first == 0 {
                panic!("shouldn't contain any nul bytes");
            }
            remaining = rest;
        }
        Self {
            pos: PdfInputPositionNoCompare::empty(),
            bytes: ArcOrRef::Ref(bytes),
        }
    }

    /// Builds a name from a position and bytes.
    ///
    /// # Panics
    ///
    /// Panics if `bytes` contains a NUL byte; use [`PdfName::try_new`] to handle that case.
    #[track_caller]
    pub fn new(
        pos: impl Into<PdfInputPositionNoCompare>,
        bytes: impl Into<ArcOrRef<'static, [u8]>>,
    ) -> Self {
        match Self::try_new(pos, bytes) {
            Some(name) => name,
            None => panic!("shouldn't contain any nul bytes"),
        }
    }

    /// Borrows the bytes of the name.
    pub fn as_bytes(&self) -> &ArcOrRef<'static, [u8]> {
        let Self { bytes, .. } = self;
        bytes
    }

    /// Returns the input position recorded for this name.
    pub fn pos(&self) -> PdfInputPosition {
        let Self { pos, .. } = self;
        pos.0
    }
}
impl GetPdfInputPosition for PdfName {
    /// Same value as [`PdfName::pos`].
    fn get_pdf_input_position(&self) -> PdfInputPosition {
        self.pos()
    }
}

impl fmt::Debug for PdfName {
    /// Prints the position followed by the `Display` form of the name.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "PdfName(at {pos}: {name})", pos = self.pos, name = self)
    }
}

impl fmt::Display for PdfName {
    /// Prints `/` followed by the name's bytes.
    ///
    /// Bytes in `0x21..=0x7E` other than `#` are written verbatim; every other byte is
    /// written as `#` followed by two uppercase hex digits.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("/")?;
        for &byte in self.bytes.iter() {
            let verbatim = byte.is_ascii_graphic() && byte != b'#';
            if verbatim {
                f.write_char(char::from(byte))?;
            } else {
                write!(f, "#{byte:02X}")?;
            }
        }
        Ok(())
    }
}
/// A PDF boolean object together with the input position recorded for it.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Default)]
pub struct PdfBoolean {
    pos: PdfInputPositionNoCompare,
    value: bool,
}

impl PdfBoolean {
    /// Builds a boolean object from its input position and value.
    pub fn new(pos: impl Into<PdfInputPositionNoCompare>, value: bool) -> Self {
        let pos = pos.into();
        Self { pos, value }
    }

    /// Returns the input position recorded for this object.
    pub fn pos(&self) -> PdfInputPosition {
        let Self { pos, .. } = *self;
        pos.0
    }

    /// Returns the wrapped `bool`.
    pub fn value(&self) -> bool {
        let Self { value, .. } = *self;
        value
    }
}

impl GetPdfInputPosition for PdfBoolean {
    /// Same value as [`PdfBoolean::pos`].
    fn get_pdf_input_position(&self) -> PdfInputPosition {
        self.pos()
    }
}
/// A PDF integer object (stored as `i128`) together with its input position.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Default)]
pub struct PdfInteger {
    pos: PdfInputPositionNoCompare,
    value: i128,
}

impl PdfInteger {
    /// Builds an integer object from its input position and value.
    pub fn new(pos: impl Into<PdfInputPositionNoCompare>, value: i128) -> Self {
        let pos = pos.into();
        Self { pos, value }
    }

    /// Returns the input position recorded for this object.
    pub fn pos(&self) -> PdfInputPosition {
        let Self { pos, .. } = *self;
        pos.0
    }

    /// Returns the wrapped integer.
    pub fn value(&self) -> i128 {
        let Self { value, .. } = *self;
        value
    }
}

impl GetPdfInputPosition for PdfInteger {
    /// Same value as [`PdfInteger::pos`].
    fn get_pdf_input_position(&self) -> PdfInputPosition {
        self.pos()
    }
}
/// A PDF real-number object (stored as `f64`) together with its input position.
///
/// Only `PartialEq`/`PartialOrd` are derived, since `f64` has no total ordering.
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Default)]
pub struct PdfReal {
    pos: PdfInputPositionNoCompare,
    value: f64,
}

impl PdfReal {
    /// Builds a real-number object from its input position and value.
    pub fn new(pos: impl Into<PdfInputPositionNoCompare>, value: f64) -> Self {
        let pos = pos.into();
        Self { pos, value }
    }

    /// Returns the input position recorded for this object.
    pub fn pos(&self) -> PdfInputPosition {
        let Self { pos, .. } = *self;
        pos.0
    }

    /// Returns the wrapped floating-point value.
    pub fn value(&self) -> f64 {
        let Self { value, .. } = *self;
        value
    }
}

impl GetPdfInputPosition for PdfReal {
    /// Same value as [`PdfReal::pos`].
    fn get_pdf_input_position(&self) -> PdfInputPosition {
        self.pos()
    }
}
// Generates the three PDF object enums and the conversions between them.
//
// Input: one or more entries of the form
//     #[parse = <optional ident>, type_name = "<literal>"]
//     Variant(Type),
//
// Output, for the listed variants:
// - `PdfObjectNonNull`: exactly the listed variants.
// - `PdfObjectDirect`:  the listed variants plus `Null(PdfNull)`.
// - `PdfObject`:        the listed variants plus `Null(PdfNull)` and
//                       `Indirect(PdfObjectIndirect)`.
// - `From` conversions between the payload types and the enums, and between the enums.
// - `type_name()`, `pos()`, `is_null()` helpers and `GetPdfInputPosition` impls.
// - When the `parse` ident is present, a `PdfParse` impl for the payload type whose
//   method is named by that ident; when it is omitted no impl is generated here.
// - A compile-time check that every payload type and every enum implements `PdfParse`.
macro_rules! make_pdf_object {
(
$(
#[parse = $($parse:ident)?, type_name = $type_name:literal]
$Variant:ident($ty:ty),
)+
) => {
// The three object enums, from most to least restricted.
#[derive(Clone, Debug)]
pub enum PdfObjectNonNull {
$($Variant($ty),)*
}
#[derive(Clone, Debug)]
pub enum PdfObjectDirect {
$($Variant($ty),)*
Null(PdfNull),
}
#[derive(Clone, Debug)]
pub enum PdfObject {
$($Variant($ty),)*
Null(PdfNull),
Indirect(PdfObjectIndirect),
}
// Per-variant conversions: payload type -> each enum.
$(
impl From<$ty> for PdfObjectNonNull {
fn from(value: $ty) -> Self {
Self::$Variant(value)
}
}
impl From<$ty> for PdfObjectDirect {
fn from(value: $ty) -> Self {
Self::$Variant(value)
}
}
impl From<$ty> for PdfObject {
fn from(value: $ty) -> Self {
Self::$Variant(value)
}
}
// `None` maps to a default-constructed `Null`.
impl From<Option<$ty>> for PdfObjectDirect {
fn from(value: Option<$ty>) -> Self {
match value {
Some(value) => Self::$Variant(value),
None => Self::Null(Default::default()),
}
}
}
impl From<Option<$ty>> for PdfObject {
fn from(value: Option<$ty>) -> Self {
match value {
Some(value) => Self::$Variant(value),
None => Self::Null(Default::default()),
}
}
}
// Optional `PdfParse` impl: only emitted when the entry supplied a `parse` ident.
// The object is first converted to `PdfObjectDirect` (which resolves indirect
// references via `From<PdfObject>`), then must match this entry's variant.
$(impl crate::pdf::parse::PdfParse for $ty {
fn type_name() -> Cow<'static, str> {
Cow::Borrowed($type_name)
}
fn $parse(object: PdfObject) -> Result<Self, crate::pdf::parse::PdfParseError> {
match PdfObjectDirect::from(object) {
PdfObjectDirect::$Variant(v) => Ok(v),
object => Err(crate::pdf::parse::PdfParseError::InvalidType {
pos: object.get_pdf_input_position(),
ty: object.type_name(),
expected_ty: $type_name,
}),
}
}
})?
)*
// Widening conversions between the enums.
impl From<PdfObjectNonNull> for PdfObjectDirect {
fn from(value: PdfObjectNonNull) -> Self {
match value {
$(PdfObjectNonNull::$Variant(v) => Self::$Variant(v),)*
}
}
}
impl From<PdfObjectNonNull> for PdfObject {
fn from(value: PdfObjectNonNull) -> Self {
match value {
$(PdfObjectNonNull::$Variant(v) => Self::$Variant(v),)*
}
}
}
impl From<PdfObjectDirect> for PdfObject {
fn from(value: PdfObjectDirect) -> Self {
match value {
$(PdfObjectDirect::$Variant(v) => Self::$Variant(v),)*
PdfObjectDirect::Null(v) => Self::Null(v),
}
}
}
// Narrowing conversion: an `Indirect` variant is converted through
// `From<PdfObjectIndirect> for PdfObjectDirect`.
impl From<PdfObject> for PdfObjectDirect {
fn from(value: PdfObject) -> Self {
match value {
$(PdfObject::$Variant(v) => Self::$Variant(v),)*
PdfObject::Null(v) => Self::Null(v),
PdfObject::Indirect(v) => v.into(),
}
}
}
impl PdfObjectNonNull {
// Returns the `type_name` literal given for the active variant.
pub fn type_name(&self) -> &'static str {
match self {
$(PdfObjectNonNull::$Variant(_) => $type_name,)*
}
}
pub fn pos(&self) -> PdfInputPosition {
self.get_pdf_input_position()
}
}
impl GetPdfInputPosition for PdfObjectNonNull {
fn get_pdf_input_position(&self) -> PdfInputPosition {
match self {
$(PdfObjectNonNull::$Variant(v) => <$ty as GetPdfInputPosition>::get_pdf_input_position(v),)*
}
}
}
// `Null` becomes `None`; every other variant becomes `Some`.
impl From<PdfObjectDirect> for Option<PdfObjectNonNull> {
fn from(value: PdfObjectDirect) -> Self {
match value {
$(PdfObjectDirect::$Variant(v) => Some(PdfObjectNonNull::$Variant(v)),)*
PdfObjectDirect::Null(_) => None,
}
}
}
impl From<PdfObject> for Option<PdfObjectNonNull> {
fn from(value: PdfObject) -> Self {
PdfObjectDirect::from(value).into()
}
}
impl PdfObjectDirect {
pub fn is_null(&self) -> bool {
matches!(self, PdfObjectDirect::Null(_))
}
// Returns the `type_name` literal for the active variant, or "null".
pub fn type_name(&self) -> &'static str {
match self {
$(PdfObjectDirect::$Variant(_) => $type_name,)*
PdfObjectDirect::Null(_) => "null",
}
}
pub fn pos(&self) -> PdfInputPosition {
self.get_pdf_input_position()
}
}
impl GetPdfInputPosition for PdfObjectDirect {
fn get_pdf_input_position(&self) -> PdfInputPosition {
match self {
$(PdfObjectDirect::$Variant(v) => <$ty as GetPdfInputPosition>::get_pdf_input_position(v),)*
PdfObjectDirect::Null(v) => <PdfNull as GetPdfInputPosition>::get_pdf_input_position(v),
}
}
}
impl PdfObject {
// True only for the `Null` variant itself; an `Indirect` variant is not resolved here.
pub fn is_null(&self) -> bool {
matches!(self, PdfObject::Null(_))
}
// Returns the `type_name` literal for the active variant, "null", or "indirect object".
pub fn type_name(&self) -> &'static str {
match self {
$(PdfObject::$Variant(_) => $type_name,)*
PdfObject::Null(_) => "null",
PdfObject::Indirect(_) => "indirect object",
}
}
pub fn pos(&self) -> PdfInputPosition {
self.get_pdf_input_position()
}
}
impl GetPdfInputPosition for PdfObject {
fn get_pdf_input_position(&self) -> PdfInputPosition {
match self {
$(PdfObject::$Variant(v) => <$ty as GetPdfInputPosition>::get_pdf_input_position(v),)*
PdfObject::Null(v) => <PdfNull as GetPdfInputPosition>::get_pdf_input_position(v),
PdfObject::Indirect(v) => <PdfObjectIndirect as GetPdfInputPosition>::get_pdf_input_position(v),
}
}
}
// Compile-time assertion: naming `_assert_parsable::<T>` fails to compile unless
// `T: PdfParse`. This catches payload types whose `parse` ident was omitted above
// and that have no hand-written impl elsewhere.
const _: () = {
fn _assert_parsable<T: crate::pdf::parse::PdfParse>() {}
$(let _ = _assert_parsable::<$ty>;)*
let _ = _assert_parsable::<PdfNull>;
let _ = _assert_parsable::<PdfObjectIndirect>;
let _ = _assert_parsable::<PdfObjectNonNull>;
let _ = _assert_parsable::<PdfObjectDirect>;
let _ = _assert_parsable::<PdfObject>;
};
};
}
// Instantiates `PdfObjectNonNull`, `PdfObjectDirect` and `PdfObject` with the eight
// non-null object kinds. `type_name` is the string reported by `type_name()` and used
// as `expected_ty` in the generated `PdfParse` impls.
make_pdf_object! {
#[parse = parse, type_name = "boolean"]
Boolean(PdfBoolean),
#[parse = parse, type_name = "integer"]
Integer(PdfInteger),
#[parse = parse, type_name = "real"]
Real(PdfReal),
#[parse = parse, type_name = "string"]
String(PdfString),
#[parse = parse, type_name = "name"]
Name(PdfName),
#[parse = parse, type_name = "array"]
Array(PdfArray),
#[parse = parse, type_name = "dictionary"]
Dictionary(PdfDictionary),
// `parse` is left empty so the macro generates no `PdfParse` impl for `PdfStream`;
// this file provides a hand-written generic `impl PdfParse for PdfStream<Rest>` instead.
#[parse =, type_name = "stream"]
Stream(PdfStream),
}
/// The PDF `null` object; it carries only the input position recorded for it.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct PdfNull(PdfInputPositionNoCompare);

impl PdfNull {
    /// Builds a null object located at `pos`.
    pub fn new(pos: impl Into<PdfInputPositionNoCompare>) -> Self {
        let pos: PdfInputPositionNoCompare = pos.into();
        Self(pos)
    }
}

impl GetPdfInputPosition for PdfNull {
    /// Returns the position stored in the wrapper.
    fn get_pdf_input_position(&self) -> PdfInputPosition {
        let Self(pos) = self;
        pos.0
    }
}
impl From<PdfNull> for PdfObjectDirect {
fn from(v: PdfNull) -> Self {
Self::Null(v)
}
}
impl Default for PdfObjectDirect {
fn default() -> Self {
Self::Null(PdfNull(PdfInputPositionNoCompare::empty()))
}
}
impl From<PdfNull> for PdfObject {
fn from(v: PdfNull) -> Self {
Self::Null(v)
}
}
impl Default for PdfObject {
fn default() -> Self {
Self::Null(PdfNull(PdfInputPositionNoCompare::empty()))
}
}
impl From<PdfObjectIndirect> for PdfObject {
fn from(v: PdfObjectIndirect) -> Self {
Self::Indirect(v)
}
}
/// Identifies an indirect object by object number and generation number.
///
/// Also records the input position at which the identifier was read.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct PdfObjectIdentifier {
    pub pos: PdfInputPositionNoCompare,
    pub object_number: NonZero<u32>,
    pub generation_number: u16,
}

impl GetPdfInputPosition for PdfObjectIdentifier {
    /// Returns the position stored in the `pos` field.
    fn get_pdf_input_position(&self) -> PdfInputPosition {
        let Self { pos, .. } = *self;
        pos.0
    }
}
#[derive(Clone)]
pub struct PdfObjectIndirect {
objects: std::sync::Weak<PdfObjects>,
id: PdfObjectIdentifier,
final_id: Arc<OnceLock<PdfObjectIdentifier>>,
}
impl fmt::Debug for PdfObjectIndirect {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let Self {
objects: _,
id,
final_id: _,
} = self;
f.debug_struct("PdfObjectIndirect")
.field("id", id)
.finish_non_exhaustive()
}
}
impl GetPdfInputPosition for PdfObjectIndirect {
fn get_pdf_input_position(&self) -> PdfInputPosition {
self.id.get_pdf_input_position()
}
}
impl PartialEq for PdfObjectIndirect {
fn eq(&self, other: &Self) -> bool {
let Self {
objects,
id,
final_id: _,
} = self;
objects.ptr_eq(&other.objects) && *id == other.id
}
}
impl PdfObjectIndirect {
    /// Creates a reference to object `id` inside `objects`.
    ///
    /// Only a weak handle to `objects` is stored.
    pub fn new(objects: &Arc<PdfObjects>, id: PdfObjectIdentifier) -> Self {
        Self {
            objects: Arc::downgrade(objects),
            id,
            final_id: Arc::new(OnceLock::new()),
        }
    }

    /// Resolves this reference to a direct object, following chains of indirect objects.
    ///
    /// Returns a `Null` object when:
    /// - the object table has been dropped or is not populated yet,
    /// - an identifier along the chain is not present in the table, or
    /// - the chain is longer than 1000 links (which also covers reference cycles).
    ///
    /// Once a non-indirect object has been reached, its identifier is cached in
    /// `final_id`, so later calls perform a single lookup.
    pub fn get(&self) -> PdfObjectDirect {
        if let Some(objects) = self.objects.upgrade() {
            if let Some(objects) = objects.objects.get() {
                let final_id = self.final_id.get().copied();
                // A cached final id is known to name a non-indirect object: one lookup suffices.
                let limit = if final_id.is_some() { 1 } else { 1000usize };
                let mut id = final_id.unwrap_or(self.id);
                for _ in 0..limit {
                    // Look up the *current* link of the chain, not `self.id`; otherwise the
                    // chain never advances and the cached `final_id` is ignored.
                    let Some(object) = objects.get(&id) else {
                        return PdfObjectDirect::Null(PdfNull::new(id.pos));
                    };
                    let retval = match object {
                        PdfObject::Boolean(v) => PdfObjectDirect::Boolean(*v),
                        PdfObject::Integer(v) => PdfObjectDirect::Integer(*v),
                        PdfObject::Real(v) => PdfObjectDirect::Real(*v),
                        PdfObject::String(v) => PdfObjectDirect::String(v.clone()),
                        PdfObject::Name(v) => PdfObjectDirect::Name(v.clone()),
                        PdfObject::Array(v) => PdfObjectDirect::Array(v.clone()),
                        PdfObject::Dictionary(v) => PdfObjectDirect::Dictionary(v.clone()),
                        PdfObject::Stream(v) => PdfObjectDirect::Stream(v.clone()),
                        PdfObject::Null(v) => PdfObjectDirect::Null(*v),
                        PdfObject::Indirect(v) => {
                            id = v.id;
                            continue;
                        }
                    };
                    // we could be racing with another thread, so set can fail but that's not a problem
                    let _ = self.final_id.set(id);
                    return retval;
                }
            }
        }
        PdfObjectDirect::Null(PdfNull::new(self.pos()))
    }

    /// Returns the identifier this reference was created with (not the resolved one).
    pub fn id(&self) -> PdfObjectIdentifier {
        self.id
    }

    /// Returns the input position of the identifier this reference was created with.
    pub fn pos(&self) -> PdfInputPosition {
        self.id.pos.0
    }
}
impl From<PdfObjectIndirect> for PdfObjectDirect {
fn from(value: PdfObjectIndirect) -> Self {
value.get()
}
}
#[derive(Clone)]
pub struct PdfDictionary {
pos: PdfInputPositionNoCompare,
fields: Arc<BTreeMap<PdfName, PdfObject>>,
}
impl PdfDictionary {
pub fn new(pos: impl Into<PdfInputPositionNoCompare>) -> Self {
Self {
pos: pos.into(),
fields: Arc::new(BTreeMap::new()),
}
}
pub fn from_fields(
pos: impl Into<PdfInputPositionNoCompare>,
mut fields: Arc<BTreeMap<PdfName, PdfObject>>,
) -> Self {
if fields.values().any(|v| matches!(v, PdfObject::Null(_))) {
Arc::make_mut(&mut fields).retain(|_k, v| !matches!(v, PdfObject::Null(_)));
}
Self {
pos: pos.into(),
fields,
}
}
pub fn fields(&self) -> &Arc<BTreeMap<PdfName, PdfObject>> {
&self.fields
}
pub fn into_fields(self) -> Arc<BTreeMap<PdfName, PdfObject>> {
self.fields
}
pub fn iter(&self) -> std::collections::btree_map::Iter<'_, PdfName, PdfObject> {
self.fields.iter()
}
pub fn contains_key<Q: ?Sized>(&self, key: &Q) -> bool
where
PdfName: std::borrow::Borrow<Q> + Ord,
Q: Ord,
{
self.fields.contains_key(key)
}
pub fn get<Q: ?Sized>(&self, key: &Q) -> Option<&PdfObject>
where
PdfName: std::borrow::Borrow<Q> + Ord,
Q: Ord,
{
self.fields.get(key)
}
pub fn pos(&self) -> PdfInputPosition {
self.pos.0
}
}
impl GetPdfInputPosition for PdfDictionary {
fn get_pdf_input_position(&self) -> PdfInputPosition {
self.pos.0
}
}
impl Default for PdfDictionary {
fn default() -> Self {
Self::new(PdfInputPosition::empty())
}
}
impl FromIterator<(PdfName, PdfObject)> for PdfDictionary {
fn from_iter<T: IntoIterator<Item = (PdfName, PdfObject)>>(iter: T) -> Self {
Self {
pos: PdfInputPositionNoCompare::empty(),
fields: Arc::new(BTreeMap::from_iter(
iter.into_iter()
.filter(|(_name, value)| !matches!(value, PdfObject::Null(_))),
)),
}
}
}
impl IntoIterator for PdfDictionary {
type Item = (PdfName, PdfObject);
type IntoIter = std::collections::btree_map::IntoIter<PdfName, PdfObject>;
fn into_iter(self) -> Self::IntoIter {
Arc::unwrap_or_clone(self.fields).into_iter()
}
}
impl<'a> IntoIterator for &'a PdfDictionary {
type Item = (&'a PdfName, &'a PdfObject);
type IntoIter = std::collections::btree_map::Iter<'a, PdfName, PdfObject>;
fn into_iter(self) -> Self::IntoIter {
self.fields.iter()
}
}
impl fmt::Debug for PdfDictionary {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_map().entries(self).finish()
}
}
/// A PDF array: a shared slice of objects plus the input position recorded for it.
#[derive(Clone, Default)]
pub struct PdfArray {
    pos: PdfInputPositionNoCompare,
    elements: Arc<[PdfObject]>,
}

impl PdfArray {
    /// Creates an empty array located at `pos`.
    pub fn new(pos: impl Into<PdfInputPositionNoCompare>) -> Self {
        Self::from_elements(pos, Arc::default())
    }

    /// Creates an array from already-collected elements.
    pub fn from_elements(
        pos: impl Into<PdfInputPositionNoCompare>,
        elements: Arc<[PdfObject]>,
    ) -> Self {
        let pos = pos.into();
        Self { pos, elements }
    }

    /// Returns the input position recorded for this array.
    pub fn pos(&self) -> PdfInputPosition {
        let Self { pos, .. } = self;
        pos.0
    }

    /// Borrows the shared element slice.
    pub fn elements(&self) -> &Arc<[PdfObject]> {
        let Self { elements, .. } = self;
        elements
    }

    /// Consumes the array and returns the shared element slice.
    pub fn into_elements(self) -> Arc<[PdfObject]> {
        let Self { elements, .. } = self;
        elements
    }

    /// Iterates over the elements by reference.
    pub fn iter(&self) -> std::slice::Iter<'_, PdfObject> {
        self.elements().iter()
    }
}
impl GetPdfInputPosition for PdfArray {
    /// Returns the position stored in the array.
    fn get_pdf_input_position(&self) -> PdfInputPosition {
        let Self { pos, .. } = self;
        pos.0
    }
}

impl FromIterator<PdfObject> for PdfArray {
    /// Collects the objects into an array located at the empty input position.
    fn from_iter<T: IntoIterator<Item = PdfObject>>(iter: T) -> Self {
        let elements: Arc<[PdfObject]> = iter.into_iter().collect();
        Self {
            pos: PdfInputPositionNoCompare::empty(),
            elements,
        }
    }
}
/// By-value iterator over a [`PdfArray`]'s elements.
///
/// The elements live in a shared `Arc<[PdfObject]>`, so each yielded item is a clone of
/// the element at the next index of `indexes`.
#[derive(Clone)]
pub struct PdfArrayIntoIter {
    indexes: std::ops::Range<usize>,
    elements: Arc<[PdfObject]>,
}

impl PdfArrayIntoIter {
    /// Clones the element stored at `index`.
    fn element_at(&self, index: usize) -> PdfObject {
        self.elements[index].clone()
    }
}

impl Iterator for PdfArrayIntoIter {
    type Item = PdfObject;

    fn next(&mut self) -> Option<Self::Item> {
        let index = self.indexes.next()?;
        Some(self.element_at(index))
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        self.indexes.size_hint()
    }

    fn nth(&mut self, n: usize) -> Option<Self::Item> {
        let index = self.indexes.nth(n)?;
        Some(self.element_at(index))
    }

    fn last(self) -> Option<Self::Item> {
        let Self { indexes, elements } = self;
        let index = indexes.last()?;
        Some(elements[index].clone())
    }

    fn fold<B, F>(self, init: B, mut f: F) -> B
    where
        F: FnMut(B, Self::Item) -> B,
    {
        let Self { indexes, elements } = self;
        indexes.fold(init, |acc, index| f(acc, elements[index].clone()))
    }
}

impl std::iter::FusedIterator for PdfArrayIntoIter {}

impl DoubleEndedIterator for PdfArrayIntoIter {
    fn next_back(&mut self) -> Option<Self::Item> {
        let index = self.indexes.next_back()?;
        Some(self.element_at(index))
    }

    fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
        let index = self.indexes.nth_back(n)?;
        Some(self.element_at(index))
    }

    fn rfold<B, F>(self, init: B, mut f: F) -> B
    where
        F: FnMut(B, Self::Item) -> B,
    {
        let Self { indexes, elements } = self;
        indexes.rfold(init, |acc, index| f(acc, elements[index].clone()))
    }
}

impl ExactSizeIterator for PdfArrayIntoIter {}
impl IntoIterator for PdfArray {
type Item = PdfObject;
type IntoIter = PdfArrayIntoIter;
fn into_iter(self) -> Self::IntoIter {
PdfArrayIntoIter {
indexes: 0..self.elements.len(),
elements: self.elements,
}
}
}
impl<'a> IntoIterator for &'a PdfArray {
type Item = &'a PdfObject;
type IntoIter = std::slice::Iter<'a, PdfObject>;
fn into_iter(self) -> Self::IntoIter {
self.elements.iter()
}
}
impl fmt::Debug for PdfArray {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.elements.fmt(f)
}
}
/// A shared slice of `T`, dereferencing to the inner `Arc<[T]>`.
///
/// NOTE(review): the name suggests the parsed value may be either a single item or an
/// array; that behaviour is defined by the `PdfParse` impl, which is not part of this block.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct MaybeArray<T>(pub Arc<[T]>);

impl<T> std::ops::Deref for MaybeArray<T> {
    type Target = Arc<[T]>;

    /// Borrows the inner `Arc<[T]>`.
    fn deref(&self) -> &Arc<[T]> {
        let Self(inner) = self;
        inner
    }
}

impl<T> std::ops::DerefMut for MaybeArray<T> {
    /// Mutably borrows the inner `Arc<[T]>`.
    fn deref_mut(&mut self) -> &mut Arc<[T]> {
        let Self(inner) = self;
        inner
    }
}
// Stream filter identifiers, declared through the project's `pdf_parse!` macro.
// Each variant is tagged with the PDF name it corresponds to via `#[pdf(name = "...")]`.
// NOTE(review): `#[pdf(other)]` presumably makes `Unknown` the catch-all for any name not
// listed above -- confirm against the `pdf_parse!` definition.
pdf_parse! {
#[derive(Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum PdfStreamFilter {
#[pdf(name = "ASCIIHexDecode")]
AsciiHexDecode,
#[pdf(name = "ASCII85Decode")]
Ascii85Decode,
#[pdf(name = "LZWDecode")]
LzwDecode,
#[pdf(name = "FlateDecode")]
FlateDecode,
#[pdf(name = "RunLengthDecode")]
RunLengthDecode,
#[pdf(name = "CCITTFaxDecode")]
CcittFaxDecode,
#[pdf(name = "JBIG2Decode")]
Jbig2Decode,
#[pdf(name = "DCTDecode")]
DctDecode,
#[pdf(name = "JPXDecode")]
JpxDecode,
#[pdf(name = "Crypt")]
Crypt,
#[pdf(other)]
Unknown(PdfName),
}
}
impl<T> Default for MaybeArray<T> {
fn default() -> Self {
Self(Arc::default())
}
}
impl<'a, T> IntoIterator for &'a MaybeArray<T> {
type Item = &'a T;
type IntoIter = std::slice::Iter<'a, T>;
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}
/// A file specification, which is accepted either as a string or as a dictionary.
#[derive(Clone, Debug)]
pub enum PdfFileSpecification {
    String(PdfString),
    Dictionary(PdfDictionary),
}

impl PdfParse for PdfFileSpecification {
    /// Human-readable name used in diagnostics.
    fn type_name() -> Cow<'static, str> {
        Cow::Borrowed("file specification")
    }

    /// Accepts a string or dictionary object (indirect references are resolved first).
    ///
    /// # Errors
    ///
    /// Returns `PdfParseError::InvalidType` for any other object kind. `expected_ty`
    /// carries the same text as [`Self::type_name`], matching the other `PdfParse`
    /// impls in this file.
    fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
        match PdfObjectDirect::from(object) {
            PdfObjectDirect::String(v) => Ok(Self::String(v)),
            PdfObjectDirect::Dictionary(v) => Ok(Self::Dictionary(v)),
            object => Err(PdfParseError::InvalidType {
                pos: object.pos(),
                ty: object.type_name(),
                expected_ty: "file specification",
            }),
        }
    }
}
// The dictionary that accompanies a stream, declared through the project's `pdf_parse!`
// macro. Each field is tagged with the dictionary key it is read from; `Rest` (defaulting
// to `PdfDictionary`) is the type of the `#[pdf(flatten)]` field.
// NOTE(review): `#[pdf(flatten)]` presumably hands the remaining/whole dictionary to
// `Rest`'s parser -- confirm against the `pdf_parse!` definition.
pdf_parse! {
#[derive(Clone, Debug)]
pub struct PdfStreamDictionary<Rest = PdfDictionary> {
// Key is `PdfStreamDictionary::LENGTH_NAME` ("Length").
#[pdf(name = PdfStreamDictionary::LENGTH_NAME)]
pub len: usize,
#[pdf(name = "Filter")]
pub filters: MaybeArray<PdfStreamFilter>,
// Paired with `filters` by index in `filters_and_parms`.
#[pdf(name = "DecodeParms")]
pub decode_parms: MaybeArray<Option<PdfDictionary>>,
#[pdf(name = "F")]
pub file: Option<PdfFileSpecification>,
#[pdf(name = "FFilter")]
pub file_filters: MaybeArray<PdfStreamFilter>,
// Paired with `file_filters` by index in `file_filters_and_parms`.
#[pdf(name = "FDecodeParms")]
pub file_decode_parms: MaybeArray<Option<PdfDictionary>>,
#[pdf(name = "DL")]
pub decoded_len: Option<usize>,
#[pdf(flatten)]
pub rest: Rest,
}
}
impl PdfStreamDictionary {
pub const LENGTH_NAME: &str = "Length";
pub(crate) fn parse_len_from_dictionary(
dictionary: &PdfDictionary,
) -> Result<usize, PdfParseError> {
PdfParse::parse(
dictionary
.get(&PdfName::new_static(Self::LENGTH_NAME.as_bytes()))
.cloned()
.unwrap_or_default(),
)
}
}
impl<Rest> PdfStreamDictionary<Rest> {
pub fn filters_and_parms(
&self,
) -> impl Clone + ExactSizeIterator + DoubleEndedIterator<Item = (PdfStreamFilter, PdfDictionary)>
{
self.filters.iter().enumerate().map(|(index, filter)| {
(
filter.clone(),
self.decode_parms
.0
.get(index)
.cloned()
.flatten()
.unwrap_or_default(),
)
})
}
pub fn file_filters_and_parms(
&self,
) -> impl Clone + ExactSizeIterator + DoubleEndedIterator<Item = (PdfStreamFilter, PdfDictionary)>
{
self.file_filters.iter().enumerate().map(|(index, filter)| {
(
filter.clone(),
self.file_decode_parms
.0
.get(index)
.cloned()
.flatten()
.unwrap_or_default(),
)
})
}
}
/// The not-yet-parsed dictionary of a stream created by `PdfStream::new_unparsed`.
///
/// It shares its `dictionary` cell with that stream, so [`Self::finish_parsing`] fills in
/// the value the stream later reads.
pub(crate) struct UnparsedPdfStreamDictionary<Rest> {
    unparsed_dictionary: PdfDictionary,
    dictionary: Arc<OnceLock<PdfStreamDictionary<Rest>>>,
}

impl<Rest: PdfParse> UnparsedPdfStreamDictionary<Rest> {
    /// Parses the raw dictionary and stores the result in the shared cell.
    ///
    /// # Errors
    ///
    /// Returns the parse error if the dictionary is not a valid stream dictionary; the
    /// cell is left empty in that case.
    ///
    /// # Panics
    ///
    /// Hits `unreachable!()` if the cell was already filled.
    pub(crate) fn finish_parsing(self) -> Result<(), PdfParseError> {
        let Self {
            unparsed_dictionary,
            dictionary,
        } = self;
        let parsed = <PdfStreamDictionary<Rest> as PdfParse>::parse(unparsed_dictionary.into())?;
        match dictionary.set(parsed) {
            Ok(()) => Ok(()),
            Err(_) => unreachable!(),
        }
    }
}
/// A PDF stream: its dictionary, its raw data bytes and its input position.
///
/// The dictionary sits in a shared `OnceLock` because it may be filled in after the
/// stream value has been created (see `new_unparsed`).
#[derive(Clone)]
pub struct PdfStream<Rest = PdfDictionary> {
    pos: PdfInputPositionNoCompare,
    dictionary: Arc<OnceLock<PdfStreamDictionary<Rest>>>,
    data: Arc<[u8]>,
}

impl<Rest: fmt::Debug> fmt::Debug for PdfStream<Rest> {
    /// Prints all three fields; `data` is rendered as a list of two-digit hex bytes.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            pos,
            dictionary,
            data,
        } = self;
        let mut out = f.debug_struct("PdfStream");
        out.field("pos", pos);
        out.field("dictionary", dictionary);
        out.field("data", &format_args!("{data:02x?}"));
        out.finish()
    }
}
impl<Rest> PdfStream<Rest> {
    /// Creates a stream whose dictionary is already parsed.
    pub fn new(
        pos: impl Into<PdfInputPositionNoCompare>,
        dictionary: PdfStreamDictionary<Rest>,
        data: Arc<[u8]>,
    ) -> Self {
        let dictionary = Arc::new(OnceLock::from(dictionary));
        Self {
            pos: pos.into(),
            dictionary,
            data,
        }
    }

    /// Creates a stream whose dictionary will be parsed later.
    ///
    /// Returns the stream together with a handle that shares the stream's dictionary
    /// cell; calling `finish_parsing` on that handle fills the cell in.
    pub(crate) fn new_unparsed(
        pos: impl Into<PdfInputPositionNoCompare>,
        unparsed_dictionary: PdfDictionary,
        data: Arc<[u8]>,
    ) -> (Self, UnparsedPdfStreamDictionary<Rest>) {
        let shared = Arc::new(OnceLock::new());
        let stream = Self {
            pos: pos.into(),
            dictionary: Arc::clone(&shared),
            data,
        };
        let pending = UnparsedPdfStreamDictionary {
            unparsed_dictionary,
            dictionary: shared,
        };
        (stream, pending)
    }

    /// Borrows the parsed stream dictionary.
    ///
    /// # Panics
    ///
    /// Panics if the dictionary cell has not been filled yet.
    pub fn dictionary(&self) -> &PdfStreamDictionary<Rest> {
        let Some(parsed) = self.dictionary.get() else {
            panic!("haven't finished parsing all pdf object definitions yet");
        };
        parsed
    }

    /// Borrows the stream's raw data bytes.
    pub fn data(&self) -> &Arc<[u8]> {
        let Self { data, .. } = self;
        data
    }
}

impl<Rest> GetPdfInputPosition for PdfStream<Rest> {
    /// Returns the position stored in the stream.
    fn get_pdf_input_position(&self) -> PdfInputPosition {
        let Self { pos, .. } = self;
        pos.0
    }
}
impl<Rest: PdfParse> PdfParse for PdfStream<Rest> {
    /// `"stream"` when `Rest` is `PdfDictionary`, otherwise `PdfStream<{Rest's type name}>`.
    fn type_name() -> Cow<'static, str> {
        let rest_is_plain_dictionary = TypeId::of::<Rest>() == TypeId::of::<PdfDictionary>();
        if !rest_is_plain_dictionary {
            return Cow::Owned(format!("PdfStream<{}>", Rest::type_name()));
        }
        Cow::Borrowed("stream")
    }

    /// Converts a stream object into a `PdfStream<Rest>`.
    ///
    /// If the stream's dictionary cell already has the requested `Rest` type it is shared
    /// as-is; otherwise the known fields are copied and only `rest` is re-parsed as `Rest`.
    ///
    /// # Errors
    ///
    /// Returns `PdfParseError::InvalidType` when the object is not a stream, or the error
    /// from `Rest::parse` when the remaining entries cannot be parsed.
    fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
        let stream = match PdfObjectDirect::from(object) {
            PdfObjectDirect::Stream(stream) => stream,
            other => {
                return Err(PdfParseError::InvalidType {
                    pos: other.get_pdf_input_position(),
                    ty: other.type_name(),
                    expected_ty: "stream",
                });
            }
        };
        // Runtime type check: is the existing cell already typed for this `Rest`?
        let cell: &dyn std::any::Any = &stream.dictionary;
        let dictionary = match cell.downcast_ref::<Arc<OnceLock<PdfStreamDictionary<Rest>>>>() {
            Some(same_type) => Arc::clone(same_type),
            None => {
                let untyped = stream.dictionary();
                Arc::new(OnceLock::from(PdfStreamDictionary {
                    len: untyped.len,
                    filters: untyped.filters.clone(),
                    decode_parms: untyped.decode_parms.clone(),
                    file: untyped.file.clone(),
                    file_filters: untyped.file_filters.clone(),
                    file_decode_parms: untyped.file_decode_parms.clone(),
                    decoded_len: untyped.decoded_len,
                    rest: Rest::parse(untyped.rest.clone().into())?,
                }))
            }
        };
        Ok(PdfStream {
            pos: stream.pos,
            dictionary,
            data: stream.data,
        })
    }
}