From 5247d69ebd94e5540e54e7e833a9bce40e511555 Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Tue, 23 Dec 2025 04:41:09 -0800 Subject: [PATCH] WIP rust implementation --- .gitignore | 3 +- Cargo.lock | 75 +++++ Cargo.toml | 9 + src/lib.rs | 2 + src/pdf.rs | 956 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/util.rs | 102 ++++++ 6 files changed, 1146 insertions(+), 1 deletion(-) create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 src/lib.rs create mode 100644 src/pdf.rs create mode 100644 src/util.rs diff --git a/.gitignore b/.gitignore index 50e4eb1..e11e257 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ *.egg-info __pycache__ *.log -/powerisa-instructions.xml \ No newline at end of file +/powerisa-instructions.xml +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..2d752ff --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,75 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "parse_powerisa_pdf" +version = "0.1.0" +dependencies = [ + "serde", +] + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "syn" +version = "2.0.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..d2f159a --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "parse_powerisa_pdf" +version = "0.1.0" +edition = "2024" +license = "LGPL-3.0-or-later" + +[dependencies] +serde = { version = "1.0.228", features = ["derive"] } + diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..938fe11 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,2 @@ +mod pdf; +mod util; diff --git a/src/pdf.rs b/src/pdf.rs new file mode 100644 index 0000000..0af9d6b --- /dev/null +++ b/src/pdf.rs @@ -0,0 +1,956 @@ +use crate::util::ArcOrRef; +use serde::{de, forward_to_deserialize_any}; +use std::{ + cell::RefCell, + collections::BTreeMap, + convert::Infallible, + fmt::{self, Write}, + iter::FusedIterator, + marker::PhantomData, + num::NonZero, + sync::{Arc, Weak}, +}; + +#[derive(Debug)] +pub(crate) enum PdfParseError { + InvalidFieldKind { + containing_ty: &'static str, + field: &'static str, + expected_kind: &'static str, + kind: &'static str, + }, + Custom(String), +} + +impl From for PdfParseError { + fn from(value: Infallible) -> Self { + match value {} + } +} + +impl fmt::Display for PdfParseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + &PdfParseError::InvalidFieldKind { + containing_ty, + field, + expected_kind, + kind, + } => write!( + f, + "invalid field kind: {containing_ty}.{field}: expected {expected_kind}, got {kind}" + ), + PdfParseError::Custom(msg) => f.write_str(msg), + } + } +} + +impl std::error::Error for PdfParseError {} + +impl de::Error for PdfParseError { + fn custom(msg: T) -> Self + where + T: fmt::Display, + { + PdfParseError::Custom(msg.to_string()) + } +} + +impl<'de> de::IntoDeserializer<'de, PdfParseError> for PdfName { + type Deserializer = PdfObject; + + fn into_deserializer(self) -> Self::Deserializer { + self.into() + } +} + +impl<'de> de::IntoDeserializer<'de, PdfParseError> for PdfObject { + type Deserializer = Self; + + fn into_deserializer(self) -> Self::Deserializer { + self + } +} + +impl PdfObject { + const SERDE_FIELD_NAME: &str = "__PdfObject__look_in_thread_local"; + const SERDE_NAME_AND_FIELDS: (&str, &[&str]) = ("PdfObject", &[Self::SERDE_FIELD_NAME]); + fn with_thread_local(f: impl FnOnce(&RefCell>) -> R) -> R { + thread_local! { + static CURRENT_OBJECT: RefCell> = const { RefCell::new(None) }; + } + CURRENT_OBJECT.with(f) + } + fn set_thread_local_scoped(self, f: impl FnOnce() -> R) -> R { + Self::with_thread_local(|current_object| { + struct PutBackOnDrop<'a> { + current_object: &'a RefCell>, + old_object: Option, + } + impl Drop for PutBackOnDrop<'_> { + fn drop(&mut self) { + self.current_object.replace(self.old_object.take()); + } + } + let put_back_on_drop = PutBackOnDrop { + current_object, + old_object: current_object.replace(Some(self)), + }; + let retval = f(); + drop(put_back_on_drop); + retval + }) + } + fn take_thread_local() -> Option { + Self::with_thread_local(RefCell::take) + } +} + +trait PdfObjectDeserializeHelperTrait: Sized { + fn expecting(f: &mut fmt::Formatter<'_>) -> fmt::Result; + fn from_pdf_object( + value: PdfObject, + expected: &dyn de::Expected, + ) -> Result; +} + +struct PdfObjectDeserializeHelper(T); + +impl<'de, T: PdfObjectDeserializeHelperTrait> de::Deserialize<'de> + for PdfObjectDeserializeHelper +{ + fn deserialize(deserializer: D) -> Result + where + D: de::Deserializer<'de>, + { + struct PdfObjectVisitor(PhantomData); + fn expected_pdf_object() -> E { + de::Error::invalid_type(de::Unexpected::Map, &PdfObjectVisitor::(PhantomData)) + } + impl<'de, T: PdfObjectDeserializeHelperTrait> de::Visitor<'de> for PdfObjectVisitor { + type Value = PdfObject; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + T::expecting(formatter) + } + + fn visit_map(self, mut map: A) -> Result + where + A: de::MapAccess<'de>, + { + struct Field(PhantomData); + impl<'de, T: PdfObjectDeserializeHelperTrait> de::Deserialize<'de> for Field { + fn deserialize(deserializer: D) -> Result + where + D: de::Deserializer<'de>, + { + deserializer.deserialize_identifier(Field(PhantomData)) + } + } + impl<'de, T: PdfObjectDeserializeHelperTrait> de::Visitor<'de> for Field { + type Value = Self; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + T::expecting(formatter) + } + + fn visit_str(self, v: &str) -> Result + where + E: de::Error, + { + if v == PdfObject::SERDE_FIELD_NAME { + Ok(self) + } else { + Err(expected_pdf_object::()) + } + } + } + let (Field::(PhantomData), ()) = map + .next_entry()? + .ok_or_else(expected_pdf_object::)?; + let None = map.next_entry::, ()>()? else { + return Err(expected_pdf_object::<_, T>()); + }; + PdfObject::take_thread_local().ok_or_else(expected_pdf_object::<_, T>) + } + } + let (name, fields) = PdfObject::SERDE_NAME_AND_FIELDS; + let pdf_object = + deserializer.deserialize_struct(name, fields, PdfObjectVisitor::(PhantomData))?; + T::from_pdf_object::(pdf_object, &PdfObjectVisitor::(PhantomData)).map(Self) + } +} + +macro_rules! forward_deserialize_to_pdf_object_helper { + ($ty:ty) => { + impl<'de> de::Deserialize<'de> for $ty { + fn deserialize(deserializer: D) -> Result + where + D: de::Deserializer<'de>, + { + let PdfObjectDeserializeHelper(v) = de::Deserialize::deserialize(deserializer)?; + Ok(v) + } + } + }; +} + +forward_deserialize_to_pdf_object_helper!(PdfObject); + +impl PdfObjectDeserializeHelperTrait for PdfObject { + fn expecting(f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("PdfObject") + } + + fn from_pdf_object( + value: PdfObject, + _expected: &dyn de::Expected, + ) -> Result { + Ok(value) + } +} + +forward_deserialize_to_pdf_object_helper!(PdfObjectIndirect); + +impl PdfObjectDeserializeHelperTrait for PdfObjectIndirect { + fn expecting(f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("PdfObjectIndirect") + } + + fn from_pdf_object( + value: PdfObject, + expected: &dyn de::Expected, + ) -> Result { + match value { + PdfObject::Indirect(v) => Ok(v), + _ => Err(E::invalid_type(value.as_unexpected(), expected)), + } + } +} + +forward_deserialize_to_pdf_object_helper!(PdfString); + +impl PdfObjectDeserializeHelperTrait for PdfString { + fn expecting(f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("PdfString") + } + + fn from_pdf_object( + value: PdfObject, + expected: &dyn de::Expected, + ) -> Result { + match value { + PdfObject::String(v) => Ok(v), + _ => Err(E::invalid_type(value.as_unexpected(), expected)), + } + } +} + +forward_deserialize_to_pdf_object_helper!(PdfName); + +impl PdfObjectDeserializeHelperTrait for PdfName { + fn expecting(f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("PdfName") + } + + fn from_pdf_object( + value: PdfObject, + expected: &dyn de::Expected, + ) -> Result { + match value { + PdfObject::Name(v) => Ok(v), + _ => Err(E::invalid_type(value.as_unexpected(), expected)), + } + } +} + +forward_deserialize_to_pdf_object_helper!(PdfArray); + +impl PdfObjectDeserializeHelperTrait for PdfArray { + fn expecting(f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("PdfArray") + } + + fn from_pdf_object( + value: PdfObject, + expected: &dyn de::Expected, + ) -> Result { + match value { + PdfObject::Array(v) => Ok(v), + _ => Err(E::invalid_type(value.as_unexpected(), expected)), + } + } +} + +forward_deserialize_to_pdf_object_helper!(PdfDictionary); + +impl PdfObjectDeserializeHelperTrait for PdfDictionary { + fn expecting(f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("PdfDictionary") + } + + fn from_pdf_object( + value: PdfObject, + expected: &dyn de::Expected, + ) -> Result { + match value { + PdfObject::Dictionary(v) => Ok(v), + _ => Err(E::invalid_type(value.as_unexpected(), expected)), + } + } +} + +forward_deserialize_to_pdf_object_helper!(PdfStream); + +impl PdfObjectDeserializeHelperTrait for PdfStream { + fn expecting(f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("PdfStream") + } + + fn from_pdf_object( + value: PdfObject, + expected: &dyn de::Expected, + ) -> Result { + match value { + PdfObject::Stream(v) => Ok(v), + _ => Err(E::invalid_type(value.as_unexpected(), expected)), + } + } +} + +impl<'de> de::Deserializer<'de> for PdfObject { + type Error = PdfParseError; + + fn deserialize_any(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + match PdfObjectDirect::from(self) { + PdfObjectDirect::Boolean(v) => visitor.visit_bool(v), + PdfObjectDirect::Integer(v) => visitor.visit_i32(v), + PdfObjectDirect::Real(v) => visitor.visit_f32(v), + v @ (PdfObjectDirect::String(_) | PdfObjectDirect::Stream(_)) => { + Err(de::Error::invalid_type(v.as_unexpected(), &visitor)) + } + PdfObjectDirect::Name(v) => { + if let Ok(v) = str::from_utf8(v.as_bytes()) { + visitor.visit_str(v) + } else { + Err(de::Error::invalid_type( + PdfObject::from(v).as_unexpected(), + &visitor, + )) + } + } + PdfObjectDirect::Array(v) => { + visitor.visit_seq(de::value::SeqDeserializer::new(v.iter().cloned())) + } + PdfObjectDirect::Dictionary(v) => { + visitor.visit_map(de::value::MapDeserializer::new(v.into_iter())) + } + PdfObjectDirect::Null(PdfNull {}) => visitor.visit_unit(), + } + } + + fn deserialize_struct( + self, + name: &'static str, + fields: &'static [&'static str], + visitor: V, + ) -> Result + where + V: de::Visitor<'de>, + { + match (name, fields) { + PdfObject::SERDE_NAME_AND_FIELDS => self.set_thread_local_scoped(|| { + visitor.visit_map(de::value::MapDeserializer::new(std::iter::once(( + PdfObject::SERDE_FIELD_NAME, + (), + )))) + }), + _ => self.deserialize_any(visitor), + } + } + + fn deserialize_option(self, visitor: V) -> Result + where + V: de::Visitor<'de>, + { + let is_null = match self { + Self::Indirect(ref v) => !v.exists(), + Self::Null(_) => true, + _ => false, + }; + if is_null { + visitor.visit_none() + } else { + visitor.visit_some(self) + } + } + + fn deserialize_newtype_struct( + self, + _name: &'static str, + visitor: V, + ) -> Result + where + V: de::Visitor<'de>, + { + visitor.visit_newtype_struct(self) + } + + forward_to_deserialize_any! { + bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string + bytes byte_buf unit unit_struct seq tuple + tuple_struct map enum identifier ignored_any + } +} + +#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord)] +pub(crate) struct PdfString { + bytes: ArcOrRef<'static, [u8]>, +} + +impl std::fmt::Debug for PdfString { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PdfString") + .field("bytes", &&*self.bytes) + .finish() + } +} + +impl PdfString { + pub(crate) fn new(bytes: ArcOrRef<'static, [u8]>) -> Self { + Self { bytes } + } + pub(crate) fn bytes(&self) -> &ArcOrRef<'static, [u8]> { + &self.bytes + } +} + +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub(crate) struct PdfName { + bytes: ArcOrRef<'static, [u8]>, +} + +impl PdfName { + pub(crate) fn try_new(bytes: impl Into>) -> Option { + let bytes = bytes.into(); + if bytes.contains(&0) { + None + } else { + Some(Self { bytes }) + } + } + #[track_caller] + pub(crate) const fn new_static(bytes: &'static [u8]) -> Self { + let mut i = 0; + while i < bytes.len() { + if bytes[i] == 0 { + panic!("shouldn't contain any nul bytes"); + } + i += 1; + } + Self { + bytes: ArcOrRef::Ref(bytes), + } + } + #[track_caller] + pub(crate) fn new(bytes: ArcOrRef<'static, [u8]>) -> Self { + Self::try_new(bytes).expect("shouldn't contain any nul bytes") + } + pub(crate) fn as_bytes(&self) -> &ArcOrRef<'static, [u8]> { + &self.bytes + } +} + +macro_rules! make_pdf_names { + ( + $vis:vis mod $pdf_names:ident { + $($ident:ident;)* + } + ) => { + $vis mod $pdf_names { + $(#[allow(non_upper_case_globals)] + $vis const $ident: $crate::pdf::PdfName = $crate::pdf::PdfName::new_static(stringify!($ident).as_bytes());)* + } + }; +} + +make_pdf_names! { + pub(crate) mod pdf_names { + DecodeParms; + DL; + F; + FDecodeParms; + FFilter; + Filter; + Length; + } +} + +impl fmt::Debug for PdfName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "PdfName({self})") + } +} + +impl fmt::Display for PdfName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("/")?; + for &b in self.bytes.iter() { + match b { + 0x21..=0x7E if b != b'#' => f.write_char(b.into())?, + _ => write!(f, "#{b:02X}")?, + } + } + Ok(()) + } +} + +macro_rules! make_pdf_object { + ( + $( + #[from = $($from:ident)?, as_unexpected = |$as_unexpected_arg:pat_param| $as_unexpected_expr:expr] + $Variant:ident($ty:ty), + )+ + ) => { + #[derive(Clone, Debug, PartialEq)] + pub(crate) enum PdfObjectNonNull { + $($Variant($ty),)* + } + + #[derive(Clone, Debug, PartialEq)] + pub(crate) enum PdfObjectDirect { + $($Variant($ty),)* + Null(PdfNull), + } + + #[derive(Clone, Debug, PartialEq)] + pub(crate) enum PdfObject { + $($Variant($ty),)* + Null(PdfNull), + Indirect(PdfObjectIndirect), + } + + $($( + impl From<$ty> for PdfObjectNonNull { + fn $from(value: $ty) -> Self { + Self::$Variant(value) + } + } + + impl From<$ty> for PdfObjectDirect { + fn $from(value: $ty) -> Self { + Self::$Variant(value) + } + } + + impl From<$ty> for PdfObject { + fn $from(value: $ty) -> Self { + Self::$Variant(value) + } + } + + impl From> for PdfObjectDirect { + fn $from(value: Option<$ty>) -> Self { + match value { + Some(value) => Self::$Variant(value), + None => Self::Null(PdfNull), + } + } + } + + impl From> for PdfObject { + fn $from(value: Option<$ty>) -> Self { + match value { + Some(value) => Self::$Variant(value), + None => Self::Null(PdfNull), + } + } + } + )?)* + + impl From for PdfObjectDirect { + fn from(value: PdfObjectNonNull) -> Self { + match value { + $(PdfObjectNonNull::$Variant(v) => Self::$Variant(v),)* + } + } + } + + impl From for PdfObject { + fn from(value: PdfObjectNonNull) -> Self { + match value { + $(PdfObjectNonNull::$Variant(v) => Self::$Variant(v),)* + } + } + } + + impl From for PdfObject { + fn from(value: PdfObjectDirect) -> Self { + match value { + $(PdfObjectDirect::$Variant(v) => Self::$Variant(v),)* + PdfObjectDirect::Null(v) => Self::Null(v), + } + } + } + + impl From for PdfObjectDirect { + fn from(value: PdfObject) -> Self { + match value { + $(PdfObject::$Variant(v) => Self::$Variant(v),)* + PdfObject::Null(v) => Self::Null(v), + PdfObject::Indirect(v) => v.into(), + } + } + } + + impl PdfObjectNonNull { + fn as_unexpected(&self) -> de::Unexpected<'static> { + match *self { + $(PdfObjectNonNull::$Variant($as_unexpected_arg) => $as_unexpected_expr,)* + } + } + } + + impl PdfObjectDirect { + fn as_unexpected(&self) -> de::Unexpected<'static> { + match *self { + $(PdfObjectDirect::$Variant($as_unexpected_arg) => $as_unexpected_expr,)* + PdfObjectDirect::Null(_) => de::Unexpected::Option, + } + } + } + + impl PdfObject { + fn as_unexpected(&self) -> de::Unexpected<'static> { + match *self { + $(PdfObject::$Variant($as_unexpected_arg) => $as_unexpected_expr,)* + PdfObject::Null(_) => de::Unexpected::Option, + PdfObject::Indirect(ref v) => v.get().as_unexpected(), + } + } + } + + const _: () = { + fn _assert_impls_deserialize() {} + + $(let _ = _assert_impls_deserialize::<$ty>;)* + }; + }; +} + +make_pdf_object! { + #[from = from, as_unexpected = |v| de::Unexpected::Bool(v)] + Boolean(bool), + #[from = from, as_unexpected = |v| de::Unexpected::Signed(v.into())] + Integer(i32), + #[from = from, as_unexpected = |v| de::Unexpected::Float(v.into())] + Real(f32), + #[from = from, as_unexpected = |_| de::Unexpected::Other("PdfString")] + String(PdfString), + #[from = from, as_unexpected = |_| de::Unexpected::Other("PdfName")] + Name(PdfName), + #[from = from, as_unexpected = |_| de::Unexpected::Seq] + Array(PdfArray), + #[from = from, as_unexpected = |_| de::Unexpected::Map] + Dictionary(PdfDictionary), + #[from = from, as_unexpected = |_| de::Unexpected::Other("PdfStream")] + Stream(PdfStream), +} + +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct PdfNull; + +impl From for PdfObjectDirect { + fn from(v: PdfNull) -> Self { + Self::Null(v) + } +} + +impl From for PdfObject { + fn from(v: PdfNull) -> Self { + Self::Null(v) + } +} + +impl From for PdfObject { + fn from(v: PdfObjectIndirect) -> Self { + Self::Indirect(v) + } +} + +#[derive(Clone, Debug)] +pub(crate) struct PdfObjectIndirect { + xref_table: Weak, + object_number: NonZero, + generation_number: u16, +} + +impl PartialEq for PdfObjectIndirect { + fn eq(&self, other: &Self) -> bool { + let Self { + xref_table, + object_number, + generation_number, + } = self; + xref_table.ptr_eq(&other.xref_table) + && *object_number == other.object_number + && *generation_number == other.generation_number + } +} + +impl PdfObjectIndirect { + pub fn exists(&self) -> bool { + todo!() + } + pub fn get(&self) -> PdfObjectDirect { + todo!() + } +} + +impl From for PdfObjectDirect { + fn from(value: PdfObjectIndirect) -> Self { + value.get() + } +} + +#[derive(Clone, PartialEq)] +pub(crate) struct PdfDictionary { + fields: Arc>, +} + +impl PdfDictionary { + pub(crate) fn fields(&self) -> &Arc> { + &self.fields + } + pub(crate) fn into_fields(self) -> Arc> { + self.fields + } + pub(crate) fn iter(&self) -> std::collections::btree_map::Iter<'_, PdfName, PdfObject> { + self.fields.iter() + } + pub(crate) fn contains_key(&self, key: &Q) -> bool + where + PdfName: std::borrow::Borrow + Ord, + Q: Ord, + { + self.fields.contains_key(key) + } + pub(crate) fn get(&self, key: &Q) -> Option<&PdfObject> + where + PdfName: std::borrow::Borrow + Ord, + Q: Ord, + { + self.fields.get(key) + } +} + +impl FromIterator<(PdfName, PdfObject)> for PdfDictionary { + fn from_iter>(iter: T) -> Self { + Self { + fields: Arc::new(BTreeMap::from_iter( + iter.into_iter() + .filter(|(_name, value)| !matches!(value, PdfObject::Null(_))), + )), + } + } +} + +impl IntoIterator for PdfDictionary { + type Item = (PdfName, PdfObject); + type IntoIter = std::collections::btree_map::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + Arc::unwrap_or_clone(self.fields).into_iter() + } +} + +impl<'a> IntoIterator for &'a PdfDictionary { + type Item = (&'a PdfName, &'a PdfObject); + type IntoIter = std::collections::btree_map::Iter<'a, PdfName, PdfObject>; + + fn into_iter(self) -> Self::IntoIter { + self.fields.iter() + } +} + +impl fmt::Debug for PdfDictionary { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_map().entries(self).finish() + } +} + +#[derive(Clone, Default, PartialEq)] +pub(crate) struct PdfArray { + elements: Arc<[PdfObject]>, +} + +impl PdfArray { + pub(crate) fn new() -> Self { + Self::default() + } + pub(crate) fn elements(&self) -> &Arc<[PdfObject]> { + &self.elements + } + pub(crate) fn into_elements(self) -> Arc<[PdfObject]> { + self.elements + } + pub(crate) fn iter(&self) -> std::slice::Iter<'_, PdfObject> { + self.elements.iter() + } +} + +impl FromIterator for PdfArray { + fn from_iter>(iter: T) -> Self { + Self { + elements: Arc::from_iter(iter), + } + } +} + +#[derive(Clone)] +pub(crate) struct PdfArrayIntoIter { + indexes: std::ops::Range, + elements: Arc<[PdfObject]>, +} + +impl Iterator for PdfArrayIntoIter { + type Item = PdfObject; + + fn next(&mut self) -> Option { + self.indexes.next().map(|i| self.elements[i].clone()) + } + + fn size_hint(&self) -> (usize, Option) { + self.indexes.size_hint() + } + + fn nth(&mut self, n: usize) -> Option { + self.indexes.nth(n).map(|i| self.elements[i].clone()) + } + + fn last(self) -> Option { + self.indexes.last().map(|i| self.elements[i].clone()) + } + + fn fold(self, init: B, mut f: F) -> B + where + F: FnMut(B, Self::Item) -> B, + { + self.indexes + .fold(init, |init, i| f(init, self.elements[i].clone())) + } +} + +impl FusedIterator for PdfArrayIntoIter {} + +impl DoubleEndedIterator for PdfArrayIntoIter { + fn next_back(&mut self) -> Option { + self.indexes.next_back().map(|i| self.elements[i].clone()) + } + fn nth_back(&mut self, n: usize) -> Option { + self.indexes.nth_back(n).map(|i| self.elements[i].clone()) + } + fn rfold(self, init: B, mut f: F) -> B + where + F: FnMut(B, Self::Item) -> B, + { + self.indexes + .rfold(init, |init, i| f(init, self.elements[i].clone())) + } +} + +impl ExactSizeIterator for PdfArrayIntoIter {} + +impl IntoIterator for PdfArray { + type Item = PdfObject; + type IntoIter = PdfArrayIntoIter; + + fn into_iter(self) -> Self::IntoIter { + PdfArrayIntoIter { + indexes: 0..self.elements.len(), + elements: self.elements, + } + } +} + +impl<'a> IntoIterator for &'a PdfArray { + type Item = &'a PdfObject; + type IntoIter = std::slice::Iter<'a, PdfObject>; + + fn into_iter(self) -> Self::IntoIter { + self.elements.iter() + } +} + +impl fmt::Debug for PdfArray { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.elements.fmt(f) + } +} + +#[derive(Clone, Debug, PartialEq)] +pub(crate) struct PdfStream { + dictionary: PdfDictionary, + data: Arc, +} + +pub(crate) enum PdfBody {} + +pub(crate) struct PdfObjects {} + +pub(crate) struct PdfXRefTable {} + +pub(crate) struct Pdf { + pub(crate) header: PdfHeader, + pub(crate) body: PdfBody, +} + +pub(crate) struct PdfHeader {} + +#[cfg(test)] + +mod tests { + use super::*; + + #[test] + fn test_deserialize_dict() -> Result<(), PdfParseError> { + #[derive(serde::Deserialize, Debug, PartialEq)] + struct TestStruct { + a: i32, + c: i32, + b: i32, + #[serde(flatten)] + others: PdfDictionary, + } + + let v: TestStruct = + de::Deserialize::deserialize(PdfObject::from(PdfDictionary::from_iter([ + (PdfName::new_static(b"a"), 1.into()), + (PdfName::new_static(b"c"), 7.into()), + (PdfName::new_static(b"b"), 5.into()), + (PdfName::new_static(b"d"), false.into()), + (PdfName::new_static(b"e"), PdfNull.into()), + ( + PdfName::new_static(b"f"), + PdfString::new(ArcOrRef::Ref(b"test")).into(), + ), + ])))?; + let expected = TestStruct { + a: 1, + c: 7, + b: 5, + others: PdfDictionary::from_iter([ + (PdfName::new_static(b"d"), false.into()), + ( + PdfName::new_static(b"f"), + PdfString::new(ArcOrRef::Ref(b"test")).into(), + ), + ]), + }; + assert_eq!(v, expected); + Ok(()) + } +} diff --git a/src/util.rs b/src/util.rs new file mode 100644 index 0000000..e234a7d --- /dev/null +++ b/src/util.rs @@ -0,0 +1,102 @@ +use std::{ + borrow::Borrow, + fmt, + hash::{Hash, Hasher}, + sync::Arc, +}; + +pub(crate) enum ArcOrRef<'a, T: ?Sized> { + Arc(Arc), + Ref(&'a T), +} + +impl<'a, T: ?Sized> AsRef for ArcOrRef<'a, T> { + fn as_ref(&self) -> &T { + self + } +} + +impl<'a, T: ?Sized> Borrow for ArcOrRef<'a, T> { + fn borrow(&self) -> &T { + self + } +} + +impl<'a, T: ?Sized> From> for ArcOrRef<'a, T> { + fn from(value: Arc) -> Self { + Self::Arc(value) + } +} + +impl<'a, T: ?Sized> From<&'a T> for ArcOrRef<'a, T> { + fn from(value: &'a T) -> Self { + Self::Ref(value) + } +} + +impl<'a, T: ?Sized> Default for ArcOrRef<'a, T> +where + &'a T: Default, +{ + fn default() -> Self { + Self::Ref(Default::default()) + } +} + +impl Clone for ArcOrRef<'_, T> { + fn clone(&self) -> Self { + match self { + Self::Arc(v) => Self::Arc(v.clone()), + Self::Ref(v) => Self::Ref(v), + } + } +} + +impl Hash for ArcOrRef<'_, T> { + fn hash(&self, state: &mut H) { + T::hash(self, state) + } +} + +impl<'a, 'b, T: ?Sized + PartialEq, U: ?Sized> PartialEq> for ArcOrRef<'a, T> { + fn eq(&self, other: &ArcOrRef<'b, U>) -> bool { + T::eq(self, other) + } +} + +impl Eq for ArcOrRef<'_, T> {} + +impl<'a, 'b, T: ?Sized + PartialOrd, U: ?Sized> PartialOrd> for ArcOrRef<'a, T> { + fn partial_cmp(&self, other: &ArcOrRef<'b, U>) -> Option { + T::partial_cmp(self, other) + } +} + +impl Ord for ArcOrRef<'_, T> { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + T::cmp(self, other) + } +} + +impl std::ops::Deref for ArcOrRef<'_, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + match self { + ArcOrRef::Arc(v) => v, + ArcOrRef::Ref(v) => v, + } + } +} + +impl fmt::Debug for ArcOrRef<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + T::fmt(self, f) + } +} + +impl fmt::Display for ArcOrRef<'_, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + T::fmt(self, f) + } +}