1111 lines
31 KiB
Rust
1111 lines
31 KiB
Rust
use crate::{
|
|
pdf::{
|
|
PdfObjects,
|
|
parse::{
|
|
GetPdfInputPosition, PdfInputPosition, PdfInputPositionNoCompare, PdfParse,
|
|
PdfParseError,
|
|
},
|
|
},
|
|
pdf_parse,
|
|
util::ArcOrRef,
|
|
};
|
|
use std::{
|
|
any::TypeId,
|
|
borrow::Cow,
|
|
collections::BTreeMap,
|
|
fmt::{self, Write},
|
|
num::NonZero,
|
|
sync::{Arc, OnceLock},
|
|
};
|
|
|
|
#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord)]
|
|
pub struct PdfString {
|
|
pos: PdfInputPositionNoCompare,
|
|
bytes: ArcOrRef<'static, [u8]>,
|
|
}
|
|
|
|
impl std::fmt::Debug for PdfString {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
let Self { pos, bytes } = self;
|
|
f.debug_struct("PdfString")
|
|
.field("pos", pos)
|
|
.field("bytes", &format_args!("b\"{}\"", bytes.escape_ascii()))
|
|
.finish()
|
|
}
|
|
}
|
|
|
|
impl PdfString {
|
|
pub fn new(pos: impl Into<PdfInputPositionNoCompare>, bytes: ArcOrRef<'static, [u8]>) -> Self {
|
|
Self {
|
|
pos: pos.into(),
|
|
bytes,
|
|
}
|
|
}
|
|
pub fn pos(&self) -> PdfInputPosition {
|
|
self.pos.0
|
|
}
|
|
pub fn bytes(&self) -> &ArcOrRef<'static, [u8]> {
|
|
&self.bytes
|
|
}
|
|
}
|
|
|
|
impl GetPdfInputPosition for PdfString {
|
|
fn get_pdf_input_position(&self) -> PdfInputPosition {
|
|
self.pos.0
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
|
pub struct PdfName {
|
|
pos: PdfInputPositionNoCompare,
|
|
bytes: ArcOrRef<'static, [u8]>,
|
|
}
|
|
|
|
impl PdfName {
|
|
pub fn try_new(
|
|
pos: impl Into<PdfInputPositionNoCompare>,
|
|
bytes: impl Into<ArcOrRef<'static, [u8]>>,
|
|
) -> Option<Self> {
|
|
let bytes = bytes.into();
|
|
if bytes.contains(&0) {
|
|
None
|
|
} else {
|
|
Some(Self {
|
|
pos: pos.into(),
|
|
bytes,
|
|
})
|
|
}
|
|
}
|
|
#[track_caller]
|
|
pub const fn new_static(bytes: &'static [u8]) -> Self {
|
|
let mut i = 0;
|
|
while i < bytes.len() {
|
|
if bytes[i] == 0 {
|
|
panic!("shouldn't contain any nul bytes");
|
|
}
|
|
i += 1;
|
|
}
|
|
Self {
|
|
pos: PdfInputPositionNoCompare::empty(),
|
|
bytes: ArcOrRef::Ref(bytes),
|
|
}
|
|
}
|
|
#[track_caller]
|
|
pub fn new(
|
|
pos: impl Into<PdfInputPositionNoCompare>,
|
|
bytes: impl Into<ArcOrRef<'static, [u8]>>,
|
|
) -> Self {
|
|
Self::try_new(pos, bytes).expect("shouldn't contain any nul bytes")
|
|
}
|
|
pub fn as_bytes(&self) -> &ArcOrRef<'static, [u8]> {
|
|
&self.bytes
|
|
}
|
|
pub fn pos(&self) -> PdfInputPosition {
|
|
self.pos.0
|
|
}
|
|
}
|
|
|
|
impl GetPdfInputPosition for PdfName {
|
|
fn get_pdf_input_position(&self) -> PdfInputPosition {
|
|
self.pos.0
|
|
}
|
|
}
|
|
|
|
impl fmt::Debug for PdfName {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
write!(f, "PdfName(at {}: {self})", self.pos)
|
|
}
|
|
}
|
|
|
|
impl fmt::Display for PdfName {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
f.write_str("/")?;
|
|
for &b in self.bytes.iter() {
|
|
match b {
|
|
0x21..=0x7E if b != b'#' => f.write_char(b.into())?,
|
|
_ => write!(f, "#{b:02X}")?,
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Default)]
|
|
pub struct PdfBoolean {
|
|
pos: PdfInputPositionNoCompare,
|
|
value: bool,
|
|
}
|
|
|
|
impl PdfBoolean {
|
|
pub fn new(pos: impl Into<PdfInputPositionNoCompare>, value: bool) -> Self {
|
|
Self {
|
|
pos: pos.into(),
|
|
value,
|
|
}
|
|
}
|
|
pub fn pos(&self) -> PdfInputPosition {
|
|
self.pos.0
|
|
}
|
|
pub fn value(&self) -> bool {
|
|
self.value
|
|
}
|
|
}
|
|
|
|
impl GetPdfInputPosition for PdfBoolean {
|
|
fn get_pdf_input_position(&self) -> PdfInputPosition {
|
|
self.pos.0
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Default)]
|
|
pub struct PdfInteger {
|
|
pos: PdfInputPositionNoCompare,
|
|
value: i128,
|
|
}
|
|
|
|
impl PdfInteger {
|
|
pub fn new(pos: impl Into<PdfInputPositionNoCompare>, value: i128) -> Self {
|
|
Self {
|
|
pos: pos.into(),
|
|
value,
|
|
}
|
|
}
|
|
pub fn pos(&self) -> PdfInputPosition {
|
|
self.pos.0
|
|
}
|
|
pub fn value(&self) -> i128 {
|
|
self.value
|
|
}
|
|
}
|
|
|
|
impl GetPdfInputPosition for PdfInteger {
|
|
fn get_pdf_input_position(&self) -> PdfInputPosition {
|
|
self.pos.0
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Default)]
|
|
pub struct PdfReal {
|
|
pos: PdfInputPositionNoCompare,
|
|
value: f64,
|
|
}
|
|
|
|
impl PdfReal {
|
|
pub fn new(pos: impl Into<PdfInputPositionNoCompare>, value: f64) -> Self {
|
|
Self {
|
|
pos: pos.into(),
|
|
value,
|
|
}
|
|
}
|
|
pub fn pos(&self) -> PdfInputPosition {
|
|
self.pos.0
|
|
}
|
|
pub fn value(&self) -> f64 {
|
|
self.value
|
|
}
|
|
}
|
|
|
|
impl GetPdfInputPosition for PdfReal {
|
|
fn get_pdf_input_position(&self) -> PdfInputPosition {
|
|
self.pos.0
|
|
}
|
|
}
|
|
|
|
// Generates the three object enums (`PdfObjectNonNull`, `PdfObjectDirect`,
// `PdfObject`) from one list of variants, together with the conversions between
// them, `type_name`/`pos` accessors and `GetPdfInputPosition` impls.
//
// Each entry is `#[parse = <fn name or empty>, type_name = "<name>"] Variant(Type),`.
// When a parse fn name is given, a `PdfParse` impl for `Type` is generated too;
// leaving it empty skips that impl (the type must then implement `PdfParse`
// elsewhere, which the trailing `const _` block checks).
macro_rules! make_pdf_object {
    (
        $(
            #[parse = $($parse:ident)?, type_name = $type_name:literal]
            $Variant:ident($ty:ty),
        )+
    ) => {
        /// Any direct object except null.
        #[derive(Clone, Debug)]
        pub enum PdfObjectNonNull {
            $($Variant($ty),)*
        }

        /// Any object that is not an indirect reference.
        #[derive(Clone, Debug)]
        pub enum PdfObjectDirect {
            $($Variant($ty),)*
            Null(PdfNull),
        }

        /// Any object, including null and indirect references.
        #[derive(Clone, Debug)]
        pub enum PdfObject {
            $($Variant($ty),)*
            Null(PdfNull),
            Indirect(PdfObjectIndirect),
        }

        $(
            impl From<$ty> for PdfObjectNonNull {
                fn from(inner: $ty) -> Self {
                    Self::$Variant(inner)
                }
            }

            impl From<$ty> for PdfObjectDirect {
                fn from(inner: $ty) -> Self {
                    Self::$Variant(inner)
                }
            }

            impl From<$ty> for PdfObject {
                fn from(inner: $ty) -> Self {
                    Self::$Variant(inner)
                }
            }

            // `None` becomes a default-constructed null object.
            impl From<Option<$ty>> for PdfObjectDirect {
                fn from(maybe: Option<$ty>) -> Self {
                    maybe.map_or_else(|| Self::Null(PdfNull::default()), Self::$Variant)
                }
            }

            impl From<Option<$ty>> for PdfObject {
                fn from(maybe: Option<$ty>) -> Self {
                    maybe.map_or_else(|| Self::Null(PdfNull::default()), Self::$Variant)
                }
            }

            // Only emitted when the entry names a parse function.
            $(impl crate::pdf::parse::PdfParse for $ty {
                fn type_name() -> Cow<'static, str> {
                    Cow::Borrowed($type_name)
                }

                fn $parse(object: PdfObject) -> Result<Self, crate::pdf::parse::PdfParseError> {
                    // Converting to `PdfObjectDirect` resolves indirect references first.
                    let direct = PdfObjectDirect::from(object);
                    if let PdfObjectDirect::$Variant(matched) = direct {
                        return Ok(matched);
                    }
                    Err(crate::pdf::parse::PdfParseError::InvalidType {
                        pos: direct.get_pdf_input_position(),
                        ty: direct.type_name(),
                        expected_ty: $type_name,
                    })
                }
            })?
        )*

        impl From<PdfObjectNonNull> for PdfObjectDirect {
            fn from(object: PdfObjectNonNull) -> Self {
                match object {
                    $(PdfObjectNonNull::$Variant(inner) => Self::$Variant(inner),)*
                }
            }
        }

        impl From<PdfObjectNonNull> for PdfObject {
            fn from(object: PdfObjectNonNull) -> Self {
                match object {
                    $(PdfObjectNonNull::$Variant(inner) => Self::$Variant(inner),)*
                }
            }
        }

        impl From<PdfObjectDirect> for PdfObject {
            fn from(object: PdfObjectDirect) -> Self {
                match object {
                    $(PdfObjectDirect::$Variant(inner) => Self::$Variant(inner),)*
                    PdfObjectDirect::Null(null) => Self::Null(null),
                }
            }
        }

        impl From<PdfObject> for PdfObjectDirect {
            fn from(object: PdfObject) -> Self {
                match object {
                    $(PdfObject::$Variant(inner) => Self::$Variant(inner),)*
                    PdfObject::Null(null) => Self::Null(null),
                    // Indirect references are resolved via their own conversion.
                    PdfObject::Indirect(reference) => Self::from(reference),
                }
            }
        }

        impl PdfObjectNonNull {
            /// Human-readable name of the contained object type.
            pub fn type_name(&self) -> &'static str {
                match self {
                    $(Self::$Variant(_) => $type_name,)*
                }
            }

            /// Input position of the contained object.
            pub fn pos(&self) -> PdfInputPosition {
                GetPdfInputPosition::get_pdf_input_position(self)
            }
        }

        impl GetPdfInputPosition for PdfObjectNonNull {
            fn get_pdf_input_position(&self) -> PdfInputPosition {
                match self {
                    $(Self::$Variant(inner) => <$ty as GetPdfInputPosition>::get_pdf_input_position(inner),)*
                }
            }
        }

        impl From<PdfObjectDirect> for Option<PdfObjectNonNull> {
            fn from(object: PdfObjectDirect) -> Self {
                match object {
                    $(PdfObjectDirect::$Variant(inner) => Some(PdfObjectNonNull::$Variant(inner)),)*
                    PdfObjectDirect::Null(_) => None,
                }
            }
        }

        impl From<PdfObject> for Option<PdfObjectNonNull> {
            fn from(object: PdfObject) -> Self {
                let direct: PdfObjectDirect = object.into();
                direct.into()
            }
        }

        impl PdfObjectDirect {
            /// Whether this is the null object.
            pub fn is_null(&self) -> bool {
                matches!(self, Self::Null(_))
            }

            /// Human-readable name of the contained object type.
            pub fn type_name(&self) -> &'static str {
                match self {
                    $(Self::$Variant(_) => $type_name,)*
                    Self::Null(_) => "null",
                }
            }

            /// Input position of the contained object.
            pub fn pos(&self) -> PdfInputPosition {
                GetPdfInputPosition::get_pdf_input_position(self)
            }
        }

        impl GetPdfInputPosition for PdfObjectDirect {
            fn get_pdf_input_position(&self) -> PdfInputPosition {
                match self {
                    $(Self::$Variant(inner) => <$ty as GetPdfInputPosition>::get_pdf_input_position(inner),)*
                    Self::Null(null) => <PdfNull as GetPdfInputPosition>::get_pdf_input_position(null),
                }
            }
        }

        impl PdfObject {
            /// Whether this is the null object (an indirect reference is never reported as null here).
            pub fn is_null(&self) -> bool {
                matches!(self, Self::Null(_))
            }

            /// Human-readable name of the contained object type.
            pub fn type_name(&self) -> &'static str {
                match self {
                    $(Self::$Variant(_) => $type_name,)*
                    Self::Null(_) => "null",
                    Self::Indirect(_) => "indirect object",
                }
            }

            /// Input position of the contained object.
            pub fn pos(&self) -> PdfInputPosition {
                GetPdfInputPosition::get_pdf_input_position(self)
            }
        }

        impl GetPdfInputPosition for PdfObject {
            fn get_pdf_input_position(&self) -> PdfInputPosition {
                match self {
                    $(Self::$Variant(inner) => <$ty as GetPdfInputPosition>::get_pdf_input_position(inner),)*
                    Self::Null(null) => <PdfNull as GetPdfInputPosition>::get_pdf_input_position(null),
                    Self::Indirect(reference) => <PdfObjectIndirect as GetPdfInputPosition>::get_pdf_input_position(reference),
                }
            }
        }

        // Compile-time check that every object type implements `PdfParse`,
        // including the ones whose impl is not generated by this macro.
        const _: () = {
            fn _assert_parsable<T: crate::pdf::parse::PdfParse>() {}

            $(let _ = _assert_parsable::<$ty>;)*
            let _ = _assert_parsable::<PdfNull>;
            let _ = _assert_parsable::<PdfObjectIndirect>;
            let _ = _assert_parsable::<PdfObjectNonNull>;
            let _ = _assert_parsable::<PdfObjectDirect>;
            let _ = _assert_parsable::<PdfObject>;
        };
    };
}
|
|
|
|
// Instantiates the object enums. Every entry with `parse = parse` also gets a
// macro-generated `PdfParse` impl; `Stream` leaves the parse name empty because
// `PdfStream` has a hand-written `PdfParse` impl in this file.
make_pdf_object! {
    #[parse = parse, type_name = "boolean"]
    Boolean(PdfBoolean),
    #[parse = parse, type_name = "integer"]
    Integer(PdfInteger),
    #[parse = parse, type_name = "real"]
    Real(PdfReal),
    #[parse = parse, type_name = "string"]
    String(PdfString),
    #[parse = parse, type_name = "name"]
    Name(PdfName),
    #[parse = parse, type_name = "array"]
    Array(PdfArray),
    #[parse = parse, type_name = "dictionary"]
    Dictionary(PdfDictionary),
    #[parse =, type_name = "stream"]
    Stream(PdfStream),
}
|
|
|
|
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
|
pub struct PdfNull(PdfInputPositionNoCompare);
|
|
|
|
impl PdfNull {
|
|
pub fn new(pos: impl Into<PdfInputPositionNoCompare>) -> Self {
|
|
Self(pos.into())
|
|
}
|
|
}
|
|
|
|
impl GetPdfInputPosition for PdfNull {
|
|
fn get_pdf_input_position(&self) -> PdfInputPosition {
|
|
self.0.0
|
|
}
|
|
}
|
|
|
|
impl From<PdfNull> for PdfObjectDirect {
|
|
fn from(v: PdfNull) -> Self {
|
|
Self::Null(v)
|
|
}
|
|
}
|
|
|
|
impl Default for PdfObjectDirect {
|
|
fn default() -> Self {
|
|
Self::Null(PdfNull(PdfInputPositionNoCompare::empty()))
|
|
}
|
|
}
|
|
|
|
impl From<PdfNull> for PdfObject {
|
|
fn from(v: PdfNull) -> Self {
|
|
Self::Null(v)
|
|
}
|
|
}
|
|
|
|
impl Default for PdfObject {
|
|
fn default() -> Self {
|
|
Self::Null(PdfNull(PdfInputPositionNoCompare::empty()))
|
|
}
|
|
}
|
|
|
|
impl From<PdfObjectIndirect> for PdfObject {
|
|
fn from(v: PdfObjectIndirect) -> Self {
|
|
Self::Indirect(v)
|
|
}
|
|
}
|
|
|
|
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
|
pub struct PdfObjectIdentifier {
|
|
pub pos: PdfInputPositionNoCompare,
|
|
pub object_number: NonZero<u32>,
|
|
pub generation_number: u16,
|
|
}
|
|
|
|
impl GetPdfInputPosition for PdfObjectIdentifier {
|
|
fn get_pdf_input_position(&self) -> PdfInputPosition {
|
|
self.pos.0
|
|
}
|
|
}
|
|
|
|
#[derive(Clone)]
|
|
pub struct PdfObjectIndirect {
|
|
objects: std::sync::Weak<PdfObjects>,
|
|
id: PdfObjectIdentifier,
|
|
final_id: Arc<OnceLock<PdfObjectIdentifier>>,
|
|
}
|
|
|
|
impl fmt::Debug for PdfObjectIndirect {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
let Self {
|
|
objects: _,
|
|
id,
|
|
final_id: _,
|
|
} = self;
|
|
f.debug_struct("PdfObjectIndirect")
|
|
.field("id", id)
|
|
.finish_non_exhaustive()
|
|
}
|
|
}
|
|
|
|
impl GetPdfInputPosition for PdfObjectIndirect {
|
|
fn get_pdf_input_position(&self) -> PdfInputPosition {
|
|
self.id.get_pdf_input_position()
|
|
}
|
|
}
|
|
|
|
impl PartialEq for PdfObjectIndirect {
|
|
fn eq(&self, other: &Self) -> bool {
|
|
let Self {
|
|
objects,
|
|
id,
|
|
final_id: _,
|
|
} = self;
|
|
objects.ptr_eq(&other.objects) && *id == other.id
|
|
}
|
|
}
|
|
|
|
impl PdfObjectIndirect {
|
|
pub fn new(objects: &Arc<PdfObjects>, id: PdfObjectIdentifier) -> Self {
|
|
Self {
|
|
objects: Arc::downgrade(objects),
|
|
id,
|
|
final_id: Arc::new(OnceLock::new()),
|
|
}
|
|
}
|
|
pub fn get(&self) -> PdfObjectDirect {
|
|
if let Some(objects) = self.objects.upgrade() {
|
|
if let Some(objects) = objects.objects.get() {
|
|
let final_id = self.final_id.get().copied();
|
|
let limit = if final_id.is_some() { 1 } else { 1000usize };
|
|
let mut id = final_id.unwrap_or(self.id);
|
|
for _ in 0..limit {
|
|
if let Some(object) = objects.get(&self.id) {
|
|
let retval = match object {
|
|
PdfObject::Boolean(v) => PdfObjectDirect::Boolean(*v),
|
|
PdfObject::Integer(v) => PdfObjectDirect::Integer(*v),
|
|
PdfObject::Real(v) => PdfObjectDirect::Real(*v),
|
|
PdfObject::String(v) => PdfObjectDirect::String(v.clone()),
|
|
PdfObject::Name(v) => PdfObjectDirect::Name(v.clone()),
|
|
PdfObject::Array(v) => PdfObjectDirect::Array(v.clone()),
|
|
PdfObject::Dictionary(v) => PdfObjectDirect::Dictionary(v.clone()),
|
|
PdfObject::Stream(v) => PdfObjectDirect::Stream(v.clone()),
|
|
PdfObject::Null(v) => PdfObjectDirect::Null(*v),
|
|
PdfObject::Indirect(v) => {
|
|
id = v.id;
|
|
continue;
|
|
}
|
|
};
|
|
// we could be racing with another thread, so set can fail but that's not a problem
|
|
let _ = self.final_id.set(id);
|
|
return retval;
|
|
} else {
|
|
return PdfObjectDirect::Null(PdfNull::new(id.pos));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
PdfObjectDirect::Null(PdfNull::new(self.pos()))
|
|
}
|
|
pub fn id(&self) -> PdfObjectIdentifier {
|
|
self.id
|
|
}
|
|
pub fn pos(&self) -> PdfInputPosition {
|
|
self.id.pos.0
|
|
}
|
|
}
|
|
|
|
impl From<PdfObjectIndirect> for PdfObjectDirect {
|
|
fn from(value: PdfObjectIndirect) -> Self {
|
|
value.get()
|
|
}
|
|
}
|
|
|
|
#[derive(Clone)]
|
|
pub struct PdfDictionary {
|
|
pos: PdfInputPositionNoCompare,
|
|
fields: Arc<BTreeMap<PdfName, PdfObject>>,
|
|
}
|
|
|
|
impl PdfDictionary {
|
|
pub fn new(pos: impl Into<PdfInputPositionNoCompare>) -> Self {
|
|
Self {
|
|
pos: pos.into(),
|
|
fields: Arc::new(BTreeMap::new()),
|
|
}
|
|
}
|
|
pub fn from_fields(
|
|
pos: impl Into<PdfInputPositionNoCompare>,
|
|
mut fields: Arc<BTreeMap<PdfName, PdfObject>>,
|
|
) -> Self {
|
|
if fields.values().any(|v| matches!(v, PdfObject::Null(_))) {
|
|
Arc::make_mut(&mut fields).retain(|_k, v| !matches!(v, PdfObject::Null(_)));
|
|
}
|
|
Self {
|
|
pos: pos.into(),
|
|
fields,
|
|
}
|
|
}
|
|
pub fn fields(&self) -> &Arc<BTreeMap<PdfName, PdfObject>> {
|
|
&self.fields
|
|
}
|
|
pub fn into_fields(self) -> Arc<BTreeMap<PdfName, PdfObject>> {
|
|
self.fields
|
|
}
|
|
pub fn iter(&self) -> std::collections::btree_map::Iter<'_, PdfName, PdfObject> {
|
|
self.fields.iter()
|
|
}
|
|
pub fn contains_key<Q: ?Sized>(&self, key: &Q) -> bool
|
|
where
|
|
PdfName: std::borrow::Borrow<Q> + Ord,
|
|
Q: Ord,
|
|
{
|
|
self.fields.contains_key(key)
|
|
}
|
|
pub fn get<Q: ?Sized>(&self, key: &Q) -> Option<&PdfObject>
|
|
where
|
|
PdfName: std::borrow::Borrow<Q> + Ord,
|
|
Q: Ord,
|
|
{
|
|
self.fields.get(key)
|
|
}
|
|
pub fn pos(&self) -> PdfInputPosition {
|
|
self.pos.0
|
|
}
|
|
}
|
|
|
|
impl GetPdfInputPosition for PdfDictionary {
|
|
fn get_pdf_input_position(&self) -> PdfInputPosition {
|
|
self.pos.0
|
|
}
|
|
}
|
|
|
|
impl Default for PdfDictionary {
|
|
fn default() -> Self {
|
|
Self::new(PdfInputPosition::empty())
|
|
}
|
|
}
|
|
|
|
impl FromIterator<(PdfName, PdfObject)> for PdfDictionary {
|
|
fn from_iter<T: IntoIterator<Item = (PdfName, PdfObject)>>(iter: T) -> Self {
|
|
Self {
|
|
pos: PdfInputPositionNoCompare::empty(),
|
|
fields: Arc::new(BTreeMap::from_iter(
|
|
iter.into_iter()
|
|
.filter(|(_name, value)| !matches!(value, PdfObject::Null(_))),
|
|
)),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl IntoIterator for PdfDictionary {
|
|
type Item = (PdfName, PdfObject);
|
|
type IntoIter = std::collections::btree_map::IntoIter<PdfName, PdfObject>;
|
|
|
|
fn into_iter(self) -> Self::IntoIter {
|
|
Arc::unwrap_or_clone(self.fields).into_iter()
|
|
}
|
|
}
|
|
|
|
impl<'a> IntoIterator for &'a PdfDictionary {
|
|
type Item = (&'a PdfName, &'a PdfObject);
|
|
type IntoIter = std::collections::btree_map::Iter<'a, PdfName, PdfObject>;
|
|
|
|
fn into_iter(self) -> Self::IntoIter {
|
|
self.fields.iter()
|
|
}
|
|
}
|
|
|
|
impl fmt::Debug for PdfDictionary {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
f.debug_map().entries(self).finish()
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Default)]
|
|
pub struct PdfArray {
|
|
pos: PdfInputPositionNoCompare,
|
|
elements: Arc<[PdfObject]>,
|
|
}
|
|
|
|
impl PdfArray {
|
|
pub fn new(pos: impl Into<PdfInputPositionNoCompare>) -> Self {
|
|
Self {
|
|
pos: pos.into(),
|
|
elements: Arc::default(),
|
|
}
|
|
}
|
|
pub fn from_elements(
|
|
pos: impl Into<PdfInputPositionNoCompare>,
|
|
elements: Arc<[PdfObject]>,
|
|
) -> Self {
|
|
Self {
|
|
pos: pos.into(),
|
|
elements,
|
|
}
|
|
}
|
|
pub fn pos(&self) -> PdfInputPosition {
|
|
self.pos.0
|
|
}
|
|
pub fn elements(&self) -> &Arc<[PdfObject]> {
|
|
&self.elements
|
|
}
|
|
pub fn into_elements(self) -> Arc<[PdfObject]> {
|
|
self.elements
|
|
}
|
|
pub fn iter(&self) -> std::slice::Iter<'_, PdfObject> {
|
|
self.elements.iter()
|
|
}
|
|
}
|
|
|
|
impl GetPdfInputPosition for PdfArray {
|
|
fn get_pdf_input_position(&self) -> PdfInputPosition {
|
|
self.pos.0
|
|
}
|
|
}
|
|
|
|
impl FromIterator<PdfObject> for PdfArray {
|
|
fn from_iter<T: IntoIterator<Item = PdfObject>>(iter: T) -> Self {
|
|
Self {
|
|
pos: PdfInputPositionNoCompare::empty(),
|
|
elements: Arc::from_iter(iter),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Clone)]
|
|
pub struct PdfArrayIntoIter {
|
|
indexes: std::ops::Range<usize>,
|
|
elements: Arc<[PdfObject]>,
|
|
}
|
|
|
|
impl Iterator for PdfArrayIntoIter {
|
|
type Item = PdfObject;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
self.indexes.next().map(|i| self.elements[i].clone())
|
|
}
|
|
|
|
fn size_hint(&self) -> (usize, Option<usize>) {
|
|
self.indexes.size_hint()
|
|
}
|
|
|
|
fn nth(&mut self, n: usize) -> Option<Self::Item> {
|
|
self.indexes.nth(n).map(|i| self.elements[i].clone())
|
|
}
|
|
|
|
fn last(self) -> Option<Self::Item> {
|
|
self.indexes.last().map(|i| self.elements[i].clone())
|
|
}
|
|
|
|
fn fold<B, F>(self, init: B, mut f: F) -> B
|
|
where
|
|
F: FnMut(B, Self::Item) -> B,
|
|
{
|
|
self.indexes
|
|
.fold(init, |init, i| f(init, self.elements[i].clone()))
|
|
}
|
|
}
|
|
|
|
// Once the index range is exhausted, `next` keeps returning `None`.
impl std::iter::FusedIterator for PdfArrayIntoIter {}
|
|
|
|
impl DoubleEndedIterator for PdfArrayIntoIter {
|
|
fn next_back(&mut self) -> Option<Self::Item> {
|
|
self.indexes.next_back().map(|i| self.elements[i].clone())
|
|
}
|
|
fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
|
|
self.indexes.nth_back(n).map(|i| self.elements[i].clone())
|
|
}
|
|
fn rfold<B, F>(self, init: B, mut f: F) -> B
|
|
where
|
|
F: FnMut(B, Self::Item) -> B,
|
|
{
|
|
self.indexes
|
|
.rfold(init, |init, i| f(init, self.elements[i].clone()))
|
|
}
|
|
}
|
|
|
|
// `size_hint` comes from a `Range<usize>`, which reports an exact length.
impl ExactSizeIterator for PdfArrayIntoIter {}
|
|
|
|
impl IntoIterator for PdfArray {
|
|
type Item = PdfObject;
|
|
type IntoIter = PdfArrayIntoIter;
|
|
|
|
fn into_iter(self) -> Self::IntoIter {
|
|
PdfArrayIntoIter {
|
|
indexes: 0..self.elements.len(),
|
|
elements: self.elements,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<'a> IntoIterator for &'a PdfArray {
|
|
type Item = &'a PdfObject;
|
|
type IntoIter = std::slice::Iter<'a, PdfObject>;
|
|
|
|
fn into_iter(self) -> Self::IntoIter {
|
|
self.elements.iter()
|
|
}
|
|
}
|
|
|
|
impl fmt::Debug for PdfArray {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
self.elements.fmt(f)
|
|
}
|
|
}
|
|
|
|
/// A shared slice of `T`.
///
/// NOTE(review): the name suggests it is parsed from either a single value or
/// an array of values; the parsing impl is not in this part of the file, so
/// confirm there.
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct MaybeArray<T>(pub Arc<[T]>);
|
|
|
|
impl<T> std::ops::Deref for MaybeArray<T> {
|
|
type Target = Arc<[T]>;
|
|
|
|
fn deref(&self) -> &Self::Target {
|
|
&self.0
|
|
}
|
|
}
|
|
|
|
impl<T> std::ops::DerefMut for MaybeArray<T> {
|
|
fn deref_mut(&mut self) -> &mut Self::Target {
|
|
&mut self.0
|
|
}
|
|
}
|
|
|
|
// Stream filter kinds, each tied to the PDF name given in its `#[pdf(name = ...)]`
// attribute. `#[non_exhaustive]` leaves room for more variants.
pdf_parse! {
    #[derive(Clone, Debug, PartialEq, Eq)]
    #[non_exhaustive]
    pub enum PdfStreamFilter {
        #[pdf(name = "ASCIIHexDecode")]
        AsciiHexDecode,
        #[pdf(name = "ASCII85Decode")]
        Ascii85Decode,
        #[pdf(name = "LZWDecode")]
        LzwDecode,
        #[pdf(name = "FlateDecode")]
        FlateDecode,
        #[pdf(name = "RunLengthDecode")]
        RunLengthDecode,
        #[pdf(name = "CCITTFaxDecode")]
        CcittFaxDecode,
        #[pdf(name = "JBIG2Decode")]
        Jbig2Decode,
        #[pdf(name = "DCTDecode")]
        DctDecode,
        #[pdf(name = "JPXDecode")]
        JpxDecode,
        #[pdf(name = "Crypt")]
        Crypt,
        // Presumably receives any name not listed above -- confirm in `pdf_parse!`.
        #[pdf(other)]
        Unknown(PdfName),
    }
}
|
|
|
|
impl<T> Default for MaybeArray<T> {
|
|
fn default() -> Self {
|
|
Self(Arc::default())
|
|
}
|
|
}
|
|
|
|
impl<'a, T> IntoIterator for &'a MaybeArray<T> {
|
|
type Item = &'a T;
|
|
type IntoIter = std::slice::Iter<'a, T>;
|
|
|
|
fn into_iter(self) -> Self::IntoIter {
|
|
self.iter()
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Debug)]
|
|
pub enum PdfFileSpecification {
|
|
String(PdfString),
|
|
Dictionary(PdfDictionary),
|
|
}
|
|
|
|
impl PdfParse for PdfFileSpecification {
|
|
fn type_name() -> Cow<'static, str> {
|
|
Cow::Borrowed("file specification")
|
|
}
|
|
fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
|
|
match PdfObjectDirect::from(object) {
|
|
PdfObjectDirect::String(v) => Ok(Self::String(v)),
|
|
PdfObjectDirect::Dictionary(v) => Ok(Self::Dictionary(v)),
|
|
object => Err(PdfParseError::InvalidType {
|
|
pos: object.pos(),
|
|
ty: object.type_name(),
|
|
expected_ty: "PdfFileSpecification",
|
|
}),
|
|
}
|
|
}
|
|
}
|
|
|
|
// The dictionary attached to a stream. Each field is read from the dictionary
// key named in its `#[pdf(name = ...)]` attribute.
pdf_parse! {
    #[derive(Clone, Debug)]
    pub struct PdfStreamDictionary<Rest = PdfDictionary> {
        #[pdf(name = PdfStreamDictionary::LENGTH_NAME)]
        pub len: usize,
        #[pdf(name = "Filter")]
        pub filters: MaybeArray<PdfStreamFilter>,
        #[pdf(name = "DecodeParms")]
        pub decode_parms: MaybeArray<Option<PdfDictionary>>,
        #[pdf(name = "F")]
        pub file: Option<PdfFileSpecification>,
        #[pdf(name = "FFilter")]
        pub file_filters: MaybeArray<PdfStreamFilter>,
        #[pdf(name = "FDecodeParms")]
        pub file_decode_parms: MaybeArray<Option<PdfDictionary>>,
        #[pdf(name = "DL")]
        pub decoded_len: Option<usize>,
        // Presumably receives the entries not claimed by the fields above --
        // confirm in `pdf_parse!`.
        #[pdf(flatten)]
        pub rest: Rest,
    }
}
|
|
|
|
impl PdfStreamDictionary {
|
|
pub const LENGTH_NAME: &str = "Length";
|
|
pub(crate) fn parse_len_from_dictionary(
|
|
dictionary: &PdfDictionary,
|
|
) -> Result<usize, PdfParseError> {
|
|
PdfParse::parse(
|
|
dictionary
|
|
.get(&PdfName::new_static(Self::LENGTH_NAME.as_bytes()))
|
|
.cloned()
|
|
.unwrap_or_default(),
|
|
)
|
|
}
|
|
}
|
|
|
|
impl<Rest> PdfStreamDictionary<Rest> {
|
|
pub fn filters_and_parms(
|
|
&self,
|
|
) -> impl Clone + ExactSizeIterator + DoubleEndedIterator<Item = (PdfStreamFilter, PdfDictionary)>
|
|
{
|
|
self.filters.iter().enumerate().map(|(index, filter)| {
|
|
(
|
|
filter.clone(),
|
|
self.decode_parms
|
|
.0
|
|
.get(index)
|
|
.cloned()
|
|
.flatten()
|
|
.unwrap_or_default(),
|
|
)
|
|
})
|
|
}
|
|
pub fn file_filters_and_parms(
|
|
&self,
|
|
) -> impl Clone + ExactSizeIterator + DoubleEndedIterator<Item = (PdfStreamFilter, PdfDictionary)>
|
|
{
|
|
self.file_filters.iter().enumerate().map(|(index, filter)| {
|
|
(
|
|
filter.clone(),
|
|
self.file_decode_parms
|
|
.0
|
|
.get(index)
|
|
.cloned()
|
|
.flatten()
|
|
.unwrap_or_default(),
|
|
)
|
|
})
|
|
}
|
|
}
|
|
|
|
pub(crate) struct UnparsedPdfStreamDictionary<Rest> {
|
|
unparsed_dictionary: PdfDictionary,
|
|
dictionary: Arc<OnceLock<PdfStreamDictionary<Rest>>>,
|
|
}
|
|
|
|
impl<Rest: PdfParse> UnparsedPdfStreamDictionary<Rest> {
|
|
pub(crate) fn finish_parsing(self) -> Result<(), PdfParseError> {
|
|
let Ok(()) = self
|
|
.dictionary
|
|
.set(PdfParse::parse(self.unparsed_dictionary.into())?)
|
|
else {
|
|
unreachable!();
|
|
};
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
#[derive(Clone)]
|
|
pub struct PdfStream<Rest = PdfDictionary> {
|
|
pos: PdfInputPositionNoCompare,
|
|
dictionary: Arc<OnceLock<PdfStreamDictionary<Rest>>>,
|
|
data: Arc<[u8]>,
|
|
}
|
|
|
|
impl<Rest: fmt::Debug> fmt::Debug for PdfStream<Rest> {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
f.debug_struct("PdfStream")
|
|
.field("pos", &self.pos)
|
|
.field("dictionary", &self.dictionary)
|
|
.field("data", &format_args!("{:02x?}", self.data))
|
|
.finish()
|
|
}
|
|
}
|
|
|
|
impl<Rest> PdfStream<Rest> {
|
|
pub fn new(
|
|
pos: impl Into<PdfInputPositionNoCompare>,
|
|
dictionary: PdfStreamDictionary<Rest>,
|
|
data: Arc<[u8]>,
|
|
) -> Self {
|
|
Self {
|
|
pos: pos.into(),
|
|
dictionary: Arc::new(OnceLock::from(dictionary)),
|
|
data,
|
|
}
|
|
}
|
|
pub(crate) fn new_unparsed(
|
|
pos: impl Into<PdfInputPositionNoCompare>,
|
|
unparsed_dictionary: PdfDictionary,
|
|
data: Arc<[u8]>,
|
|
) -> (Self, UnparsedPdfStreamDictionary<Rest>) {
|
|
let dictionary = Arc::new(OnceLock::new());
|
|
(
|
|
Self {
|
|
pos: pos.into(),
|
|
dictionary: dictionary.clone(),
|
|
data,
|
|
},
|
|
UnparsedPdfStreamDictionary {
|
|
unparsed_dictionary,
|
|
dictionary,
|
|
},
|
|
)
|
|
}
|
|
pub fn dictionary(&self) -> &PdfStreamDictionary<Rest> {
|
|
self.dictionary
|
|
.get()
|
|
.expect("haven't finished parsing all pdf object definitions yet")
|
|
}
|
|
pub fn data(&self) -> &Arc<[u8]> {
|
|
&self.data
|
|
}
|
|
}
|
|
|
|
impl<Rest> GetPdfInputPosition for PdfStream<Rest> {
|
|
fn get_pdf_input_position(&self) -> PdfInputPosition {
|
|
self.pos.0
|
|
}
|
|
}
|
|
|
|
impl<Rest: PdfParse> PdfParse for PdfStream<Rest> {
|
|
fn type_name() -> Cow<'static, str> {
|
|
if TypeId::of::<Rest>() == TypeId::of::<PdfDictionary>() {
|
|
Cow::Borrowed("stream")
|
|
} else {
|
|
Cow::Owned(format!("PdfStream<{}>", Rest::type_name()))
|
|
}
|
|
}
|
|
fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
|
|
match PdfObjectDirect::from(object) {
|
|
PdfObjectDirect::Stream(stream) => Ok(PdfStream {
|
|
pos: stream.pos,
|
|
dictionary: if let Some(dictionary) = <dyn std::any::Any>::downcast_ref::<
|
|
Arc<OnceLock<PdfStreamDictionary<Rest>>>,
|
|
>(&stream.dictionary)
|
|
{
|
|
dictionary.clone()
|
|
} else {
|
|
let PdfStreamDictionary {
|
|
len,
|
|
filters,
|
|
decode_parms,
|
|
file,
|
|
file_filters,
|
|
file_decode_parms,
|
|
decoded_len,
|
|
rest,
|
|
} = stream.dictionary();
|
|
Arc::new(OnceLock::from(PdfStreamDictionary {
|
|
len: *len,
|
|
filters: filters.clone(),
|
|
decode_parms: decode_parms.clone(),
|
|
file: file.clone(),
|
|
file_filters: file_filters.clone(),
|
|
file_decode_parms: file_decode_parms.clone(),
|
|
decoded_len: *decoded_len,
|
|
rest: Rest::parse(rest.clone().into())?,
|
|
}))
|
|
},
|
|
data: stream.data,
|
|
}),
|
|
object => Err(PdfParseError::InvalidType {
|
|
pos: object.get_pdf_input_position(),
|
|
ty: object.type_name(),
|
|
expected_ty: "stream",
|
|
}),
|
|
}
|
|
}
|
|
}
|