Compare commits

..

No commits in common. "d7727289ebe2eb366a005100b9d528965b878283" and "03bfdd3d995b9c17465f91e8d870552bb25b6abc" have entirely different histories.

17 changed files with 1 additions and 11470 deletions

3
.gitignore vendored
View file

@ -3,5 +3,4 @@
*.egg-info
__pycache__
*.log
/powerisa-instructions.xml
/target
/powerisa-instructions.xml

109
Cargo.lock generated
View file

@ -1,109 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "adler2"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
[[package]]
name = "cfg-if"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "crc32fast"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
dependencies = [
"cfg-if",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
name = "either"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "flate2"
version = "1.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]]
name = "miniz_oxide"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
dependencies = [
"adler2",
"simd-adler32",
]
[[package]]
name = "parse_powerisa_pdf"
version = "0.1.0"
dependencies = [
"flate2",
"rayon",
]
[[package]]
name = "rayon"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]]
name = "simd-adler32"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"

View file

@ -1,10 +0,0 @@
[package]
name = "parse_powerisa_pdf"
version = "0.1.0"
edition = "2024"
license = "LGPL-3.0-or-later"
[dependencies]
flate2 = "1.1.5"
rayon = "1.11.0"

View file

@ -1,5 +0,0 @@
#[doc(hidden)]
pub use std as __std;
pub mod pdf;
pub mod util;

View file

@ -1,38 +0,0 @@
use parse_powerisa_pdf::pdf::Pdf;
use std::{
error::Error,
io::{IsTerminal, Read},
process::ExitCode,
};
fn main() -> Result<ExitCode, Box<dyn Error>> {
let args: Vec<_> = std::env::args_os().collect();
if args
.iter()
.skip(1)
.any(|v| v.as_encoded_bytes().starts_with(b"-") && v != "-")
|| args.len() > 2
|| (args.len() == 1 && std::io::stdin().is_terminal())
{
eprintln!(
"Usage: {} [<path/to/file.pdf>]\n\
Reads the PDF file passed on the command line,\n\
Reads stdin if no arguments are passed or if the file name is just a dash `-`.\n\
If stdin is a terminal, you have to pass `-` explicitly to read from it.",
args[0].display()
);
return Ok(ExitCode::FAILURE);
}
let file_path = args.get(1).filter(|v| *v != "-");
let input = if let Some(file_path) = file_path {
std::fs::read(file_path)?
} else {
let mut buf = Vec::new();
std::io::stdin().lock().read_to_end(&mut buf)?;
buf
};
let pdf = Pdf::parse(input)?;
println!("{:#?}", pdf.trailer.trailer_dictionary());
todo!();
Ok(ExitCode::SUCCESS)
}

1279
src/pdf.rs

File diff suppressed because it is too large Load diff

View file

@ -1,829 +0,0 @@
use crate::{
pdf::{
PdfObjectOrStreamDictionaryOrOperator, PdfObjects, PdfParser, PdfTokenizer,
object::{
NameOr, PdfDictionary, PdfMatrix, PdfName, PdfObject, PdfObjectDirect, PdfRectangle,
PdfStream, PdfStreamContents, PdfString, PdfStringBytesDebug, PdfStringOrNumber,
PdfVec2D,
},
parse::{
GetPdfInputPosition, PdfInputPosition, PdfInputPositionKnown,
PdfInputPositionNoCompare, PdfParse, PdfParseError,
},
render::{
PdfColorDeviceGray, PdfColorDeviceRgb, PdfRenderOperator, PdfRenderState,
PdfRenderingIntent,
},
},
util::ArcOrRef,
};
use std::{fmt, sync::Arc};
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct PdfOperatorUnparsed {
pos: PdfInputPositionNoCompare,
bytes: ArcOrRef<'static, [u8]>,
}
impl GetPdfInputPosition for PdfOperatorUnparsed {
fn get_pdf_input_position(&self) -> PdfInputPosition {
self.pos()
}
}
impl fmt::Debug for PdfOperatorUnparsed {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
Self::debug_with_name("PdfOperatorUnparsed", &self.bytes, self.pos.0, f)
}
}
trait PdfParseIter: Sized {
fn parse_iter(iter: impl IntoIterator<Item = PdfObject>) -> Result<Self, PdfParseError>;
}
impl<T: PdfParse> PdfParseIter for Arc<[T]> {
fn parse_iter(iter: impl IntoIterator<Item = PdfObject>) -> Result<Self, PdfParseError> {
FromIterator::from_iter(iter.into_iter().map(T::parse))
}
}
impl PdfOperatorUnparsed {
pub fn new(
pos: impl Into<PdfInputPositionNoCompare>,
bytes: impl Into<ArcOrRef<'static, [u8]>>,
) -> Self {
Self {
pos: pos.into(),
bytes: bytes.into(),
}
}
pub const fn new_static(bytes: &'static [u8]) -> Self {
Self {
pos: PdfInputPositionNoCompare::empty(),
bytes: ArcOrRef::Ref(bytes),
}
}
pub fn pos(&self) -> PdfInputPosition {
self.pos.0
}
pub fn bytes(&self) -> &ArcOrRef<'static, [u8]> {
&self.bytes
}
fn debug_with_name(
name: &str,
pdf_name: &[u8],
pos: PdfInputPosition,
f: &mut fmt::Formatter<'_>,
) -> fmt::Result {
write!(f, "{name}(at {pos}, {})", PdfStringBytesDebug(pdf_name))
}
pub fn bytes_debug(&self) -> PdfStringBytesDebug<'_> {
PdfStringBytesDebug(&self.bytes)
}
}
macro_rules! make_pdf_operator_enum {
(
$(#[$($operator_meta:tt)*])*
$operator_enum_vis:vis enum $PdfOperator:ident;
$(#[$($operator_and_operands_meta:tt)*])*
$enum_vis:vis enum $PdfOperatorAndOperands:ident {
$(#[$($unknown_variant_meta:tt)*])*
$Unknown:ident {
$(#[$($unknown_operands_meta:tt)*])*
$unknown_operands:ident: $unknown_operands_ty:ty,
$(#[$($unknown_operator_meta:tt)*])*
$unknown_operator:ident: $unknown_operator_ty:ty,
},
$(
#[kw = $kw:literal]
$(#[$($variant_meta:tt)*])*
$Variant:ident($VariantStruct:ident {
$pos:ident: PdfInputPositionNoCompare,
$(
#[$field_parse:ident($($parse_args:tt)*)]
$(#[$($field_meta:tt)*])*
$field:ident: $field_ty:ty,
)*
}),
)*
}
) => {
$(#[$($operator_meta)*])*
$operator_enum_vis enum $PdfOperator {
$(#[$($unknown_variant_meta)*])*
$Unknown($unknown_operator_ty),
$(
$(#[$($variant_meta)*])*
$Variant(PdfInputPositionNoCompare),
)*
}
impl $PdfOperator {
$operator_enum_vis fn parse(self, operands: impl IntoIterator<Item = PdfObject>) -> Result<$PdfOperatorAndOperands, PdfParseError> {
let operands = operands.into_iter();
Ok(match self {
Self::$Unknown(operator) => $PdfOperatorAndOperands::$Unknown {
operands: FromIterator::from_iter(operands.map(Into::into)),
operator,
},
$(Self::$Variant(pos) => $VariantStruct::parse(pos, operands)?.into(),)*
})
}
$operator_enum_vis fn pos(&self) -> PdfInputPosition {
match *self {
Self::$Unknown(ref operator) => operator.pos(),
$(Self::$Variant(pos) => pos.0,)*
}
}
}
impl fmt::Debug for $PdfOperator {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::$Unknown(operator) => PdfOperatorUnparsed::debug_with_name("Unknown", &operator.bytes, operator.pos.0, f),
$(Self::$Variant(pos) => PdfOperatorUnparsed::debug_with_name(stringify!($Variant), $kw, pos.0, f),)*
}
}
}
impl From<$PdfOperator> for PdfOperatorUnparsed {
fn from(v: $PdfOperator) -> PdfOperatorUnparsed {
match v {
$PdfOperator::$Unknown(operator) => operator,
$($PdfOperator::$Variant(pos) => PdfOperatorUnparsed { pos, bytes: ArcOrRef::Ref($kw) },)*
}
}
}
impl From<PdfOperatorUnparsed> for $PdfOperator {
fn from(v: PdfOperatorUnparsed) -> $PdfOperator {
match &**v.bytes() {
$($kw => Self::$Variant(v.pos),)*
_ => Self::$Unknown(v),
}
}
}
$(#[derive(Clone)]
$(#[$($variant_meta)*])*
$enum_vis struct $VariantStruct {
$enum_vis $pos: PdfInputPositionNoCompare,
$(
$(#[$($field_meta)*])*
$enum_vis $field: $field_ty,
)*
}
impl fmt::Debug for $VariantStruct {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct(stringify!($VariantStruct)).field("pos", &self.pos)$(.field(stringify!($field), &self.$field))*.finish()
}
}
impl GetPdfInputPosition for $VariantStruct {
fn get_pdf_input_position(&self) -> PdfInputPosition {
self.pos()
}
}
impl From<$VariantStruct> for $PdfOperatorAndOperands {
fn from(v: $VariantStruct) -> Self {
Self::$Variant(v)
}
}
impl $VariantStruct {
$enum_vis fn operator_from_pos(pos: impl Into<PdfInputPositionNoCompare>) -> $PdfOperator {
$PdfOperator::$Variant(pos.into())
}
$enum_vis fn operator(&self) -> $PdfOperator {
$PdfOperator::$Variant(self.pos)
}
$enum_vis fn pos(&self) -> PdfInputPosition {
self.pos.0
}
}
make_pdf_operator_enum! {
@impl_variant_parse
$enum_vis enum;
struct $VariantStruct {
$pos: PdfInputPositionNoCompare,
$(
#[$field_parse($($parse_args)*)]
$(#[$($field_meta)*])*
$field: $field_ty,
)*
}
})*
$(#[$($operator_and_operands_meta)*])*
$enum_vis enum $PdfOperatorAndOperands {
$(#[$($unknown_variant_meta)*])*
$Unknown {
$(#[$($unknown_operands_meta)*])*
$unknown_operands: $unknown_operands_ty,
$(#[$($unknown_operator_meta)*])*
$unknown_operator: $unknown_operator_ty,
},
$(
$(#[$($variant_meta)*])*
$Variant($VariantStruct),
)*
}
impl $PdfOperatorAndOperands {
$enum_vis fn operator(&self) -> $PdfOperator {
match self {
Self::Unknown { operator, .. } => $PdfOperator::Unknown(operator.clone()),
$(Self::$Variant(v) => v.operator(),)*
}
}
$enum_vis fn pos(&self) -> PdfInputPosition {
match self {
Self::$Unknown { operator, .. } => operator.pos(),
$(Self::$Variant(v) => v.pos(),)*
}
}
}
impl fmt::Debug for $PdfOperatorAndOperands {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::$Unknown {
operands,
operator,
} => f.debug_struct("Unknown").field("operator", operator).field("operands", operands).finish(),
$(Self::$Variant($VariantStruct {
$pos,
$($field,)*
}) => f.debug_struct(stringify!($Variant)).field("pos", $pos)$(.field(stringify!($field), $field))*.finish(),)*
}
}
}
impl PdfRenderOperator for $PdfOperatorAndOperands {
fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> {
match self {
Self::$Unknown {
operands,
operator,
} => state.handle_unknown_operator(operator, operands),
$(Self::$Variant(v) => <$VariantStruct as PdfRenderOperator>::render(v, state),)*
}
}
}
};
(
@impl_variant_parse
$enum_vis:vis enum;
struct $VariantStruct:ident {
$pos:ident: PdfInputPositionNoCompare,
$(
#[$field_parse:ident($($parse_args:ident),* $(,)?)]
$(#[$($field_meta:tt)*])*
$field:ident: $field_ty:ty,
)*
}
) => {
impl $VariantStruct {
$enum_vis fn parse(pos: impl Into<PdfInputPositionNoCompare>, operands: impl IntoIterator<Item = PdfObject>) -> Result<Self, PdfParseError> {
let pos = pos.into();
let mut operands = operands.into_iter();
$($(let Some($parse_args) = operands.next() else {
return Err(PdfParseError::OperatorHasTooFewOperands { operator: Self::operator_from_pos(pos) });
};)*)*
if operands.next().is_some() {
return Err(PdfParseError::OperatorHasTooManyOperands { operator: Self::operator_from_pos(pos) });
}
Ok(Self {
pos,
$($field: <$field_ty>::$field_parse($($parse_args),*)?,)*
})
}
}
};
(
@impl_variant_parse
$enum_vis:vis enum;
struct $VariantStruct:ident {
$pos:ident: PdfInputPositionNoCompare,
#[$field_parse:ident(...)]
$(#[$($field_meta:tt)*])*
$field:ident: $field_ty:ty,
}
) => {
impl $VariantStruct {
$enum_vis fn parse(pos: impl Into<PdfInputPositionNoCompare>, operands: impl IntoIterator<Item = PdfObject>) -> Result<Self, PdfParseError> {
let pos = pos.into();
let operands = operands.into_iter();
Ok(Self {
pos,
$field: <$field_ty>::$field_parse(operands)?,
})
}
}
};
}
make_pdf_operator_enum! {
#[derive(Clone)]
pub enum PdfOperator;
#[derive(Clone)]
pub enum PdfOperatorAndOperands {
Unknown {
operands: Arc<[PdfObjectDirect]>,
operator: PdfOperatorUnparsed,
},
#[kw = b"b"]
CloseFillAndStrokePath(PdfOperatorCloseFillAndStrokePath {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"B"]
FillAndStrokePath(PdfOperatorFillAndStrokePath {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"b*"]
CloseFillAndStrokePathEvenOdd(PdfOperatorCloseFillAndStrokePathEvenOdd {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"B*"]
FillAndStrokePathEvenOdd(PdfOperatorFillAndStrokePathEvenOdd {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"BDC"]
BeginMarkedContentWithProperties(PdfOperatorBeginMarkedContentWithProperties {
pos: PdfInputPositionNoCompare,
#[parse(tag)]
tag: PdfName,
#[parse(properties)]
properties: NameOr<PdfDictionary>,
}),
#[kw = b"BI"]
BeginInlineImage(PdfOperatorBeginInlineImage {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"BMC"]
BeginMarkedContent(PdfOperatorBeginMarkedContent {
pos: PdfInputPositionNoCompare,
#[parse(tag)]
tag: PdfName,
}),
#[kw = b"BT"]
BeginText(PdfOperatorBeginText {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"BX"]
BeginCompatibilitySection(PdfOperatorBeginCompatibilitySection {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"c"]
CurveTo(PdfOperatorCurveTo {
pos: PdfInputPositionNoCompare,
#[parse(x1, y1)]
p1: PdfVec2D,
#[parse(x2, y2)]
p2: PdfVec2D,
#[parse(x3, y3)]
p3: PdfVec2D,
}),
#[kw = b"cm"]
ConcatMatrix(PdfOperatorConcatMatrix {
pos: PdfInputPositionNoCompare,
#[parse_flat(a, b, c, d, e, f)]
matrix: PdfMatrix,
}),
#[kw = b"CS"]
SetStrokeColorSpace(PdfOperatorSetStrokeColorSpace {
pos: PdfInputPositionNoCompare,
#[parse(name)]
name: PdfName,
}),
#[kw = b"cs"]
SetNonStrokeColorSpace(PdfOperatorSetNonStrokeColorSpace {
pos: PdfInputPositionNoCompare,
#[parse(name)]
name: PdfName,
}),
#[kw = b"d"]
SetLineDashPattern(PdfOperatorSetLineDashPattern {
pos: PdfInputPositionNoCompare,
#[parse(dash_array)]
dash_array: PdfObject, // TODO: actually parse
#[parse(dash_phase)]
dash_phase: PdfObject, // TODO: actually parse
}),
#[kw = b"d0"]
FontType3SetWidth(PdfOperatorFontType3SetWidth {
pos: PdfInputPositionNoCompare,
#[parse(x, y)]
width: PdfVec2D,
}),
#[kw = b"d1"]
FontType3SetWidthAndBBox(PdfOperatorFontType3SetWidthAndBBox {
pos: PdfInputPositionNoCompare,
#[parse(width_x, width_y)]
width: PdfVec2D,
#[parse_flat(lower_left_x, lower_left_y, upper_right_x, upper_right_y)]
bbox: PdfRectangle,
}),
#[kw = b"Do"]
PaintXObject(PdfOperatorPaintXObject {
pos: PdfInputPositionNoCompare,
#[parse(name)]
name: PdfName,
}),
#[kw = b"DP"]
DesignateMarkedContentPointWithProperties(PdfOperatorDesignateMarkedContentPointWithProperties {
pos: PdfInputPositionNoCompare,
#[parse(tag)]
tag: PdfName,
#[parse(properties)]
properties: NameOr<PdfDictionary>,
}),
#[kw = b"EI"]
EndInlineImage(PdfOperatorEndInlineImage {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"EMC"]
EndMarkedContent(PdfOperatorEndMarkedContent {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"ET"]
EndText(PdfOperatorEndText {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"EX"]
EndCompatibilitySection(PdfOperatorEndCompatibilitySection {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"f"]
FillPath(PdfOperatorFillPath {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"F"]
FillPathObsolete(PdfOperatorFillPathObsolete {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"f*"]
FillPathEvenOdd(PdfOperatorFillPathEvenOdd {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"G"]
SetStrokeGray(PdfOperatorSetStrokeGray {
pos: PdfInputPositionNoCompare,
#[parse(gray)]
gray: PdfColorDeviceGray,
}),
#[kw = b"g"]
SetNonStrokeGray(PdfOperatorSetNonStrokeGray {
pos: PdfInputPositionNoCompare,
#[parse(gray)]
gray: PdfColorDeviceGray,
}),
#[kw = b"gs"]
SetGraphicsState(PdfOperatorSetGraphicsState {
pos: PdfInputPositionNoCompare,
#[parse(dictionary_name)]
dictionary_name: PdfName,
}),
#[kw = b"h"]
CloseSubpath(PdfOperatorCloseSubpath {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"i"]
SetFlatnessTolerance(PdfOperatorSetFlatnessTolerance {
pos: PdfInputPositionNoCompare,
#[parse(flatness)]
flatness: f32,
}),
#[kw = b"ID"]
BeginInlineImageData(PdfOperatorBeginInlineImageData {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"j"]
SetLineJoinStyle(PdfOperatorSetLineJoinStyle {
pos: PdfInputPositionNoCompare,
#[parse(line_join_style)]
line_join_style: u8, // TODO parse
}),
#[kw = b"J"]
SetLineCapStyle(PdfOperatorSetLineCapStyle {
pos: PdfInputPositionNoCompare,
#[parse(line_cap_style)]
line_cap_style: u8, // TODO parse
}),
#[kw = b"K"]
SetStrokeCmyk(PdfOperatorSetStrokeCmyk {
pos: PdfInputPositionNoCompare,
#[parse(c)]
c: f32,
#[parse(m)]
m: f32,
#[parse(y)]
y: f32,
#[parse(k)]
k: f32,
}),
#[kw = b"k"]
SetNonStrokeCmyk(PdfOperatorSetNonStrokeCmyk {
pos: PdfInputPositionNoCompare,
#[parse(c)]
c: f32,
#[parse(m)]
m: f32,
#[parse(y)]
y: f32,
#[parse(k)]
k: f32,
}),
#[kw = b"l"]
LineTo(PdfOperatorLineTo {
pos: PdfInputPositionNoCompare,
#[parse(x, y)]
to: PdfVec2D,
}),
#[kw = b"m"]
MoveTo(PdfOperatorMoveTo {
pos: PdfInputPositionNoCompare,
#[parse(x, y)]
to: PdfVec2D,
}),
#[kw = b"M"]
SetMiterLimit(PdfOperatorSetMiterLimit {
pos: PdfInputPositionNoCompare,
#[parse(limit)]
limit: f32,
}),
#[kw = b"MP"]
DesignateMarkedContentPoint(PdfOperatorDesignateMarkedContentPoint {
pos: PdfInputPositionNoCompare,
#[parse(tag)]
tag: PdfName,
}),
#[kw = b"n"]
EndPath(PdfOperatorEndPath {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"q"]
SaveGraphicsState(PdfOperatorSaveGraphicsState {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"Q"]
RestoreGraphicsState(PdfOperatorRestoreGraphicsState {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"re"]
Rectangle(PdfOperatorRectangle {
pos: PdfInputPositionNoCompare,
#[parse(x, y)]
p: PdfVec2D,
#[parse(width, height)]
size: PdfVec2D,
}),
#[kw = b"RG"]
SetStrokeRgb(PdfOperatorSetStrokeRgb {
pos: PdfInputPositionNoCompare,
#[parse_flat(r, g, b)]
color: PdfColorDeviceRgb,
}),
#[kw = b"rg"]
SetNonStrokeRgb(PdfOperatorSetNonStrokeRgb {
pos: PdfInputPositionNoCompare,
#[parse_flat(r, g, b)]
color: PdfColorDeviceRgb,
}),
#[kw = b"ri"]
SetColorRenderingIntent(PdfOperatorSetColorRenderingIntent {
pos: PdfInputPositionNoCompare,
#[parse(intent)]
intent: PdfRenderingIntent,
}),
#[kw = b"s"]
CloseAndStrokePath(PdfOperatorCloseAndStrokePath {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"S"]
StrokePath(PdfOperatorStrokePath {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"SC"]
SetStrokeColor(PdfOperatorSetStrokeColor {
pos: PdfInputPositionNoCompare,
#[parse_iter(...)]
color: Arc<[f32]>,
}),
#[kw = b"sc"]
SetNonStrokeColor(PdfOperatorSetNonStrokeColor {
pos: PdfInputPositionNoCompare,
#[parse_iter(...)]
color: Arc<[f32]>,
}),
#[kw = b"SCN"]
SetStrokeColorWithName(PdfOperatorSetStrokeColorWithName {
pos: PdfInputPositionNoCompare,
#[parse_iter(...)]
color_and_name: Arc<[NameOr<f32>]>,
}),
#[kw = b"scn"]
SetNonStrokeColorWithName(PdfOperatorSetNonStrokeColorWithName {
pos: PdfInputPositionNoCompare,
#[parse_iter(...)]
color_and_name: Arc<[NameOr<f32>]>,
}),
#[kw = b"sh"]
Shade(PdfOperatorShade {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"T*"]
TextNextLine(PdfOperatorTextNextLine {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"Tc"]
SetCharacterSpacing(PdfOperatorSetCharacterSpacing {
pos: PdfInputPositionNoCompare,
#[parse(char_space)]
char_space: f32,
}),
#[kw = b"Td"]
TextNextLineWithOffset(PdfOperatorTextNextLineWithOffset {
pos: PdfInputPositionNoCompare,
#[parse(x, y)]
offset: PdfVec2D,
}),
#[kw = b"TD"]
TextNextLineWithOffsetAndLeading(PdfOperatorTextNextLineWithOffsetAndLeading {
pos: PdfInputPositionNoCompare,
#[parse(x, y)]
offset: PdfVec2D,
}),
#[kw = b"Tf"]
SetFontAndSize(PdfOperatorSetFontAndSize {
pos: PdfInputPositionNoCompare,
#[parse(font)]
font: PdfName,
#[parse(size)]
size: f32,
}),
#[kw = b"Tj"]
ShowText(PdfOperatorShowText {
pos: PdfInputPositionNoCompare,
#[parse(text)]
text: PdfString,
}),
#[kw = b"TJ"]
ShowTextWithGlyphPositioning(PdfOperatorShowTextWithGlyphPositioning {
pos: PdfInputPositionNoCompare,
#[parse(text_and_positioning)]
text_and_positioning: Arc<[PdfStringOrNumber]>,
}),
#[kw = b"TL"]
SetTextLeading(PdfOperatorSetTextLeading {
pos: PdfInputPositionNoCompare,
#[parse(leading)]
leading: f32,
}),
#[kw = b"Tm"]
SetTextMatrix(PdfOperatorSetTextMatrix {
pos: PdfInputPositionNoCompare,
#[parse_flat(a, b, c, d, e, f)]
matrix: PdfMatrix,
}),
#[kw = b"Tr"]
SetTextRenderingMode(PdfOperatorSetTextRenderingMode {
pos: PdfInputPositionNoCompare,
#[parse(rendering_mode)]
rendering_mode: u8, // TODO: parse
}),
#[kw = b"Ts"]
SetTextRise(PdfOperatorSetTextRise {
pos: PdfInputPositionNoCompare,
#[parse(rise)]
rise: f32,
}),
#[kw = b"Tw"]
SetWordSpacing(PdfOperatorSetWordSpacing {
pos: PdfInputPositionNoCompare,
#[parse(word_space)]
word_space: f32,
}),
#[kw = b"Tz"]
SetTextHorizontalScaling(PdfOperatorSetTextHorizontalScaling {
pos: PdfInputPositionNoCompare,
#[parse(scale_percent)]
scale_percent: f32,
}),
#[kw = b"v"]
CurveTo23(PdfOperatorCurveTo23 {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"w"]
SetLineWidth(PdfOperatorSetLineWidth {
pos: PdfInputPositionNoCompare,
#[parse(line_width)]
line_width: f32,
}),
#[kw = b"W"]
Clip(PdfOperatorClip {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"W*"]
ClipEvenOdd(PdfOperatorClipEvenOdd {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"y"]
CurveTo13(PdfOperatorCurveTo13 {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"'"]
TextNextLineAndShow(PdfOperatorTextNextLineAndShow {
pos: PdfInputPositionNoCompare,
#[parse(text)]
text: PdfString,
}),
#[kw = b"\""]
SetSpacingThenTextNextLineAndShow(PdfOperatorSetSpacingThenTextNextLineAndShow {
pos: PdfInputPositionNoCompare,
#[parse(word_space)]
word_space: f32,
#[parse(char_space)]
char_space: f32,
#[parse(text)]
text: PdfString,
}),
}
}
impl GetPdfInputPosition for PdfOperator {
fn get_pdf_input_position(&self) -> PdfInputPosition {
self.pos()
}
}
impl GetPdfInputPosition for PdfOperatorAndOperands {
fn get_pdf_input_position(&self) -> PdfInputPosition {
self.pos()
}
}
#[derive(Clone)]
pub struct PdfContentStreamData {
pub operators: Arc<[PdfOperatorAndOperands]>,
}
impl fmt::Debug for PdfContentStreamData {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("PdfContentStreamData")
.field("operators", &self.operators)
.finish()
}
}
impl PdfStreamContents for PdfContentStreamData {
fn parse(
data: &[u8],
stream_pos: PdfInputPosition,
objects: Arc<PdfObjects>,
) -> Result<Self, PdfParseError> {
let mut parser = PdfParser {
objects,
tokenizer: PdfTokenizer::new(
data,
PdfInputPositionKnown {
pos: 0,
containing_streams_pos: stream_pos.get().map(|v| v.pos),
},
),
};
let mut operands = Vec::new();
let mut operators = Vec::new();
loop {
parser.skip_comments_and_whitespace();
if parser.tokenizer.peek().is_none() {
break;
}
match parser.parse_object_or_operator()? {
PdfObjectOrStreamDictionaryOrOperator::StreamDictionary {
stream_kw_pos, ..
} => return Err(PdfParseError::StreamNotAllowedHere { pos: stream_kw_pos }),
PdfObjectOrStreamDictionaryOrOperator::Object(object) => operands.push(object),
PdfObjectOrStreamDictionaryOrOperator::Operator(operator) => {
operators.push(PdfOperator::from(operator).parse(operands.drain(..))?);
}
}
}
if operands.is_empty() {
Ok(Self {
operators: operators.into(),
})
} else {
Err(PdfParseError::MissingOperator {
pos: parser.tokenizer.pos(),
})
}
}
}
pub type PdfContentStream = PdfStream<PdfDictionary, PdfContentStreamData>;

View file

@ -1,743 +0,0 @@
use crate::{
pdf::{
content_stream::PdfContentStream,
font::PdfFont,
object::{
IsPdfNull, MaybeArray, PdfDate, PdfDictionary, PdfInteger, PdfName, PdfObject,
PdfObjectDirect, PdfObjectIndirect, PdfRectangle, PdfStream, PdfString,
},
parse::{PdfParse, PdfParseError},
pdf_parse,
render::{PdfRenderOperator, PdfRenderState},
},
util::DagDebugState,
};
use rayon::iter::{FromParallelIterator, IntoParallelIterator, ParallelIterator};
use std::{borrow::Cow, fmt, sync::Arc};
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
pub enum PdfDocumentCatalogType {
#[pdf(name = "Catalog")]
#[default]
Catalog,
}
}
pdf_parse! {
#[pdf]
#[derive(Clone)]
pub struct PdfDocumentCatalog {
#[pdf(name = "Type")]
pub ty: PdfDocumentCatalogType,
#[pdf(name = "Version")]
pub version: Option<PdfName>,
#[pdf(name = "Extensions")]
pub extensions: Option<PdfDictionary>,
#[pdf(name = "Pages")]
pub pages: PdfPageTree,
// TODO
#[pdf(flatten)]
pub rest: PdfDictionary,
}
}
impl fmt::Debug for PdfDocumentCatalog {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
DagDebugState::scope(|_state| {
let Self {
ty,
version,
extensions,
pages,
rest,
} = self;
f.debug_struct("PdfDocumentCatalog")
.field("ty", ty)
.field("version", version)
.field("extensions", extensions)
.field("pages", pages)
.field("rest", rest)
.finish()
})
}
}
pdf_parse! {
#[pdf]
#[derive(Clone, Debug)]
pub struct PdfResourcesDictionary {
#[pdf(name = "Font")]
pub fonts: PdfDictionary<PdfFont>,
#[pdf(flatten)]
pub rest: PdfDictionary,
}
}
#[derive(Clone)]
pub struct PdfPageTree {
page_tree: PdfPageTreeNode,
pages: Arc<[PdfPage]>,
}
impl PdfPageTree {
fn collect_leaves(
node: &PdfPageTreeNode,
leaves: &mut Vec<PdfPageTreeLeaf>,
) -> Result<(), PdfParseError> {
for kid in node.kids.iter() {
match kid {
PdfPageTreeNodeOrLeaf::Node(node) => Self::collect_leaves(node, leaves)?,
PdfPageTreeNodeOrLeaf::Leaf(leaf) => {
leaves.push(leaf.clone());
}
PdfPageTreeNodeOrLeaf::Other(v) => {
return Err(PdfParseError::InvalidType {
pos: v.pos(),
ty: "dictionary",
expected_ty: "PdfPageTreeNodeOrLeaf",
});
}
}
}
Ok(())
}
pub fn try_from_page_tree_root(mut page_tree: PdfPageTreeNode) -> Result<Self, PdfParseError> {
page_tree.propagate_inheritable_data_to_leaves();
let mut leaves = Vec::new();
Self::collect_leaves(&page_tree, &mut leaves)?;
Ok(Self {
page_tree,
pages: Result::from_par_iter(
leaves
.into_par_iter()
.map(PdfPage::parse_after_propagating_inheritable_data)
.panic_fuse(),
)?,
})
}
pub fn page_tree(&self) -> &PdfPageTreeNode {
&self.page_tree
}
pub fn pages(&self) -> &Arc<[PdfPage]> {
&self.pages
}
}
impl fmt::Debug for PdfPageTree {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
DagDebugState::scope(|_state| {
let Self {
page_tree: _,
pages,
} = self;
f.debug_struct("PdfPageTree")
.field("pages", pages)
.finish_non_exhaustive()
})
}
}
impl IsPdfNull for PdfPageTree {
fn is_pdf_null(&self) -> bool {
self.page_tree.is_pdf_null()
}
}
impl PdfParse for PdfPageTree {
fn type_name() -> Cow<'static, str> {
Cow::Borrowed("PdfPageTree")
}
fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
Self::try_from_page_tree_root(PdfParse::parse(object)?)
}
}
pdf_parse! {
#[pdf]
#[derive(Clone, Default, Debug)]
pub struct PdfPageInheritableData {
#[pdf(name = "Resources")]
pub resources: Option<PdfResourcesDictionary>,
#[pdf(name = "MediaBox")]
pub media_box: Option<PdfRectangle>,
#[pdf(name = "CropBox")]
pub crop_box: Option<PdfRectangle>,
#[pdf(name = "Rotate")]
pub rotate: Option<PdfPageRotation>,
#[pdf(flatten)]
pub rest: PdfDictionary,
}
}
impl PdfPageInheritableData {
pub fn propagate_to(&self, target: &mut Self) {
let Self {
resources,
media_box,
crop_box,
rotate,
rest: _,
} = self;
fn propagate_to<T: Clone>(this: &Option<T>, target: &mut Option<T>) {
if let (Some(this), target @ None) = (this, target) {
*target = Some(this.clone());
}
}
propagate_to(resources, &mut target.resources);
propagate_to(media_box, &mut target.media_box);
propagate_to(crop_box, &mut target.crop_box);
propagate_to(rotate, &mut target.rotate);
}
}
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
pub enum PdfPageTreeNodeType {
#[pdf(name = "Pages")]
#[default]
Pages,
}
}
pdf_parse! {
#[pdf]
#[derive(Clone)]
pub struct PdfPageTreeNode {
#[pdf(name = "Type")]
pub ty: PdfPageTreeNodeType,
#[pdf(name = "Parent")]
pub parent: Option<PdfObjectIndirect>,
#[pdf(name = "Kids")]
pub kids: Arc<[PdfPageTreeNodeOrLeaf]>,
#[pdf(name = "Count")]
pub count: usize,
// TODO
#[pdf(flatten)]
pub inheritable: PdfPageInheritableData,
}
}
impl fmt::Debug for PdfPageTreeNode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
DagDebugState::scope(|_state| {
let Self {
ty,
parent,
kids,
count,
inheritable,
} = self;
f.debug_struct("PdfPageTreeNode")
.field("ty", ty)
.field("parent", parent)
.field("kids", kids)
.field("count", count)
.field("inheritable", inheritable)
.finish()
})
}
}
impl PdfPageTreeNode {
pub fn propagate_inheritable_data_to_leaves(&mut self) {
for kid in Arc::make_mut(&mut self.kids) {
if let Some(target) = kid.inheritable_data_mut() {
self.inheritable.propagate_to(target);
}
kid.propagate_inheritable_data_to_leaves();
}
}
}
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
pub enum PdfPageType {
#[pdf(name = "Page")]
#[default]
Page,
}
}
pdf_parse! {
#[pdf(name)]
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub enum PdfPageAnnotationsTabOrder {
#[pdf(name = "R")]
RowOrder,
#[pdf(name = "C")]
ColumnOrder,
#[pdf(name = "S")]
StructureOrder,
#[pdf(other)]
Other(PdfName),
}
}
pdf_parse! {
#[pdf]
#[derive(Clone)]
pub struct PdfPageTreeLeaf {
#[pdf(name = "Type")]
pub ty: PdfPageType,
#[pdf(name = "Parent")]
pub parent: PdfObjectIndirect,
#[pdf(name = "LastModified")]
pub last_modified: Option<PdfDate>,
#[pdf(name = "BleedBox")]
pub bleed_box: Option<PdfRectangle>,
#[pdf(name = "TrimBox")]
pub trim_box: Option<PdfRectangle>,
#[pdf(name = "ArtBox")]
pub art_box: Option<PdfRectangle>,
#[pdf(name = "BoxColorInfo")]
pub box_color_info: Option<PdfDictionary>,
#[pdf(name = "Contents")]
pub contents: MaybeArray<PdfContentStream>,
#[pdf(name = "Group")]
pub group: Option<PdfDictionary>,
#[pdf(name = "Thumb")]
pub thumbnail: Option<PdfStream>,
#[pdf(name = "B")]
pub beads: Option<Arc<[PdfDictionary]>>,
#[pdf(name = "Dur")]
pub duration: Option<f32>,
#[pdf(name = "Trans")]
pub transition: Option<PdfDictionary>,
#[pdf(name = "Annots")]
pub annotations: Option<Arc<[PdfDictionary]>>,
#[pdf(name = "AA")]
pub additional_actions: Option<PdfDictionary>,
#[pdf(name = "Metadata")]
pub metadata: Option<PdfStream>,
#[pdf(name = "PieceInfo")]
pub piece_info: Option<PdfDictionary>,
#[pdf(name = "StructParents")]
pub structural_parents: Option<PdfInteger>,
#[pdf(name = "ID")]
pub parent_web_capture_content_set_id: Option<PdfString>,
#[pdf(name = "PZ")]
pub preferred_zoom_factor: Option<f32>,
#[pdf(name = "SeparationInfo")]
pub separation_info: Option<PdfDictionary>,
#[pdf(name = "Tabs")]
pub annotations_tab_order: Option<PdfPageAnnotationsTabOrder>,
#[pdf(name = "TemplateInstantiated")]
pub template_instantiated: Option<PdfName>,
#[pdf(name = "PresSteps")]
pub pres_steps: Option<PdfDictionary>,
#[pdf(name = "UserUnit")]
pub user_unit: Option<f32>,
#[pdf(name = "VP")]
pub viewports: Option<Arc<[PdfDictionary]>>,
#[pdf(flatten)]
pub inheritable: PdfPageInheritableData,
}
}
impl fmt::Debug for PdfPageTreeLeaf {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
DagDebugState::scope(|_state| {
let Self {
ty,
parent,
last_modified,
bleed_box,
trim_box,
art_box,
box_color_info,
contents,
group,
thumbnail,
beads,
duration,
transition,
annotations,
additional_actions,
metadata,
piece_info,
structural_parents,
parent_web_capture_content_set_id,
preferred_zoom_factor,
separation_info,
annotations_tab_order,
template_instantiated,
pres_steps,
user_unit,
viewports,
inheritable,
} = self;
f.debug_struct("PdfPageTreeLeaf")
.field("ty", ty)
.field("parent", parent)
.field("last_modified", last_modified)
.field("bleed_box", bleed_box)
.field("trim_box", trim_box)
.field("art_box", art_box)
.field("box_color_info", box_color_info)
.field("contents", contents)
.field("group", group)
.field("thumbnail", thumbnail)
.field("beads", beads)
.field("duration", duration)
.field("transition", transition)
.field("annotations", annotations)
.field("additional_actions", additional_actions)
.field("metadata", metadata)
.field("piece_info", piece_info)
.field("structural_parents", structural_parents)
.field(
"parent_web_capture_content_set_id",
parent_web_capture_content_set_id,
)
.field("preferred_zoom_factor", preferred_zoom_factor)
.field("separation_info", separation_info)
.field("annotations_tab_order", annotations_tab_order)
.field("template_instantiated", template_instantiated)
.field("pres_steps", pres_steps)
.field("user_unit", user_unit)
.field("viewports", viewports)
.field("inheritable", inheritable)
.finish()
})
}
}
pdf_parse! {
#[pdf(tag = "Type")]
#[derive(Clone)]
pub enum PdfPageTreeNodeOrLeaf {
#[pdf(tag_value = "Pages")]
Node(PdfPageTreeNode),
#[pdf(tag_value = "Page")]
Leaf(PdfPageTreeLeaf),
#[pdf(other)]
Other(PdfDictionary),
}
}
impl PdfPageTreeNodeOrLeaf {
pub fn propagate_inheritable_data_to_leaves(&mut self) {
match self {
PdfPageTreeNodeOrLeaf::Node(v) => v.propagate_inheritable_data_to_leaves(),
PdfPageTreeNodeOrLeaf::Leaf(_) | PdfPageTreeNodeOrLeaf::Other(_) => {}
}
}
pub fn inheritable_data_mut(&mut self) -> Option<&mut PdfPageInheritableData> {
match self {
PdfPageTreeNodeOrLeaf::Node(v) => Some(&mut v.inheritable),
PdfPageTreeNodeOrLeaf::Leaf(v) => Some(&mut v.inheritable),
PdfPageTreeNodeOrLeaf::Other(_) => None,
}
}
pub fn inheritable_data(&self) -> Option<&PdfPageInheritableData> {
match self {
PdfPageTreeNodeOrLeaf::Node(v) => Some(&v.inheritable),
PdfPageTreeNodeOrLeaf::Leaf(v) => Some(&v.inheritable),
PdfPageTreeNodeOrLeaf::Other(_) => None,
}
}
}
impl fmt::Debug for PdfPageTreeNodeOrLeaf {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Node(v) => v.fmt(f),
Self::Leaf(v) => v.fmt(f),
Self::Other(v) => v.fmt(f),
}
}
}
/// the amount by which the page is rotated clockwise when displaying or printing, is always a multiple of 90 degrees.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub enum PdfPageRotation {
#[default]
NoRotation = 0,
ClockwiseBy90Degrees = 90,
By180Degrees = 180,
ClockwiseBy270Degrees = 270,
}
impl PdfPageRotation {
pub fn from_clockwise_angle_in_degrees(angle: i32) -> Option<Self> {
match angle.rem_euclid(360) {
0 => Some(Self::NoRotation),
90 => Some(Self::ClockwiseBy90Degrees),
180 => Some(Self::By180Degrees),
270 => Some(Self::ClockwiseBy270Degrees),
_ => None,
}
}
pub fn from_clockwise_angle_in_degrees_i128(angle: i128) -> Option<Self> {
Self::from_clockwise_angle_in_degrees((angle % 360) as i32)
}
}
impl From<PdfPageRotation> for i32 {
fn from(value: PdfPageRotation) -> Self {
value as i32
}
}
impl IsPdfNull for PdfPageRotation {
fn is_pdf_null(&self) -> bool {
false
}
}
impl PdfParse for PdfPageRotation {
fn type_name() -> Cow<'static, str> {
Cow::Borrowed("page rotation")
}
fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
let object = PdfObjectDirect::from(object);
let pos = object.pos();
let angle = PdfInteger::parse(object.into())?;
Self::from_clockwise_angle_in_degrees_i128(angle.value())
.ok_or(PdfParseError::IntegerOutOfRange { pos })
}
}
#[derive(Clone)]
pub struct PdfPage {
pub ty: PdfPageType,
pub parent: PdfObjectIndirect,
pub last_modified: Option<PdfDate>,
pub resources: PdfResourcesDictionary,
pub media_box: PdfRectangle,
pub crop_box: PdfRectangle,
pub bleed_box: PdfRectangle,
pub trim_box: PdfRectangle,
pub art_box: PdfRectangle,
pub box_color_info: Option<PdfDictionary>,
pub contents: Arc<[PdfContentStream]>,
pub rotate: PdfPageRotation,
pub group: Option<PdfDictionary>,
pub thumbnail: Option<PdfStream>,
pub beads: Option<Arc<[PdfDictionary]>>,
pub duration: Option<f32>,
pub transition: Option<PdfDictionary>,
pub annotations: Option<Arc<[PdfDictionary]>>,
pub additional_actions: Option<PdfDictionary>,
pub metadata: Option<PdfStream>,
pub piece_info: Option<PdfDictionary>,
pub structural_parents: Option<PdfInteger>,
pub parent_web_capture_content_set_id: Option<PdfString>,
pub preferred_zoom_factor: Option<f32>,
pub separation_info: Option<PdfDictionary>,
pub annotations_tab_order: Option<PdfPageAnnotationsTabOrder>,
pub template_instantiated: Option<PdfName>,
pub pres_steps: Option<PdfDictionary>,
pub user_unit: f32,
pub viewports: Option<Arc<[PdfDictionary]>>,
pub rest: PdfDictionary,
rendered_objects: Option<PdfPageRenderedObjects>,
}
impl PdfPage {
pub fn rendered_objects(&self) -> &PdfPageRenderedObjects {
let Some(retval) = &self.rendered_objects else {
unreachable!();
};
retval
}
pub fn parse_after_propagating_inheritable_data(
leaf: PdfPageTreeLeaf,
) -> Result<Self, PdfParseError> {
let PdfPageTreeLeaf {
ty,
parent,
last_modified,
bleed_box,
trim_box,
art_box,
box_color_info,
contents,
group,
thumbnail,
beads,
duration,
transition,
annotations,
additional_actions,
metadata,
piece_info,
structural_parents,
parent_web_capture_content_set_id,
preferred_zoom_factor,
separation_info,
annotations_tab_order,
template_instantiated,
pres_steps,
user_unit,
viewports,
inheritable:
PdfPageInheritableData {
resources,
media_box,
crop_box,
rotate,
rest,
},
} = leaf;
let pos = rest.pos();
let resources = resources.ok_or(PdfParseError::InvalidType {
pos,
ty: "null",
expected_ty: "page resources dictionary",
})?;
let media_box = media_box.ok_or(PdfParseError::InvalidType {
pos,
ty: "null",
expected_ty: "page MediaBox rectangle",
})?;
let crop_box = crop_box.unwrap_or(media_box);
let rotate = rotate.unwrap_or(PdfPageRotation::NoRotation);
let mut retval = Self {
ty,
parent,
last_modified,
resources,
media_box,
crop_box,
bleed_box: bleed_box.unwrap_or(crop_box),
trim_box: trim_box.unwrap_or(crop_box),
art_box: art_box.unwrap_or(crop_box),
box_color_info,
contents: contents.0,
rotate,
group,
thumbnail,
beads,
duration,
transition,
annotations,
additional_actions,
metadata,
piece_info,
structural_parents,
parent_web_capture_content_set_id,
preferred_zoom_factor,
separation_info,
annotations_tab_order,
template_instantiated,
pres_steps,
user_unit: user_unit.unwrap_or(1.0),
viewports,
rest,
rendered_objects: None,
};
retval.rendered_objects = Some(PdfPageRenderedObjects::render_page(&retval)?);
Ok(retval)
}
}
impl fmt::Debug for PdfPage {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
DagDebugState::scope(|_state| {
let Self {
ty,
parent,
last_modified,
resources,
media_box,
crop_box,
bleed_box,
trim_box,
art_box,
box_color_info,
contents,
rotate,
group,
thumbnail,
beads,
duration,
transition,
annotations,
additional_actions,
metadata,
piece_info,
structural_parents,
parent_web_capture_content_set_id,
preferred_zoom_factor,
separation_info,
annotations_tab_order,
template_instantiated,
pres_steps,
user_unit,
viewports,
rest,
rendered_objects,
} = self;
struct Unparsed;
impl fmt::Debug for Unparsed {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str("<unparsed>")
}
}
f.debug_struct("PdfPage")
.field("ty", ty)
.field("parent", parent)
.field("last_modified", last_modified)
.field("resources", resources)
.field("media_box", media_box)
.field("crop_box", crop_box)
.field("bleed_box", bleed_box)
.field("trim_box", trim_box)
.field("art_box", art_box)
.field("box_color_info", box_color_info)
.field("contents", contents)
.field("rotate", rotate)
.field("group", group)
.field("thumbnail", thumbnail)
.field("beads", beads)
.field("duration", duration)
.field("transition", transition)
.field("annotations", annotations)
.field("additional_actions", additional_actions)
.field("metadata", metadata)
.field("piece_info", piece_info)
.field("structural_parents", structural_parents)
.field(
"parent_web_capture_content_set_id",
parent_web_capture_content_set_id,
)
.field("preferred_zoom_factor", preferred_zoom_factor)
.field("separation_info", separation_info)
.field("annotations_tab_order", annotations_tab_order)
.field("template_instantiated", template_instantiated)
.field("pres_steps", pres_steps)
.field("user_unit", user_unit)
.field("viewports", viewports)
.field("rest", rest)
.field(
"rendered_objects",
if let Some(rendered_objects) = rendered_objects {
rendered_objects
} else {
&Unparsed
},
)
.finish()
})
}
}
#[derive(Clone, Debug)]
pub struct PdfPageRenderedObjects {}
impl PdfPageRenderedObjects {
fn render_page(page: &PdfPage) -> Result<Self, PdfParseError> {
let mut state = PdfRenderState::new(page);
for content_stream in page.contents.iter() {
for op in content_stream.decoded_data().as_ref()?.operators.iter() {
op.render(&mut state)?;
}
}
Ok(Self {})
}
}

View file

@ -1,924 +0,0 @@
use crate::{
pdf::{
object::{
IsPdfNull, PdfArray, PdfDictionary, PdfMatrix, PdfName, PdfNameOrInteger, PdfObject,
PdfObjectDirect, PdfRectangle, PdfStream, PdfString,
},
parse::{
GetPdfInputPosition, PdfInputPosition, PdfInputPositionNoCompare, PdfParse,
PdfParseError,
},
pdf_parse,
},
util::{ArcOrRef, DagDebugState},
};
use std::{borrow::Cow, collections::BTreeMap, fmt, sync::Arc};
mod tables;
mod type_1_parse;
pdf_parse! {
#[pdf(transparent)]
#[derive(Clone)]
// TODO: actually parse the stream
pub struct PdfFontToUnicode {
#[pdf]
stream: PdfStream,
}
}
impl fmt::Debug for PdfFontToUnicode {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
DagDebugState::scope(|_state| {
let Self { stream } = self;
f.debug_struct("PdfFontToUnicode")
.field("stream", stream)
.finish()
})
}
}
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
pub enum PdfFontDescriptorType {
#[pdf(name = "FontDescriptor")]
#[default]
FontDescriptor,
}
}
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub enum PdfFontStretch {
#[pdf(name = "UltraCondensed")]
UltraCondensed,
#[pdf(name = "ExtraCondensed")]
ExtraCondensed,
#[pdf(name = "Condensed")]
Condensed,
#[pdf(name = "SemiCondensed")]
SemiCondensed,
#[pdf(name = "Normal")]
Normal,
#[pdf(name = "SemiExpanded")]
SemiExpanded,
#[pdf(name = "Expanded")]
Expanded,
#[pdf(name = "ExtraExpanded")]
ExtraExpanded,
#[pdf(name = "UltraExpanded")]
UltraExpanded,
}
}
pdf_parse! {
#[pdf]
#[derive(Clone)]
pub struct PdfFontDescriptor {
#[pdf(name = "Type")]
pub ty: PdfFontDescriptorType,
#[pdf(name = "FontName")]
pub font_name: PdfName,
#[pdf(name = "FontFamily")]
pub font_family: Option<PdfString>,
#[pdf(name = "FontStretch")]
pub font_stretch: Option<PdfFontStretch>,
#[pdf(name = "FontWeight")]
pub font_weight: Option<u32>,
#[pdf(name = "Flags")]
pub flags: u32,
#[pdf(name = "FontBBox")]
pub font_bounding_box: Option<PdfRectangle>,
#[pdf(name = "ItalicAngle")]
pub italic_angle: f32,
#[pdf(name = "Ascent")]
pub ascent: Option<f32>,
#[pdf(name = "Descent")]
pub descent: Option<f32>,
#[pdf(name = "Leading")]
pub leading: Option<f32>,
#[pdf(name = "CapHeight")]
pub cap_height: Option<f32>,
#[pdf(name = "XHeight")]
pub x_height: Option<f32>,
#[pdf(name = "StemV")]
pub stem_v: Option<f32>,
#[pdf(name = "StemH")]
pub stem_h: Option<f32>,
#[pdf(name = "AvgWidth")]
pub avg_width: Option<f32>,
#[pdf(name = "MaxWidth")]
pub max_width: Option<f32>,
#[pdf(name = "MissingWidth")]
pub missing_width: Option<f32>,
#[pdf(name = "FontFile")]
pub font_file: Option<PdfStream<PdfDictionary, PdfFontType1Program>>,
#[pdf(name = "FontFile2")]
pub font_file2: Option<PdfStream>,
#[pdf(name = "FontFile3")]
pub font_file3: Option<PdfStream>,
#[pdf(name = "CharSet")]
pub char_set: Option<PdfString>,
#[pdf(flatten)]
pub rest: PdfDictionary,
}
}
impl fmt::Debug for PdfFontDescriptor {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
DagDebugState::scope(|_state| {
let Self {
ty,
font_name,
font_family,
font_stretch,
font_weight,
flags,
font_bounding_box,
italic_angle,
ascent,
descent,
leading,
cap_height,
x_height,
stem_v,
stem_h,
avg_width,
max_width,
missing_width,
font_file,
font_file2,
font_file3,
char_set,
rest,
} = self;
f.debug_struct("PdfFontDescriptor")
.field("ty", ty)
.field("font_name", font_name)
.field("font_family", font_family)
.field("font_stretch", font_stretch)
.field("font_weight", font_weight)
.field("flags", flags)
.field("font_bounding_box", font_bounding_box)
.field("italic_angle", italic_angle)
.field("ascent", ascent)
.field("descent", descent)
.field("leading", leading)
.field("cap_height", cap_height)
.field("x_height", x_height)
.field("stem_v", stem_v)
.field("stem_h", stem_h)
.field("avg_width", avg_width)
.field("max_width", max_width)
.field("missing_width", missing_width)
.field("font_file", font_file)
.field("font_file2", font_file2)
.field("font_file3", font_file3)
.field("char_set", char_set)
.field("rest", rest)
.finish()
})
}
}
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
pub enum PdfFontType {
#[pdf(name = "Font")]
#[default]
Font,
}
}
#[derive(Clone)]
pub enum PdfTodo {}
impl fmt::Debug for PdfTodo {
fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
match *self {}
}
}
impl IsPdfNull for PdfTodo {
fn is_pdf_null(&self) -> bool {
match *self {}
}
}
impl PdfParse for PdfTodo {
fn type_name() -> Cow<'static, str> {
Cow::Borrowed("PdfTodo")
}
#[track_caller]
fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
todo!("{object:?}")
}
}
pdf_parse! {
#[pdf(tag = "Subtype")]
#[derive(Clone)]
pub enum PdfFont {
#[pdf(tag_value = "Type0")]
Type0(Arc<PdfFontType0>),
#[pdf(tag_value = "Type1")]
Type1(PdfFontType1),
#[pdf(other)]
Other(Arc<PdfTodo>),
}
}
impl fmt::Debug for PdfFont {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
DagDebugState::scope(|state| match self {
PdfFont::Type0(v) => state.debug_or_id(v, "PdfFontType0(...)").fmt(f),
PdfFont::Type1(v) => v.fmt(f),
PdfFont::Other(v) => match **v {},
})
}
}
impl PdfFont {
pub(crate) fn is_vertical_writing_mode(&self) -> bool {
// TODO:
false
}
}
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
pub enum PdfFontType0Subtype {
#[pdf(name = "Type0")]
#[default]
Type0,
}
}
pdf_parse! {
#[pdf]
#[derive(Clone)]
pub struct PdfFontType0 {
#[pdf(name = "Type")]
pub ty: PdfFontType,
#[pdf(name = "Subtype")]
pub subtype: PdfFontType0Subtype,
#[pdf(name = "BaseFont")]
pub base_font: PdfName,
#[pdf(name = "Encoding")]
// TODO
pub encoding: PdfObjectDirect,
#[pdf(name = "DescendentFonts")]
// TODO
pub descendent_fonts: [PdfDictionary; 1],
#[pdf(name = "ToUnicode")]
pub to_unicode: Option<PdfFontToUnicode>,
#[pdf(flatten)]
pub rest: PdfDictionary,
}
}
impl fmt::Debug for PdfFontType0 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
DagDebugState::scope(|_state| {
let Self {
ty,
subtype,
base_font,
encoding,
descendent_fonts,
to_unicode,
rest,
} = self;
f.debug_struct("PdfFontType0")
.field("ty", ty)
.field("subtype", subtype)
.field("base_font", base_font)
.field("encoding", encoding)
.field("descendent_fonts", descendent_fonts)
.field("to_unicode", to_unicode)
.field("rest", rest)
.finish()
})
}
}
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
pub enum PdfFontType1Subtype {
#[pdf(name = "Type1")]
#[default]
Type1,
}
}
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub enum PdfStandardFontName {
#[pdf(name = "Times-Roman")]
TimesRoman,
#[pdf(name = "Helvetica")]
Helvetica,
#[pdf(name = "Courier")]
Courier,
#[pdf(name = "Symbol")]
Symbol,
#[pdf(name = "Times-Bold")]
TimesBold,
#[pdf(name = "Helvetica-Bold")]
HelveticaBold,
#[pdf(name = "Courier-Bold")]
CourierBold,
#[pdf(name = "ZapfDingbats")]
ZapfDingbats,
#[pdf(name = "Times-Italic")]
TimesItalic,
#[pdf(name = "Helvetica-Oblique")]
HelveticaOblique,
#[pdf(name = "Courier-Oblique")]
CourierOblique,
#[pdf(name = "Times-BoldItalic")]
TimesBoldItalic,
#[pdf(name = "Helvetica-BoldOblique")]
HelveticaBoldOblique,
#[pdf(name = "Courier-BoldOblique")]
CourierBoldOblique,
}
}
#[derive(Clone)]
pub enum PdfFontType1 {
Standard(Arc<PdfFontType1Standard>),
Other(Arc<PdfFontType1Other>),
}
impl fmt::Debug for PdfFontType1 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
DagDebugState::scope(|state| match self {
PdfFontType1::Standard(v) => state.debug_or_id(v, "PdfFontType1Standard(...)").fmt(f),
PdfFontType1::Other(v) => state.debug_or_id(v, "PdfFontType1Other(...)").fmt(f),
})
}
}
impl PdfFontType1 {
pub fn common(&self) -> PdfFontType1Common {
match self {
PdfFontType1::Standard(v) => v.common(),
PdfFontType1::Other(v) => v.common(),
}
}
pub fn name(&self) -> &Option<PdfName> {
match self {
Self::Standard(v) => &v.name,
Self::Other(v) => &v.name,
}
}
pub fn base_font(&self) -> PdfName {
match self {
Self::Standard(v) => v.base_font.into(),
Self::Other(v) => v.base_font.clone(),
}
}
pub fn first_char(&self) -> Option<u32> {
match self {
Self::Standard(v) => v.first_char,
Self::Other(v) => Some(v.first_char),
}
}
pub fn last_char(&self) -> Option<u32> {
match self {
Self::Standard(v) => v.last_char,
Self::Other(v) => Some(v.last_char),
}
}
pub fn widths(&self) -> Option<&Arc<[f32]>> {
match self {
Self::Standard(v) => v.widths.as_ref(),
Self::Other(v) => Some(&v.widths),
}
}
pub fn font_descriptor(&self) -> Option<&PdfFontDescriptor> {
match self {
Self::Standard(v) => v.font_descriptor.as_ref(),
Self::Other(v) => Some(&v.font_descriptor),
}
}
pub fn encoding(&self) -> &Option<PdfSimpleFontEncoding> {
match self {
Self::Standard(v) => &v.encoding,
Self::Other(v) => &v.encoding,
}
}
pub fn to_unicode(&self) -> &Option<PdfFontToUnicode> {
match self {
Self::Standard(v) => &v.to_unicode,
Self::Other(v) => &v.to_unicode,
}
}
pub fn rest(&self) -> &PdfDictionary {
match self {
Self::Standard(v) => &v.rest,
Self::Other(v) => &v.rest,
}
}
}
impl IsPdfNull for PdfFontType1 {
fn is_pdf_null(&self) -> bool {
false
}
}
impl PdfParse for PdfFontType1 {
fn type_name() -> Cow<'static, str> {
Cow::Borrowed("PdfFontType1")
}
fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
let object = object.into();
let font = if let PdfObjectDirect::Dictionary(object) = object {
if let Ok(_) = PdfStandardFontName::parse(object.get_or_null(b"BaseFont".as_slice())) {
Self::Standard(PdfParse::parse(object.into())?)
} else {
Self::Other(PdfParse::parse(object.into())?)
}
} else {
Self::Other(PdfParse::parse(object.into())?)
};
if let Some(font_file) = font.font_descriptor().and_then(|v| v.font_file.as_ref()) {
font_file.decoded_data().as_ref()?;
}
Ok(font)
}
}
#[derive(Clone)]
pub struct PdfFontType1Common {
pub ty: PdfFontType,
pub subtype: PdfFontType1Subtype,
pub name: Option<PdfName>,
pub base_font: PdfName,
pub first_char: Option<u32>,
pub last_char: Option<u32>,
pub widths: Option<Arc<[f32]>>,
pub font_descriptor: Option<PdfFontDescriptor>,
pub encoding: Option<PdfSimpleFontEncoding>,
pub to_unicode: Option<PdfFontToUnicode>,
pub rest: PdfDictionary,
}
impl fmt::Debug for PdfFontType1Common {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
DagDebugState::scope(|_state| {
let Self {
ty,
subtype,
name,
base_font,
first_char,
last_char,
widths,
font_descriptor,
encoding,
to_unicode,
rest,
} = self;
f.debug_struct("PdfFontType1Common")
.field("ty", ty)
.field("subtype", subtype)
.field("name", name)
.field("base_font", base_font)
.field("first_char", first_char)
.field("last_char", last_char)
.field("widths", widths)
.field("font_descriptor", font_descriptor)
.field("encoding", encoding)
.field("to_unicode", to_unicode)
.field("rest", rest)
.finish()
})
}
}
pdf_parse! {
#[pdf]
#[derive(Clone)]
pub struct PdfFontType1Standard {
#[pdf(name = "Type")]
pub ty: PdfFontType,
#[pdf(name = "Subtype")]
pub subtype: PdfFontType1Subtype,
#[pdf(name = "Name")]
pub name: Option<PdfName>,
#[pdf(name = "BaseFont")]
pub base_font: PdfStandardFontName,
#[pdf(name = "FirstChar")]
pub first_char: Option<u32>,
#[pdf(name = "LastChar")]
pub last_char: Option<u32>,
#[pdf(name = "Widths")]
pub widths: Option<Arc<[f32]>>,
#[pdf(name = "FontDescriptor")]
pub font_descriptor: Option<PdfFontDescriptor>,
#[pdf(name = "Encoding")]
pub encoding: Option<PdfSimpleFontEncoding>,
#[pdf(name = "ToUnicode")]
pub to_unicode: Option<PdfFontToUnicode>,
#[pdf(flatten)]
pub rest: PdfDictionary,
}
}
impl fmt::Debug for PdfFontType1Standard {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
DagDebugState::scope(|_state| {
let Self {
ty,
subtype,
name,
base_font,
first_char,
last_char,
widths,
font_descriptor,
encoding,
to_unicode,
rest,
} = self;
f.debug_struct("PdfFontType1Standard")
.field("ty", ty)
.field("subtype", subtype)
.field("name", name)
.field("base_font", base_font)
.field("first_char", first_char)
.field("last_char", last_char)
.field("widths", widths)
.field("font_descriptor", font_descriptor)
.field("encoding", encoding)
.field("to_unicode", to_unicode)
.field("rest", rest)
.finish()
})
}
}
impl PdfFontType1Standard {
pub fn common(&self) -> PdfFontType1Common {
let Self {
ty,
subtype,
ref name,
base_font,
first_char,
last_char,
ref widths,
ref font_descriptor,
ref encoding,
ref to_unicode,
ref rest,
} = *self;
PdfFontType1Common {
ty,
subtype,
name: name.clone(),
base_font: base_font.into(),
first_char,
last_char,
widths: widths.clone(),
font_descriptor: font_descriptor.clone(),
encoding: encoding.clone(),
to_unicode: to_unicode.clone(),
rest: rest.clone(),
}
}
}
pdf_parse! {
#[pdf]
#[derive(Clone)]
pub struct PdfFontType1Other {
#[pdf(name = "Type")]
pub ty: PdfFontType,
#[pdf(name = "Subtype")]
pub subtype: PdfFontType1Subtype,
#[pdf(name = "Name")]
pub name: Option<PdfName>,
#[pdf(name = "BaseFont")]
pub base_font: PdfName,
#[pdf(name = "FirstChar")]
pub first_char: u32,
#[pdf(name = "LastChar")]
pub last_char: u32,
#[pdf(name = "Widths")]
pub widths: Arc<[f32]>,
#[pdf(name = "FontDescriptor")]
pub font_descriptor: PdfFontDescriptor,
#[pdf(name = "Encoding")]
pub encoding: Option<PdfSimpleFontEncoding>,
#[pdf(name = "ToUnicode")]
pub to_unicode: Option<PdfFontToUnicode>,
#[pdf(flatten)]
pub rest: PdfDictionary,
}
}
impl fmt::Debug for PdfFontType1Other {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
DagDebugState::scope(|_state| {
let Self {
ty,
subtype,
name,
base_font,
first_char,
last_char,
widths,
font_descriptor,
encoding,
to_unicode,
rest,
} = self;
f.debug_struct("PdfFontType1Other")
.field("ty", ty)
.field("subtype", subtype)
.field("name", name)
.field("base_font", base_font)
.field("first_char", first_char)
.field("last_char", last_char)
.field("widths", widths)
.field("font_descriptor", font_descriptor)
.field("encoding", encoding)
.field("to_unicode", to_unicode)
.field("rest", rest)
.finish()
})
}
}
impl PdfFontType1Other {
pub fn common(&self) -> PdfFontType1Common {
let Self {
ty,
subtype,
ref name,
ref base_font,
first_char,
last_char,
ref widths,
ref font_descriptor,
ref encoding,
ref to_unicode,
ref rest,
} = *self;
PdfFontType1Common {
ty,
subtype,
name: name.clone(),
base_font: base_font.clone(),
first_char: Some(first_char),
last_char: Some(last_char),
widths: Some(widths.clone()),
font_descriptor: Some(font_descriptor.clone()),
encoding: encoding.clone(),
to_unicode: to_unicode.clone(),
rest: rest.clone(),
}
}
}
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub enum PdfSimpleFontEncodingPredefined {
#[pdf(name = "MacRomanEncoding")]
MacRomanEncoding,
#[pdf(name = "MacExpertEncoding")]
MacExpertEncoding,
#[pdf(name = "WinAnsiEncoding")]
WinAnsiEncoding,
}
}
impl PdfSimpleFontEncodingPredefined {
pub const fn table(self) -> PdfSimpleFontEncodingTable {
match self {
Self::MacRomanEncoding => PdfSimpleFontEncodingTable::MAC_ROMAN,
Self::MacExpertEncoding => PdfSimpleFontEncodingTable::MAC_EXPERT,
Self::WinAnsiEncoding => PdfSimpleFontEncodingTable::WIN_ANSI,
}
}
}
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Default)]
pub enum PdfSimpleFontEncodingDictionaryType {
#[pdf(name = "Encoding")]
#[default]
Encoding,
}
}
pdf_parse! {
#[pdf]
#[derive(Clone, Debug)]
pub struct PdfSimpleFontEncodingDictionary {
#[pdf(name = "Type")]
pub ty: Option<PdfSimpleFontEncodingDictionaryType>,
#[pdf(name = "BaseEncoding")]
pub base_encoding: Option<PdfSimpleFontEncodingPredefined>,
#[pdf(name = "Differences")]
pub differences: Option<PdfSimpleFontEncodingDifferences>,
#[pdf(flatten)]
pub rest: PdfDictionary,
}
}
impl PdfSimpleFontEncodingDictionary {
pub fn table(
&self,
default_table: impl FnOnce() -> PdfSimpleFontEncodingTable,
) -> PdfSimpleFontEncodingTable {
let Self {
ty: _,
base_encoding,
differences,
rest: _,
} = self;
let mut retval = base_encoding
.map(|v| v.table())
.unwrap_or_else(default_table);
if let Some(differences) = differences {
retval = differences.table(retval);
}
retval
}
}
#[derive(Clone, Debug)]
pub struct PdfSimpleFontEncodingDifferences {
pos: PdfInputPositionNoCompare,
map: Arc<BTreeMap<u8, PdfName>>,
}
impl PdfSimpleFontEncodingDifferences {
pub fn new(pos: impl Into<PdfInputPositionNoCompare>, map: Arc<BTreeMap<u8, PdfName>>) -> Self {
Self {
pos: pos.into(),
map,
}
}
pub fn pos(&self) -> PdfInputPosition {
self.pos.0
}
pub fn map(&self) -> &Arc<BTreeMap<u8, PdfName>> {
&self.map
}
pub fn table(&self, base_table: PdfSimpleFontEncodingTable) -> PdfSimpleFontEncodingTable {
let mut retval = base_table;
let table: &mut [_; 0x100] = ArcOrRef::make_mut(&mut retval.table);
for (&byte, name) in self.map.iter() {
table[usize::from(byte)] = PdfSimpleFontEncodingTableEntry {
name: Some(name.clone()),
presumed_unicode: None,
};
}
retval
}
}
impl GetPdfInputPosition for PdfSimpleFontEncodingDifferences {
fn get_pdf_input_position(&self) -> PdfInputPosition {
self.pos.0
}
}
impl IsPdfNull for PdfSimpleFontEncodingDifferences {
fn is_pdf_null(&self) -> bool {
false
}
}
impl PdfParse for PdfSimpleFontEncodingDifferences {
fn type_name() -> Cow<'static, str> {
Cow::Borrowed("PdfSimpleFontEncodingDifferences")
}
fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
let array = PdfArray::parse(object)?;
let pos = array.pos();
let mut map = BTreeMap::new();
let mut next_byte = None::<u8>;
for i in array.iter() {
let i = PdfNameOrInteger::parse(i.clone())?;
match i {
PdfNameOrInteger::Name(name) => {
let pos = name.pos();
let byte = next_byte.ok_or(PdfParseError::IntegerOutOfRange { pos })?;
next_byte = byte.checked_add(1);
map.insert(byte, name);
}
PdfNameOrInteger::Integer(v) => next_byte = Some(u8::parse(v.into())?),
}
}
Ok(Self {
pos: pos.into(),
map: Arc::new(map),
})
}
}
#[derive(Clone, Default, Debug)]
pub struct PdfSimpleFontEncodingTableEntry {
pub name: Option<PdfName>,
pub presumed_unicode: Option<&'static str>,
}
impl PdfSimpleFontEncodingTableEntry {
pub const fn new_static(
name: Option<&'static [u8]>,
presumed_unicode: Option<&'static str>,
) -> Self {
Self {
name: match name {
Some(name) => Some(PdfName::new_static(name)),
None => None,
},
presumed_unicode,
}
}
}
#[derive(Clone, Debug)]
pub struct PdfSimpleFontEncodingTable {
pub table: ArcOrRef<'static, [PdfSimpleFontEncodingTableEntry; 0x100]>,
}
#[derive(Clone, Debug)]
pub enum PdfSimpleFontEncoding {
Predefined(PdfSimpleFontEncodingPredefined),
Dictionary(PdfSimpleFontEncodingDictionary),
}
impl PdfSimpleFontEncoding {
pub fn table(
&self,
default_table: impl FnOnce() -> PdfSimpleFontEncodingTable,
) -> PdfSimpleFontEncodingTable {
match self {
PdfSimpleFontEncoding::Predefined(v) => v.table(),
PdfSimpleFontEncoding::Dictionary(v) => v.table(default_table),
}
}
}
impl IsPdfNull for PdfSimpleFontEncoding {
fn is_pdf_null(&self) -> bool {
false
}
}
impl PdfParse for PdfSimpleFontEncoding {
fn type_name() -> Cow<'static, str> {
Cow::Borrowed("PdfSimpleFontEncoding")
}
fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
let object = PdfObjectDirect::from(object);
match object {
PdfObjectDirect::Name(v) => Ok(Self::Predefined(PdfParse::parse(v.into())?)),
PdfObjectDirect::Dictionary(v) => Ok(Self::Dictionary(PdfParse::parse(v.into())?)),
_ => Err(PdfParseError::InvalidType {
pos: object.pos(),
ty: object.type_name(),
expected_ty: "PdfSimpleFontEncoding",
}),
}
}
}
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct PdfFontType1Program {
pub encoding: Option<Arc<[Option<PdfName>]>>,
pub font_bbox: Option<PdfRectangle>,
pub font_info: Option<PdfFontType1FontInfo>,
pub font_matrix: Option<PdfMatrix>,
pub font_name: Option<PdfName>,
}
#[derive(Clone, Debug)]
pub struct PdfFontType1FontInfo {
pub family_name: Option<PdfString>,
pub full_name: Option<PdfString>,
pub notice: Option<PdfString>,
pub weight: Option<PdfString>,
pub version: Option<PdfString>,
pub italic_angle: Option<f32>,
pub is_fixed_pitch: Option<bool>,
pub underline_position: Option<f32>,
pub underline_thickness: Option<f32>,
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,66 +0,0 @@
use crate::pdf::{
object::{PdfDictionary, PdfName},
parse::{PdfInputPosition, PdfParse, PdfParseError},
pdf_parse,
};
pub mod flate;
pdf_parse! {
#[pdf(name)]
#[derive(Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum PdfStreamFilter {
#[pdf(name = "ASCIIHexDecode")]
AsciiHexDecode,
#[pdf(name = "ASCII85Decode")]
Ascii85Decode,
#[pdf(name = "LZWDecode")]
LzwDecode,
#[pdf(name = "FlateDecode")]
FlateDecode,
#[pdf(name = "RunLengthDecode")]
RunLengthDecode,
#[pdf(name = "CCITTFaxDecode")]
CcittFaxDecode,
#[pdf(name = "JBIG2Decode")]
Jbig2Decode,
#[pdf(name = "DCTDecode")]
DctDecode,
#[pdf(name = "JPXDecode")]
JpxDecode,
#[pdf(name = "Crypt")]
Crypt,
#[pdf(other)]
Unknown(PdfName),
}
}
impl PdfStreamFilter {
pub fn decode_stream_data(
&self,
filter_parms: PdfDictionary,
stream_pos: PdfInputPosition,
encoded_data: &[u8],
) -> Result<Vec<u8>, PdfParseError> {
match self {
PdfStreamFilter::AsciiHexDecode => todo!(),
PdfStreamFilter::Ascii85Decode => todo!(),
PdfStreamFilter::LzwDecode => todo!(),
PdfStreamFilter::FlateDecode => {
flate::PdfFilterParmsFlateDecode::parse(filter_parms.into())?
.decode_stream_data(stream_pos, encoded_data)
}
PdfStreamFilter::RunLengthDecode => todo!(),
PdfStreamFilter::CcittFaxDecode => todo!(),
PdfStreamFilter::Jbig2Decode => todo!(),
PdfStreamFilter::DctDecode => todo!(),
PdfStreamFilter::JpxDecode => todo!(),
PdfStreamFilter::Crypt => todo!(),
PdfStreamFilter::Unknown(filter) => Err(PdfParseError::UnknownStreamFilter {
pos: stream_pos,
filter: filter.clone(),
}),
}
}
}

View file

@ -1,74 +0,0 @@
use crate::pdf::{
object::PdfDictionary,
parse::{PdfInputPosition, PdfParseError},
pdf_parse,
stream_filters::PdfStreamFilter,
};
use std::{io::Read, num::NonZero};
pdf_parse! {
#[pdf]
#[derive(Clone, Debug, Default)]
pub struct PdfFilterParmsFlateDecode {
#[pdf(name = "Predictor")]
pub predictor: Option<NonZero<u32>>,
#[pdf(name = "Colors")]
pub colors: Option<NonZero<u32>>,
#[pdf(name = "BitsPerComponent")]
pub bits_per_component: Option<NonZero<u32>>,
#[pdf(name = "Columns")]
pub columns: Option<NonZero<u32>>,
#[pdf(flatten)]
pub rest: PdfDictionary,
}
}
impl PdfFilterParmsFlateDecode {
pub const FILTER: PdfStreamFilter = PdfStreamFilter::FlateDecode;
pub const DEFAULT_PREDICTOR: NonZero<u32> = const { NonZero::new(1).unwrap() };
pub const DEFAULT_COLORS: NonZero<u32> = const { NonZero::new(1).unwrap() };
pub const DEFAULT_BITS_PER_COMPONENT: NonZero<u32> = const { NonZero::new(8).unwrap() };
pub const DEFAULT_COLUMNS: NonZero<u32> = const { NonZero::new(1).unwrap() };
pub fn predictor(&self) -> NonZero<u32> {
self.predictor.unwrap_or(Self::DEFAULT_PREDICTOR)
}
pub fn colors(&self) -> NonZero<u32> {
self.colors.unwrap_or(Self::DEFAULT_COLORS)
}
pub fn bits_per_component(&self) -> NonZero<u32> {
self.bits_per_component
.unwrap_or(Self::DEFAULT_BITS_PER_COMPONENT)
}
pub fn columns(&self) -> NonZero<u32> {
self.columns.unwrap_or(Self::DEFAULT_COLUMNS)
}
pub fn decode_stream_data(
&self,
stream_pos: PdfInputPosition,
encoded_data: &[u8],
) -> Result<Vec<u8>, PdfParseError> {
let Self {
predictor: _,
colors: _,
bits_per_component: _,
columns: _,
rest: _,
} = self;
let mut decoded_data = vec![];
flate2::bufread::ZlibDecoder::new(encoded_data)
.read_to_end(&mut decoded_data)
.map_err(|e| PdfParseError::StreamFilterError {
pos: stream_pos,
filter: Self::FILTER.into(),
error: e.to_string(),
})?;
let predictor = self.predictor();
let colors = self.colors();
let bits_per_component = self.bits_per_component();
let columns = self.columns();
match predictor {
Self::DEFAULT_PREDICTOR => Ok(decoded_data),
_ => todo!("{predictor}"),
}
}
}

View file

@ -1,382 +0,0 @@
use std::{
any::{Any, TypeId},
borrow::Borrow,
cell::Cell,
collections::HashMap,
fmt,
hash::{Hash, Hasher},
sync::Arc,
};
pub enum ArcOrRef<'a, T: ?Sized> {
Arc(Arc<T>),
Ref(&'a T),
}
impl<'a, T: ?Sized> AsRef<T> for ArcOrRef<'a, T> {
fn as_ref(&self) -> &T {
self
}
}
impl<'a, T: ?Sized> Borrow<T> for ArcOrRef<'a, T> {
fn borrow(&self) -> &T {
self
}
}
impl<'a, T: ?Sized> From<Arc<T>> for ArcOrRef<'a, T> {
fn from(value: Arc<T>) -> Self {
Self::Arc(value)
}
}
impl<'a, T: ?Sized> From<&'a T> for ArcOrRef<'a, T> {
fn from(value: &'a T) -> Self {
Self::Ref(value)
}
}
impl<'a, T: ?Sized> Default for ArcOrRef<'a, T>
where
&'a T: Default,
{
fn default() -> Self {
Self::Ref(Default::default())
}
}
impl<T: ?Sized> Clone for ArcOrRef<'_, T> {
fn clone(&self) -> Self {
match self {
Self::Arc(v) => Self::Arc(v.clone()),
Self::Ref(v) => Self::Ref(v),
}
}
}
impl<T: ?Sized + Hash> Hash for ArcOrRef<'_, T> {
fn hash<H: Hasher>(&self, state: &mut H) {
T::hash(self, state)
}
}
impl<'a, 'b, T: ?Sized + PartialEq<U>, U: ?Sized> PartialEq<ArcOrRef<'b, U>> for ArcOrRef<'a, T> {
fn eq(&self, other: &ArcOrRef<'b, U>) -> bool {
T::eq(self, other)
}
}
impl<T: ?Sized + Eq> Eq for ArcOrRef<'_, T> {}
impl<'a, 'b, T: ?Sized + PartialOrd<U>, U: ?Sized> PartialOrd<ArcOrRef<'b, U>> for ArcOrRef<'a, T> {
fn partial_cmp(&self, other: &ArcOrRef<'b, U>) -> Option<std::cmp::Ordering> {
T::partial_cmp(self, other)
}
}
impl<T: ?Sized + Ord> Ord for ArcOrRef<'_, T> {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
T::cmp(self, other)
}
}
impl<T: ?Sized> std::ops::Deref for ArcOrRef<'_, T> {
type Target = T;
fn deref(&self) -> &Self::Target {
match self {
ArcOrRef::Arc(v) => v,
ArcOrRef::Ref(v) => v,
}
}
}
impl<T: ?Sized + fmt::Debug> fmt::Debug for ArcOrRef<'_, T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
T::fmt(self, f)
}
}
impl<T: ?Sized + fmt::Display> fmt::Display for ArcOrRef<'_, T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
T::fmt(self, f)
}
}
/// a stable alternative to `CloneToUninit` for `Arc`
pub trait ArcFromRef {
/// like `Arc::new(Self::clone(self))` but works for unsized types too
fn arc_from_ref(&self) -> Arc<Self>;
/// generic version of `Arc::make_mut`
fn make_mut(this: &mut Arc<Self>) -> &mut Self;
}
impl<T: Clone> ArcFromRef for T {
fn arc_from_ref(&self) -> Arc<Self> {
Arc::new(Self::clone(self))
}
fn make_mut(this: &mut Arc<Self>) -> &mut Self {
Arc::make_mut(this)
}
}
impl<T: Clone> ArcFromRef for [T] {
fn arc_from_ref(&self) -> Arc<Self> {
Arc::from(self)
}
fn make_mut(this: &mut Arc<Self>) -> &mut Self {
Arc::make_mut(this)
}
}
impl ArcFromRef for str {
fn arc_from_ref(&self) -> Arc<Self> {
Arc::from(self)
}
fn make_mut(this: &mut Arc<Self>) -> &mut Self {
Arc::make_mut(this)
}
}
impl<'a, T: ?Sized + ArcFromRef> ArcOrRef<'a, T> {
pub fn into_arc(this: Self) -> Arc<T> {
match this {
ArcOrRef::Arc(v) => v,
ArcOrRef::Ref(v) => T::arc_from_ref(v),
}
}
pub fn make_arc(this: &mut Self) -> &mut Arc<T> {
match this {
ArcOrRef::Arc(v) => v,
ArcOrRef::Ref(v) => {
*this = ArcOrRef::Arc(T::arc_from_ref(v));
let ArcOrRef::Arc(v) = this else {
unreachable!();
};
v
}
}
}
pub fn make_mut(this: &mut Self) -> &mut T {
T::make_mut(Self::make_arc(this))
}
}
trait DagDebugStateSealed {}
#[expect(private_bounds)]
pub trait SupportsDagDebugState: DagDebugStateSealed + 'static + Clone {
type Key: Clone + Hash + Eq + 'static;
fn key(this: &Self) -> Self::Key;
}
impl<T: 'static> DagDebugStateSealed for Arc<T> {}
impl<T: 'static> SupportsDagDebugState for Arc<T> {
type Key = *const T;
fn key(this: &Self) -> Self::Key {
Arc::as_ptr(this)
}
}
impl<T: 'static> DagDebugStateSealed for Arc<[T]> {}
impl<T: 'static> SupportsDagDebugState for Arc<[T]> {
type Key = *const [T];
fn key(this: &Self) -> Self::Key {
Arc::as_ptr(this)
}
}
impl DagDebugStateSealed for Arc<str> {}
impl SupportsDagDebugState for Arc<str> {
type Key = *const str;
fn key(this: &Self) -> Self::Key {
Arc::as_ptr(this)
}
}
trait DagDebugStatePartTrait: 'static {
fn reset(&mut self);
fn as_any_mut(&mut self) -> &mut dyn Any;
}
struct DagDebugStatePart<T: SupportsDagDebugState> {
table: HashMap<T::Key, (u64, T)>,
next_id: u64,
}
impl<T: SupportsDagDebugState> DagDebugStatePartTrait for DagDebugStatePart<T> {
fn reset(&mut self) {
let Self { table, next_id } = self;
table.clear();
*next_id = 0;
}
fn as_any_mut(&mut self) -> &mut dyn Any {
self
}
}
impl<T: SupportsDagDebugState> DagDebugStatePart<T> {
fn insert(&mut self, value: &T) -> DagDebugStateInsertResult {
use std::collections::hash_map::Entry;
match self.table.entry(T::key(value)) {
Entry::Occupied(entry) => DagDebugStateInsertResult::Old { id: entry.get().0 },
Entry::Vacant(entry) => {
let value = T::clone(value);
let id = self.next_id;
self.next_id += 1;
entry.insert((id, value));
DagDebugStateInsertResult::New { id }
}
}
}
}
impl<T: SupportsDagDebugState> Default for DagDebugStatePart<T> {
fn default() -> Self {
Self {
table: HashMap::default(),
next_id: 0,
}
}
}
pub struct DagDebugState {
parts: std::cell::RefCell<HashMap<TypeId, Box<dyn DagDebugStatePartTrait>>>,
ref_count: Cell<usize>,
}
#[derive(Clone, Copy, Debug)]
pub enum DagDebugStateInsertResult {
New { id: u64 },
Old { id: u64 },
}
impl DagDebugStateInsertResult {
pub fn id(self) -> u64 {
match self {
Self::New { id } | Self::Old { id } => id,
}
}
}
impl DagDebugState {
fn with_part<T: SupportsDagDebugState, R>(
&self,
f: impl FnOnce(&mut DagDebugStatePart<T>) -> R,
) -> R {
let mut parts = self.parts.borrow_mut();
let Some(part) = parts
.entry(TypeId::of::<DagDebugStatePart<T>>())
.or_insert_with(|| Box::new(DagDebugStatePart::<T>::default()))
.as_any_mut()
.downcast_mut::<DagDebugStatePart<T>>()
else {
unreachable!()
};
f(part)
}
pub fn insert<T: SupportsDagDebugState>(&self, value: &T) -> DagDebugStateInsertResult {
self.with_part(|part: &mut DagDebugStatePart<T>| part.insert(value))
}
pub fn debug_or_id<'a, T: SupportsDagDebugState + fmt::Debug, Abbreviated: fmt::Display>(
&self,
value: &'a T,
abbreviated: Abbreviated,
) -> impl fmt::Debug + fmt::Display + use<'a, T, Abbreviated> {
self.debug_or_id_with(value, fmt::Debug::fmt, move |f| abbreviated.fmt(f))
}
pub fn debug_or_id_with<
'a,
T: SupportsDagDebugState,
DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result,
DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result,
>(
&self,
value: &'a T,
debug_value: DebugValue,
debug_abbreviated: DebugAbbreviated,
) -> impl fmt::Debug + fmt::Display + use<'a, T, DebugValue, DebugAbbreviated> {
struct DebugOrIdWith<
'a,
T: SupportsDagDebugState,
DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result,
DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result,
> {
insert_result: DagDebugStateInsertResult,
value: &'a T,
debug_value: DebugValue,
debug_abbreviated: DebugAbbreviated,
}
impl<
'a,
T: SupportsDagDebugState,
DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result,
DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result,
> fmt::Debug for DebugOrIdWith<'a, T, DebugValue, DebugAbbreviated>
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(self, f)
}
}
impl<
'a,
T: SupportsDagDebugState,
DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result,
DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result,
> fmt::Display for DebugOrIdWith<'a, T, DebugValue, DebugAbbreviated>
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "#{} ", self.insert_result.id())?;
match self.insert_result {
DagDebugStateInsertResult::New { id: _ } => (self.debug_value)(self.value, f),
DagDebugStateInsertResult::Old { id: _ } => (self.debug_abbreviated)(f),
}
}
}
DebugOrIdWith {
insert_result: self.insert(value),
value,
debug_value,
debug_abbreviated,
}
}
#[must_use]
fn inc_ref_count_scope(&self) -> impl Sized {
struct DecRefCountOnDrop<'a>(&'a DagDebugState);
impl Drop for DecRefCountOnDrop<'_> {
fn drop(&mut self) {
self.0.ref_count.set(self.0.ref_count.get() - 1);
if self.0.ref_count.get() == 0 {
self.0
.parts
.borrow_mut()
.values_mut()
.for_each(|v| v.reset());
}
}
}
self.ref_count.set(
self.ref_count
.get()
.checked_add(1)
.expect("too many nested calls"),
);
DecRefCountOnDrop(self)
}
pub fn scope<R>(f: impl FnOnce(&Self) -> R) -> R {
thread_local! {
static STATE: DagDebugState = DagDebugState { parts: Default::default(), ref_count: Cell::new(0) };
}
STATE.with(|state| {
let _scope = state.inc_ref_count_scope();
f(state)
})
}
}