Compare commits

..

8 commits

17 changed files with 11470 additions and 1 deletions

3
.gitignore vendored
View file

@ -3,4 +3,5 @@
*.egg-info
__pycache__
*.log
/powerisa-instructions.xml
/powerisa-instructions.xml
/target

109
Cargo.lock generated Normal file
View file

@ -0,0 +1,109 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "adler2"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
[[package]]
name = "cfg-if"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "crc32fast"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
dependencies = [
"cfg-if",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
name = "either"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "flate2"
version = "1.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]]
name = "miniz_oxide"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
dependencies = [
"adler2",
"simd-adler32",
]
[[package]]
name = "parse_powerisa_pdf"
version = "0.1.0"
dependencies = [
"flate2",
"rayon",
]
[[package]]
name = "rayon"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]]
name = "simd-adler32"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"

10
Cargo.toml Normal file
View file

@ -0,0 +1,10 @@
[package]
name = "parse_powerisa_pdf"
version = "0.1.0"
edition = "2024"
license = "LGPL-3.0-or-later"
[dependencies]
flate2 = "1.1.5"
rayon = "1.11.0"

5
src/lib.rs Normal file
View file

@ -0,0 +1,5 @@
// Hidden re-export of `std` under a crate-local name.
// NOTE(review): presumably so this crate's exported macros can name `$crate::__std::…`
// regardless of what the caller has in scope — confirm against the macro definitions.
#[doc(hidden)]
pub use std as __std;
// PDF parsing and object model (provides `pdf::Pdf`, used by the binary).
pub mod pdf;
// Shared helpers used by the `pdf` module (e.g. `ArcOrRef`, `DagDebugState`).
pub mod util;

38
src/main.rs Normal file
View file

@ -0,0 +1,38 @@
use parse_powerisa_pdf::pdf::Pdf;
use std::{
error::Error,
io::{IsTerminal, Read},
process::ExitCode,
};
/// Command-line entry point.
///
/// Accepts at most one positional argument: the path of the PDF to read, or `-` for stdin.
/// With no argument stdin is read, unless stdin is a terminal, in which case the usage text
/// is printed and the process exits with a failure code. Any other argument starting with
/// `-` is rejected the same way.
///
/// After parsing, the trailer dictionary is pretty-printed to stdout. The function then hits
/// `todo!()`, so it currently always panics after printing and the final `Ok` is unreachable.
fn main() -> Result<ExitCode, Box<dyn Error>> {
    let argv: Vec<_> = std::env::args_os().collect();
    // Anything that looks like an option (leading `-`) is unsupported; a lone `-` means stdin.
    let has_option_like_arg = argv
        .iter()
        .skip(1)
        .any(|arg| arg != "-" && arg.as_encoded_bytes().starts_with(b"-"));
    // The terminal check stays last so it only runs when the cheaper checks pass.
    let show_usage = has_option_like_arg
        || argv.len() > 2
        || (argv.len() == 1 && std::io::stdin().is_terminal());
    if show_usage {
        eprintln!(
            "Usage: {} [<path/to/file.pdf>]\n\
            Reads the PDF file passed on the command line,\n\
            Reads stdin if no arguments are passed or if the file name is just a dash `-`.\n\
            If stdin is a terminal, you have to pass `-` explicitly to read from it.",
            argv[0].display()
        );
        return Ok(ExitCode::FAILURE);
    }
    let input = match argv.get(1).filter(|arg| *arg != "-") {
        Some(path) => std::fs::read(path)?,
        None => {
            let mut bytes = Vec::new();
            std::io::stdin().lock().read_to_end(&mut bytes)?;
            bytes
        }
    };
    let pdf = Pdf::parse(input)?;
    println!("{:#?}", pdf.trailer.trailer_dictionary());
    todo!();
    Ok(ExitCode::SUCCESS)
}

1279
src/pdf.rs Normal file

File diff suppressed because it is too large Load diff

829
src/pdf/content_stream.rs Normal file
View file

@ -0,0 +1,829 @@
use crate::{
pdf::{
PdfObjectOrStreamDictionaryOrOperator, PdfObjects, PdfParser, PdfTokenizer,
object::{
NameOr, PdfDictionary, PdfMatrix, PdfName, PdfObject, PdfObjectDirect, PdfRectangle,
PdfStream, PdfStreamContents, PdfString, PdfStringBytesDebug, PdfStringOrNumber,
PdfVec2D,
},
parse::{
GetPdfInputPosition, PdfInputPosition, PdfInputPositionKnown,
PdfInputPositionNoCompare, PdfParse, PdfParseError,
},
render::{
PdfColorDeviceGray, PdfColorDeviceRgb, PdfRenderOperator, PdfRenderState,
PdfRenderingIntent,
},
},
util::ArcOrRef,
};
use std::{fmt, sync::Arc};
/// An operator keyword exactly as it was read, before being matched against the table of
/// known operators: the raw keyword bytes plus the input position they came from.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct PdfOperatorUnparsed {
// Where the keyword was found.
// NOTE(review): the wrapper's name suggests it is ignored by the derived comparisons — confirm.
pos: PdfInputPositionNoCompare,
// The keyword bytes; either shared (`Arc`) or a `'static` reference.
bytes: ArcOrRef<'static, [u8]>,
}
impl GetPdfInputPosition for PdfOperatorUnparsed {
fn get_pdf_input_position(&self) -> PdfInputPosition {
self.pos()
}
}
/// Formats as `PdfOperatorUnparsed(at <pos>, <bytes>)` via the shared helper.
impl fmt::Debug for PdfOperatorUnparsed {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { pos, bytes } = self;
        Self::debug_with_name("PdfOperatorUnparsed", bytes, pos.0, f)
    }
}
/// Private helper trait: builds `Self` from a whole sequence of operand objects at once.
/// Used by operator fields declared with `#[parse_iter(...)]`, which consume every operand.
trait PdfParseIter: Sized {
/// Parses all objects yielded by `iter` into `Self`, or returns the parse error.
fn parse_iter(iter: impl IntoIterator<Item = PdfObject>) -> Result<Self, PdfParseError>;
}
/// Parses each operand with `T::parse` and gathers the results into a shared slice,
/// stopping at the first element that fails to parse.
impl<T: PdfParse> PdfParseIter for Arc<[T]> {
    fn parse_iter(iter: impl IntoIterator<Item = PdfObject>) -> Result<Self, PdfParseError> {
        iter.into_iter().map(T::parse).collect()
    }
}
impl PdfOperatorUnparsed {
    /// Creates an unparsed operator from a position and its keyword bytes.
    pub fn new(
        pos: impl Into<PdfInputPositionNoCompare>,
        bytes: impl Into<ArcOrRef<'static, [u8]>>,
    ) -> Self {
        let pos = pos.into();
        let bytes = bytes.into();
        Self { pos, bytes }
    }

    /// Creates an unparsed operator from static keyword bytes, with an empty position.
    pub const fn new_static(bytes: &'static [u8]) -> Self {
        let pos = PdfInputPositionNoCompare::empty();
        Self {
            pos,
            bytes: ArcOrRef::Ref(bytes),
        }
    }

    /// The position this keyword was read from.
    pub fn pos(&self) -> PdfInputPosition {
        let Self { pos, .. } = self;
        pos.0
    }

    /// The raw keyword bytes.
    pub fn bytes(&self) -> &ArcOrRef<'static, [u8]> {
        let Self { bytes, .. } = self;
        bytes
    }

    /// Shared `Debug` formatter: writes `<name>(at <pos>, <keyword bytes>)`.
    fn debug_with_name(
        name: &str,
        pdf_name: &[u8],
        pos: PdfInputPosition,
        f: &mut fmt::Formatter<'_>,
    ) -> fmt::Result {
        let shown = PdfStringBytesDebug(pdf_name);
        write!(f, "{name}(at {pos}, {shown})")
    }

    /// Wraps the keyword bytes in the adapter used for printing PDF byte strings.
    pub fn bytes_debug(&self) -> PdfStringBytesDebug<'_> {
        PdfStringBytesDebug(self.bytes())
    }
}
/// Generates the content-stream operator types from one declarative table.
///
/// A single invocation produces:
/// * `$PdfOperator` — an operator without its operands: one variant per keyword carrying only
///   the input position, plus the `$Unknown` variant carrying the unparsed keyword;
/// * one `$VariantStruct` per keyword, holding the position and the parsed operand fields,
///   with a `parse(pos, operands)` constructor;
/// * `$PdfOperatorAndOperands` — the enum over all of those structs plus the `$Unknown` case;
/// * `Debug`, `From`, `GetPdfInputPosition` and `PdfRenderOperator` glue between them.
///
/// Each field carries `#[parse_fn(arg, ...)]`: the listed names are bound, in order, to the
/// next operands and passed to `<FieldTy>::parse_fn(...)`. The special form `#[parse_fn(...)]`
/// (literal dots) hands the whole operand iterator to `<FieldTy>::parse_fn`.
///
/// The transcriber uses the captured `$Unknown`, `$unknown_operands`, `$unknown_operator` and
/// `$pos` names everywhere instead of hard-coding `Unknown` / `operands` / `operator` / `pos`,
/// so an invocation is free to choose different names.
macro_rules! make_pdf_operator_enum {
    // Main arm: the full table.
    (
        $(#[$($operator_meta:tt)*])*
        $operator_enum_vis:vis enum $PdfOperator:ident;
        $(#[$($operator_and_operands_meta:tt)*])*
        $enum_vis:vis enum $PdfOperatorAndOperands:ident {
            $(#[$($unknown_variant_meta:tt)*])*
            $Unknown:ident {
                $(#[$($unknown_operands_meta:tt)*])*
                $unknown_operands:ident: $unknown_operands_ty:ty,
                $(#[$($unknown_operator_meta:tt)*])*
                $unknown_operator:ident: $unknown_operator_ty:ty,
            },
            $(
                #[kw = $kw:literal]
                $(#[$($variant_meta:tt)*])*
                $Variant:ident($VariantStruct:ident {
                    $pos:ident: PdfInputPositionNoCompare,
                    $(
                        #[$field_parse:ident($($parse_args:tt)*)]
                        $(#[$($field_meta:tt)*])*
                        $field:ident: $field_ty:ty,
                    )*
                }),
            )*
        }
    ) => {
        // Operator without operands.
        $(#[$($operator_meta)*])*
        $operator_enum_vis enum $PdfOperator {
            $(#[$($unknown_variant_meta)*])*
            $Unknown($unknown_operator_ty),
            $(
                $(#[$($variant_meta)*])*
                $Variant(PdfInputPositionNoCompare),
            )*
        }
        impl $PdfOperator {
            // Combines this operator with its operands; unknown operators keep them unparsed.
            $operator_enum_vis fn parse(self, operands: impl IntoIterator<Item = PdfObject>) -> Result<$PdfOperatorAndOperands, PdfParseError> {
                let operands = operands.into_iter();
                Ok(match self {
                    Self::$Unknown(operator) => $PdfOperatorAndOperands::$Unknown {
                        $unknown_operands: FromIterator::from_iter(operands.map(Into::into)),
                        $unknown_operator: operator,
                    },
                    $(Self::$Variant(pos) => $VariantStruct::parse(pos, operands)?.into(),)*
                })
            }
            $operator_enum_vis fn pos(&self) -> PdfInputPosition {
                match *self {
                    Self::$Unknown(ref operator) => operator.pos(),
                    $(Self::$Variant(pos) => pos.0,)*
                }
            }
        }
        impl fmt::Debug for $PdfOperator {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                match self {
                    Self::$Unknown(operator) => PdfOperatorUnparsed::debug_with_name(stringify!($Unknown), &operator.bytes, operator.pos.0, f),
                    $(Self::$Variant(pos) => PdfOperatorUnparsed::debug_with_name(stringify!($Variant), $kw, pos.0, f),)*
                }
            }
        }
        // Known operators convert back to their keyword bytes.
        impl From<$PdfOperator> for PdfOperatorUnparsed {
            fn from(v: $PdfOperator) -> PdfOperatorUnparsed {
                match v {
                    $PdfOperator::$Unknown(operator) => operator,
                    $($PdfOperator::$Variant(pos) => PdfOperatorUnparsed { pos, bytes: ArcOrRef::Ref($kw) },)*
                }
            }
        }
        // Keyword lookup: anything not in the table becomes `$Unknown`.
        impl From<PdfOperatorUnparsed> for $PdfOperator {
            fn from(v: PdfOperatorUnparsed) -> $PdfOperator {
                match &**v.bytes() {
                    $($kw => Self::$Variant(v.pos),)*
                    _ => Self::$Unknown(v),
                }
            }
        }
        // One struct per known operator, plus its glue impls.
        $(#[derive(Clone)]
        $(#[$($variant_meta)*])*
        $enum_vis struct $VariantStruct {
            $enum_vis $pos: PdfInputPositionNoCompare,
            $(
                $(#[$($field_meta)*])*
                $enum_vis $field: $field_ty,
            )*
        }
        impl fmt::Debug for $VariantStruct {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                f.debug_struct(stringify!($VariantStruct)).field(stringify!($pos), &self.$pos)$(.field(stringify!($field), &self.$field))*.finish()
            }
        }
        impl GetPdfInputPosition for $VariantStruct {
            fn get_pdf_input_position(&self) -> PdfInputPosition {
                self.pos()
            }
        }
        impl From<$VariantStruct> for $PdfOperatorAndOperands {
            fn from(v: $VariantStruct) -> Self {
                Self::$Variant(v)
            }
        }
        impl $VariantStruct {
            $enum_vis fn operator_from_pos(pos: impl Into<PdfInputPositionNoCompare>) -> $PdfOperator {
                $PdfOperator::$Variant(pos.into())
            }
            $enum_vis fn operator(&self) -> $PdfOperator {
                $PdfOperator::$Variant(self.$pos)
            }
            $enum_vis fn pos(&self) -> PdfInputPosition {
                self.$pos.0
            }
        }
        // `parse` is generated by one of the `@impl_variant_parse` arms below.
        make_pdf_operator_enum! {
            @impl_variant_parse
            $enum_vis enum;
            struct $VariantStruct {
                $pos: PdfInputPositionNoCompare,
                $(
                    #[$field_parse($($parse_args)*)]
                    $(#[$($field_meta)*])*
                    $field: $field_ty,
                )*
            }
        })*
        // Operator together with its parsed operands.
        $(#[$($operator_and_operands_meta)*])*
        $enum_vis enum $PdfOperatorAndOperands {
            $(#[$($unknown_variant_meta)*])*
            $Unknown {
                $(#[$($unknown_operands_meta)*])*
                $unknown_operands: $unknown_operands_ty,
                $(#[$($unknown_operator_meta)*])*
                $unknown_operator: $unknown_operator_ty,
            },
            $(
                $(#[$($variant_meta)*])*
                $Variant($VariantStruct),
            )*
        }
        impl $PdfOperatorAndOperands {
            $enum_vis fn operator(&self) -> $PdfOperator {
                match self {
                    Self::$Unknown { $unknown_operator: operator, .. } => $PdfOperator::$Unknown(operator.clone()),
                    $(Self::$Variant(v) => v.operator(),)*
                }
            }
            $enum_vis fn pos(&self) -> PdfInputPosition {
                match self {
                    Self::$Unknown { $unknown_operator: operator, .. } => operator.pos(),
                    $(Self::$Variant(v) => v.pos(),)*
                }
            }
        }
        impl fmt::Debug for $PdfOperatorAndOperands {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                match self {
                    Self::$Unknown {
                        $unknown_operands: operands,
                        $unknown_operator: operator,
                    } => f.debug_struct(stringify!($Unknown)).field(stringify!($unknown_operator), operator).field(stringify!($unknown_operands), operands).finish(),
                    $(Self::$Variant($VariantStruct {
                        $pos,
                        $($field,)*
                    }) => f.debug_struct(stringify!($Variant)).field(stringify!($pos), $pos)$(.field(stringify!($field), $field))*.finish(),)*
                }
            }
        }
        // Rendering dispatches to each operator struct's own `PdfRenderOperator` impl.
        impl PdfRenderOperator for $PdfOperatorAndOperands {
            fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> {
                match self {
                    Self::$Unknown {
                        $unknown_operands: operands,
                        $unknown_operator: operator,
                    } => state.handle_unknown_operator(operator, operands),
                    $(Self::$Variant(v) => <$VariantStruct as PdfRenderOperator>::render(v, state),)*
                }
            }
        }
    };
    // Fixed-arity operators: every name listed in a `#[parse_fn(...)]` consumes one operand.
    (
        @impl_variant_parse
        $enum_vis:vis enum;
        struct $VariantStruct:ident {
            $pos:ident: PdfInputPositionNoCompare,
            $(
                #[$field_parse:ident($($parse_args:ident),* $(,)?)]
                $(#[$($field_meta:tt)*])*
                $field:ident: $field_ty:ty,
            )*
        }
    ) => {
        impl $VariantStruct {
            $enum_vis fn parse(pos: impl Into<PdfInputPositionNoCompare>, operands: impl IntoIterator<Item = PdfObject>) -> Result<Self, PdfParseError> {
                let pos = pos.into();
                let mut operands = operands.into_iter();
                // Pull exactly one operand per declared argument name, in declaration order.
                $($(let Some($parse_args) = operands.next() else {
                    return Err(PdfParseError::OperatorHasTooFewOperands { operator: Self::operator_from_pos(pos) });
                };)*)*
                // Anything left over means the operator was given too many operands.
                if operands.next().is_some() {
                    return Err(PdfParseError::OperatorHasTooManyOperands { operator: Self::operator_from_pos(pos) });
                }
                Ok(Self {
                    $pos: pos,
                    $($field: <$field_ty>::$field_parse($($parse_args),*)?,)*
                })
            }
        }
    };
    // Variadic operators: a single field that consumes the whole operand iterator.
    (
        @impl_variant_parse
        $enum_vis:vis enum;
        struct $VariantStruct:ident {
            $pos:ident: PdfInputPositionNoCompare,
            #[$field_parse:ident(...)]
            $(#[$($field_meta:tt)*])*
            $field:ident: $field_ty:ty,
        }
    ) => {
        impl $VariantStruct {
            $enum_vis fn parse(pos: impl Into<PdfInputPositionNoCompare>, operands: impl IntoIterator<Item = PdfObject>) -> Result<Self, PdfParseError> {
                let pos = pos.into();
                let operands = operands.into_iter();
                Ok(Self {
                    $pos: pos,
                    $field: <$field_ty>::$field_parse(operands)?,
                })
            }
        }
    };
}
// Table of content-stream operators. Each entry gives the keyword bytes (`#[kw = ...]`), the
// enum variant name, the generated struct name, and the operand fields. For a field,
// `#[parse(a, b)]` binds the next operands to `a`, `b` (in order) and calls
// `<FieldTy>::parse(a, b)`; `#[parse_flat(...)]` / `#[parse_iter(...)]` call the method of
// that name instead. An entry with only `pos` accepts no operands at all: the generated
// `parse` returns `OperatorHasTooManyOperands` if any operand is present.
make_pdf_operator_enum! {
#[derive(Clone)]
pub enum PdfOperator;
#[derive(Clone)]
pub enum PdfOperatorAndOperands {
// Fallback for keywords that are not in this table; operands are kept unparsed.
Unknown {
operands: Arc<[PdfObjectDirect]>,
operator: PdfOperatorUnparsed,
},
#[kw = b"b"]
CloseFillAndStrokePath(PdfOperatorCloseFillAndStrokePath {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"B"]
FillAndStrokePath(PdfOperatorFillAndStrokePath {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"b*"]
CloseFillAndStrokePathEvenOdd(PdfOperatorCloseFillAndStrokePathEvenOdd {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"B*"]
FillAndStrokePathEvenOdd(PdfOperatorFillAndStrokePathEvenOdd {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"BDC"]
BeginMarkedContentWithProperties(PdfOperatorBeginMarkedContentWithProperties {
pos: PdfInputPositionNoCompare,
#[parse(tag)]
tag: PdfName,
#[parse(properties)]
properties: NameOr<PdfDictionary>,
}),
// NOTE(review): `BI`, `ID` and `EI` are declared as plain zero-operand operators. In the PDF
// spec an inline image's key/value pairs sit between `BI` and `ID`, and raw image data
// follows `ID`; as written those pairs would reach `ID` as operands and be rejected —
// TODO confirm how inline images are meant to be handled.
#[kw = b"BI"]
BeginInlineImage(PdfOperatorBeginInlineImage {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"BMC"]
BeginMarkedContent(PdfOperatorBeginMarkedContent {
pos: PdfInputPositionNoCompare,
#[parse(tag)]
tag: PdfName,
}),
#[kw = b"BT"]
BeginText(PdfOperatorBeginText {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"BX"]
BeginCompatibilitySection(PdfOperatorBeginCompatibilitySection {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"c"]
CurveTo(PdfOperatorCurveTo {
pos: PdfInputPositionNoCompare,
#[parse(x1, y1)]
p1: PdfVec2D,
#[parse(x2, y2)]
p2: PdfVec2D,
#[parse(x3, y3)]
p3: PdfVec2D,
}),
#[kw = b"cm"]
ConcatMatrix(PdfOperatorConcatMatrix {
pos: PdfInputPositionNoCompare,
#[parse_flat(a, b, c, d, e, f)]
matrix: PdfMatrix,
}),
#[kw = b"CS"]
SetStrokeColorSpace(PdfOperatorSetStrokeColorSpace {
pos: PdfInputPositionNoCompare,
#[parse(name)]
name: PdfName,
}),
#[kw = b"cs"]
SetNonStrokeColorSpace(PdfOperatorSetNonStrokeColorSpace {
pos: PdfInputPositionNoCompare,
#[parse(name)]
name: PdfName,
}),
#[kw = b"d"]
SetLineDashPattern(PdfOperatorSetLineDashPattern {
pos: PdfInputPositionNoCompare,
#[parse(dash_array)]
dash_array: PdfObject, // TODO: actually parse
#[parse(dash_phase)]
dash_phase: PdfObject, // TODO: actually parse
}),
#[kw = b"d0"]
FontType3SetWidth(PdfOperatorFontType3SetWidth {
pos: PdfInputPositionNoCompare,
#[parse(x, y)]
width: PdfVec2D,
}),
#[kw = b"d1"]
FontType3SetWidthAndBBox(PdfOperatorFontType3SetWidthAndBBox {
pos: PdfInputPositionNoCompare,
#[parse(width_x, width_y)]
width: PdfVec2D,
#[parse_flat(lower_left_x, lower_left_y, upper_right_x, upper_right_y)]
bbox: PdfRectangle,
}),
#[kw = b"Do"]
PaintXObject(PdfOperatorPaintXObject {
pos: PdfInputPositionNoCompare,
#[parse(name)]
name: PdfName,
}),
#[kw = b"DP"]
DesignateMarkedContentPointWithProperties(PdfOperatorDesignateMarkedContentPointWithProperties {
pos: PdfInputPositionNoCompare,
#[parse(tag)]
tag: PdfName,
#[parse(properties)]
properties: NameOr<PdfDictionary>,
}),
#[kw = b"EI"]
EndInlineImage(PdfOperatorEndInlineImage {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"EMC"]
EndMarkedContent(PdfOperatorEndMarkedContent {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"ET"]
EndText(PdfOperatorEndText {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"EX"]
EndCompatibilitySection(PdfOperatorEndCompatibilitySection {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"f"]
FillPath(PdfOperatorFillPath {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"F"]
FillPathObsolete(PdfOperatorFillPathObsolete {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"f*"]
FillPathEvenOdd(PdfOperatorFillPathEvenOdd {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"G"]
SetStrokeGray(PdfOperatorSetStrokeGray {
pos: PdfInputPositionNoCompare,
#[parse(gray)]
gray: PdfColorDeviceGray,
}),
#[kw = b"g"]
SetNonStrokeGray(PdfOperatorSetNonStrokeGray {
pos: PdfInputPositionNoCompare,
#[parse(gray)]
gray: PdfColorDeviceGray,
}),
#[kw = b"gs"]
SetGraphicsState(PdfOperatorSetGraphicsState {
pos: PdfInputPositionNoCompare,
#[parse(dictionary_name)]
dictionary_name: PdfName,
}),
#[kw = b"h"]
CloseSubpath(PdfOperatorCloseSubpath {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"i"]
SetFlatnessTolerance(PdfOperatorSetFlatnessTolerance {
pos: PdfInputPositionNoCompare,
#[parse(flatness)]
flatness: f32,
}),
#[kw = b"ID"]
BeginInlineImageData(PdfOperatorBeginInlineImageData {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"j"]
SetLineJoinStyle(PdfOperatorSetLineJoinStyle {
pos: PdfInputPositionNoCompare,
#[parse(line_join_style)]
line_join_style: u8, // TODO parse
}),
#[kw = b"J"]
SetLineCapStyle(PdfOperatorSetLineCapStyle {
pos: PdfInputPositionNoCompare,
#[parse(line_cap_style)]
line_cap_style: u8, // TODO parse
}),
#[kw = b"K"]
SetStrokeCmyk(PdfOperatorSetStrokeCmyk {
pos: PdfInputPositionNoCompare,
#[parse(c)]
c: f32,
#[parse(m)]
m: f32,
#[parse(y)]
y: f32,
#[parse(k)]
k: f32,
}),
#[kw = b"k"]
SetNonStrokeCmyk(PdfOperatorSetNonStrokeCmyk {
pos: PdfInputPositionNoCompare,
#[parse(c)]
c: f32,
#[parse(m)]
m: f32,
#[parse(y)]
y: f32,
#[parse(k)]
k: f32,
}),
#[kw = b"l"]
LineTo(PdfOperatorLineTo {
pos: PdfInputPositionNoCompare,
#[parse(x, y)]
to: PdfVec2D,
}),
#[kw = b"m"]
MoveTo(PdfOperatorMoveTo {
pos: PdfInputPositionNoCompare,
#[parse(x, y)]
to: PdfVec2D,
}),
#[kw = b"M"]
SetMiterLimit(PdfOperatorSetMiterLimit {
pos: PdfInputPositionNoCompare,
#[parse(limit)]
limit: f32,
}),
#[kw = b"MP"]
DesignateMarkedContentPoint(PdfOperatorDesignateMarkedContentPoint {
pos: PdfInputPositionNoCompare,
#[parse(tag)]
tag: PdfName,
}),
#[kw = b"n"]
EndPath(PdfOperatorEndPath {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"q"]
SaveGraphicsState(PdfOperatorSaveGraphicsState {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"Q"]
RestoreGraphicsState(PdfOperatorRestoreGraphicsState {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"re"]
Rectangle(PdfOperatorRectangle {
pos: PdfInputPositionNoCompare,
#[parse(x, y)]
p: PdfVec2D,
#[parse(width, height)]
size: PdfVec2D,
}),
#[kw = b"RG"]
SetStrokeRgb(PdfOperatorSetStrokeRgb {
pos: PdfInputPositionNoCompare,
#[parse_flat(r, g, b)]
color: PdfColorDeviceRgb,
}),
#[kw = b"rg"]
SetNonStrokeRgb(PdfOperatorSetNonStrokeRgb {
pos: PdfInputPositionNoCompare,
#[parse_flat(r, g, b)]
color: PdfColorDeviceRgb,
}),
#[kw = b"ri"]
SetColorRenderingIntent(PdfOperatorSetColorRenderingIntent {
pos: PdfInputPositionNoCompare,
#[parse(intent)]
intent: PdfRenderingIntent,
}),
#[kw = b"s"]
CloseAndStrokePath(PdfOperatorCloseAndStrokePath {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"S"]
StrokePath(PdfOperatorStrokePath {
pos: PdfInputPositionNoCompare,
}),
// The color operators below take a variable number of operands, hence `parse_iter(...)`.
#[kw = b"SC"]
SetStrokeColor(PdfOperatorSetStrokeColor {
pos: PdfInputPositionNoCompare,
#[parse_iter(...)]
color: Arc<[f32]>,
}),
#[kw = b"sc"]
SetNonStrokeColor(PdfOperatorSetNonStrokeColor {
pos: PdfInputPositionNoCompare,
#[parse_iter(...)]
color: Arc<[f32]>,
}),
#[kw = b"SCN"]
SetStrokeColorWithName(PdfOperatorSetStrokeColorWithName {
pos: PdfInputPositionNoCompare,
#[parse_iter(...)]
color_and_name: Arc<[NameOr<f32>]>,
}),
#[kw = b"scn"]
SetNonStrokeColorWithName(PdfOperatorSetNonStrokeColorWithName {
pos: PdfInputPositionNoCompare,
#[parse_iter(...)]
color_and_name: Arc<[NameOr<f32>]>,
}),
// NOTE(review): declared without operands, so any operand before `sh` is rejected with
// `OperatorHasTooManyOperands`. The PDF spec appears to give `sh` one name operand (the
// shading resource) — TODO confirm and add the field.
#[kw = b"sh"]
Shade(PdfOperatorShade {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"T*"]
TextNextLine(PdfOperatorTextNextLine {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"Tc"]
SetCharacterSpacing(PdfOperatorSetCharacterSpacing {
pos: PdfInputPositionNoCompare,
#[parse(char_space)]
char_space: f32,
}),
#[kw = b"Td"]
TextNextLineWithOffset(PdfOperatorTextNextLineWithOffset {
pos: PdfInputPositionNoCompare,
#[parse(x, y)]
offset: PdfVec2D,
}),
#[kw = b"TD"]
TextNextLineWithOffsetAndLeading(PdfOperatorTextNextLineWithOffsetAndLeading {
pos: PdfInputPositionNoCompare,
#[parse(x, y)]
offset: PdfVec2D,
}),
#[kw = b"Tf"]
SetFontAndSize(PdfOperatorSetFontAndSize {
pos: PdfInputPositionNoCompare,
#[parse(font)]
font: PdfName,
#[parse(size)]
size: f32,
}),
#[kw = b"Tj"]
ShowText(PdfOperatorShowText {
pos: PdfInputPositionNoCompare,
#[parse(text)]
text: PdfString,
}),
#[kw = b"TJ"]
ShowTextWithGlyphPositioning(PdfOperatorShowTextWithGlyphPositioning {
pos: PdfInputPositionNoCompare,
#[parse(text_and_positioning)]
text_and_positioning: Arc<[PdfStringOrNumber]>,
}),
#[kw = b"TL"]
SetTextLeading(PdfOperatorSetTextLeading {
pos: PdfInputPositionNoCompare,
#[parse(leading)]
leading: f32,
}),
#[kw = b"Tm"]
SetTextMatrix(PdfOperatorSetTextMatrix {
pos: PdfInputPositionNoCompare,
#[parse_flat(a, b, c, d, e, f)]
matrix: PdfMatrix,
}),
#[kw = b"Tr"]
SetTextRenderingMode(PdfOperatorSetTextRenderingMode {
pos: PdfInputPositionNoCompare,
#[parse(rendering_mode)]
rendering_mode: u8, // TODO: parse
}),
#[kw = b"Ts"]
SetTextRise(PdfOperatorSetTextRise {
pos: PdfInputPositionNoCompare,
#[parse(rise)]
rise: f32,
}),
#[kw = b"Tw"]
SetWordSpacing(PdfOperatorSetWordSpacing {
pos: PdfInputPositionNoCompare,
#[parse(word_space)]
word_space: f32,
}),
#[kw = b"Tz"]
SetTextHorizontalScaling(PdfOperatorSetTextHorizontalScaling {
pos: PdfInputPositionNoCompare,
#[parse(scale_percent)]
scale_percent: f32,
}),
// NOTE(review): declared without operands, so any operand before `v` is rejected with
// `OperatorHasTooManyOperands`. The PDF spec's path-construction table appears to list
// `x2 y2 x3 y3` for `v` — TODO confirm and add the fields.
#[kw = b"v"]
CurveTo23(PdfOperatorCurveTo23 {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"w"]
SetLineWidth(PdfOperatorSetLineWidth {
pos: PdfInputPositionNoCompare,
#[parse(line_width)]
line_width: f32,
}),
#[kw = b"W"]
Clip(PdfOperatorClip {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"W*"]
ClipEvenOdd(PdfOperatorClipEvenOdd {
pos: PdfInputPositionNoCompare,
}),
// NOTE(review): same concern as `v`: the PDF spec appears to list `x1 y1 x3 y3` for `y`,
// but no operand fields are declared here — TODO confirm and add the fields.
#[kw = b"y"]
CurveTo13(PdfOperatorCurveTo13 {
pos: PdfInputPositionNoCompare,
}),
#[kw = b"'"]
TextNextLineAndShow(PdfOperatorTextNextLineAndShow {
pos: PdfInputPositionNoCompare,
#[parse(text)]
text: PdfString,
}),
#[kw = b"\""]
SetSpacingThenTextNextLineAndShow(PdfOperatorSetSpacingThenTextNextLineAndShow {
pos: PdfInputPositionNoCompare,
#[parse(word_space)]
word_space: f32,
#[parse(char_space)]
char_space: f32,
#[parse(text)]
text: PdfString,
}),
}
}
impl GetPdfInputPosition for PdfOperator {
fn get_pdf_input_position(&self) -> PdfInputPosition {
self.pos()
}
}
impl GetPdfInputPosition for PdfOperatorAndOperands {
fn get_pdf_input_position(&self) -> PdfInputPosition {
self.pos()
}
}
/// The decoded body of a content stream: its operators, each with parsed operands,
/// in the order they appear in the stream.
#[derive(Clone)]
pub struct PdfContentStreamData {
/// Operators in stream order.
pub operators: Arc<[PdfOperatorAndOperands]>,
}
/// Prints the struct name and its single `operators` field.
impl fmt::Debug for PdfContentStreamData {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Exhaustive destructuring: a newly added field must be handled here explicitly.
        let Self { operators } = self;
        let mut out = f.debug_struct("PdfContentStreamData");
        out.field("operators", operators);
        out.finish()
    }
}
/// Parses decoded content-stream bytes into a list of operators.
impl PdfStreamContents for PdfContentStreamData {
    /// Tokenizes `data` from offset 0, collecting objects as pending operands until an
    /// operator keyword is met, then parses that operator with the operands gathered so far.
    ///
    /// Errors:
    /// * `StreamNotAllowedHere` if a stream dictionary shows up inside the content stream;
    /// * `MissingOperator` if operands are left over once the input is exhausted;
    /// * any error raised while parsing an object or an operator's operands.
    fn parse(
        data: &[u8],
        stream_pos: PdfInputPosition,
        objects: Arc<PdfObjects>,
    ) -> Result<Self, PdfParseError> {
        // Positions inside the stream are relative to its start and remember the position
        // of the containing stream (when that position is known).
        let start = PdfInputPositionKnown {
            pos: 0,
            containing_streams_pos: stream_pos.get().map(|known| known.pos),
        };
        let mut parser = PdfParser {
            objects,
            tokenizer: PdfTokenizer::new(data, start),
        };
        let mut pending_operands = Vec::new();
        let mut parsed = Vec::new();
        loop {
            parser.skip_comments_and_whitespace();
            if parser.tokenizer.peek().is_none() {
                break;
            }
            match parser.parse_object_or_operator()? {
                PdfObjectOrStreamDictionaryOrOperator::Object(object) => {
                    pending_operands.push(object);
                }
                PdfObjectOrStreamDictionaryOrOperator::Operator(keyword) => {
                    let operator = PdfOperator::from(keyword);
                    // `drain(..)` hands over all pending operands and leaves the buffer empty.
                    parsed.push(operator.parse(pending_operands.drain(..))?);
                }
                PdfObjectOrStreamDictionaryOrOperator::StreamDictionary {
                    stream_kw_pos, ..
                } => return Err(PdfParseError::StreamNotAllowedHere { pos: stream_kw_pos }),
            }
        }
        // Operands with no operator after them are an error.
        if !pending_operands.is_empty() {
            return Err(PdfParseError::MissingOperator {
                pos: parser.tokenizer.pos(),
            });
        }
        Ok(Self {
            operators: parsed.into(),
        })
    }
}
pub type PdfContentStream = PdfStream<PdfDictionary, PdfContentStreamData>;

View file

@ -0,0 +1,743 @@
use crate::{
pdf::{
content_stream::PdfContentStream,
font::PdfFont,
object::{
IsPdfNull, MaybeArray, PdfDate, PdfDictionary, PdfInteger, PdfName, PdfObject,
PdfObjectDirect, PdfObjectIndirect, PdfRectangle, PdfStream, PdfString,
},
parse::{PdfParse, PdfParseError},
pdf_parse,
render::{PdfRenderOperator, PdfRenderState},
},
util::DagDebugState,
};
use rayon::iter::{FromParallelIterator, IntoParallelIterator, ParallelIterator};
use std::{borrow::Cow, fmt, sync::Arc};
// Type tag of the document catalog; only the name `Catalog` is accepted, and it is also
// the `Default` value.
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
pub enum PdfDocumentCatalogType {
#[pdf(name = "Catalog")]
#[default]
Catalog,
}
}
// The document catalog dictionary.
// NOTE(review): `#[pdf(name = "...")]` presumably maps a field to the dictionary key of that
// name and `#[pdf(flatten)]` collects the remaining entries — confirm against `pdf_parse!`.
pdf_parse! {
#[pdf]
#[derive(Clone)]
pub struct PdfDocumentCatalog {
#[pdf(name = "Type")]
pub ty: PdfDocumentCatalogType,
#[pdf(name = "Version")]
pub version: Option<PdfName>,
#[pdf(name = "Extensions")]
pub extensions: Option<PdfDictionary>,
// Parsed eagerly into a page tree plus a flat list of pages (see `PdfPageTree`).
#[pdf(name = "Pages")]
pub pages: PdfPageTree,
// TODO
#[pdf(flatten)]
pub rest: PdfDictionary,
}
}
/// Prints every field, inside a `DagDebugState` scope.
impl fmt::Debug for PdfDocumentCatalog {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Exhaustive destructuring: a newly added field must be handled here explicitly.
        let Self {
            ty,
            version,
            extensions,
            pages,
            rest,
        } = self;
        DagDebugState::scope(|_| {
            let mut out = f.debug_struct("PdfDocumentCatalog");
            out.field("ty", ty);
            out.field("version", version);
            out.field("extensions", extensions);
            out.field("pages", pages);
            out.field("rest", rest);
            out.finish()
        })
    }
}
// A page's resource dictionary. Only the `Font` entry is typed so far; everything else is
// kept in `rest`.
pdf_parse! {
#[pdf]
#[derive(Clone, Debug)]
pub struct PdfResourcesDictionary {
#[pdf(name = "Font")]
pub fonts: PdfDictionary<PdfFont>,
#[pdf(flatten)]
pub rest: PdfDictionary,
}
}
/// A document's pages: the page tree as parsed, plus the flat list of pages built from its
/// leaves. Constructed with `try_from_page_tree_root`.
#[derive(Clone)]
pub struct PdfPageTree {
// Root node, after inheritable data has been pushed down towards the leaves.
page_tree: PdfPageTreeNode,
// One entry per leaf, in the order `collect_leaves` visits them.
pages: Arc<[PdfPage]>,
}
impl PdfPageTree {
    /// Appends clones of every leaf under `node` to `leaves`, visiting kids in order and
    /// descending into nested nodes. Fails with `InvalidType` on the first kid that is
    /// neither a node nor a leaf.
    fn collect_leaves(
        node: &PdfPageTreeNode,
        leaves: &mut Vec<PdfPageTreeLeaf>,
    ) -> Result<(), PdfParseError> {
        node.kids.iter().try_for_each(|kid| match kid {
            PdfPageTreeNodeOrLeaf::Leaf(leaf) => {
                leaves.push(leaf.clone());
                Ok(())
            }
            PdfPageTreeNodeOrLeaf::Node(child) => Self::collect_leaves(child, leaves),
            PdfPageTreeNodeOrLeaf::Other(other) => Err(PdfParseError::InvalidType {
                pos: other.pos(),
                ty: "dictionary",
                expected_ty: "PdfPageTreeNodeOrLeaf",
            }),
        })
    }

    /// Builds the page tree from its root node: pushes inheritable data down to the leaves,
    /// gathers the leaves, then converts each leaf into a `PdfPage` in parallel.
    /// Returns the first error reported by either step.
    pub fn try_from_page_tree_root(mut page_tree: PdfPageTreeNode) -> Result<Self, PdfParseError> {
        page_tree.propagate_inheritable_data_to_leaves();
        let mut leaves = Vec::new();
        Self::collect_leaves(&page_tree, &mut leaves)?;
        let parsed_pages = leaves
            .into_par_iter()
            .map(PdfPage::parse_after_propagating_inheritable_data)
            .panic_fuse();
        let pages: Arc<[PdfPage]> = Result::from_par_iter(parsed_pages)?;
        Ok(Self { page_tree, pages })
    }

    /// The root node of the page tree.
    pub fn page_tree(&self) -> &PdfPageTreeNode {
        let Self { page_tree, .. } = self;
        page_tree
    }

    /// All pages, flattened from the tree's leaves.
    pub fn pages(&self) -> &Arc<[PdfPage]> {
        let Self { pages, .. } = self;
        pages
    }
}
/// Prints only the flattened `pages`; the tree itself is omitted and the output is marked
/// non-exhaustive.
impl fmt::Debug for PdfPageTree {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            page_tree: _,
            pages,
        } = self;
        DagDebugState::scope(|_| {
            let mut out = f.debug_struct("PdfPageTree");
            out.field("pages", pages);
            out.finish_non_exhaustive()
        })
    }
}
impl IsPdfNull for PdfPageTree {
fn is_pdf_null(&self) -> bool {
self.page_tree.is_pdf_null()
}
}
impl PdfParse for PdfPageTree {
fn type_name() -> Cow<'static, str> {
Cow::Borrowed("PdfPageTree")
}
fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
Self::try_from_page_tree_root(PdfParse::parse(object)?)
}
}
// Page attributes that a node or leaf may leave unset and take from an ancestor instead
// (see `propagate_to`). `rest` holds all other entries and is never propagated.
pdf_parse! {
#[pdf]
#[derive(Clone, Default, Debug)]
pub struct PdfPageInheritableData {
#[pdf(name = "Resources")]
pub resources: Option<PdfResourcesDictionary>,
#[pdf(name = "MediaBox")]
pub media_box: Option<PdfRectangle>,
#[pdf(name = "CropBox")]
pub crop_box: Option<PdfRectangle>,
#[pdf(name = "Rotate")]
pub rotate: Option<PdfPageRotation>,
#[pdf(flatten)]
pub rest: PdfDictionary,
}
}
impl PdfPageInheritableData {
    /// Copies each inheritable attribute from `self` into `target` where `target` has none.
    /// Attributes already set on `target` win; `rest` is never copied.
    pub fn propagate_to(&self, target: &mut Self) {
        /// Fills `slot` with a clone of `source` only when `slot` is currently empty.
        fn fill_if_unset<T: Clone>(source: &Option<T>, slot: &mut Option<T>) {
            if slot.is_none() {
                *slot = source.clone();
            }
        }
        // Exhaustive destructuring: a newly added field must be handled here explicitly.
        let Self {
            resources,
            media_box,
            crop_box,
            rotate,
            rest: _,
        } = self;
        fill_if_unset(resources, &mut target.resources);
        fill_if_unset(media_box, &mut target.media_box);
        fill_if_unset(crop_box, &mut target.crop_box);
        fill_if_unset(rotate, &mut target.rotate);
    }
}
// Type tag of an interior page-tree node; only the name `Pages` is accepted, and it is also
// the `Default` value.
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
pub enum PdfPageTreeNodeType {
#[pdf(name = "Pages")]
#[default]
Pages,
}
}
// An interior node of the page tree: its children plus the inheritable attributes set on it.
pdf_parse! {
#[pdf]
#[derive(Clone)]
pub struct PdfPageTreeNode {
#[pdf(name = "Type")]
pub ty: PdfPageTreeNodeType,
// Optional here, unlike on leaves, where `Parent` is required.
#[pdf(name = "Parent")]
pub parent: Option<PdfObjectIndirect>,
// Children, each either another node or a leaf (or an unrecognized dictionary).
#[pdf(name = "Kids")]
pub kids: Arc<[PdfPageTreeNodeOrLeaf]>,
// NOTE(review): stored as read; nothing in view checks it against the actual leaf count.
#[pdf(name = "Count")]
pub count: usize,
// TODO
#[pdf(flatten)]
pub inheritable: PdfPageInheritableData,
}
}
/// Prints every field, inside a `DagDebugState` scope.
impl fmt::Debug for PdfPageTreeNode {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Exhaustive destructuring: a newly added field must be handled here explicitly.
        let Self {
            ty,
            parent,
            kids,
            count,
            inheritable,
        } = self;
        DagDebugState::scope(|_| {
            let mut out = f.debug_struct("PdfPageTreeNode");
            out.field("ty", ty);
            out.field("parent", parent);
            out.field("kids", kids);
            out.field("count", count);
            out.field("inheritable", inheritable);
            out.finish()
        })
    }
}
impl PdfPageTreeNode {
pub fn propagate_inheritable_data_to_leaves(&mut self) {
for kid in Arc::make_mut(&mut self.kids) {
if let Some(target) = kid.inheritable_data_mut() {
self.inheritable.propagate_to(target);
}
kid.propagate_inheritable_data_to_leaves();
}
}
}
// Type tag of a page (leaf) dictionary; only the name `Page` is accepted, and it is also
// the `Default` value.
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
pub enum PdfPageType {
#[pdf(name = "Page")]
#[default]
Page,
}
}
// Value of a page's `Tabs` entry. The three known names get their own variants; any other
// name is preserved in `Other` rather than rejected.
pdf_parse! {
#[pdf(name)]
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub enum PdfPageAnnotationsTabOrder {
#[pdf(name = "R")]
RowOrder,
#[pdf(name = "C")]
ColumnOrder,
#[pdf(name = "S")]
StructureOrder,
#[pdf(other)]
Other(PdfName),
}
}
// A leaf of the page tree, i.e. one page dictionary as stored in the file. Most entries are
// optional and kept as loosely typed dictionaries/streams for now; the inheritable
// attributes live in `inheritable` and are filled in from ancestors by
// `PdfPageTreeNode::propagate_inheritable_data_to_leaves`.
pdf_parse! {
#[pdf]
#[derive(Clone)]
pub struct PdfPageTreeLeaf {
#[pdf(name = "Type")]
pub ty: PdfPageType,
// Required on leaves (not an `Option`, unlike on interior nodes).
#[pdf(name = "Parent")]
pub parent: PdfObjectIndirect,
#[pdf(name = "LastModified")]
pub last_modified: Option<PdfDate>,
#[pdf(name = "BleedBox")]
pub bleed_box: Option<PdfRectangle>,
#[pdf(name = "TrimBox")]
pub trim_box: Option<PdfRectangle>,
#[pdf(name = "ArtBox")]
pub art_box: Option<PdfRectangle>,
#[pdf(name = "BoxColorInfo")]
pub box_color_info: Option<PdfDictionary>,
// Either a single content stream or an array of them.
#[pdf(name = "Contents")]
pub contents: MaybeArray<PdfContentStream>,
#[pdf(name = "Group")]
pub group: Option<PdfDictionary>,
#[pdf(name = "Thumb")]
pub thumbnail: Option<PdfStream>,
#[pdf(name = "B")]
pub beads: Option<Arc<[PdfDictionary]>>,
#[pdf(name = "Dur")]
pub duration: Option<f32>,
#[pdf(name = "Trans")]
pub transition: Option<PdfDictionary>,
#[pdf(name = "Annots")]
pub annotations: Option<Arc<[PdfDictionary]>>,
#[pdf(name = "AA")]
pub additional_actions: Option<PdfDictionary>,
#[pdf(name = "Metadata")]
pub metadata: Option<PdfStream>,
#[pdf(name = "PieceInfo")]
pub piece_info: Option<PdfDictionary>,
#[pdf(name = "StructParents")]
pub structural_parents: Option<PdfInteger>,
#[pdf(name = "ID")]
pub parent_web_capture_content_set_id: Option<PdfString>,
#[pdf(name = "PZ")]
pub preferred_zoom_factor: Option<f32>,
#[pdf(name = "SeparationInfo")]
pub separation_info: Option<PdfDictionary>,
#[pdf(name = "Tabs")]
pub annotations_tab_order: Option<PdfPageAnnotationsTabOrder>,
#[pdf(name = "TemplateInstantiated")]
pub template_instantiated: Option<PdfName>,
#[pdf(name = "PresSteps")]
pub pres_steps: Option<PdfDictionary>,
#[pdf(name = "UserUnit")]
pub user_unit: Option<f32>,
#[pdf(name = "VP")]
pub viewports: Option<Arc<[PdfDictionary]>>,
// Resources / MediaBox / CropBox / Rotate, plus all entries not listed above.
#[pdf(flatten)]
pub inheritable: PdfPageInheritableData,
}
}
/// Prints every field in declaration order, inside a `DagDebugState` scope.
impl fmt::Debug for PdfPageTreeLeaf {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Exhaustive destructuring: a newly added field must be handled here explicitly.
        let Self {
            ty,
            parent,
            last_modified,
            bleed_box,
            trim_box,
            art_box,
            box_color_info,
            contents,
            group,
            thumbnail,
            beads,
            duration,
            transition,
            annotations,
            additional_actions,
            metadata,
            piece_info,
            structural_parents,
            parent_web_capture_content_set_id,
            preferred_zoom_factor,
            separation_info,
            annotations_tab_order,
            template_instantiated,
            pres_steps,
            user_unit,
            viewports,
            inheritable,
        } = self;
        DagDebugState::scope(|_| {
            let mut out = f.debug_struct("PdfPageTreeLeaf");
            out.field("ty", ty);
            out.field("parent", parent);
            out.field("last_modified", last_modified);
            out.field("bleed_box", bleed_box);
            out.field("trim_box", trim_box);
            out.field("art_box", art_box);
            out.field("box_color_info", box_color_info);
            out.field("contents", contents);
            out.field("group", group);
            out.field("thumbnail", thumbnail);
            out.field("beads", beads);
            out.field("duration", duration);
            out.field("transition", transition);
            out.field("annotations", annotations);
            out.field("additional_actions", additional_actions);
            out.field("metadata", metadata);
            out.field("piece_info", piece_info);
            out.field("structural_parents", structural_parents);
            out.field(
                "parent_web_capture_content_set_id",
                parent_web_capture_content_set_id,
            );
            out.field("preferred_zoom_factor", preferred_zoom_factor);
            out.field("separation_info", separation_info);
            out.field("annotations_tab_order", annotations_tab_order);
            out.field("template_instantiated", template_instantiated);
            out.field("pres_steps", pres_steps);
            out.field("user_unit", user_unit);
            out.field("viewports", viewports);
            out.field("inheritable", inheritable);
            out.finish()
        })
    }
}
// A page-tree entry, selected by the `Type` tag: the value `Pages` maps to
// `Node`, the value `Page` maps to `Leaf`, and the `other` variant keeps the
// dictionary as-is.
// NOTE(review): the exact matching rules live in the `pdf_parse!` macro, which
// is defined outside this chunk — confirm there.
pdf_parse! {
#[pdf(tag = "Type")]
#[derive(Clone)]
pub enum PdfPageTreeNodeOrLeaf {
#[pdf(tag_value = "Pages")]
Node(PdfPageTreeNode),
#[pdf(tag_value = "Page")]
Leaf(PdfPageTreeLeaf),
#[pdf(other)]
Other(PdfDictionary),
}
}
impl PdfPageTreeNodeOrLeaf {
pub fn propagate_inheritable_data_to_leaves(&mut self) {
match self {
PdfPageTreeNodeOrLeaf::Node(v) => v.propagate_inheritable_data_to_leaves(),
PdfPageTreeNodeOrLeaf::Leaf(_) | PdfPageTreeNodeOrLeaf::Other(_) => {}
}
}
pub fn inheritable_data_mut(&mut self) -> Option<&mut PdfPageInheritableData> {
match self {
PdfPageTreeNodeOrLeaf::Node(v) => Some(&mut v.inheritable),
PdfPageTreeNodeOrLeaf::Leaf(v) => Some(&mut v.inheritable),
PdfPageTreeNodeOrLeaf::Other(_) => None,
}
}
pub fn inheritable_data(&self) -> Option<&PdfPageInheritableData> {
match self {
PdfPageTreeNodeOrLeaf::Node(v) => Some(&v.inheritable),
PdfPageTreeNodeOrLeaf::Leaf(v) => Some(&v.inheritable),
PdfPageTreeNodeOrLeaf::Other(_) => None,
}
}
}
impl fmt::Debug for PdfPageTreeNodeOrLeaf {
    /// Delegates to the `Debug` output of whichever variant is held, without
    /// adding a wrapper for the enum itself.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let inner: &dyn fmt::Debug = match self {
            Self::Node(node) => node,
            Self::Leaf(leaf) => leaf,
            Self::Other(dictionary) => dictionary,
        };
        fmt::Debug::fmt(inner, f)
    }
}
/// The amount by which the page is rotated clockwise when displaying or
/// printing; always a multiple of 90 degrees.
///
/// Each variant's discriminant is its angle in degrees.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub enum PdfPageRotation {
    #[default]
    NoRotation = 0,
    ClockwiseBy90Degrees = 90,
    By180Degrees = 180,
    ClockwiseBy270Degrees = 270,
}

impl PdfPageRotation {
    /// Converts a clockwise angle in degrees into a rotation.
    ///
    /// The angle is first reduced into `0..360` (so `-90` and `450` are
    /// accepted); returns `None` when the reduced angle is not a multiple
    /// of 90.
    pub fn from_clockwise_angle_in_degrees(angle: i32) -> Option<Self> {
        let normalized = angle.rem_euclid(360);
        [
            Self::NoRotation,
            Self::ClockwiseBy90Degrees,
            Self::By180Degrees,
            Self::ClockwiseBy270Degrees,
        ]
        .iter()
        .copied()
        .find(|rotation| *rotation as i32 == normalized)
    }

    /// Same as [`Self::from_clockwise_angle_in_degrees`], for `i128` angles.
    pub fn from_clockwise_angle_in_degrees_i128(angle: i128) -> Option<Self> {
        // `angle % 360` lies in -359..=359, so the narrowing cast is lossless.
        let reduced = (angle % 360) as i32;
        Self::from_clockwise_angle_in_degrees(reduced)
    }
}

impl From<PdfPageRotation> for i32 {
    /// Returns the rotation's clockwise angle in degrees.
    fn from(rotation: PdfPageRotation) -> Self {
        rotation as Self
    }
}
impl IsPdfNull for PdfPageRotation {
/// A rotation value is never reported as PDF null.
fn is_pdf_null(&self) -> bool {
false
}
}
impl PdfParse for PdfPageRotation {
    fn type_name() -> Cow<'static, str> {
        Cow::Borrowed("page rotation")
    }

    /// Parses an integer object as a clockwise angle in degrees.
    ///
    /// # Errors
    ///
    /// Propagates the error from `PdfInteger::parse`, and returns
    /// `PdfParseError::IntegerOutOfRange` (at the object's position) when the
    /// angle is not a multiple of 90.
    fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
        let direct = PdfObjectDirect::from(object);
        // Capture the position before `direct` is consumed by the conversion.
        let pos = direct.pos();
        let degrees = PdfInteger::parse(direct.into())?;
        match Self::from_clockwise_angle_in_degrees_i128(degrees.value()) {
            Some(rotation) => Ok(rotation),
            None => Err(PdfParseError::IntegerOutOfRange { pos }),
        }
    }
}
/// A page built from a `PdfPageTreeLeaf` by
/// `PdfPage::parse_after_propagating_inheritable_data`, with the optional
/// boxes, rotation and user unit replaced by their defaults.
#[derive(Clone)]
pub struct PdfPage {
pub ty: PdfPageType,
pub parent: PdfObjectIndirect,
pub last_modified: Option<PdfDate>,
/// Required: construction fails when the leaf's inheritable data has none.
pub resources: PdfResourcesDictionary,
/// Required: construction fails when the leaf's inheritable data has none.
pub media_box: PdfRectangle,
/// Falls back to `media_box` when the leaf has no crop box.
pub crop_box: PdfRectangle,
/// Falls back to `crop_box` when absent.
pub bleed_box: PdfRectangle,
/// Falls back to `crop_box` when absent.
pub trim_box: PdfRectangle,
/// Falls back to `crop_box` when absent.
pub art_box: PdfRectangle,
pub box_color_info: Option<PdfDictionary>,
pub contents: Arc<[PdfContentStream]>,
/// Falls back to `PdfPageRotation::NoRotation` when absent.
pub rotate: PdfPageRotation,
pub group: Option<PdfDictionary>,
pub thumbnail: Option<PdfStream>,
pub beads: Option<Arc<[PdfDictionary]>>,
pub duration: Option<f32>,
pub transition: Option<PdfDictionary>,
pub annotations: Option<Arc<[PdfDictionary]>>,
pub additional_actions: Option<PdfDictionary>,
pub metadata: Option<PdfStream>,
pub piece_info: Option<PdfDictionary>,
pub structural_parents: Option<PdfInteger>,
pub parent_web_capture_content_set_id: Option<PdfString>,
pub preferred_zoom_factor: Option<f32>,
pub separation_info: Option<PdfDictionary>,
pub annotations_tab_order: Option<PdfPageAnnotationsTabOrder>,
pub template_instantiated: Option<PdfName>,
pub pres_steps: Option<PdfDictionary>,
/// Falls back to `1.0` when absent.
pub user_unit: f32,
pub viewports: Option<Arc<[PdfDictionary]>>,
/// The `rest` dictionary taken from the leaf's inheritable data.
pub rest: PdfDictionary,
/// `None` only while the page is being constructed; filled in by
/// `parse_after_propagating_inheritable_data` before the page is returned.
rendered_objects: Option<PdfPageRenderedObjects>,
}
impl PdfPage {
    /// Returns the objects produced by rendering this page.
    ///
    /// The private `rendered_objects` field is filled in by
    /// [`Self::parse_after_propagating_inheritable_data`] before a page is
    /// handed out, so the `None` case is treated as unreachable.
    pub fn rendered_objects(&self) -> &PdfPageRenderedObjects {
        match &self.rendered_objects {
            Some(rendered) => rendered,
            None => unreachable!(),
        }
    }

    /// Builds a page from a page-tree leaf and renders it.
    ///
    /// Missing optional entries are defaulted: the crop box to the media box;
    /// the bleed, trim and art boxes to the crop box; the rotation to
    /// `NoRotation`; the user unit to `1.0`.
    ///
    /// # Errors
    ///
    /// Returns `PdfParseError::InvalidType` when the leaf has no resources
    /// dictionary or no media box, and propagates any error from rendering
    /// the page.
    pub fn parse_after_propagating_inheritable_data(
        leaf: PdfPageTreeLeaf,
    ) -> Result<Self, PdfParseError> {
        let PdfPageTreeLeaf {
            ty,
            parent,
            last_modified,
            bleed_box,
            trim_box,
            art_box,
            box_color_info,
            contents,
            group,
            thumbnail,
            beads,
            duration,
            transition,
            annotations,
            additional_actions,
            metadata,
            piece_info,
            structural_parents,
            parent_web_capture_content_set_id,
            preferred_zoom_factor,
            separation_info,
            annotations_tab_order,
            template_instantiated,
            pres_steps,
            user_unit,
            viewports,
            inheritable,
        } = leaf;
        let PdfPageInheritableData {
            resources,
            media_box,
            crop_box,
            rotate,
            rest,
        } = inheritable;

        // Both "missing entry" errors are reported at the position of `rest`.
        let pos = rest.pos();
        let Some(resources) = resources else {
            return Err(PdfParseError::InvalidType {
                pos,
                ty: "null",
                expected_ty: "page resources dictionary",
            });
        };
        let Some(media_box) = media_box else {
            return Err(PdfParseError::InvalidType {
                pos,
                ty: "null",
                expected_ty: "page MediaBox rectangle",
            });
        };
        let crop_box = crop_box.unwrap_or(media_box);

        let mut page = Self {
            ty,
            parent,
            last_modified,
            resources,
            media_box,
            crop_box,
            bleed_box: bleed_box.unwrap_or(crop_box),
            trim_box: trim_box.unwrap_or(crop_box),
            art_box: art_box.unwrap_or(crop_box),
            box_color_info,
            contents: contents.0,
            rotate: rotate.unwrap_or(PdfPageRotation::NoRotation),
            group,
            thumbnail,
            beads,
            duration,
            transition,
            annotations,
            additional_actions,
            metadata,
            piece_info,
            structural_parents,
            parent_web_capture_content_set_id,
            preferred_zoom_factor,
            separation_info,
            annotations_tab_order,
            template_instantiated,
            pres_steps,
            user_unit: user_unit.unwrap_or(1.0),
            viewports,
            rest,
            // Rendering needs the finished page, so this is filled in below.
            rendered_objects: None,
        };
        let rendered = PdfPageRenderedObjects::render_page(&page)?;
        page.rendered_objects = Some(rendered);
        Ok(page)
    }
}
impl fmt::Debug for PdfPage {
    /// Formats the page as a `PdfPage { .. }` debug struct listing every
    /// field; a page whose `rendered_objects` is still `None` shows
    /// `<unparsed>` in that slot.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        DagDebugState::scope(|_state| {
            /// Placeholder printed when there are no rendered objects yet.
            struct Unparsed;
            impl fmt::Debug for Unparsed {
                fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                    f.write_str("<unparsed>")
                }
            }

            // Exhaustive destructuring keeps this impl in sync with the struct.
            let Self {
                ty,
                parent,
                last_modified,
                resources,
                media_box,
                crop_box,
                bleed_box,
                trim_box,
                art_box,
                box_color_info,
                contents,
                rotate,
                group,
                thumbnail,
                beads,
                duration,
                transition,
                annotations,
                additional_actions,
                metadata,
                piece_info,
                structural_parents,
                parent_web_capture_content_set_id,
                preferred_zoom_factor,
                separation_info,
                annotations_tab_order,
                template_instantiated,
                pres_steps,
                user_unit,
                viewports,
                rest,
                rendered_objects,
            } = self;
            let rendered: &dyn fmt::Debug = match rendered_objects {
                Some(objects) => objects,
                None => &Unparsed,
            };

            let mut out = f.debug_struct("PdfPage");
            out.field("ty", ty);
            out.field("parent", parent);
            out.field("last_modified", last_modified);
            out.field("resources", resources);
            out.field("media_box", media_box);
            out.field("crop_box", crop_box);
            out.field("bleed_box", bleed_box);
            out.field("trim_box", trim_box);
            out.field("art_box", art_box);
            out.field("box_color_info", box_color_info);
            out.field("contents", contents);
            out.field("rotate", rotate);
            out.field("group", group);
            out.field("thumbnail", thumbnail);
            out.field("beads", beads);
            out.field("duration", duration);
            out.field("transition", transition);
            out.field("annotations", annotations);
            out.field("additional_actions", additional_actions);
            out.field("metadata", metadata);
            out.field("piece_info", piece_info);
            out.field("structural_parents", structural_parents);
            out.field(
                "parent_web_capture_content_set_id",
                parent_web_capture_content_set_id,
            );
            out.field("preferred_zoom_factor", preferred_zoom_factor);
            out.field("separation_info", separation_info);
            out.field("annotations_tab_order", annotations_tab_order);
            out.field("template_instantiated", template_instantiated);
            out.field("pres_steps", pres_steps);
            out.field("user_unit", user_unit);
            out.field("viewports", viewports);
            out.field("rest", rest);
            out.field("rendered_objects", rendered);
            out.finish()
        })
    }
}
/// Result of rendering a page's content streams.
///
/// Currently carries no data; its construction via `render_page` still runs
/// every operator of the page.
#[derive(Clone, Debug)]
pub struct PdfPageRenderedObjects {}
impl PdfPageRenderedObjects {
/// Renders `page` by feeding every operator of every content stream, in
/// order, into one `PdfRenderState` created for the page.
///
/// Stops at the first error: either a content stream whose decoded data is
/// an error, or an operator whose `render` call fails.
fn render_page(page: &PdfPage) -> Result<Self, PdfParseError> {
let mut state = PdfRenderState::new(page);
for content_stream in page.contents.iter() {
for op in content_stream.decoded_data().as_ref()?.operators.iter() {
op.render(&mut state)?;
}
}
// The render state is dropped here; nothing from it is stored yet.
Ok(Self {})
}
}

924
src/pdf/font.rs Normal file
View file

@ -0,0 +1,924 @@
use crate::{
pdf::{
object::{
IsPdfNull, PdfArray, PdfDictionary, PdfMatrix, PdfName, PdfNameOrInteger, PdfObject,
PdfObjectDirect, PdfRectangle, PdfStream, PdfString,
},
parse::{
GetPdfInputPosition, PdfInputPosition, PdfInputPositionNoCompare, PdfParse,
PdfParseError,
},
pdf_parse,
},
util::{ArcOrRef, DagDebugState},
};
use std::{borrow::Cow, collections::BTreeMap, fmt, sync::Arc};
mod tables;
mod type_1_parse;
// Wrapper around the stream of a font's `ToUnicode` entry; the stream is
// stored unparsed for now (see the TODO below).
pdf_parse! {
#[pdf(transparent)]
#[derive(Clone)]
// TODO: actually parse the stream
pub struct PdfFontToUnicode {
#[pdf]
stream: PdfStream,
}
}
impl fmt::Debug for PdfFontToUnicode {
    /// Formats as `PdfFontToUnicode { stream: .. }` inside a
    /// `DagDebugState::scope` call.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        DagDebugState::scope(|_state| {
            // Exhaustive destructuring keeps this impl in sync with the struct.
            let Self { stream } = self;
            let mut out = f.debug_struct("PdfFontToUnicode");
            out.field("stream", stream);
            out.finish()
        })
    }
}
// Value of a font descriptor's `Type` entry; the only accepted name is
// `FontDescriptor`, which is also the `Default`.
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
pub enum PdfFontDescriptorType {
#[pdf(name = "FontDescriptor")]
#[default]
FontDescriptor,
}
}
// Names accepted for a font descriptor's `FontStretch` entry. Each variant is
// spelled exactly like the PDF name it maps to.
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub enum PdfFontStretch {
#[pdf(name = "UltraCondensed")]
UltraCondensed,
#[pdf(name = "ExtraCondensed")]
ExtraCondensed,
#[pdf(name = "Condensed")]
Condensed,
#[pdf(name = "SemiCondensed")]
SemiCondensed,
#[pdf(name = "Normal")]
Normal,
#[pdf(name = "SemiExpanded")]
SemiExpanded,
#[pdf(name = "Expanded")]
Expanded,
#[pdf(name = "ExtraExpanded")]
ExtraExpanded,
#[pdf(name = "UltraExpanded")]
UltraExpanded,
}
}
// A font descriptor dictionary. `Type`, `FontName`, `Flags` and `ItalicAngle`
// are non-optional fields; every other named entry is an `Option`.
// NOTE(review): entries not named here presumably end up in `rest` via
// `#[pdf(flatten)]` — confirm against the `pdf_parse!` macro.
pdf_parse! {
#[pdf]
#[derive(Clone)]
pub struct PdfFontDescriptor {
#[pdf(name = "Type")]
pub ty: PdfFontDescriptorType,
#[pdf(name = "FontName")]
pub font_name: PdfName,
#[pdf(name = "FontFamily")]
pub font_family: Option<PdfString>,
#[pdf(name = "FontStretch")]
pub font_stretch: Option<PdfFontStretch>,
#[pdf(name = "FontWeight")]
pub font_weight: Option<u32>,
#[pdf(name = "Flags")]
pub flags: u32,
#[pdf(name = "FontBBox")]
pub font_bounding_box: Option<PdfRectangle>,
#[pdf(name = "ItalicAngle")]
pub italic_angle: f32,
#[pdf(name = "Ascent")]
pub ascent: Option<f32>,
#[pdf(name = "Descent")]
pub descent: Option<f32>,
#[pdf(name = "Leading")]
pub leading: Option<f32>,
#[pdf(name = "CapHeight")]
pub cap_height: Option<f32>,
#[pdf(name = "XHeight")]
pub x_height: Option<f32>,
#[pdf(name = "StemV")]
pub stem_v: Option<f32>,
#[pdf(name = "StemH")]
pub stem_h: Option<f32>,
#[pdf(name = "AvgWidth")]
pub avg_width: Option<f32>,
#[pdf(name = "MaxWidth")]
pub max_width: Option<f32>,
#[pdf(name = "MissingWidth")]
pub missing_width: Option<f32>,
// The only font-file entry whose stream data has a dedicated type
// (`PdfFontType1Program`); `FontFile2`/`FontFile3` are plain streams.
#[pdf(name = "FontFile")]
pub font_file: Option<PdfStream<PdfDictionary, PdfFontType1Program>>,
#[pdf(name = "FontFile2")]
pub font_file2: Option<PdfStream>,
#[pdf(name = "FontFile3")]
pub font_file3: Option<PdfStream>,
#[pdf(name = "CharSet")]
pub char_set: Option<PdfString>,
#[pdf(flatten)]
pub rest: PdfDictionary,
}
}
impl fmt::Debug for PdfFontDescriptor {
    /// Formats the descriptor as a `PdfFontDescriptor { .. }` debug struct
    /// listing every field, inside a `DagDebugState::scope` call.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        DagDebugState::scope(|_state| {
            // Exhaustive destructuring keeps this impl in sync with the struct.
            let Self {
                ty,
                font_name,
                font_family,
                font_stretch,
                font_weight,
                flags,
                font_bounding_box,
                italic_angle,
                ascent,
                descent,
                leading,
                cap_height,
                x_height,
                stem_v,
                stem_h,
                avg_width,
                max_width,
                missing_width,
                font_file,
                font_file2,
                font_file3,
                char_set,
                rest,
            } = self;
            let mut out = f.debug_struct("PdfFontDescriptor");
            out.field("ty", ty);
            out.field("font_name", font_name);
            out.field("font_family", font_family);
            out.field("font_stretch", font_stretch);
            out.field("font_weight", font_weight);
            out.field("flags", flags);
            out.field("font_bounding_box", font_bounding_box);
            out.field("italic_angle", italic_angle);
            out.field("ascent", ascent);
            out.field("descent", descent);
            out.field("leading", leading);
            out.field("cap_height", cap_height);
            out.field("x_height", x_height);
            out.field("stem_v", stem_v);
            out.field("stem_h", stem_h);
            out.field("avg_width", avg_width);
            out.field("max_width", max_width);
            out.field("missing_width", missing_width);
            out.field("font_file", font_file);
            out.field("font_file2", font_file2);
            out.field("font_file3", font_file3);
            out.field("char_set", char_set);
            out.field("rest", rest);
            out.finish()
        })
    }
}
// Value of a font dictionary's `Type` entry; the only accepted name is
// `Font`, which is also the `Default`.
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
pub enum PdfFontType {
#[pdf(name = "Font")]
#[default]
Font,
}
}
/// Placeholder for PDF structures that are not implemented yet.
///
/// The enum has no variants, so no value of it can exist; the trait impls
/// below only exist to satisfy bounds.
#[derive(Clone)]
pub enum PdfTodo {}
impl fmt::Debug for PdfTodo {
fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
// No value exists, so the empty match covers every case.
match *self {}
}
}
impl IsPdfNull for PdfTodo {
fn is_pdf_null(&self) -> bool {
// No value exists, so the empty match covers every case.
match *self {}
}
}
impl PdfParse for PdfTodo {
fn type_name() -> Cow<'static, str> {
Cow::Borrowed("PdfTodo")
}
/// Always panics via `todo!`, printing the object that would have needed
/// parsing; `#[track_caller]` attributes the panic to the call site.
#[track_caller]
fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
todo!("{object:?}")
}
}
// A font dictionary, selected by the `Subtype` tag: `Type0` and `Type1` have
// dedicated types; the `other` variant holds `PdfTodo`, whose `parse` panics
// with `todo!`.
// NOTE(review): this suggests any other subtype panics while parsing —
// confirm against the `pdf_parse!` macro.
pdf_parse! {
#[pdf(tag = "Subtype")]
#[derive(Clone)]
pub enum PdfFont {
#[pdf(tag_value = "Type0")]
Type0(Arc<PdfFontType0>),
#[pdf(tag_value = "Type1")]
Type1(PdfFontType1),
#[pdf(other)]
Other(Arc<PdfTodo>),
}
}
impl fmt::Debug for PdfFont {
    /// Delegates to the held font: `Type0` goes through
    /// `DagDebugState::debug_or_id` with the label `PdfFontType0(...)`,
    /// `Type1` uses its own `Debug` impl, and `Other` cannot hold a value.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        DagDebugState::scope(|state| {
            match self {
                Self::Type0(font) => state.debug_or_id(font, "PdfFontType0(...)").fmt(f),
                Self::Type1(font) => font.fmt(f),
                // `PdfTodo` has no variants, so this arm can never run.
                Self::Other(todo) => match **todo {},
            }
        })
    }
}
impl PdfFont {
/// Whether the font uses the vertical writing mode.
///
/// Not implemented yet: currently returns `false` for every font.
pub(crate) fn is_vertical_writing_mode(&self) -> bool {
// TODO:
false
}
}
// Value of a Type 0 font's `Subtype` entry; the only accepted name is
// `Type0`, which is also the `Default`.
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
pub enum PdfFontType0Subtype {
#[pdf(name = "Type0")]
#[default]
Type0,
}
}
// A Type 0 (composite) font dictionary.
//
// `Encoding` and the single descendant font are kept in unparsed form for now
// (see the TODOs below).
pdf_parse! {
    #[pdf]
    #[derive(Clone)]
    pub struct PdfFontType0 {
        #[pdf(name = "Type")]
        pub ty: PdfFontType,
        #[pdf(name = "Subtype")]
        pub subtype: PdfFontType0Subtype,
        #[pdf(name = "BaseFont")]
        pub base_font: PdfName,
        #[pdf(name = "Encoding")]
        // TODO
        pub encoding: PdfObjectDirect,
        // The PDF key is spelled `DescendantFonts`. The previous spelling
        // (`DescendentFonts`) never matches the key in a real file, so this
        // non-optional field could not be found. The Rust field name keeps
        // its historical spelling so existing users of the struct still
        // compile.
        #[pdf(name = "DescendantFonts")]
        // TODO
        pub descendent_fonts: [PdfDictionary; 1],
        #[pdf(name = "ToUnicode")]
        pub to_unicode: Option<PdfFontToUnicode>,
        #[pdf(flatten)]
        pub rest: PdfDictionary,
    }
}
impl fmt::Debug for PdfFontType0 {
    /// Formats the font as a `PdfFontType0 { .. }` debug struct listing every
    /// field, inside a `DagDebugState::scope` call.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        DagDebugState::scope(|_state| {
            // Exhaustive destructuring keeps this impl in sync with the struct.
            let Self {
                ty,
                subtype,
                base_font,
                encoding,
                descendent_fonts,
                to_unicode,
                rest,
            } = self;
            let mut out = f.debug_struct("PdfFontType0");
            out.field("ty", ty);
            out.field("subtype", subtype);
            out.field("base_font", base_font);
            out.field("encoding", encoding);
            out.field("descendent_fonts", descendent_fonts);
            out.field("to_unicode", to_unicode);
            out.field("rest", rest);
            out.finish()
        })
    }
}
// Value of a Type 1 font's `Subtype` entry; the only accepted name is
// `Type1`, which is also the `Default`.
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
pub enum PdfFontType1Subtype {
#[pdf(name = "Type1")]
#[default]
Type1,
}
}
// The fourteen `BaseFont` names that make a Type 1 font parse as
// `PdfFontType1Standard` (see `PdfFontType1::parse`); any other base font name
// is handled as `PdfFontType1Other`.
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub enum PdfStandardFontName {
#[pdf(name = "Times-Roman")]
TimesRoman,
#[pdf(name = "Helvetica")]
Helvetica,
#[pdf(name = "Courier")]
Courier,
#[pdf(name = "Symbol")]
Symbol,
#[pdf(name = "Times-Bold")]
TimesBold,
#[pdf(name = "Helvetica-Bold")]
HelveticaBold,
#[pdf(name = "Courier-Bold")]
CourierBold,
#[pdf(name = "ZapfDingbats")]
ZapfDingbats,
#[pdf(name = "Times-Italic")]
TimesItalic,
#[pdf(name = "Helvetica-Oblique")]
HelveticaOblique,
#[pdf(name = "Courier-Oblique")]
CourierOblique,
#[pdf(name = "Times-BoldItalic")]
TimesBoldItalic,
#[pdf(name = "Helvetica-BoldOblique")]
HelveticaBoldOblique,
#[pdf(name = "Courier-BoldOblique")]
CourierBoldOblique,
}
}
/// A Type 1 font dictionary.
///
/// `Standard` is used when `BaseFont` parses as a `PdfStandardFontName`; in
/// that form most entries are optional. `Other` is every remaining Type 1
/// font, where `FirstChar`, `LastChar`, `Widths` and `FontDescriptor` are
/// non-optional fields.
#[derive(Clone)]
pub enum PdfFontType1 {
Standard(Arc<PdfFontType1Standard>),
Other(Arc<PdfFontType1Other>),
}
impl fmt::Debug for PdfFontType1 {
    /// Formats the held font through `DagDebugState::debug_or_id`, using a
    /// label that names the variant's struct.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        DagDebugState::scope(|state| {
            match self {
                Self::Standard(font) => {
                    state.debug_or_id(font, "PdfFontType1Standard(...)").fmt(f)
                }
                Self::Other(font) => state.debug_or_id(font, "PdfFontType1Other(...)").fmt(f),
            }
        })
    }
}
impl PdfFontType1 {
    /// Clones the font's entries into the variant-independent
    /// `PdfFontType1Common` form.
    pub fn common(&self) -> PdfFontType1Common {
        match self {
            Self::Standard(font) => font.common(),
            Self::Other(font) => font.common(),
        }
    }

    /// The `Name` entry, if present.
    pub fn name(&self) -> &Option<PdfName> {
        match self {
            PdfFontType1::Standard(font) => &font.name,
            PdfFontType1::Other(font) => &font.name,
        }
    }

    /// The `BaseFont` entry as a name (converted from the standard-font enum
    /// for `Standard` fonts).
    pub fn base_font(&self) -> PdfName {
        match self {
            PdfFontType1::Standard(font) => font.base_font.into(),
            PdfFontType1::Other(font) => font.base_font.clone(),
        }
    }

    /// The `FirstChar` entry; always `Some` for `Other` fonts.
    pub fn first_char(&self) -> Option<u32> {
        match self {
            PdfFontType1::Standard(font) => font.first_char,
            PdfFontType1::Other(font) => Some(font.first_char),
        }
    }

    /// The `LastChar` entry; always `Some` for `Other` fonts.
    pub fn last_char(&self) -> Option<u32> {
        match self {
            PdfFontType1::Standard(font) => font.last_char,
            PdfFontType1::Other(font) => Some(font.last_char),
        }
    }

    /// The `Widths` entry; always `Some` for `Other` fonts.
    pub fn widths(&self) -> Option<&Arc<[f32]>> {
        match self {
            PdfFontType1::Standard(font) => font.widths.as_ref(),
            PdfFontType1::Other(font) => Some(&font.widths),
        }
    }

    /// The `FontDescriptor` entry; always `Some` for `Other` fonts.
    pub fn font_descriptor(&self) -> Option<&PdfFontDescriptor> {
        match self {
            PdfFontType1::Standard(font) => font.font_descriptor.as_ref(),
            PdfFontType1::Other(font) => Some(&font.font_descriptor),
        }
    }

    /// The `Encoding` entry, if present.
    pub fn encoding(&self) -> &Option<PdfSimpleFontEncoding> {
        match self {
            PdfFontType1::Standard(font) => &font.encoding,
            PdfFontType1::Other(font) => &font.encoding,
        }
    }

    /// The `ToUnicode` entry, if present.
    pub fn to_unicode(&self) -> &Option<PdfFontToUnicode> {
        match self {
            PdfFontType1::Standard(font) => &font.to_unicode,
            PdfFontType1::Other(font) => &font.to_unicode,
        }
    }

    /// The font's `rest` dictionary.
    pub fn rest(&self) -> &PdfDictionary {
        match self {
            PdfFontType1::Standard(font) => &font.rest,
            PdfFontType1::Other(font) => &font.rest,
        }
    }
}
impl IsPdfNull for PdfFontType1 {
/// A parsed Type 1 font is never reported as PDF null.
fn is_pdf_null(&self) -> bool {
false
}
}
impl PdfParse for PdfFontType1 {
    fn type_name() -> Cow<'static, str> {
        Cow::Borrowed("PdfFontType1")
    }

    /// Parses a Type 1 font dictionary.
    ///
    /// A dictionary whose `BaseFont` parses as a `PdfStandardFontName` becomes
    /// `Standard`; everything else (including non-dictionary objects, which
    /// are left to `PdfFontType1Other`'s parser to reject) becomes `Other`.
    ///
    /// # Errors
    ///
    /// Propagates the error from parsing the chosen variant, and the error
    /// stored in the decoded data of the descriptor's `FontFile` stream, if
    /// the font has one.
    fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
        let object = object.into();
        let font = if let PdfObjectDirect::Dictionary(object) = object {
            // Only the success of this trial parse matters; the parsed name is
            // produced again when the whole dictionary is parsed below.
            if PdfStandardFontName::parse(object.get_or_null(b"BaseFont".as_slice())).is_ok() {
                Self::Standard(PdfParse::parse(object.into())?)
            } else {
                Self::Other(PdfParse::parse(object.into())?)
            }
        } else {
            Self::Other(PdfParse::parse(object.into())?)
        };
        // Decode the embedded font program eagerly so that a failure is
        // reported while parsing rather than later on.
        if let Some(font_file) = font.font_descriptor().and_then(|v| v.font_file.as_ref()) {
            font_file.decoded_data().as_ref()?;
        }
        Ok(font)
    }
}
/// Variant-independent view of a Type 1 font, produced by the `common()`
/// methods of `PdfFontType1Standard` and `PdfFontType1Other`.
///
/// Entries that are non-optional in only one of the two source structs are
/// optional here.
#[derive(Clone)]
pub struct PdfFontType1Common {
pub ty: PdfFontType,
pub subtype: PdfFontType1Subtype,
pub name: Option<PdfName>,
pub base_font: PdfName,
pub first_char: Option<u32>,
pub last_char: Option<u32>,
pub widths: Option<Arc<[f32]>>,
pub font_descriptor: Option<PdfFontDescriptor>,
pub encoding: Option<PdfSimpleFontEncoding>,
pub to_unicode: Option<PdfFontToUnicode>,
pub rest: PdfDictionary,
}
impl fmt::Debug for PdfFontType1Common {
    /// Formats as a `PdfFontType1Common { .. }` debug struct listing every
    /// field, inside a `DagDebugState::scope` call.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        DagDebugState::scope(|_state| {
            // Exhaustive destructuring keeps this impl in sync with the struct.
            let Self {
                ty,
                subtype,
                name,
                base_font,
                first_char,
                last_char,
                widths,
                font_descriptor,
                encoding,
                to_unicode,
                rest,
            } = self;
            let mut out = f.debug_struct("PdfFontType1Common");
            out.field("ty", ty);
            out.field("subtype", subtype);
            out.field("name", name);
            out.field("base_font", base_font);
            out.field("first_char", first_char);
            out.field("last_char", last_char);
            out.field("widths", widths);
            out.field("font_descriptor", font_descriptor);
            out.field("encoding", encoding);
            out.field("to_unicode", to_unicode);
            out.field("rest", rest);
            out.finish()
        })
    }
}
// A Type 1 font whose `BaseFont` is one of the `PdfStandardFontName` values.
// Unlike `PdfFontType1Other`, `FirstChar`, `LastChar`, `Widths` and
// `FontDescriptor` are optional here.
pdf_parse! {
#[pdf]
#[derive(Clone)]
pub struct PdfFontType1Standard {
#[pdf(name = "Type")]
pub ty: PdfFontType,
#[pdf(name = "Subtype")]
pub subtype: PdfFontType1Subtype,
#[pdf(name = "Name")]
pub name: Option<PdfName>,
#[pdf(name = "BaseFont")]
pub base_font: PdfStandardFontName,
#[pdf(name = "FirstChar")]
pub first_char: Option<u32>,
#[pdf(name = "LastChar")]
pub last_char: Option<u32>,
#[pdf(name = "Widths")]
pub widths: Option<Arc<[f32]>>,
#[pdf(name = "FontDescriptor")]
pub font_descriptor: Option<PdfFontDescriptor>,
#[pdf(name = "Encoding")]
pub encoding: Option<PdfSimpleFontEncoding>,
#[pdf(name = "ToUnicode")]
pub to_unicode: Option<PdfFontToUnicode>,
#[pdf(flatten)]
pub rest: PdfDictionary,
}
}
impl fmt::Debug for PdfFontType1Standard {
    /// Formats as a `PdfFontType1Standard { .. }` debug struct listing every
    /// field, inside a `DagDebugState::scope` call.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        DagDebugState::scope(|_state| {
            // Exhaustive destructuring keeps this impl in sync with the struct.
            let Self {
                ty,
                subtype,
                name,
                base_font,
                first_char,
                last_char,
                widths,
                font_descriptor,
                encoding,
                to_unicode,
                rest,
            } = self;
            let mut out = f.debug_struct("PdfFontType1Standard");
            out.field("ty", ty);
            out.field("subtype", subtype);
            out.field("name", name);
            out.field("base_font", base_font);
            out.field("first_char", first_char);
            out.field("last_char", last_char);
            out.field("widths", widths);
            out.field("font_descriptor", font_descriptor);
            out.field("encoding", encoding);
            out.field("to_unicode", to_unicode);
            out.field("rest", rest);
            out.finish()
        })
    }
}
impl PdfFontType1Standard {
    /// Clones this font into the variant-independent `PdfFontType1Common`
    /// form, converting the standard font name into a plain name.
    pub fn common(&self) -> PdfFontType1Common {
        // Exhaustive destructuring: a new field must be handled here.
        let Self {
            ty,
            subtype,
            name,
            base_font,
            first_char,
            last_char,
            widths,
            font_descriptor,
            encoding,
            to_unicode,
            rest,
        } = self;
        PdfFontType1Common {
            ty: *ty,
            subtype: *subtype,
            name: name.clone(),
            base_font: (*base_font).into(),
            first_char: *first_char,
            last_char: *last_char,
            widths: widths.clone(),
            font_descriptor: font_descriptor.clone(),
            encoding: encoding.clone(),
            to_unicode: to_unicode.clone(),
            rest: rest.clone(),
        }
    }
}
// A Type 1 font whose `BaseFont` is not one of the `PdfStandardFontName`
// values. `FirstChar`, `LastChar`, `Widths` and `FontDescriptor` are
// non-optional fields here.
pdf_parse! {
#[pdf]
#[derive(Clone)]
pub struct PdfFontType1Other {
#[pdf(name = "Type")]
pub ty: PdfFontType,
#[pdf(name = "Subtype")]
pub subtype: PdfFontType1Subtype,
#[pdf(name = "Name")]
pub name: Option<PdfName>,
#[pdf(name = "BaseFont")]
pub base_font: PdfName,
#[pdf(name = "FirstChar")]
pub first_char: u32,
#[pdf(name = "LastChar")]
pub last_char: u32,
#[pdf(name = "Widths")]
pub widths: Arc<[f32]>,
#[pdf(name = "FontDescriptor")]
pub font_descriptor: PdfFontDescriptor,
#[pdf(name = "Encoding")]
pub encoding: Option<PdfSimpleFontEncoding>,
#[pdf(name = "ToUnicode")]
pub to_unicode: Option<PdfFontToUnicode>,
#[pdf(flatten)]
pub rest: PdfDictionary,
}
}
impl fmt::Debug for PdfFontType1Other {
    /// Formats as a `PdfFontType1Other { .. }` debug struct listing every
    /// field, inside a `DagDebugState::scope` call.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        DagDebugState::scope(|_state| {
            // Exhaustive destructuring keeps this impl in sync with the struct.
            let Self {
                ty,
                subtype,
                name,
                base_font,
                first_char,
                last_char,
                widths,
                font_descriptor,
                encoding,
                to_unicode,
                rest,
            } = self;
            let mut out = f.debug_struct("PdfFontType1Other");
            out.field("ty", ty);
            out.field("subtype", subtype);
            out.field("name", name);
            out.field("base_font", base_font);
            out.field("first_char", first_char);
            out.field("last_char", last_char);
            out.field("widths", widths);
            out.field("font_descriptor", font_descriptor);
            out.field("encoding", encoding);
            out.field("to_unicode", to_unicode);
            out.field("rest", rest);
            out.finish()
        })
    }
}
impl PdfFontType1Other {
    /// Clones this font into the variant-independent `PdfFontType1Common`
    /// form; the entries that are mandatory here are wrapped in `Some`.
    pub fn common(&self) -> PdfFontType1Common {
        // Exhaustive destructuring: a new field must be handled here.
        let Self {
            ty,
            subtype,
            name,
            base_font,
            first_char,
            last_char,
            widths,
            font_descriptor,
            encoding,
            to_unicode,
            rest,
        } = self;
        PdfFontType1Common {
            ty: *ty,
            subtype: *subtype,
            name: name.clone(),
            base_font: base_font.clone(),
            first_char: Some(*first_char),
            last_char: Some(*last_char),
            widths: Some(widths.clone()),
            font_descriptor: Some(font_descriptor.clone()),
            encoding: encoding.clone(),
            to_unicode: to_unicode.clone(),
            rest: rest.clone(),
        }
    }
}
// Names of the predefined simple-font encodings; each one maps to a constant
// `PdfSimpleFontEncodingTable` through `table()`.
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
pub enum PdfSimpleFontEncodingPredefined {
#[pdf(name = "MacRomanEncoding")]
MacRomanEncoding,
#[pdf(name = "MacExpertEncoding")]
MacExpertEncoding,
#[pdf(name = "WinAnsiEncoding")]
WinAnsiEncoding,
}
}
impl PdfSimpleFontEncodingPredefined {
/// Returns the constant encoding table for this predefined encoding.
pub const fn table(self) -> PdfSimpleFontEncodingTable {
match self {
Self::MacRomanEncoding => PdfSimpleFontEncodingTable::MAC_ROMAN,
Self::MacExpertEncoding => PdfSimpleFontEncodingTable::MAC_EXPERT,
Self::WinAnsiEncoding => PdfSimpleFontEncodingTable::WIN_ANSI,
}
}
}
// Value of an encoding dictionary's `Type` entry; the only accepted name is
// `Encoding`, which is also the `Default`.
pdf_parse! {
#[pdf(name)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Default)]
pub enum PdfSimpleFontEncodingDictionaryType {
#[pdf(name = "Encoding")]
#[default]
Encoding,
}
}
// An encoding dictionary for a simple font: an optional predefined base
// encoding plus an optional list of per-code differences (see `table()` for
// how the two are combined).
pdf_parse! {
#[pdf]
#[derive(Clone, Debug)]
pub struct PdfSimpleFontEncodingDictionary {
#[pdf(name = "Type")]
pub ty: Option<PdfSimpleFontEncodingDictionaryType>,
#[pdf(name = "BaseEncoding")]
pub base_encoding: Option<PdfSimpleFontEncodingPredefined>,
#[pdf(name = "Differences")]
pub differences: Option<PdfSimpleFontEncodingDifferences>,
#[pdf(flatten)]
pub rest: PdfDictionary,
}
}
impl PdfSimpleFontEncodingDictionary {
pub fn table(
&self,
default_table: impl FnOnce() -> PdfSimpleFontEncodingTable,
) -> PdfSimpleFontEncodingTable {
let Self {
ty: _,
base_encoding,
differences,
rest: _,
} = self;
let mut retval = base_encoding
.map(|v| v.table())
.unwrap_or_else(default_table);
if let Some(differences) = differences {
retval = differences.table(retval);
}
retval
}
}
/// The parsed `Differences` array of an encoding dictionary: a map from
/// character code to the glyph name that replaces the base encoding's entry.
#[derive(Clone, Debug)]
pub struct PdfSimpleFontEncodingDifferences {
// Position of the array this was parsed from (or whatever was passed to `new`).
pos: PdfInputPositionNoCompare,
map: Arc<BTreeMap<u8, PdfName>>,
}
impl PdfSimpleFontEncodingDifferences {
    /// Creates a differences list from an input position and a code-to-name
    /// map.
    pub fn new(pos: impl Into<PdfInputPositionNoCompare>, map: Arc<BTreeMap<u8, PdfName>>) -> Self {
        let pos = pos.into();
        Self { pos, map }
    }

    /// The input position stored for this differences list.
    pub fn pos(&self) -> PdfInputPosition {
        self.pos.0
    }

    /// The code-to-name map.
    pub fn map(&self) -> &Arc<BTreeMap<u8, PdfName>> {
        &self.map
    }

    /// Returns `base_table` with every code listed in the map replaced by an
    /// entry that has the mapped name and no presumed Unicode text.
    pub fn table(&self, mut base_table: PdfSimpleFontEncodingTable) -> PdfSimpleFontEncodingTable {
        let entries: &mut [_; 0x100] = ArcOrRef::make_mut(&mut base_table.table);
        for (code, glyph_name) in self.map.iter() {
            entries[usize::from(*code)] = PdfSimpleFontEncodingTableEntry {
                name: Some(glyph_name.clone()),
                presumed_unicode: None,
            };
        }
        base_table
    }
}
impl GetPdfInputPosition for PdfSimpleFontEncodingDifferences {
/// Returns the stored input position (same value as `pos()`).
fn get_pdf_input_position(&self) -> PdfInputPosition {
self.pos.0
}
}
impl IsPdfNull for PdfSimpleFontEncodingDifferences {
/// A parsed differences list is never reported as PDF null.
fn is_pdf_null(&self) -> bool {
false
}
}
impl PdfParse for PdfSimpleFontEncodingDifferences {
    fn type_name() -> Cow<'static, str> {
        Cow::Borrowed("PdfSimpleFontEncodingDifferences")
    }

    /// Parses a `Differences` array.
    ///
    /// An integer element sets the code for the next name; each name element
    /// is stored at the current code, which then advances by one. A later
    /// name for the same code overwrites the earlier one.
    ///
    /// # Errors
    ///
    /// Returns `PdfParseError::IntegerOutOfRange` (at the name's position)
    /// for a name that has no current code — either no integer came before
    /// it or the code ran past 255 — and propagates errors from parsing the
    /// array, its elements, or an integer as `u8`.
    fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
        let array = PdfArray::parse(object)?;
        let array_pos = array.pos();
        let mut names_by_code = BTreeMap::new();
        // `None` until the first integer, and again after code 255 is used.
        let mut current_code: Option<u8> = None;
        for element in array.iter() {
            match PdfNameOrInteger::parse(element.clone())? {
                PdfNameOrInteger::Integer(code) => {
                    current_code = Some(u8::parse(code.into())?);
                }
                PdfNameOrInteger::Name(name) => {
                    let Some(code) = current_code else {
                        return Err(PdfParseError::IntegerOutOfRange { pos: name.pos() });
                    };
                    current_code = code.checked_add(1);
                    names_by_code.insert(code, name);
                }
            }
        }
        Ok(Self {
            pos: array_pos.into(),
            map: Arc::new(names_by_code),
        })
    }
}
/// One slot of a `PdfSimpleFontEncodingTable`.
///
/// The `Default` entry has neither a name nor presumed Unicode text.
#[derive(Clone, Default, Debug)]
pub struct PdfSimpleFontEncodingTableEntry {
/// Glyph name for this character code, if any.
pub name: Option<PdfName>,
/// Unicode text presumed for this code, if any; entries produced from a
/// `Differences` array leave this as `None`.
pub presumed_unicode: Option<&'static str>,
}
impl PdfSimpleFontEncodingTableEntry {
    /// Builds an entry from static data in a `const` context.
    ///
    /// `name`, when given, is turned into a `PdfName` with
    /// `PdfName::new_static`.
    pub const fn new_static(
        name: Option<&'static [u8]>,
        presumed_unicode: Option<&'static str>,
    ) -> Self {
        Self {
            // Written out by hand because closure-based `Option::map` cannot
            // be called in a `const fn`.
            name: if let Some(bytes) = name {
                Some(PdfName::new_static(bytes))
            } else {
                None
            },
            presumed_unicode,
        }
    }
}
/// A simple-font encoding table with one entry per byte value (256 entries).
///
/// The table is either a `'static` reference or an `Arc`, per `ArcOrRef`;
/// `PdfSimpleFontEncodingDifferences::table` obtains a mutable copy through
/// `ArcOrRef::make_mut` before changing entries.
#[derive(Clone, Debug)]
pub struct PdfSimpleFontEncodingTable {
pub table: ArcOrRef<'static, [PdfSimpleFontEncodingTableEntry; 0x100]>,
}
/// The `Encoding` entry of a simple font: either the name of a predefined
/// encoding or an encoding dictionary.
#[derive(Clone, Debug)]
pub enum PdfSimpleFontEncoding {
Predefined(PdfSimpleFontEncodingPredefined),
Dictionary(PdfSimpleFontEncodingDictionary),
}
impl PdfSimpleFontEncoding {
pub fn table(
&self,
default_table: impl FnOnce() -> PdfSimpleFontEncodingTable,
) -> PdfSimpleFontEncodingTable {
match self {
PdfSimpleFontEncoding::Predefined(v) => v.table(),
PdfSimpleFontEncoding::Dictionary(v) => v.table(default_table),
}
}
}
impl IsPdfNull for PdfSimpleFontEncoding {
/// A parsed encoding is never reported as PDF null.
fn is_pdf_null(&self) -> bool {
false
}
}
impl PdfParse for PdfSimpleFontEncoding {
    fn type_name() -> Cow<'static, str> {
        Cow::Borrowed("PdfSimpleFontEncoding")
    }

    /// Parses a name as a predefined encoding and a dictionary as an encoding
    /// dictionary.
    ///
    /// # Errors
    ///
    /// Returns `PdfParseError::InvalidType` for any other kind of object, and
    /// propagates errors from parsing the name or the dictionary.
    fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
        match PdfObjectDirect::from(object) {
            PdfObjectDirect::Name(name) => {
                let predefined = PdfParse::parse(name.into())?;
                Ok(Self::Predefined(predefined))
            }
            PdfObjectDirect::Dictionary(dictionary) => {
                let dictionary = PdfParse::parse(dictionary.into())?;
                Ok(Self::Dictionary(dictionary))
            }
            other => Err(PdfParseError::InvalidType {
                pos: other.pos(),
                ty: other.type_name(),
                expected_ty: "PdfSimpleFontEncoding",
            }),
        }
    }
}
/// Data extracted from an embedded Type 1 font program; used as the decoded
/// data type of a font descriptor's `FontFile` stream.
///
/// Every field is optional.
// NOTE(review): presumably populated by the `type_1_parse` module, which is
// not part of this chunk — confirm there.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct PdfFontType1Program {
pub encoding: Option<Arc<[Option<PdfName>]>>,
pub font_bbox: Option<PdfRectangle>,
pub font_info: Option<PdfFontType1FontInfo>,
pub font_matrix: Option<PdfMatrix>,
pub font_name: Option<PdfName>,
}
/// The font-info part of a `PdfFontType1Program`; every field is optional.
#[derive(Clone, Debug)]
pub struct PdfFontType1FontInfo {
pub family_name: Option<PdfString>,
pub full_name: Option<PdfString>,
pub notice: Option<PdfString>,
pub weight: Option<PdfString>,
pub version: Option<PdfString>,
pub italic_angle: Option<f32>,
pub is_fixed_pitch: Option<bool>,
pub underline_position: Option<f32>,
pub underline_thickness: Option<f32>,
}

1067
src/pdf/font/tables.rs Normal file

File diff suppressed because it is too large Load diff

1423
src/pdf/font/type_1_parse.rs Normal file

File diff suppressed because it is too large Load diff

2142
src/pdf/object.rs Normal file

File diff suppressed because it is too large Load diff

1313
src/pdf/parse.rs Normal file

File diff suppressed because it is too large Load diff

1064
src/pdf/render.rs Normal file

File diff suppressed because it is too large Load diff

66
src/pdf/stream_filters.rs Normal file
View file

@ -0,0 +1,66 @@
use crate::pdf::{
object::{PdfDictionary, PdfName},
parse::{PdfInputPosition, PdfParse, PdfParseError},
pdf_parse,
};
pub mod flate;
// Stream filter names. Each named variant maps to the PDF name in its
// attribute; the `other` variant keeps an unrecognized name in `Unknown`.
// Of these, only `FlateDecode` is decoded by `decode_stream_data` so far.
pdf_parse! {
#[pdf(name)]
#[derive(Clone, Debug, PartialEq, Eq)]
#[non_exhaustive]
pub enum PdfStreamFilter {
#[pdf(name = "ASCIIHexDecode")]
AsciiHexDecode,
#[pdf(name = "ASCII85Decode")]
Ascii85Decode,
#[pdf(name = "LZWDecode")]
LzwDecode,
#[pdf(name = "FlateDecode")]
FlateDecode,
#[pdf(name = "RunLengthDecode")]
RunLengthDecode,
#[pdf(name = "CCITTFaxDecode")]
CcittFaxDecode,
#[pdf(name = "JBIG2Decode")]
Jbig2Decode,
#[pdf(name = "DCTDecode")]
DctDecode,
#[pdf(name = "JPXDecode")]
JpxDecode,
#[pdf(name = "Crypt")]
Crypt,
#[pdf(other)]
Unknown(PdfName),
}
}
impl PdfStreamFilter {
    /// Decodes `encoded_data` with this filter.
    ///
    /// `filter_parms` is the filter's parameter dictionary and `stream_pos`
    /// the position used in error reports.
    ///
    /// # Errors
    ///
    /// * `PdfParseError::UnknownStreamFilter` for an `Unknown` filter name.
    /// * `PdfParseError::StreamFilterError` for a known filter whose decoder
    ///   is not implemented yet. Filter names come from the input file, so
    ///   this is reported as an error instead of panicking.
    /// * Any error from parsing the parameters or decoding the data.
    pub fn decode_stream_data(
        &self,
        filter_parms: PdfDictionary,
        stream_pos: PdfInputPosition,
        encoded_data: &[u8],
    ) -> Result<Vec<u8>, PdfParseError> {
        match self {
            PdfStreamFilter::FlateDecode => {
                flate::PdfFilterParmsFlateDecode::parse(filter_parms.into())?
                    .decode_stream_data(stream_pos, encoded_data)
            }
            PdfStreamFilter::Unknown(filter) => Err(PdfParseError::UnknownStreamFilter {
                pos: stream_pos,
                filter: filter.clone(),
            }),
            // TODO: implement these decoders. They are listed one by one (no
            // `_` arm) so that a newly added variant must be handled here.
            PdfStreamFilter::AsciiHexDecode
            | PdfStreamFilter::Ascii85Decode
            | PdfStreamFilter::LzwDecode
            | PdfStreamFilter::RunLengthDecode
            | PdfStreamFilter::CcittFaxDecode
            | PdfStreamFilter::Jbig2Decode
            | PdfStreamFilter::DctDecode
            | PdfStreamFilter::JpxDecode
            | PdfStreamFilter::Crypt => Err(PdfParseError::StreamFilterError {
                pos: stream_pos,
                filter: self.clone().into(),
                error: String::from("decoding this stream filter is not implemented yet"),
            }),
        }
    }
}

View file

@ -0,0 +1,74 @@
use crate::pdf::{
object::PdfDictionary,
parse::{PdfInputPosition, PdfParseError},
pdf_parse,
stream_filters::PdfStreamFilter,
};
use std::{io::Read, num::NonZero};
// Parameter dictionary of the `FlateDecode` filter. Every named entry is
// optional; the accessor methods on the type substitute the `DEFAULT_*`
// constants for missing entries.
pdf_parse! {
#[pdf]
#[derive(Clone, Debug, Default)]
pub struct PdfFilterParmsFlateDecode {
#[pdf(name = "Predictor")]
pub predictor: Option<NonZero<u32>>,
#[pdf(name = "Colors")]
pub colors: Option<NonZero<u32>>,
#[pdf(name = "BitsPerComponent")]
pub bits_per_component: Option<NonZero<u32>>,
#[pdf(name = "Columns")]
pub columns: Option<NonZero<u32>>,
#[pdf(flatten)]
pub rest: PdfDictionary,
}
}
impl PdfFilterParmsFlateDecode {
pub const FILTER: PdfStreamFilter = PdfStreamFilter::FlateDecode;
pub const DEFAULT_PREDICTOR: NonZero<u32> = const { NonZero::new(1).unwrap() };
pub const DEFAULT_COLORS: NonZero<u32> = const { NonZero::new(1).unwrap() };
pub const DEFAULT_BITS_PER_COMPONENT: NonZero<u32> = const { NonZero::new(8).unwrap() };
pub const DEFAULT_COLUMNS: NonZero<u32> = const { NonZero::new(1).unwrap() };
pub fn predictor(&self) -> NonZero<u32> {
self.predictor.unwrap_or(Self::DEFAULT_PREDICTOR)
}
pub fn colors(&self) -> NonZero<u32> {
self.colors.unwrap_or(Self::DEFAULT_COLORS)
}
pub fn bits_per_component(&self) -> NonZero<u32> {
self.bits_per_component
.unwrap_or(Self::DEFAULT_BITS_PER_COMPONENT)
}
pub fn columns(&self) -> NonZero<u32> {
self.columns.unwrap_or(Self::DEFAULT_COLUMNS)
}
pub fn decode_stream_data(
&self,
stream_pos: PdfInputPosition,
encoded_data: &[u8],
) -> Result<Vec<u8>, PdfParseError> {
let Self {
predictor: _,
colors: _,
bits_per_component: _,
columns: _,
rest: _,
} = self;
let mut decoded_data = vec![];
flate2::bufread::ZlibDecoder::new(encoded_data)
.read_to_end(&mut decoded_data)
.map_err(|e| PdfParseError::StreamFilterError {
pos: stream_pos,
filter: Self::FILTER.into(),
error: e.to_string(),
})?;
let predictor = self.predictor();
let colors = self.colors();
let bits_per_component = self.bits_per_component();
let columns = self.columns();
match predictor {
Self::DEFAULT_PREDICTOR => Ok(decoded_data),
_ => todo!("{predictor}"),
}
}
}

382
src/util.rs Normal file
View file

@ -0,0 +1,382 @@
use std::{
any::{Any, TypeId},
borrow::Borrow,
cell::Cell,
collections::HashMap,
fmt,
hash::{Hash, Hasher},
sync::Arc,
};
/// Either a shared, owning `Arc<T>` or a plain borrow of a `T`.
///
/// Every trait implemented below forwards to the pointed-to `T`, so the two
/// variants behave the same for comparison, hashing and formatting.
pub enum ArcOrRef<'a, T: ?Sized> {
    /// A reference-counted, owned value.
    Arc(Arc<T>),
    /// A value borrowed for `'a`.
    Ref(&'a T),
}

impl<'a, T: ?Sized> AsRef<T> for ArcOrRef<'a, T> {
    fn as_ref(&self) -> &T {
        &**self
    }
}

impl<'a, T: ?Sized> Borrow<T> for ArcOrRef<'a, T> {
    fn borrow(&self) -> &T {
        &**self
    }
}

impl<'a, T: ?Sized> From<Arc<T>> for ArcOrRef<'a, T> {
    fn from(value: Arc<T>) -> Self {
        ArcOrRef::Arc(value)
    }
}

impl<'a, T: ?Sized> From<&'a T> for ArcOrRef<'a, T> {
    fn from(value: &'a T) -> Self {
        ArcOrRef::Ref(value)
    }
}

/// The default is a borrow of whatever `&T` defaults to.
impl<'a, T: ?Sized> Default for ArcOrRef<'a, T>
where
    &'a T: Default,
{
    fn default() -> Self {
        ArcOrRef::Ref(<&'a T as Default>::default())
    }
}

/// Cloning never copies the `T`: it bumps the reference count or copies the borrow.
impl<T: ?Sized> Clone for ArcOrRef<'_, T> {
    fn clone(&self) -> Self {
        match *self {
            Self::Arc(ref shared) => Self::Arc(Arc::clone(shared)),
            Self::Ref(borrowed) => Self::Ref(borrowed),
        }
    }
}

impl<T: ?Sized + Hash> Hash for ArcOrRef<'_, T> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        Hash::hash(&**self, state)
    }
}

impl<'a, 'b, T: ?Sized + PartialEq<U>, U: ?Sized> PartialEq<ArcOrRef<'b, U>> for ArcOrRef<'a, T> {
    fn eq(&self, other: &ArcOrRef<'b, U>) -> bool {
        PartialEq::eq(&**self, &**other)
    }
}

impl<T: ?Sized + Eq> Eq for ArcOrRef<'_, T> {}

impl<'a, 'b, T: ?Sized + PartialOrd<U>, U: ?Sized> PartialOrd<ArcOrRef<'b, U>> for ArcOrRef<'a, T> {
    fn partial_cmp(&self, other: &ArcOrRef<'b, U>) -> Option<std::cmp::Ordering> {
        PartialOrd::partial_cmp(&**self, &**other)
    }
}

impl<T: ?Sized + Ord> Ord for ArcOrRef<'_, T> {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        Ord::cmp(&**self, &**other)
    }
}

impl<T: ?Sized> std::ops::Deref for ArcOrRef<'_, T> {
    type Target = T;

    fn deref(&self) -> &Self::Target {
        match self {
            Self::Arc(shared) => &**shared,
            Self::Ref(borrowed) => *borrowed,
        }
    }
}

impl<T: ?Sized + fmt::Debug> fmt::Debug for ArcOrRef<'_, T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(&**self, f)
    }
}

impl<T: ?Sized + fmt::Display> fmt::Display for ArcOrRef<'_, T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(&**self, f)
    }
}
/// A stable stand-in for `CloneToUninit` when the destination is an `Arc`.
pub trait ArcFromRef {
    /// Equivalent to `Arc::new(Self::clone(self))`, but also available for unsized types.
    fn arc_from_ref(&self) -> Arc<Self>;
    /// `Arc::make_mut`, reachable through the trait so generic code can call it.
    fn make_mut(this: &mut Arc<Self>) -> &mut Self;
}

/// Sized, clonable values: clone into a fresh `Arc`.
impl<T: Clone> ArcFromRef for T {
    fn arc_from_ref(&self) -> Arc<T> {
        let copy = self.clone();
        Arc::new(copy)
    }

    fn make_mut(this: &mut Arc<T>) -> &mut T {
        Arc::make_mut(this)
    }
}

/// Slices: clone the elements into a new `Arc<[T]>`.
impl<T: Clone> ArcFromRef for [T] {
    fn arc_from_ref(&self) -> Arc<[T]> {
        Arc::<[T]>::from(self)
    }

    fn make_mut(this: &mut Arc<[T]>) -> &mut [T] {
        Arc::make_mut(this)
    }
}

/// String slices: copy the text into a new `Arc<str>`.
impl ArcFromRef for str {
    fn arc_from_ref(&self) -> Arc<str> {
        Arc::<str>::from(self)
    }

    fn make_mut(this: &mut Arc<str>) -> &mut str {
        Arc::make_mut(this)
    }
}
impl<'a, T: ?Sized + ArcFromRef> ArcOrRef<'a, T> {
    /// Consumes `this` and returns an `Arc`: the stored one, or a new one built
    /// from the borrowed value with [`ArcFromRef::arc_from_ref`].
    pub fn into_arc(this: Self) -> Arc<T> {
        match this {
            Self::Arc(shared) => shared,
            Self::Ref(borrowed) => borrowed.arc_from_ref(),
        }
    }

    /// Turns `this` into the `Arc` variant in place (if it is not already) and
    /// returns a mutable reference to the stored `Arc`.
    pub fn make_arc(this: &mut Self) -> &mut Arc<T> {
        // `&'a T` is `Copy`, so the borrow is copied out before `*this` is overwritten.
        if let Self::Ref(borrowed) = *this {
            *this = Self::Arc(T::arc_from_ref(borrowed));
        }
        match this {
            Self::Arc(shared) => shared,
            Self::Ref(_) => unreachable!(),
        }
    }

    /// Returns a mutable reference to the value, first converting to the `Arc`
    /// variant and then going through [`ArcFromRef::make_mut`].
    pub fn make_mut(this: &mut Self) -> &mut T {
        let shared = Self::make_arc(this);
        T::make_mut(shared)
    }
}
/// Private supertrait that seals [`SupportsDagDebugState`]: code outside this
/// module cannot name it, so it cannot add implementations.
trait DagDebugStateSealed {}

/// Types that can be tracked by the DAG debug state.
///
/// The key decides whether two values count as "the same node".
#[expect(private_bounds)]
pub trait SupportsDagDebugState: DagDebugStateSealed + 'static + Clone {
    /// Identity of a value, used as the hash-map key.
    type Key: Clone + Hash + Eq + 'static;
    /// Returns the identity of `this`.
    fn key(this: &Self) -> Self::Key;
}

// In all three implementations the key is the pointer returned by `Arc::as_ptr`.

impl<T: 'static> DagDebugStateSealed for Arc<T> {}
impl<T: 'static> SupportsDagDebugState for Arc<T> {
    type Key = *const T;

    fn key(this: &Self) -> *const T {
        Arc::as_ptr(this)
    }
}

impl<T: 'static> DagDebugStateSealed for Arc<[T]> {}
impl<T: 'static> SupportsDagDebugState for Arc<[T]> {
    type Key = *const [T];

    fn key(this: &Self) -> *const [T] {
        Arc::as_ptr(this)
    }
}

impl DagDebugStateSealed for Arc<str> {}
impl SupportsDagDebugState for Arc<str> {
    type Key = *const str;

    fn key(this: &Self) -> *const str {
        Arc::as_ptr(this)
    }
}
/// Type-erased interface to a `DagDebugStatePart<T>`, so parts for different
/// `T` can be stored behind one `Box<dyn ...>`.
trait DagDebugStatePartTrait: 'static {
/// Clears everything the part has recorded.
fn reset(&mut self);
/// Exposes the part as `dyn Any` so it can be downcast back to its concrete type.
fn as_any_mut(&mut self) -> &mut dyn Any;
}
/// The values of one type `T` that have been recorded so far.
struct DagDebugStatePart<T: SupportsDagDebugState> {
/// Maps a value's key to the id it was given and a clone of the value.
table: HashMap<T::Key, (u64, T)>,
/// Id that the next newly inserted value receives.
next_id: u64,
}
impl<T: SupportsDagDebugState> DagDebugStatePartTrait for DagDebugStatePart<T> {
    /// Forgets all recorded values and restarts id numbering at zero.
    fn reset(&mut self) {
        // Destructure exhaustively so a newly added field cannot be forgotten here.
        let Self {
            table: recorded,
            next_id: counter,
        } = self;
        *counter = 0;
        recorded.clear();
    }

    /// Upcasts to `dyn Any` for downcasting by the owner.
    fn as_any_mut(&mut self) -> &mut dyn Any {
        self as &mut dyn Any
    }
}
impl<T: SupportsDagDebugState> DagDebugStatePart<T> {
    /// Records `value` under its key.
    ///
    /// Returns `Old` with the existing id when the key is already present.
    /// Otherwise stores a clone of `value` under the next free id and returns
    /// `New` with that id.
    fn insert(&mut self, value: &T) -> DagDebugStateInsertResult {
        use std::collections::hash_map::Entry;

        let Self { table, next_id } = self;
        match table.entry(T::key(value)) {
            Entry::Occupied(slot) => {
                let (id, _) = slot.get();
                DagDebugStateInsertResult::Old { id: *id }
            }
            Entry::Vacant(slot) => {
                let stored = value.clone();
                let id = *next_id;
                *next_id += 1;
                slot.insert((id, stored));
                DagDebugStateInsertResult::New { id }
            }
        }
    }
}
/// An empty part whose first id is zero.
///
/// Written by hand so that no `Default` bound is required on `T`.
impl<T: SupportsDagDebugState> Default for DagDebugStatePart<T> {
    fn default() -> Self {
        let table = HashMap::new();
        Self { table, next_id: 0 }
    }
}
/// Tracks which shared values have already been seen, so that later
/// occurrences can be formatted in an abbreviated form.
pub struct DagDebugState {
/// One table per tracked type, keyed by the `TypeId` of its `DagDebugStatePart<T>`.
parts: std::cell::RefCell<HashMap<TypeId, Box<dyn DagDebugStatePartTrait>>>,
/// Number of currently active `scope` calls; the tables are reset when it drops to zero.
ref_count: Cell<usize>,
}
/// Outcome of recording a value: whether it was seen before, and its id.
#[derive(Clone, Copy, Debug)]
pub enum DagDebugStateInsertResult {
    /// The value was not recorded before and has just been given `id`.
    New { id: u64 },
    /// The value was already recorded under `id`.
    Old { id: u64 },
}

impl DagDebugStateInsertResult {
    /// The id carried by either variant.
    pub fn id(self) -> u64 {
        let (Self::New { id } | Self::Old { id }) = self;
        id
    }
}
impl DagDebugState {
/// Runs `f` on the table for type `T`, creating an empty table on first use.
///
/// `parts` stays mutably borrowed while `f` runs, so `f` must not call back
/// into anything that borrows `parts` again.
fn with_part<T: SupportsDagDebugState, R>(
&self,
f: impl FnOnce(&mut DagDebugStatePart<T>) -> R,
) -> R {
let mut parts = self.parts.borrow_mut();
// The map is keyed by the `TypeId` of the concrete part type, so the
// downcast below always succeeds for the entry found under that key.
let Some(part) = parts
.entry(TypeId::of::<DagDebugStatePart<T>>())
.or_insert_with(|| Box::new(DagDebugStatePart::<T>::default()))
.as_any_mut()
.downcast_mut::<DagDebugStatePart<T>>()
else {
unreachable!()
};
f(part)
}
/// Records `value` and reports whether it is new or was seen before, together with its id.
pub fn insert<T: SupportsDagDebugState>(&self, value: &T) -> DagDebugStateInsertResult {
self.with_part(|part: &mut DagDebugStatePart<T>| part.insert(value))
}
/// Like [`Self::debug_or_id_with`], using `value`'s `Debug` output for the
/// full form and `abbreviated`'s `Display` output for the abbreviated form.
pub fn debug_or_id<'a, T: SupportsDagDebugState + fmt::Debug, Abbreviated: fmt::Display>(
&self,
value: &'a T,
abbreviated: Abbreviated,
) -> impl fmt::Debug + fmt::Display + use<'a, T, Abbreviated> {
self.debug_or_id_with(value, fmt::Debug::fmt, move |f| abbreviated.fmt(f))
}
/// Records `value` and returns a formatter that prints `#<id> ` followed by
/// the full form (`debug_value`) if this call was the first to record the
/// value, or by the abbreviated form (`debug_abbreviated`) otherwise.
///
/// The value is recorded when this method is called, not when the result is
/// formatted, so formatting the returned object repeatedly gives the same output.
pub fn debug_or_id_with<
'a,
T: SupportsDagDebugState,
DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result,
DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result,
>(
&self,
value: &'a T,
debug_value: DebugValue,
debug_abbreviated: DebugAbbreviated,
) -> impl fmt::Debug + fmt::Display + use<'a, T, DebugValue, DebugAbbreviated> {
// Carries the insert result and both formatting callbacks; it holds no
// reference to `self`, matching the `use<..>` capture list above.
struct DebugOrIdWith<
'a,
T: SupportsDagDebugState,
DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result,
DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result,
> {
insert_result: DagDebugStateInsertResult,
value: &'a T,
debug_value: DebugValue,
debug_abbreviated: DebugAbbreviated,
}
// `Debug` output is identical to `Display` output.
impl<
'a,
T: SupportsDagDebugState,
DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result,
DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result,
> fmt::Debug for DebugOrIdWith<'a, T, DebugValue, DebugAbbreviated>
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(self, f)
}
}
impl<
'a,
T: SupportsDagDebugState,
DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result,
DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result,
> fmt::Display for DebugOrIdWith<'a, T, DebugValue, DebugAbbreviated>
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// The id prefix is always written; only the body depends on new vs. old.
write!(f, "#{} ", self.insert_result.id())?;
match self.insert_result {
DagDebugStateInsertResult::New { id: _ } => (self.debug_value)(self.value, f),
DagDebugStateInsertResult::Old { id: _ } => (self.debug_abbreviated)(f),
}
}
}
DebugOrIdWith {
insert_result: self.insert(value),
value,
debug_value,
debug_abbreviated,
}
}
/// Increments the nesting counter and returns a guard that decrements it
/// again when dropped. When the counter returns to zero, every table is reset.
///
/// Panics with "too many nested calls" if the counter would overflow.
#[must_use]
fn inc_ref_count_scope(&self) -> impl Sized {
struct DecRefCountOnDrop<'a>(&'a DagDebugState);
impl Drop for DecRefCountOnDrop<'_> {
fn drop(&mut self) {
self.0.ref_count.set(self.0.ref_count.get() - 1);
// The outermost scope has ended: forget everything recorded so far.
if self.0.ref_count.get() == 0 {
self.0
.parts
.borrow_mut()
.values_mut()
.for_each(|v| v.reset());
}
}
}
self.ref_count.set(
self.ref_count
.get()
.checked_add(1)
.expect("too many nested calls"),
);
DecRefCountOnDrop(self)
}
/// Runs `f` with the current thread's `DagDebugState`.
///
/// Calls may nest. The recorded values are kept until the outermost call
/// returns (or unwinds), at which point the guard resets all tables.
pub fn scope<R>(f: impl FnOnce(&Self) -> R) -> R {
thread_local! {
static STATE: DagDebugState = DagDebugState { parts: Default::default(), ref_count: Cell::new(0) };
}
STATE.with(|state| {
// Held for the duration of `f`; dropping it performs the decrement/reset.
let _scope = state.inc_ref_count_scope();
f(state)
})
}
}