Compare commits

..

10 commits

28 changed files with 12027 additions and 6489 deletions

View file

@ -1,46 +0,0 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
on: [push, pull_request]
env:
  # sha256 of the PowerISA v3.1C PDF downloaded below
  PDF_HASH: 56372d23ece7e9e2c1b381a639443982a3e16e38109df1c141d655b779b61fdb
  # sha256 of the currently-expected powerisa-instructions.xml output
  OUTPUT_XML_HASH: c0b4592cbd0a3e59b9b2931a6a75a3d87ebf23bf453e8587a1522dd157f15ee9
jobs:
  test:
    runs-on: debian-12
    container:
      image: git.libre-chip.org/libre-chip/fayalite-deps:latest
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
      - run: |
          scripts/check-copyright.sh
      - uses: https://git.libre-chip.org/mirrors/rust-cache@v2
        with:
          save-if: ${{ github.ref == 'refs/heads/master' }}
      - run: |
          apt-get update -qq
          apt-get install -qq python3-venv wget
          # copy of https://files.openpower.foundation/s/9izgC5Rogi5Ywmm/download/OPF_PowerISA_v3.1C.pdf
          wget -O OPF_PowerISA_v3.1C.pdf https://libre-chip.org/OPF_PowerISA_v3.1C.pdf
          echo "$PDF_HASH OPF_PowerISA_v3.1C.pdf" | sha256sum -c
      - run: |
          cargo test
      - run: |
          cargo build --release
      - run: |
          cargo run --release -- OPF_PowerISA_v3.1C.pdf &> >(tee out.log | grep '^page ') || { tail -n1000 out.log; false; }
          echo "expected output (not all instructions are decoded yet, change when the output is improved):"
          echo "$OUTPUT_XML_HASH powerisa-instructions.xml" | sha256sum -c
          mv powerisa-instructions.xml powerisa-instructions-rust.xml
      - run: |
          python3 -m venv --upgrade-deps .venv
          . .venv/bin/activate
          pip install -e .
          parse_powerisa_pdf OPF_PowerISA_v3.1C.pdf &> >(tee out.log | grep '^page ') || { tail -n1000 out.log; false; }
          echo "expected output (not all instructions are decoded yet, change when the output is improved):"
          echo "$OUTPUT_XML_HASH powerisa-instructions.xml" | sha256sum -c
          mv powerisa-instructions.xml powerisa-instructions-python.xml

3
.gitignore vendored
View file

@ -1,10 +1,7 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
/.venv
/.vscode
*.egg-info
__pycache__
*.log
/powerisa-instructions.xml
/*.pdf
/target

308
Cargo.lock generated
View file

@ -3,56 +3,10 @@
version = 4
[[package]]
name = "aho-corasick"
version = "1.1.4"
name = "adler2"
version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
dependencies = [
"memchr",
]
[[package]]
name = "bindgen"
version = "0.71.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
dependencies = [
"bitflags",
"cexpr",
"clang-sys",
"itertools",
"proc-macro2",
"quote",
"regex",
"rustc-hash",
"shlex",
"syn",
]
[[package]]
name = "bitflags"
version = "2.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
[[package]]
name = "cc"
version = "1.2.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a0aeaff4ff1a90589618835a598e545176939b97874f7abc7851caa0618f203"
dependencies = [
"find-msvc-tools",
"shlex",
]
[[package]]
name = "cexpr"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
dependencies = [
"nom",
]
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
[[package]]
name = "cfg-if"
@ -61,16 +15,39 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "clang-sys"
version = "1.8.1"
name = "crc32fast"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
dependencies = [
"glob",
"libc",
"libloading",
"cfg-if",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
name = "either"
version = "1.15.0"
@ -78,228 +55,55 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "equivalent"
version = "1.0.2"
name = "flate2"
version = "1.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "find-msvc-tools"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff"
[[package]]
name = "glob"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
[[package]]
name = "hashbrown"
version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
[[package]]
name = "indexmap"
version = "2.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2"
checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb"
dependencies = [
"equivalent",
"hashbrown",
"crc32fast",
"miniz_oxide",
]
[[package]]
name = "itertools"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
dependencies = [
"either",
]
[[package]]
name = "libc"
version = "0.2.178"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091"
[[package]]
name = "libloading"
name = "miniz_oxide"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
dependencies = [
"cfg-if",
"windows-link",
]
[[package]]
name = "libm"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de"
[[package]]
name = "memchr"
version = "2.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "mupdf-sys"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13e9a0d4e844ab50315d43312f3d62f72c77205b07c8ee21cbd4b52bdc2a9910"
dependencies = [
"bindgen",
"cc",
"pkg-config",
"regex",
"zerocopy",
]
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
"adler2",
"simd-adler32",
]
[[package]]
name = "parse_powerisa_pdf"
version = "0.1.0"
dependencies = [
"indexmap",
"libm",
"mupdf-sys",
"quick-xml",
"flate2",
"rayon",
]
[[package]]
name = "pkg-config"
version = "0.3.32"
name = "rayon"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "proc-macro2"
version = "1.0.104"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9695f8df41bb4f3d222c95a67532365f569318332d03d5f3f67f37b20e6ebdf0"
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
dependencies = [
"unicode-ident",
"either",
"rayon-core",
]
[[package]]
name = "quick-xml"
version = "0.38.4"
name = "rayon-core"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c"
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
dependencies = [
"memchr",
"crossbeam-deque",
"crossbeam-utils",
]
[[package]]
name = "quote"
version = "1.0.42"
name = "simd-adler32"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
[[package]]
name = "rustc-hash"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "syn"
version = "2.0.112"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21f182278bf2d2bcb3c88b1b08a37df029d71ce3d3ae26168e3c653b213b99d4"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
[[package]]
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
[[package]]
name = "zerocopy"
version = "0.8.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.8.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"

View file

@ -1,17 +1,10 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
[package]
name = "parse_powerisa_pdf"
version = "0.1.0"
license = "LGPL-3.0-or-later"
edition = "2024"
repository = ""
keywords = []
categories = []
rust-version = "1.89.0"
license = "LGPL-3.0-or-later"
[dependencies]
indexmap = "2.12.1"
libm = "0.2.15"
mupdf-sys = { version = "0.5.0", default-features = false }
quick-xml = "0.38.4"
flate2 = "1.1.5"
rayon = "1.11.0"

View file

@ -1,38 +1,5 @@
<!--
SPDX-License-Identifier: LGPL-3.0-or-later
See Notices.txt for copyright information
-->
A parser for the OPF PowerISA 3.1C PDF that attempts to extract every instruction's pseudo-code, including subscripts/superscripts and other formatting.
# Using the new Rust code:
Usage:
* Download the OPF PowerISA 3.1C pdf (yes you need that exact version) from <https://openpower.foundation/specifications/isa/>
* Install Rust -- you need version 1.89.0 or later.
Getting it from https://rustup.rs/ is recommended.
* Install required build dependencies:
On Debian 12:
```bash
sudo apt update
sudo apt install build-essential clang unzip
```
* Compile and run:
```bash
cargo run -- path/to/downloaded/OPF_PowerISA_v3.1C.pdf > out.log
```
* This will spit out lots of errors and then successfully create
the output file -- `powerisa-instructions.xml` in the current directory.
# Using the old Python code:
Usage:
* Download the OPF PowerISA 3.1C pdf (yes you need that exact version) from <https://openpower.foundation/specifications/isa/>
* Obtain CPython 3.11 (the default `python3` in [Debian Bookworm](https://www.debian.org/releases/bookworm/))

View file

@ -1,5 +1,3 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
from __future__ import annotations
from collections import defaultdict
from collections.abc import Generator, Iterable, Iterator, Callable
@ -765,7 +763,7 @@ class Page:
unprocessed_non_text: SetById[LTLine | LTRect]
@staticmethod
def from_lt_page(page_num: int, page: LTPage, first_seen_fonts: defaultdict[str, set[float]]) -> Page:
def from_lt_page(page_num: int, page: LTPage) -> Page:
qt: defaultdict[TextSection, QuadTree[Char | LTLine | LTRect]] = defaultdict(QuadTree)
unprocessed_chars = defaultdict(lambda: defaultdict(SetById[Char]))
unprocessed_non_text: SetById[LTLine | LTRect] = SetById()
@ -804,25 +802,20 @@ class Page:
raise AssertionError(
f"char not in text section: {element}\npage_num={page_num}")
continue
font_size = round(element.size, 3)
char = Char(
text=element.get_text(),
font=Font(font_name=element.fontname, size=font_size),
font=Font(font_name=element.fontname, size=round(element.size, 3)),
adv=element.adv,
min_x=element.x0,
min_y=element.y0,
max_x=element.x1,
max_y=element.y1,
)
if font_size not in first_seen_fonts[element.fontname]:
first_seen_fonts[element.fontname].add(font_size)
print(f"first seen font: {element.fontname!r} {font_size}: page {page_num} {char!r}")
qt[text_section].insert(char.min_x, char.min_y, char)
unprocessed_chars[text_section][char.font].add(char)
for text_section, i in unprocessed_chars.items():
for chars in i.values():
chars.sort(key=Char.top_down_left_to_right_sort_key)
print(f"first char: {text_section!r}: {next(iter(chars), None)!r}")
for i in unprocessed_chars.values():
for j in i.values():
j.sort(key=Char.top_down_left_to_right_sort_key)
unknown_fonts=[]
unknown_font_errors=[]
for i in unprocessed_chars.values():
@ -1186,14 +1179,13 @@ class Parser:
def __pages_gen(file: Path, page_numbers: Iterable[int] | None) -> Generator[Page, None, None]:
if page_numbers is not None:
page_numbers = sorted(i - 1 for i in page_numbers)
first_seen_fonts = defaultdict(set)
for i, page in enumerate(extract_pages(file, page_numbers=page_numbers)):
if page_numbers is not None:
page_num = page_numbers[i] + 1
else:
page_num = i + 1
print(f"page {page_num}")
yield Page.from_lt_page(page_num=page_num, page=page, first_seen_fonts=first_seen_fonts)
yield Page.from_lt_page(page_num=page_num, page=page)
def parse_pdf(self, file: Path, page_numbers: Iterable[int] | None = None):
self.pages = Pages(pages_gen=Parser.__pages_gen(
@ -1509,7 +1501,7 @@ class Parser:
f"instruction bit fields box has wrong number of horizontal lines:\n{h_lines}")
if len(v_lines) < 2:
raise InsnParseError(
f"instruction bit fields box has too few vertical lines:\n{v_lines}")
f"instruction bit fields box has too few vertical lines:\n{h_lines}")
bottom_line, top_line = h_lines
box_min_x = v_lines[0].x0
box_max_x = v_lines[-1].x0

View file

@ -1,5 +1,3 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
from __future__ import annotations
from typing import Callable, Generic, Iterable, Iterator, TypeVar
from math import frexp, isfinite, isnan, ldexp

View file

@ -1,5 +1,3 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
from collections import abc
from typing import Callable, Generic, Iterable, Iterator, Protocol, TypeAlias, TypeVar, overload

View file

@ -1,5 +1,3 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
[build-system]
requires = ["setuptools >= 61.0"]
build-backend = "setuptools.build_meta"
@ -13,7 +11,4 @@ dependencies = [
requires-python = ">= 3.11"
[project.scripts]
parse_powerisa_pdf = "parse_powerisa_pdf.parse_powerisa_pdf:main"
[tool.setuptools]
packages = ["parse_powerisa_pdf"]
parse_powerisa_pdf = "parse_powerisa_pdf.parse_powerisa_pdf:main"

View file

@ -1,70 +0,0 @@
#!/bin/bash
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
set -e

# Print an error and abort the whole check.
function fail()
{
    local error="$1"
    echo "error: $error" >&2
    exit 1
}

# Abort with a file:line prefix. `line` is 0-based; it is printed 1-based.
function fail_file()
{
    local file="$1" line="$2" error="$3"
    fail "$file:$((line + 1)): $error"
}

# Check that the first lines of `file` match `regexes`, one regex per line.
# Empty files are exempt from the copyright header requirement.
function check_file()
{
    local file="$1" regexes=("${@:2}")
    local lines
    mapfile -t lines < "$file"
    if (("${#lines[@]}" == 0)); then
        return # empty file, no copyright needed
    fi
    # BUG FIX: the loop variable and the array index must agree -- the old
    # code iterated `line` but indexed with the unset variable `i`, so only
    # regexes[0] was ever tested, and always against lines[0].
    local i
    for i in "${!regexes[@]}"; do
        # The regex strings carry their own shell quoting (so they can contain
        # spaces); evaluate the whole [[ ]] test with eval.
        eval '[[ "${lines[i]}" =~ '"${regexes[i]}"' ]]' ||
            fail_file "$file" "$i" "doesn't match regex: ${regexes[i]}"
    done
}

# Expected header lines per comment style. The embedded double quotes make the
# text a literal (non-regex) match inside the eval'd [[ =~ ]] test.
POUND_HEADER=('^"# SPDX-License-Identifier: LGPL-3.0-or-later"$' '^"# See Notices.txt for copyright information"$')
SLASH_HEADER=('^"// SPDX-License-Identifier: LGPL-3.0-or-later"$' '^"// See Notices.txt for copyright information"$')
MD_HEADER=('^"<!--"$' '^"SPDX-License-Identifier: LGPL-3.0-or-later"$' '^"See Notices.txt for copyright information"$')

function main()
{
    local IFS=$'\n'
    [[ -z "$(git status --porcelain)" ]] || fail "git repo is dirty"
    local file
    for file in $(git ls-tree --name-only --full-tree -r HEAD); do
        case "/$file" in
        /Cargo.lock)
            # generated file
            ;;
        */LICENSE.md|*/Notices.txt)
            # copyright file
            ;;
        /.forgejo/workflows/*.yml|*/.gitignore|*.toml|*.py)
            check_file "$file" "${POUND_HEADER[@]}"
            ;;
        *.md)
            check_file "$file" "${MD_HEADER[@]}"
            ;;
        *.sh)
            # shell scripts must start with a shebang, then the pound header
            check_file "$file" '^"#!"' "${POUND_HEADER[@]}"
            ;;
        *.rs)
            check_file "$file" "${SLASH_HEADER[@]}"
            ;;
        *)
            fail_file "$file" 0 "unimplemented file kind -- you need to add it to $0"
            ;;
        esac
    done
}
main

3831
src/lib.rs

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,38 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use parse_powerisa_pdf::pdf::Pdf;
use std::{
error::Error,
io::{IsTerminal, Read},
process::ExitCode,
};
fn main() -> std::process::ExitCode {
parse_powerisa_pdf::main()
/// Entry point: read a PDF from a file path argument or from stdin, parse it,
/// and (for now) dump the trailer dictionary. Exits with a usage message on
/// bad arguments.
fn main() -> Result<ExitCode, Box<dyn Error>> {
    let args: Vec<_> = std::env::args_os().collect();
    // Reject: any option-looking argument other than a lone `-` (which means
    // stdin), more than one positional argument, or an implicit read from a
    // terminal stdin (the user must pass `-` explicitly for that).
    if args
        .iter()
        .skip(1)
        .any(|v| v.as_encoded_bytes().starts_with(b"-") && v != "-")
        || args.len() > 2
        || (args.len() == 1 && std::io::stdin().is_terminal())
    {
        eprintln!(
            "Usage: {} [<path/to/file.pdf>]\n\
            Reads the PDF file passed on the command line,\n\
            Reads stdin if no arguments are passed or if the file name is just a dash `-`.\n\
            If stdin is a terminal, you have to pass `-` explicitly to read from it.",
            args[0].display()
        );
        return Ok(ExitCode::FAILURE);
    }
    // `-` (or no argument at all) selects stdin.
    let file_path = args.get(1).filter(|v| *v != "-");
    let input = if let Some(file_path) = file_path {
        std::fs::read(file_path)?
    } else {
        let mut buf = Vec::new();
        std::io::stdin().lock().read_to_end(&mut buf)?;
        buf
    };
    let pdf = Pdf::parse(input)?;
    // NOTE(review): only the trailer dictionary is dumped so far; the rest of
    // the conversion pipeline is still unimplemented (`todo!` below).
    println!("{:#?}", pdf.trailer.trailer_dictionary());
    todo!();
    Ok(ExitCode::SUCCESS)
}

View file

@ -1,871 +0,0 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use mupdf_sys::{
fz_buffer, fz_buffer_storage, fz_clone_context, fz_color_params, fz_colorspace, fz_concat,
fz_context, fz_device, fz_document, fz_drop_buffer, fz_drop_context, fz_drop_device,
fz_drop_document, fz_drop_page, fz_drop_path, fz_drop_text, fz_error_type_FZ_ERROR_GENERIC,
fz_font, fz_font_ascender, fz_font_descender, fz_font_is_bold, fz_font_is_italic, fz_font_name,
fz_matrix, fz_matrix_expansion, fz_page, fz_path, fz_path_walker, fz_point, fz_rect,
fz_stroke_state, fz_text, fz_text_item, fz_text_span, fz_transform_point,
fz_transform_point_xy, fz_transform_vector, fz_walk_path, mupdf_document_page_count,
mupdf_drop_error, mupdf_error_t, mupdf_load_page, mupdf_new_base_context,
mupdf_new_derived_device, mupdf_open_document, mupdf_page_to_xml, mupdf_pdf_page_transform,
mupdf_run_page, pdf_page, pdf_page_from_fz_page,
};
use std::{
cell::{Cell, UnsafeCell},
ffi::{CStr, c_int, c_void},
fmt,
marker::PhantomData,
mem::ManuallyDrop,
ptr::{self, NonNull},
sync::{Mutex, OnceLock},
};
/// An error reported by the MuPDF C library: the raw `fz_error_type_*` code
/// plus the (lossily UTF-8-decoded) message text.
#[derive(Debug)]
pub(crate) struct MuPdfError {
    type_: c_int,
    message: String,
}
impl MuPdfError {
    /// Wrap an arbitrary message as a generic (`FZ_ERROR_GENERIC`) error.
    fn new_generic(message: impl ToString) -> Self {
        let message = message.to_string();
        let type_ = fz_error_type_FZ_ERROR_GENERIC as _;
        Self { type_, message }
    }
}
impl fmt::Display for MuPdfError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { type_, message } = self;
        write!(f, "MuPDF error: type: {}, message: {}", type_, message)
    }
}
impl std::error::Error for MuPdfError {}
/// Owning handle for a `mupdf_error_t` allocated by the C shim; dropping it
/// frees the error via `mupdf_drop_error`.
struct OwnedMuPdfError(NonNull<mupdf_error_t>);
impl Drop for OwnedMuPdfError {
    fn drop(&mut self) {
        unsafe {
            mupdf_drop_error(self.0.as_ptr());
        }
    }
}
/// Call a MuPDF shim function that reports errors through an out-pointer.
///
/// `f` is handed a `*mut mupdf_error_t` slot; if the callee leaves it null,
/// `f`'s return value is passed through as `Ok`. Otherwise the error's type
/// and message are copied into an owned `MuPdfError` (the C-side error is
/// freed when `OwnedMuPdfError` drops).
unsafe fn mupdf_try<R>(f: impl FnOnce(&mut *mut mupdf_error_t) -> R) -> Result<R, MuPdfError> {
    let mut err = ptr::null_mut();
    let retval = f(&mut err);
    let Some(err) = NonNull::new(err).map(OwnedMuPdfError) else {
        return Ok(retval);
    };
    unsafe {
        // Copy out type and message while `err` keeps the C allocation alive;
        // `to_string_lossy` tolerates non-UTF-8 message bytes.
        Err(MuPdfError {
            type_: (*err.0.as_ptr()).type_,
            message: CStr::from_ptr((*err.0.as_ptr()).message)
                .to_string_lossy()
                .into_owned(),
        })
    }
}
/// An owned, per-thread MuPDF `fz_context`, cloned from a lazily-created
/// process-wide base context.
pub(crate) struct Context(NonNull<fz_context>);
impl Context {
    /// Clone a new context from the shared base context, creating the base
    /// context on first use. Panics if MuPDF cannot allocate either one.
    fn new() -> Self {
        struct BaseContext(NonNull<fz_context>);
        // SAFETY-adjacent note: the base context is only ever used under the
        // Mutex below, so moving it between threads is the intended usage.
        unsafe impl Send for BaseContext {}
        static CTX: OnceLock<Mutex<BaseContext>> = OnceLock::new();
        let base = CTX
            .get_or_init(|| {
                let ctx = unsafe { mupdf_new_base_context() };
                let Some(ctx) = NonNull::new(ctx).map(BaseContext) else {
                    panic!("failed to allocate a MuPDF context");
                };
                Mutex::new(ctx)
            })
            .lock()
            .expect("not poisoned");
        // The lock is held across fz_clone_context so clones are serialized.
        let ctx = unsafe { fz_clone_context(base.0.as_ptr()) };
        let Some(ctx) = NonNull::new(ctx).map(Self) else {
            drop(base);
            panic!("failed to clone a MuPDF context");
        };
        ctx
    }
    /// Run `f` with this thread's cached `Context` (created on first use).
    pub(crate) fn with<R>(f: impl FnOnce(&Self) -> R) -> R {
        thread_local! {
            static CTX: Context = Context::new();
        }
        CTX.with(f)
    }
    /// Borrow this context as a copyable `ContextRef`.
    pub(crate) fn as_ref(&self) -> ContextRef<'_> {
        unsafe { ContextRef::from_ptr(self.0.as_ptr()) }
    }
}
impl Drop for Context {
    fn drop(&mut self) {
        unsafe {
            fz_drop_context(self.0.as_ptr());
        }
    }
}
/// A borrowed, copyable reference to a `fz_context`; `UnsafeCell` is used so
/// a `*mut fz_context` can be handed back to the C API from a shared ref.
#[derive(Clone, Copy)]
pub(crate) struct ContextRef<'ctx>(&'ctx UnsafeCell<fz_context>);
impl<'ctx> ContextRef<'ctx> {
    /// Caller must guarantee `ptr` is valid for the lifetime `'ctx`.
    unsafe fn from_ptr(ptr: *mut fz_context) -> Self {
        Self(unsafe { &*ptr.cast() })
    }
}
impl<'ctx> From<&'ctx Context> for ContextRef<'ctx> {
    fn from(value: &'ctx Context) -> Self {
        value.as_ref()
    }
}
/// An owned MuPDF document tied to the context it was opened with.
pub(crate) struct Document<'ctx> {
    ptr: *mut fz_document,
    ctx: ContextRef<'ctx>,
}
impl<'ctx> Document<'ctx> {
    /// Open the document at `file_name` (a filesystem path as a C string).
    pub(crate) fn open(
        ctx: impl Into<ContextRef<'ctx>>,
        file_name: &CStr,
    ) -> Result<Document<'ctx>, MuPdfError> {
        let ctx = ctx.into();
        unsafe {
            mupdf_try(|errptr| mupdf_open_document(ctx.0.get(), file_name.as_ptr(), errptr))
                .map(|ptr| Document { ptr, ctx })
        }
    }
    /// Number of pages; a negative count from C is reported as an error.
    pub(crate) fn page_count(&self) -> Result<usize, MuPdfError> {
        unsafe {
            mupdf_try(|errptr| mupdf_document_page_count(self.ctx.0.get(), self.ptr, errptr))?
                .try_into()
                .map_err(MuPdfError::new_generic)
        }
    }
    /// Load the 0-based page `page`.
    pub(crate) fn load_page(&self, page: usize) -> Result<Page<'ctx>, MuPdfError> {
        let page = page.try_into().map_err(MuPdfError::new_generic)?;
        unsafe {
            mupdf_try(|errptr| mupdf_load_page(self.ctx.0.get(), self.ptr, page, errptr))
                .map(|ptr| Page { ptr, ctx: self.ctx })
        }
    }
}
impl<'ctx> Drop for Document<'ctx> {
    fn drop(&mut self) {
        unsafe {
            fz_drop_document(self.ctx.0.get(), self.ptr);
        }
    }
}
/// An owned MuPDF `fz_buffer`, dropped via `fz_drop_buffer`.
struct Buffer<'ctx> {
    ptr: *mut fz_buffer,
    ctx: ContextRef<'ctx>,
}
impl<'ctx> Buffer<'ctx> {
    /// View the buffer's bytes. An empty buffer is returned as `&mut []`
    /// without touching the (possibly null) storage pointer.
    fn storage(&mut self) -> &mut [u8] {
        unsafe {
            let mut ptr = ptr::null_mut();
            let len = fz_buffer_storage(self.ctx.0.get(), self.ptr, &raw mut ptr);
            if len == 0 {
                &mut []
            } else {
                std::slice::from_raw_parts_mut(ptr, len)
            }
        }
    }
}
impl<'ctx> Drop for Buffer<'ctx> {
    fn drop(&mut self) {
        unsafe {
            fz_drop_buffer(self.ctx.0.get(), self.ptr);
        }
    }
}
/// An owned MuPDF page, dropped via `fz_drop_page`.
pub(crate) struct Page<'ctx> {
    ptr: *mut fz_page,
    ctx: ContextRef<'ctx>,
}
impl<'ctx> Page<'ctx> {
    /// The context this page belongs to.
    pub(crate) fn ctx(&self) -> ContextRef<'ctx> {
        self.ctx
    }
    /// Render the page through `device` with the transform `ctm`
    /// (no cookie is passed, hence the null pointer).
    pub(crate) fn run<T>(
        &self,
        device: &Device<'ctx, T>,
        ctm: fz_matrix,
    ) -> Result<(), MuPdfError> {
        unsafe {
            mupdf_try(|errptr| {
                mupdf_run_page(
                    self.ctx.0.get(),
                    self.ptr,
                    device.dev,
                    ctm,
                    ptr::null_mut(),
                    errptr,
                )
            })
        }
    }
    /// Render the page to the shim's XML representation; errors if the
    /// produced bytes are not valid UTF-8.
    pub(crate) fn to_xml(&self) -> Result<String, MuPdfError> {
        unsafe {
            let mut buffer =
                mupdf_try(|errptr| mupdf_page_to_xml(self.ctx.0.get(), self.ptr, errptr))
                    .map(|ptr| Buffer { ptr, ctx: self.ctx })?;
            Ok(str::from_utf8(buffer.storage())
                .map_err(MuPdfError::new_generic)?
                .into())
        }
    }
    /// Downcast to the PDF-specific page type; `None` when the underlying
    /// document is not a PDF (pdf_page_from_fz_page returns null).
    pub(crate) fn pdf_page<'a>(&'a self) -> Option<PdfPageRef<'a, 'ctx>> {
        unsafe {
            let ptr = pdf_page_from_fz_page(self.ctx.0.get(), self.ptr);
            NonNull::new(ptr).map(|ptr| PdfPageRef {
                ptr: &*ptr.as_ptr().cast(),
                ctx: self.ctx,
            })
        }
    }
}
impl<'ctx> Drop for Page<'ctx> {
    fn drop(&mut self) {
        unsafe {
            fz_drop_page(self.ctx.0.get(), self.ptr);
        }
    }
}
/// A borrowed, copyable reference to a PDF-specific page (`pdf_page`),
/// obtained from `Page::pdf_page` and valid while that `Page` lives.
#[derive(Clone, Copy)]
pub(crate) struct PdfPageRef<'a, 'ctx> {
    ptr: &'a UnsafeCell<pdf_page>,
    ctx: ContextRef<'ctx>,
}
impl<'a, 'ctx> PdfPageRef<'a, 'ctx> {
    /// The page's transform matrix as reported by the PDF layer.
    pub(crate) fn transform(self) -> Result<fz_matrix, MuPdfError> {
        unsafe {
            mupdf_try(|errptr| mupdf_pdf_page_transform(self.ctx.0.get(), self.ptr.get(), errptr))
        }
    }
}
/// An owned MuPDF render device whose callbacks dispatch into a boxed `T`
/// (see `DeviceCallbacks`). The `PhantomData` records ownership of the `T`
/// stored inside the C-side device struct.
pub(crate) struct Device<'ctx, T: 'ctx> {
    dev: *mut fz_device,
    ctx: ContextRef<'ctx>,
    _phantom: PhantomData<Box<Cell<T>>>,
}
/// Rust-side handlers for the subset of `fz_device` callbacks this crate
/// uses; each receives the drawing context, the path/text being drawn, and
/// the current transform (`ctm`), plus a clip `scissor` where applicable.
pub(crate) trait DeviceCallbacks<'ctx> {
    fn fill_path(&self, ctx: ContextRef<'ctx>, path: &Path<'ctx>, even_odd: bool, ctm: fz_matrix);
    fn stroke_path(&self, ctx: ContextRef<'ctx>, path: &Path<'ctx>, ctm: fz_matrix);
    fn clip_path(
        &self,
        ctx: ContextRef<'ctx>,
        path: &Path<'ctx>,
        even_odd: bool,
        ctm: fz_matrix,
        scissor: fz_rect,
    );
    fn clip_stroke_path(
        &self,
        ctx: ContextRef<'ctx>,
        path: &Path<'ctx>,
        ctm: fz_matrix,
        scissor: fz_rect,
    );
    fn fill_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix);
    fn stroke_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix);
    fn clip_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix, scissor: fz_rect);
    fn clip_stroke_text(
        &self,
        ctx: ContextRef<'ctx>,
        text: &Text<'ctx>,
        ctm: fz_matrix,
        scissor: fz_rect,
    );
    fn ignore_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix);
}
impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> {
    /// Create a derived `fz_device` whose callback slots are wired to the
    /// `extern "C"` trampolines below, which forward into `value`'s
    /// `DeviceCallbacks` methods.
    pub(crate) fn new(ctx: impl Into<ContextRef<'ctx>>, value: Box<T>) -> Result<Self, MuPdfError> {
        let ctx = ctx.into();
        unsafe {
            let dev_ptr = mupdf_try(|errptr| {
                mupdf_new_derived_device::<DeviceStruct<T>>(
                    ctx.0.get(),
                    c"parse_powerisa_pdf::mupdf_ffi::Device",
                    errptr,
                )
            })?;
            // Construct the owning wrapper first so the device is freed even
            // if something below panics.
            let retval = Device {
                dev: dev_ptr.cast(),
                ctx,
                _phantom: PhantomData,
            };
            // The C allocator left `value` uninitialized; write it in place.
            (&raw mut (*dev_ptr).value).write(value);
            // Destructure the base fz_device to get at the callback slots;
            // `..` ignores the callbacks this crate doesn't handle.
            let fz_device {
                drop_device,
                fill_path,
                stroke_path,
                clip_path,
                clip_stroke_path,
                fill_text,
                stroke_text,
                clip_text,
                clip_stroke_text,
                ignore_text,
                ..
            } = &mut (*dev_ptr).base;
            *drop_device = Some(Self::drop_device_fn);
            *fill_path = Some(Self::fill_path_fn);
            *stroke_path = Some(Self::stroke_path_fn);
            *clip_path = Some(Self::clip_path_fn);
            *clip_stroke_path = Some(Self::clip_stroke_path_fn);
            *fill_text = Some(Self::fill_text_fn);
            *stroke_text = Some(Self::stroke_text_fn);
            *clip_text = Some(Self::clip_text_fn);
            *clip_stroke_text = Some(Self::clip_stroke_text_fn);
            *ignore_text = Some(Self::ignore_text_fn);
            Ok(retval)
        }
    }
    /// Borrow the callbacks value stored inside the device.
    pub(crate) fn get(&self) -> &T {
        unsafe { &(*self.dev.cast::<DeviceStruct<T>>()).value }
    }
    /// Called by MuPDF when the device is dropped: runs the Box<T>'s
    /// destructor in place (the C side frees the allocation itself).
    unsafe extern "C" fn drop_device_fn(_ctx: *mut fz_context, dev: *mut fz_device) {
        unsafe {
            (&raw mut (*dev.cast::<DeviceStruct<T>>()).value).drop_in_place();
        }
    }
    // Each trampoline below recovers the DeviceStruct<T> from the fz_device
    // pointer and forwards to the matching DeviceCallbacks method. The
    // borrowed C path/text pointers are wrapped in ManuallyDrop so the
    // temporary Path/Text wrappers never run their Drop (MuPDF owns them).
    unsafe extern "C" fn fill_path_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        path: *const fz_path,
        even_odd: c_int,
        ctm: fz_matrix,
        _color_space: *mut fz_colorspace,
        _color: *const f32,
        _alpha: f32,
        _color_params: fz_color_params,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.fill_path(
            ctx,
            &ManuallyDrop::new(Path {
                ptr: path.cast_mut(),
                ctx,
            }),
            even_odd != 0,
            ctm,
        );
    }
    unsafe extern "C" fn stroke_path_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        path: *const fz_path,
        _stroke_state: *const fz_stroke_state,
        ctm: fz_matrix,
        _color_space: *mut fz_colorspace,
        _color: *const f32,
        _alpha: f32,
        _color_params: fz_color_params,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.stroke_path(
            ctx,
            &ManuallyDrop::new(Path {
                ptr: path.cast_mut(),
                ctx,
            }),
            ctm,
        );
    }
    unsafe extern "C" fn clip_path_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        path: *const fz_path,
        even_odd: ::std::os::raw::c_int,
        ctm: fz_matrix,
        scissor: fz_rect,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.clip_path(
            ctx,
            &ManuallyDrop::new(Path {
                ptr: path.cast_mut(),
                ctx,
            }),
            even_odd != 0,
            ctm,
            scissor,
        );
    }
    unsafe extern "C" fn clip_stroke_path_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        path: *const fz_path,
        _stroke_state: *const fz_stroke_state,
        ctm: fz_matrix,
        scissor: fz_rect,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.clip_stroke_path(
            ctx,
            &ManuallyDrop::new(Path {
                ptr: path.cast_mut(),
                ctx,
            }),
            ctm,
            scissor,
        );
    }
    unsafe extern "C" fn fill_text_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        text: *const fz_text,
        ctm: fz_matrix,
        _color_space: *mut fz_colorspace,
        _color: *const f32,
        _alpha: f32,
        _color_params: fz_color_params,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.fill_text(
            ctx,
            &ManuallyDrop::new(Text {
                ptr: text.cast_mut(),
                ctx,
            }),
            ctm,
        );
    }
    unsafe extern "C" fn stroke_text_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        text: *const fz_text,
        _stroke_state: *const fz_stroke_state,
        ctm: fz_matrix,
        _color_space: *mut fz_colorspace,
        _color: *const f32,
        _alpha: f32,
        _color_params: fz_color_params,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.stroke_text(
            ctx,
            &ManuallyDrop::new(Text {
                ptr: text.cast_mut(),
                ctx,
            }),
            ctm,
        );
    }
    unsafe extern "C" fn clip_text_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        text: *const fz_text,
        ctm: fz_matrix,
        scissor: fz_rect,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.clip_text(
            ctx,
            &ManuallyDrop::new(Text {
                ptr: text.cast_mut(),
                ctx,
            }),
            ctm,
            scissor,
        );
    }
    unsafe extern "C" fn clip_stroke_text_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        text: *const fz_text,
        _stroke_state: *const fz_stroke_state,
        ctm: fz_matrix,
        scissor: fz_rect,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.clip_stroke_text(
            ctx,
            &ManuallyDrop::new(Text {
                ptr: text.cast_mut(),
                ctx,
            }),
            ctm,
            scissor,
        );
    }
    unsafe extern "C" fn ignore_text_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        text: *const fz_text,
        ctm: fz_matrix,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.ignore_text(
            ctx,
            &ManuallyDrop::new(Text {
                ptr: text.cast_mut(),
                ctx,
            }),
            ctm,
        );
    }
}
/// Releases the underlying MuPDF device when the wrapper is dropped.
impl<'ctx, T> Drop for Device<'ctx, T> {
    fn drop(&mut self) {
        unsafe {
            // FIXME: fz_close_device may throw exceptions
            // fz_close_device(self.ctx.0.get(), self.dev);
            fz_drop_device(self.ctx.0.get(), self.dev);
        }
    }
}
/// Backing allocation for a custom device: the MuPDF `fz_device` header
/// immediately followed by the user's value.
///
/// `#[repr(C)]` with `base` as the first field is load-bearing: the C side
/// only sees a `*mut fz_device`, and the callback trampolines cast that
/// pointer back to `DeviceStruct<T>` to reach `value`.
#[repr(C)]
struct DeviceStruct<T> {
    base: fz_device,
    value: Box<T>,
}
/// Callback interface used by [`Path::walk`] to report a path's segments in
/// drawing order.
pub(crate) trait PathWalker<'ctx> {
    /// Begins a new subpath at `(x, y)`.
    fn move_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32);
    /// Appends a straight line segment to `(x, y)`.
    fn line_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32);
    /// Appends a cubic Bézier segment with control points `(x1, y1)` and
    /// `(x2, y2)` ending at `(x3, y3)`.
    fn curve_to(
        &mut self,
        ctx: ContextRef<'ctx>,
        x1: f32,
        y1: f32,
        x2: f32,
        y2: f32,
        x3: f32,
        y3: f32,
    );
    /// Closes the current subpath.
    fn close_path(&mut self, ctx: ContextRef<'ctx>);
    /// Reports an axis-aligned rectangle with opposite corners `(x1, y1)` and
    /// `(x2, y2)`.
    ///
    /// The default expands the rectangle into one closed subpath:
    /// move to the first corner, then draw the three remaining edges and
    /// close. (Bug fix: this previously called `move_to` for all four
    /// corners, producing four empty subpaths instead of an outline.)
    fn rect_to(&mut self, ctx: ContextRef<'ctx>, x1: f32, y1: f32, x2: f32, y2: f32) {
        self.move_to(ctx, x1, y1);
        self.line_to(ctx, x2, y1);
        self.line_to(ctx, x2, y2);
        self.line_to(ctx, x1, y2);
        self.close_path(ctx);
    }
}
/// Forwarding impl: a `&mut` reference to any walker is itself a walker, so
/// callers can pass walkers by reference without giving up ownership.
impl<'ctx, T: ?Sized + PathWalker<'ctx>> PathWalker<'ctx> for &'_ mut T {
    fn move_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32) {
        (**self).move_to(ctx, x, y);
    }
    fn line_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32) {
        (**self).line_to(ctx, x, y);
    }
    fn curve_to(
        &mut self,
        ctx: ContextRef<'ctx>,
        x1: f32,
        y1: f32,
        x2: f32,
        y2: f32,
        x3: f32,
        y3: f32,
    ) {
        (**self).curve_to(ctx, x1, y1, x2, y2, x3, y3);
    }
    fn close_path(&mut self, ctx: ContextRef<'ctx>) {
        (**self).close_path(ctx);
    }
    // Forward `rect_to` too, so any override on the underlying walker (not
    // just the trait default) is respected.
    fn rect_to(&mut self, ctx: ContextRef<'ctx>, x1: f32, y1: f32, x2: f32, y2: f32) {
        (**self).rect_to(ctx, x1, y1, x2, y2);
    }
}
/// Owned wrapper around a MuPDF `fz_path`; releases the reference via
/// `fz_drop_path` on drop.
pub(crate) struct Path<'ctx> {
    ptr: *mut fz_path,
    ctx: ContextRef<'ctx>,
}
impl<'ctx> Path<'ctx> {
    /// Walks the path, reporting each segment to `walker` via `fz_walk_path`.
    ///
    /// The walker is passed to C as an opaque pointer and cast back inside
    /// the per-segment trampolines below; it only needs to outlive this call.
    pub(crate) fn walk<W: PathWalker<'ctx>>(&self, mut walker: W) {
        unsafe {
            fz_walk_path(
                self.ctx.0.get(),
                self.ptr,
                // Static callback table, monomorphized per walker type W.
                // quadto/curvetov/curvetoy are left None; MuPDF then
                // presumably falls back to the curveto callback — TODO confirm.
                const {
                    &fz_path_walker {
                        moveto: Some(Self::move_to_fn::<W>),
                        lineto: Some(Self::line_to_fn::<W>),
                        curveto: Some(Self::curve_to_fn::<W>),
                        closepath: Some(Self::close_path_fn::<W>),
                        quadto: None,
                        curvetov: None,
                        curvetoy: None,
                        rectto: Some(Self::rect_to_fn::<W>),
                    }
                },
                // Opaque user pointer: address of the local `walker`.
                (&raw mut walker).cast(),
            );
        }
    }
    /// SAFETY: `arg` must be the `*mut W` passed to `fz_walk_path` above.
    unsafe extern "C" fn move_to_fn<W: PathWalker<'ctx>>(
        ctx: *mut fz_context,
        arg: *mut c_void,
        x: f32,
        y: f32,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut *arg.cast::<W>() };
        this.move_to(ctx, x, y);
    }
    /// SAFETY: see `move_to_fn`.
    unsafe extern "C" fn line_to_fn<W: PathWalker<'ctx>>(
        ctx: *mut fz_context,
        arg: *mut c_void,
        x: f32,
        y: f32,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut *arg.cast::<W>() };
        this.line_to(ctx, x, y);
    }
    /// SAFETY: see `move_to_fn`.
    unsafe extern "C" fn curve_to_fn<W: PathWalker<'ctx>>(
        ctx: *mut fz_context,
        arg: *mut c_void,
        x1: f32,
        y1: f32,
        x2: f32,
        y2: f32,
        x3: f32,
        y3: f32,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut *arg.cast::<W>() };
        this.curve_to(ctx, x1, y1, x2, y2, x3, y3);
    }
    /// SAFETY: see `move_to_fn`.
    unsafe extern "C" fn close_path_fn<W: PathWalker<'ctx>>(
        ctx: *mut fz_context,
        arg: *mut c_void,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut *arg.cast::<W>() };
        this.close_path(ctx);
    }
    /// SAFETY: see `move_to_fn`.
    unsafe extern "C" fn rect_to_fn<W: PathWalker<'ctx>>(
        ctx: *mut fz_context,
        arg: *mut c_void,
        x1: f32,
        y1: f32,
        x2: f32,
        y2: f32,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut *arg.cast::<W>() };
        this.rect_to(ctx, x1, y1, x2, y2);
    }
}
/// Releases the MuPDF path reference held by this wrapper.
impl<'ctx> Drop for Path<'ctx> {
    fn drop(&mut self) {
        unsafe {
            fz_drop_path(self.ctx.0.get(), self.ptr);
        }
    }
}
/// Owned wrapper around a MuPDF `fz_text`; releases the reference via
/// `fz_drop_text` on drop. Callbacks that merely borrow a text object wrap
/// it in `ManuallyDrop` to suppress that drop.
pub(crate) struct Text<'ctx> {
    ptr: *mut fz_text,
    ctx: ContextRef<'ctx>,
}
/// Releases the MuPDF text reference held by this wrapper.
impl<'ctx> Drop for Text<'ctx> {
    fn drop(&mut self) {
        unsafe {
            fz_drop_text(self.ctx.0.get(), self.ptr);
        }
    }
}
impl<'ctx> Text<'ctx> {
    /// Iterates over this text object's spans, starting from the underlying
    /// `fz_text`'s `head` pointer (an intrusive singly linked list).
    pub(crate) fn spans<'a>(&'a self) -> TextSpanIter<'a, 'ctx> {
        TextSpanIter {
            // A null `head` means no spans; NonNull maps that to `None`.
            ptr: unsafe { NonNull::new((*self.ptr).head).map(|ptr| &*ptr.as_ptr().cast()) },
            ctx: self.ctx,
            _phantom: PhantomData,
        }
    }
}
/// Iterator over the span linked list of a [`Text`].
///
/// `_phantom` ties the iterator's lifetime to the owning `Text`, so spans
/// cannot outlive the text object that keeps them alive.
#[derive(Clone)]
pub(crate) struct TextSpanIter<'a, 'ctx> {
    // Current node, or `None` when the list is exhausted.
    ptr: Option<&'a UnsafeCell<fz_text_span>>,
    ctx: ContextRef<'ctx>,
    _phantom: PhantomData<&'a Text<'ctx>>,
}
impl<'a, 'ctx> Iterator for TextSpanIter<'a, 'ctx> {
    type Item = TextSpanRef<'a, 'ctx>;
    fn next(&mut self) -> Option<Self::Item> {
        let ptr = self.ptr?;
        // Advance along the intrusive `next` pointer; null terminates.
        self.ptr = unsafe { NonNull::new((*ptr.get()).next).map(|ptr| &*ptr.as_ptr().cast()) };
        Some(TextSpanRef {
            ptr,
            ctx: self.ctx,
            _phantom: PhantomData,
        })
    }
}
/// Borrowed view of one `fz_text_span`, valid for the lifetime of the
/// owning [`Text`].
#[derive(Copy, Clone)]
pub(crate) struct TextSpanRef<'a, 'ctx> {
    ptr: &'a UnsafeCell<fz_text_span>,
    ctx: ContextRef<'ctx>,
    _phantom: PhantomData<&'a Text<'ctx>>,
}
/// Text layout direction of a span, decoded from the span's `wmode` bit.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub(crate) enum WriteMode {
    Horizontal,
    Vertical,
}
impl<'a, 'ctx> TextSpanRef<'a, 'ctx> {
    /// Raw access to the underlying span for callers that need fields not
    /// exposed by the accessors below.
    #[allow(dead_code)]
    pub(crate) fn get(self) -> &'a UnsafeCell<fz_text_span> {
        self.ptr
    }
    /// The span's font, borrowed for the lifetime of the owning text.
    pub(crate) fn font(self) -> FontRef<'a, 'ctx> {
        FontRef {
            ptr: unsafe { &*(*self.ptr.get()).font.cast::<UnsafeCell<fz_font>>() },
            ctx: self.ctx,
            _phantom: PhantomData,
        }
    }
    /// The span's text rendering matrix.
    pub(crate) fn trm(self) -> fz_matrix {
        unsafe { (*self.ptr.get()).trm }
    }
    /// Layout direction: non-zero `wmode` means vertical writing.
    pub(crate) fn write_mode(self) -> WriteMode {
        if unsafe { (*self.ptr.get()).wmode() != 0 } {
            WriteMode::Vertical
        } else {
            WriteMode::Horizontal
        }
    }
    /// The span's glyph items as a slice.
    pub(crate) fn items(self) -> &'a [fz_text_item] {
        let len = unsafe { (*self.ptr.get()).len } as usize;
        // Guard the empty case so from_raw_parts never sees a potentially
        // null/dangling items pointer.
        if len == 0 {
            return &[];
        }
        unsafe { std::slice::from_raw_parts((*self.ptr.get()).items, len) }
    }
}
/// Borrowed view of a `fz_font`, valid for the lifetime of the owning
/// [`Text`].
#[derive(Clone, Copy)]
pub(crate) struct FontRef<'a, 'ctx> {
    ptr: &'a UnsafeCell<fz_font>,
    ctx: ContextRef<'ctx>,
    _phantom: PhantomData<&'a Text<'ctx>>,
}
impl<'a, 'ctx> FontRef<'a, 'ctx> {
    /// Raw access to the underlying font.
    #[allow(dead_code)]
    pub(crate) fn get(self) -> &'a UnsafeCell<fz_font> {
        self.ptr
    }
    /// The font's name as reported by `fz_font_name`.
    ///
    /// Panics if the name is not valid UTF-8 (treated as a broken input
    /// invariant rather than a recoverable error).
    pub(crate) fn name(self) -> &'a str {
        unsafe { CStr::from_ptr(fz_font_name(self.ctx.0.get(), self.ptr.get())) }
            .to_str()
            .expect("font name isn't valid UTF-8")
    }
    /// Whether MuPDF classifies this font as bold.
    #[allow(dead_code)]
    pub(crate) fn is_bold(self) -> bool {
        unsafe { fz_font_is_bold(self.ctx.0.get(), self.ptr.get()) != 0 }
    }
    /// Whether MuPDF classifies this font as italic.
    #[allow(dead_code)]
    pub(crate) fn is_italic(self) -> bool {
        unsafe { fz_font_is_italic(self.ctx.0.get(), self.ptr.get()) != 0 }
    }
    /// The font's ascender metric.
    pub(crate) fn ascender(self) -> f32 {
        unsafe { fz_font_ascender(self.ctx.0.get(), self.ptr.get()) }
    }
    /// The font's descender metric.
    pub(crate) fn descender(self) -> f32 {
        unsafe { fz_font_descender(self.ctx.0.get(), self.ptr.get()) }
    }
}
/// Safe wrapper over `fz_transform_point`: applies matrix `m` to `point`
/// (including translation).
#[allow(dead_code)]
pub(crate) fn transform_point(point: fz_point, m: fz_matrix) -> fz_point {
    unsafe { fz_transform_point(point, m) }
}
/// Safe wrapper over `fz_transform_point_xy`: transforms the point `(x, y)`
/// by matrix `m`.
pub(crate) fn transform_point_xy(x: f32, y: f32, m: fz_matrix) -> fz_point {
    unsafe { fz_transform_point_xy(x, y, m) }
}
/// Safe wrapper over `fz_transform_vector`: applies `m` to a direction
/// vector (no translation component).
pub(crate) fn transform_vector(vector: fz_point, m: fz_matrix) -> fz_point {
    unsafe { fz_transform_vector(vector, m) }
}
/// Safe wrapper over `fz_matrix_expansion`: MuPDF's scalar estimate of how
/// much `m` scales distances.
pub(crate) fn matrix_expansion(m: fz_matrix) -> f32 {
    unsafe { fz_matrix_expansion(m) }
}
/// Safe wrapper over `fz_concat`: matrix product of `left` and `right`
/// (MuPDF argument order).
pub(crate) fn concat(left: fz_matrix, right: fz_matrix) -> fz_matrix {
    unsafe { fz_concat(left, right) }
}
/// Component-wise sum of two points.
pub(crate) fn add_points(a: fz_point, b: fz_point) -> fz_point {
    let (x, y) = (a.x + b.x, a.y + b.y);
    fz_point { x, y }
}
/// Component-wise minimum of two points (e.g. the lower-left corner of the
/// bounding box of `a` and `b`). Uses `f32::min`, so its NaN handling is
/// preserved.
pub(crate) fn point_min_components(a: fz_point, b: fz_point) -> fz_point {
    let x = a.x.min(b.x);
    let y = a.y.min(b.y);
    fz_point { x, y }
}
/// Component-wise maximum of two points (e.g. the upper-right corner of the
/// bounding box of `a` and `b`). Uses `f32::max`, so its NaN handling is
/// preserved.
pub(crate) fn point_max_components(a: fz_point, b: fz_point) -> fz_point {
    let x = a.x.max(b.x);
    let y = a.y.max(b.y);
    fz_point { x, y }
}

1279
src/pdf.rs Normal file

File diff suppressed because it is too large Load diff

829
src/pdf/content_stream.rs Normal file
View file

@ -0,0 +1,829 @@
use crate::{
pdf::{
PdfObjectOrStreamDictionaryOrOperator, PdfObjects, PdfParser, PdfTokenizer,
object::{
NameOr, PdfDictionary, PdfMatrix, PdfName, PdfObject, PdfObjectDirect, PdfRectangle,
PdfStream, PdfStreamContents, PdfString, PdfStringBytesDebug, PdfStringOrNumber,
PdfVec2D,
},
parse::{
GetPdfInputPosition, PdfInputPosition, PdfInputPositionKnown,
PdfInputPositionNoCompare, PdfParse, PdfParseError,
},
render::{
PdfColorDeviceGray, PdfColorDeviceRgb, PdfRenderOperator, PdfRenderState,
PdfRenderingIntent,
},
},
util::ArcOrRef,
};
use std::{fmt, sync::Arc};
/// A content-stream operator keyword that is not (or not yet) matched
/// against the known-operator table: the raw keyword bytes plus where they
/// appeared in the input.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct PdfOperatorUnparsed {
    // Position wrapper that is ignored by comparisons.
    pos: PdfInputPositionNoCompare,
    bytes: ArcOrRef<'static, [u8]>,
}
impl GetPdfInputPosition for PdfOperatorUnparsed {
    /// Position of the operator keyword in the input.
    fn get_pdf_input_position(&self) -> PdfInputPosition {
        self.pos()
    }
}
impl fmt::Debug for PdfOperatorUnparsed {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Shared formatting helper also used by the generated operator enums.
        Self::debug_with_name("PdfOperatorUnparsed", &self.bytes, self.pos.0, f)
    }
}
/// Parsing a value from an iterator of raw objects; used by operators that
/// take a variable number of operands (the `#[parse_iter(...)]` fields in
/// the operator table below).
trait PdfParseIter: Sized {
    fn parse_iter(iter: impl IntoIterator<Item = PdfObject>) -> Result<Self, PdfParseError>;
}
impl<T: PdfParse> PdfParseIter for Arc<[T]> {
    fn parse_iter(iter: impl IntoIterator<Item = PdfObject>) -> Result<Self, PdfParseError> {
        // Collects Result<T, _> items into Result<Arc<[T]>, _>,
        // short-circuiting on the first parse error.
        FromIterator::from_iter(iter.into_iter().map(T::parse))
    }
}
impl PdfOperatorUnparsed {
    /// Creates an operator token from its input position and keyword bytes.
    pub fn new(
        pos: impl Into<PdfInputPositionNoCompare>,
        bytes: impl Into<ArcOrRef<'static, [u8]>>,
    ) -> Self {
        Self {
            pos: pos.into(),
            bytes: bytes.into(),
        }
    }
    /// Creates an operator token with no position, for statically known
    /// keywords (usable in `const` contexts).
    pub const fn new_static(bytes: &'static [u8]) -> Self {
        Self {
            pos: PdfInputPositionNoCompare::empty(),
            bytes: ArcOrRef::Ref(bytes),
        }
    }
    /// Position of the keyword in the input.
    pub fn pos(&self) -> PdfInputPosition {
        self.pos.0
    }
    /// The raw keyword bytes.
    pub fn bytes(&self) -> &ArcOrRef<'static, [u8]> {
        &self.bytes
    }
    // Shared Debug body: `Name(at <pos>, <keyword>)`. Also used by the
    // macro-generated operator enums.
    fn debug_with_name(
        name: &str,
        pdf_name: &[u8],
        pos: PdfInputPosition,
        f: &mut fmt::Formatter<'_>,
    ) -> fmt::Result {
        write!(f, "{name}(at {pos}, {})", PdfStringBytesDebug(pdf_name))
    }
    /// Debug-printable view of the keyword bytes.
    pub fn bytes_debug(&self) -> PdfStringBytesDebug<'_> {
        PdfStringBytesDebug(&self.bytes)
    }
}
/// Generates, from the operator table in the invocation below:
/// - `$PdfOperator`: a position-carrying enum of operator *keywords* (with
///   an `$Unknown` variant holding the raw bytes),
/// - `$PdfOperatorAndOperands`: the same operators *with parsed operands*,
/// - one `$VariantStruct` per operator holding its position and fields,
/// plus keyword<->enum conversions, `Debug`, position accessors, operand
/// parsing (with too-few/too-many operand errors), and `PdfRenderOperator`
/// dispatch.
macro_rules! make_pdf_operator_enum {
    // Main arm: expands the whole operator table.
    (
        $(#[$($operator_meta:tt)*])*
        $operator_enum_vis:vis enum $PdfOperator:ident;
        $(#[$($operator_and_operands_meta:tt)*])*
        $enum_vis:vis enum $PdfOperatorAndOperands:ident {
            $(#[$($unknown_variant_meta:tt)*])*
            $Unknown:ident {
                $(#[$($unknown_operands_meta:tt)*])*
                $unknown_operands:ident: $unknown_operands_ty:ty,
                $(#[$($unknown_operator_meta:tt)*])*
                $unknown_operator:ident: $unknown_operator_ty:ty,
            },
            $(
                #[kw = $kw:literal]
                $(#[$($variant_meta:tt)*])*
                $Variant:ident($VariantStruct:ident {
                    $pos:ident: PdfInputPositionNoCompare,
                    $(
                        #[$field_parse:ident($($parse_args:tt)*)]
                        $(#[$($field_meta:tt)*])*
                        $field:ident: $field_ty:ty,
                    )*
                }),
            )*
        }
    ) => {
        $(#[$($operator_meta)*])*
        $operator_enum_vis enum $PdfOperator {
            $(#[$($unknown_variant_meta)*])*
            $Unknown($unknown_operator_ty),
            $(
                $(#[$($variant_meta)*])*
                $Variant(PdfInputPositionNoCompare),
            )*
        }
        impl $PdfOperator {
            // Attaches operands to a keyword, producing the parsed form.
            $operator_enum_vis fn parse(self, operands: impl IntoIterator<Item = PdfObject>) -> Result<$PdfOperatorAndOperands, PdfParseError> {
                let operands = operands.into_iter();
                Ok(match self {
                    Self::$Unknown(operator) => $PdfOperatorAndOperands::$Unknown {
                        operands: FromIterator::from_iter(operands.map(Into::into)),
                        operator,
                    },
                    $(Self::$Variant(pos) => $VariantStruct::parse(pos, operands)?.into(),)*
                })
            }
            $operator_enum_vis fn pos(&self) -> PdfInputPosition {
                match *self {
                    Self::$Unknown(ref operator) => operator.pos(),
                    $(Self::$Variant(pos) => pos.0,)*
                }
            }
        }
        impl fmt::Debug for $PdfOperator {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                match self {
                    Self::$Unknown(operator) => PdfOperatorUnparsed::debug_with_name("Unknown", &operator.bytes, operator.pos.0, f),
                    $(Self::$Variant(pos) => PdfOperatorUnparsed::debug_with_name(stringify!($Variant), $kw, pos.0, f),)*
                }
            }
        }
        impl From<$PdfOperator> for PdfOperatorUnparsed {
            fn from(v: $PdfOperator) -> PdfOperatorUnparsed {
                match v {
                    $PdfOperator::$Unknown(operator) => operator,
                    $($PdfOperator::$Variant(pos) => PdfOperatorUnparsed { pos, bytes: ArcOrRef::Ref($kw) },)*
                }
            }
        }
        impl From<PdfOperatorUnparsed> for $PdfOperator {
            // Keyword recognition: match the raw bytes against the table.
            fn from(v: PdfOperatorUnparsed) -> $PdfOperator {
                match &**v.bytes() {
                    $($kw => Self::$Variant(v.pos),)*
                    _ => Self::$Unknown(v),
                }
            }
        }
        $(#[derive(Clone)]
        $(#[$($variant_meta)*])*
        $enum_vis struct $VariantStruct {
            $enum_vis $pos: PdfInputPositionNoCompare,
            $(
                $(#[$($field_meta)*])*
                $enum_vis $field: $field_ty,
            )*
        }
        impl fmt::Debug for $VariantStruct {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                f.debug_struct(stringify!($VariantStruct)).field("pos", &self.pos)$(.field(stringify!($field), &self.$field))*.finish()
            }
        }
        impl GetPdfInputPosition for $VariantStruct {
            fn get_pdf_input_position(&self) -> PdfInputPosition {
                self.pos()
            }
        }
        impl From<$VariantStruct> for $PdfOperatorAndOperands {
            fn from(v: $VariantStruct) -> Self {
                Self::$Variant(v)
            }
        }
        impl $VariantStruct {
            $enum_vis fn operator_from_pos(pos: impl Into<PdfInputPositionNoCompare>) -> $PdfOperator {
                $PdfOperator::$Variant(pos.into())
            }
            $enum_vis fn operator(&self) -> $PdfOperator {
                $PdfOperator::$Variant(self.pos)
            }
            $enum_vis fn pos(&self) -> PdfInputPosition {
                self.pos.0
            }
        }
        // Recurse to generate this operator's operand-parsing impl; the
        // `@impl_variant_parse` arms below pick fixed-arity vs variadic.
        make_pdf_operator_enum! {
            @impl_variant_parse
            $enum_vis enum;
            struct $VariantStruct {
                $pos: PdfInputPositionNoCompare,
                $(
                    #[$field_parse($($parse_args)*)]
                    $(#[$($field_meta)*])*
                    $field: $field_ty,
                )*
            }
        })*
        $(#[$($operator_and_operands_meta)*])*
        $enum_vis enum $PdfOperatorAndOperands {
            $(#[$($unknown_variant_meta)*])*
            $Unknown {
                $(#[$($unknown_operands_meta)*])*
                $unknown_operands: $unknown_operands_ty,
                $(#[$($unknown_operator_meta)*])*
                $unknown_operator: $unknown_operator_ty,
            },
            $(
                $(#[$($variant_meta)*])*
                $Variant($VariantStruct),
            )*
        }
        impl $PdfOperatorAndOperands {
            $enum_vis fn operator(&self) -> $PdfOperator {
                match self {
                    Self::Unknown { operator, .. } => $PdfOperator::Unknown(operator.clone()),
                    $(Self::$Variant(v) => v.operator(),)*
                }
            }
            $enum_vis fn pos(&self) -> PdfInputPosition {
                match self {
                    Self::$Unknown { operator, .. } => operator.pos(),
                    $(Self::$Variant(v) => v.pos(),)*
                }
            }
        }
        impl fmt::Debug for $PdfOperatorAndOperands {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                match self {
                    Self::$Unknown {
                        operands,
                        operator,
                    } => f.debug_struct("Unknown").field("operator", operator).field("operands", operands).finish(),
                    $(Self::$Variant($VariantStruct {
                        $pos,
                        $($field,)*
                    }) => f.debug_struct(stringify!($Variant)).field("pos", $pos)$(.field(stringify!($field), $field))*.finish(),)*
                }
            }
        }
        impl PdfRenderOperator for $PdfOperatorAndOperands {
            fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> {
                match self {
                    Self::$Unknown {
                        operands,
                        operator,
                    } => state.handle_unknown_operator(operator, operands),
                    $(Self::$Variant(v) => <$VariantStruct as PdfRenderOperator>::render(v, state),)*
                }
            }
        }
    };
    // Fixed-arity arm: each `#[parse(a, b, ...)]` names the operands it
    // consumes, in order; leftover or missing operands are errors.
    (
        @impl_variant_parse
        $enum_vis:vis enum;
        struct $VariantStruct:ident {
            $pos:ident: PdfInputPositionNoCompare,
            $(
                #[$field_parse:ident($($parse_args:ident),* $(,)?)]
                $(#[$($field_meta:tt)*])*
                $field:ident: $field_ty:ty,
            )*
        }
    ) => {
        impl $VariantStruct {
            $enum_vis fn parse(pos: impl Into<PdfInputPositionNoCompare>, operands: impl IntoIterator<Item = PdfObject>) -> Result<Self, PdfParseError> {
                let pos = pos.into();
                let mut operands = operands.into_iter();
                $($(let Some($parse_args) = operands.next() else {
                    return Err(PdfParseError::OperatorHasTooFewOperands { operator: Self::operator_from_pos(pos) });
                };)*)*
                if operands.next().is_some() {
                    return Err(PdfParseError::OperatorHasTooManyOperands { operator: Self::operator_from_pos(pos) });
                }
                Ok(Self {
                    pos,
                    $($field: <$field_ty>::$field_parse($($parse_args),*)?,)*
                })
            }
        }
    };
    // Variadic arm: a single `#[...(...)]` field consumes all operands via
    // an iterator-taking parse function (e.g. `parse_iter`).
    (
        @impl_variant_parse
        $enum_vis:vis enum;
        struct $VariantStruct:ident {
            $pos:ident: PdfInputPositionNoCompare,
            #[$field_parse:ident(...)]
            $(#[$($field_meta:tt)*])*
            $field:ident: $field_ty:ty,
        }
    ) => {
        impl $VariantStruct {
            $enum_vis fn parse(pos: impl Into<PdfInputPositionNoCompare>, operands: impl IntoIterator<Item = PdfObject>) -> Result<Self, PdfParseError> {
                let pos = pos.into();
                let operands = operands.into_iter();
                Ok(Self {
                    pos,
                    $field: <$field_ty>::$field_parse(operands)?,
                })
            }
        }
    };
}
// Table of the recognized PDF content-stream operators, keyed by keyword
// bytes. Each entry lists the operand fields and which parse helper
// (`parse`, `parse_flat`, `parse_iter`) consumes them. Unrecognized
// keywords fall into the `Unknown` variant with their raw operands.
make_pdf_operator_enum! {
    #[derive(Clone)]
    pub enum PdfOperator;
    #[derive(Clone)]
    pub enum PdfOperatorAndOperands {
        Unknown {
            operands: Arc<[PdfObjectDirect]>,
            operator: PdfOperatorUnparsed,
        },
        // --- path painting ---
        #[kw = b"b"]
        CloseFillAndStrokePath(PdfOperatorCloseFillAndStrokePath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"B"]
        FillAndStrokePath(PdfOperatorFillAndStrokePath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"b*"]
        CloseFillAndStrokePathEvenOdd(PdfOperatorCloseFillAndStrokePathEvenOdd {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"B*"]
        FillAndStrokePathEvenOdd(PdfOperatorFillAndStrokePathEvenOdd {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"BDC"]
        BeginMarkedContentWithProperties(PdfOperatorBeginMarkedContentWithProperties {
            pos: PdfInputPositionNoCompare,
            #[parse(tag)]
            tag: PdfName,
            #[parse(properties)]
            properties: NameOr<PdfDictionary>,
        }),
        #[kw = b"BI"]
        BeginInlineImage(PdfOperatorBeginInlineImage {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"BMC"]
        BeginMarkedContent(PdfOperatorBeginMarkedContent {
            pos: PdfInputPositionNoCompare,
            #[parse(tag)]
            tag: PdfName,
        }),
        #[kw = b"BT"]
        BeginText(PdfOperatorBeginText {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"BX"]
        BeginCompatibilitySection(PdfOperatorBeginCompatibilitySection {
            pos: PdfInputPositionNoCompare,
        }),
        // --- path construction ---
        #[kw = b"c"]
        CurveTo(PdfOperatorCurveTo {
            pos: PdfInputPositionNoCompare,
            #[parse(x1, y1)]
            p1: PdfVec2D,
            #[parse(x2, y2)]
            p2: PdfVec2D,
            #[parse(x3, y3)]
            p3: PdfVec2D,
        }),
        #[kw = b"cm"]
        ConcatMatrix(PdfOperatorConcatMatrix {
            pos: PdfInputPositionNoCompare,
            #[parse_flat(a, b, c, d, e, f)]
            matrix: PdfMatrix,
        }),
        #[kw = b"CS"]
        SetStrokeColorSpace(PdfOperatorSetStrokeColorSpace {
            pos: PdfInputPositionNoCompare,
            #[parse(name)]
            name: PdfName,
        }),
        #[kw = b"cs"]
        SetNonStrokeColorSpace(PdfOperatorSetNonStrokeColorSpace {
            pos: PdfInputPositionNoCompare,
            #[parse(name)]
            name: PdfName,
        }),
        #[kw = b"d"]
        SetLineDashPattern(PdfOperatorSetLineDashPattern {
            pos: PdfInputPositionNoCompare,
            #[parse(dash_array)]
            dash_array: PdfObject, // TODO: actually parse
            #[parse(dash_phase)]
            dash_phase: PdfObject, // TODO: actually parse
        }),
        #[kw = b"d0"]
        FontType3SetWidth(PdfOperatorFontType3SetWidth {
            pos: PdfInputPositionNoCompare,
            #[parse(x, y)]
            width: PdfVec2D,
        }),
        #[kw = b"d1"]
        FontType3SetWidthAndBBox(PdfOperatorFontType3SetWidthAndBBox {
            pos: PdfInputPositionNoCompare,
            #[parse(width_x, width_y)]
            width: PdfVec2D,
            #[parse_flat(lower_left_x, lower_left_y, upper_right_x, upper_right_y)]
            bbox: PdfRectangle,
        }),
        #[kw = b"Do"]
        PaintXObject(PdfOperatorPaintXObject {
            pos: PdfInputPositionNoCompare,
            #[parse(name)]
            name: PdfName,
        }),
        #[kw = b"DP"]
        DesignateMarkedContentPointWithProperties(PdfOperatorDesignateMarkedContentPointWithProperties {
            pos: PdfInputPositionNoCompare,
            #[parse(tag)]
            tag: PdfName,
            #[parse(properties)]
            properties: NameOr<PdfDictionary>,
        }),
        #[kw = b"EI"]
        EndInlineImage(PdfOperatorEndInlineImage {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"EMC"]
        EndMarkedContent(PdfOperatorEndMarkedContent {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"ET"]
        EndText(PdfOperatorEndText {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"EX"]
        EndCompatibilitySection(PdfOperatorEndCompatibilitySection {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"f"]
        FillPath(PdfOperatorFillPath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"F"]
        FillPathObsolete(PdfOperatorFillPathObsolete {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"f*"]
        FillPathEvenOdd(PdfOperatorFillPathEvenOdd {
            pos: PdfInputPositionNoCompare,
        }),
        // --- color ---
        #[kw = b"G"]
        SetStrokeGray(PdfOperatorSetStrokeGray {
            pos: PdfInputPositionNoCompare,
            #[parse(gray)]
            gray: PdfColorDeviceGray,
        }),
        #[kw = b"g"]
        SetNonStrokeGray(PdfOperatorSetNonStrokeGray {
            pos: PdfInputPositionNoCompare,
            #[parse(gray)]
            gray: PdfColorDeviceGray,
        }),
        #[kw = b"gs"]
        SetGraphicsState(PdfOperatorSetGraphicsState {
            pos: PdfInputPositionNoCompare,
            #[parse(dictionary_name)]
            dictionary_name: PdfName,
        }),
        #[kw = b"h"]
        CloseSubpath(PdfOperatorCloseSubpath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"i"]
        SetFlatnessTolerance(PdfOperatorSetFlatnessTolerance {
            pos: PdfInputPositionNoCompare,
            #[parse(flatness)]
            flatness: f32,
        }),
        #[kw = b"ID"]
        BeginInlineImageData(PdfOperatorBeginInlineImageData {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"j"]
        SetLineJoinStyle(PdfOperatorSetLineJoinStyle {
            pos: PdfInputPositionNoCompare,
            #[parse(line_join_style)]
            line_join_style: u8, // TODO parse
        }),
        #[kw = b"J"]
        SetLineCapStyle(PdfOperatorSetLineCapStyle {
            pos: PdfInputPositionNoCompare,
            #[parse(line_cap_style)]
            line_cap_style: u8, // TODO parse
        }),
        #[kw = b"K"]
        SetStrokeCmyk(PdfOperatorSetStrokeCmyk {
            pos: PdfInputPositionNoCompare,
            #[parse(c)]
            c: f32,
            #[parse(m)]
            m: f32,
            #[parse(y)]
            y: f32,
            #[parse(k)]
            k: f32,
        }),
        #[kw = b"k"]
        SetNonStrokeCmyk(PdfOperatorSetNonStrokeCmyk {
            pos: PdfInputPositionNoCompare,
            #[parse(c)]
            c: f32,
            #[parse(m)]
            m: f32,
            #[parse(y)]
            y: f32,
            #[parse(k)]
            k: f32,
        }),
        #[kw = b"l"]
        LineTo(PdfOperatorLineTo {
            pos: PdfInputPositionNoCompare,
            #[parse(x, y)]
            to: PdfVec2D,
        }),
        #[kw = b"m"]
        MoveTo(PdfOperatorMoveTo {
            pos: PdfInputPositionNoCompare,
            #[parse(x, y)]
            to: PdfVec2D,
        }),
        #[kw = b"M"]
        SetMiterLimit(PdfOperatorSetMiterLimit {
            pos: PdfInputPositionNoCompare,
            #[parse(limit)]
            limit: f32,
        }),
        #[kw = b"MP"]
        DesignateMarkedContentPoint(PdfOperatorDesignateMarkedContentPoint {
            pos: PdfInputPositionNoCompare,
            #[parse(tag)]
            tag: PdfName,
        }),
        #[kw = b"n"]
        EndPath(PdfOperatorEndPath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"q"]
        SaveGraphicsState(PdfOperatorSaveGraphicsState {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"Q"]
        RestoreGraphicsState(PdfOperatorRestoreGraphicsState {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"re"]
        Rectangle(PdfOperatorRectangle {
            pos: PdfInputPositionNoCompare,
            #[parse(x, y)]
            p: PdfVec2D,
            #[parse(width, height)]
            size: PdfVec2D,
        }),
        #[kw = b"RG"]
        SetStrokeRgb(PdfOperatorSetStrokeRgb {
            pos: PdfInputPositionNoCompare,
            #[parse_flat(r, g, b)]
            color: PdfColorDeviceRgb,
        }),
        #[kw = b"rg"]
        SetNonStrokeRgb(PdfOperatorSetNonStrokeRgb {
            pos: PdfInputPositionNoCompare,
            #[parse_flat(r, g, b)]
            color: PdfColorDeviceRgb,
        }),
        #[kw = b"ri"]
        SetColorRenderingIntent(PdfOperatorSetColorRenderingIntent {
            pos: PdfInputPositionNoCompare,
            #[parse(intent)]
            intent: PdfRenderingIntent,
        }),
        #[kw = b"s"]
        CloseAndStrokePath(PdfOperatorCloseAndStrokePath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"S"]
        StrokePath(PdfOperatorStrokePath {
            pos: PdfInputPositionNoCompare,
        }),
        // SC/SCN take a variable operand count, so they use parse_iter.
        #[kw = b"SC"]
        SetStrokeColor(PdfOperatorSetStrokeColor {
            pos: PdfInputPositionNoCompare,
            #[parse_iter(...)]
            color: Arc<[f32]>,
        }),
        #[kw = b"sc"]
        SetNonStrokeColor(PdfOperatorSetNonStrokeColor {
            pos: PdfInputPositionNoCompare,
            #[parse_iter(...)]
            color: Arc<[f32]>,
        }),
        #[kw = b"SCN"]
        SetStrokeColorWithName(PdfOperatorSetStrokeColorWithName {
            pos: PdfInputPositionNoCompare,
            #[parse_iter(...)]
            color_and_name: Arc<[NameOr<f32>]>,
        }),
        #[kw = b"scn"]
        SetNonStrokeColorWithName(PdfOperatorSetNonStrokeColorWithName {
            pos: PdfInputPositionNoCompare,
            #[parse_iter(...)]
            color_and_name: Arc<[NameOr<f32>]>,
        }),
        #[kw = b"sh"]
        Shade(PdfOperatorShade {
            pos: PdfInputPositionNoCompare,
        }),
        // --- text ---
        #[kw = b"T*"]
        TextNextLine(PdfOperatorTextNextLine {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"Tc"]
        SetCharacterSpacing(PdfOperatorSetCharacterSpacing {
            pos: PdfInputPositionNoCompare,
            #[parse(char_space)]
            char_space: f32,
        }),
        #[kw = b"Td"]
        TextNextLineWithOffset(PdfOperatorTextNextLineWithOffset {
            pos: PdfInputPositionNoCompare,
            #[parse(x, y)]
            offset: PdfVec2D,
        }),
        #[kw = b"TD"]
        TextNextLineWithOffsetAndLeading(PdfOperatorTextNextLineWithOffsetAndLeading {
            pos: PdfInputPositionNoCompare,
            #[parse(x, y)]
            offset: PdfVec2D,
        }),
        #[kw = b"Tf"]
        SetFontAndSize(PdfOperatorSetFontAndSize {
            pos: PdfInputPositionNoCompare,
            #[parse(font)]
            font: PdfName,
            #[parse(size)]
            size: f32,
        }),
        #[kw = b"Tj"]
        ShowText(PdfOperatorShowText {
            pos: PdfInputPositionNoCompare,
            #[parse(text)]
            text: PdfString,
        }),
        #[kw = b"TJ"]
        ShowTextWithGlyphPositioning(PdfOperatorShowTextWithGlyphPositioning {
            pos: PdfInputPositionNoCompare,
            #[parse(text_and_positioning)]
            text_and_positioning: Arc<[PdfStringOrNumber]>,
        }),
        #[kw = b"TL"]
        SetTextLeading(PdfOperatorSetTextLeading {
            pos: PdfInputPositionNoCompare,
            #[parse(leading)]
            leading: f32,
        }),
        #[kw = b"Tm"]
        SetTextMatrix(PdfOperatorSetTextMatrix {
            pos: PdfInputPositionNoCompare,
            #[parse_flat(a, b, c, d, e, f)]
            matrix: PdfMatrix,
        }),
        #[kw = b"Tr"]
        SetTextRenderingMode(PdfOperatorSetTextRenderingMode {
            pos: PdfInputPositionNoCompare,
            #[parse(rendering_mode)]
            rendering_mode: u8, // TODO: parse
        }),
        #[kw = b"Ts"]
        SetTextRise(PdfOperatorSetTextRise {
            pos: PdfInputPositionNoCompare,
            #[parse(rise)]
            rise: f32,
        }),
        #[kw = b"Tw"]
        SetWordSpacing(PdfOperatorSetWordSpacing {
            pos: PdfInputPositionNoCompare,
            #[parse(word_space)]
            word_space: f32,
        }),
        #[kw = b"Tz"]
        SetTextHorizontalScaling(PdfOperatorSetTextHorizontalScaling {
            pos: PdfInputPositionNoCompare,
            #[parse(scale_percent)]
            scale_percent: f32,
        }),
        #[kw = b"v"]
        CurveTo23(PdfOperatorCurveTo23 {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"w"]
        SetLineWidth(PdfOperatorSetLineWidth {
            pos: PdfInputPositionNoCompare,
            #[parse(line_width)]
            line_width: f32,
        }),
        #[kw = b"W"]
        Clip(PdfOperatorClip {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"W*"]
        ClipEvenOdd(PdfOperatorClipEvenOdd {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"y"]
        CurveTo13(PdfOperatorCurveTo13 {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"'"]
        TextNextLineAndShow(PdfOperatorTextNextLineAndShow {
            pos: PdfInputPositionNoCompare,
            #[parse(text)]
            text: PdfString,
        }),
        #[kw = b"\""]
        SetSpacingThenTextNextLineAndShow(PdfOperatorSetSpacingThenTextNextLineAndShow {
            pos: PdfInputPositionNoCompare,
            #[parse(word_space)]
            word_space: f32,
            #[parse(char_space)]
            char_space: f32,
            #[parse(text)]
            text: PdfString,
        }),
    }
}
impl GetPdfInputPosition for PdfOperator {
    /// Position of the operator keyword in the input.
    fn get_pdf_input_position(&self) -> PdfInputPosition {
        self.pos()
    }
}
impl GetPdfInputPosition for PdfOperatorAndOperands {
    /// Position of the operator keyword in the input.
    fn get_pdf_input_position(&self) -> PdfInputPosition {
        self.pos()
    }
}
/// The decoded payload of a content stream: its operators (with parsed
/// operands) in document order.
#[derive(Clone)]
pub struct PdfContentStreamData {
    pub operators: Arc<[PdfOperatorAndOperands]>,
}
impl fmt::Debug for PdfContentStreamData {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("PdfContentStreamData")
            .field("operators", &self.operators)
            .finish()
    }
}
impl PdfStreamContents for PdfContentStreamData {
    /// Parses decoded content-stream bytes into an operator list.
    ///
    /// Objects are accumulated as pending operands until an operator keyword
    /// is read, which consumes them. Errors if a nested stream appears, or
    /// if operands remain with no operator at end of input.
    fn parse(
        data: &[u8],
        stream_pos: PdfInputPosition,
        objects: Arc<PdfObjects>,
    ) -> Result<Self, PdfParseError> {
        let mut parser = PdfParser {
            objects,
            tokenizer: PdfTokenizer::new(
                data,
                // Positions are relative to the start of this stream's data,
                // but remember which stream(s) we're nested inside.
                PdfInputPositionKnown {
                    pos: 0,
                    containing_streams_pos: stream_pos.get().map(|v| v.pos),
                },
            ),
        };
        let mut operands = Vec::new();
        let mut operators = Vec::new();
        loop {
            parser.skip_comments_and_whitespace();
            if parser.tokenizer.peek().is_none() {
                break;
            }
            match parser.parse_object_or_operator()? {
                // Streams may not appear inside a content stream.
                PdfObjectOrStreamDictionaryOrOperator::StreamDictionary {
                    stream_kw_pos, ..
                } => return Err(PdfParseError::StreamNotAllowedHere { pos: stream_kw_pos }),
                PdfObjectOrStreamDictionaryOrOperator::Object(object) => operands.push(object),
                PdfObjectOrStreamDictionaryOrOperator::Operator(operator) => {
                    // The operator consumes (and empties) the pending operands.
                    operators.push(PdfOperator::from(operator).parse(operands.drain(..))?);
                }
            }
        }
        if operands.is_empty() {
            Ok(Self {
                operators: operators.into(),
            })
        } else {
            // Trailing operands with no operator to consume them.
            Err(PdfParseError::MissingOperator {
                pos: parser.tokenizer.pos(),
            })
        }
    }
}
/// A PDF stream whose decoded bytes are parsed as content-stream operators.
pub type PdfContentStream = PdfStream<PdfDictionary, PdfContentStreamData>;

View file

@ -0,0 +1,743 @@
use crate::{
pdf::{
content_stream::PdfContentStream,
font::PdfFont,
object::{
IsPdfNull, MaybeArray, PdfDate, PdfDictionary, PdfInteger, PdfName, PdfObject,
PdfObjectDirect, PdfObjectIndirect, PdfRectangle, PdfStream, PdfString,
},
parse::{PdfParse, PdfParseError},
pdf_parse,
render::{PdfRenderOperator, PdfRenderState},
},
util::DagDebugState,
};
use rayon::iter::{FromParallelIterator, IntoParallelIterator, ParallelIterator};
use std::{borrow::Cow, fmt, sync::Arc};
// The /Type entry of a document catalog; the only legal value is /Catalog.
pdf_parse! {
    #[pdf(name)]
    #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
    pub enum PdfDocumentCatalogType {
        #[pdf(name = "Catalog")]
        #[default]
        Catalog,
    }
}
// The document catalog (the /Root of a PDF): entry point to the page tree.
// Unmodeled entries are preserved in `rest` via #[pdf(flatten)].
pdf_parse! {
    #[pdf]
    #[derive(Clone)]
    pub struct PdfDocumentCatalog {
        #[pdf(name = "Type")]
        pub ty: PdfDocumentCatalogType,
        #[pdf(name = "Version")]
        pub version: Option<PdfName>,
        #[pdf(name = "Extensions")]
        pub extensions: Option<PdfDictionary>,
        #[pdf(name = "Pages")]
        pub pages: PdfPageTree,
        // TODO
        #[pdf(flatten)]
        pub rest: PdfDictionary,
    }
}
impl fmt::Debug for PdfDocumentCatalog {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // DagDebugState::scope guards against re-printing shared/cyclic
        // objects while formatting.
        DagDebugState::scope(|_state| {
            // Exhaustive destructuring: adding a field breaks this impl,
            // forcing the Debug output to be updated.
            let Self {
                ty,
                version,
                extensions,
                pages,
                rest,
            } = self;
            f.debug_struct("PdfDocumentCatalog")
                .field("ty", ty)
                .field("version", version)
                .field("extensions", extensions)
                .field("pages", pages)
                .field("rest", rest)
                .finish()
        })
    }
}
// A page's /Resources dictionary; currently only fonts are modeled, the
// other resource categories stay in `rest`.
pdf_parse! {
    #[pdf]
    #[derive(Clone, Debug)]
    pub struct PdfResourcesDictionary {
        #[pdf(name = "Font")]
        pub fonts: PdfDictionary<PdfFont>,
        #[pdf(flatten)]
        pub rest: PdfDictionary,
    }
}
/// A fully resolved page tree: the raw tree plus a flattened, fully parsed
/// list of its pages in document order.
#[derive(Clone)]
pub struct PdfPageTree {
    page_tree: PdfPageTreeNode,
    pages: Arc<[PdfPage]>,
}
impl PdfPageTree {
    // Depth-first flattening of the page tree into its leaf pages, in
    // document order. Errors on a kid that is neither a node nor a leaf.
    fn collect_leaves(
        node: &PdfPageTreeNode,
        leaves: &mut Vec<PdfPageTreeLeaf>,
    ) -> Result<(), PdfParseError> {
        for kid in node.kids.iter() {
            match kid {
                PdfPageTreeNodeOrLeaf::Node(node) => Self::collect_leaves(node, leaves)?,
                PdfPageTreeNodeOrLeaf::Leaf(leaf) => {
                    leaves.push(leaf.clone());
                }
                PdfPageTreeNodeOrLeaf::Other(v) => {
                    return Err(PdfParseError::InvalidType {
                        pos: v.pos(),
                        ty: "dictionary",
                        expected_ty: "PdfPageTreeNodeOrLeaf",
                    });
                }
            }
        }
        Ok(())
    }
    /// Builds a [`PdfPageTree`] from the parsed tree root: pushes
    /// inheritable attributes (resources, boxes, rotation) down to the
    /// leaves, then parses every leaf into a [`PdfPage`] in parallel.
    pub fn try_from_page_tree_root(mut page_tree: PdfPageTreeNode) -> Result<Self, PdfParseError> {
        page_tree.propagate_inheritable_data_to_leaves();
        let mut leaves = Vec::new();
        Self::collect_leaves(&page_tree, &mut leaves)?;
        Ok(Self {
            page_tree,
            // Rayon parallel parse; panic_fuse stops sibling tasks promptly
            // if one of them panics.
            pages: Result::from_par_iter(
                leaves
                    .into_par_iter()
                    .map(PdfPage::parse_after_propagating_inheritable_data)
                    .panic_fuse(),
            )?,
        })
    }
    /// The raw (unflattened) page tree root.
    pub fn page_tree(&self) -> &PdfPageTreeNode {
        &self.page_tree
    }
    /// The flattened, parsed pages in document order.
    pub fn pages(&self) -> &Arc<[PdfPage]> {
        &self.pages
    }
}
impl fmt::Debug for PdfPageTree {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        DagDebugState::scope(|_state| {
            // The raw tree is deliberately omitted from Debug output; the
            // flattened pages carry the same information.
            let Self {
                page_tree: _,
                pages,
            } = self;
            f.debug_struct("PdfPageTree")
                .field("pages", pages)
                .finish_non_exhaustive()
        })
    }
}
impl IsPdfNull for PdfPageTree {
    /// Delegates nullness to the underlying tree root.
    fn is_pdf_null(&self) -> bool {
        self.page_tree.is_pdf_null()
    }
}
impl PdfParse for PdfPageTree {
    fn type_name() -> Cow<'static, str> {
        Cow::Borrowed("PdfPageTree")
    }
    /// Parses the raw tree root, then resolves/flattens it.
    fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
        Self::try_from_page_tree_root(PdfParse::parse(object)?)
    }
}
// Page attributes that may be specified on an ancestor page-tree node and
// inherited by descendant pages (PDF "inheritable attributes").
pdf_parse! {
    #[pdf]
    #[derive(Clone, Default, Debug)]
    pub struct PdfPageInheritableData {
        #[pdf(name = "Resources")]
        pub resources: Option<PdfResourcesDictionary>,
        #[pdf(name = "MediaBox")]
        pub media_box: Option<PdfRectangle>,
        #[pdf(name = "CropBox")]
        pub crop_box: Option<PdfRectangle>,
        #[pdf(name = "Rotate")]
        pub rotate: Option<PdfPageRotation>,
        #[pdf(flatten)]
        pub rest: PdfDictionary,
    }
}
impl PdfPageInheritableData {
    /// Copies each inheritable attribute from `self` into `target`, but only
    /// where `target` does not already provide its own value (per the PDF
    /// inheritance rules: the closest ancestor wins). `rest` is not
    /// inherited.
    pub fn propagate_to(&self, target: &mut Self) {
        // Fill `child` from `parent` only when the child has no value of
        // its own. (Cloning `None` is free, so the unconditional clone in
        // the empty case is harmless.)
        fn inherit<T: Clone>(parent: &Option<T>, child: &mut Option<T>) {
            if child.is_none() {
                *child = parent.clone();
            }
        }
        // Exhaustive destructuring so adding a field here is a compile
        // error until its inheritance behavior is decided.
        let Self {
            resources,
            media_box,
            crop_box,
            rotate,
            rest: _,
        } = self;
        inherit(resources, &mut target.resources);
        inherit(media_box, &mut target.media_box);
        inherit(crop_box, &mut target.crop_box);
        inherit(rotate, &mut target.rotate);
    }
}
// The /Type entry of an interior page-tree node; always /Pages.
pdf_parse! {
    #[pdf(name)]
    #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
    pub enum PdfPageTreeNodeType {
        #[pdf(name = "Pages")]
        #[default]
        Pages,
    }
}
// An interior /Pages node: its children (nodes or leaf pages), the total
// page count below it, and any inheritable attributes declared on it.
pdf_parse! {
    #[pdf]
    #[derive(Clone)]
    pub struct PdfPageTreeNode {
        #[pdf(name = "Type")]
        pub ty: PdfPageTreeNodeType,
        #[pdf(name = "Parent")]
        pub parent: Option<PdfObjectIndirect>,
        #[pdf(name = "Kids")]
        pub kids: Arc<[PdfPageTreeNodeOrLeaf]>,
        #[pdf(name = "Count")]
        pub count: usize,
        // TODO
        #[pdf(flatten)]
        pub inheritable: PdfPageInheritableData,
    }
}
impl fmt::Debug for PdfPageTreeNode {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // DagDebugState::scope guards against re-printing shared/cyclic
        // objects while formatting.
        DagDebugState::scope(|_state| {
            // Exhaustive destructuring keeps Debug in sync with the fields.
            let Self {
                ty,
                parent,
                kids,
                count,
                inheritable,
            } = self;
            f.debug_struct("PdfPageTreeNode")
                .field("ty", ty)
                .field("parent", parent)
                .field("kids", kids)
                .field("count", count)
                .field("inheritable", inheritable)
                .finish()
        })
    }
}
impl PdfPageTreeNode {
    /// Push this node's inheritable attributes down into every child and then
    /// recurse, so the data eventually reaches all leaf pages.
    pub fn propagate_inheritable_data_to_leaves(&mut self) {
        // disjoint borrows: `kids` mutably, `inheritable` immutably
        let Self {
            kids, inheritable, ..
        } = self;
        for kid in Arc::make_mut(kids) {
            if let Some(kid_data) = kid.inheritable_data_mut() {
                inheritable.propagate_to(kid_data);
            }
            kid.propagate_inheritable_data_to_leaves();
        }
    }
}
pdf_parse! {
    // the /Type value of a leaf page object; only "Page" is accepted
    #[pdf(name)]
    #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
    pub enum PdfPageType {
        #[pdf(name = "Page")]
        #[default]
        Page,
    }
}
pdf_parse! {
    // the page's /Tabs entry: order in which annotations are traversed with
    // the tab key
    #[pdf(name)]
    #[derive(Clone, PartialEq, Eq, Hash, Debug)]
    pub enum PdfPageAnnotationsTabOrder {
        #[pdf(name = "R")]
        RowOrder,
        #[pdf(name = "C")]
        ColumnOrder,
        #[pdf(name = "S")]
        StructureOrder,
        // unrecognized names are preserved instead of rejected
        #[pdf(other)]
        Other(PdfName),
    }
}
pdf_parse! {
    #[pdf]
    #[derive(Clone)]
    // leaf of the page tree: a single page as it appears in the file, before
    // inheritable attributes have been resolved (see PdfPage for the resolved
    // form)
    pub struct PdfPageTreeLeaf {
        #[pdf(name = "Type")]
        pub ty: PdfPageType,
        // leaf pages always have a parent node
        #[pdf(name = "Parent")]
        pub parent: PdfObjectIndirect,
        #[pdf(name = "LastModified")]
        pub last_modified: Option<PdfDate>,
        #[pdf(name = "BleedBox")]
        pub bleed_box: Option<PdfRectangle>,
        #[pdf(name = "TrimBox")]
        pub trim_box: Option<PdfRectangle>,
        #[pdf(name = "ArtBox")]
        pub art_box: Option<PdfRectangle>,
        #[pdf(name = "BoxColorInfo")]
        pub box_color_info: Option<PdfDictionary>,
        // the page's content: a single stream or an array of streams
        #[pdf(name = "Contents")]
        pub contents: MaybeArray<PdfContentStream>,
        #[pdf(name = "Group")]
        pub group: Option<PdfDictionary>,
        #[pdf(name = "Thumb")]
        pub thumbnail: Option<PdfStream>,
        #[pdf(name = "B")]
        pub beads: Option<Arc<[PdfDictionary]>>,
        #[pdf(name = "Dur")]
        pub duration: Option<f32>,
        #[pdf(name = "Trans")]
        pub transition: Option<PdfDictionary>,
        #[pdf(name = "Annots")]
        pub annotations: Option<Arc<[PdfDictionary]>>,
        #[pdf(name = "AA")]
        pub additional_actions: Option<PdfDictionary>,
        #[pdf(name = "Metadata")]
        pub metadata: Option<PdfStream>,
        #[pdf(name = "PieceInfo")]
        pub piece_info: Option<PdfDictionary>,
        #[pdf(name = "StructParents")]
        pub structural_parents: Option<PdfInteger>,
        #[pdf(name = "ID")]
        pub parent_web_capture_content_set_id: Option<PdfString>,
        #[pdf(name = "PZ")]
        pub preferred_zoom_factor: Option<f32>,
        #[pdf(name = "SeparationInfo")]
        pub separation_info: Option<PdfDictionary>,
        #[pdf(name = "Tabs")]
        pub annotations_tab_order: Option<PdfPageAnnotationsTabOrder>,
        #[pdf(name = "TemplateInstantiated")]
        pub template_instantiated: Option<PdfName>,
        #[pdf(name = "PresSteps")]
        pub pres_steps: Option<PdfDictionary>,
        #[pdf(name = "UserUnit")]
        pub user_unit: Option<f32>,
        #[pdf(name = "VP")]
        pub viewports: Option<Arc<[PdfDictionary]>>,
        // attributes possibly inherited from ancestor nodes; filled in by
        // PdfPageTreeNode::propagate_inheritable_data_to_leaves
        #[pdf(flatten)]
        pub inheritable: PdfPageInheritableData,
    }
}
impl fmt::Debug for PdfPageTreeLeaf {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // DAG-debug scope: shared values printed more than once below are
        // abbreviated after their first appearance
        DagDebugState::scope(|_state| {
            // exhaustive destructure so a newly added field causes a compile
            // error until it is added to the debug output below
            let Self {
                ty,
                parent,
                last_modified,
                bleed_box,
                trim_box,
                art_box,
                box_color_info,
                contents,
                group,
                thumbnail,
                beads,
                duration,
                transition,
                annotations,
                additional_actions,
                metadata,
                piece_info,
                structural_parents,
                parent_web_capture_content_set_id,
                preferred_zoom_factor,
                separation_info,
                annotations_tab_order,
                template_instantiated,
                pres_steps,
                user_unit,
                viewports,
                inheritable,
            } = self;
            f.debug_struct("PdfPageTreeLeaf")
                .field("ty", ty)
                .field("parent", parent)
                .field("last_modified", last_modified)
                .field("bleed_box", bleed_box)
                .field("trim_box", trim_box)
                .field("art_box", art_box)
                .field("box_color_info", box_color_info)
                .field("contents", contents)
                .field("group", group)
                .field("thumbnail", thumbnail)
                .field("beads", beads)
                .field("duration", duration)
                .field("transition", transition)
                .field("annotations", annotations)
                .field("additional_actions", additional_actions)
                .field("metadata", metadata)
                .field("piece_info", piece_info)
                .field("structural_parents", structural_parents)
                .field(
                    "parent_web_capture_content_set_id",
                    parent_web_capture_content_set_id,
                )
                .field("preferred_zoom_factor", preferred_zoom_factor)
                .field("separation_info", separation_info)
                .field("annotations_tab_order", annotations_tab_order)
                .field("template_instantiated", template_instantiated)
                .field("pres_steps", pres_steps)
                .field("user_unit", user_unit)
                .field("viewports", viewports)
                .field("inheritable", inheritable)
                .finish()
        })
    }
}
pdf_parse! {
    // a /Kids entry of a page-tree node, discriminated by its /Type value
    #[pdf(tag = "Type")]
    #[derive(Clone)]
    pub enum PdfPageTreeNodeOrLeaf {
        #[pdf(tag_value = "Pages")]
        Node(PdfPageTreeNode),
        #[pdf(tag_value = "Page")]
        Leaf(PdfPageTreeLeaf),
        // dictionaries with an unknown /Type are kept raw instead of rejected
        #[pdf(other)]
        Other(PdfDictionary),
    }
}
impl PdfPageTreeNodeOrLeaf {
pub fn propagate_inheritable_data_to_leaves(&mut self) {
match self {
PdfPageTreeNodeOrLeaf::Node(v) => v.propagate_inheritable_data_to_leaves(),
PdfPageTreeNodeOrLeaf::Leaf(_) | PdfPageTreeNodeOrLeaf::Other(_) => {}
}
}
pub fn inheritable_data_mut(&mut self) -> Option<&mut PdfPageInheritableData> {
match self {
PdfPageTreeNodeOrLeaf::Node(v) => Some(&mut v.inheritable),
PdfPageTreeNodeOrLeaf::Leaf(v) => Some(&mut v.inheritable),
PdfPageTreeNodeOrLeaf::Other(_) => None,
}
}
pub fn inheritable_data(&self) -> Option<&PdfPageInheritableData> {
match self {
PdfPageTreeNodeOrLeaf::Node(v) => Some(&v.inheritable),
PdfPageTreeNodeOrLeaf::Leaf(v) => Some(&v.inheritable),
PdfPageTreeNodeOrLeaf::Other(_) => None,
}
}
}
impl fmt::Debug for PdfPageTreeNodeOrLeaf {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // delegate straight to the wrapped value so the enum wrapper does not
        // appear in debug output
        let inner: &dyn fmt::Debug = match self {
            Self::Node(v) => v,
            Self::Leaf(v) => v,
            Self::Other(v) => v,
        };
        inner.fmt(f)
    }
}
/// the amount by which the page is rotated clockwise when displaying or printing, is always a multiple of 90 degrees.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub enum PdfPageRotation {
    #[default]
    NoRotation = 0,
    ClockwiseBy90Degrees = 90,
    By180Degrees = 180,
    ClockwiseBy270Degrees = 270,
}
impl PdfPageRotation {
    /// Normalize an arbitrary (possibly negative or >= 360) clockwise angle in
    /// degrees to one of the four legal rotations; `None` when it is not a
    /// multiple of 90.
    pub fn from_clockwise_angle_in_degrees(angle: i32) -> Option<Self> {
        // rem_euclid (unlike `%`) always yields a non-negative remainder, so
        // e.g. -90 normalizes to 270
        let normalized = angle.rem_euclid(360);
        if normalized == 0 {
            Some(Self::NoRotation)
        } else if normalized == 90 {
            Some(Self::ClockwiseBy90Degrees)
        } else if normalized == 180 {
            Some(Self::By180Degrees)
        } else if normalized == 270 {
            Some(Self::ClockwiseBy270Degrees)
        } else {
            None
        }
    }
    /// Same as [`Self::from_clockwise_angle_in_degrees`] for the wider integer
    /// type used by PDF integer objects.
    pub fn from_clockwise_angle_in_degrees_i128(angle: i128) -> Option<Self> {
        // reduce into 0..360 first so the cast to i32 cannot overflow
        Self::from_clockwise_angle_in_degrees(angle.rem_euclid(360) as i32)
    }
}
impl From<PdfPageRotation> for i32 {
    fn from(value: PdfPageRotation) -> Self {
        // the enum discriminants are the angles themselves
        value as i32
    }
}
impl IsPdfNull for PdfPageRotation {
    // a rotation value is never treated as the PDF null object
    fn is_pdf_null(&self) -> bool {
        false
    }
}
impl PdfParse for PdfPageRotation {
    fn type_name() -> Cow<'static, str> {
        "page rotation".into()
    }
    fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
        // resolve indirection first so the recorded position points at the
        // actual integer object
        let object = PdfObjectDirect::from(object);
        let pos = object.pos();
        let angle = PdfInteger::parse(object.into())?.value();
        match Self::from_clockwise_angle_in_degrees_i128(angle) {
            Some(rotation) => Ok(rotation),
            // not a multiple of 90 degrees
            None => Err(PdfParseError::IntegerOutOfRange { pos }),
        }
    }
}
#[derive(Clone)]
/// a fully-resolved page: a [`PdfPageTreeLeaf`] after inheritable attributes
/// have been propagated, required attributes checked, and defaults applied
/// (see [`PdfPage::parse_after_propagating_inheritable_data`])
pub struct PdfPage {
    pub ty: PdfPageType,
    pub parent: PdfObjectIndirect,
    pub last_modified: Option<PdfDate>,
    // required; inherited Resources must exist by the time a page is built
    pub resources: PdfResourcesDictionary,
    // required
    pub media_box: PdfRectangle,
    // defaults to media_box
    pub crop_box: PdfRectangle,
    // defaults to crop_box
    pub bleed_box: PdfRectangle,
    // defaults to crop_box
    pub trim_box: PdfRectangle,
    // defaults to crop_box
    pub art_box: PdfRectangle,
    pub box_color_info: Option<PdfDictionary>,
    pub contents: Arc<[PdfContentStream]>,
    // defaults to NoRotation
    pub rotate: PdfPageRotation,
    pub group: Option<PdfDictionary>,
    pub thumbnail: Option<PdfStream>,
    pub beads: Option<Arc<[PdfDictionary]>>,
    pub duration: Option<f32>,
    pub transition: Option<PdfDictionary>,
    pub annotations: Option<Arc<[PdfDictionary]>>,
    pub additional_actions: Option<PdfDictionary>,
    pub metadata: Option<PdfStream>,
    pub piece_info: Option<PdfDictionary>,
    pub structural_parents: Option<PdfInteger>,
    pub parent_web_capture_content_set_id: Option<PdfString>,
    pub preferred_zoom_factor: Option<f32>,
    pub separation_info: Option<PdfDictionary>,
    pub annotations_tab_order: Option<PdfPageAnnotationsTabOrder>,
    pub template_instantiated: Option<PdfName>,
    pub pres_steps: Option<PdfDictionary>,
    // defaults to 1.0
    pub user_unit: f32,
    pub viewports: Option<Arc<[PdfDictionary]>>,
    // unrecognized dictionary entries, kept unparsed
    pub rest: PdfDictionary,
    // always Some after construction; Option only to allow building the
    // struct before rendering it (see parse_after_propagating_inheritable_data)
    rendered_objects: Option<PdfPageRenderedObjects>,
}
impl PdfPage {
    /// The objects produced by rendering this page's content streams.
    pub fn rendered_objects(&self) -> &PdfPageRenderedObjects {
        // rendered_objects is set unconditionally at the end of
        // parse_after_propagating_inheritable_data, so it is always Some here
        let Some(retval) = &self.rendered_objects else {
            unreachable!();
        };
        retval
    }
    /// Build a fully-resolved page from a leaf whose inheritable attributes
    /// have already been propagated down from its ancestors: checks the
    /// required attributes (Resources, MediaBox), applies the documented
    /// defaults for the optional ones, and renders the content streams.
    ///
    /// Errors if a required attribute is still missing, or if decoding or
    /// rendering the content streams fails.
    pub fn parse_after_propagating_inheritable_data(
        leaf: PdfPageTreeLeaf,
    ) -> Result<Self, PdfParseError> {
        let PdfPageTreeLeaf {
            ty,
            parent,
            last_modified,
            bleed_box,
            trim_box,
            art_box,
            box_color_info,
            contents,
            group,
            thumbnail,
            beads,
            duration,
            transition,
            annotations,
            additional_actions,
            metadata,
            piece_info,
            structural_parents,
            parent_web_capture_content_set_id,
            preferred_zoom_factor,
            separation_info,
            annotations_tab_order,
            template_instantiated,
            pres_steps,
            user_unit,
            viewports,
            inheritable:
                PdfPageInheritableData {
                    resources,
                    media_box,
                    crop_box,
                    rotate,
                    rest,
                },
        } = leaf;
        // error positions point at the page dictionary itself
        let pos = rest.pos();
        // Resources and MediaBox are required (directly or by inheritance)
        let resources = resources.ok_or(PdfParseError::InvalidType {
            pos,
            ty: "null",
            expected_ty: "page resources dictionary",
        })?;
        let media_box = media_box.ok_or(PdfParseError::InvalidType {
            pos,
            ty: "null",
            expected_ty: "page MediaBox rectangle",
        })?;
        // the remaining boxes default to one another in a chain:
        // CropBox -> MediaBox; BleedBox/TrimBox/ArtBox -> CropBox
        let crop_box = crop_box.unwrap_or(media_box);
        let rotate = rotate.unwrap_or(PdfPageRotation::NoRotation);
        let mut retval = Self {
            ty,
            parent,
            last_modified,
            resources,
            media_box,
            crop_box,
            bleed_box: bleed_box.unwrap_or(crop_box),
            trim_box: trim_box.unwrap_or(crop_box),
            art_box: art_box.unwrap_or(crop_box),
            box_color_info,
            contents: contents.0,
            rotate,
            group,
            thumbnail,
            beads,
            duration,
            transition,
            annotations,
            additional_actions,
            metadata,
            piece_info,
            structural_parents,
            parent_web_capture_content_set_id,
            preferred_zoom_factor,
            separation_info,
            annotations_tab_order,
            template_instantiated,
            pres_steps,
            user_unit: user_unit.unwrap_or(1.0),
            viewports,
            rest,
            rendered_objects: None,
        };
        // render last: rendering needs a &PdfPage, so the struct is built
        // first with rendered_objects temporarily None
        retval.rendered_objects = Some(PdfPageRenderedObjects::render_page(&retval)?);
        Ok(retval)
    }
}
impl fmt::Debug for PdfPage {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // DAG-debug scope: shared values printed more than once below are
        // abbreviated after their first appearance
        DagDebugState::scope(|_state| {
            // exhaustive destructure so a newly added field causes a compile
            // error until it is added to the debug output below
            let Self {
                ty,
                parent,
                last_modified,
                resources,
                media_box,
                crop_box,
                bleed_box,
                trim_box,
                art_box,
                box_color_info,
                contents,
                rotate,
                group,
                thumbnail,
                beads,
                duration,
                transition,
                annotations,
                additional_actions,
                metadata,
                piece_info,
                structural_parents,
                parent_web_capture_content_set_id,
                preferred_zoom_factor,
                separation_info,
                annotations_tab_order,
                template_instantiated,
                pres_steps,
                user_unit,
                viewports,
                rest,
                rendered_objects,
            } = self;
            // placeholder shown while rendered_objects is still None (only
            // possible mid-construction)
            struct Unparsed;
            impl fmt::Debug for Unparsed {
                fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                    f.write_str("<unparsed>")
                }
            }
            f.debug_struct("PdfPage")
                .field("ty", ty)
                .field("parent", parent)
                .field("last_modified", last_modified)
                .field("resources", resources)
                .field("media_box", media_box)
                .field("crop_box", crop_box)
                .field("bleed_box", bleed_box)
                .field("trim_box", trim_box)
                .field("art_box", art_box)
                .field("box_color_info", box_color_info)
                .field("contents", contents)
                .field("rotate", rotate)
                .field("group", group)
                .field("thumbnail", thumbnail)
                .field("beads", beads)
                .field("duration", duration)
                .field("transition", transition)
                .field("annotations", annotations)
                .field("additional_actions", additional_actions)
                .field("metadata", metadata)
                .field("piece_info", piece_info)
                .field("structural_parents", structural_parents)
                .field(
                    "parent_web_capture_content_set_id",
                    parent_web_capture_content_set_id,
                )
                .field("preferred_zoom_factor", preferred_zoom_factor)
                .field("separation_info", separation_info)
                .field("annotations_tab_order", annotations_tab_order)
                .field("template_instantiated", template_instantiated)
                .field("pres_steps", pres_steps)
                .field("user_unit", user_unit)
                .field("viewports", viewports)
                .field("rest", rest)
                .field(
                    "rendered_objects",
                    if let Some(rendered_objects) = rendered_objects {
                        rendered_objects
                    } else {
                        &Unparsed
                    },
                )
                .finish()
        })
    }
}
#[derive(Clone, Debug)]
/// result of rendering a page's content streams; currently carries no data —
/// rendering is run for its side effects and validation only
pub struct PdfPageRenderedObjects {}
impl PdfPageRenderedObjects {
    /// Execute every operator of every content stream of `page` against a
    /// fresh render state, propagating decode and render errors.
    fn render_page(page: &PdfPage) -> Result<Self, PdfParseError> {
        let mut state = PdfRenderState::new(page);
        for content_stream in page.contents.iter() {
            // `as_ref()?` propagates the stream's cached decode error without
            // taking ownership of the decoded data
            for op in content_stream.decoded_data().as_ref()?.operators.iter() {
                op.render(&mut state)?;
            }
        }
        Ok(Self {})
    }
}

1057
src/pdf/font.rs Normal file

File diff suppressed because it is too large Load diff

1067
src/pdf/font/tables.rs Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,325 @@
use std::{collections::BTreeMap, sync::Arc};
use crate::{
pdf::{
font::{
PdfFontToUnicode,
type_1_parse::{PsFile, Token},
},
object::{PdfName, PdfObjectDirect, PdfString},
parse::{PdfInputPosition, PdfParseError},
},
util::ArcOrRef,
};
/// parser for a font's /ToUnicode CMap stream, layered on top of the
/// PostScript tokenizer
pub(crate) struct ToUnicodeParser {
    tokenizer: PsFile,
}
#[track_caller]
/// Build the "invalid token" parse error; `token` is `None` at end of input.
/// The token is Debug-formatted into the error so the message shows exactly
/// what was seen.
fn invalid_token_err<T>(pos: PdfInputPosition, token: Option<Token>) -> Result<T, PdfParseError> {
    let token = format!("{token:?}");
    Err(PdfParseError::InvalidTokenInToUnicodeStream { pos, token })
}
impl ToUnicodeParser {
    /// Wrap an already-constructed PostScript tokenizer.
    pub(crate) fn new(tokenizer: PsFile) -> Self {
        Self { tokenizer }
    }
fn expect_any_string(&mut self) -> Result<Vec<u8>, PdfParseError> {
self.tokenizer.skip_comments_and_whitespace();
let pos = self.tokenizer.pos();
match self.tokenizer.next_token()? {
Some(Token::String(string)) => Ok(string),
token => invalid_token_err(pos, token),
}
}
pub(crate) fn expect_string_with_len(
&mut self,
expected_len: usize,
) -> Result<Vec<u8>, PdfParseError> {
self.tokenizer.skip_comments_and_whitespace();
let pos = self.tokenizer.pos();
match self.tokenizer.next_token()? {
Some(Token::String(string)) if string.len() == expected_len => Ok(string),
token => invalid_token_err(pos, token),
}
}
pub(crate) fn expect_literal_name(
&mut self,
expected_name: &[u8],
) -> Result<(), PdfParseError> {
self.tokenizer.skip_comments_and_whitespace();
let pos = self.tokenizer.pos();
match self.tokenizer.next_token()? {
Some(Token::LiteralName(name)) if name == expected_name => Ok(()),
token => invalid_token_err(pos, token),
}
}
pub(crate) fn expect_any_literal_name(&mut self) -> Result<Vec<u8>, PdfParseError> {
self.tokenizer.skip_comments_and_whitespace();
let pos = self.tokenizer.pos();
match self.tokenizer.next_token()? {
Some(Token::LiteralName(name)) => Ok(name),
token => invalid_token_err(pos, token),
}
}
pub(crate) fn expect_executable_name(
&mut self,
expected_name: &[u8],
) -> Result<(), PdfParseError> {
self.tokenizer.skip_comments_and_whitespace();
let pos = self.tokenizer.pos();
match self.tokenizer.next_token()? {
Some(Token::ExecutableName(name)) if name == expected_name => Ok(()),
token => invalid_token_err(pos, token),
}
}
pub(crate) fn expect(&mut self, expected_token: Token) -> Result<(), PdfParseError> {
self.tokenizer.skip_comments_and_whitespace();
let pos = self.tokenizer.pos();
match self.tokenizer.next_token()? {
Some(token) if token == expected_token => Ok(()),
token => invalid_token_err(pos, token),
}
}
pub(crate) fn expect_integer(&mut self) -> Result<i128, PdfParseError> {
self.tokenizer.skip_comments_and_whitespace();
let pos = self.tokenizer.pos();
match self.tokenizer.next_token()? {
Some(Token::Integer(value)) => Ok(value),
token => invalid_token_err(pos, token),
}
}
pub(crate) fn parse_dict(
&mut self,
mut entry_callback: impl FnMut(Vec<u8>, PdfInputPosition, Token) -> Result<(), PdfParseError>,
) -> Result<(), PdfParseError> {
self.expect(Token::DictStart)?;
loop {
self.tokenizer.skip_comments_and_whitespace();
let name_pos = self.tokenizer.pos();
match self.tokenizer.next_token()? {
Some(Token::DictEnd) => return Ok(()),
Some(Token::LiteralName(name)) => {
self.tokenizer.skip_comments_and_whitespace();
let value_pos = self.tokenizer.pos();
let Some(value) = self.tokenizer.next_token()? else {
return invalid_token_err(value_pos, None);
};
entry_callback(name, value_pos, value)?;
}
token => {
return invalid_token_err(name_pos, token);
}
}
}
}
    /// Parse the whole ToUnicode CMap: the `/CIDInit` prologue, the
    /// `CIDSystemInfo` dictionary, the code-space range, the `bfrange` /
    /// `bfchar` mapping sections, and the trailing boilerplate — producing the
    /// character-code -> Unicode map.
    ///
    /// `base_map` is the font's /ToUnicode `UseCMap`-style base, passed
    /// through unchanged into the result.
    pub(crate) fn parse(
        mut self,
        base_map: Option<PdfObjectDirect>,
    ) -> Result<PdfFontToUnicode, PdfParseError> {
        // --- prologue: /CIDInit /ProcSet findresource begin N dict begin begincmap
        self.tokenizer.skip_comments_and_whitespace();
        self.expect_literal_name(b"CIDInit")?;
        self.expect_literal_name(b"ProcSet")?;
        self.expect_executable_name(b"findresource")?;
        self.expect_executable_name(b"begin")?;
        // the dictionary size operand is irrelevant here; only its presence matters
        self.expect_integer()?;
        self.expect_executable_name(b"dict")?;
        self.expect_executable_name(b"begin")?;
        self.expect_executable_name(b"begincmap")?;
        // --- /CIDSystemInfo << /Registry ... /Ordering ... /Supplement ... >> def
        self.expect_literal_name(b"CIDSystemInfo")?;
        // NOTE(review): Registry/Ordering/Supplement are type-checked but then
        // discarded — the resulting PdfFontToUnicode does not keep them
        let mut registry = None;
        let mut ordering = None;
        let mut supplement = None;
        self.parse_dict(|name, value_pos, value| match &*name {
            b"Registry" => {
                let Token::String(v) = value else {
                    return invalid_token_err(value_pos, Some(value));
                };
                registry = Some(v);
                Ok(())
            }
            b"Ordering" => {
                let Token::String(v) = value else {
                    return invalid_token_err(value_pos, Some(value));
                };
                ordering = Some(v);
                Ok(())
            }
            b"Supplement" => {
                let Token::Integer(v) = value else {
                    return invalid_token_err(value_pos, Some(value));
                };
                supplement = Some(v);
                Ok(())
            }
            _ => todo!("{}: {value:?}", name.escape_ascii()),
        })?;
        self.expect_executable_name(b"def")?;
        // --- /CMapName /<name> def
        self.expect_literal_name(b"CMapName")?;
        self.tokenizer.skip_comments_and_whitespace();
        let char_map_name_pos = self.tokenizer.pos();
        let char_map_name = self.expect_any_literal_name()?;
        self.expect_executable_name(b"def")?;
        // --- /CMapType 2 def (2 = ToUnicode CMap; other types rejected)
        self.expect_literal_name(b"CMapType")?;
        self.expect(Token::Integer(2))?;
        self.expect_executable_name(b"def")?;
        // --- exactly one codespacerange entry is supported:
        // 1 begincodespacerange <low> <high> endcodespacerange
        self.expect(Token::Integer(1))?;
        self.expect_executable_name(b"begincodespacerange")?;
        self.tokenizer.skip_comments_and_whitespace();
        let range_start_pos = self.tokenizer.pos();
        let range_start = self.expect_any_string()?;
        if range_start.is_empty() {
            return invalid_token_err(range_start_pos, Some(Token::String(range_start)));
        }
        self.tokenizer.skip_comments_and_whitespace();
        let range_end_pos = self.tokenizer.pos();
        // both ends of the range must have the same byte length; that length
        // is also the code length used by all bfchar/bfrange entries below
        let range_end = self.expect_string_with_len(range_start.len())?;
        self.expect_executable_name(b"endcodespacerange")?;
        let mut to_unicode_map: BTreeMap<PdfString, Arc<str>> = BTreeMap::new();
        // scratch buffer reused for every mapping's decoded UTF-16 text
        let mut dest_str = String::new();
        // decode a UTF-16BE destination and record src -> text in the map
        let mut insert_mapping = |src_pos: PdfInputPosition,
                                  src: &[u8],
                                  dest_pos: PdfInputPosition,
                                  dest_utf16_be: &[u8]|
         -> Result<(), PdfParseError> {
            dest_str.clear();
            // callers guarantee dest_utf16_be.len() is even, so each chunk
            // has both bytes
            for ch in char::decode_utf16(
                dest_utf16_be
                    .chunks(2)
                    .map(|chunk| u16::from_be_bytes([chunk[0], chunk[1]])),
            ) {
                match ch {
                    Ok(ch) => dest_str.push(ch),
                    Err(_) => {
                        return Err(PdfParseError::InvalidUtf16 { pos: dest_pos });
                    }
                }
            }
            to_unicode_map.insert(
                PdfString::new(src_pos, ArcOrRef::Arc(src.into())),
                dest_str.as_str().into(),
            );
            Ok(())
        };
        // --- mapping sections: any number of
        //   N beginbfrange ... endbfrange / N beginbfchar ... endbfchar
        // blocks, terminated by `endcmap`
        loop {
            // NOTE(review): unlike the expect_* helpers, this next_token call
            // is not preceded by skip_comments_and_whitespace — presumably the
            // tokenizer tolerates leading whitespace itself; confirm
            match self.tokenizer.next_token()? {
                Some(Token::Integer(size)) => match self.tokenizer.next_token()? {
                    Some(Token::ExecutableName(name)) if name == b"beginbfrange" => {
                        // each entry: <srcLow> <srcHigh> (<dstString> | [<dst> ...])
                        for _ in 0..size {
                            self.tokenizer.skip_comments_and_whitespace();
                            let src_pos = self.tokenizer.pos();
                            let src_low = self.expect_string_with_len(range_start.len())?;
                            self.tokenizer.skip_comments_and_whitespace();
                            let src_high_pos = self.tokenizer.pos();
                            let src_high = self.expect_string_with_len(range_start.len())?;
                            // only ranges that vary in the final byte are
                            // supported: all leading bytes must match
                            if src_low.split_last().map(|(_, prefix)| prefix)
                                != src_high.split_last().map(|(_, prefix)| prefix)
                            {
                                return invalid_token_err(
                                    src_high_pos,
                                    Some(Token::String(src_high)),
                                );
                            }
                            let src_last_range = *src_low.last().expect("known to be non-empty")
                                ..=*src_high.last().expect("known to be non-empty");
                            self.tokenizer.skip_comments_and_whitespace();
                            let dest_pos = self.tokenizer.pos();
                            match self.tokenizer.next_token()? {
                                Some(Token::String(dest))
                                    if dest.len() >= 2 && dest.len() % 2 == 0 =>
                                {
                                    // string form: each successive code maps to
                                    // dest with its last byte incremented
                                    let mut src = src_low;
                                    for (index, src_last_byte) in src_last_range.enumerate() {
                                        *src.last_mut().expect("known to be non-empty") =
                                            src_last_byte;
                                        let mut dest = dest.clone();
                                        let [.., last] = &mut *dest else {
                                            unreachable!();
                                        };
                                        // NOTE(review): only the low byte is
                                        // bumped; a range whose destination
                                        // crosses a 0x100 boundary would wrap
                                        // (or panic in debug) — assumed not to
                                        // occur in valid CMaps; confirm
                                        *last += index as u8;
                                        insert_mapping(src_pos, &src, dest_pos, &dest)?;
                                    }
                                }
                                Some(token @ Token::String(_)) => {
                                    todo!("odd number of dest bytes: {token:?}");
                                }
                                Some(Token::ArrayStart) => {
                                    // array form: one explicit destination
                                    // string per code in the range
                                    let mut src = src_low;
                                    for src_last_byte in src_last_range {
                                        *src.last_mut().expect("known to be non-empty") =
                                            src_last_byte;
                                        self.tokenizer.skip_comments_and_whitespace();
                                        let dest_pos = self.tokenizer.pos();
                                        match self.tokenizer.next_token()? {
                                            Some(Token::String(dest))
                                                if dest.len() >= 2 && dest.len() % 2 == 0 =>
                                            {
                                                insert_mapping(src_pos, &src, dest_pos, &dest)?;
                                            }
                                            Some(token @ Token::String(_)) => {
                                                todo!("odd number of dest bytes: {token:?}");
                                            }
                                            token => return invalid_token_err(dest_pos, token),
                                        }
                                    }
                                    self.expect(Token::ArrayEnd)?;
                                }
                                token => return invalid_token_err(dest_pos, token),
                            }
                        }
                        self.expect_executable_name(b"endbfrange")?;
                    }
                    Some(Token::ExecutableName(name)) if name == b"beginbfchar" => {
                        // each entry: <src> <dstString>
                        for _ in 0..size {
                            self.tokenizer.skip_comments_and_whitespace();
                            let src_pos = self.tokenizer.pos();
                            let src = self.expect_string_with_len(range_start.len())?;
                            self.tokenizer.skip_comments_and_whitespace();
                            let dest_pos = self.tokenizer.pos();
                            match self.tokenizer.next_token()? {
                                Some(Token::String(dest)) if dest.len() % 2 == 0 => {
                                    insert_mapping(src_pos, &src, dest_pos, &dest)?;
                                }
                                Some(token @ Token::String(_)) => {
                                    todo!("odd number of dest bytes: {token:?}");
                                }
                                token => return invalid_token_err(dest_pos, token),
                            }
                        }
                        self.expect_executable_name(b"endbfchar")?;
                    }
                    token => todo!("{token:?}"),
                },
                Some(Token::ExecutableName(name)) if name == b"endcmap" => {
                    break;
                }
                token => todo!("{token:?}"),
            }
        }
        // --- epilogue: CMapName currentdict /CMap defineresource pop end end
        self.expect_executable_name(b"CMapName")?;
        self.expect_executable_name(b"currentdict")?;
        self.expect_literal_name(b"CMap")?;
        self.expect_executable_name(b"defineresource")?;
        self.expect_executable_name(b"pop")?;
        self.expect_executable_name(b"end")?;
        self.expect_executable_name(b"end")?;
        // nothing may follow the epilogue
        self.tokenizer.skip_comments_and_whitespace();
        let eof_pos = self.tokenizer.pos();
        if let token @ Some(_) = self.tokenizer.next_token()? {
            return invalid_token_err(eof_pos, token);
        }
        Ok(PdfFontToUnicode {
            base_map,
            char_map_name: PdfName::new(char_map_name_pos, Arc::<[u8]>::from(char_map_name)),
            src_ranges: Arc::new([
                PdfString::new(range_start_pos, ArcOrRef::Arc(range_start.into()))
                    ..=PdfString::new(range_end_pos, ArcOrRef::Arc(range_end.into())),
            ]),
            to_unicode_map: Arc::new(to_unicode_map),
        })
    }
}

1507
src/pdf/font/type_1_parse.rs Normal file

File diff suppressed because it is too large Load diff

2142
src/pdf/object.rs Normal file

File diff suppressed because it is too large Load diff

1341
src/pdf/parse.rs Normal file

File diff suppressed because it is too large Load diff

1107
src/pdf/render.rs Normal file

File diff suppressed because it is too large Load diff

66
src/pdf/stream_filters.rs Normal file
View file

@ -0,0 +1,66 @@
use crate::pdf::{
object::{PdfDictionary, PdfName},
parse::{PdfInputPosition, PdfParse, PdfParseError},
pdf_parse,
};
pub mod flate;
pdf_parse! {
    // the standard stream /Filter names; unrecognized names are preserved in
    // Unknown so errors can report them instead of failing at parse time
    #[pdf(name)]
    #[derive(Clone, Debug, PartialEq, Eq)]
    #[non_exhaustive]
    pub enum PdfStreamFilter {
        #[pdf(name = "ASCIIHexDecode")]
        AsciiHexDecode,
        #[pdf(name = "ASCII85Decode")]
        Ascii85Decode,
        #[pdf(name = "LZWDecode")]
        LzwDecode,
        #[pdf(name = "FlateDecode")]
        FlateDecode,
        #[pdf(name = "RunLengthDecode")]
        RunLengthDecode,
        #[pdf(name = "CCITTFaxDecode")]
        CcittFaxDecode,
        #[pdf(name = "JBIG2Decode")]
        Jbig2Decode,
        #[pdf(name = "DCTDecode")]
        DctDecode,
        #[pdf(name = "JPXDecode")]
        JpxDecode,
        #[pdf(name = "Crypt")]
        Crypt,
        #[pdf(other)]
        Unknown(PdfName),
    }
}
impl PdfStreamFilter {
    /// Decode `encoded_data` with this filter. `filter_parms` is the filter's
    /// /DecodeParms dictionary and `stream_pos` is used in error reports.
    ///
    /// Only FlateDecode is implemented so far; the other known filters are
    /// `todo!()` stubs, and Unknown filters produce an error.
    pub fn decode_stream_data(
        &self,
        filter_parms: PdfDictionary,
        stream_pos: PdfInputPosition,
        encoded_data: &[u8],
    ) -> Result<Vec<u8>, PdfParseError> {
        match self {
            PdfStreamFilter::AsciiHexDecode => todo!(),
            PdfStreamFilter::Ascii85Decode => todo!(),
            PdfStreamFilter::LzwDecode => todo!(),
            PdfStreamFilter::FlateDecode => {
                // parse this filter's parameters, then inflate
                flate::PdfFilterParmsFlateDecode::parse(filter_parms.into())?
                    .decode_stream_data(stream_pos, encoded_data)
            }
            PdfStreamFilter::RunLengthDecode => todo!(),
            PdfStreamFilter::CcittFaxDecode => todo!(),
            PdfStreamFilter::Jbig2Decode => todo!(),
            PdfStreamFilter::DctDecode => todo!(),
            PdfStreamFilter::JpxDecode => todo!(),
            PdfStreamFilter::Crypt => todo!(),
            PdfStreamFilter::Unknown(filter) => Err(PdfParseError::UnknownStreamFilter {
                pos: stream_pos,
                filter: filter.clone(),
            }),
        }
    }
}

View file

@ -0,0 +1,74 @@
use crate::pdf::{
object::PdfDictionary,
parse::{PdfInputPosition, PdfParseError},
pdf_parse,
stream_filters::PdfStreamFilter,
};
use std::{io::Read, num::NonZero};
pdf_parse! {
    #[pdf]
    #[derive(Clone, Debug, Default)]
    // the /DecodeParms dictionary for a FlateDecode filter; all entries are
    // optional, with defaults exposed by the accessor methods
    pub struct PdfFilterParmsFlateDecode {
        // prediction algorithm applied before compression (1 = none)
        #[pdf(name = "Predictor")]
        pub predictor: Option<NonZero<u32>>,
        // samples per pixel, used by predictors
        #[pdf(name = "Colors")]
        pub colors: Option<NonZero<u32>>,
        // bits per color component, used by predictors
        #[pdf(name = "BitsPerComponent")]
        pub bits_per_component: Option<NonZero<u32>>,
        // samples per row, used by predictors
        #[pdf(name = "Columns")]
        pub columns: Option<NonZero<u32>>,
        // unrecognized dictionary entries, kept unparsed
        #[pdf(flatten)]
        pub rest: PdfDictionary,
    }
}
impl PdfFilterParmsFlateDecode {
    pub const FILTER: PdfStreamFilter = PdfStreamFilter::FlateDecode;
    pub const DEFAULT_PREDICTOR: NonZero<u32> = const { NonZero::new(1).unwrap() };
    pub const DEFAULT_COLORS: NonZero<u32> = const { NonZero::new(1).unwrap() };
    pub const DEFAULT_BITS_PER_COMPONENT: NonZero<u32> = const { NonZero::new(8).unwrap() };
    pub const DEFAULT_COLUMNS: NonZero<u32> = const { NonZero::new(1).unwrap() };
    /// /Predictor with its default applied (1 = no prediction).
    pub fn predictor(&self) -> NonZero<u32> {
        self.predictor.unwrap_or(Self::DEFAULT_PREDICTOR)
    }
    /// /Colors with its default applied.
    pub fn colors(&self) -> NonZero<u32> {
        self.colors.unwrap_or(Self::DEFAULT_COLORS)
    }
    /// /BitsPerComponent with its default applied.
    pub fn bits_per_component(&self) -> NonZero<u32> {
        self.bits_per_component
            .unwrap_or(Self::DEFAULT_BITS_PER_COMPONENT)
    }
    /// /Columns with its default applied.
    pub fn columns(&self) -> NonZero<u32> {
        self.columns.unwrap_or(Self::DEFAULT_COLUMNS)
    }
    /// Inflate `encoded_data` (zlib format) and, if a predictor is configured,
    /// undo it. `stream_pos` is used in error reports.
    ///
    /// Errors if the zlib stream is malformed; predictors other than the
    /// default (none) are not implemented yet.
    pub fn decode_stream_data(
        &self,
        stream_pos: PdfInputPosition,
        encoded_data: &[u8],
    ) -> Result<Vec<u8>, PdfParseError> {
        let mut decoded_data = vec![];
        flate2::bufread::ZlibDecoder::new(encoded_data)
            .read_to_end(&mut decoded_data)
            .map_err(|e| PdfParseError::StreamFilterError {
                pos: stream_pos,
                filter: Self::FILTER.into(),
                error: e.to_string(),
            })?;
        let predictor = self.predictor();
        if predictor == Self::DEFAULT_PREDICTOR {
            // no predictor: the inflated bytes are the stream data as-is
            Ok(decoded_data)
        } else {
            // TIFF (2) and PNG (10..=15) predictors would additionally use
            // self.colors(), self.bits_per_component() and self.columns()
            todo!("{predictor}")
        }
    }
}

File diff suppressed because it is too large Load diff

382
src/util.rs Normal file
View file

@ -0,0 +1,382 @@
use std::{
any::{Any, TypeId},
borrow::Borrow,
cell::Cell,
collections::HashMap,
fmt,
hash::{Hash, Hasher},
sync::Arc,
};
/// a `Cow`-like smart pointer holding either a shared `Arc<T>` or a plain
/// borrow, so APIs can defer the allocation until ownership is actually
/// needed (see [`ArcOrRef::into_arc`] / [`ArcOrRef::make_mut`])
pub enum ArcOrRef<'a, T: ?Sized> {
    Arc(Arc<T>),
    Ref(&'a T),
}
// AsRef/Borrow go through the Deref impl below
impl<'a, T: ?Sized> AsRef<T> for ArcOrRef<'a, T> {
    fn as_ref(&self) -> &T {
        self
    }
}
impl<'a, T: ?Sized> Borrow<T> for ArcOrRef<'a, T> {
    fn borrow(&self) -> &T {
        self
    }
}
impl<'a, T: ?Sized> From<Arc<T>> for ArcOrRef<'a, T> {
    fn from(value: Arc<T>) -> Self {
        Self::Arc(value)
    }
}
impl<'a, T: ?Sized> From<&'a T> for ArcOrRef<'a, T> {
    fn from(value: &'a T) -> Self {
        Self::Ref(value)
    }
}
// defaults to a borrowed default (e.g. "" for str, [] for slices) — no Arc
// allocation
impl<'a, T: ?Sized> Default for ArcOrRef<'a, T>
where
    &'a T: Default,
{
    fn default() -> Self {
        Self::Ref(Default::default())
    }
}
// cloning never deep-copies: Arc bumps its refcount, Ref copies the pointer
impl<T: ?Sized> Clone for ArcOrRef<'_, T> {
    fn clone(&self) -> Self {
        match self {
            Self::Arc(v) => Self::Arc(v.clone()),
            Self::Ref(v) => Self::Ref(v),
        }
    }
}
// the comparison/hash impls all delegate to the pointed-to T (via Deref), so
// an Arc and a Ref to equal values compare/hash equal
impl<T: ?Sized + Hash> Hash for ArcOrRef<'_, T> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        T::hash(self, state)
    }
}
impl<'a, 'b, T: ?Sized + PartialEq<U>, U: ?Sized> PartialEq<ArcOrRef<'b, U>> for ArcOrRef<'a, T> {
    fn eq(&self, other: &ArcOrRef<'b, U>) -> bool {
        T::eq(self, other)
    }
}
impl<T: ?Sized + Eq> Eq for ArcOrRef<'_, T> {}
impl<'a, 'b, T: ?Sized + PartialOrd<U>, U: ?Sized> PartialOrd<ArcOrRef<'b, U>> for ArcOrRef<'a, T> {
    fn partial_cmp(&self, other: &ArcOrRef<'b, U>) -> Option<std::cmp::Ordering> {
        T::partial_cmp(self, other)
    }
}
impl<T: ?Sized + Ord> Ord for ArcOrRef<'_, T> {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        T::cmp(self, other)
    }
}
impl<T: ?Sized> std::ops::Deref for ArcOrRef<'_, T> {
    type Target = T;
    fn deref(&self) -> &Self::Target {
        match self {
            ArcOrRef::Arc(v) => v,
            ArcOrRef::Ref(v) => v,
        }
    }
}
// formatting delegates to T, so the Arc/Ref distinction is invisible in output
impl<T: ?Sized + fmt::Debug> fmt::Debug for ArcOrRef<'_, T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        T::fmt(self, f)
    }
}
impl<T: ?Sized + fmt::Display> fmt::Display for ArcOrRef<'_, T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        T::fmt(self, f)
    }
}
/// a stable alternative to `CloneToUninit` for `Arc`
pub trait ArcFromRef {
    /// like `Arc::new(Self::clone(self))` but works for unsized types too
    fn arc_from_ref(&self) -> Arc<Self>;
    /// generic version of `Arc::make_mut`
    fn make_mut(this: &mut Arc<Self>) -> &mut Self;
}
// sized case: a plain clone into a fresh Arc
impl<T: Clone> ArcFromRef for T {
    fn arc_from_ref(&self) -> Arc<Self> {
        Arc::new(Self::clone(self))
    }
    fn make_mut(this: &mut Arc<Self>) -> &mut Self {
        Arc::make_mut(this)
    }
}
// unsized slices: Arc::from clones the elements into the new allocation
impl<T: Clone> ArcFromRef for [T] {
    fn arc_from_ref(&self) -> Arc<Self> {
        Arc::from(self)
    }
    fn make_mut(this: &mut Arc<Self>) -> &mut Self {
        Arc::make_mut(this)
    }
}
// unsized str: Arc::from copies the bytes into the new allocation
impl ArcFromRef for str {
    fn arc_from_ref(&self) -> Arc<Self> {
        Arc::from(self)
    }
    fn make_mut(this: &mut Arc<Self>) -> &mut Self {
        Arc::make_mut(this)
    }
}
impl<'a, T: ?Sized + ArcFromRef> ArcOrRef<'a, T> {
    /// Convert into an owned `Arc`, allocating only in the `Ref` case.
    pub fn into_arc(this: Self) -> Arc<T> {
        match this {
            Self::Arc(arc) => arc,
            Self::Ref(r) => T::arc_from_ref(r),
        }
    }
    /// Ensure `this` holds an `Arc` (allocating from the borrow if it does
    /// not yet) and return a mutable reference to that `Arc`.
    pub fn make_arc(this: &mut Self) -> &mut Arc<T> {
        // upgrade a borrow to an owned Arc in place first
        if let ArcOrRef::Ref(r) = this {
            *this = ArcOrRef::Arc(T::arc_from_ref(*r));
        }
        match this {
            ArcOrRef::Arc(arc) => arc,
            // just replaced above
            ArcOrRef::Ref(_) => unreachable!(),
        }
    }
    /// Clone-on-write access: a mutable reference to the pointed-to value,
    /// cloning out of a borrow or a shared `Arc` only when necessary.
    pub fn make_mut(this: &mut Self) -> &mut T {
        T::make_mut(Self::make_arc(this))
    }
}
// private supertrait: keeps SupportsDagDebugState implementable only inside
// this crate (sealed-trait pattern)
trait DagDebugStateSealed {}
#[expect(private_bounds)]
/// shared-pointer types that [`DagDebugState`] can deduplicate while
/// debug-printing; `Key` identifies a value by allocation identity
pub trait SupportsDagDebugState: DagDebugStateSealed + 'static + Clone {
    type Key: Clone + Hash + Eq + 'static;
    fn key(this: &Self) -> Self::Key;
}
// all keys are the Arc's allocation address, so two Arcs to the same
// allocation count as one value
impl<T: 'static> DagDebugStateSealed for Arc<T> {}
impl<T: 'static> SupportsDagDebugState for Arc<T> {
    type Key = *const T;
    fn key(this: &Self) -> Self::Key {
        Arc::as_ptr(this)
    }
}
impl<T: 'static> DagDebugStateSealed for Arc<[T]> {}
impl<T: 'static> SupportsDagDebugState for Arc<[T]> {
    type Key = *const [T];
    fn key(this: &Self) -> Self::Key {
        Arc::as_ptr(this)
    }
}
impl DagDebugStateSealed for Arc<str> {}
impl SupportsDagDebugState for Arc<str> {
    type Key = *const str;
    fn key(this: &Self) -> Self::Key {
        Arc::as_ptr(this)
    }
}
// object-safe view of a DagDebugStatePart<T>, so parts for different T can
// live in one type-erased map
trait DagDebugStatePartTrait: 'static {
    fn reset(&mut self);
    fn as_any_mut(&mut self) -> &mut dyn Any;
}
// per-type dedup table: key -> (sequential id, kept-alive clone of the value)
struct DagDebugStatePart<T: SupportsDagDebugState> {
    table: HashMap<T::Key, (u64, T)>,
    next_id: u64,
}
impl<T: SupportsDagDebugState> DagDebugStatePartTrait for DagDebugStatePart<T> {
    fn reset(&mut self) {
        // destructure so a new field cannot silently escape resetting
        let Self { table, next_id } = self;
        table.clear();
        *next_id = 0;
    }
    fn as_any_mut(&mut self) -> &mut dyn Any {
        self
    }
}
impl<T: SupportsDagDebugState> DagDebugStatePart<T> {
    /// Register `value` by its key, assigning the next sequential id on first
    /// sight; reports whether the value was already known together with its id.
    fn insert(&mut self, value: &T) -> DagDebugStateInsertResult {
        use std::collections::hash_map::Entry;
        let key = T::key(value);
        match self.table.entry(key) {
            Entry::Occupied(occupied) => DagDebugStateInsertResult::Old {
                id: occupied.get().0,
            },
            Entry::Vacant(vacant) => {
                let id = self.next_id;
                self.next_id += 1;
                // keep a clone alive so the allocation (and thus its address,
                // our key) cannot be reused while the table is live
                vacant.insert((id, T::clone(value)));
                DagDebugStateInsertResult::New { id }
            }
        }
    }
}
// manual impl: derive(Default) would add an unnecessary `T: Default` bound
impl<T: SupportsDagDebugState> Default for DagDebugStatePart<T> {
    fn default() -> Self {
        Self {
            table: HashMap::default(),
            next_id: 0,
        }
    }
}
/// thread-local registry used while debug-formatting DAG-shaped data:
/// remembers which shared values were already printed inside the current
/// [`DagDebugState::scope`] so repeats can be abbreviated to an id
pub struct DagDebugState {
    // one type-erased DagDebugStatePart<T> per T, keyed by its TypeId
    parts: std::cell::RefCell<HashMap<TypeId, Box<dyn DagDebugStatePartTrait>>>,
    // nesting depth of scope() calls; tables reset when it returns to 0
    ref_count: Cell<usize>,
}
#[derive(Clone, Copy, Debug)]
/// outcome of registering a value: `New` on first sight (id freshly
/// assigned), `Old` when the same allocation was seen before (id reused)
pub enum DagDebugStateInsertResult {
    New { id: u64 },
    Old { id: u64 },
}
impl DagDebugStateInsertResult {
    /// The assigned id, regardless of whether the value was new.
    pub fn id(self) -> u64 {
        // both variants carry the id, so an irrefutable or-pattern suffices
        let (Self::New { id } | Self::Old { id }) = self;
        id
    }
}
impl DagDebugState {
    /// Run `f` with this type's dedup table, creating the table on first use.
    fn with_part<T: SupportsDagDebugState, R>(
        &self,
        f: impl FnOnce(&mut DagDebugStatePart<T>) -> R,
    ) -> R {
        let mut parts = self.parts.borrow_mut();
        // the map is keyed by the part's TypeId, so the downcast below cannot
        // fail
        let Some(part) = parts
            .entry(TypeId::of::<DagDebugStatePart<T>>())
            .or_insert_with(|| Box::new(DagDebugStatePart::<T>::default()))
            .as_any_mut()
            .downcast_mut::<DagDebugStatePart<T>>()
        else {
            unreachable!()
        };
        f(part)
    }
    /// Register `value`, assigning an id on first sight within this scope.
    pub fn insert<T: SupportsDagDebugState>(&self, value: &T) -> DagDebugStateInsertResult {
        self.with_part(|part: &mut DagDebugStatePart<T>| part.insert(value))
    }
    /// Wrapper that prints `value` in full the first time it is seen in this
    /// scope and `abbreviated` on repeats, always prefixed with `#id`.
    pub fn debug_or_id<'a, T: SupportsDagDebugState + fmt::Debug, Abbreviated: fmt::Display>(
        &self,
        value: &'a T,
        abbreviated: Abbreviated,
    ) -> impl fmt::Debug + fmt::Display + use<'a, T, Abbreviated> {
        self.debug_or_id_with(value, fmt::Debug::fmt, move |f| abbreviated.fmt(f))
    }
    /// Like [`Self::debug_or_id`] but with caller-supplied formatting closures
    /// for both the full and the abbreviated form.
    pub fn debug_or_id_with<
        'a,
        T: SupportsDagDebugState,
        DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result,
        DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result,
    >(
        &self,
        value: &'a T,
        debug_value: DebugValue,
        debug_abbreviated: DebugAbbreviated,
    ) -> impl fmt::Debug + fmt::Display + use<'a, T, DebugValue, DebugAbbreviated> {
        struct DebugOrIdWith<
            'a,
            T: SupportsDagDebugState,
            DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result,
            DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result,
        > {
            // captured at construction time, so New/Old is decided by the
            // first insert, not by when the wrapper is formatted
            insert_result: DagDebugStateInsertResult,
            value: &'a T,
            debug_value: DebugValue,
            debug_abbreviated: DebugAbbreviated,
        }
        impl<
            'a,
            T: SupportsDagDebugState,
            DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result,
            DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result,
        > fmt::Debug for DebugOrIdWith<'a, T, DebugValue, DebugAbbreviated>
        {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                fmt::Display::fmt(self, f)
            }
        }
        impl<
            'a,
            T: SupportsDagDebugState,
            DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result,
            DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result,
        > fmt::Display for DebugOrIdWith<'a, T, DebugValue, DebugAbbreviated>
        {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                // always print the id prefix, then either the full value
                // (first sight) or the abbreviated form (repeat)
                write!(f, "#{} ", self.insert_result.id())?;
                match self.insert_result {
                    DagDebugStateInsertResult::New { id: _ } => (self.debug_value)(self.value, f),
                    DagDebugStateInsertResult::Old { id: _ } => (self.debug_abbreviated)(f),
                }
            }
        }
        DebugOrIdWith {
            insert_result: self.insert(value),
            value,
            debug_value,
            debug_abbreviated,
        }
    }
#[must_use]
fn inc_ref_count_scope(&self) -> impl Sized {
struct DecRefCountOnDrop<'a>(&'a DagDebugState);
impl Drop for DecRefCountOnDrop<'_> {
fn drop(&mut self) {
self.0.ref_count.set(self.0.ref_count.get() - 1);
if self.0.ref_count.get() == 0 {
self.0
.parts
.borrow_mut()
.values_mut()
.for_each(|v| v.reset());
}
}
}
self.ref_count.set(
self.ref_count
.get()
.checked_add(1)
.expect("too many nested calls"),
);
DecRefCountOnDrop(self)
}
pub fn scope<R>(f: impl FnOnce(&Self) -> R) -> R {
thread_local! {
static STATE: DagDebugState = DagDebugState { parts: Default::default(), ref_count: Cell::new(0) };
}
STATE.with(|state| {
let _scope = state.inc_ref_count_scope();
f(state)
})
}
}

View file

@ -1,232 +0,0 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use quick_xml::{
Writer,
events::{BytesDecl, BytesStart, BytesText, Event},
};
use std::fmt;
struct FmtToIoAdaptor<W> {
writer: W,
partial_char: [u8; 4],
partial_char_len: u8,
}
impl<W: fmt::Write> FmtToIoAdaptor<W> {
fn new(writer: W) -> Self {
Self {
writer,
partial_char: [0; 4],
partial_char_len: 0,
}
}
fn finish(self) -> Result<W, fmt::Error> {
let Self {
writer,
partial_char: _,
partial_char_len,
} = self;
if partial_char_len != 0 {
Err(fmt::Error)
} else {
Ok(writer)
}
}
fn write_byte(&mut self, b: u8) -> std::io::Result<()> {
let Self {
writer,
partial_char,
partial_char_len,
} = self;
partial_char[usize::from(*partial_char_len)] = b;
*partial_char_len += 1;
match str::from_utf8(&partial_char[..usize::from(*partial_char_len)]) {
Ok(s) => {
*partial_char_len = 0;
writer.write_str(s).map_err(std::io::Error::other)
}
Err(e) => {
if e.error_len().is_some() {
*partial_char_len = 0;
Err(std::io::Error::new(std::io::ErrorKind::InvalidData, e))
} else {
Ok(())
}
}
}
}
}
impl<W: fmt::Write> std::io::Write for FmtToIoAdaptor<W> {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
for &b in buf {
self.write_byte(b)?;
}
Ok(buf.len())
}
fn flush(&mut self) -> std::io::Result<()> {
Ok(())
}
}
/// An XML element's name, or the marker for a comment node.
#[derive(Clone, PartialEq, Eq, Hash)]
pub(crate) enum ElementTag {
    Comment,
    Normal(String),
}
impl ElementTag {
    /// Returns the tag name for a normal element, or `None` for a comment.
    pub(crate) fn normal(&self) -> Option<&str> {
        match self {
            Self::Normal(name) => Some(name.as_str()),
            Self::Comment => None,
        }
    }
}
impl fmt::Debug for ElementTag {
    /// Comments print as the placeholder `<Comment>`; normal tags print as a
    /// quoted string (via `String`'s `Debug`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if let Self::Normal(name) = self {
            fmt::Debug::fmt(name, f)
        } else {
            f.write_str("<Comment>")
        }
    }
}
/// like python's xml.etree.ElementTree.Element
#[derive(Clone, Debug)]
pub(crate) struct Element {
    /// element name, or the comment marker
    pub(crate) tag: ElementTag,
    /// attributes as (name, value) pairs, kept in insertion order
    pub(crate) attrib: Vec<(String, String)>,
    /// text contained in this element but before any children
    pub(crate) text: String,
    /// child elements (including comment nodes) in document order
    pub(crate) children: Vec<Element>,
    /// text after the end of this element
    pub(crate) tail: String,
}
/// equivalent to python `xml.etree.ElementTree.tostring(self, encoding="unicode")`
impl fmt::Display for Element {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // route quick_xml's byte-oriented output into the text formatter
        let mut xml_writer = Writer::new(FmtToIoAdaptor::new(f));
        if self.write_to(&mut xml_writer).is_err() {
            return Err(fmt::Error);
        }
        // `finish` fails if the serialized bytes ended mid-code-point
        xml_writer.into_inner().finish().map(|_| ())
    }
}
impl Element {
    /// Creates an element named `tag` with the given attributes and empty
    /// text, children, and tail.
    pub(crate) fn new(tag: String, attrib: impl IntoIterator<Item = (String, String)>) -> Self {
        Self {
            tag: ElementTag::Normal(tag),
            attrib: Vec::from_iter(attrib),
            text: String::new(),
            children: Vec::new(),
            tail: String::new(),
        }
    }
    /// equivalent of python's `xml.etree.ElementTree.Comment()`
    pub(crate) fn comment(text: String) -> Self {
        Self {
            tag: ElementTag::Comment,
            attrib: Vec::new(),
            text,
            children: Vec::new(),
            tail: String::new(),
        }
    }
    /// equivalent to python `"".join(self.itertext())`
    pub(crate) fn inner_text(&self) -> String {
        let mut retval = String::new();
        // depth-first concatenation of each element's text and its children's
        // tails, skipping the text inside comment nodes (like itertext())
        fn helper(element: &Element, retval: &mut String) {
            let Element {
                tag,
                attrib: _,
                text,
                children,
                tail: _,
            } = element;
            let ElementTag::Normal(_) = tag else {
                // comments contribute no text of their own; their tail is
                // still appended by the parent's loop below
                return;
            };
            retval.push_str(text);
            for child in children {
                helper(child, retval);
                retval.push_str(&child.tail);
            }
        }
        helper(self, &mut retval);
        retval
    }
    /// equivalent of python's `xml.etree.ElementTree.SubElement()`
    pub(crate) fn sub_element(
        &mut self,
        tag: String,
        attrib: impl IntoIterator<Item = (String, String)>,
    ) -> &mut Self {
        self.children.push(Self::new(tag, attrib));
        self.children.last_mut().expect("just pushed")
    }
    /// Serializes this element (recursively, children included) followed by
    /// its tail text into `writer`.
    pub(crate) fn write_to(&self, writer: &mut Writer<impl std::io::Write>) -> std::io::Result<()> {
        let Element {
            tag,
            attrib,
            text,
            children,
            tail,
        } = self;
        match tag {
            ElementTag::Comment => {
                // NOTE(review): a comment's attrib/children are ignored here —
                // presumably never set on comment nodes; verify at call sites
                writer.write_event(Event::Comment(BytesText::new(text)))?;
            }
            ElementTag::Normal(tag) if tag.is_empty() => {
                // an empty tag name acts as a bare text node; attrib/children
                // are ignored (assumed absent for this case — TODO confirm)
                writer.write_event(Event::Text(BytesText::new(text)))?;
            }
            ElementTag::Normal(tag)
                if attrib.is_empty() && text.is_empty() && children.is_empty() =>
            {
                // write element like `<br />` to match python instead of like `<br/>`
                writer.write_event(Event::Empty(BytesStart::from_content(
                    tag.clone() + " ",
                    tag.len(),
                )))?;
            }
            ElementTag::Normal(tag) => {
                let mut element_writer = writer.create_element(tag);
                for (name, value) in attrib {
                    element_writer = element_writer.with_attribute((name.as_str(), value.as_str()));
                }
                if text.is_empty() && children.is_empty() {
                    // NOTE(review): this branch (empty element *with*
                    // attributes) writes `<tag a="v"/>` without the space
                    // python emits (`<tag a="v" />`) — confirm this case never
                    // occurs in practice or that the mismatch is acceptable
                    element_writer.write_empty()?;
                } else {
                    element_writer.write_inner_content(|writer| {
                        writer.write_event(Event::Text(BytesText::new(text)))?;
                        for child in children {
                            child.write_to(writer)?;
                        }
                        Ok(())
                    })?;
                }
            }
        }
        // the tail belongs after this element's closing tag
        writer.write_event(Event::Text(BytesText::new(tail)))?;
        Ok(())
    }
    /// equivalent of python's `xml.etree.ElementTree(self).write(writer, encoding='utf-8', xml_declaration=xml_declaration)`
    pub(crate) fn write(
        &self,
        writer: impl std::io::Write,
        xml_declaration: bool,
    ) -> std::io::Result<()> {
        let mut writer = Writer::new(writer);
        if xml_declaration {
            // use specific string to match python
            writer.write_event(Event::Decl(BytesDecl::from_start(
                BytesStart::from_content("xml version='1.0' encoding='utf-8'", 3),
            )))?;
            writer.write_event(Event::Text(BytesText::new("\n")))?;
        }
        self.write_to(&mut writer)
    }
}