Compare commits

..

10 commits

28 changed files with 12027 additions and 6489 deletions

View file

@ -1,46 +0,0 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
on: [push, pull_request]
env:
PDF_HASH: 56372d23ece7e9e2c1b381a639443982a3e16e38109df1c141d655b779b61fdb
OUTPUT_XML_HASH: c0b4592cbd0a3e59b9b2931a6a75a3d87ebf23bf453e8587a1522dd157f15ee9
jobs:
test:
runs-on: debian-12
container:
image: git.libre-chip.org/libre-chip/fayalite-deps:latest
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- run: |
scripts/check-copyright.sh
- uses: https://git.libre-chip.org/mirrors/rust-cache@v2
with:
save-if: ${{ github.ref == 'refs/heads/master' }}
- run: |
apt-get update -qq
apt-get install -qq python3-venv wget
# copy of https://files.openpower.foundation/s/9izgC5Rogi5Ywmm/download/OPF_PowerISA_v3.1C.pdf
wget -O OPF_PowerISA_v3.1C.pdf https://libre-chip.org/OPF_PowerISA_v3.1C.pdf
echo "$PDF_HASH OPF_PowerISA_v3.1C.pdf" | sha256sum -c
- run: |
cargo test
- run: |
cargo build --release
- run: |
cargo run --release -- OPF_PowerISA_v3.1C.pdf &> >(tee out.log | grep '^page ') || { tail -n1000 out.log; false; }
echo "expected output (not all instructions are decoded yet, change when the output is improved):"
echo "$OUTPUT_XML_HASH powerisa-instructions.xml" | sha256sum -c
mv powerisa-instructions.xml powerisa-instructions-rust.xml
- run: |
python3 -m venv --upgrade-deps .venv
. .venv/bin/activate
pip install -e .
parse_powerisa_pdf OPF_PowerISA_v3.1C.pdf &> >(tee out.log | grep '^page ') || { tail -n1000 out.log; false; }
echo "expected output (not all instructions are decoded yet, change when the output is improved):"
echo "$OUTPUT_XML_HASH powerisa-instructions.xml" | sha256sum -c
mv powerisa-instructions.xml powerisa-instructions-python.xml

3
.gitignore vendored
View file

@ -1,10 +1,7 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
/.venv /.venv
/.vscode /.vscode
*.egg-info *.egg-info
__pycache__ __pycache__
*.log *.log
/powerisa-instructions.xml /powerisa-instructions.xml
/*.pdf
/target /target

308
Cargo.lock generated
View file

@ -3,56 +3,10 @@
version = 4 version = 4
[[package]] [[package]]
name = "aho-corasick" name = "adler2"
version = "1.1.4" version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
dependencies = [
"memchr",
]
[[package]]
name = "bindgen"
version = "0.71.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
dependencies = [
"bitflags",
"cexpr",
"clang-sys",
"itertools",
"proc-macro2",
"quote",
"regex",
"rustc-hash",
"shlex",
"syn",
]
[[package]]
name = "bitflags"
version = "2.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
[[package]]
name = "cc"
version = "1.2.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a0aeaff4ff1a90589618835a598e545176939b97874f7abc7851caa0618f203"
dependencies = [
"find-msvc-tools",
"shlex",
]
[[package]]
name = "cexpr"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
dependencies = [
"nom",
]
[[package]] [[package]]
name = "cfg-if" name = "cfg-if"
@ -61,16 +15,39 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]] [[package]]
name = "clang-sys" name = "crc32fast"
version = "1.8.1" version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
dependencies = [ dependencies = [
"glob", "cfg-if",
"libc",
"libloading",
] ]
[[package]]
name = "crossbeam-deque"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]] [[package]]
name = "either" name = "either"
version = "1.15.0" version = "1.15.0"
@ -78,228 +55,55 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]] [[package]]
name = "equivalent" name = "flate2"
version = "1.0.2" version = "1.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb"
[[package]]
name = "find-msvc-tools"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff"
[[package]]
name = "glob"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
[[package]]
name = "hashbrown"
version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
[[package]]
name = "indexmap"
version = "2.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2"
dependencies = [ dependencies = [
"equivalent", "crc32fast",
"hashbrown", "miniz_oxide",
] ]
[[package]] [[package]]
name = "itertools" name = "miniz_oxide"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
dependencies = [
"either",
]
[[package]]
name = "libc"
version = "0.2.178"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091"
[[package]]
name = "libloading"
version = "0.8.9" version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
dependencies = [ dependencies = [
"cfg-if", "adler2",
"windows-link", "simd-adler32",
]
[[package]]
name = "libm"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de"
[[package]]
name = "memchr"
version = "2.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "mupdf-sys"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13e9a0d4e844ab50315d43312f3d62f72c77205b07c8ee21cbd4b52bdc2a9910"
dependencies = [
"bindgen",
"cc",
"pkg-config",
"regex",
"zerocopy",
]
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
] ]
[[package]] [[package]]
name = "parse_powerisa_pdf" name = "parse_powerisa_pdf"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"indexmap", "flate2",
"libm", "rayon",
"mupdf-sys",
"quick-xml",
] ]
[[package]] [[package]]
name = "pkg-config" name = "rayon"
version = "0.3.32" version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
[[package]]
name = "proc-macro2"
version = "1.0.104"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9695f8df41bb4f3d222c95a67532365f569318332d03d5f3f67f37b20e6ebdf0"
dependencies = [ dependencies = [
"unicode-ident", "either",
"rayon-core",
] ]
[[package]] [[package]]
name = "quick-xml" name = "rayon-core"
version = "0.38.4" version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c" checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
dependencies = [ dependencies = [
"memchr", "crossbeam-deque",
"crossbeam-utils",
] ]
[[package]] [[package]]
name = "quote" name = "simd-adler32"
version = "1.0.42" version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
[[package]]
name = "rustc-hash"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "syn"
version = "2.0.112"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21f182278bf2d2bcb3c88b1b08a37df029d71ce3d3ae26168e3c653b213b99d4"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
[[package]]
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
[[package]]
name = "zerocopy"
version = "0.8.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.8.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

View file

@ -1,17 +1,10 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
[package] [package]
name = "parse_powerisa_pdf" name = "parse_powerisa_pdf"
version = "0.1.0" version = "0.1.0"
license = "LGPL-3.0-or-later"
edition = "2024" edition = "2024"
repository = "" license = "LGPL-3.0-or-later"
keywords = []
categories = []
rust-version = "1.89.0"
[dependencies] [dependencies]
indexmap = "2.12.1" flate2 = "1.1.5"
libm = "0.2.15" rayon = "1.11.0"
mupdf-sys = { version = "0.5.0", default-features = false }
quick-xml = "0.38.4"

View file

@ -1,38 +1,5 @@
<!--
SPDX-License-Identifier: LGPL-3.0-or-later
See Notices.txt for copyright information
-->
parser for the OPF PowerISA 3.1C pdf to attempt to extract all instructions' pseudo-code including subscripts/superscripts and other formatting parser for the OPF PowerISA 3.1C pdf to attempt to extract all instructions' pseudo-code including subscripts/superscripts and other formatting
# Using the new Rust code:
Usage:
* Download the OPF PowerISA 3.1C pdf (yes you need that exact version) from <https://openpower.foundation/specifications/isa/>
* Install Rust -- you need version 1.89.0 or later.
Getting it from https://rustup.rs/ is recommended.
* Install required build dependencies:
On Debian 12:
```bash
sudo apt update
sudo apt install build-essential clang unzip
```
* Compile and run:
```bash
cargo run -- path/to/downloaded/OPF_PowerISA_v3.1C.pdf > out.log
```
* This will spit out lots of errors and then successfully create
the output file -- `powerisa-instructions.xml` in the current directory.
# Using the old Python code:
Usage: Usage:
* Download the OPF PowerISA 3.1C pdf (yes you need that exact version) from <https://openpower.foundation/specifications/isa/> * Download the OPF PowerISA 3.1C pdf (yes you need that exact version) from <https://openpower.foundation/specifications/isa/>
* Obtain CPython 3.11 (the default `python3` in [Debian Bookworm](https://www.debian.org/releases/bookworm/)) * Obtain CPython 3.11 (the default `python3` in [Debian Bookworm](https://www.debian.org/releases/bookworm/))

View file

@ -1,5 +1,3 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
from __future__ import annotations from __future__ import annotations
from collections import defaultdict from collections import defaultdict
from collections.abc import Generator, Iterable, Iterator, Callable from collections.abc import Generator, Iterable, Iterator, Callable
@ -765,7 +763,7 @@ class Page:
unprocessed_non_text: SetById[LTLine | LTRect] unprocessed_non_text: SetById[LTLine | LTRect]
@staticmethod @staticmethod
def from_lt_page(page_num: int, page: LTPage, first_seen_fonts: defaultdict[str, set[float]]) -> Page: def from_lt_page(page_num: int, page: LTPage) -> Page:
qt: defaultdict[TextSection, QuadTree[Char | LTLine | LTRect]] = defaultdict(QuadTree) qt: defaultdict[TextSection, QuadTree[Char | LTLine | LTRect]] = defaultdict(QuadTree)
unprocessed_chars = defaultdict(lambda: defaultdict(SetById[Char])) unprocessed_chars = defaultdict(lambda: defaultdict(SetById[Char]))
unprocessed_non_text: SetById[LTLine | LTRect] = SetById() unprocessed_non_text: SetById[LTLine | LTRect] = SetById()
@ -804,25 +802,20 @@ class Page:
raise AssertionError( raise AssertionError(
f"char not in text section: {element}\npage_num={page_num}") f"char not in text section: {element}\npage_num={page_num}")
continue continue
font_size = round(element.size, 3)
char = Char( char = Char(
text=element.get_text(), text=element.get_text(),
font=Font(font_name=element.fontname, size=font_size), font=Font(font_name=element.fontname, size=round(element.size, 3)),
adv=element.adv, adv=element.adv,
min_x=element.x0, min_x=element.x0,
min_y=element.y0, min_y=element.y0,
max_x=element.x1, max_x=element.x1,
max_y=element.y1, max_y=element.y1,
) )
if font_size not in first_seen_fonts[element.fontname]:
first_seen_fonts[element.fontname].add(font_size)
print(f"first seen font: {element.fontname!r} {font_size}: page {page_num} {char!r}")
qt[text_section].insert(char.min_x, char.min_y, char) qt[text_section].insert(char.min_x, char.min_y, char)
unprocessed_chars[text_section][char.font].add(char) unprocessed_chars[text_section][char.font].add(char)
for text_section, i in unprocessed_chars.items(): for i in unprocessed_chars.values():
for chars in i.values(): for j in i.values():
chars.sort(key=Char.top_down_left_to_right_sort_key) j.sort(key=Char.top_down_left_to_right_sort_key)
print(f"first char: {text_section!r}: {next(iter(chars), None)!r}")
unknown_fonts=[] unknown_fonts=[]
unknown_font_errors=[] unknown_font_errors=[]
for i in unprocessed_chars.values(): for i in unprocessed_chars.values():
@ -1186,14 +1179,13 @@ class Parser:
def __pages_gen(file: Path, page_numbers: Iterable[int] | None) -> Generator[Page, None, None]: def __pages_gen(file: Path, page_numbers: Iterable[int] | None) -> Generator[Page, None, None]:
if page_numbers is not None: if page_numbers is not None:
page_numbers = sorted(i - 1 for i in page_numbers) page_numbers = sorted(i - 1 for i in page_numbers)
first_seen_fonts = defaultdict(set)
for i, page in enumerate(extract_pages(file, page_numbers=page_numbers)): for i, page in enumerate(extract_pages(file, page_numbers=page_numbers)):
if page_numbers is not None: if page_numbers is not None:
page_num = page_numbers[i] + 1 page_num = page_numbers[i] + 1
else: else:
page_num = i + 1 page_num = i + 1
print(f"page {page_num}") print(f"page {page_num}")
yield Page.from_lt_page(page_num=page_num, page=page, first_seen_fonts=first_seen_fonts) yield Page.from_lt_page(page_num=page_num, page=page)
def parse_pdf(self, file: Path, page_numbers: Iterable[int] | None = None): def parse_pdf(self, file: Path, page_numbers: Iterable[int] | None = None):
self.pages = Pages(pages_gen=Parser.__pages_gen( self.pages = Pages(pages_gen=Parser.__pages_gen(
@ -1509,7 +1501,7 @@ class Parser:
f"instruction bit fields box has wrong number of horizontal lines:\n{h_lines}") f"instruction bit fields box has wrong number of horizontal lines:\n{h_lines}")
if len(v_lines) < 2: if len(v_lines) < 2:
raise InsnParseError( raise InsnParseError(
f"instruction bit fields box has too few vertical lines:\n{v_lines}") f"instruction bit fields box has too few vertical lines:\n{h_lines}")
bottom_line, top_line = h_lines bottom_line, top_line = h_lines
box_min_x = v_lines[0].x0 box_min_x = v_lines[0].x0
box_max_x = v_lines[-1].x0 box_max_x = v_lines[-1].x0

View file

@ -1,5 +1,3 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
from __future__ import annotations from __future__ import annotations
from typing import Callable, Generic, Iterable, Iterator, TypeVar from typing import Callable, Generic, Iterable, Iterator, TypeVar
from math import frexp, isfinite, isnan, ldexp from math import frexp, isfinite, isnan, ldexp

View file

@ -1,5 +1,3 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
from collections import abc from collections import abc
from typing import Callable, Generic, Iterable, Iterator, Protocol, TypeAlias, TypeVar, overload from typing import Callable, Generic, Iterable, Iterator, Protocol, TypeAlias, TypeVar, overload

View file

@ -1,5 +1,3 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
[build-system] [build-system]
requires = ["setuptools >= 61.0"] requires = ["setuptools >= 61.0"]
build-backend = "setuptools.build_meta" build-backend = "setuptools.build_meta"
@ -13,7 +11,4 @@ dependencies = [
requires-python = ">= 3.11" requires-python = ">= 3.11"
[project.scripts] [project.scripts]
parse_powerisa_pdf = "parse_powerisa_pdf.parse_powerisa_pdf:main" parse_powerisa_pdf = "parse_powerisa_pdf.parse_powerisa_pdf:main"
[tool.setuptools]
packages = ["parse_powerisa_pdf"]

View file

@ -1,70 +0,0 @@
#!/bin/bash
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
#
# Checks that every file tracked in git HEAD starts with the SPDX/copyright
# header appropriate for its file type. Aborts on the first violation.
set -e

# Print an error message to stderr and exit with failure.
function fail()
{
    local error="$1"
    echo "error: $error" >&2
    exit 1
}

# Abort with a "file:line:" prefix; `line` is 0-based internally and printed
# 1-based for editors.
function fail_file()
{
    local file="$1" line="$2" error="$3"
    fail "$file:$((line + 1)): $error"
}

# Verify that the leading lines of `$1` match the given regexes, one regex per
# line. Empty files are exempt from the header requirement.
function check_file()
{
    local file="$1" regexes=("${@:2}")
    local lines
    mapfile -t lines < "$file"
    if (("${#lines[@]}" == 0)); then
        return # empty file, no copyright needed
    fi
    local line
    for line in "${!regexes[@]}"; do
        # BUG FIX: index with the loop variable `line`. The previous code
        # indexed with `i`, which was never set; bash evaluates an unset name
        # in an array subscript as 0, so only lines[0] was ever checked
        # against regexes[0] and all later header lines were ignored.
        eval '[[ "${lines[line]}" =~ '"${regexes[line]}"' ]]' ||
            fail_file "$file" "$line" "doesn't match regex: ${regexes[line]}"
    done
}

# The regexes below embed quoted strings so that, inside [[ =~ ]], the quoted
# part is matched literally (unquoted regex metacharacters stay special).
POUND_HEADER=('^"# SPDX-License-Identifier: LGPL-3.0-or-later"$' '^"# See Notices.txt for copyright information"$')
SLASH_HEADER=('^"// SPDX-License-Identifier: LGPL-3.0-or-later"$' '^"// See Notices.txt for copyright information"$')
MD_HEADER=('^"<!--"$' '^"SPDX-License-Identifier: LGPL-3.0-or-later"$' '^"See Notices.txt for copyright information"$')

# Walk every tracked file and dispatch to the right header check by suffix.
function main()
{
    local IFS=$'\n'
    # A dirty tree could make the checked content disagree with HEAD.
    [[ -z "$(git status --porcelain)" ]] || fail "git repo is dirty"
    local file
    for file in $(git ls-tree --name-only --full-tree -r HEAD); do
        case "/$file" in
            /Cargo.lock)
                # generated file
                ;;
            */LICENSE.md|*/Notices.txt)
                # copyright file
                ;;
            /.forgejo/workflows/*.yml|*/.gitignore|*.toml|*.py)
                check_file "$file" "${POUND_HEADER[@]}"
                ;;
            *.md)
                check_file "$file" "${MD_HEADER[@]}"
                ;;
            *.sh)
                check_file "$file" '^'\''#!'\' "${POUND_HEADER[@]}"
                ;;
            *.rs)
                check_file "$file" "${SLASH_HEADER[@]}"
                ;;
            *)
                fail_file "$file" 0 "unimplemented file kind -- you need to add it to $0"
                ;;
        esac
    done
}
main

3831
src/lib.rs

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,38 @@
// SPDX-License-Identifier: LGPL-3.0-or-later use parse_powerisa_pdf::pdf::Pdf;
// See Notices.txt for copyright information use std::{
error::Error,
io::{IsTerminal, Read},
process::ExitCode,
};
fn main() -> std::process::ExitCode { fn main() -> Result<ExitCode, Box<dyn Error>> {
parse_powerisa_pdf::main() let args: Vec<_> = std::env::args_os().collect();
if args
.iter()
.skip(1)
.any(|v| v.as_encoded_bytes().starts_with(b"-") && v != "-")
|| args.len() > 2
|| (args.len() == 1 && std::io::stdin().is_terminal())
{
eprintln!(
"Usage: {} [<path/to/file.pdf>]\n\
Reads the PDF file passed on the command line,\n\
Reads stdin if no arguments are passed or if the file name is just a dash `-`.\n\
If stdin is a terminal, you have to pass `-` explicitly to read from it.",
args[0].display()
);
return Ok(ExitCode::FAILURE);
}
let file_path = args.get(1).filter(|v| *v != "-");
let input = if let Some(file_path) = file_path {
std::fs::read(file_path)?
} else {
let mut buf = Vec::new();
std::io::stdin().lock().read_to_end(&mut buf)?;
buf
};
let pdf = Pdf::parse(input)?;
println!("{:#?}", pdf.trailer.trailer_dictionary());
todo!();
Ok(ExitCode::SUCCESS)
} }

View file

@ -1,871 +0,0 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use mupdf_sys::{
fz_buffer, fz_buffer_storage, fz_clone_context, fz_color_params, fz_colorspace, fz_concat,
fz_context, fz_device, fz_document, fz_drop_buffer, fz_drop_context, fz_drop_device,
fz_drop_document, fz_drop_page, fz_drop_path, fz_drop_text, fz_error_type_FZ_ERROR_GENERIC,
fz_font, fz_font_ascender, fz_font_descender, fz_font_is_bold, fz_font_is_italic, fz_font_name,
fz_matrix, fz_matrix_expansion, fz_page, fz_path, fz_path_walker, fz_point, fz_rect,
fz_stroke_state, fz_text, fz_text_item, fz_text_span, fz_transform_point,
fz_transform_point_xy, fz_transform_vector, fz_walk_path, mupdf_document_page_count,
mupdf_drop_error, mupdf_error_t, mupdf_load_page, mupdf_new_base_context,
mupdf_new_derived_device, mupdf_open_document, mupdf_page_to_xml, mupdf_pdf_page_transform,
mupdf_run_page, pdf_page, pdf_page_from_fz_page,
};
use std::{
cell::{Cell, UnsafeCell},
ffi::{CStr, c_int, c_void},
fmt,
marker::PhantomData,
mem::ManuallyDrop,
ptr::{self, NonNull},
sync::{Mutex, OnceLock},
};
/// Error produced by a failed MuPDF FFI call: the raw C error-type code plus
/// the message copied out of the C error struct (see `mupdf_try` in this file).
#[derive(Debug)]
pub(crate) struct MuPdfError {
    // raw MuPDF error code, e.g. `fz_error_type_FZ_ERROR_GENERIC`
    type_: c_int,
    // human-readable message, already converted to owned Rust memory
    message: String,
}
impl MuPdfError {
fn new_generic(message: impl ToString) -> Self {
Self {
type_: fz_error_type_FZ_ERROR_GENERIC as _,
message: message.to_string(),
}
}
}
impl fmt::Display for MuPdfError {
    /// Renders the error as `MuPDF error: type: <code>, message: <text>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Destructure so the fields can be used as inline format arguments;
        // the rendered text is byte-identical to the previous version.
        let Self { type_, message } = self;
        write!(f, "MuPDF error: type: {type_}, message: {message}")
    }
}
// Marker impl so `MuPdfError` can travel as `Box<dyn Error>`; the default
// trait methods defer to the `Display`/`Debug` impls above.
impl std::error::Error for MuPdfError {}
/// Owning guard around a non-null `mupdf_error_t` pointer; frees the C error
/// exactly once on drop. Used by `mupdf_try` so the C allocation is released
/// even if converting it to a Rust `MuPdfError` panics.
struct OwnedMuPdfError(NonNull<mupdf_error_t>);
impl Drop for OwnedMuPdfError {
    fn drop(&mut self) {
        // SAFETY: the pointer came from a MuPDF wrapper call (see `mupdf_try`)
        // and this guard is the sole owner, so it is freed exactly once.
        unsafe {
            mupdf_drop_error(self.0.as_ptr());
        }
    }
}
/// Runs an FFI call that reports failure through a `*mut mupdf_error_t`
/// out-parameter, converting the C error (if any) into a Rust [`MuPdfError`].
///
/// `f` receives the error out-pointer and must hand it to the wrapped MuPDF
/// call. If the callee leaves it null the call succeeded and `f`'s return
/// value is forwarded as `Ok`.
///
/// # Safety
/// The closure must pass the pointer to an FFI function that either leaves it
/// null or stores a pointer to a valid `mupdf_error_t` whose `message` is a
/// NUL-terminated C string and which may be freed with `mupdf_drop_error`.
unsafe fn mupdf_try<R>(f: impl FnOnce(&mut *mut mupdf_error_t) -> R) -> Result<R, MuPdfError> {
    let mut err = ptr::null_mut();
    let retval = f(&mut err);
    // Wrap in the owning guard immediately so the C error is released even if
    // the conversion below panics.
    let Some(err) = NonNull::new(err).map(OwnedMuPdfError) else {
        return Ok(retval);
    };
    unsafe {
        Err(MuPdfError {
            type_: (*err.0.as_ptr()).type_,
            // Copy the C message into owned Rust memory before the guard drops
            // it; lossy conversion tolerates non-UTF-8 bytes in the message.
            message: CStr::from_ptr((*err.0.as_ptr()).message)
                .to_string_lossy()
                .into_owned(),
        })
    }
}
/// An owned MuPDF `fz_context`, cloned per thread from a lazily created,
/// process-wide base context. Dropped contexts release only their clone.
pub(crate) struct Context(NonNull<fz_context>);
impl Context {
    fn new() -> Self {
        // Newtype so the raw base-context pointer can live in a `static`.
        struct BaseContext(NonNull<fz_context>);
        // SAFETY: assumption — the base context is only ever touched while
        // holding the Mutex below, and per-thread use goes through
        // `fz_clone_context` clones (MuPDF's intended threading model —
        // confirm against MuPDF's thread-safety docs).
        unsafe impl Send for BaseContext {}
        static CTX: OnceLock<Mutex<BaseContext>> = OnceLock::new();
        // Create the base context exactly once, then lock it while cloning so
        // concurrent `Context::new` calls don't race inside MuPDF.
        let base = CTX
            .get_or_init(|| {
                let ctx = unsafe { mupdf_new_base_context() };
                let Some(ctx) = NonNull::new(ctx).map(BaseContext) else {
                    panic!("failed to allocate a MuPDF context");
                };
                Mutex::new(ctx)
            })
            .lock()
            .expect("not poisoned");
        let ctx = unsafe { fz_clone_context(base.0.as_ptr()) };
        let Some(ctx) = NonNull::new(ctx).map(Self) else {
            drop(base);
            panic!("failed to clone a MuPDF context");
        };
        ctx
    }
    /// Runs `f` with this thread's lazily created `Context` (one clone per
    /// thread, kept alive in a thread-local for reuse).
    pub(crate) fn with<R>(f: impl FnOnce(&Self) -> R) -> R {
        thread_local! {
            static CTX: Context = Context::new();
        }
        CTX.with(f)
    }
    /// Borrows this context as the lightweight `Copy` handle used by the rest
    /// of the FFI wrappers.
    pub(crate) fn as_ref(&self) -> ContextRef<'_> {
        unsafe { ContextRef::from_ptr(self.0.as_ptr()) }
    }
}
impl Drop for Context {
    fn drop(&mut self) {
        // SAFETY: `self.0` is the clone created in `Context::new` and is
        // dropped exactly once here.
        unsafe {
            fz_drop_context(self.0.as_ptr());
        }
    }
}
/// A borrowed, `Copy` handle to a `fz_context`, valid for the lifetime of the
/// owning [`Context`]. Stored as `&UnsafeCell` because the C API takes
/// `*mut fz_context` even for logically shared access.
#[derive(Clone, Copy)]
pub(crate) struct ContextRef<'ctx>(&'ctx UnsafeCell<fz_context>);
impl<'ctx> ContextRef<'ctx> {
    // Safety: `ptr` must be non-null and remain valid for `'ctx`; callers in
    // this file only pass pointers owned by a live `Context` or handed in by
    // MuPDF callbacks.
    unsafe fn from_ptr(ptr: *mut fz_context) -> Self {
        Self(unsafe { &*ptr.cast() })
    }
}
impl<'ctx> From<&'ctx Context> for ContextRef<'ctx> {
fn from(value: &'ctx Context) -> Self {
value.as_ref()
}
}
/// An open MuPDF document, tied to the context it was opened with and dropped
/// through `fz_drop_document`.
pub(crate) struct Document<'ctx> {
    ptr: *mut fz_document,
    ctx: ContextRef<'ctx>,
}
impl<'ctx> Document<'ctx> {
    /// Opens the document at `file_name` (NUL-terminated path) using `ctx`.
    ///
    /// # Errors
    /// Returns the MuPDF error if the file cannot be opened or parsed.
    pub(crate) fn open(
        ctx: impl Into<ContextRef<'ctx>>,
        file_name: &CStr,
    ) -> Result<Document<'ctx>, MuPdfError> {
        let ctx = ctx.into();
        unsafe {
            mupdf_try(|errptr| mupdf_open_document(ctx.0.get(), file_name.as_ptr(), errptr))
                .map(|ptr| Document { ptr, ctx })
        }
    }
    /// Number of pages in the document.
    ///
    /// # Errors
    /// Propagates MuPDF errors; a negative C count is reported as a generic
    /// error via the failed `try_into`.
    pub(crate) fn page_count(&self) -> Result<usize, MuPdfError> {
        unsafe {
            mupdf_try(|errptr| mupdf_document_page_count(self.ctx.0.get(), self.ptr, errptr))?
                .try_into()
                .map_err(MuPdfError::new_generic)
        }
    }
    /// Loads the page at 0-based index `page`.
    ///
    /// # Errors
    /// Fails if `page` doesn't fit the C index type or MuPDF can't load it.
    pub(crate) fn load_page(&self, page: usize) -> Result<Page<'ctx>, MuPdfError> {
        let page = page.try_into().map_err(MuPdfError::new_generic)?;
        unsafe {
            mupdf_try(|errptr| mupdf_load_page(self.ctx.0.get(), self.ptr, page, errptr))
                .map(|ptr| Page { ptr, ctx: self.ctx })
        }
    }
}
impl<'ctx> Drop for Document<'ctx> {
    fn drop(&mut self) {
        // SAFETY: `self.ptr` was produced by `mupdf_open_document` and is
        // released exactly once here.
        unsafe {
            fz_drop_document(self.ctx.0.get(), self.ptr);
        }
    }
}
/// Owning wrapper over a MuPDF `fz_buffer`, released with `fz_drop_buffer`.
/// Used here to hold the XML output of `mupdf_page_to_xml`.
struct Buffer<'ctx> {
    ptr: *mut fz_buffer,
    ctx: ContextRef<'ctx>,
}
impl<'ctx> Buffer<'ctx> {
    /// Returns the buffer's bytes as a mutable slice borrowed from the C
    /// allocation (valid until the buffer is next mutated or dropped).
    fn storage(&mut self) -> &mut [u8] {
        unsafe {
            let mut ptr = ptr::null_mut();
            let len = fz_buffer_storage(self.ctx.0.get(), self.ptr, &raw mut ptr);
            if len == 0 {
                // Avoid `from_raw_parts_mut` on a possibly-null/dangling
                // pointer when the buffer is empty.
                &mut []
            } else {
                std::slice::from_raw_parts_mut(ptr, len)
            }
        }
    }
}
impl<'ctx> Drop for Buffer<'ctx> {
    fn drop(&mut self) {
        // SAFETY: `self.ptr` came from a MuPDF call that transferred ownership
        // and is dropped exactly once here.
        unsafe {
            fz_drop_buffer(self.ctx.0.get(), self.ptr);
        }
    }
}
/// A loaded MuPDF page, tied to its context and released with `fz_drop_page`.
pub(crate) struct Page<'ctx> {
    ptr: *mut fz_page,
    ctx: ContextRef<'ctx>,
}
impl<'ctx> Page<'ctx> {
    /// The context this page was loaded with.
    pub(crate) fn ctx(&self) -> ContextRef<'ctx> {
        self.ctx
    }
    /// Runs the page's content through `device` with transform `ctm`,
    /// invoking the device's callbacks for each drawing operation.
    /// The null pointer passed to `mupdf_run_page` is the optional cookie
    /// argument (no progress/cancellation tracking).
    pub(crate) fn run<T>(
        &self,
        device: &Device<'ctx, T>,
        ctm: fz_matrix,
    ) -> Result<(), MuPdfError> {
        unsafe {
            mupdf_try(|errptr| {
                mupdf_run_page(
                    self.ctx.0.get(),
                    self.ptr,
                    device.dev,
                    ctm,
                    ptr::null_mut(),
                    errptr,
                )
            })
        }
    }
    /// Renders the page to XML via `mupdf_page_to_xml` and returns it as an
    /// owned `String`.
    ///
    /// # Errors
    /// Propagates MuPDF errors; non-UTF-8 output is reported as a generic
    /// error rather than converted lossily.
    pub(crate) fn to_xml(&self) -> Result<String, MuPdfError> {
        unsafe {
            let mut buffer =
                mupdf_try(|errptr| mupdf_page_to_xml(self.ctx.0.get(), self.ptr, errptr))
                    .map(|ptr| Buffer { ptr, ctx: self.ctx })?;
            Ok(str::from_utf8(buffer.storage())
                .map_err(MuPdfError::new_generic)?
                .into())
        }
    }
    /// Downcasts to the PDF-specific page type; `None` when
    /// `pdf_page_from_fz_page` returns null (presumably for non-PDF
    /// documents — confirm against MuPDF docs).
    pub(crate) fn pdf_page<'a>(&'a self) -> Option<PdfPageRef<'a, 'ctx>> {
        unsafe {
            let ptr = pdf_page_from_fz_page(self.ctx.0.get(), self.ptr);
            NonNull::new(ptr).map(|ptr| PdfPageRef {
                ptr: &*ptr.as_ptr().cast(),
                ctx: self.ctx,
            })
        }
    }
}
impl<'ctx> Drop for Page<'ctx> {
    fn drop(&mut self) {
        // SAFETY: `self.ptr` was produced by `mupdf_load_page` and is dropped
        // exactly once here.
        unsafe {
            fz_drop_page(self.ctx.0.get(), self.ptr);
        }
    }
}
/// A borrowed, `Copy` view of a PDF-specific page (`pdf_page`), obtained from
/// [`Page::pdf_page`]; lives no longer than the `Page` it came from.
#[derive(Clone, Copy)]
pub(crate) struct PdfPageRef<'a, 'ctx> {
    ptr: &'a UnsafeCell<pdf_page>,
    ctx: ContextRef<'ctx>,
}
impl<'a, 'ctx> PdfPageRef<'a, 'ctx> {
    /// Returns the page's transform matrix via `mupdf_pdf_page_transform`.
    ///
    /// # Errors
    /// Propagates any MuPDF error from the wrapped call.
    pub(crate) fn transform(self) -> Result<fz_matrix, MuPdfError> {
        unsafe {
            mupdf_try(|errptr| mupdf_pdf_page_transform(self.ctx.0.get(), self.ptr.get(), errptr))
        }
    }
}
/// A MuPDF device whose callbacks dispatch to a Rust value of type `T`
/// (see the `DeviceCallbacks` trait below). The `T` payload is stored inline
/// in the C-side device allocation; `PhantomData<Box<Cell<T>>>` records that
/// this struct logically owns a heap `T` with interior mutability so drop
/// checking and auto-traits behave accordingly.
pub(crate) struct Device<'ctx, T: 'ctx> {
    dev: *mut fz_device,
    ctx: ContextRef<'ctx>,
    _phantom: PhantomData<Box<Cell<T>>>,
}
/// Callbacks invoked while a page is run through a [`Device`]; each method
/// mirrors the corresponding `fz_device` hook it is wired to in
/// `Device::new` (`fill_path`, `stroke_path`, `clip_*`, `*_text`, …).
/// `ctm` is the transform matrix supplied by MuPDF for the operation;
/// `scissor` is the clip rectangle passed to the clip hooks.
pub(crate) trait DeviceCallbacks<'ctx> {
    /// A path is filled; `even_odd` selects the even-odd fill rule.
    fn fill_path(&self, ctx: ContextRef<'ctx>, path: &Path<'ctx>, even_odd: bool, ctm: fz_matrix);
    /// A path is stroked (stroke state is not forwarded by this wrapper).
    fn stroke_path(&self, ctx: ContextRef<'ctx>, path: &Path<'ctx>, ctm: fz_matrix);
    /// A path becomes a clip region.
    fn clip_path(
        &self,
        ctx: ContextRef<'ctx>,
        path: &Path<'ctx>,
        even_odd: bool,
        ctm: fz_matrix,
        scissor: fz_rect,
    );
    /// A stroked path becomes a clip region.
    fn clip_stroke_path(
        &self,
        ctx: ContextRef<'ctx>,
        path: &Path<'ctx>,
        ctm: fz_matrix,
        scissor: fz_rect,
    );
    /// Text is filled (the common case for visible page text).
    fn fill_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix);
    /// Text is stroked.
    fn stroke_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix);
    /// Text becomes a clip region.
    fn clip_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix, scissor: fz_rect);
    /// Stroked text becomes a clip region.
    fn clip_stroke_text(
        &self,
        ctx: ContextRef<'ctx>,
        text: &Text<'ctx>,
        ctm: fz_matrix,
        scissor: fz_rect,
    );
    /// Invisible text (e.g. rendering mode 3) — reported but not drawn.
    fn ignore_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix);
}
impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> {
    /// Creates a MuPDF device whose callback slots forward to the Rust
    /// callbacks implemented on `value` via [`DeviceCallbacks`].
    ///
    /// `value` is moved into the trailing field of the C-layout
    /// [`DeviceStruct<T>`] that MuPDF allocates, and is dropped again by
    /// `drop_device_fn` when MuPDF destroys the device.
    pub(crate) fn new(ctx: impl Into<ContextRef<'ctx>>, value: Box<T>) -> Result<Self, MuPdfError> {
        let ctx = ctx.into();
        unsafe {
            // `mupdf_try` converts a thrown MuPDF (fz) exception into `Err`.
            let dev_ptr = mupdf_try(|errptr| {
                mupdf_new_derived_device::<DeviceStruct<T>>(
                    ctx.0.get(),
                    c"parse_powerisa_pdf::mupdf_ffi::Device",
                    errptr,
                )
            })?;
            let retval = Device {
                dev: dev_ptr.cast(),
                ctx,
                _phantom: PhantomData,
            };
            // Initialize the trailing `value` field in place; `write` avoids
            // reading/dropping the uninitialized previous contents.
            (&raw mut (*dev_ptr).value).write(value);
            let fz_device {
                drop_device,
                fill_path,
                stroke_path,
                clip_path,
                clip_stroke_path,
                fill_text,
                stroke_text,
                clip_text,
                clip_stroke_text,
                ignore_text,
                ..
            } = &mut (*dev_ptr).base;
            // Wire each MuPDF callback slot to its `extern "C"` shim below.
            *drop_device = Some(Self::drop_device_fn);
            *fill_path = Some(Self::fill_path_fn);
            *stroke_path = Some(Self::stroke_path_fn);
            *clip_path = Some(Self::clip_path_fn);
            *clip_stroke_path = Some(Self::clip_stroke_path_fn);
            *fill_text = Some(Self::fill_text_fn);
            *stroke_text = Some(Self::stroke_text_fn);
            *clip_text = Some(Self::clip_text_fn);
            *clip_stroke_text = Some(Self::clip_stroke_text_fn);
            *ignore_text = Some(Self::ignore_text_fn);
            Ok(retval)
        }
    }
    /// Borrows the Rust callback object stored inside the device.
    pub(crate) fn get(&self) -> &T {
        unsafe { &(*self.dev.cast::<DeviceStruct<T>>()).value }
    }
    /// Called by MuPDF when the device is destroyed: drops the boxed `T` in
    /// the trailing `value` field (MuPDF frees the allocation itself).
    unsafe extern "C" fn drop_device_fn(_ctx: *mut fz_context, dev: *mut fz_device) {
        unsafe {
            (&raw mut (*dev.cast::<DeviceStruct<T>>()).value).drop_in_place();
        }
    }
    /// Shim forwarding MuPDF's `fill_path` callback to `T::fill_path`.
    /// Color/alpha parameters are not exposed to the Rust callbacks.
    unsafe extern "C" fn fill_path_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        path: *const fz_path,
        even_odd: c_int,
        ctm: fz_matrix,
        _color_space: *mut fz_colorspace,
        _color: *const f32,
        _alpha: f32,
        _color_params: fz_color_params,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.fill_path(
            ctx,
            // `ManuallyDrop`: the callback only borrows `path`, so the `Path`
            // wrapper must not run `fz_drop_path` on it.
            &ManuallyDrop::new(Path {
                ptr: path.cast_mut(),
                ctx,
            }),
            even_odd != 0,
            ctm,
        );
    }
    /// Shim forwarding MuPDF's `stroke_path` callback to `T::stroke_path`.
    /// Stroke-state and color parameters are not exposed.
    unsafe extern "C" fn stroke_path_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        path: *const fz_path,
        _stroke_state: *const fz_stroke_state,
        ctm: fz_matrix,
        _color_space: *mut fz_colorspace,
        _color: *const f32,
        _alpha: f32,
        _color_params: fz_color_params,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.stroke_path(
            ctx,
            // Borrowed path — see `fill_path_fn` for the `ManuallyDrop` rationale.
            &ManuallyDrop::new(Path {
                ptr: path.cast_mut(),
                ctx,
            }),
            ctm,
        );
    }
    /// Shim forwarding MuPDF's `clip_path` callback to `T::clip_path`.
    unsafe extern "C" fn clip_path_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        path: *const fz_path,
        even_odd: ::std::os::raw::c_int,
        ctm: fz_matrix,
        scissor: fz_rect,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.clip_path(
            ctx,
            &ManuallyDrop::new(Path {
                ptr: path.cast_mut(),
                ctx,
            }),
            even_odd != 0,
            ctm,
            scissor,
        );
    }
    /// Shim forwarding MuPDF's `clip_stroke_path` callback to
    /// `T::clip_stroke_path`. Stroke state is not exposed.
    unsafe extern "C" fn clip_stroke_path_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        path: *const fz_path,
        _stroke_state: *const fz_stroke_state,
        ctm: fz_matrix,
        scissor: fz_rect,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.clip_stroke_path(
            ctx,
            &ManuallyDrop::new(Path {
                ptr: path.cast_mut(),
                ctx,
            }),
            ctm,
            scissor,
        );
    }
    /// Shim forwarding MuPDF's `fill_text` callback to `T::fill_text`.
    unsafe extern "C" fn fill_text_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        text: *const fz_text,
        ctm: fz_matrix,
        _color_space: *mut fz_colorspace,
        _color: *const f32,
        _alpha: f32,
        _color_params: fz_color_params,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.fill_text(
            ctx,
            // Borrowed text object — must not be dropped here.
            &ManuallyDrop::new(Text {
                ptr: text.cast_mut(),
                ctx,
            }),
            ctm,
        );
    }
    /// Shim forwarding MuPDF's `stroke_text` callback to `T::stroke_text`.
    unsafe extern "C" fn stroke_text_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        text: *const fz_text,
        _stroke_state: *const fz_stroke_state,
        ctm: fz_matrix,
        _color_space: *mut fz_colorspace,
        _color: *const f32,
        _alpha: f32,
        _color_params: fz_color_params,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.stroke_text(
            ctx,
            &ManuallyDrop::new(Text {
                ptr: text.cast_mut(),
                ctx,
            }),
            ctm,
        );
    }
    /// Shim forwarding MuPDF's `clip_text` callback to `T::clip_text`.
    unsafe extern "C" fn clip_text_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        text: *const fz_text,
        ctm: fz_matrix,
        scissor: fz_rect,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.clip_text(
            ctx,
            &ManuallyDrop::new(Text {
                ptr: text.cast_mut(),
                ctx,
            }),
            ctm,
            scissor,
        );
    }
    /// Shim forwarding MuPDF's `clip_stroke_text` callback to
    /// `T::clip_stroke_text`.
    unsafe extern "C" fn clip_stroke_text_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        text: *const fz_text,
        _stroke_state: *const fz_stroke_state,
        ctm: fz_matrix,
        scissor: fz_rect,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.clip_stroke_text(
            ctx,
            &ManuallyDrop::new(Text {
                ptr: text.cast_mut(),
                ctx,
            }),
            ctm,
            scissor,
        );
    }
    /// Shim forwarding MuPDF's `ignore_text` callback to `T::ignore_text`.
    unsafe extern "C" fn ignore_text_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        text: *const fz_text,
        ctm: fz_matrix,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.ignore_text(
            ctx,
            &ManuallyDrop::new(Text {
                ptr: text.cast_mut(),
                ctx,
            }),
            ctm,
        );
    }
}
impl<'ctx, T> Drop for Device<'ctx, T> {
    fn drop(&mut self) {
        unsafe {
            // FIXME: fz_close_device may throw exceptions
            // fz_close_device(self.ctx.0.get(), self.dev);
            // Releases the device; MuPDF eventually runs `drop_device_fn`,
            // which drops the boxed callbacks.
            fz_drop_device(self.ctx.0.get(), self.dev);
        }
    }
}
/// C-compatible layout: MuPDF's `fz_device` first, then the Rust callback
/// object, so a `*mut fz_device` pointing at this allocation can be cast to
/// `*mut DeviceStruct<T>` (relies on `base` being the first `#[repr(C)]` field).
#[repr(C)]
struct DeviceStruct<T> {
    base: fz_device,
    value: Box<T>,
}
/// Visitor for the segments of a [`Path`], mirroring MuPDF's `fz_path_walker`
/// callbacks.
pub(crate) trait PathWalker<'ctx> {
    /// Begins a new subpath at `(x, y)`.
    fn move_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32);
    /// Appends a straight line segment to `(x, y)`.
    fn line_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32);
    /// Appends a cubic Bézier segment with control points `(x1, y1)` and
    /// `(x2, y2)`, ending at `(x3, y3)`.
    fn curve_to(
        &mut self,
        ctx: ContextRef<'ctx>,
        x1: f32,
        y1: f32,
        x2: f32,
        y2: f32,
        x3: f32,
        y3: f32,
    );
    /// Closes the current subpath.
    fn close_path(&mut self, ctx: ContextRef<'ctx>);
    /// Visits an axis-aligned rectangle with opposite corners `(x1, y1)` and
    /// `(x2, y2)`.
    ///
    /// The default implementation emulates the rectangle as a single closed
    /// subpath: one `move_to` followed by three `line_to`s and `close_path`,
    /// the standard emulation of `fz_path_walker::rectto`.
    fn rect_to(&mut self, ctx: ContextRef<'ctx>, x1: f32, y1: f32, x2: f32, y2: f32) {
        // Bug fix: the previous default invoked `move_to` for all four
        // corners, which produced four degenerate (empty) subpaths instead
        // of a rectangle outline.
        self.move_to(ctx, x1, y1);
        self.line_to(ctx, x2, y1);
        self.line_to(ctx, x2, y2);
        self.line_to(ctx, x1, y2);
        self.close_path(ctx);
    }
}
/// Forwarding impl: lets a `&mut` reference to a walker be used wherever an
/// owned walker is expected (e.g. when the caller wants to keep the walker
/// after walking). All methods delegate unchanged, including `rect_to`, so a
/// custom `rect_to` override is preserved through the reference.
impl<'ctx, T: ?Sized + PathWalker<'ctx>> PathWalker<'ctx> for &'_ mut T {
    fn move_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32) {
        T::move_to(self, ctx, x, y);
    }
    fn line_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32) {
        T::line_to(self, ctx, x, y);
    }
    fn curve_to(
        &mut self,
        ctx: ContextRef<'ctx>,
        x1: f32,
        y1: f32,
        x2: f32,
        y2: f32,
        x3: f32,
        y3: f32,
    ) {
        T::curve_to(self, ctx, x1, y1, x2, y2, x3, y3);
    }
    fn close_path(&mut self, ctx: ContextRef<'ctx>) {
        T::close_path(self, ctx);
    }
    fn rect_to(&mut self, ctx: ContextRef<'ctx>, x1: f32, y1: f32, x2: f32, y2: f32) {
        T::rect_to(self, ctx, x1, y1, x2, y2);
    }
}
/// An owned handle to a MuPDF `fz_path`; dropping it calls `fz_drop_path`.
/// (Device callbacks wrap borrowed paths in `ManuallyDrop` to suppress that.)
pub(crate) struct Path<'ctx> {
    ptr: *mut fz_path,
    ctx: ContextRef<'ctx>,
}
impl<'ctx> Path<'ctx> {
    /// Walks the path's segments via `fz_walk_path`, invoking the matching
    /// `walker` method for each move/line/curve/close/rect command.
    pub(crate) fn walk<W: PathWalker<'ctx>>(&self, mut walker: W) {
        unsafe {
            fz_walk_path(
                self.ctx.0.get(),
                self.ptr,
                // Static callback table monomorphized per walker type.
                // `quadto`/`curvetov`/`curvetoy` are `None`; presumably MuPDF
                // rewrites those commands in terms of the callbacks provided
                // here — TODO: confirm against the fz_walk_path docs.
                const {
                    &fz_path_walker {
                        moveto: Some(Self::move_to_fn::<W>),
                        lineto: Some(Self::line_to_fn::<W>),
                        curveto: Some(Self::curve_to_fn::<W>),
                        closepath: Some(Self::close_path_fn::<W>),
                        quadto: None,
                        curvetov: None,
                        curvetoy: None,
                        rectto: Some(Self::rect_to_fn::<W>),
                    }
                },
                // The walker is passed as a type-erased user pointer; it lives
                // on this stack frame for the duration of the call.
                (&raw mut walker).cast(),
            );
        }
    }
    /// Shim: forwards the `moveto` callback to `W::move_to`.
    unsafe extern "C" fn move_to_fn<W: PathWalker<'ctx>>(
        ctx: *mut fz_context,
        arg: *mut c_void,
        x: f32,
        y: f32,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut *arg.cast::<W>() };
        this.move_to(ctx, x, y);
    }
    /// Shim: forwards the `lineto` callback to `W::line_to`.
    unsafe extern "C" fn line_to_fn<W: PathWalker<'ctx>>(
        ctx: *mut fz_context,
        arg: *mut c_void,
        x: f32,
        y: f32,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut *arg.cast::<W>() };
        this.line_to(ctx, x, y);
    }
    /// Shim: forwards the `curveto` callback to `W::curve_to`.
    unsafe extern "C" fn curve_to_fn<W: PathWalker<'ctx>>(
        ctx: *mut fz_context,
        arg: *mut c_void,
        x1: f32,
        y1: f32,
        x2: f32,
        y2: f32,
        x3: f32,
        y3: f32,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut *arg.cast::<W>() };
        this.curve_to(ctx, x1, y1, x2, y2, x3, y3);
    }
    /// Shim: forwards the `closepath` callback to `W::close_path`.
    unsafe extern "C" fn close_path_fn<W: PathWalker<'ctx>>(
        ctx: *mut fz_context,
        arg: *mut c_void,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut *arg.cast::<W>() };
        this.close_path(ctx);
    }
    /// Shim: forwards the `rectto` callback to `W::rect_to`.
    unsafe extern "C" fn rect_to_fn<W: PathWalker<'ctx>>(
        ctx: *mut fz_context,
        arg: *mut c_void,
        x1: f32,
        y1: f32,
        x2: f32,
        y2: f32,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut *arg.cast::<W>() };
        this.rect_to(ctx, x1, y1, x2, y2);
    }
}
impl<'ctx> Drop for Path<'ctx> {
    fn drop(&mut self) {
        unsafe {
            fz_drop_path(self.ctx.0.get(), self.ptr);
        }
    }
}
/// An owned handle to a MuPDF `fz_text`; dropping it calls `fz_drop_text`.
/// (Device callbacks wrap borrowed text objects in `ManuallyDrop`.)
pub(crate) struct Text<'ctx> {
    ptr: *mut fz_text,
    ctx: ContextRef<'ctx>,
}
impl<'ctx> Drop for Text<'ctx> {
    fn drop(&mut self) {
        unsafe {
            fz_drop_text(self.ctx.0.get(), self.ptr);
        }
    }
}
impl<'ctx> Text<'ctx> {
    /// Iterates over the spans of this text object — the `fz_text_span`
    /// singly-linked list starting at `head` (empty iterator if `head` is null).
    pub(crate) fn spans<'a>(&'a self) -> TextSpanIter<'a, 'ctx> {
        TextSpanIter {
            ptr: unsafe { NonNull::new((*self.ptr).head).map(|ptr| &*ptr.as_ptr().cast()) },
            ctx: self.ctx,
            _phantom: PhantomData,
        }
    }
}
/// Iterator over the `fz_text_span` linked list of a [`Text`].
#[derive(Clone)]
pub(crate) struct TextSpanIter<'a, 'ctx> {
    // Current span, or `None` once the list is exhausted.
    ptr: Option<&'a UnsafeCell<fz_text_span>>,
    ctx: ContextRef<'ctx>,
    // Keeps the borrow of the owning `Text` alive for 'a.
    _phantom: PhantomData<&'a Text<'ctx>>,
}
impl<'a, 'ctx> Iterator for TextSpanIter<'a, 'ctx> {
    type Item = TextSpanRef<'a, 'ctx>;
    fn next(&mut self) -> Option<Self::Item> {
        let ptr = self.ptr?;
        // Advance to the next list node before yielding the current one.
        self.ptr = unsafe { NonNull::new((*ptr.get()).next).map(|ptr| &*ptr.as_ptr().cast()) };
        Some(TextSpanRef {
            ptr,
            ctx: self.ctx,
            _phantom: PhantomData,
        })
    }
}
/// A borrowed view of one `fz_text_span` within a [`Text`].
#[derive(Copy, Clone)]
pub(crate) struct TextSpanRef<'a, 'ctx> {
    ptr: &'a UnsafeCell<fz_text_span>,
    ctx: ContextRef<'ctx>,
    // Keeps the borrow of the owning `Text` alive for 'a.
    _phantom: PhantomData<&'a Text<'ctx>>,
}
/// Text layout direction of a span, derived from `fz_text_span`'s `wmode` bit.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub(crate) enum WriteMode {
    Horizontal,
    Vertical,
}
impl<'a, 'ctx> TextSpanRef<'a, 'ctx> {
    /// Raw access to the underlying `fz_text_span`.
    #[allow(dead_code)]
    pub(crate) fn get(self) -> &'a UnsafeCell<fz_text_span> {
        self.ptr
    }
    /// The font used by this span.
    pub(crate) fn font(self) -> FontRef<'a, 'ctx> {
        FontRef {
            ptr: unsafe { &*(*self.ptr.get()).font.cast::<UnsafeCell<fz_font>>() },
            ctx: self.ctx,
            _phantom: PhantomData,
        }
    }
    /// The span's text rendering matrix (`trm`).
    pub(crate) fn trm(self) -> fz_matrix {
        unsafe { (*self.ptr.get()).trm }
    }
    /// Horizontal vs vertical layout, decoded from the `wmode` bitfield.
    pub(crate) fn write_mode(self) -> WriteMode {
        if unsafe { (*self.ptr.get()).wmode() != 0 } {
            WriteMode::Vertical
        } else {
            WriteMode::Horizontal
        }
    }
    /// The span's glyph items as a slice.
    pub(crate) fn items(self) -> &'a [fz_text_item] {
        let len = unsafe { (*self.ptr.get()).len } as usize;
        // Guard the empty case so we never call `from_raw_parts` with a
        // potentially null/dangling `items` pointer.
        if len == 0 {
            return &[];
        }
        unsafe { std::slice::from_raw_parts((*self.ptr.get()).items, len) }
    }
}
/// A borrowed view of a `fz_font` reachable from a [`Text`] span.
#[derive(Clone, Copy)]
pub(crate) struct FontRef<'a, 'ctx> {
    ptr: &'a UnsafeCell<fz_font>,
    ctx: ContextRef<'ctx>,
    // Keeps the borrow of the owning `Text` alive for 'a.
    _phantom: PhantomData<&'a Text<'ctx>>,
}
impl<'a, 'ctx> FontRef<'a, 'ctx> {
    /// Raw access to the underlying `fz_font`.
    #[allow(dead_code)]
    pub(crate) fn get(self) -> &'a UnsafeCell<fz_font> {
        self.ptr
    }
    /// The font's name as reported by `fz_font_name`.
    ///
    /// # Panics
    /// Panics if the name is not valid UTF-8.
    pub(crate) fn name(self) -> &'a str {
        unsafe { CStr::from_ptr(fz_font_name(self.ctx.0.get(), self.ptr.get())) }
            .to_str()
            .expect("font name isn't valid UTF-8")
    }
    /// Whether MuPDF classifies the font as bold.
    #[allow(dead_code)]
    pub(crate) fn is_bold(self) -> bool {
        unsafe { fz_font_is_bold(self.ctx.0.get(), self.ptr.get()) != 0 }
    }
    /// Whether MuPDF classifies the font as italic.
    #[allow(dead_code)]
    pub(crate) fn is_italic(self) -> bool {
        unsafe { fz_font_is_italic(self.ctx.0.get(), self.ptr.get()) != 0 }
    }
    /// The font's ascender metric (`fz_font_ascender`).
    pub(crate) fn ascender(self) -> f32 {
        unsafe { fz_font_ascender(self.ctx.0.get(), self.ptr.get()) }
    }
    /// The font's descender metric (`fz_font_descender`).
    pub(crate) fn descender(self) -> f32 {
        unsafe { fz_font_descender(self.ctx.0.get(), self.ptr.get()) }
    }
}
// Thin safe wrappers over MuPDF's geometry helpers. These take no `fz_context`
// and only by-value arguments (no pointers), hence the safe signatures.
/// Transforms a point by matrix `m` (applies translation).
#[allow(dead_code)]
pub(crate) fn transform_point(point: fz_point, m: fz_matrix) -> fz_point {
    unsafe { fz_transform_point(point, m) }
}
/// Transforms the point `(x, y)` by matrix `m`.
pub(crate) fn transform_point_xy(x: f32, y: f32, m: fz_matrix) -> fz_point {
    unsafe { fz_transform_point_xy(x, y, m) }
}
/// Transforms a vector by matrix `m` (ignores translation, per MuPDF's
/// point-vs-vector distinction).
pub(crate) fn transform_vector(vector: fz_point, m: fz_matrix) -> fz_point {
    unsafe { fz_transform_vector(vector, m) }
}
/// The matrix's expansion (scale) factor, as computed by `fz_matrix_expansion`.
pub(crate) fn matrix_expansion(m: fz_matrix) -> f32 {
    unsafe { fz_matrix_expansion(m) }
}
/// Matrix product of `left` and `right` (`fz_concat`).
pub(crate) fn concat(left: fz_matrix, right: fz_matrix) -> fz_matrix {
    unsafe { fz_concat(left, right) }
}
/// Component-wise sum of two points.
pub(crate) fn add_points(a: fz_point, b: fz_point) -> fz_point {
    let (x, y) = (a.x + b.x, a.y + b.y);
    fz_point { x, y }
}
/// Component-wise minimum of two points (lower-left corner of their bounds).
pub(crate) fn point_min_components(a: fz_point, b: fz_point) -> fz_point {
    fz_point {
        x: f32::min(a.x, b.x),
        y: f32::min(a.y, b.y),
    }
}
/// Component-wise maximum of two points (upper-right corner of their bounds).
pub(crate) fn point_max_components(a: fz_point, b: fz_point) -> fz_point {
    fz_point {
        x: f32::max(a.x, b.x),
        y: f32::max(a.y, b.y),
    }
}

1279
src/pdf.rs Normal file

File diff suppressed because it is too large Load diff

829
src/pdf/content_stream.rs Normal file
View file

@ -0,0 +1,829 @@
use crate::{
pdf::{
PdfObjectOrStreamDictionaryOrOperator, PdfObjects, PdfParser, PdfTokenizer,
object::{
NameOr, PdfDictionary, PdfMatrix, PdfName, PdfObject, PdfObjectDirect, PdfRectangle,
PdfStream, PdfStreamContents, PdfString, PdfStringBytesDebug, PdfStringOrNumber,
PdfVec2D,
},
parse::{
GetPdfInputPosition, PdfInputPosition, PdfInputPositionKnown,
PdfInputPositionNoCompare, PdfParse, PdfParseError,
},
render::{
PdfColorDeviceGray, PdfColorDeviceRgb, PdfRenderOperator, PdfRenderState,
PdfRenderingIntent,
},
},
util::ArcOrRef,
};
use std::{fmt, sync::Arc};
/// An operator keyword as read from a content stream, before being matched
/// against the known-operator table: the raw keyword bytes plus the source
/// position. The position is wrapped in `PdfInputPositionNoCompare` so —
/// going by that type's name — it should not participate in the derived
/// comparisons (confirm at its definition).
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct PdfOperatorUnparsed {
    pos: PdfInputPositionNoCompare,
    bytes: ArcOrRef<'static, [u8]>,
}
impl GetPdfInputPosition for PdfOperatorUnparsed {
    fn get_pdf_input_position(&self) -> PdfInputPosition {
        self.pos()
    }
}
impl fmt::Debug for PdfOperatorUnparsed {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        Self::debug_with_name("PdfOperatorUnparsed", &self.bytes, self.pos.0, f)
    }
}
/// Parsing a value from a whole operand sequence at once; backs the
/// variadic `#[parse_iter(...)]` field form of `make_pdf_operator_enum!`.
trait PdfParseIter: Sized {
    fn parse_iter(iter: impl IntoIterator<Item = PdfObject>) -> Result<Self, PdfParseError>;
}
impl<T: PdfParse> PdfParseIter for Arc<[T]> {
    // Parses each operand with `T::parse`; collecting into a `Result`
    // short-circuits on the first parse error.
    fn parse_iter(iter: impl IntoIterator<Item = PdfObject>) -> Result<Self, PdfParseError> {
        FromIterator::from_iter(iter.into_iter().map(T::parse))
    }
}
impl PdfOperatorUnparsed {
    /// Creates an operator token from a source position and keyword bytes.
    pub fn new(
        pos: impl Into<PdfInputPositionNoCompare>,
        bytes: impl Into<ArcOrRef<'static, [u8]>>,
    ) -> Self {
        Self {
            pos: pos.into(),
            bytes: bytes.into(),
        }
    }
    /// Creates an operator token with an empty source position, usable in
    /// `const` contexts (e.g. synthesized operators).
    pub const fn new_static(bytes: &'static [u8]) -> Self {
        Self {
            pos: PdfInputPositionNoCompare::empty(),
            bytes: ArcOrRef::Ref(bytes),
        }
    }
    /// The operator's source position.
    pub fn pos(&self) -> PdfInputPosition {
        self.pos.0
    }
    /// The raw keyword bytes.
    pub fn bytes(&self) -> &ArcOrRef<'static, [u8]> {
        &self.bytes
    }
    // Shared `Debug` helper printing `Name(at <pos>, <keyword>)`; also used
    // by the macro-generated `PdfOperator` `Debug` impl.
    fn debug_with_name(
        name: &str,
        pdf_name: &[u8],
        pos: PdfInputPosition,
        f: &mut fmt::Formatter<'_>,
    ) -> fmt::Result {
        write!(f, "{name}(at {pos}, {})", PdfStringBytesDebug(pdf_name))
    }
    /// The keyword bytes wrapped for PDF-string-style debug formatting.
    pub fn bytes_debug(&self) -> PdfStringBytesDebug<'_> {
        PdfStringBytesDebug(&self.bytes)
    }
}
/// Generates the content-stream operator machinery:
/// * `$PdfOperator` — an operator keyword plus source position, without
///   operands; known keywords (matched on `#[kw = ...]` bytes) become
///   position-only variants, anything else falls into `$Unknown`.
/// * one `$VariantStruct` per known operator, holding its parsed operands,
///   with a `parse` constructor that validates the operand count.
/// * `$PdfOperatorAndOperands` — operator together with parsed operands,
///   with `Debug`, position accessors, and `PdfRenderOperator` dispatch.
macro_rules! make_pdf_operator_enum {
    // Main rule: expands the whole operator table.
    // NOTE(review): the generated `PdfOperator::parse` constructs the unknown
    // variant with the literal field names `operands`/`operator`, so the
    // invocation must use exactly those names for `$unknown_operands` /
    // `$unknown_operator`.
    (
        $(#[$($operator_meta:tt)*])*
        $operator_enum_vis:vis enum $PdfOperator:ident;
        $(#[$($operator_and_operands_meta:tt)*])*
        $enum_vis:vis enum $PdfOperatorAndOperands:ident {
            $(#[$($unknown_variant_meta:tt)*])*
            $Unknown:ident {
                $(#[$($unknown_operands_meta:tt)*])*
                $unknown_operands:ident: $unknown_operands_ty:ty,
                $(#[$($unknown_operator_meta:tt)*])*
                $unknown_operator:ident: $unknown_operator_ty:ty,
            },
            $(
                #[kw = $kw:literal]
                $(#[$($variant_meta:tt)*])*
                $Variant:ident($VariantStruct:ident {
                    $pos:ident: PdfInputPositionNoCompare,
                    $(
                        #[$field_parse:ident($($parse_args:tt)*)]
                        $(#[$($field_meta:tt)*])*
                        $field:ident: $field_ty:ty,
                    )*
                }),
            )*
        }
    ) => {
        $(#[$($operator_meta)*])*
        $operator_enum_vis enum $PdfOperator {
            $(#[$($unknown_variant_meta)*])*
            $Unknown($unknown_operator_ty),
            $(
                $(#[$($variant_meta)*])*
                $Variant(PdfInputPositionNoCompare),
            )*
        }
        impl $PdfOperator {
            // Attaches operands: known operators delegate to their struct's
            // `parse`, unknown ones keep the operands unparsed.
            $operator_enum_vis fn parse(self, operands: impl IntoIterator<Item = PdfObject>) -> Result<$PdfOperatorAndOperands, PdfParseError> {
                let operands = operands.into_iter();
                Ok(match self {
                    Self::$Unknown(operator) => $PdfOperatorAndOperands::$Unknown {
                        operands: FromIterator::from_iter(operands.map(Into::into)),
                        operator,
                    },
                    $(Self::$Variant(pos) => $VariantStruct::parse(pos, operands)?.into(),)*
                })
            }
            $operator_enum_vis fn pos(&self) -> PdfInputPosition {
                match *self {
                    Self::$Unknown(ref operator) => operator.pos(),
                    $(Self::$Variant(pos) => pos.0,)*
                }
            }
        }
        impl fmt::Debug for $PdfOperator {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                match self {
                    Self::$Unknown(operator) => PdfOperatorUnparsed::debug_with_name("Unknown", &operator.bytes, operator.pos.0, f),
                    $(Self::$Variant(pos) => PdfOperatorUnparsed::debug_with_name(stringify!($Variant), $kw, pos.0, f),)*
                }
            }
        }
        impl From<$PdfOperator> for PdfOperatorUnparsed {
            fn from(v: $PdfOperator) -> PdfOperatorUnparsed {
                match v {
                    $PdfOperator::$Unknown(operator) => operator,
                    $($PdfOperator::$Variant(pos) => PdfOperatorUnparsed { pos, bytes: ArcOrRef::Ref($kw) },)*
                }
            }
        }
        impl From<PdfOperatorUnparsed> for $PdfOperator {
            // Keyword lookup: raw bytes are matched against each `#[kw = ...]`.
            fn from(v: PdfOperatorUnparsed) -> $PdfOperator {
                match &**v.bytes() {
                    $($kw => Self::$Variant(v.pos),)*
                    _ => Self::$Unknown(v),
                }
            }
        }
        $(#[derive(Clone)]
        $(#[$($variant_meta)*])*
        $enum_vis struct $VariantStruct {
            $enum_vis $pos: PdfInputPositionNoCompare,
            $(
                $(#[$($field_meta)*])*
                $enum_vis $field: $field_ty,
            )*
        }
        impl fmt::Debug for $VariantStruct {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                f.debug_struct(stringify!($VariantStruct)).field("pos", &self.pos)$(.field(stringify!($field), &self.$field))*.finish()
            }
        }
        impl GetPdfInputPosition for $VariantStruct {
            fn get_pdf_input_position(&self) -> PdfInputPosition {
                self.pos()
            }
        }
        impl From<$VariantStruct> for $PdfOperatorAndOperands {
            fn from(v: $VariantStruct) -> Self {
                Self::$Variant(v)
            }
        }
        impl $VariantStruct {
            $enum_vis fn operator_from_pos(pos: impl Into<PdfInputPositionNoCompare>) -> $PdfOperator {
                $PdfOperator::$Variant(pos.into())
            }
            $enum_vis fn operator(&self) -> $PdfOperator {
                $PdfOperator::$Variant(self.pos)
            }
            $enum_vis fn pos(&self) -> PdfInputPosition {
                self.pos.0
            }
        }
        // Recurse to generate this variant's `parse` (fixed or variadic arm).
        make_pdf_operator_enum! {
            @impl_variant_parse
            $enum_vis enum;
            struct $VariantStruct {
                $pos: PdfInputPositionNoCompare,
                $(
                    #[$field_parse($($parse_args)*)]
                    $(#[$($field_meta)*])*
                    $field: $field_ty,
                )*
            }
        })*
        $(#[$($operator_and_operands_meta)*])*
        $enum_vis enum $PdfOperatorAndOperands {
            $(#[$($unknown_variant_meta)*])*
            $Unknown {
                $(#[$($unknown_operands_meta)*])*
                $unknown_operands: $unknown_operands_ty,
                $(#[$($unknown_operator_meta)*])*
                $unknown_operator: $unknown_operator_ty,
            },
            $(
                $(#[$($variant_meta)*])*
                $Variant($VariantStruct),
            )*
        }
        impl $PdfOperatorAndOperands {
            $enum_vis fn operator(&self) -> $PdfOperator {
                match self {
                    Self::Unknown { operator, .. } => $PdfOperator::Unknown(operator.clone()),
                    $(Self::$Variant(v) => v.operator(),)*
                }
            }
            $enum_vis fn pos(&self) -> PdfInputPosition {
                match self {
                    Self::$Unknown { operator, .. } => operator.pos(),
                    $(Self::$Variant(v) => v.pos(),)*
                }
            }
        }
        impl fmt::Debug for $PdfOperatorAndOperands {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                match self {
                    Self::$Unknown {
                        operands,
                        operator,
                    } => f.debug_struct("Unknown").field("operator", operator).field("operands", operands).finish(),
                    $(Self::$Variant($VariantStruct {
                        $pos,
                        $($field,)*
                    }) => f.debug_struct(stringify!($Variant)).field("pos", $pos)$(.field(stringify!($field), $field))*.finish(),)*
                }
            }
        }
        impl PdfRenderOperator for $PdfOperatorAndOperands {
            // Dispatches rendering to the per-operator impls (defined in the
            // render module); unknown operators go to the state's fallback.
            fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> {
                match self {
                    Self::$Unknown {
                        operands,
                        operator,
                    } => state.handle_unknown_operator(operator, operands),
                    $(Self::$Variant(v) => <$VariantStruct as PdfRenderOperator>::render(v, state),)*
                }
            }
        }
    };
    // Per-variant `parse`, fixed-arity form: each `#[$field_parse(a, b, ...)]`
    // names the operands consumed for that field; extra or missing operands
    // are reported as errors.
    (
        @impl_variant_parse
        $enum_vis:vis enum;
        struct $VariantStruct:ident {
            $pos:ident: PdfInputPositionNoCompare,
            $(
                #[$field_parse:ident($($parse_args:ident),* $(,)?)]
                $(#[$($field_meta:tt)*])*
                $field:ident: $field_ty:ty,
            )*
        }
    ) => {
        impl $VariantStruct {
            $enum_vis fn parse(pos: impl Into<PdfInputPositionNoCompare>, operands: impl IntoIterator<Item = PdfObject>) -> Result<Self, PdfParseError> {
                let pos = pos.into();
                let mut operands = operands.into_iter();
                $($(let Some($parse_args) = operands.next() else {
                    return Err(PdfParseError::OperatorHasTooFewOperands { operator: Self::operator_from_pos(pos) });
                };)*)*
                if operands.next().is_some() {
                    return Err(PdfParseError::OperatorHasTooManyOperands { operator: Self::operator_from_pos(pos) });
                }
                Ok(Self {
                    pos,
                    $($field: <$field_ty>::$field_parse($($parse_args),*)?,)*
                })
            }
        }
    };
    // Per-variant `parse`, variadic form: a single `#[...(...)]` field that
    // consumes all remaining operands (e.g. `#[parse_iter(...)]`).
    (
        @impl_variant_parse
        $enum_vis:vis enum;
        struct $VariantStruct:ident {
            $pos:ident: PdfInputPositionNoCompare,
            #[$field_parse:ident(...)]
            $(#[$($field_meta:tt)*])*
            $field:ident: $field_ty:ty,
        }
    ) => {
        impl $VariantStruct {
            $enum_vis fn parse(pos: impl Into<PdfInputPositionNoCompare>, operands: impl IntoIterator<Item = PdfObject>) -> Result<Self, PdfParseError> {
                let pos = pos.into();
                let operands = operands.into_iter();
                Ok(Self {
                    pos,
                    $field: <$field_ty>::$field_parse(operands)?,
                })
            }
        }
    };
}
// Declares the content-stream operator table (see PDF 2.0, operator summary):
//   * `PdfOperator` — keyword + position, no operands.
//   * one `PdfOperatorXxx` struct per known operator with parsed operands.
//   * `PdfOperatorAndOperands` — operator together with its parsed operands.
// Variants are listed by their PDF keyword (`#[kw = ...]`), roughly
// alphabetically and case-insensitively. Only `//` comments may appear inside
// this invocation: doc comments would be captured as meta attributes.
make_pdf_operator_enum! {
    #[derive(Clone)]
    pub enum PdfOperator;
    #[derive(Clone)]
    pub enum PdfOperatorAndOperands {
        Unknown {
            operands: Arc<[PdfObjectDirect]>,
            operator: PdfOperatorUnparsed,
        },
        #[kw = b"b"]
        CloseFillAndStrokePath(PdfOperatorCloseFillAndStrokePath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"B"]
        FillAndStrokePath(PdfOperatorFillAndStrokePath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"b*"]
        CloseFillAndStrokePathEvenOdd(PdfOperatorCloseFillAndStrokePathEvenOdd {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"B*"]
        FillAndStrokePathEvenOdd(PdfOperatorFillAndStrokePathEvenOdd {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"BDC"]
        BeginMarkedContentWithProperties(PdfOperatorBeginMarkedContentWithProperties {
            pos: PdfInputPositionNoCompare,
            #[parse(tag)]
            tag: PdfName,
            #[parse(properties)]
            properties: NameOr<PdfDictionary>,
        }),
        #[kw = b"BI"]
        BeginInlineImage(PdfOperatorBeginInlineImage {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"BMC"]
        BeginMarkedContent(PdfOperatorBeginMarkedContent {
            pos: PdfInputPositionNoCompare,
            #[parse(tag)]
            tag: PdfName,
        }),
        #[kw = b"BT"]
        BeginText(PdfOperatorBeginText {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"BX"]
        BeginCompatibilitySection(PdfOperatorBeginCompatibilitySection {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"c"]
        CurveTo(PdfOperatorCurveTo {
            pos: PdfInputPositionNoCompare,
            #[parse(x1, y1)]
            p1: PdfVec2D,
            #[parse(x2, y2)]
            p2: PdfVec2D,
            #[parse(x3, y3)]
            p3: PdfVec2D,
        }),
        #[kw = b"cm"]
        ConcatMatrix(PdfOperatorConcatMatrix {
            pos: PdfInputPositionNoCompare,
            #[parse_flat(a, b, c, d, e, f)]
            matrix: PdfMatrix,
        }),
        #[kw = b"CS"]
        SetStrokeColorSpace(PdfOperatorSetStrokeColorSpace {
            pos: PdfInputPositionNoCompare,
            #[parse(name)]
            name: PdfName,
        }),
        #[kw = b"cs"]
        SetNonStrokeColorSpace(PdfOperatorSetNonStrokeColorSpace {
            pos: PdfInputPositionNoCompare,
            #[parse(name)]
            name: PdfName,
        }),
        #[kw = b"d"]
        SetLineDashPattern(PdfOperatorSetLineDashPattern {
            pos: PdfInputPositionNoCompare,
            #[parse(dash_array)]
            dash_array: PdfObject, // TODO: actually parse
            #[parse(dash_phase)]
            dash_phase: PdfObject, // TODO: actually parse
        }),
        #[kw = b"d0"]
        FontType3SetWidth(PdfOperatorFontType3SetWidth {
            pos: PdfInputPositionNoCompare,
            #[parse(x, y)]
            width: PdfVec2D,
        }),
        #[kw = b"d1"]
        FontType3SetWidthAndBBox(PdfOperatorFontType3SetWidthAndBBox {
            pos: PdfInputPositionNoCompare,
            #[parse(width_x, width_y)]
            width: PdfVec2D,
            #[parse_flat(lower_left_x, lower_left_y, upper_right_x, upper_right_y)]
            bbox: PdfRectangle,
        }),
        #[kw = b"Do"]
        PaintXObject(PdfOperatorPaintXObject {
            pos: PdfInputPositionNoCompare,
            #[parse(name)]
            name: PdfName,
        }),
        #[kw = b"DP"]
        DesignateMarkedContentPointWithProperties(PdfOperatorDesignateMarkedContentPointWithProperties {
            pos: PdfInputPositionNoCompare,
            #[parse(tag)]
            tag: PdfName,
            #[parse(properties)]
            properties: NameOr<PdfDictionary>,
        }),
        #[kw = b"EI"]
        EndInlineImage(PdfOperatorEndInlineImage {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"EMC"]
        EndMarkedContent(PdfOperatorEndMarkedContent {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"ET"]
        EndText(PdfOperatorEndText {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"EX"]
        EndCompatibilitySection(PdfOperatorEndCompatibilitySection {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"f"]
        FillPath(PdfOperatorFillPath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"F"]
        FillPathObsolete(PdfOperatorFillPathObsolete {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"f*"]
        FillPathEvenOdd(PdfOperatorFillPathEvenOdd {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"G"]
        SetStrokeGray(PdfOperatorSetStrokeGray {
            pos: PdfInputPositionNoCompare,
            #[parse(gray)]
            gray: PdfColorDeviceGray,
        }),
        #[kw = b"g"]
        SetNonStrokeGray(PdfOperatorSetNonStrokeGray {
            pos: PdfInputPositionNoCompare,
            #[parse(gray)]
            gray: PdfColorDeviceGray,
        }),
        #[kw = b"gs"]
        SetGraphicsState(PdfOperatorSetGraphicsState {
            pos: PdfInputPositionNoCompare,
            #[parse(dictionary_name)]
            dictionary_name: PdfName,
        }),
        #[kw = b"h"]
        CloseSubpath(PdfOperatorCloseSubpath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"i"]
        SetFlatnessTolerance(PdfOperatorSetFlatnessTolerance {
            pos: PdfInputPositionNoCompare,
            #[parse(flatness)]
            flatness: f32,
        }),
        #[kw = b"ID"]
        BeginInlineImageData(PdfOperatorBeginInlineImageData {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"j"]
        SetLineJoinStyle(PdfOperatorSetLineJoinStyle {
            pos: PdfInputPositionNoCompare,
            #[parse(line_join_style)]
            line_join_style: u8, // TODO parse
        }),
        #[kw = b"J"]
        SetLineCapStyle(PdfOperatorSetLineCapStyle {
            pos: PdfInputPositionNoCompare,
            #[parse(line_cap_style)]
            line_cap_style: u8, // TODO parse
        }),
        #[kw = b"K"]
        SetStrokeCmyk(PdfOperatorSetStrokeCmyk {
            pos: PdfInputPositionNoCompare,
            #[parse(c)]
            c: f32,
            #[parse(m)]
            m: f32,
            #[parse(y)]
            y: f32,
            #[parse(k)]
            k: f32,
        }),
        #[kw = b"k"]
        SetNonStrokeCmyk(PdfOperatorSetNonStrokeCmyk {
            pos: PdfInputPositionNoCompare,
            #[parse(c)]
            c: f32,
            #[parse(m)]
            m: f32,
            #[parse(y)]
            y: f32,
            #[parse(k)]
            k: f32,
        }),
        #[kw = b"l"]
        LineTo(PdfOperatorLineTo {
            pos: PdfInputPositionNoCompare,
            #[parse(x, y)]
            to: PdfVec2D,
        }),
        #[kw = b"m"]
        MoveTo(PdfOperatorMoveTo {
            pos: PdfInputPositionNoCompare,
            #[parse(x, y)]
            to: PdfVec2D,
        }),
        #[kw = b"M"]
        SetMiterLimit(PdfOperatorSetMiterLimit {
            pos: PdfInputPositionNoCompare,
            #[parse(limit)]
            limit: f32,
        }),
        #[kw = b"MP"]
        DesignateMarkedContentPoint(PdfOperatorDesignateMarkedContentPoint {
            pos: PdfInputPositionNoCompare,
            #[parse(tag)]
            tag: PdfName,
        }),
        #[kw = b"n"]
        EndPath(PdfOperatorEndPath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"q"]
        SaveGraphicsState(PdfOperatorSaveGraphicsState {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"Q"]
        RestoreGraphicsState(PdfOperatorRestoreGraphicsState {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"re"]
        Rectangle(PdfOperatorRectangle {
            pos: PdfInputPositionNoCompare,
            #[parse(x, y)]
            p: PdfVec2D,
            #[parse(width, height)]
            size: PdfVec2D,
        }),
        #[kw = b"RG"]
        SetStrokeRgb(PdfOperatorSetStrokeRgb {
            pos: PdfInputPositionNoCompare,
            #[parse_flat(r, g, b)]
            color: PdfColorDeviceRgb,
        }),
        #[kw = b"rg"]
        SetNonStrokeRgb(PdfOperatorSetNonStrokeRgb {
            pos: PdfInputPositionNoCompare,
            #[parse_flat(r, g, b)]
            color: PdfColorDeviceRgb,
        }),
        #[kw = b"ri"]
        SetColorRenderingIntent(PdfOperatorSetColorRenderingIntent {
            pos: PdfInputPositionNoCompare,
            #[parse(intent)]
            intent: PdfRenderingIntent,
        }),
        #[kw = b"s"]
        CloseAndStrokePath(PdfOperatorCloseAndStrokePath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"S"]
        StrokePath(PdfOperatorStrokePath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"SC"]
        SetStrokeColor(PdfOperatorSetStrokeColor {
            pos: PdfInputPositionNoCompare,
            #[parse_iter(...)]
            color: Arc<[f32]>,
        }),
        #[kw = b"sc"]
        SetNonStrokeColor(PdfOperatorSetNonStrokeColor {
            pos: PdfInputPositionNoCompare,
            #[parse_iter(...)]
            color: Arc<[f32]>,
        }),
        #[kw = b"SCN"]
        SetStrokeColorWithName(PdfOperatorSetStrokeColorWithName {
            pos: PdfInputPositionNoCompare,
            #[parse_iter(...)]
            color_and_name: Arc<[NameOr<f32>]>,
        }),
        #[kw = b"scn"]
        SetNonStrokeColorWithName(PdfOperatorSetNonStrokeColorWithName {
            pos: PdfInputPositionNoCompare,
            #[parse_iter(...)]
            color_and_name: Arc<[NameOr<f32>]>,
        }),
        // NOTE(review): PDF's `sh` operator takes one name operand (the
        // shading dictionary's name); none is declared here, so a stream
        // using `sh` will fail with `OperatorHasTooManyOperands` — confirm
        // whether this is intentional (many operators are still TODO).
        #[kw = b"sh"]
        Shade(PdfOperatorShade {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"T*"]
        TextNextLine(PdfOperatorTextNextLine {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"Tc"]
        SetCharacterSpacing(PdfOperatorSetCharacterSpacing {
            pos: PdfInputPositionNoCompare,
            #[parse(char_space)]
            char_space: f32,
        }),
        #[kw = b"Td"]
        TextNextLineWithOffset(PdfOperatorTextNextLineWithOffset {
            pos: PdfInputPositionNoCompare,
            #[parse(x, y)]
            offset: PdfVec2D,
        }),
        #[kw = b"TD"]
        TextNextLineWithOffsetAndLeading(PdfOperatorTextNextLineWithOffsetAndLeading {
            pos: PdfInputPositionNoCompare,
            #[parse(x, y)]
            offset: PdfVec2D,
        }),
        #[kw = b"Tf"]
        SetFontAndSize(PdfOperatorSetFontAndSize {
            pos: PdfInputPositionNoCompare,
            #[parse(font)]
            font: PdfName,
            #[parse(size)]
            size: f32,
        }),
        #[kw = b"Tj"]
        ShowText(PdfOperatorShowText {
            pos: PdfInputPositionNoCompare,
            #[parse(text)]
            text: PdfString,
        }),
        #[kw = b"TJ"]
        ShowTextWithGlyphPositioning(PdfOperatorShowTextWithGlyphPositioning {
            pos: PdfInputPositionNoCompare,
            #[parse(text_and_positioning)]
            text_and_positioning: Arc<[PdfStringOrNumber]>,
        }),
        #[kw = b"TL"]
        SetTextLeading(PdfOperatorSetTextLeading {
            pos: PdfInputPositionNoCompare,
            #[parse(leading)]
            leading: f32,
        }),
        #[kw = b"Tm"]
        SetTextMatrix(PdfOperatorSetTextMatrix {
            pos: PdfInputPositionNoCompare,
            #[parse_flat(a, b, c, d, e, f)]
            matrix: PdfMatrix,
        }),
        #[kw = b"Tr"]
        SetTextRenderingMode(PdfOperatorSetTextRenderingMode {
            pos: PdfInputPositionNoCompare,
            #[parse(rendering_mode)]
            rendering_mode: u8, // TODO: parse
        }),
        #[kw = b"Ts"]
        SetTextRise(PdfOperatorSetTextRise {
            pos: PdfInputPositionNoCompare,
            #[parse(rise)]
            rise: f32,
        }),
        #[kw = b"Tw"]
        SetWordSpacing(PdfOperatorSetWordSpacing {
            pos: PdfInputPositionNoCompare,
            #[parse(word_space)]
            word_space: f32,
        }),
        #[kw = b"Tz"]
        SetTextHorizontalScaling(PdfOperatorSetTextHorizontalScaling {
            pos: PdfInputPositionNoCompare,
            #[parse(scale_percent)]
            scale_percent: f32,
        }),
        // NOTE(review): PDF's `v` operator takes 4 operands (x2 y2 x3 y3);
        // none are declared here, so a stream using `v` will fail with
        // `OperatorHasTooManyOperands` — confirm whether intentional.
        #[kw = b"v"]
        CurveTo23(PdfOperatorCurveTo23 {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"w"]
        SetLineWidth(PdfOperatorSetLineWidth {
            pos: PdfInputPositionNoCompare,
            #[parse(line_width)]
            line_width: f32,
        }),
        #[kw = b"W"]
        Clip(PdfOperatorClip {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"W*"]
        ClipEvenOdd(PdfOperatorClipEvenOdd {
            pos: PdfInputPositionNoCompare,
        }),
        // NOTE(review): PDF's `y` operator takes 4 operands (x1 y1 x3 y3);
        // none are declared here — same concern as `v` above.
        #[kw = b"y"]
        CurveTo13(PdfOperatorCurveTo13 {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"'"]
        TextNextLineAndShow(PdfOperatorTextNextLineAndShow {
            pos: PdfInputPositionNoCompare,
            #[parse(text)]
            text: PdfString,
        }),
        #[kw = b"\""]
        SetSpacingThenTextNextLineAndShow(PdfOperatorSetSpacingThenTextNextLineAndShow {
            pos: PdfInputPositionNoCompare,
            #[parse(word_space)]
            word_space: f32,
            #[parse(char_space)]
            char_space: f32,
            #[parse(text)]
            text: PdfString,
        }),
    }
}
impl GetPdfInputPosition for PdfOperator {
    fn get_pdf_input_position(&self) -> PdfInputPosition {
        self.pos()
    }
}
impl GetPdfInputPosition for PdfOperatorAndOperands {
    fn get_pdf_input_position(&self) -> PdfInputPosition {
        self.pos()
    }
}
/// The parsed contents of a content stream: its operators (with operands)
/// in source order.
#[derive(Clone)]
pub struct PdfContentStreamData {
    pub operators: Arc<[PdfOperatorAndOperands]>,
}
impl fmt::Debug for PdfContentStreamData {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("PdfContentStreamData")
            .field("operators", &self.operators)
            .finish()
    }
}
impl PdfStreamContents for PdfContentStreamData {
    /// Parses a decoded content stream into a sequence of operators.
    ///
    /// Tokenizes `data`, accumulating objects as pending operands until an
    /// operator keyword is reached; the operator then consumes (and validates
    /// the count of) the accumulated operands.
    fn parse(
        data: &[u8],
        stream_pos: PdfInputPosition,
        objects: Arc<PdfObjects>,
    ) -> Result<Self, PdfParseError> {
        let mut parser = PdfParser {
            objects,
            tokenizer: PdfTokenizer::new(
                data,
                // Positions inside the stream start at 0 and are tagged with
                // the containing stream's position for error reporting.
                PdfInputPositionKnown {
                    pos: 0,
                    containing_streams_pos: stream_pos.get().map(|v| v.pos),
                },
            ),
        };
        let mut operands = Vec::new();
        let mut operators = Vec::new();
        loop {
            parser.skip_comments_and_whitespace();
            if parser.tokenizer.peek().is_none() {
                break;
            }
            match parser.parse_object_or_operator()? {
                // A `stream` keyword is not valid inside a content stream.
                PdfObjectOrStreamDictionaryOrOperator::StreamDictionary {
                    stream_kw_pos, ..
                } => return Err(PdfParseError::StreamNotAllowedHere { pos: stream_kw_pos }),
                PdfObjectOrStreamDictionaryOrOperator::Object(object) => operands.push(object),
                PdfObjectOrStreamDictionaryOrOperator::Operator(operator) => {
                    // The operator consumes all pending operands.
                    operators.push(PdfOperator::from(operator).parse(operands.drain(..))?);
                }
            }
        }
        if operands.is_empty() {
            Ok(Self {
                operators: operators.into(),
            })
        } else {
            // Trailing operands with no operator to consume them.
            Err(PdfParseError::MissingOperator {
                pos: parser.tokenizer.pos(),
            })
        }
    }
}
/// A content stream: a PDF stream whose decoded bytes parse into operators.
pub type PdfContentStream = PdfStream<PdfDictionary, PdfContentStreamData>;

View file

@ -0,0 +1,743 @@
use crate::{
pdf::{
content_stream::PdfContentStream,
font::PdfFont,
object::{
IsPdfNull, MaybeArray, PdfDate, PdfDictionary, PdfInteger, PdfName, PdfObject,
PdfObjectDirect, PdfObjectIndirect, PdfRectangle, PdfStream, PdfString,
},
parse::{PdfParse, PdfParseError},
pdf_parse,
render::{PdfRenderOperator, PdfRenderState},
},
util::DagDebugState,
};
use rayon::iter::{FromParallelIterator, IntoParallelIterator, ParallelIterator};
use std::{borrow::Cow, fmt, sync::Arc};
// The `/Type` entry of a document catalog; always `/Catalog`.
pdf_parse! {
    #[pdf(name)]
    #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
    pub enum PdfDocumentCatalogType {
        #[pdf(name = "Catalog")]
        #[default]
        Catalog,
    }
}
// The document catalog: the root dictionary of a PDF document. Only the
// entries used so far are parsed into typed fields; all remaining entries
// are kept verbatim in `rest` via `#[pdf(flatten)]`.
pdf_parse! {
    #[pdf]
    #[derive(Clone)]
    pub struct PdfDocumentCatalog {
        #[pdf(name = "Type")]
        pub ty: PdfDocumentCatalogType,
        #[pdf(name = "Version")]
        pub version: Option<PdfName>,
        #[pdf(name = "Extensions")]
        pub extensions: Option<PdfDictionary>,
        #[pdf(name = "Pages")]
        pub pages: PdfPageTree,
        // TODO
        #[pdf(flatten)]
        pub rest: PdfDictionary,
    }
}
impl fmt::Debug for PdfDocumentCatalog {
    /// Debug-formats the catalog inside a `DagDebugState` scope, matching the
    /// other `Debug` impls in this module.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        DagDebugState::scope(|_state| {
            let mut builder = f.debug_struct("PdfDocumentCatalog");
            builder.field("ty", &self.ty);
            builder.field("version", &self.version);
            builder.field("extensions", &self.extensions);
            builder.field("pages", &self.pages);
            builder.field("rest", &self.rest);
            builder.finish()
        })
    }
}
// Page resources: currently only `/Font` is parsed into typed form; all
// other entries are kept verbatim in `rest`.
pdf_parse! {
    #[pdf]
    #[derive(Clone, Debug)]
    pub struct PdfResourcesDictionary {
        #[pdf(name = "Font")]
        pub fonts: PdfDictionary<PdfFont>,
        #[pdf(flatten)]
        pub rest: PdfDictionary,
    }
}
/// A parsed page tree: the original tree structure plus a flattened list of
/// fully parsed pages (built by [`PdfPageTree::try_from_page_tree_root`]).
#[derive(Clone)]
pub struct PdfPageTree {
    // The unflattened tree as found in the document (after inheritable
    // attributes were propagated to the leaves).
    page_tree: PdfPageTreeNode,
    // All leaf pages in document order.
    pages: Arc<[PdfPage]>,
}
impl PdfPageTree {
    /// Recursively appends every leaf (page) under `node` to `leaves` in
    /// document order; a kid that is neither a tree node nor a leaf is
    /// reported as a type error at its position.
    fn collect_leaves(
        node: &PdfPageTreeNode,
        leaves: &mut Vec<PdfPageTreeLeaf>,
    ) -> Result<(), PdfParseError> {
        for kid in node.kids.iter() {
            match kid {
                PdfPageTreeNodeOrLeaf::Node(node) => Self::collect_leaves(node, leaves)?,
                PdfPageTreeNodeOrLeaf::Leaf(leaf) => {
                    leaves.push(leaf.clone());
                }
                PdfPageTreeNodeOrLeaf::Other(v) => {
                    return Err(PdfParseError::InvalidType {
                        pos: v.pos(),
                        ty: "dictionary",
                        expected_ty: "PdfPageTreeNodeOrLeaf",
                    });
                }
            }
        }
        Ok(())
    }
    /// Builds a [`PdfPageTree`] from the tree root: inheritable attributes
    /// are first pushed down to the leaves, then all leaves are collected and
    /// parsed into [`PdfPage`]s in parallel (rayon). `panic_fuse` makes the
    /// parallel walk stop early if any task panics.
    pub fn try_from_page_tree_root(mut page_tree: PdfPageTreeNode) -> Result<Self, PdfParseError> {
        page_tree.propagate_inheritable_data_to_leaves();
        let mut leaves = Vec::new();
        Self::collect_leaves(&page_tree, &mut leaves)?;
        Ok(Self {
            page_tree,
            pages: Result::from_par_iter(
                leaves
                    .into_par_iter()
                    .map(PdfPage::parse_after_propagating_inheritable_data)
                    .panic_fuse(),
            )?,
        })
    }
    /// The (unflattened) page tree root.
    pub fn page_tree(&self) -> &PdfPageTreeNode {
        &self.page_tree
    }
    /// All pages, flattened in document order.
    pub fn pages(&self) -> &Arc<[PdfPage]> {
        &self.pages
    }
}
impl fmt::Debug for PdfPageTree {
    // Shows only the flattened `pages`; the raw `page_tree` is deliberately
    // omitted (hence `finish_non_exhaustive`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        DagDebugState::scope(|_state| {
            let Self {
                page_tree: _,
                pages,
            } = self;
            f.debug_struct("PdfPageTree")
                .field("pages", pages)
                .finish_non_exhaustive()
        })
    }
}
impl IsPdfNull for PdfPageTree {
    // the tree counts as null exactly when its root node dictionary is null
    fn is_pdf_null(&self) -> bool {
        self.page_tree.is_pdf_null()
    }
}
impl PdfParse for PdfPageTree {
    fn type_name() -> Cow<'static, str> {
        Cow::Borrowed("PdfPageTree")
    }
    // parses the root `Pages` node, then flattens/validates it via
    // `try_from_page_tree_root`
    fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
        Self::try_from_page_tree_root(PdfParse::parse(object)?)
    }
}
// Page attributes that may live on any ancestor `Pages` node instead of the
// page itself; `propagate_to` flows them down the tree.
pdf_parse! {
    #[pdf]
    #[derive(Clone, Default, Debug)]
    pub struct PdfPageInheritableData {
        #[pdf(name = "Resources")]
        pub resources: Option<PdfResourcesDictionary>,
        #[pdf(name = "MediaBox")]
        pub media_box: Option<PdfRectangle>,
        #[pdf(name = "CropBox")]
        pub crop_box: Option<PdfRectangle>,
        #[pdf(name = "Rotate")]
        pub rotate: Option<PdfPageRotation>,
        #[pdf(flatten)]
        pub rest: PdfDictionary,
    }
}
impl PdfPageInheritableData {
    /// Copies each inheritable attribute into `target`, but only where
    /// `target` has not already supplied its own value (page-tree
    /// inheritance: the closest ancestor wins). `rest` is not propagated.
    pub fn propagate_to(&self, target: &mut Self) {
        // fill `dst` from `src` only when `dst` is still unset
        fn fill<T: Clone>(src: &Option<T>, dst: &mut Option<T>) {
            if dst.is_none() {
                dst.clone_from(src);
            }
        }
        let Self {
            resources,
            media_box,
            crop_box,
            rotate,
            rest: _,
        } = self;
        fill(resources, &mut target.resources);
        fill(media_box, &mut target.media_box);
        fill(crop_box, &mut target.crop_box);
        fill(rotate, &mut target.rotate);
    }
}
// The /Type value of an interior page-tree node ("Pages").
pdf_parse! {
    #[pdf(name)]
    #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
    pub enum PdfPageTreeNodeType {
        #[pdf(name = "Pages")]
        #[default]
        Pages,
    }
}
// An interior (/Type /Pages) node: its kids plus the inheritable attributes
// it contributes to pages below it. `count` is the declared number of leaf
// pages beneath this node.
pdf_parse! {
    #[pdf]
    #[derive(Clone)]
    pub struct PdfPageTreeNode {
        #[pdf(name = "Type")]
        pub ty: PdfPageTreeNodeType,
        #[pdf(name = "Parent")]
        pub parent: Option<PdfObjectIndirect>,
        #[pdf(name = "Kids")]
        pub kids: Arc<[PdfPageTreeNodeOrLeaf]>,
        #[pdf(name = "Count")]
        pub count: usize,
        // TODO
        #[pdf(flatten)]
        pub inheritable: PdfPageInheritableData,
    }
}
impl fmt::Debug for PdfPageTreeNode {
    // Manual Debug inside `DagDebugState::scope`; the exhaustive destructure
    // keeps the printed field list in sync with the struct.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        DagDebugState::scope(|_state| {
            let Self {
                ty,
                parent,
                kids,
                count,
                inheritable,
            } = self;
            f.debug_struct("PdfPageTreeNode")
                .field("ty", ty)
                .field("parent", parent)
                .field("kids", kids)
                .field("count", count)
                .field("inheritable", inheritable)
                .finish()
        })
    }
}
impl PdfPageTreeNode {
    /// Pushes this node's inheritable attributes down into every kid (only
    /// where the kid has no value of its own), then recurses so that each
    /// leaf ends up with fully-resolved inherited data.
    pub fn propagate_inheritable_data_to_leaves(&mut self) {
        let Self {
            inheritable, kids, ..
        } = self;
        for kid in Arc::make_mut(kids) {
            if let Some(target) = kid.inheritable_data_mut() {
                inheritable.propagate_to(target);
            }
            kid.propagate_inheritable_data_to_leaves();
        }
    }
}
// The /Type value of a page object ("Page").
pdf_parse! {
    #[pdf(name)]
    #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
    pub enum PdfPageType {
        #[pdf(name = "Page")]
        #[default]
        Page,
    }
}
// The /Tabs entry (annotation tab order); unrecognized names are preserved
// in `Other` rather than rejected.
pdf_parse! {
    #[pdf(name)]
    #[derive(Clone, PartialEq, Eq, Hash, Debug)]
    pub enum PdfPageAnnotationsTabOrder {
        #[pdf(name = "R")]
        RowOrder,
        #[pdf(name = "C")]
        ColumnOrder,
        #[pdf(name = "S")]
        StructureOrder,
        #[pdf(other)]
        Other(PdfName),
    }
}
// A page object (/Type /Page) exactly as it appears in the file, before
// inheritance is resolved; `PdfPage` is the fully-resolved form.
pdf_parse! {
    #[pdf]
    #[derive(Clone)]
    pub struct PdfPageTreeLeaf {
        #[pdf(name = "Type")]
        pub ty: PdfPageType,
        #[pdf(name = "Parent")]
        pub parent: PdfObjectIndirect,
        #[pdf(name = "LastModified")]
        pub last_modified: Option<PdfDate>,
        #[pdf(name = "BleedBox")]
        pub bleed_box: Option<PdfRectangle>,
        #[pdf(name = "TrimBox")]
        pub trim_box: Option<PdfRectangle>,
        #[pdf(name = "ArtBox")]
        pub art_box: Option<PdfRectangle>,
        #[pdf(name = "BoxColorInfo")]
        pub box_color_info: Option<PdfDictionary>,
        // /Contents may be a single stream or an array of streams
        #[pdf(name = "Contents")]
        pub contents: MaybeArray<PdfContentStream>,
        #[pdf(name = "Group")]
        pub group: Option<PdfDictionary>,
        #[pdf(name = "Thumb")]
        pub thumbnail: Option<PdfStream>,
        #[pdf(name = "B")]
        pub beads: Option<Arc<[PdfDictionary]>>,
        #[pdf(name = "Dur")]
        pub duration: Option<f32>,
        #[pdf(name = "Trans")]
        pub transition: Option<PdfDictionary>,
        #[pdf(name = "Annots")]
        pub annotations: Option<Arc<[PdfDictionary]>>,
        #[pdf(name = "AA")]
        pub additional_actions: Option<PdfDictionary>,
        #[pdf(name = "Metadata")]
        pub metadata: Option<PdfStream>,
        #[pdf(name = "PieceInfo")]
        pub piece_info: Option<PdfDictionary>,
        #[pdf(name = "StructParents")]
        pub structural_parents: Option<PdfInteger>,
        #[pdf(name = "ID")]
        pub parent_web_capture_content_set_id: Option<PdfString>,
        #[pdf(name = "PZ")]
        pub preferred_zoom_factor: Option<f32>,
        #[pdf(name = "SeparationInfo")]
        pub separation_info: Option<PdfDictionary>,
        #[pdf(name = "Tabs")]
        pub annotations_tab_order: Option<PdfPageAnnotationsTabOrder>,
        #[pdf(name = "TemplateInstantiated")]
        pub template_instantiated: Option<PdfName>,
        #[pdf(name = "PresSteps")]
        pub pres_steps: Option<PdfDictionary>,
        #[pdf(name = "UserUnit")]
        pub user_unit: Option<f32>,
        #[pdf(name = "VP")]
        pub viewports: Option<Arc<[PdfDictionary]>>,
        // attributes that may instead come from an ancestor node
        #[pdf(flatten)]
        pub inheritable: PdfPageInheritableData,
    }
}
impl fmt::Debug for PdfPageTreeLeaf {
    // Manual Debug inside `DagDebugState::scope`; the exhaustive destructure
    // forces this list to track the struct's fields.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        DagDebugState::scope(|_state| {
            let Self {
                ty,
                parent,
                last_modified,
                bleed_box,
                trim_box,
                art_box,
                box_color_info,
                contents,
                group,
                thumbnail,
                beads,
                duration,
                transition,
                annotations,
                additional_actions,
                metadata,
                piece_info,
                structural_parents,
                parent_web_capture_content_set_id,
                preferred_zoom_factor,
                separation_info,
                annotations_tab_order,
                template_instantiated,
                pres_steps,
                user_unit,
                viewports,
                inheritable,
            } = self;
            f.debug_struct("PdfPageTreeLeaf")
                .field("ty", ty)
                .field("parent", parent)
                .field("last_modified", last_modified)
                .field("bleed_box", bleed_box)
                .field("trim_box", trim_box)
                .field("art_box", art_box)
                .field("box_color_info", box_color_info)
                .field("contents", contents)
                .field("group", group)
                .field("thumbnail", thumbnail)
                .field("beads", beads)
                .field("duration", duration)
                .field("transition", transition)
                .field("annotations", annotations)
                .field("additional_actions", additional_actions)
                .field("metadata", metadata)
                .field("piece_info", piece_info)
                .field("structural_parents", structural_parents)
                .field(
                    "parent_web_capture_content_set_id",
                    parent_web_capture_content_set_id,
                )
                .field("preferred_zoom_factor", preferred_zoom_factor)
                .field("separation_info", separation_info)
                .field("annotations_tab_order", annotations_tab_order)
                .field("template_instantiated", template_instantiated)
                .field("pres_steps", pres_steps)
                .field("user_unit", user_unit)
                .field("viewports", viewports)
                .field("inheritable", inheritable)
                .finish()
        })
    }
}
// A /Kids entry, discriminated by its /Type: an interior node, a page leaf,
// or (fallback) an arbitrary dictionary preserved as-is.
pdf_parse! {
    #[pdf(tag = "Type")]
    #[derive(Clone)]
    pub enum PdfPageTreeNodeOrLeaf {
        #[pdf(tag_value = "Pages")]
        Node(PdfPageTreeNode),
        #[pdf(tag_value = "Page")]
        Leaf(PdfPageTreeLeaf),
        #[pdf(other)]
        Other(PdfDictionary),
    }
}
impl PdfPageTreeNodeOrLeaf {
pub fn propagate_inheritable_data_to_leaves(&mut self) {
match self {
PdfPageTreeNodeOrLeaf::Node(v) => v.propagate_inheritable_data_to_leaves(),
PdfPageTreeNodeOrLeaf::Leaf(_) | PdfPageTreeNodeOrLeaf::Other(_) => {}
}
}
pub fn inheritable_data_mut(&mut self) -> Option<&mut PdfPageInheritableData> {
match self {
PdfPageTreeNodeOrLeaf::Node(v) => Some(&mut v.inheritable),
PdfPageTreeNodeOrLeaf::Leaf(v) => Some(&mut v.inheritable),
PdfPageTreeNodeOrLeaf::Other(_) => None,
}
}
pub fn inheritable_data(&self) -> Option<&PdfPageInheritableData> {
match self {
PdfPageTreeNodeOrLeaf::Node(v) => Some(&v.inheritable),
PdfPageTreeNodeOrLeaf::Leaf(v) => Some(&v.inheritable),
PdfPageTreeNodeOrLeaf::Other(_) => None,
}
}
}
impl fmt::Debug for PdfPageTreeNodeOrLeaf {
    // transparent: formats as whichever variant is inside, with no wrapper
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Node(v) => v.fmt(f),
            Self::Leaf(v) => v.fmt(f),
            Self::Other(v) => v.fmt(f),
        }
    }
}
/// Clockwise rotation applied to a page when it is displayed or printed;
/// always a multiple of 90 degrees, with discriminants equal to the angle.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub enum PdfPageRotation {
    #[default]
    NoRotation = 0,
    ClockwiseBy90Degrees = 90,
    By180Degrees = 180,
    ClockwiseBy270Degrees = 270,
}
impl PdfPageRotation {
    /// Normalizes `angle` into `0..360` (handling negatives via `rem_euclid`)
    /// and maps it onto a variant; `None` for non-multiples of 90.
    pub fn from_clockwise_angle_in_degrees(angle: i32) -> Option<Self> {
        let rotation = match angle.rem_euclid(360) {
            0 => Self::NoRotation,
            90 => Self::ClockwiseBy90Degrees,
            180 => Self::By180Degrees,
            270 => Self::ClockwiseBy270Degrees,
            _ => return None,
        };
        Some(rotation)
    }
    /// [`Self::from_clockwise_angle_in_degrees`] for angles that may not fit
    /// in an `i32`: `angle % 360` always fits, and any negative remainder is
    /// re-normalized by the `i32` path's `rem_euclid`.
    pub fn from_clockwise_angle_in_degrees_i128(angle: i128) -> Option<Self> {
        Self::from_clockwise_angle_in_degrees((angle % 360) as i32)
    }
}
impl From<PdfPageRotation> for i32 {
    /// The clockwise angle in degrees (the variant's discriminant).
    fn from(value: PdfPageRotation) -> Self {
        value as i32
    }
}
impl IsPdfNull for PdfPageRotation {
    // a rotation value is never the PDF `null` object
    fn is_pdf_null(&self) -> bool {
        false
    }
}
impl PdfParse for PdfPageRotation {
    fn type_name() -> Cow<'static, str> {
        Cow::Borrowed("page rotation")
    }
    // Parses an integer object; angles that are not a multiple of 90 are
    // reported as `IntegerOutOfRange` at the object's own position.
    fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
        let object = PdfObjectDirect::from(object);
        let pos = object.pos();
        let angle = PdfInteger::parse(object.into())?;
        Self::from_clockwise_angle_in_degrees_i128(angle.value())
            .ok_or(PdfParseError::IntegerOutOfRange { pos })
    }
}
/// A fully-resolved page: inherited attributes applied, required entries
/// checked, optional boxes defaulted, and content streams already rendered.
/// Built only via [`PdfPage::parse_after_propagating_inheritable_data`].
#[derive(Clone)]
pub struct PdfPage {
    pub ty: PdfPageType,
    pub parent: PdfObjectIndirect,
    pub last_modified: Option<PdfDate>,
    pub resources: PdfResourcesDictionary,
    pub media_box: PdfRectangle,
    // defaults to `media_box` when absent
    pub crop_box: PdfRectangle,
    // these three default to `crop_box` when absent
    pub bleed_box: PdfRectangle,
    pub trim_box: PdfRectangle,
    pub art_box: PdfRectangle,
    pub box_color_info: Option<PdfDictionary>,
    pub contents: Arc<[PdfContentStream]>,
    pub rotate: PdfPageRotation,
    pub group: Option<PdfDictionary>,
    pub thumbnail: Option<PdfStream>,
    pub beads: Option<Arc<[PdfDictionary]>>,
    pub duration: Option<f32>,
    pub transition: Option<PdfDictionary>,
    pub annotations: Option<Arc<[PdfDictionary]>>,
    pub additional_actions: Option<PdfDictionary>,
    pub metadata: Option<PdfStream>,
    pub piece_info: Option<PdfDictionary>,
    pub structural_parents: Option<PdfInteger>,
    pub parent_web_capture_content_set_id: Option<PdfString>,
    pub preferred_zoom_factor: Option<f32>,
    pub separation_info: Option<PdfDictionary>,
    pub annotations_tab_order: Option<PdfPageAnnotationsTabOrder>,
    pub template_instantiated: Option<PdfName>,
    pub pres_steps: Option<PdfDictionary>,
    // defaults to 1.0 when absent
    pub user_unit: f32,
    pub viewports: Option<Arc<[PdfDictionary]>>,
    pub rest: PdfDictionary,
    // `None` only while the page itself is being constructed; `Some` in any
    // value handed out (see `rendered_objects()`)
    rendered_objects: Option<PdfPageRenderedObjects>,
}
impl PdfPage {
    /// Render results for this page; always available, because
    /// `parse_after_propagating_inheritable_data` fills the field in before
    /// returning — the `None` state only exists mid-construction.
    pub fn rendered_objects(&self) -> &PdfPageRenderedObjects {
        let Some(retval) = &self.rendered_objects else {
            unreachable!();
        };
        retval
    }
    /// Builds a fully-resolved page from a leaf whose inheritable data has
    /// already been propagated down the tree
    /// (`PdfPageTreeNode::propagate_inheritable_data_to_leaves`).
    ///
    /// Required inherited entries (`Resources`, `MediaBox`) produce an error
    /// if still absent; `CropBox` defaults to `MediaBox`, and
    /// `BleedBox`/`TrimBox`/`ArtBox` default to `CropBox`. The page's content
    /// streams are rendered eagerly before returning.
    pub fn parse_after_propagating_inheritable_data(
        leaf: PdfPageTreeLeaf,
    ) -> Result<Self, PdfParseError> {
        let PdfPageTreeLeaf {
            ty,
            parent,
            last_modified,
            bleed_box,
            trim_box,
            art_box,
            box_color_info,
            contents,
            group,
            thumbnail,
            beads,
            duration,
            transition,
            annotations,
            additional_actions,
            metadata,
            piece_info,
            structural_parents,
            parent_web_capture_content_set_id,
            preferred_zoom_factor,
            separation_info,
            annotations_tab_order,
            template_instantiated,
            pres_steps,
            user_unit,
            viewports,
            inheritable:
                PdfPageInheritableData {
                    resources,
                    media_box,
                    crop_box,
                    rotate,
                    rest,
                },
        } = leaf;
        // errors below point at the page dictionary itself
        let pos = rest.pos();
        let resources = resources.ok_or(PdfParseError::InvalidType {
            pos,
            ty: "null",
            expected_ty: "page resources dictionary",
        })?;
        let media_box = media_box.ok_or(PdfParseError::InvalidType {
            pos,
            ty: "null",
            expected_ty: "page MediaBox rectangle",
        })?;
        // fallback chain: CropBox -> MediaBox; Bleed/Trim/Art -> CropBox
        let crop_box = crop_box.unwrap_or(media_box);
        let rotate = rotate.unwrap_or(PdfPageRotation::NoRotation);
        let mut retval = Self {
            ty,
            parent,
            last_modified,
            resources,
            media_box,
            crop_box,
            bleed_box: bleed_box.unwrap_or(crop_box),
            trim_box: trim_box.unwrap_or(crop_box),
            art_box: art_box.unwrap_or(crop_box),
            box_color_info,
            contents: contents.0,
            rotate,
            group,
            thumbnail,
            beads,
            duration,
            transition,
            annotations,
            additional_actions,
            metadata,
            piece_info,
            structural_parents,
            parent_web_capture_content_set_id,
            preferred_zoom_factor,
            separation_info,
            annotations_tab_order,
            template_instantiated,
            pres_steps,
            user_unit: user_unit.unwrap_or(1.0),
            viewports,
            rest,
            rendered_objects: None,
        };
        // render eagerly; `rendered_objects()` relies on this being `Some`
        retval.rendered_objects = Some(PdfPageRenderedObjects::render_page(&retval)?);
        Ok(retval)
    }
}
impl fmt::Debug for PdfPage {
    // Manual Debug inside `DagDebugState::scope`; the exhaustive destructure
    // keeps the printed field list in sync with the struct.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        DagDebugState::scope(|_state| {
            let Self {
                ty,
                parent,
                last_modified,
                resources,
                media_box,
                crop_box,
                bleed_box,
                trim_box,
                art_box,
                box_color_info,
                contents,
                rotate,
                group,
                thumbnail,
                beads,
                duration,
                transition,
                annotations,
                additional_actions,
                metadata,
                piece_info,
                structural_parents,
                parent_web_capture_content_set_id,
                preferred_zoom_factor,
                separation_info,
                annotations_tab_order,
                template_instantiated,
                pres_steps,
                user_unit,
                viewports,
                rest,
                rendered_objects,
            } = self;
            // placeholder for the mid-construction `None` state
            struct Unparsed;
            impl fmt::Debug for Unparsed {
                fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                    f.write_str("<unparsed>")
                }
            }
            f.debug_struct("PdfPage")
                .field("ty", ty)
                .field("parent", parent)
                .field("last_modified", last_modified)
                .field("resources", resources)
                .field("media_box", media_box)
                .field("crop_box", crop_box)
                .field("bleed_box", bleed_box)
                .field("trim_box", trim_box)
                .field("art_box", art_box)
                .field("box_color_info", box_color_info)
                .field("contents", contents)
                .field("rotate", rotate)
                .field("group", group)
                .field("thumbnail", thumbnail)
                .field("beads", beads)
                .field("duration", duration)
                .field("transition", transition)
                .field("annotations", annotations)
                .field("additional_actions", additional_actions)
                .field("metadata", metadata)
                .field("piece_info", piece_info)
                .field("structural_parents", structural_parents)
                .field(
                    "parent_web_capture_content_set_id",
                    parent_web_capture_content_set_id,
                )
                .field("preferred_zoom_factor", preferred_zoom_factor)
                .field("separation_info", separation_info)
                .field("annotations_tab_order", annotations_tab_order)
                .field("template_instantiated", template_instantiated)
                .field("pres_steps", pres_steps)
                .field("user_unit", user_unit)
                .field("viewports", viewports)
                .field("rest", rest)
                .field(
                    "rendered_objects",
                    if let Some(rendered_objects) = rendered_objects {
                        rendered_objects
                    } else {
                        &Unparsed
                    },
                )
                .finish()
        })
    }
}
/// Results of rendering a page's content streams; currently empty — the
/// render pass only verifies that every operator executes successfully.
#[derive(Clone, Debug)]
pub struct PdfPageRenderedObjects {}
impl PdfPageRenderedObjects {
    // Replays every operator of every content stream through a fresh
    // `PdfRenderState`, failing on decode or render errors; no output is
    // collected yet.
    fn render_page(page: &PdfPage) -> Result<Self, PdfParseError> {
        let mut state = PdfRenderState::new(page);
        for content_stream in page.contents.iter() {
            for op in content_stream.decoded_data().as_ref()?.operators.iter() {
                op.render(&mut state)?;
            }
        }
        Ok(Self {})
    }
}

1057
src/pdf/font.rs Normal file

File diff suppressed because it is too large Load diff

1067
src/pdf/font/tables.rs Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,325 @@
use std::{collections::BTreeMap, sync::Arc};
use crate::{
pdf::{
font::{
PdfFontToUnicode,
type_1_parse::{PsFile, Token},
},
object::{PdfName, PdfObjectDirect, PdfString},
parse::{PdfInputPosition, PdfParseError},
},
util::ArcOrRef,
};
/// Parser for a font's `ToUnicode` CMap stream: a restricted PostScript
/// program (tokenized via [`PsFile`]) mapping character codes to Unicode text.
pub(crate) struct ToUnicodeParser {
    tokenizer: PsFile,
}
// Shared "unexpected token" error constructor; `token` is `None` at end of
// input. `#[track_caller]` keeps the caller's location available when
// debugging where a parse went wrong.
#[track_caller]
fn invalid_token_err<T>(pos: PdfInputPosition, token: Option<Token>) -> Result<T, PdfParseError> {
    Err(PdfParseError::InvalidTokenInToUnicodeStream {
        pos,
        token: format!("{token:?}"),
    })
}
impl ToUnicodeParser {
    /// Wraps an already-constructed tokenizer over the CMap stream bytes.
    pub(crate) fn new(tokenizer: PsFile) -> Self {
        Self { tokenizer }
    }
    /// Next token must be a string; returns its bytes.
    fn expect_any_string(&mut self) -> Result<Vec<u8>, PdfParseError> {
        self.tokenizer.skip_comments_and_whitespace();
        let pos = self.tokenizer.pos();
        match self.tokenizer.next_token()? {
            Some(Token::String(string)) => Ok(string),
            token => invalid_token_err(pos, token),
        }
    }
    /// Next token must be a string of exactly `expected_len` bytes.
    pub(crate) fn expect_string_with_len(
        &mut self,
        expected_len: usize,
    ) -> Result<Vec<u8>, PdfParseError> {
        self.tokenizer.skip_comments_and_whitespace();
        let pos = self.tokenizer.pos();
        match self.tokenizer.next_token()? {
            Some(Token::String(string)) if string.len() == expected_len => Ok(string),
            token => invalid_token_err(pos, token),
        }
    }
    /// Next token must be the literal (slashed) name `/expected_name`.
    pub(crate) fn expect_literal_name(
        &mut self,
        expected_name: &[u8],
    ) -> Result<(), PdfParseError> {
        self.tokenizer.skip_comments_and_whitespace();
        let pos = self.tokenizer.pos();
        match self.tokenizer.next_token()? {
            Some(Token::LiteralName(name)) if name == expected_name => Ok(()),
            token => invalid_token_err(pos, token),
        }
    }
    /// Next token must be a literal name; returns its bytes.
    pub(crate) fn expect_any_literal_name(&mut self) -> Result<Vec<u8>, PdfParseError> {
        self.tokenizer.skip_comments_and_whitespace();
        let pos = self.tokenizer.pos();
        match self.tokenizer.next_token()? {
            Some(Token::LiteralName(name)) => Ok(name),
            token => invalid_token_err(pos, token),
        }
    }
    /// Next token must be the executable (unslashed) name `expected_name`.
    pub(crate) fn expect_executable_name(
        &mut self,
        expected_name: &[u8],
    ) -> Result<(), PdfParseError> {
        self.tokenizer.skip_comments_and_whitespace();
        let pos = self.tokenizer.pos();
        match self.tokenizer.next_token()? {
            Some(Token::ExecutableName(name)) if name == expected_name => Ok(()),
            token => invalid_token_err(pos, token),
        }
    }
    /// Next token must equal `expected_token` exactly.
    pub(crate) fn expect(&mut self, expected_token: Token) -> Result<(), PdfParseError> {
        self.tokenizer.skip_comments_and_whitespace();
        let pos = self.tokenizer.pos();
        match self.tokenizer.next_token()? {
            Some(token) if token == expected_token => Ok(()),
            token => invalid_token_err(pos, token),
        }
    }
    /// Next token must be an integer; returns its value.
    pub(crate) fn expect_integer(&mut self) -> Result<i128, PdfParseError> {
        self.tokenizer.skip_comments_and_whitespace();
        let pos = self.tokenizer.pos();
        match self.tokenizer.next_token()? {
            Some(Token::Integer(value)) => Ok(value),
            token => invalid_token_err(pos, token),
        }
    }
    /// Parses a `<< /name value ... >>` dictionary, handing each name/value
    /// pair to `entry_callback`. Values must be single tokens.
    pub(crate) fn parse_dict(
        &mut self,
        mut entry_callback: impl FnMut(Vec<u8>, PdfInputPosition, Token) -> Result<(), PdfParseError>,
    ) -> Result<(), PdfParseError> {
        self.expect(Token::DictStart)?;
        loop {
            self.tokenizer.skip_comments_and_whitespace();
            let name_pos = self.tokenizer.pos();
            match self.tokenizer.next_token()? {
                Some(Token::DictEnd) => return Ok(()),
                Some(Token::LiteralName(name)) => {
                    self.tokenizer.skip_comments_and_whitespace();
                    let value_pos = self.tokenizer.pos();
                    let Some(value) = self.tokenizer.next_token()? else {
                        return invalid_token_err(value_pos, None);
                    };
                    entry_callback(name, value_pos, value)?;
                }
                token => {
                    return invalid_token_err(name_pos, token);
                }
            }
        }
    }
    /// Parses the entire CMap program and builds the code -> Unicode map.
    ///
    /// Only the stereotyped layout emitted by common PDF producers is
    /// accepted: the standard `/CIDInit` prologue, one `CIDSystemInfo`
    /// dictionary, `CMapName`/`CMapType 2`, exactly one codespace range, any
    /// number of `bfrange`/`bfchar` sections, then the standard epilogue.
    /// Unexpected-but-plausible inputs currently hit `todo!`.
    pub(crate) fn parse(
        mut self,
        base_map: Option<PdfObjectDirect>,
    ) -> Result<PdfFontToUnicode, PdfParseError> {
        // standard prologue:
        //   /CIDInit /ProcSet findresource begin N dict begin begincmap
        self.tokenizer.skip_comments_and_whitespace();
        self.expect_literal_name(b"CIDInit")?;
        self.expect_literal_name(b"ProcSet")?;
        self.expect_executable_name(b"findresource")?;
        self.expect_executable_name(b"begin")?;
        self.expect_integer()?;
        self.expect_executable_name(b"dict")?;
        self.expect_executable_name(b"begin")?;
        self.expect_executable_name(b"begincmap")?;
        // /CIDSystemInfo << /Registry ... /Ordering ... /Supplement ... >> def
        // NOTE(review): registry/ordering/supplement are validated and stored
        // here but never read afterwards — confirm whether they should flow
        // into the returned PdfFontToUnicode or be dropped explicitly.
        self.expect_literal_name(b"CIDSystemInfo")?;
        let mut registry = None;
        let mut ordering = None;
        let mut supplement = None;
        self.parse_dict(|name, value_pos, value| match &*name {
            b"Registry" => {
                let Token::String(v) = value else {
                    return invalid_token_err(value_pos, Some(value));
                };
                registry = Some(v);
                Ok(())
            }
            b"Ordering" => {
                let Token::String(v) = value else {
                    return invalid_token_err(value_pos, Some(value));
                };
                ordering = Some(v);
                Ok(())
            }
            b"Supplement" => {
                let Token::Integer(v) = value else {
                    return invalid_token_err(value_pos, Some(value));
                };
                supplement = Some(v);
                Ok(())
            }
            _ => todo!("{}: {value:?}", name.escape_ascii()),
        })?;
        self.expect_executable_name(b"def")?;
        // /CMapName /<name> def — the name is kept for the result
        self.expect_literal_name(b"CMapName")?;
        self.tokenizer.skip_comments_and_whitespace();
        let char_map_name_pos = self.tokenizer.pos();
        let char_map_name = self.expect_any_literal_name()?;
        self.expect_executable_name(b"def")?;
        // /CMapType 2 def — 2 is the only supported CMap type here
        self.expect_literal_name(b"CMapType")?;
        self.expect(Token::Integer(2))?;
        self.expect_executable_name(b"def")?;
        // exactly one codespace range is supported; its length fixes the
        // source-code width used by every bfrange/bfchar entry below
        self.expect(Token::Integer(1))?;
        self.expect_executable_name(b"begincodespacerange")?;
        self.tokenizer.skip_comments_and_whitespace();
        let range_start_pos = self.tokenizer.pos();
        let range_start = self.expect_any_string()?;
        if range_start.is_empty() {
            return invalid_token_err(range_start_pos, Some(Token::String(range_start)));
        }
        self.tokenizer.skip_comments_and_whitespace();
        let range_end_pos = self.tokenizer.pos();
        let range_end = self.expect_string_with_len(range_start.len())?;
        self.expect_executable_name(b"endcodespacerange")?;
        let mut to_unicode_map: BTreeMap<PdfString, Arc<str>> = BTreeMap::new();
        // `dest_str` is reused across entries to avoid reallocating
        let mut dest_str = String::new();
        // decodes UTF-16BE `dest_utf16_be` and records `src` -> text;
        // unpaired surrogates are an error
        let mut insert_mapping = |src_pos: PdfInputPosition,
                                  src: &[u8],
                                  dest_pos: PdfInputPosition,
                                  dest_utf16_be: &[u8]|
         -> Result<(), PdfParseError> {
            dest_str.clear();
            for ch in char::decode_utf16(
                dest_utf16_be
                    .chunks(2)
                    .map(|chunk| u16::from_be_bytes([chunk[0], chunk[1]])),
            ) {
                match ch {
                    Ok(ch) => dest_str.push(ch),
                    Err(_) => {
                        return Err(PdfParseError::InvalidUtf16 { pos: dest_pos });
                    }
                }
            }
            to_unicode_map.insert(
                PdfString::new(src_pos, ArcOrRef::Arc(src.into())),
                dest_str.as_str().into(),
            );
            Ok(())
        };
        // body: any number of `N beginbfrange ... endbfrange` and
        // `N beginbfchar ... endbfchar` sections, until `endcmap`.
        // NOTE(review): unlike the expect_* helpers, this loop calls
        // next_token without skip_comments_and_whitespace first — presumably
        // next_token tolerates leading whitespace/comments itself; confirm.
        loop {
            match self.tokenizer.next_token()? {
                Some(Token::Integer(size)) => match self.tokenizer.next_token()? {
                    Some(Token::ExecutableName(name)) if name == b"beginbfrange" => {
                        for _ in 0..size {
                            // src_low and src_high must share every byte but
                            // the last, so the range varies only in one byte
                            self.tokenizer.skip_comments_and_whitespace();
                            let src_pos = self.tokenizer.pos();
                            let src_low = self.expect_string_with_len(range_start.len())?;
                            self.tokenizer.skip_comments_and_whitespace();
                            let src_high_pos = self.tokenizer.pos();
                            let src_high = self.expect_string_with_len(range_start.len())?;
                            if src_low.split_last().map(|(_, prefix)| prefix)
                                != src_high.split_last().map(|(_, prefix)| prefix)
                            {
                                return invalid_token_err(
                                    src_high_pos,
                                    Some(Token::String(src_high)),
                                );
                            }
                            let src_last_range = *src_low.last().expect("known to be non-empty")
                                ..=*src_high.last().expect("known to be non-empty");
                            self.tokenizer.skip_comments_and_whitespace();
                            let dest_pos = self.tokenizer.pos();
                            match self.tokenizer.next_token()? {
                                // single dest string: dest increments along
                                // with src across the range
                                Some(Token::String(dest))
                                    if dest.len() >= 2 && dest.len() % 2 == 0 =>
                                {
                                    let mut src = src_low;
                                    for (index, src_last_byte) in src_last_range.enumerate() {
                                        *src.last_mut().expect("known to be non-empty") =
                                            src_last_byte;
                                        let mut dest = dest.clone();
                                        let [.., last] = &mut *dest else {
                                            unreachable!();
                                        };
                                        // NOTE(review): this add can overflow
                                        // when dest's last byte + range size
                                        // exceeds 0xFF (and `index as u8`
                                        // truncates); per CMap semantics the
                                        // increment should carry into the
                                        // preceding bytes — confirm inputs
                                        // never hit this.
                                        *last += index as u8;
                                        insert_mapping(src_pos, &src, dest_pos, &dest)?;
                                    }
                                }
                                Some(token @ Token::String(_)) => {
                                    todo!("odd number of dest bytes: {token:?}");
                                }
                                // array form: one dest string per src code
                                Some(Token::ArrayStart) => {
                                    let mut src = src_low;
                                    for src_last_byte in src_last_range {
                                        *src.last_mut().expect("known to be non-empty") =
                                            src_last_byte;
                                        self.tokenizer.skip_comments_and_whitespace();
                                        let dest_pos = self.tokenizer.pos();
                                        match self.tokenizer.next_token()? {
                                            Some(Token::String(dest))
                                                if dest.len() >= 2 && dest.len() % 2 == 0 =>
                                            {
                                                insert_mapping(src_pos, &src, dest_pos, &dest)?;
                                            }
                                            Some(token @ Token::String(_)) => {
                                                todo!("odd number of dest bytes: {token:?}");
                                            }
                                            token => return invalid_token_err(dest_pos, token),
                                        }
                                    }
                                    self.expect(Token::ArrayEnd)?;
                                }
                                token => return invalid_token_err(dest_pos, token),
                            }
                        }
                        self.expect_executable_name(b"endbfrange")?;
                    }
                    Some(Token::ExecutableName(name)) if name == b"beginbfchar" => {
                        // bfchar: one src string and one dest string per entry
                        for _ in 0..size {
                            self.tokenizer.skip_comments_and_whitespace();
                            let src_pos = self.tokenizer.pos();
                            let src = self.expect_string_with_len(range_start.len())?;
                            self.tokenizer.skip_comments_and_whitespace();
                            let dest_pos = self.tokenizer.pos();
                            match self.tokenizer.next_token()? {
                                Some(Token::String(dest)) if dest.len() % 2 == 0 => {
                                    insert_mapping(src_pos, &src, dest_pos, &dest)?;
                                }
                                Some(token @ Token::String(_)) => {
                                    todo!("odd number of dest bytes: {token:?}");
                                }
                                token => return invalid_token_err(dest_pos, token),
                            }
                        }
                        self.expect_executable_name(b"endbfchar")?;
                    }
                    token => todo!("{token:?}"),
                },
                Some(Token::ExecutableName(name)) if name == b"endcmap" => {
                    break;
                }
                token => todo!("{token:?}"),
            }
        }
        // standard epilogue:
        //   CMapName currentdict /CMap defineresource pop end end
        self.expect_executable_name(b"CMapName")?;
        self.expect_executable_name(b"currentdict")?;
        self.expect_literal_name(b"CMap")?;
        self.expect_executable_name(b"defineresource")?;
        self.expect_executable_name(b"pop")?;
        self.expect_executable_name(b"end")?;
        self.expect_executable_name(b"end")?;
        // nothing may follow the epilogue
        self.tokenizer.skip_comments_and_whitespace();
        let eof_pos = self.tokenizer.pos();
        if let token @ Some(_) = self.tokenizer.next_token()? {
            return invalid_token_err(eof_pos, token);
        }
        Ok(PdfFontToUnicode {
            base_map,
            char_map_name: PdfName::new(char_map_name_pos, Arc::<[u8]>::from(char_map_name)),
            src_ranges: Arc::new([
                PdfString::new(range_start_pos, ArcOrRef::Arc(range_start.into()))
                    ..=PdfString::new(range_end_pos, ArcOrRef::Arc(range_end.into())),
            ]),
            to_unicode_map: Arc::new(to_unicode_map),
        })
    }
}

1507
src/pdf/font/type_1_parse.rs Normal file

File diff suppressed because it is too large Load diff

2142
src/pdf/object.rs Normal file

File diff suppressed because it is too large Load diff

1341
src/pdf/parse.rs Normal file

File diff suppressed because it is too large Load diff

1107
src/pdf/render.rs Normal file

File diff suppressed because it is too large Load diff

66
src/pdf/stream_filters.rs Normal file
View file

@ -0,0 +1,66 @@
use crate::pdf::{
object::{PdfDictionary, PdfName},
parse::{PdfInputPosition, PdfParse, PdfParseError},
pdf_parse,
};
pub mod flate;
// A stream's /Filter entry; names not listed here are preserved in `Unknown`
// and rejected later when decoding is attempted.
pdf_parse! {
    #[pdf(name)]
    #[derive(Clone, Debug, PartialEq, Eq)]
    #[non_exhaustive]
    pub enum PdfStreamFilter {
        #[pdf(name = "ASCIIHexDecode")]
        AsciiHexDecode,
        #[pdf(name = "ASCII85Decode")]
        Ascii85Decode,
        #[pdf(name = "LZWDecode")]
        LzwDecode,
        #[pdf(name = "FlateDecode")]
        FlateDecode,
        #[pdf(name = "RunLengthDecode")]
        RunLengthDecode,
        #[pdf(name = "CCITTFaxDecode")]
        CcittFaxDecode,
        #[pdf(name = "JBIG2Decode")]
        Jbig2Decode,
        #[pdf(name = "DCTDecode")]
        DctDecode,
        #[pdf(name = "JPXDecode")]
        JpxDecode,
        #[pdf(name = "Crypt")]
        Crypt,
        #[pdf(other)]
        Unknown(PdfName),
    }
}
impl PdfStreamFilter {
    /// Runs this filter's decoder over `encoded_data`.
    ///
    /// `filter_parms` is the stream's `DecodeParms` dictionary for this
    /// filter and `stream_pos` is used for error reporting. Only
    /// `FlateDecode` is implemented so far; the other standard filters are
    /// `todo!()`, and an unrecognized filter name is reported as
    /// `UnknownStreamFilter`.
    pub fn decode_stream_data(
        &self,
        filter_parms: PdfDictionary,
        stream_pos: PdfInputPosition,
        encoded_data: &[u8],
    ) -> Result<Vec<u8>, PdfParseError> {
        match self {
            PdfStreamFilter::AsciiHexDecode => todo!(),
            PdfStreamFilter::Ascii85Decode => todo!(),
            PdfStreamFilter::LzwDecode => todo!(),
            PdfStreamFilter::FlateDecode => {
                flate2::PdfFilterParmsFlateDecode::parse(filter_parms.into())?
                    .decode_stream_data(stream_pos, encoded_data)
            }
            PdfStreamFilter::RunLengthDecode => todo!(),
            PdfStreamFilter::CcittFaxDecode => todo!(),
            PdfStreamFilter::Jbig2Decode => todo!(),
            PdfStreamFilter::DctDecode => todo!(),
            PdfStreamFilter::JpxDecode => todo!(),
            PdfStreamFilter::Crypt => todo!(),
            PdfStreamFilter::Unknown(filter) => Err(PdfParseError::UnknownStreamFilter {
                pos: stream_pos,
                filter: filter.clone(),
            }),
        }
    }
}

View file

@ -0,0 +1,74 @@
use crate::pdf::{
object::PdfDictionary,
parse::{PdfInputPosition, PdfParseError},
pdf_parse,
stream_filters::PdfStreamFilter,
};
use std::{io::Read, num::NonZero};
// The `DecodeParms` dictionary of a FlateDecode filter; all entries are
// optional, with defaults supplied by the getters on this type's impl.
pdf_parse! {
    #[pdf]
    #[derive(Clone, Debug, Default)]
    pub struct PdfFilterParmsFlateDecode {
        #[pdf(name = "Predictor")]
        pub predictor: Option<NonZero<u32>>,
        #[pdf(name = "Colors")]
        pub colors: Option<NonZero<u32>>,
        #[pdf(name = "BitsPerComponent")]
        pub bits_per_component: Option<NonZero<u32>>,
        #[pdf(name = "Columns")]
        pub columns: Option<NonZero<u32>>,
        #[pdf(flatten)]
        pub rest: PdfDictionary,
    }
}
impl PdfFilterParmsFlateDecode {
    /// The filter these parameters belong to.
    pub const FILTER: PdfStreamFilter = PdfStreamFilter::FlateDecode;
    /// Defaults used when the corresponding `DecodeParms` entry is absent.
    pub const DEFAULT_PREDICTOR: NonZero<u32> = const { NonZero::new(1).unwrap() };
    pub const DEFAULT_COLORS: NonZero<u32> = const { NonZero::new(1).unwrap() };
    pub const DEFAULT_BITS_PER_COMPONENT: NonZero<u32> = const { NonZero::new(8).unwrap() };
    pub const DEFAULT_COLUMNS: NonZero<u32> = const { NonZero::new(1).unwrap() };
    /// The `Predictor` entry, defaulting to 1 (no prediction).
    pub fn predictor(&self) -> NonZero<u32> {
        self.predictor.unwrap_or(Self::DEFAULT_PREDICTOR)
    }
    /// The `Colors` entry, defaulting to 1.
    pub fn colors(&self) -> NonZero<u32> {
        self.colors.unwrap_or(Self::DEFAULT_COLORS)
    }
    /// The `BitsPerComponent` entry, defaulting to 8.
    pub fn bits_per_component(&self) -> NonZero<u32> {
        self.bits_per_component
            .unwrap_or(Self::DEFAULT_BITS_PER_COMPONENT)
    }
    /// The `Columns` entry, defaulting to 1.
    pub fn columns(&self) -> NonZero<u32> {
        self.columns.unwrap_or(Self::DEFAULT_COLUMNS)
    }
    /// Inflates `encoded_data` (zlib format), then applies the configured
    /// predictor; only the default predictor 1 (none) is implemented, so any
    /// other value is `todo!`. Decode failures are reported as
    /// `StreamFilterError` at `stream_pos`.
    pub fn decode_stream_data(
        &self,
        stream_pos: PdfInputPosition,
        encoded_data: &[u8],
    ) -> Result<Vec<u8>, PdfParseError> {
        // exhaustive destructure: adding a field to the struct forces this
        // function to be revisited
        let Self {
            predictor: _,
            colors: _,
            bits_per_component: _,
            columns: _,
            rest: _,
        } = self;
        let mut decoded_data = Vec::new();
        flate2::bufread::ZlibDecoder::new(encoded_data)
            .read_to_end(&mut decoded_data)
            .map_err(|e| PdfParseError::StreamFilterError {
                pos: stream_pos,
                filter: Self::FILTER.into(),
                error: e.to_string(),
            })?;
        match self.predictor() {
            // no predictor: the inflated bytes are the final data
            Self::DEFAULT_PREDICTOR => Ok(decoded_data),
            // TIFF/PNG predictors would post-process rows here, using
            // `colors()`, `bits_per_component()` and `columns()`
            predictor => todo!("{predictor}"),
        }
    }
}

File diff suppressed because it is too large Load diff

382
src/util.rs Normal file
View file

@ -0,0 +1,382 @@
use std::{
any::{Any, TypeId},
borrow::Borrow,
cell::Cell,
collections::HashMap,
fmt,
hash::{Hash, Hasher},
sync::Arc,
};
/// Either an owned `Arc<T>` or a plain shared reference, usable wherever a
/// `&T` is needed (see the `Deref`/`AsRef`/`Borrow` impls).
pub enum ArcOrRef<'a, T: ?Sized> {
    Arc(Arc<T>),
    Ref(&'a T),
}
// `AsRef`/`Borrow` expose the referent regardless of which variant holds it
// (both go through the `Deref` impl further down).
impl<'a, T: ?Sized> AsRef<T> for ArcOrRef<'a, T> {
    fn as_ref(&self) -> &T {
        self
    }
}
impl<'a, T: ?Sized> Borrow<T> for ArcOrRef<'a, T> {
    fn borrow(&self) -> &T {
        self
    }
}
impl<'a, T: ?Sized> From<Arc<T>> for ArcOrRef<'a, T> {
    fn from(value: Arc<T>) -> Self {
        Self::Arc(value)
    }
}
impl<'a, T: ?Sized> From<&'a T> for ArcOrRef<'a, T> {
    fn from(value: &'a T) -> Self {
        Self::Ref(value)
    }
}
// Available when a bare reference has a `Default` (e.g. `&str`, `&[T]`).
impl<'a, T: ?Sized> Default for ArcOrRef<'a, T>
where
    &'a T: Default,
{
    fn default() -> Self {
        Self::Ref(Default::default())
    }
}
// `Clone` is cheap: a refcount bump or a reference copy, never a clone of
// the referent itself.
impl<T: ?Sized> Clone for ArcOrRef<'_, T> {
    fn clone(&self) -> Self {
        match self {
            Self::Arc(v) => Self::Arc(v.clone()),
            Self::Ref(v) => Self::Ref(v),
        }
    }
}
// The hashing/comparison impls below all delegate to the referent, so an
// `Arc` and a `Ref` wrapping equal values behave identically (e.g. as map
// keys).
impl<T: ?Sized + Hash> Hash for ArcOrRef<'_, T> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        T::hash(self, state)
    }
}
impl<'a, 'b, T: ?Sized + PartialEq<U>, U: ?Sized> PartialEq<ArcOrRef<'b, U>> for ArcOrRef<'a, T> {
    fn eq(&self, other: &ArcOrRef<'b, U>) -> bool {
        T::eq(self, other)
    }
}
impl<T: ?Sized + Eq> Eq for ArcOrRef<'_, T> {}
impl<'a, 'b, T: ?Sized + PartialOrd<U>, U: ?Sized> PartialOrd<ArcOrRef<'b, U>> for ArcOrRef<'a, T> {
    fn partial_cmp(&self, other: &ArcOrRef<'b, U>) -> Option<std::cmp::Ordering> {
        T::partial_cmp(self, other)
    }
}
impl<T: ?Sized + Ord> Ord for ArcOrRef<'_, T> {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        T::cmp(self, other)
    }
}
// the common access path for both variants
impl<T: ?Sized> std::ops::Deref for ArcOrRef<'_, T> {
    type Target = T;
    fn deref(&self) -> &Self::Target {
        match self {
            ArcOrRef::Arc(v) => v,
            ArcOrRef::Ref(v) => v,
        }
    }
}
// formatting delegates to the referent
impl<T: ?Sized + fmt::Debug> fmt::Debug for ArcOrRef<'_, T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        T::fmt(self, f)
    }
}
impl<T: ?Sized + fmt::Display> fmt::Display for ArcOrRef<'_, T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        T::fmt(self, f)
    }
}
/// a stable alternative to `CloneToUninit` for `Arc`
///
/// Lets [`ArcOrRef`] turn a borrowed `&T` into an owned `Arc<T>` even for
/// the unsized types used here (`[T]`, `str`).
pub trait ArcFromRef {
    /// like `Arc::new(Self::clone(self))` but works for unsized types too
    fn arc_from_ref(&self) -> Arc<Self>;
    /// generic version of `Arc::make_mut`
    fn make_mut(this: &mut Arc<Self>) -> &mut Self;
}
// sized case: a plain clone boxed into a new Arc
impl<T: Clone> ArcFromRef for T {
    fn arc_from_ref(&self) -> Arc<Self> {
        Arc::new(Self::clone(self))
    }
    fn make_mut(this: &mut Arc<Self>) -> &mut Self {
        Arc::make_mut(this)
    }
}
// unsized slice case: `Arc::from` copies the elements
impl<T: Clone> ArcFromRef for [T] {
    fn arc_from_ref(&self) -> Arc<Self> {
        Arc::from(self)
    }
    fn make_mut(this: &mut Arc<Self>) -> &mut Self {
        Arc::make_mut(this)
    }
}
// unsized str case
impl ArcFromRef for str {
    fn arc_from_ref(&self) -> Arc<Self> {
        Arc::from(self)
    }
    fn make_mut(this: &mut Arc<Self>) -> &mut Self {
        Arc::make_mut(this)
    }
}
impl<'a, T: ?Sized + ArcFromRef> ArcOrRef<'a, T> {
    /// Extracts an owning `Arc`, cloning the pointee only in the `Ref` case.
    pub fn into_arc(this: Self) -> Arc<T> {
        match this {
            ArcOrRef::Arc(arc) => arc,
            ArcOrRef::Ref(borrowed) => borrowed.arc_from_ref(),
        }
    }
    /// Converts `this` to the `Arc` variant in place (cloning if it was
    /// borrowed) and returns a mutable handle to that `Arc`.
    pub fn make_arc(this: &mut Self) -> &mut Arc<T> {
        if let ArcOrRef::Ref(borrowed) = *this {
            *this = ArcOrRef::Arc(borrowed.arc_from_ref());
        }
        match this {
            ArcOrRef::Arc(arc) => arc,
            ArcOrRef::Ref(_) => unreachable!(),
        }
    }
    /// Copy-on-write access: returns unique mutable access to the pointee,
    /// cloning as needed (like `Arc::make_mut`).
    pub fn make_mut(this: &mut Self) -> &mut T {
        T::make_mut(Self::make_arc(this))
    }
}
// Private seal: prevents code outside this module from implementing
// `SupportsDagDebugState` for new types.
trait DagDebugStateSealed {}
/// Types whose values can be de-duplicated by identity inside
/// [`DagDebugState`] (the sealed impls below cover the `Arc` shapes).
#[expect(private_bounds)]
pub trait SupportsDagDebugState: DagDebugStateSealed + 'static + Clone {
    /// cheap identity key (a raw pointer in all current impls) used to
    /// detect repeated DAG nodes
    type Key: Clone + Hash + Eq + 'static;
    /// Returns the identity key for `this`.
    fn key(this: &Self) -> Self::Key;
}
impl<T: 'static> DagDebugStateSealed for Arc<T> {}
impl<T: 'static> SupportsDagDebugState for Arc<T> {
    type Key = *const T;
    fn key(this: &Self) -> Self::Key {
        // Identity of the allocation, not the value: two `Arc`s share a key
        // only when they point at the same heap cell.
        Arc::as_ptr(this)
    }
}
impl<T: 'static> DagDebugStateSealed for Arc<[T]> {}
impl<T: 'static> SupportsDagDebugState for Arc<[T]> {
    // fat pointer (data + length), but still compared purely by identity
    type Key = *const [T];
    fn key(this: &Self) -> Self::Key {
        Arc::as_ptr(this)
    }
}
impl DagDebugStateSealed for Arc<str> {}
impl SupportsDagDebugState for Arc<str> {
    // fat pointer (data + length), but still compared purely by identity
    type Key = *const str;
    fn key(this: &Self) -> Self::Key {
        Arc::as_ptr(this)
    }
}
// Object-safe view of a `DagDebugStatePart<T>`, so parts for different `T`
// can live in one type-erased map inside `DagDebugState`.
trait DagDebugStatePartTrait: 'static {
    // Clears all remembered values and restarts id numbering.
    fn reset(&mut self);
    // Escape hatch used by `DagDebugState::with_part` to `downcast_mut`
    // back to the concrete part type.
    fn as_any_mut(&mut self) -> &mut dyn Any;
}
// Per-type de-duplication state.
struct DagDebugStatePart<T: SupportsDagDebugState> {
    // identity key -> (assigned id, a clone of the value). Keeping the clone
    // also keeps the pointed-to allocation alive while its key is stored.
    table: HashMap<T::Key, (u64, T)>,
    // next id to hand out; ids are assigned in first-seen order from 0
    next_id: u64,
}
impl<T: SupportsDagDebugState> DagDebugStatePartTrait for DagDebugStatePart<T> {
    fn reset(&mut self) {
        // Drop every remembered value and restart id assignment at zero.
        self.table.clear();
        self.next_id = 0;
    }
    fn as_any_mut(&mut self) -> &mut dyn Any {
        self
    }
}
impl<T: SupportsDagDebugState> DagDebugStatePart<T> {
    /// Records `value` by identity. Returns `Old` with the previously
    /// assigned id if this identity was seen before, otherwise assigns the
    /// next sequential id, remembers a clone, and returns `New`.
    fn insert(&mut self, value: &T) -> DagDebugStateInsertResult {
        use std::collections::hash_map::Entry;
        let key = T::key(value);
        match self.table.entry(key) {
            Entry::Occupied(occupied) => DagDebugStateInsertResult::Old {
                id: occupied.get().0,
            },
            Entry::Vacant(vacant) => {
                let id = self.next_id;
                self.next_id += 1;
                vacant.insert((id, T::clone(value)));
                DagDebugStateInsertResult::New { id }
            }
        }
    }
}
/// Starts with no remembered values and id numbering at zero.
impl<T: SupportsDagDebugState> Default for DagDebugStatePart<T> {
    fn default() -> Self {
        Self {
            table: HashMap::new(),
            next_id: 0,
        }
    }
}
/// Thread-local de-duplication state used while formatting DAG-shaped data:
/// remembers which nodes were already printed so repeats can be shown as
/// just an id (see [`DagDebugState::scope`] and `debug_or_id`).
pub struct DagDebugState {
    // type-erased `DagDebugStatePart<T>` instances, keyed by their `TypeId`
    parts: std::cell::RefCell<HashMap<TypeId, Box<dyn DagDebugStatePartTrait>>>,
    // number of nested `scope` calls; all parts are reset when it drops to 0
    ref_count: Cell<usize>,
}
/// Result of [`DagDebugState::insert`]: whether the value was seen for the
/// first time, together with the id assigned to it.
#[derive(Clone, Copy, Debug)]
pub enum DagDebugStateInsertResult {
    /// first occurrence; `id` was newly assigned
    New { id: u64 },
    /// value was inserted before under the same `id`
    Old { id: u64 },
}
impl DagDebugStateInsertResult {
    /// Returns the assigned id regardless of whether the value was new.
    pub fn id(self) -> u64 {
        match self {
            Self::New { id } => id,
            Self::Old { id } => id,
        }
    }
}
impl DagDebugState {
    /// Runs `f` with mutable access to the part for type `T`, creating the
    /// part on first use and recovering its concrete type via downcast.
    fn with_part<T: SupportsDagDebugState, R>(
        &self,
        f: impl FnOnce(&mut DagDebugStatePart<T>) -> R,
    ) -> R {
        let mut parts = self.parts.borrow_mut();
        let Some(part) = parts
            .entry(TypeId::of::<DagDebugStatePart<T>>())
            .or_insert_with(|| Box::new(DagDebugStatePart::<T>::default()))
            .as_any_mut()
            .downcast_mut::<DagDebugStatePart<T>>()
        else {
            // the entry keyed by `TypeId::of::<DagDebugStatePart<T>>()` is
            // always of that exact type, so the downcast cannot fail
            unreachable!()
        };
        f(part)
    }
    /// Records `value` by identity, returning its id and whether it was new.
    pub fn insert<T: SupportsDagDebugState>(&self, value: &T) -> DagDebugStateInsertResult {
        self.with_part(|part: &mut DagDebugStatePart<T>| part.insert(value))
    }
    /// Like [`Self::debug_or_id_with`], using `value`'s `Debug` impl for the
    /// first occurrence and `abbreviated`'s `Display` impl for repeats.
    pub fn debug_or_id<'a, T: SupportsDagDebugState + fmt::Debug, Abbreviated: fmt::Display>(
        &self,
        value: &'a T,
        abbreviated: Abbreviated,
    ) -> impl fmt::Debug + fmt::Display + use<'a, T, Abbreviated> {
        self.debug_or_id_with(value, fmt::Debug::fmt, move |f| abbreviated.fmt(f))
    }
    /// Returns a formatting adapter that prints `#<id> ` followed by the full
    /// `debug_value` output the first time `value` is seen in this state,
    /// and `#<id> ` followed by `debug_abbreviated` output on repeats.
    ///
    /// Note: `insert` runs once, here, when the adapter is constructed — not
    /// each time the adapter is formatted.
    pub fn debug_or_id_with<
        'a,
        T: SupportsDagDebugState,
        DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result,
        DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result,
    >(
        &self,
        value: &'a T,
        debug_value: DebugValue,
        debug_abbreviated: DebugAbbreviated,
    ) -> impl fmt::Debug + fmt::Display + use<'a, T, DebugValue, DebugAbbreviated> {
        // captures the insert result so formatting is deterministic no matter
        // how many times the adapter is printed
        struct DebugOrIdWith<
            'a,
            T: SupportsDagDebugState,
            DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result,
            DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result,
        > {
            insert_result: DagDebugStateInsertResult,
            value: &'a T,
            debug_value: DebugValue,
            debug_abbreviated: DebugAbbreviated,
        }
        impl<
            'a,
            T: SupportsDagDebugState,
            DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result,
            DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result,
        > fmt::Debug for DebugOrIdWith<'a, T, DebugValue, DebugAbbreviated>
        {
            // `Debug` and `Display` intentionally produce identical output
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                fmt::Display::fmt(self, f)
            }
        }
        impl<
            'a,
            T: SupportsDagDebugState,
            DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result,
            DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result,
        > fmt::Display for DebugOrIdWith<'a, T, DebugValue, DebugAbbreviated>
        {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                // always prefix with the id, then either the full value
                // (first occurrence) or the abbreviated form (repeat)
                write!(f, "#{} ", self.insert_result.id())?;
                match self.insert_result {
                    DagDebugStateInsertResult::New { id: _ } => (self.debug_value)(self.value, f),
                    DagDebugStateInsertResult::Old { id: _ } => (self.debug_abbreviated)(f),
                }
            }
        }
        DebugOrIdWith {
            insert_result: self.insert(value),
            value,
            debug_value,
            debug_abbreviated,
        }
    }
    /// Increments `ref_count` and returns a guard that decrements it on drop;
    /// when the count returns to zero all parts are reset, so remembered ids
    /// only live for the duration of the outermost [`Self::scope`] call.
    #[must_use]
    fn inc_ref_count_scope(&self) -> impl Sized {
        struct DecRefCountOnDrop<'a>(&'a DagDebugState);
        impl Drop for DecRefCountOnDrop<'_> {
            fn drop(&mut self) {
                self.0.ref_count.set(self.0.ref_count.get() - 1);
                if self.0.ref_count.get() == 0 {
                    self.0
                        .parts
                        .borrow_mut()
                        .values_mut()
                        .for_each(|v| v.reset());
                }
            }
        }
        self.ref_count.set(
            self.ref_count
                .get()
                .checked_add(1)
                .expect("too many nested calls"),
        );
        DecRefCountOnDrop(self)
    }
    /// Runs `f` with the thread-local state; nested calls on the same thread
    /// share one state, which is cleared when the outermost call returns.
    pub fn scope<R>(f: impl FnOnce(&Self) -> R) -> R {
        thread_local! {
            static STATE: DagDebugState = DagDebugState { parts: Default::default(), ref_count: Cell::new(0) };
        }
        STATE.with(|state| {
            let _scope = state.inc_ref_count_scope();
            f(state)
        })
    }
}

View file

@ -1,232 +0,0 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use quick_xml::{
Writer,
events::{BytesDecl, BytesStart, BytesText, Event},
};
use std::fmt;
// Adapts a `fmt::Write` sink into an `std::io::Write` one, buffering bytes
// until they form complete UTF-8 characters.
struct FmtToIoAdaptor<W> {
    writer: W,
    // buffered bytes of an incomplete UTF-8 sequence (4 is the longest
    // possible encoding); only the first `partial_char_len` bytes are valid
    partial_char: [u8; 4],
    // how many bytes of `partial_char` are currently filled
    partial_char_len: u8,
}
impl<W: fmt::Write> FmtToIoAdaptor<W> {
    /// Wraps `writer` with an empty partial-character buffer.
    fn new(writer: W) -> Self {
        Self {
            writer,
            partial_char: [0; 4],
            partial_char_len: 0,
        }
    }
    /// Returns the inner writer, or `Err` if a truncated UTF-8 sequence is
    /// still buffered (i.e. the byte stream ended mid-character).
    fn finish(self) -> Result<W, fmt::Error> {
        let Self {
            writer,
            partial_char: _,
            partial_char_len,
        } = self;
        if partial_char_len != 0 {
            Err(fmt::Error)
        } else {
            Ok(writer)
        }
    }
    /// Feeds one byte: appends it to the partial-character buffer, then
    /// forwards the buffered bytes to `writer` once they form a complete
    /// character, errors on an invalid sequence, and just buffers an
    /// incomplete one.
    fn write_byte(&mut self, b: u8) -> std::io::Result<()> {
        let Self {
            writer,
            partial_char,
            partial_char_len,
        } = self;
        // Indexing cannot overflow: a 3-byte prefix of a valid 4-byte
        // encoding is the longest "incomplete" state UTF-8 allows, so
        // `partial_char_len` is at most 3 here.
        partial_char[usize::from(*partial_char_len)] = b;
        *partial_char_len += 1;
        match str::from_utf8(&partial_char[..usize::from(*partial_char_len)]) {
            Ok(s) => {
                // complete character: flush it and empty the buffer
                *partial_char_len = 0;
                writer.write_str(s).map_err(std::io::Error::other)
            }
            Err(e) => {
                if e.error_len().is_some() {
                    // definitely invalid UTF-8 — report it and resynchronize
                    *partial_char_len = 0;
                    Err(std::io::Error::new(std::io::ErrorKind::InvalidData, e))
                } else {
                    // incomplete but still potentially valid: wait for more bytes
                    Ok(())
                }
            }
        }
    }
}
impl<W: fmt::Write> std::io::Write for FmtToIoAdaptor<W> {
    /// Writes `buf`, validating that the byte stream is UTF-8 and forwarding
    /// it to the inner `fmt::Write`. Characters split across `write` calls
    /// are buffered; invalid sequences yield `ErrorKind::InvalidData`.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        // Fast path: with no partially-buffered character pending, a buffer
        // that is entirely valid UTF-8 can be forwarded in a single call
        // instead of being revalidated one byte (and one `write_str` per
        // character) at a time.
        if self.partial_char_len == 0 {
            if let Ok(s) = str::from_utf8(buf) {
                self.writer.write_str(s).map_err(std::io::Error::other)?;
                return Ok(buf.len());
            }
        }
        // Slow path: `write_byte` handles characters split across `write`
        // calls, buffers that end mid-character, and invalid sequences.
        for &b in buf {
            self.write_byte(b)?;
        }
        Ok(buf.len())
    }
    /// No-op: `fmt::Write` has no flush concept.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
// Tag of an XML node: either a comment or a normal named element.
#[derive(Clone, PartialEq, Eq, Hash)]
pub(crate) enum ElementTag {
    // an XML comment; its text lives in `Element::text`
    Comment,
    // a regular element with the given tag name; an empty name makes
    // `Element::write_to` emit a bare text node
    Normal(String),
}
impl ElementTag {
    /// Returns the tag name for a normal element, or `None` for a comment.
    pub(crate) fn normal(&self) -> Option<&str> {
        if let ElementTag::Normal(name) = self {
            Some(name)
        } else {
            None
        }
    }
}
impl fmt::Debug for ElementTag {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Comment => f.write_str("<Comment>"),
Self::Normal(v) => v.fmt(f),
}
}
}
/// like python's xml.etree.ElementTree.Element
#[derive(Clone, Debug)]
pub(crate) struct Element {
    pub(crate) tag: ElementTag,
    // attribute name/value pairs, preserving insertion order
    pub(crate) attrib: Vec<(String, String)>,
    /// text contained in this element but before any children
    pub(crate) text: String,
    // child elements in document order
    pub(crate) children: Vec<Element>,
    /// text after the end of this element
    pub(crate) tail: String,
}
/// equivalent to python `xml.etree.ElementTree.tostring(self, encoding="unicode")`
impl fmt::Display for Element {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // serialize through the fmt->io adaptor straight into the formatter
        let mut xml_writer = Writer::new(FmtToIoAdaptor::new(f));
        self.write_to(&mut xml_writer).map_err(|_| fmt::Error)?;
        // `finish` fails if the XML writer emitted a truncated UTF-8 sequence
        let adaptor = xml_writer.into_inner();
        adaptor.finish()?;
        Ok(())
    }
}
impl Element {
    /// Creates a normal element with the given tag name and attributes and
    /// no text, children, or tail.
    pub(crate) fn new(tag: String, attrib: impl IntoIterator<Item = (String, String)>) -> Self {
        Self {
            tag: ElementTag::Normal(tag),
            attrib: Vec::from_iter(attrib),
            text: String::new(),
            children: Vec::new(),
            tail: String::new(),
        }
    }
    /// equivalent of python's `xml.etree.ElementTree.Comment()`
    pub(crate) fn comment(text: String) -> Self {
        Self {
            tag: ElementTag::Comment,
            attrib: Vec::new(),
            text,
            children: Vec::new(),
            tail: String::new(),
        }
    }
    /// equivalent to python `"".join(self.itertext())`
    ///
    /// Concatenates all text content inside this element (its `text` plus,
    /// recursively, each child's inner text and `tail`), skipping comments.
    pub(crate) fn inner_text(&self) -> String {
        let mut retval = String::new();
        // recursive worker appending into the shared output buffer
        fn helper(element: &Element, retval: &mut String) {
            let Element {
                tag,
                attrib: _,
                text,
                children,
                tail: _,
            } = element;
            // comments contribute nothing, matching python's itertext()
            let ElementTag::Normal(_) = tag else {
                return;
            };
            retval.push_str(text);
            for child in children {
                helper(child, retval);
                // a child's tail belongs inside *this* element's content
                retval.push_str(&child.tail);
            }
        }
        helper(self, &mut retval);
        retval
    }
    /// equivalent of python's `xml.etree.ElementTree.SubElement()`
    ///
    /// Appends a new empty child element and returns a reference to it.
    pub(crate) fn sub_element(
        &mut self,
        tag: String,
        attrib: impl IntoIterator<Item = (String, String)>,
    ) -> &mut Self {
        self.children.push(Self::new(tag, attrib));
        self.children.last_mut().expect("just pushed")
    }
    /// Serializes this element (and then its `tail` text) to `writer`,
    /// byte-for-byte matching python's ElementTree output.
    pub(crate) fn write_to(&self, writer: &mut Writer<impl std::io::Write>) -> std::io::Result<()> {
        let Element {
            tag,
            attrib,
            text,
            children,
            tail,
        } = self;
        match tag {
            ElementTag::Comment => {
                // `attrib`/`children` are ignored for comments (always empty
                // when built via `Element::comment`)
                writer.write_event(Event::Comment(BytesText::new(text)))?;
            }
            ElementTag::Normal(tag) if tag.is_empty() => {
                // empty tag name: emit a bare text node, no element markup
                writer.write_event(Event::Text(BytesText::new(text)))?;
            }
            ElementTag::Normal(tag)
                if attrib.is_empty() && text.is_empty() && children.is_empty() =>
            {
                // write element like `<br />` to match python instead of like `<br/>`
                writer.write_event(Event::Empty(BytesStart::from_content(
                    tag.clone() + " ",
                    tag.len(),
                )))?;
            }
            ElementTag::Normal(tag) => {
                let mut element_writer = writer.create_element(tag);
                for (name, value) in attrib {
                    element_writer = element_writer.with_attribute((name.as_str(), value.as_str()));
                }
                if text.is_empty() && children.is_empty() {
                    // attributes but no content: `<tag a="b"/>`
                    element_writer.write_empty()?;
                } else {
                    element_writer.write_inner_content(|writer| {
                        writer.write_event(Event::Text(BytesText::new(text)))?;
                        for child in children {
                            child.write_to(writer)?;
                        }
                        Ok(())
                    })?;
                }
            }
        }
        // tail text always follows the element's closing markup
        writer.write_event(Event::Text(BytesText::new(tail)))?;
        Ok(())
    }
    /// equivalent of python's `xml.etree.ElementTree(self).write(writer, encoding='utf-8', xml_declaration=xml_declaration)`
    pub(crate) fn write(
        &self,
        writer: impl std::io::Write,
        xml_declaration: bool,
    ) -> std::io::Result<()> {
        let mut writer = Writer::new(writer);
        if xml_declaration {
            // use specific string to match python
            writer.write_event(Event::Decl(BytesDecl::from_start(
                BytesStart::from_content("xml version='1.0' encoding='utf-8'", 3),
            )))?;
            writer.write_event(Event::Text(BytesText::new("\n")))?;
        }
        self.write_to(&mut writer)
    }
}