Compare commits

..

33 commits

Author SHA1 Message Date
38a1fb328b
add build dependencies to readme
All checks were successful
/ test (push) Successful in 3m54s
2026-01-06 16:13:36 -08:00
4177a58c8d
add rust code to readme
All checks were successful
/ test (push) Successful in 3m50s
2026-01-06 16:05:02 -08:00
bc550be122
clean up dead code
All checks were successful
/ test (push) Successful in 3m47s
2026-01-06 15:29:14 -08:00
b68cb274da
change to a library 2026-01-06 15:24:16 -08:00
76438b727c
rename src/main.rs -> src/lib.rs 2026-01-06 15:21:46 -08:00
2636ab2518
Merge remote-tracking branch 'origin/wip-experimental-rust-port2'
All checks were successful
/ test (push) Successful in 5m0s
2026-01-06 14:46:38 -08:00
8fd55cdda8
attempt to fix CI
All checks were successful
/ test (push) Successful in 4m27s
2026-01-06 14:29:06 -08:00
56ee3c5c43
add rust to CI 2026-01-06 14:27:49 -08:00
63698fd90f
fix python code not getting installed properly 2026-01-06 14:19:54 -08:00
9bf15dc9d0
it works! produces the exact same xml as the python version
Some checks failed
/ test (push) Failing after 24s
2026-01-06 13:54:53 -08:00
73c45323c8
seems to work 2026-01-06 13:36:04 -08:00
040afcc435
extracts pdf items using mupdf-sys directly
Some checks failed
/ test (push) Failing after 24s
2026-01-06 08:00:38 -08:00
a677cd8a33
wip 2026-01-05 18:33:44 -08:00
103f986bc0
wip 2026-01-05 17:53:57 -08:00
f9a24f4c48
switching to using mupdf-sys directly since mupdf hides all the necessary functionality 2026-01-05 14:17:49 -08:00
fcf1c63cb7
wip 2026-01-05 11:27:52 -08:00
c58bc23904
wip 2026-01-05 09:41:56 -08:00
3d66c853f6
wip 2026-01-05 09:30:06 -08:00
e9830566c0
fix typo in parse_powerisa_pdf.py 2026-01-05 06:57:29 -08:00
442afe5f06
wip 2026-01-04 20:01:13 -08:00
45e8925d34
use Display for errors rather than Debug 2026-01-04 18:33:41 -08:00
b1d83b1d84
switch font names to not include tag 2026-01-04 18:33:07 -08:00
c8cd234d8f
wip porting to rust
Some checks failed
/ test (push) Failing after 27s
2026-01-02 16:09:43 -08:00
718de40b09
wip 2026-01-02 07:34:40 -08:00
9e090a66a3
port more code 2026-01-02 04:50:51 -08:00
7ecdbc0239
porting more 2026-01-02 03:53:16 -08:00
e1277bbb90
add insn bit fields 2026-01-02 03:30:35 -08:00
3fc0e92f95
add ElementBodyBuilder 2026-01-02 03:09:49 -08:00
104ee37933
port more to rust 2026-01-02 02:47:21 -08:00
8643d47338
port Font to Rust 2026-01-01 23:14:08 -08:00
944ae4bf41
port QuadTree to rust 2026-01-01 08:41:26 -08:00
da339ce00a
test in forgejo ci
All checks were successful
/ test (push) Successful in 2m47s
2025-12-31 21:42:22 -08:00
2381421776
add copyright headers and check script 2025-12-31 20:43:18 -08:00
28 changed files with 6489 additions and 12027 deletions

View file

@ -0,0 +1,46 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information

# CI: build/test the Rust parser AND the old Python parser against the exact
# PowerISA 3.1C pdf, and check both produce the identical expected XML output.
on: [push, pull_request]
env:
  # sha256 of the exact OPF PowerISA v3.1C pdf the parser targets
  PDF_HASH: 56372d23ece7e9e2c1b381a639443982a3e16e38109df1c141d655b779b61fdb
  # sha256 of the expected powerisa-instructions.xml (both implementations must match it)
  OUTPUT_XML_HASH: c0b4592cbd0a3e59b9b2931a6a75a3d87ebf23bf453e8587a1522dd157f15ee9
jobs:
  test:
    runs-on: debian-12
    container:
      image: git.libre-chip.org/libre-chip/fayalite-deps:latest
    steps:
      - uses: actions/checkout@v3
        with:
          # full history -- check-copyright.sh inspects the git tree
          fetch-depth: 0
      - run: |
          scripts/check-copyright.sh
      - uses: https://git.libre-chip.org/mirrors/rust-cache@v2
        with:
          # only write the cache from the default branch
          save-if: ${{ github.ref == 'refs/heads/master' }}
      - run: |
          apt-get update -qq
          apt-get install -qq python3-venv wget
          # copy of https://files.openpower.foundation/s/9izgC5Rogi5Ywmm/download/OPF_PowerISA_v3.1C.pdf
          wget -O OPF_PowerISA_v3.1C.pdf https://libre-chip.org/OPF_PowerISA_v3.1C.pdf
          echo "$PDF_HASH OPF_PowerISA_v3.1C.pdf" | sha256sum -c
      - run: |
          cargo test
      - run: |
          cargo build --release
      - run: |
          # keep only the page-progress lines on the console; dump the log tail on failure
          cargo run --release -- OPF_PowerISA_v3.1C.pdf &> >(tee out.log | grep '^page ') || { tail -n1000 out.log; false; }
          echo "expected output (not all instructions are decoded yet, change when the output is improved):"
          echo "$OUTPUT_XML_HASH powerisa-instructions.xml" | sha256sum -c
          mv powerisa-instructions.xml powerisa-instructions-rust.xml
      - run: |
          python3 -m venv --upgrade-deps .venv
          . .venv/bin/activate
          pip install -e .
          parse_powerisa_pdf OPF_PowerISA_v3.1C.pdf &> >(tee out.log | grep '^page ') || { tail -n1000 out.log; false; }
          echo "expected output (not all instructions are decoded yet, change when the output is improved):"
          echo "$OUTPUT_XML_HASH powerisa-instructions.xml" | sha256sum -c
          mv powerisa-instructions.xml powerisa-instructions-python.xml

3
.gitignore vendored
View file

@ -1,7 +1,10 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
/.venv
/.vscode
*.egg-info
__pycache__
*.log
/powerisa-instructions.xml
/*.pdf
/target

308
Cargo.lock generated
View file

@ -3,10 +3,56 @@
version = 4
[[package]]
name = "adler2"
version = "2.0.1"
name = "aho-corasick"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
dependencies = [
"memchr",
]
[[package]]
name = "bindgen"
version = "0.71.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
dependencies = [
"bitflags",
"cexpr",
"clang-sys",
"itertools",
"proc-macro2",
"quote",
"regex",
"rustc-hash",
"shlex",
"syn",
]
[[package]]
name = "bitflags"
version = "2.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
[[package]]
name = "cc"
version = "1.2.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a0aeaff4ff1a90589618835a598e545176939b97874f7abc7851caa0618f203"
dependencies = [
"find-msvc-tools",
"shlex",
]
[[package]]
name = "cexpr"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
dependencies = [
"nom",
]
[[package]]
name = "cfg-if"
@ -15,39 +61,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "crc32fast"
version = "1.5.0"
name = "clang-sys"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
dependencies = [
"cfg-if",
"glob",
"libc",
"libloading",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]]
name = "either"
version = "1.15.0"
@ -55,55 +78,228 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "flate2"
version = "1.1.5"
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "find-msvc-tools"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff"
[[package]]
name = "glob"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
[[package]]
name = "hashbrown"
version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
[[package]]
name = "indexmap"
version = "2.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2"
dependencies = [
"crc32fast",
"miniz_oxide",
"equivalent",
"hashbrown",
]
[[package]]
name = "miniz_oxide"
name = "itertools"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
dependencies = [
"either",
]
[[package]]
name = "libc"
version = "0.2.178"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091"
[[package]]
name = "libloading"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
dependencies = [
"adler2",
"simd-adler32",
"cfg-if",
"windows-link",
]
[[package]]
name = "libm"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de"
[[package]]
name = "memchr"
version = "2.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "mupdf-sys"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13e9a0d4e844ab50315d43312f3d62f72c77205b07c8ee21cbd4b52bdc2a9910"
dependencies = [
"bindgen",
"cc",
"pkg-config",
"regex",
"zerocopy",
]
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "parse_powerisa_pdf"
version = "0.1.0"
dependencies = [
"flate2",
"rayon",
"indexmap",
"libm",
"mupdf-sys",
"quick-xml",
]
[[package]]
name = "rayon"
version = "1.11.0"
name = "pkg-config"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "proc-macro2"
version = "1.0.104"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9695f8df41bb4f3d222c95a67532365f569318332d03d5f3f67f37b20e6ebdf0"
dependencies = [
"either",
"rayon-core",
"unicode-ident",
]
[[package]]
name = "rayon-core"
version = "1.13.0"
name = "quick-xml"
version = "0.38.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
"memchr",
]
[[package]]
name = "simd-adler32"
version = "0.3.8"
name = "quote"
version = "1.0.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
[[package]]
name = "rustc-hash"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "syn"
version = "2.0.112"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21f182278bf2d2bcb3c88b1b08a37df029d71ce3d3ae26168e3c653b213b99d4"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
[[package]]
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
[[package]]
name = "zerocopy"
version = "0.8.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.8.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

View file

@ -1,10 +1,17 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
[package]
name = "parse_powerisa_pdf"
version = "0.1.0"
edition = "2024"
license = "LGPL-3.0-or-later"
edition = "2024"
repository = ""
keywords = []
categories = []
rust-version = "1.89.0"
[dependencies]
flate2 = "1.1.5"
rayon = "1.11.0"
indexmap = "2.12.1"
libm = "0.2.15"
mupdf-sys = { version = "0.5.0", default-features = false }
quick-xml = "0.38.4"

View file

@ -1,5 +1,38 @@
<!--
SPDX-License-Identifier: LGPL-3.0-or-later
See Notices.txt for copyright information
-->
parser for the OPF PowerISA 3.1C pdf to attempt to extract all instructions' pseudo-code including subscripts/superscripts and other formatting
# Using the new Rust code:
Usage:
* Download the OPF PowerISA 3.1C pdf (yes you need that exact version) from <https://openpower.foundation/specifications/isa/>
* Install Rust -- you need version 1.89.0 or later.
Getting it from https://rustup.rs/ is recommended.
* Install required build dependencies:
On Debian 12:
```bash
sudo apt update
sudo apt install build-essential clang unzip
```
* Compile and run:
```bash
cargo run -- path/to/downloaded/OPF_PowerISA_v3.1C.pdf > out.log
```
* This will spit out lots of errors and then successfully create
the output file -- `powerisa-instructions.xml` in the current directory.
# Using the old Python code:
Usage:
* Download the OPF PowerISA 3.1C pdf (yes you need that exact version) from <https://openpower.foundation/specifications/isa/>
* Obtain CPython 3.11 (the default `python3` in [Debian Bookworm](https://www.debian.org/releases/bookworm/))

View file

@ -1,3 +1,5 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
from __future__ import annotations
from collections import defaultdict
from collections.abc import Generator, Iterable, Iterator, Callable
@ -763,7 +765,7 @@ class Page:
unprocessed_non_text: SetById[LTLine | LTRect]
@staticmethod
def from_lt_page(page_num: int, page: LTPage) -> Page:
def from_lt_page(page_num: int, page: LTPage, first_seen_fonts: defaultdict[str, set[float]]) -> Page:
qt: defaultdict[TextSection, QuadTree[Char | LTLine | LTRect]] = defaultdict(QuadTree)
unprocessed_chars = defaultdict(lambda: defaultdict(SetById[Char]))
unprocessed_non_text: SetById[LTLine | LTRect] = SetById()
@ -802,20 +804,25 @@ class Page:
raise AssertionError(
f"char not in text section: {element}\npage_num={page_num}")
continue
font_size = round(element.size, 3)
char = Char(
text=element.get_text(),
font=Font(font_name=element.fontname, size=round(element.size, 3)),
font=Font(font_name=element.fontname, size=font_size),
adv=element.adv,
min_x=element.x0,
min_y=element.y0,
max_x=element.x1,
max_y=element.y1,
)
if font_size not in first_seen_fonts[element.fontname]:
first_seen_fonts[element.fontname].add(font_size)
print(f"first seen font: {element.fontname!r} {font_size}: page {page_num} {char!r}")
qt[text_section].insert(char.min_x, char.min_y, char)
unprocessed_chars[text_section][char.font].add(char)
for i in unprocessed_chars.values():
for j in i.values():
j.sort(key=Char.top_down_left_to_right_sort_key)
for text_section, i in unprocessed_chars.items():
for chars in i.values():
chars.sort(key=Char.top_down_left_to_right_sort_key)
print(f"first char: {text_section!r}: {next(iter(chars), None)!r}")
unknown_fonts=[]
unknown_font_errors=[]
for i in unprocessed_chars.values():
@ -1179,13 +1186,14 @@ class Parser:
def __pages_gen(file: Path, page_numbers: Iterable[int] | None) -> Generator[Page, None, None]:
if page_numbers is not None:
page_numbers = sorted(i - 1 for i in page_numbers)
first_seen_fonts = defaultdict(set)
for i, page in enumerate(extract_pages(file, page_numbers=page_numbers)):
if page_numbers is not None:
page_num = page_numbers[i] + 1
else:
page_num = i + 1
print(f"page {page_num}")
yield Page.from_lt_page(page_num=page_num, page=page)
yield Page.from_lt_page(page_num=page_num, page=page, first_seen_fonts=first_seen_fonts)
def parse_pdf(self, file: Path, page_numbers: Iterable[int] | None = None):
self.pages = Pages(pages_gen=Parser.__pages_gen(
@ -1501,7 +1509,7 @@ class Parser:
f"instruction bit fields box has wrong number of horizontal lines:\n{h_lines}")
if len(v_lines) < 2:
raise InsnParseError(
f"instruction bit fields box has too few vertical lines:\n{h_lines}")
f"instruction bit fields box has too few vertical lines:\n{v_lines}")
bottom_line, top_line = h_lines
box_min_x = v_lines[0].x0
box_max_x = v_lines[-1].x0

View file

@ -1,3 +1,5 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
from __future__ import annotations
from typing import Callable, Generic, Iterable, Iterator, TypeVar
from math import frexp, isfinite, isnan, ldexp

View file

@ -1,3 +1,5 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
from collections import abc
from typing import Callable, Generic, Iterable, Iterator, Protocol, TypeAlias, TypeVar, overload

View file

@ -1,3 +1,5 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
[build-system]
requires = ["setuptools >= 61.0"]
build-backend = "setuptools.build_meta"
@ -11,4 +13,7 @@ dependencies = [
requires-python = ">= 3.11"
[project.scripts]
parse_powerisa_pdf = "parse_powerisa_pdf.parse_powerisa_pdf:main"
parse_powerisa_pdf = "parse_powerisa_pdf.parse_powerisa_pdf:main"
[tool.setuptools]
packages = ["parse_powerisa_pdf"]

70
scripts/check-copyright.sh Executable file
View file

@ -0,0 +1,70 @@
#!/bin/bash
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
#
# Checks that every file tracked by git starts with the expected
# SPDX/copyright header for its file type.
set -e

# Print an error message and abort the whole check.
function fail()
{
    local error="$1"
    echo "error: $error" >&2
    exit 1
}

# Abort, pointing at file:line (line arg is 0-based, printed 1-based).
function fail_file()
{
    local file="$1" line="$2" error="$3"
    fail "$file:$((line + 1)): $error"
}

# Check that the leading lines of $1 match the regexes passed as $2...
function check_file()
{
    local file="$1" regexes=("${@:2}")
    local lines
    mapfile -t lines < "$file"
    if (("${#lines[@]}" == 0)); then
        return # empty file, no copyright needed
    fi
    local line
    for line in "${!regexes[@]}"; do
        # Bug fix: the loop variable is `line`; the old code indexed with an
        # unset `i`, which arithmetic-evaluates to 0, so every iteration
        # compared line 0 against regexes[0] and any header line after the
        # first was never actually checked.
        eval '[[ "${lines[line]}" =~ '"${regexes[line]}"' ]]' ||
            fail_file "$file" "$line" "doesn't match regex: ${regexes[line]}"
    done
}

# Header regexes; the double-quoted parts match literally inside the eval'd [[ =~ ]].
POUND_HEADER=('^"# SPDX-License-Identifier: LGPL-3.0-or-later"$' '^"# See Notices.txt for copyright information"$')
SLASH_HEADER=('^"// SPDX-License-Identifier: LGPL-3.0-or-later"$' '^"// See Notices.txt for copyright information"$')
MD_HEADER=('^"<!--"$' '^"SPDX-License-Identifier: LGPL-3.0-or-later"$' '^"See Notices.txt for copyright information"$')

function main()
{
    local IFS=$'\n'
    [[ -z "$(git status --porcelain)" ]] || fail "git repo is dirty"
    local file
    for file in $(git ls-tree --name-only --full-tree -r HEAD); do
        case "/$file" in
            /Cargo.lock)
                # generated file
                ;;
            */LICENSE.md|*/Notices.txt)
                # copyright file
                ;;
            /.forgejo/workflows/*.yml|*/.gitignore|*.toml|*.py)
                check_file "$file" "${POUND_HEADER[@]}"
                ;;
            *.md)
                check_file "$file" "${MD_HEADER[@]}"
                ;;
            *.sh)
                # shebang line first, then the pound header
                check_file "$file" '^'\''#!'\' "${POUND_HEADER[@]}"
                ;;
            *.rs)
                check_file "$file" "${SLASH_HEADER[@]}"
                ;;
            *)
                fail_file "$file" 0 "unimplemented file kind -- you need to add it to $0"
                ;;
        esac
    done
}
main

3831
src/lib.rs

File diff suppressed because it is too large Load diff

View file

@ -1,38 +1,6 @@
use parse_powerisa_pdf::pdf::Pdf;
use std::{
error::Error,
io::{IsTerminal, Read},
process::ExitCode,
};
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
fn main() -> Result<ExitCode, Box<dyn Error>> {
let args: Vec<_> = std::env::args_os().collect();
if args
.iter()
.skip(1)
.any(|v| v.as_encoded_bytes().starts_with(b"-") && v != "-")
|| args.len() > 2
|| (args.len() == 1 && std::io::stdin().is_terminal())
{
eprintln!(
"Usage: {} [<path/to/file.pdf>]\n\
Reads the PDF file passed on the command line,\n\
Reads stdin if no arguments are passed or if the file name is just a dash `-`.\n\
If stdin is a terminal, you have to pass `-` explicitly to read from it.",
args[0].display()
);
return Ok(ExitCode::FAILURE);
}
let file_path = args.get(1).filter(|v| *v != "-");
let input = if let Some(file_path) = file_path {
std::fs::read(file_path)?
} else {
let mut buf = Vec::new();
std::io::stdin().lock().read_to_end(&mut buf)?;
buf
};
let pdf = Pdf::parse(input)?;
println!("{:#?}", pdf.trailer.trailer_dictionary());
todo!();
Ok(ExitCode::SUCCESS)
/// Thin binary entry point: all logic lives in the `parse_powerisa_pdf` library crate.
fn main() -> std::process::ExitCode {
    parse_powerisa_pdf::main()
}

871
src/mupdf_ffi.rs Normal file
View file

@ -0,0 +1,871 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use mupdf_sys::{
fz_buffer, fz_buffer_storage, fz_clone_context, fz_color_params, fz_colorspace, fz_concat,
fz_context, fz_device, fz_document, fz_drop_buffer, fz_drop_context, fz_drop_device,
fz_drop_document, fz_drop_page, fz_drop_path, fz_drop_text, fz_error_type_FZ_ERROR_GENERIC,
fz_font, fz_font_ascender, fz_font_descender, fz_font_is_bold, fz_font_is_italic, fz_font_name,
fz_matrix, fz_matrix_expansion, fz_page, fz_path, fz_path_walker, fz_point, fz_rect,
fz_stroke_state, fz_text, fz_text_item, fz_text_span, fz_transform_point,
fz_transform_point_xy, fz_transform_vector, fz_walk_path, mupdf_document_page_count,
mupdf_drop_error, mupdf_error_t, mupdf_load_page, mupdf_new_base_context,
mupdf_new_derived_device, mupdf_open_document, mupdf_page_to_xml, mupdf_pdf_page_transform,
mupdf_run_page, pdf_page, pdf_page_from_fz_page,
};
use std::{
cell::{Cell, UnsafeCell},
ffi::{CStr, c_int, c_void},
fmt,
marker::PhantomData,
mem::ManuallyDrop,
ptr::{self, NonNull},
sync::{Mutex, OnceLock},
};
/// Error reported by a MuPDF call: the raw `fz_error_type_*` code plus the
/// message text copied out of the C-side error struct.
#[derive(Debug)]
pub(crate) struct MuPdfError {
    // raw MuPDF error-type code (one of the `fz_error_type_*` constants)
    type_: c_int,
    message: String,
}

impl MuPdfError {
    /// Wrap an arbitrary message as a generic (`FZ_ERROR_GENERIC`) MuPDF error.
    fn new_generic(message: impl ToString) -> Self {
        Self {
            type_: fz_error_type_FZ_ERROR_GENERIC as _,
            message: message.to_string(),
        }
    }
}
impl fmt::Display for MuPdfError {
    /// Human-readable form: the raw MuPDF error code followed by its message.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { type_, message } = self;
        write!(f, "MuPDF error: type: {type_}, message: {message}")
    }
}
// Marker impl: lets `MuPdfError` be used wherever a `std::error::Error` is expected.
impl std::error::Error for MuPdfError {}
/// Owning wrapper around a C-allocated `mupdf_error_t`; releases it via
/// `mupdf_drop_error` on drop (used by `mupdf_try` so the C struct is freed
/// even after its contents are copied out).
struct OwnedMuPdfError(NonNull<mupdf_error_t>);

impl Drop for OwnedMuPdfError {
    fn drop(&mut self) {
        unsafe {
            mupdf_drop_error(self.0.as_ptr());
        }
    }
}
/// Run an FFI call that reports failure through an error out-pointer
/// (`*mut mupdf_error_t`).
///
/// Returns the closure's value when the out-pointer stays null; otherwise
/// copies the error's type and message into an owned `MuPdfError` (the
/// C-side struct itself is freed by `OwnedMuPdfError`'s drop).
unsafe fn mupdf_try<R>(f: impl FnOnce(&mut *mut mupdf_error_t) -> R) -> Result<R, MuPdfError> {
    let mut err = ptr::null_mut();
    let retval = f(&mut err);
    let Some(err) = NonNull::new(err).map(OwnedMuPdfError) else {
        return Ok(retval);
    };
    unsafe {
        Err(MuPdfError {
            type_: (*err.0.as_ptr()).type_,
            // lossy conversion: the C message is not guaranteed to be UTF-8
            message: CStr::from_ptr((*err.0.as_ptr()).message)
                .to_string_lossy()
                .into_owned(),
        })
    }
}
/// Owned MuPDF context (`fz_context`), released with `fz_drop_context`.
pub(crate) struct Context(NonNull<fz_context>);

impl Context {
    /// Clone a fresh context from the process-wide base context, creating the
    /// base context on first use. The base context sits behind a `Mutex` so
    /// only one thread clones from it at a time.
    fn new() -> Self {
        struct BaseContext(NonNull<fz_context>);
        // the raw pointer makes BaseContext !Send by default; it is only ever
        // used under the Mutex below — TODO confirm fz_clone_context is safe
        // to call on a context created by another thread
        unsafe impl Send for BaseContext {}
        static CTX: OnceLock<Mutex<BaseContext>> = OnceLock::new();
        let base = CTX
            .get_or_init(|| {
                let ctx = unsafe { mupdf_new_base_context() };
                let Some(ctx) = NonNull::new(ctx).map(BaseContext) else {
                    panic!("failed to allocate a MuPDF context");
                };
                Mutex::new(ctx)
            })
            .lock()
            .expect("not poisoned");
        let ctx = unsafe { fz_clone_context(base.0.as_ptr()) };
        let Some(ctx) = NonNull::new(ctx).map(Self) else {
            // release the lock before panicking
            drop(base);
            panic!("failed to clone a MuPDF context");
        };
        ctx
    }
    /// Run `f` with this thread's `Context`, lazily creating it on first use.
    pub(crate) fn with<R>(f: impl FnOnce(&Self) -> R) -> R {
        thread_local! {
            static CTX: Context = Context::new();
        }
        CTX.with(f)
    }
    /// Borrow as a copyable `ContextRef` for passing into FFI wrappers.
    pub(crate) fn as_ref(&self) -> ContextRef<'_> {
        unsafe { ContextRef::from_ptr(self.0.as_ptr()) }
    }
}

impl Drop for Context {
    fn drop(&mut self) {
        unsafe {
            fz_drop_context(self.0.as_ptr());
        }
    }
}
/// Borrowed, `Copy` handle to a `fz_context`.
///
/// Stored as `&UnsafeCell<fz_context>` because the C API takes
/// `*mut fz_context` (obtained via `.0.get()`) while Rust callers only hold
/// shared references.
#[derive(Clone, Copy)]
pub(crate) struct ContextRef<'ctx>(&'ctx UnsafeCell<fz_context>);

impl<'ctx> ContextRef<'ctx> {
    /// Wrap a raw context pointer; `ptr` must stay valid for `'ctx`
    /// (callers pass pointers owned by a live `Context` or handed in by
    /// MuPDF callbacks).
    unsafe fn from_ptr(ptr: *mut fz_context) -> Self {
        Self(unsafe { &*ptr.cast() })
    }
}

impl<'ctx> From<&'ctx Context> for ContextRef<'ctx> {
    fn from(value: &'ctx Context) -> Self {
        value.as_ref()
    }
}
/// Owned MuPDF document (`fz_document`), released with `fz_drop_document`.
pub(crate) struct Document<'ctx> {
    ptr: *mut fz_document,
    ctx: ContextRef<'ctx>,
}

impl<'ctx> Document<'ctx> {
    /// Open the document at `file_name` using `ctx`.
    pub(crate) fn open(
        ctx: impl Into<ContextRef<'ctx>>,
        file_name: &CStr,
    ) -> Result<Document<'ctx>, MuPdfError> {
        let ctx = ctx.into();
        unsafe {
            mupdf_try(|errptr| mupdf_open_document(ctx.0.get(), file_name.as_ptr(), errptr))
                .map(|ptr| Document { ptr, ctx })
        }
    }
    /// Number of pages; errors if the C-side count doesn't fit in `usize`
    /// (e.g. a negative value).
    pub(crate) fn page_count(&self) -> Result<usize, MuPdfError> {
        unsafe {
            mupdf_try(|errptr| mupdf_document_page_count(self.ctx.0.get(), self.ptr, errptr))?
                .try_into()
                .map_err(MuPdfError::new_generic)
        }
    }
    /// Load page `page` (0-based index, per the `usize` parameter; converted
    /// to the C-side integer type, erroring on overflow).
    pub(crate) fn load_page(&self, page: usize) -> Result<Page<'ctx>, MuPdfError> {
        let page = page.try_into().map_err(MuPdfError::new_generic)?;
        unsafe {
            mupdf_try(|errptr| mupdf_load_page(self.ctx.0.get(), self.ptr, page, errptr))
                .map(|ptr| Page { ptr, ctx: self.ctx })
        }
    }
}

impl<'ctx> Drop for Document<'ctx> {
    fn drop(&mut self) {
        unsafe {
            fz_drop_document(self.ctx.0.get(), self.ptr);
        }
    }
}
/// Owned MuPDF byte buffer (`fz_buffer`), released with `fz_drop_buffer`.
struct Buffer<'ctx> {
    ptr: *mut fz_buffer,
    ctx: ContextRef<'ctx>,
}

impl<'ctx> Buffer<'ctx> {
    /// View the buffer's bytes.
    ///
    /// `fz_buffer_storage` yields the data pointer and length; the `len == 0`
    /// guard returns an empty slice instead of building one from a
    /// possibly-null pointer.
    fn storage(&mut self) -> &mut [u8] {
        unsafe {
            let mut ptr = ptr::null_mut();
            let len = fz_buffer_storage(self.ctx.0.get(), self.ptr, &raw mut ptr);
            if len == 0 {
                &mut []
            } else {
                std::slice::from_raw_parts_mut(ptr, len)
            }
        }
    }
}

impl<'ctx> Drop for Buffer<'ctx> {
    fn drop(&mut self) {
        unsafe {
            fz_drop_buffer(self.ctx.0.get(), self.ptr);
        }
    }
}
/// Owned document page (`fz_page`), released with `fz_drop_page`.
pub(crate) struct Page<'ctx> {
    ptr: *mut fz_page,
    ctx: ContextRef<'ctx>,
}

impl<'ctx> Page<'ctx> {
    /// The context this page was loaded with.
    pub(crate) fn ctx(&self) -> ContextRef<'ctx> {
        self.ctx
    }
    /// Interpret the page's content through `device` with transform `ctm`;
    /// MuPDF calls back into the device's function table while this runs.
    pub(crate) fn run<T>(
        &self,
        device: &Device<'ctx, T>,
        ctm: fz_matrix,
    ) -> Result<(), MuPdfError> {
        unsafe {
            mupdf_try(|errptr| {
                mupdf_run_page(
                    self.ctx.0.get(),
                    self.ptr,
                    device.dev,
                    ctm,
                    // no cookie (progress/abort tracking)
                    ptr::null_mut(),
                    errptr,
                )
            })
        }
    }
    /// Render the page to MuPDF's XML text representation; errors if the
    /// resulting bytes are not valid UTF-8.
    pub(crate) fn to_xml(&self) -> Result<String, MuPdfError> {
        unsafe {
            let mut buffer =
                mupdf_try(|errptr| mupdf_page_to_xml(self.ctx.0.get(), self.ptr, errptr))
                    .map(|ptr| Buffer { ptr, ctx: self.ctx })?;
            Ok(str::from_utf8(buffer.storage())
                .map_err(MuPdfError::new_generic)?
                .into())
        }
    }
    /// Downcast to the PDF-specific page object; `None` when
    /// `pdf_page_from_fz_page` returns null (presumably for non-PDF
    /// documents — confirm against MuPDF docs).
    pub(crate) fn pdf_page<'a>(&'a self) -> Option<PdfPageRef<'a, 'ctx>> {
        unsafe {
            let ptr = pdf_page_from_fz_page(self.ctx.0.get(), self.ptr);
            NonNull::new(ptr).map(|ptr| PdfPageRef {
                ptr: &*ptr.as_ptr().cast(),
                ctx: self.ctx,
            })
        }
    }
}

impl<'ctx> Drop for Page<'ctx> {
    fn drop(&mut self) {
        unsafe {
            fz_drop_page(self.ctx.0.get(), self.ptr);
        }
    }
}
/// Borrowed, `Copy` reference to the PDF-specific page object (`pdf_page`),
/// obtained from `Page::pdf_page`. Lives no longer than the `Page` (`'a`).
#[derive(Clone, Copy)]
pub(crate) struct PdfPageRef<'a, 'ctx> {
    ptr: &'a UnsafeCell<pdf_page>,
    ctx: ContextRef<'ctx>,
}

impl<'a, 'ctx> PdfPageRef<'a, 'ctx> {
    /// The page's transform matrix, via `pdf_page_transform`.
    pub(crate) fn transform(self) -> Result<fz_matrix, MuPdfError> {
        unsafe {
            mupdf_try(|errptr| mupdf_pdf_page_transform(self.ctx.0.get(), self.ptr.get(), errptr))
        }
    }
}
/// Owned MuPDF device whose C callbacks forward to a Rust value of type `T`.
///
/// `dev` actually points at a `DeviceStruct<T>`: the `fz_device` base followed
/// by the user value (written in `Device::new`, dropped by the device's
/// `drop_device` callback). `PhantomData<Box<Cell<T>>>` records logical
/// ownership of that heap-allocated `T`; presumably `Cell` is there to keep
/// the type `!Sync` — confirm.
pub(crate) struct Device<'ctx, T: 'ctx> {
    dev: *mut fz_device,
    ctx: ContextRef<'ctx>,
    _phantom: PhantomData<Box<Cell<T>>>,
}

/// Rust-side handlers for the `fz_device` callbacks this crate wires up in
/// `Device::new` (path and text operations). These are invoked from C while
/// `Page::run` interprets the page content; the passed `Path`/`Text` wrappers
/// are borrowed for the duration of the call only.
pub(crate) trait DeviceCallbacks<'ctx> {
    fn fill_path(&self, ctx: ContextRef<'ctx>, path: &Path<'ctx>, even_odd: bool, ctm: fz_matrix);
    fn stroke_path(&self, ctx: ContextRef<'ctx>, path: &Path<'ctx>, ctm: fz_matrix);
    fn clip_path(
        &self,
        ctx: ContextRef<'ctx>,
        path: &Path<'ctx>,
        even_odd: bool,
        ctm: fz_matrix,
        scissor: fz_rect,
    );
    fn clip_stroke_path(
        &self,
        ctx: ContextRef<'ctx>,
        path: &Path<'ctx>,
        ctm: fz_matrix,
        scissor: fz_rect,
    );
    fn fill_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix);
    fn stroke_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix);
    fn clip_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix, scissor: fz_rect);
    fn clip_stroke_text(
        &self,
        ctx: ContextRef<'ctx>,
        text: &Text<'ctx>,
        ctm: fz_matrix,
        scissor: fz_rect,
    );
    fn ignore_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix);
}
impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> {
pub(crate) fn new(ctx: impl Into<ContextRef<'ctx>>, value: Box<T>) -> Result<Self, MuPdfError> {
let ctx = ctx.into();
unsafe {
let dev_ptr = mupdf_try(|errptr| {
mupdf_new_derived_device::<DeviceStruct<T>>(
ctx.0.get(),
c"parse_powerisa_pdf::mupdf_ffi::Device",
errptr,
)
})?;
let retval = Device {
dev: dev_ptr.cast(),
ctx,
_phantom: PhantomData,
};
(&raw mut (*dev_ptr).value).write(value);
let fz_device {
drop_device,
fill_path,
stroke_path,
clip_path,
clip_stroke_path,
fill_text,
stroke_text,
clip_text,
clip_stroke_text,
ignore_text,
..
} = &mut (*dev_ptr).base;
*drop_device = Some(Self::drop_device_fn);
*fill_path = Some(Self::fill_path_fn);
*stroke_path = Some(Self::stroke_path_fn);
*clip_path = Some(Self::clip_path_fn);
*clip_stroke_path = Some(Self::clip_stroke_path_fn);
*fill_text = Some(Self::fill_text_fn);
*stroke_text = Some(Self::stroke_text_fn);
*clip_text = Some(Self::clip_text_fn);
*clip_stroke_text = Some(Self::clip_stroke_text_fn);
*ignore_text = Some(Self::ignore_text_fn);
Ok(retval)
}
}
pub(crate) fn get(&self) -> &T {
unsafe { &(*self.dev.cast::<DeviceStruct<T>>()).value }
}
unsafe extern "C" fn drop_device_fn(_ctx: *mut fz_context, dev: *mut fz_device) {
unsafe {
(&raw mut (*dev.cast::<DeviceStruct<T>>()).value).drop_in_place();
}
}
unsafe extern "C" fn fill_path_fn(
ctx: *mut fz_context,
dev: *mut fz_device,
path: *const fz_path,
even_odd: c_int,
ctm: fz_matrix,
_color_space: *mut fz_colorspace,
_color: *const f32,
_alpha: f32,
_color_params: fz_color_params,
) {
let ctx = unsafe { ContextRef::from_ptr(ctx) };
let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
this.fill_path(
ctx,
&ManuallyDrop::new(Path {
ptr: path.cast_mut(),
ctx,
}),
even_odd != 0,
ctm,
);
}
unsafe extern "C" fn stroke_path_fn(
ctx: *mut fz_context,
dev: *mut fz_device,
path: *const fz_path,
_stroke_state: *const fz_stroke_state,
ctm: fz_matrix,
_color_space: *mut fz_colorspace,
_color: *const f32,
_alpha: f32,
_color_params: fz_color_params,
) {
let ctx = unsafe { ContextRef::from_ptr(ctx) };
let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
this.stroke_path(
ctx,
&ManuallyDrop::new(Path {
ptr: path.cast_mut(),
ctx,
}),
ctm,
);
}
/// C callback for `fz_device::clip_path`; forwards to `T::clip_path`.
unsafe extern "C" fn clip_path_fn(
    ctx: *mut fz_context,
    dev: *mut fz_device,
    path: *const fz_path,
    even_odd: ::std::os::raw::c_int,
    ctm: fz_matrix,
    scissor: fz_rect,
) {
    let ctx = unsafe { ContextRef::from_ptr(ctx) };
    let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
    this.clip_path(
        ctx,
        // Borrowed path — wrap in ManuallyDrop so we don't drop MuPDF's object.
        &ManuallyDrop::new(Path {
            ptr: path.cast_mut(),
            ctx,
        }),
        even_odd != 0,
        ctm,
        scissor,
    );
}
/// C callback for `fz_device::clip_stroke_path`; forwards to
/// `T::clip_stroke_path`. The stroke state is currently ignored.
unsafe extern "C" fn clip_stroke_path_fn(
    ctx: *mut fz_context,
    dev: *mut fz_device,
    path: *const fz_path,
    _stroke_state: *const fz_stroke_state,
    ctm: fz_matrix,
    scissor: fz_rect,
) {
    let ctx = unsafe { ContextRef::from_ptr(ctx) };
    let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
    this.clip_stroke_path(
        ctx,
        // Borrowed path — wrap in ManuallyDrop so we don't drop MuPDF's object.
        &ManuallyDrop::new(Path {
            ptr: path.cast_mut(),
            ctx,
        }),
        ctm,
        scissor,
    );
}
/// C callback for `fz_device::fill_text`; forwards to `T::fill_text`.
/// Color-related arguments are currently ignored.
unsafe extern "C" fn fill_text_fn(
    ctx: *mut fz_context,
    dev: *mut fz_device,
    text: *const fz_text,
    ctm: fz_matrix,
    _color_space: *mut fz_colorspace,
    _color: *const f32,
    _alpha: f32,
    _color_params: fz_color_params,
) {
    let ctx = unsafe { ContextRef::from_ptr(ctx) };
    let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
    this.fill_text(
        ctx,
        // Borrowed text — wrap in ManuallyDrop so we don't drop MuPDF's object.
        &ManuallyDrop::new(Text {
            ptr: text.cast_mut(),
            ctx,
        }),
        ctm,
    );
}
/// C callback for `fz_device::stroke_text`; forwards to `T::stroke_text`.
/// Stroke state and color arguments are currently ignored.
unsafe extern "C" fn stroke_text_fn(
    ctx: *mut fz_context,
    dev: *mut fz_device,
    text: *const fz_text,
    _stroke_state: *const fz_stroke_state,
    ctm: fz_matrix,
    _color_space: *mut fz_colorspace,
    _color: *const f32,
    _alpha: f32,
    _color_params: fz_color_params,
) {
    let ctx = unsafe { ContextRef::from_ptr(ctx) };
    let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
    this.stroke_text(
        ctx,
        // Borrowed text — wrap in ManuallyDrop so we don't drop MuPDF's object.
        &ManuallyDrop::new(Text {
            ptr: text.cast_mut(),
            ctx,
        }),
        ctm,
    );
}
/// C callback for `fz_device::clip_text`; forwards to `T::clip_text`.
unsafe extern "C" fn clip_text_fn(
    ctx: *mut fz_context,
    dev: *mut fz_device,
    text: *const fz_text,
    ctm: fz_matrix,
    scissor: fz_rect,
) {
    let ctx = unsafe { ContextRef::from_ptr(ctx) };
    let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
    this.clip_text(
        ctx,
        // Borrowed text — wrap in ManuallyDrop so we don't drop MuPDF's object.
        &ManuallyDrop::new(Text {
            ptr: text.cast_mut(),
            ctx,
        }),
        ctm,
        scissor,
    );
}
/// C callback for `fz_device::clip_stroke_text`; forwards to
/// `T::clip_stroke_text`. The stroke state is currently ignored.
unsafe extern "C" fn clip_stroke_text_fn(
    ctx: *mut fz_context,
    dev: *mut fz_device,
    text: *const fz_text,
    _stroke_state: *const fz_stroke_state,
    ctm: fz_matrix,
    scissor: fz_rect,
) {
    let ctx = unsafe { ContextRef::from_ptr(ctx) };
    let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
    this.clip_stroke_text(
        ctx,
        // Borrowed text — wrap in ManuallyDrop so we don't drop MuPDF's object.
        &ManuallyDrop::new(Text {
            ptr: text.cast_mut(),
            ctx,
        }),
        ctm,
        scissor,
    );
}
/// C callback for `fz_device::ignore_text` (invisible/render-mode-3 text);
/// forwards to `T::ignore_text`.
unsafe extern "C" fn ignore_text_fn(
    ctx: *mut fz_context,
    dev: *mut fz_device,
    text: *const fz_text,
    ctm: fz_matrix,
) {
    let ctx = unsafe { ContextRef::from_ptr(ctx) };
    let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
    this.ignore_text(
        ctx,
        // Borrowed text — wrap in ManuallyDrop so we don't drop MuPDF's object.
        &ManuallyDrop::new(Text {
            ptr: text.cast_mut(),
            ctx,
        }),
        ctm,
    );
}
}
impl<'ctx, T> Drop for Device<'ctx, T> {
    /// Releases the MuPDF device; this triggers `drop_device_fn`, which in
    /// turn drops the embedded Rust value.
    fn drop(&mut self) {
        unsafe {
            // FIXME: fz_close_device may throw exceptions
            // fz_close_device(self.ctx.0.get(), self.dev);
            fz_drop_device(self.ctx.0.get(), self.dev);
        }
    }
}
/// Memory layout of the enlarged device allocation: MuPDF's `fz_device`
/// header first (so a `*mut fz_device` and a `*mut DeviceStruct<T>` may be
/// cast into each other), followed by the user's boxed value.
/// `repr(C)` is required for that field-order/cast guarantee.
#[repr(C)]
struct DeviceStruct<T> {
    base: fz_device,
    // Boxed so the payload pointer stays stable regardless of T's size.
    value: Box<T>,
}
/// Receiver for the segments of a [`Path`] reported by [`Path::walk`]
/// (a Rust-side mirror of MuPDF's `fz_path_walker` callback table).
pub(crate) trait PathWalker<'ctx> {
    /// Begins a new subpath at `(x, y)`.
    fn move_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32);
    /// Appends a straight line segment to `(x, y)`.
    fn line_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32);
    /// Appends a cubic Bézier segment with control points `(x1, y1)` and
    /// `(x2, y2)`, ending at `(x3, y3)`.
    fn curve_to(
        &mut self,
        ctx: ContextRef<'ctx>,
        x1: f32,
        y1: f32,
        x2: f32,
        y2: f32,
        x3: f32,
        y3: f32,
    );
    /// Closes the current subpath.
    fn close_path(&mut self, ctx: ContextRef<'ctx>);
    /// Reports an axis-aligned rectangle with opposite corners `(x1, y1)`
    /// and `(x2, y2)`.
    ///
    /// The default decomposes the rectangle the way MuPDF does when a walker
    /// has no `rectto` callback: one `moveto`, three `lineto`s, `closepath`.
    /// (Bug fix: the previous default issued `move_to` for every corner,
    /// which produced four disconnected one-point subpaths instead of a
    /// closed rectangle outline.)
    fn rect_to(&mut self, ctx: ContextRef<'ctx>, x1: f32, y1: f32, x2: f32, y2: f32) {
        self.move_to(ctx, x1, y1);
        self.line_to(ctx, x2, y1);
        self.line_to(ctx, x2, y2);
        self.line_to(ctx, x1, y2);
        self.close_path(ctx);
    }
}
/// Forwarding impl so a `&mut W` can be handed to [`Path::walk`] without
/// giving up ownership of the walker; every method delegates to the
/// underlying walker, including `rect_to` so custom overrides still win.
impl<'ctx, T: ?Sized + PathWalker<'ctx>> PathWalker<'ctx> for &'_ mut T {
    fn move_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32) {
        (**self).move_to(ctx, x, y);
    }
    fn line_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32) {
        (**self).line_to(ctx, x, y);
    }
    fn curve_to(
        &mut self,
        ctx: ContextRef<'ctx>,
        x1: f32,
        y1: f32,
        x2: f32,
        y2: f32,
        x3: f32,
        y3: f32,
    ) {
        (**self).curve_to(ctx, x1, y1, x2, y2, x3, y3);
    }
    fn close_path(&mut self, ctx: ContextRef<'ctx>) {
        (**self).close_path(ctx);
    }
    fn rect_to(&mut self, ctx: ContextRef<'ctx>, x1: f32, y1: f32, x2: f32, y2: f32) {
        (**self).rect_to(ctx, x1, y1, x2, y2);
    }
}
/// Owned wrapper around a MuPDF `fz_path`.
///
/// NOTE(review): `Drop` calls `fz_drop_path`, so a `Path` must only be
/// constructed around a reference the wrapper owns; borrowed paths (as in the
/// device callbacks) are wrapped in `ManuallyDrop` to suppress the drop.
pub(crate) struct Path<'ctx> {
    ptr: *mut fz_path,
    ctx: ContextRef<'ctx>,
}
impl<'ctx> Path<'ctx> {
    /// Walks every segment of the path, invoking `walker` for each one.
    ///
    /// `quadto`/`curvetov`/`curvetoy` are left as `None`, so MuPDF reports
    /// those segment kinds through the plain `curveto` callback instead.
    pub(crate) fn walk<W: PathWalker<'ctx>>(&self, mut walker: W) {
        unsafe {
            fz_walk_path(
                self.ctx.0.get(),
                self.ptr,
                // One statically-allocated callback table per walker type W.
                const {
                    &fz_path_walker {
                        moveto: Some(Self::move_to_fn::<W>),
                        lineto: Some(Self::line_to_fn::<W>),
                        curveto: Some(Self::curve_to_fn::<W>),
                        closepath: Some(Self::close_path_fn::<W>),
                        quadto: None,
                        curvetov: None,
                        curvetoy: None,
                        rectto: Some(Self::rect_to_fn::<W>),
                    }
                },
                // Pass the walker by raw pointer; sound because fz_walk_path
                // runs synchronously, so `walker` outlives every callback.
                (&raw mut walker).cast(),
            );
        }
    }
    /// C trampoline for `moveto`; `arg` is the `W` passed to [`Self::walk`].
    unsafe extern "C" fn move_to_fn<W: PathWalker<'ctx>>(
        ctx: *mut fz_context,
        arg: *mut c_void,
        x: f32,
        y: f32,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut *arg.cast::<W>() };
        this.move_to(ctx, x, y);
    }
    /// C trampoline for `lineto`.
    unsafe extern "C" fn line_to_fn<W: PathWalker<'ctx>>(
        ctx: *mut fz_context,
        arg: *mut c_void,
        x: f32,
        y: f32,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut *arg.cast::<W>() };
        this.line_to(ctx, x, y);
    }
    /// C trampoline for `curveto`.
    unsafe extern "C" fn curve_to_fn<W: PathWalker<'ctx>>(
        ctx: *mut fz_context,
        arg: *mut c_void,
        x1: f32,
        y1: f32,
        x2: f32,
        y2: f32,
        x3: f32,
        y3: f32,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut *arg.cast::<W>() };
        this.curve_to(ctx, x1, y1, x2, y2, x3, y3);
    }
    /// C trampoline for `closepath`.
    unsafe extern "C" fn close_path_fn<W: PathWalker<'ctx>>(
        ctx: *mut fz_context,
        arg: *mut c_void,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut *arg.cast::<W>() };
        this.close_path(ctx);
    }
    /// C trampoline for `rectto`.
    unsafe extern "C" fn rect_to_fn<W: PathWalker<'ctx>>(
        ctx: *mut fz_context,
        arg: *mut c_void,
        x1: f32,
        y1: f32,
        x2: f32,
        y2: f32,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut *arg.cast::<W>() };
        this.rect_to(ctx, x1, y1, x2, y2);
    }
}
impl<'ctx> Drop for Path<'ctx> {
    /// Releases this wrapper's reference to the MuPDF path.
    fn drop(&mut self) {
        unsafe {
            fz_drop_path(self.ctx.0.get(), self.ptr);
        }
    }
}
/// Owned wrapper around a MuPDF `fz_text` object (a list of text spans).
///
/// Like [`Path`], `Drop` releases the reference, so borrowed `fz_text`
/// pointers (device callbacks) are wrapped in `ManuallyDrop`.
pub(crate) struct Text<'ctx> {
    ptr: *mut fz_text,
    ctx: ContextRef<'ctx>,
}
impl<'ctx> Drop for Text<'ctx> {
    /// Releases this wrapper's reference to the MuPDF text object.
    fn drop(&mut self) {
        unsafe {
            fz_drop_text(self.ctx.0.get(), self.ptr);
        }
    }
}
impl<'ctx> Text<'ctx> {
    /// Iterates over the spans of this text object, following MuPDF's
    /// intrusive `head`/`next` linked list.
    pub(crate) fn spans<'a>(&'a self) -> TextSpanIter<'a, 'ctx> {
        TextSpanIter {
            // Cast the raw span pointer to &UnsafeCell: we hand out shared
            // references while MuPDF retains mutable access to the data.
            ptr: unsafe { NonNull::new((*self.ptr).head).map(|ptr| &*ptr.as_ptr().cast()) },
            ctx: self.ctx,
            _phantom: PhantomData,
        }
    }
}
/// Iterator over the spans of a [`Text`], borrowing the text for `'a`.
#[derive(Clone)]
pub(crate) struct TextSpanIter<'a, 'ctx> {
    // Current span; None once the linked list is exhausted.
    ptr: Option<&'a UnsafeCell<fz_text_span>>,
    ctx: ContextRef<'ctx>,
    // Ties the iterator's lifetime to the owning Text.
    _phantom: PhantomData<&'a Text<'ctx>>,
}
impl<'a, 'ctx> Iterator for TextSpanIter<'a, 'ctx> {
    type Item = TextSpanRef<'a, 'ctx>;
    /// Yields the current span and advances along the `next` pointer chain.
    fn next(&mut self) -> Option<Self::Item> {
        let ptr = self.ptr?;
        // NonNull::new turns the C null terminator into iterator exhaustion.
        self.ptr = unsafe { NonNull::new((*ptr.get()).next).map(|ptr| &*ptr.as_ptr().cast()) };
        Some(TextSpanRef {
            ptr,
            ctx: self.ctx,
            _phantom: PhantomData,
        })
    }
}
/// Cheap, copyable handle to a single `fz_text_span` borrowed from a [`Text`].
#[derive(Copy, Clone)]
pub(crate) struct TextSpanRef<'a, 'ctx> {
    ptr: &'a UnsafeCell<fz_text_span>,
    ctx: ContextRef<'ctx>,
    // Ties the handle's lifetime to the owning Text.
    _phantom: PhantomData<&'a Text<'ctx>>,
}
/// Writing direction of a text span (MuPDF `wmode`).
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub(crate) enum WriteMode {
    // wmode == 0
    Horizontal,
    // wmode != 0
    Vertical,
}
impl<'a, 'ctx> TextSpanRef<'a, 'ctx> {
    /// Raw access to the underlying MuPDF span.
    #[allow(dead_code)]
    pub(crate) fn get(self) -> &'a UnsafeCell<fz_text_span> {
        self.ptr
    }
    /// The font this span is rendered with.
    ///
    /// NOTE(review): assumes the span keeps its `font` pointer alive for the
    /// lifetime of the owning `Text` (MuPDF refcounting) — confirm.
    pub(crate) fn font(self) -> FontRef<'a, 'ctx> {
        FontRef {
            ptr: unsafe { &*(*self.ptr.get()).font.cast::<UnsafeCell<fz_font>>() },
            ctx: self.ctx,
            _phantom: PhantomData,
        }
    }
    /// The span's text-rendering matrix.
    pub(crate) fn trm(self) -> fz_matrix {
        unsafe { (*self.ptr.get()).trm }
    }
    /// Writing direction: vertical iff MuPDF's `wmode` bitfield is non-zero.
    pub(crate) fn write_mode(self) -> WriteMode {
        if unsafe { (*self.ptr.get()).wmode() != 0 } {
            WriteMode::Vertical
        } else {
            WriteMode::Horizontal
        }
    }
    /// The span's items (one per glyph/char).
    ///
    /// Returns an empty slice when `len == 0` rather than building a slice
    /// from a possibly null `items` pointer.
    pub(crate) fn items(self) -> &'a [fz_text_item] {
        let len = unsafe { (*self.ptr.get()).len } as usize;
        if len == 0 {
            return &[];
        }
        unsafe { std::slice::from_raw_parts((*self.ptr.get()).items, len) }
    }
}
/// Cheap, copyable handle to a `fz_font` borrowed from a text span.
#[derive(Clone, Copy)]
pub(crate) struct FontRef<'a, 'ctx> {
    ptr: &'a UnsafeCell<fz_font>,
    ctx: ContextRef<'ctx>,
    // Ties the handle's lifetime to the owning Text.
    _phantom: PhantomData<&'a Text<'ctx>>,
}
impl<'a, 'ctx> FontRef<'a, 'ctx> {
    /// Raw access to the underlying MuPDF font.
    #[allow(dead_code)]
    pub(crate) fn get(self) -> &'a UnsafeCell<fz_font> {
        self.ptr
    }
    /// The font's name as reported by MuPDF.
    ///
    /// Panics if the name is not valid UTF-8 (treated as a broken input
    /// invariant rather than a recoverable error).
    pub(crate) fn name(self) -> &'a str {
        unsafe { CStr::from_ptr(fz_font_name(self.ctx.0.get(), self.ptr.get())) }
            .to_str()
            .expect("font name isn't valid UTF-8")
    }
    /// Whether MuPDF classifies the font as bold.
    #[allow(dead_code)]
    pub(crate) fn is_bold(self) -> bool {
        unsafe { fz_font_is_bold(self.ctx.0.get(), self.ptr.get()) != 0 }
    }
    /// Whether MuPDF classifies the font as italic.
    #[allow(dead_code)]
    pub(crate) fn is_italic(self) -> bool {
        unsafe { fz_font_is_italic(self.ctx.0.get(), self.ptr.get()) != 0 }
    }
    /// The font's ascender metric.
    pub(crate) fn ascender(self) -> f32 {
        unsafe { fz_font_ascender(self.ctx.0.get(), self.ptr.get()) }
    }
    /// The font's descender metric.
    pub(crate) fn descender(self) -> f32 {
        unsafe { fz_font_descender(self.ctx.0.get(), self.ptr.get()) }
    }
}
/// Safe wrapper over `fz_transform_point`: applies matrix `m` (including its
/// translation part) to `point`.
#[allow(dead_code)]
pub(crate) fn transform_point(point: fz_point, m: fz_matrix) -> fz_point {
    unsafe { fz_transform_point(point, m) }
}
/// Safe wrapper over `fz_transform_point_xy`: transforms the point `(x, y)`
/// by matrix `m`.
pub(crate) fn transform_point_xy(x: f32, y: f32, m: fz_matrix) -> fz_point {
    unsafe { fz_transform_point_xy(x, y, m) }
}
/// Safe wrapper over `fz_transform_vector`: applies `m` to `vector` ignoring
/// the matrix's translation part.
pub(crate) fn transform_vector(vector: fz_point, m: fz_matrix) -> fz_point {
    unsafe { fz_transform_vector(vector, m) }
}
/// Safe wrapper over `fz_matrix_expansion`: the average scale factor of `m`.
pub(crate) fn matrix_expansion(m: fz_matrix) -> f32 {
    unsafe { fz_matrix_expansion(m) }
}
/// Safe wrapper over `fz_concat`: matrix product of `left` and `right`
/// (in MuPDF's argument order).
pub(crate) fn concat(left: fz_matrix, right: fz_matrix) -> fz_matrix {
    unsafe { fz_concat(left, right) }
}
/// Component-wise sum of two points (treating them as 2-D vectors).
pub(crate) fn add_points(a: fz_point, b: fz_point) -> fz_point {
    let (x, y) = (a.x + b.x, a.y + b.y);
    fz_point { x, y }
}
/// Component-wise minimum of two points (e.g. the lower-left corner of the
/// bounding box spanned by `a` and `b`).
pub(crate) fn point_min_components(a: fz_point, b: fz_point) -> fz_point {
    let (x, y) = (a.x.min(b.x), a.y.min(b.y));
    fz_point { x, y }
}
/// Component-wise maximum of two points (e.g. the upper-right corner of the
/// bounding box spanned by `a` and `b`).
pub(crate) fn point_max_components(a: fz_point, b: fz_point) -> fz_point {
    let (x, y) = (a.x.max(b.x), a.y.max(b.y));
    fz_point { x, y }
}

1279
src/pdf.rs

File diff suppressed because it is too large Load diff

View file

@ -1,829 +0,0 @@
use crate::{
pdf::{
PdfObjectOrStreamDictionaryOrOperator, PdfObjects, PdfParser, PdfTokenizer,
object::{
NameOr, PdfDictionary, PdfMatrix, PdfName, PdfObject, PdfObjectDirect, PdfRectangle,
PdfStream, PdfStreamContents, PdfString, PdfStringBytesDebug, PdfStringOrNumber,
PdfVec2D,
},
parse::{
GetPdfInputPosition, PdfInputPosition, PdfInputPositionKnown,
PdfInputPositionNoCompare, PdfParse, PdfParseError,
},
render::{
PdfColorDeviceGray, PdfColorDeviceRgb, PdfRenderOperator, PdfRenderState,
PdfRenderingIntent,
},
},
util::ArcOrRef,
};
use std::{fmt, sync::Arc};
/// A content-stream operator token that has not been matched against the
/// known operator keywords: its raw bytes plus where it was read.
/// The position does not participate in comparisons (`PdfInputPositionNoCompare`).
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct PdfOperatorUnparsed {
    pos: PdfInputPositionNoCompare,
    bytes: ArcOrRef<'static, [u8]>,
}
impl GetPdfInputPosition for PdfOperatorUnparsed {
    /// Reports where the operator token was read from the input.
    fn get_pdf_input_position(&self) -> PdfInputPosition {
        self.pos()
    }
}
impl fmt::Debug for PdfOperatorUnparsed {
    /// Formats as `PdfOperatorUnparsed(at <pos>, <bytes>)`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        Self::debug_with_name("PdfOperatorUnparsed", &self.bytes, self.pos.0, f)
    }
}
/// Collection types that can be built by parsing each element of a sequence
/// of raw `PdfObject`s (used by the `#[parse_iter(...)]` operand form).
trait PdfParseIter: Sized {
    fn parse_iter(iter: impl IntoIterator<Item = PdfObject>) -> Result<Self, PdfParseError>;
}
impl<T: PdfParse> PdfParseIter for Arc<[T]> {
    /// Parses every object; the `Result` collect short-circuits on the
    /// first element that fails.
    fn parse_iter(iter: impl IntoIterator<Item = PdfObject>) -> Result<Self, PdfParseError> {
        FromIterator::from_iter(iter.into_iter().map(T::parse))
    }
}
impl PdfOperatorUnparsed {
    /// Creates an operator token from its input position and raw bytes.
    pub fn new(
        pos: impl Into<PdfInputPositionNoCompare>,
        bytes: impl Into<ArcOrRef<'static, [u8]>>,
    ) -> Self {
        Self {
            pos: pos.into(),
            bytes: bytes.into(),
        }
    }
    /// Creates an operator token with no input position (for synthetic
    /// operators not read from an actual stream).
    pub const fn new_static(bytes: &'static [u8]) -> Self {
        Self {
            pos: PdfInputPositionNoCompare::empty(),
            bytes: ArcOrRef::Ref(bytes),
        }
    }
    /// Position of the operator in the input.
    pub fn pos(&self) -> PdfInputPosition {
        self.pos.0
    }
    /// Raw operator bytes exactly as read from the content stream.
    pub fn bytes(&self) -> &ArcOrRef<'static, [u8]> {
        &self.bytes
    }
    /// Shared `Debug` body used for both known and unknown operators:
    /// `<name>(at <pos>, <bytes>)`.
    fn debug_with_name(
        name: &str,
        pdf_name: &[u8],
        pos: PdfInputPosition,
        f: &mut fmt::Formatter<'_>,
    ) -> fmt::Result {
        write!(f, "{name}(at {pos}, {})", PdfStringBytesDebug(pdf_name))
    }
    /// Debug adapter rendering the operator bytes PDF-string style.
    pub fn bytes_debug(&self) -> PdfStringBytesDebug<'_> {
        PdfStringBytesDebug(&self.bytes)
    }
}
/// Generates the full PDF content-stream operator machinery from one
/// declarative list of operators:
///
/// - `$PdfOperator`: an enum of operator *keywords* only (plus `Unknown`),
///   convertible to/from `PdfOperatorUnparsed` by keyword-byte matching;
/// - `$PdfOperatorAndOperands`: an enum pairing each operator with its
///   parsed operands;
/// - one `$VariantStruct` per operator holding its position and typed
///   operand fields, with a generated `parse` for its operand list.
///
/// Operand fields declare how they consume operands via an attribute:
/// `#[parse(a, b, …)]` pops that many operands and calls `<ty>::parse(…)`,
/// `#[parse_flat(…)]` works the same (naming convention for flattened
/// multi-operand values), and `#[parse_iter(...)]` (literal `...`) consumes
/// all remaining operands through `PdfParseIter`.
macro_rules! make_pdf_operator_enum {
    // Main arm: the whole operator table.
    (
        $(#[$($operator_meta:tt)*])*
        $operator_enum_vis:vis enum $PdfOperator:ident;
        $(#[$($operator_and_operands_meta:tt)*])*
        $enum_vis:vis enum $PdfOperatorAndOperands:ident {
            $(#[$($unknown_variant_meta:tt)*])*
            $Unknown:ident {
                $(#[$($unknown_operands_meta:tt)*])*
                $unknown_operands:ident: $unknown_operands_ty:ty,
                $(#[$($unknown_operator_meta:tt)*])*
                $unknown_operator:ident: $unknown_operator_ty:ty,
            },
            $(
                #[kw = $kw:literal]
                $(#[$($variant_meta:tt)*])*
                $Variant:ident($VariantStruct:ident {
                    $pos:ident: PdfInputPositionNoCompare,
                    $(
                        #[$field_parse:ident($($parse_args:tt)*)]
                        $(#[$($field_meta:tt)*])*
                        $field:ident: $field_ty:ty,
                    )*
                }),
            )*
        }
    ) => {
        // Keyword-only enum: each known operator carries just its position.
        $(#[$($operator_meta)*])*
        $operator_enum_vis enum $PdfOperator {
            $(#[$($unknown_variant_meta)*])*
            $Unknown($unknown_operator_ty),
            $(
                $(#[$($variant_meta)*])*
                $Variant(PdfInputPositionNoCompare),
            )*
        }
        impl $PdfOperator {
            // Attaches and type-checks the collected operands for this
            // operator, producing the paired enum.
            $operator_enum_vis fn parse(self, operands: impl IntoIterator<Item = PdfObject>) -> Result<$PdfOperatorAndOperands, PdfParseError> {
                let operands = operands.into_iter();
                Ok(match self {
                    // Unknown operators keep their operands unparsed.
                    Self::$Unknown(operator) => $PdfOperatorAndOperands::$Unknown {
                        operands: FromIterator::from_iter(operands.map(Into::into)),
                        operator,
                    },
                    $(Self::$Variant(pos) => $VariantStruct::parse(pos, operands)?.into(),)*
                })
            }
            $operator_enum_vis fn pos(&self) -> PdfInputPosition {
                match *self {
                    Self::$Unknown(ref operator) => operator.pos(),
                    $(Self::$Variant(pos) => pos.0,)*
                }
            }
        }
        impl fmt::Debug for $PdfOperator {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                match self {
                    Self::$Unknown(operator) => PdfOperatorUnparsed::debug_with_name("Unknown", &operator.bytes, operator.pos.0, f),
                    $(Self::$Variant(pos) => PdfOperatorUnparsed::debug_with_name(stringify!($Variant), $kw, pos.0, f),)*
                }
            }
        }
        // Lossless round-trip between the typed operator and its raw bytes.
        impl From<$PdfOperator> for PdfOperatorUnparsed {
            fn from(v: $PdfOperator) -> PdfOperatorUnparsed {
                match v {
                    $PdfOperator::$Unknown(operator) => operator,
                    $($PdfOperator::$Variant(pos) => PdfOperatorUnparsed { pos, bytes: ArcOrRef::Ref($kw) },)*
                }
            }
        }
        impl From<PdfOperatorUnparsed> for $PdfOperator {
            fn from(v: PdfOperatorUnparsed) -> $PdfOperator {
                // Match the raw bytes against every known keyword.
                match &**v.bytes() {
                    $($kw => Self::$Variant(v.pos),)*
                    _ => Self::$Unknown(v),
                }
            }
        }
        // One struct per operator carrying its parsed operands.
        $(#[derive(Clone)]
        $(#[$($variant_meta)*])*
        $enum_vis struct $VariantStruct {
            $enum_vis $pos: PdfInputPositionNoCompare,
            $(
                $(#[$($field_meta)*])*
                $enum_vis $field: $field_ty,
            )*
        }
        impl fmt::Debug for $VariantStruct {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                f.debug_struct(stringify!($VariantStruct)).field("pos", &self.pos)$(.field(stringify!($field), &self.$field))*.finish()
            }
        }
        impl GetPdfInputPosition for $VariantStruct {
            fn get_pdf_input_position(&self) -> PdfInputPosition {
                self.pos()
            }
        }
        impl From<$VariantStruct> for $PdfOperatorAndOperands {
            fn from(v: $VariantStruct) -> Self {
                Self::$Variant(v)
            }
        }
        impl $VariantStruct {
            $enum_vis fn operator_from_pos(pos: impl Into<PdfInputPositionNoCompare>) -> $PdfOperator {
                $PdfOperator::$Variant(pos.into())
            }
            $enum_vis fn operator(&self) -> $PdfOperator {
                $PdfOperator::$Variant(self.pos)
            }
            $enum_vis fn pos(&self) -> PdfInputPosition {
                self.pos.0
            }
        }
        // Recurse into the per-struct arm to generate `parse`; which arm
        // matches depends on the field-attribute shape (fixed args vs `...`).
        make_pdf_operator_enum! {
            @impl_variant_parse
            $enum_vis enum;
            struct $VariantStruct {
                $pos: PdfInputPositionNoCompare,
                $(
                    #[$field_parse($($parse_args)*)]
                    $(#[$($field_meta)*])*
                    $field: $field_ty,
                )*
            }
        })*
        // Operator + operands enum.
        $(#[$($operator_and_operands_meta)*])*
        $enum_vis enum $PdfOperatorAndOperands {
            $(#[$($unknown_variant_meta)*])*
            $Unknown {
                $(#[$($unknown_operands_meta)*])*
                $unknown_operands: $unknown_operands_ty,
                $(#[$($unknown_operator_meta)*])*
                $unknown_operator: $unknown_operator_ty,
            },
            $(
                $(#[$($variant_meta)*])*
                $Variant($VariantStruct),
            )*
        }
        impl $PdfOperatorAndOperands {
            $enum_vis fn operator(&self) -> $PdfOperator {
                match self {
                    Self::Unknown { operator, .. } => $PdfOperator::Unknown(operator.clone()),
                    $(Self::$Variant(v) => v.operator(),)*
                }
            }
            $enum_vis fn pos(&self) -> PdfInputPosition {
                match self {
                    Self::$Unknown { operator, .. } => operator.pos(),
                    $(Self::$Variant(v) => v.pos(),)*
                }
            }
        }
        impl fmt::Debug for $PdfOperatorAndOperands {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                match self {
                    Self::$Unknown {
                        operands,
                        operator,
                    } => f.debug_struct("Unknown").field("operator", operator).field("operands", operands).finish(),
                    $(Self::$Variant($VariantStruct {
                        $pos,
                        $($field,)*
                    }) => f.debug_struct(stringify!($Variant)).field("pos", $pos)$(.field(stringify!($field), $field))*.finish(),)*
                }
            }
        }
        // Dispatch rendering to each per-operator impl.
        impl PdfRenderOperator for $PdfOperatorAndOperands {
            fn render(&self, state: &mut PdfRenderState) -> Result<(), PdfParseError> {
                match self {
                    Self::$Unknown {
                        operands,
                        operator,
                    } => state.handle_unknown_operator(operator, operands),
                    $(Self::$Variant(v) => <$VariantStruct as PdfRenderOperator>::render(v, state),)*
                }
            }
        }
    };
    // Arm for operators whose fields each consume a fixed number of
    // operands (`#[parse(a, b)]` / `#[parse_flat(a, b, …)]`).
    (
        @impl_variant_parse
        $enum_vis:vis enum;
        struct $VariantStruct:ident {
            $pos:ident: PdfInputPositionNoCompare,
            $(
                #[$field_parse:ident($($parse_args:ident),* $(,)?)]
                $(#[$($field_meta:tt)*])*
                $field:ident: $field_ty:ty,
            )*
        }
    ) => {
        impl $VariantStruct {
            $enum_vis fn parse(pos: impl Into<PdfInputPositionNoCompare>, operands: impl IntoIterator<Item = PdfObject>) -> Result<Self, PdfParseError> {
                let pos = pos.into();
                let mut operands = operands.into_iter();
                // Pop exactly one operand per declared parse argument.
                $($(let Some($parse_args) = operands.next() else {
                    return Err(PdfParseError::OperatorHasTooFewOperands { operator: Self::operator_from_pos(pos) });
                };)*)*
                // Anything left over is an arity error.
                if operands.next().is_some() {
                    return Err(PdfParseError::OperatorHasTooManyOperands { operator: Self::operator_from_pos(pos) });
                }
                Ok(Self {
                    pos,
                    $($field: <$field_ty>::$field_parse($($parse_args),*)?,)*
                })
            }
        }
    };
    // Arm for single-field operators that consume *all* operands
    // (`#[parse_iter(...)]`, literal `...`).
    (
        @impl_variant_parse
        $enum_vis:vis enum;
        struct $VariantStruct:ident {
            $pos:ident: PdfInputPositionNoCompare,
            #[$field_parse:ident(...)]
            $(#[$($field_meta:tt)*])*
            $field:ident: $field_ty:ty,
        }
    ) => {
        impl $VariantStruct {
            $enum_vis fn parse(pos: impl Into<PdfInputPositionNoCompare>, operands: impl IntoIterator<Item = PdfObject>) -> Result<Self, PdfParseError> {
                let pos = pos.into();
                let operands = operands.into_iter();
                Ok(Self {
                    pos,
                    $field: <$field_ty>::$field_parse(operands)?,
                })
            }
        }
    };
}
// The complete PDF content-stream operator table (PDF 1.7 / ISO 32000,
// §8-9). Keywords (`#[kw = …]`) are the literal operator bytes; variant
// names spell out each operator's meaning. Only `//` line comments are used
// here — they are stripped before macro matching and cannot disturb the
// `tt` captures.
make_pdf_operator_enum! {
    #[derive(Clone)]
    pub enum PdfOperator;
    #[derive(Clone)]
    pub enum PdfOperatorAndOperands {
        // Fallback for operators not in the table; operands kept unparsed.
        Unknown {
            operands: Arc<[PdfObjectDirect]>,
            operator: PdfOperatorUnparsed,
        },
        // -- path painting --
        #[kw = b"b"]
        CloseFillAndStrokePath(PdfOperatorCloseFillAndStrokePath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"B"]
        FillAndStrokePath(PdfOperatorFillAndStrokePath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"b*"]
        CloseFillAndStrokePathEvenOdd(PdfOperatorCloseFillAndStrokePathEvenOdd {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"B*"]
        FillAndStrokePathEvenOdd(PdfOperatorFillAndStrokePathEvenOdd {
            pos: PdfInputPositionNoCompare,
        }),
        // -- marked content / inline images --
        #[kw = b"BDC"]
        BeginMarkedContentWithProperties(PdfOperatorBeginMarkedContentWithProperties {
            pos: PdfInputPositionNoCompare,
            #[parse(tag)]
            tag: PdfName,
            #[parse(properties)]
            properties: NameOr<PdfDictionary>,
        }),
        #[kw = b"BI"]
        BeginInlineImage(PdfOperatorBeginInlineImage {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"BMC"]
        BeginMarkedContent(PdfOperatorBeginMarkedContent {
            pos: PdfInputPositionNoCompare,
            #[parse(tag)]
            tag: PdfName,
        }),
        #[kw = b"BT"]
        BeginText(PdfOperatorBeginText {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"BX"]
        BeginCompatibilitySection(PdfOperatorBeginCompatibilitySection {
            pos: PdfInputPositionNoCompare,
        }),
        // -- path construction --
        #[kw = b"c"]
        CurveTo(PdfOperatorCurveTo {
            pos: PdfInputPositionNoCompare,
            #[parse(x1, y1)]
            p1: PdfVec2D,
            #[parse(x2, y2)]
            p2: PdfVec2D,
            #[parse(x3, y3)]
            p3: PdfVec2D,
        }),
        #[kw = b"cm"]
        ConcatMatrix(PdfOperatorConcatMatrix {
            pos: PdfInputPositionNoCompare,
            #[parse_flat(a, b, c, d, e, f)]
            matrix: PdfMatrix,
        }),
        // -- color --
        #[kw = b"CS"]
        SetStrokeColorSpace(PdfOperatorSetStrokeColorSpace {
            pos: PdfInputPositionNoCompare,
            #[parse(name)]
            name: PdfName,
        }),
        #[kw = b"cs"]
        SetNonStrokeColorSpace(PdfOperatorSetNonStrokeColorSpace {
            pos: PdfInputPositionNoCompare,
            #[parse(name)]
            name: PdfName,
        }),
        #[kw = b"d"]
        SetLineDashPattern(PdfOperatorSetLineDashPattern {
            pos: PdfInputPositionNoCompare,
            #[parse(dash_array)]
            dash_array: PdfObject, // TODO: actually parse
            #[parse(dash_phase)]
            dash_phase: PdfObject, // TODO: actually parse
        }),
        // -- Type 3 font glyph metrics --
        #[kw = b"d0"]
        FontType3SetWidth(PdfOperatorFontType3SetWidth {
            pos: PdfInputPositionNoCompare,
            #[parse(x, y)]
            width: PdfVec2D,
        }),
        #[kw = b"d1"]
        FontType3SetWidthAndBBox(PdfOperatorFontType3SetWidthAndBBox {
            pos: PdfInputPositionNoCompare,
            #[parse(width_x, width_y)]
            width: PdfVec2D,
            #[parse_flat(lower_left_x, lower_left_y, upper_right_x, upper_right_y)]
            bbox: PdfRectangle,
        }),
        #[kw = b"Do"]
        PaintXObject(PdfOperatorPaintXObject {
            pos: PdfInputPositionNoCompare,
            #[parse(name)]
            name: PdfName,
        }),
        #[kw = b"DP"]
        DesignateMarkedContentPointWithProperties(PdfOperatorDesignateMarkedContentPointWithProperties {
            pos: PdfInputPositionNoCompare,
            #[parse(tag)]
            tag: PdfName,
            #[parse(properties)]
            properties: NameOr<PdfDictionary>,
        }),
        #[kw = b"EI"]
        EndInlineImage(PdfOperatorEndInlineImage {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"EMC"]
        EndMarkedContent(PdfOperatorEndMarkedContent {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"ET"]
        EndText(PdfOperatorEndText {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"EX"]
        EndCompatibilitySection(PdfOperatorEndCompatibilitySection {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"f"]
        FillPath(PdfOperatorFillPath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"F"]
        FillPathObsolete(PdfOperatorFillPathObsolete {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"f*"]
        FillPathEvenOdd(PdfOperatorFillPathEvenOdd {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"G"]
        SetStrokeGray(PdfOperatorSetStrokeGray {
            pos: PdfInputPositionNoCompare,
            #[parse(gray)]
            gray: PdfColorDeviceGray,
        }),
        #[kw = b"g"]
        SetNonStrokeGray(PdfOperatorSetNonStrokeGray {
            pos: PdfInputPositionNoCompare,
            #[parse(gray)]
            gray: PdfColorDeviceGray,
        }),
        #[kw = b"gs"]
        SetGraphicsState(PdfOperatorSetGraphicsState {
            pos: PdfInputPositionNoCompare,
            #[parse(dictionary_name)]
            dictionary_name: PdfName,
        }),
        #[kw = b"h"]
        CloseSubpath(PdfOperatorCloseSubpath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"i"]
        SetFlatnessTolerance(PdfOperatorSetFlatnessTolerance {
            pos: PdfInputPositionNoCompare,
            #[parse(flatness)]
            flatness: f32,
        }),
        #[kw = b"ID"]
        BeginInlineImageData(PdfOperatorBeginInlineImageData {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"j"]
        SetLineJoinStyle(PdfOperatorSetLineJoinStyle {
            pos: PdfInputPositionNoCompare,
            #[parse(line_join_style)]
            line_join_style: u8, // TODO parse
        }),
        #[kw = b"J"]
        SetLineCapStyle(PdfOperatorSetLineCapStyle {
            pos: PdfInputPositionNoCompare,
            #[parse(line_cap_style)]
            line_cap_style: u8, // TODO parse
        }),
        #[kw = b"K"]
        SetStrokeCmyk(PdfOperatorSetStrokeCmyk {
            pos: PdfInputPositionNoCompare,
            #[parse(c)]
            c: f32,
            #[parse(m)]
            m: f32,
            #[parse(y)]
            y: f32,
            #[parse(k)]
            k: f32,
        }),
        #[kw = b"k"]
        SetNonStrokeCmyk(PdfOperatorSetNonStrokeCmyk {
            pos: PdfInputPositionNoCompare,
            #[parse(c)]
            c: f32,
            #[parse(m)]
            m: f32,
            #[parse(y)]
            y: f32,
            #[parse(k)]
            k: f32,
        }),
        #[kw = b"l"]
        LineTo(PdfOperatorLineTo {
            pos: PdfInputPositionNoCompare,
            #[parse(x, y)]
            to: PdfVec2D,
        }),
        #[kw = b"m"]
        MoveTo(PdfOperatorMoveTo {
            pos: PdfInputPositionNoCompare,
            #[parse(x, y)]
            to: PdfVec2D,
        }),
        #[kw = b"M"]
        SetMiterLimit(PdfOperatorSetMiterLimit {
            pos: PdfInputPositionNoCompare,
            #[parse(limit)]
            limit: f32,
        }),
        #[kw = b"MP"]
        DesignateMarkedContentPoint(PdfOperatorDesignateMarkedContentPoint {
            pos: PdfInputPositionNoCompare,
            #[parse(tag)]
            tag: PdfName,
        }),
        #[kw = b"n"]
        EndPath(PdfOperatorEndPath {
            pos: PdfInputPositionNoCompare,
        }),
        // -- graphics state stack --
        #[kw = b"q"]
        SaveGraphicsState(PdfOperatorSaveGraphicsState {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"Q"]
        RestoreGraphicsState(PdfOperatorRestoreGraphicsState {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"re"]
        Rectangle(PdfOperatorRectangle {
            pos: PdfInputPositionNoCompare,
            #[parse(x, y)]
            p: PdfVec2D,
            #[parse(width, height)]
            size: PdfVec2D,
        }),
        #[kw = b"RG"]
        SetStrokeRgb(PdfOperatorSetStrokeRgb {
            pos: PdfInputPositionNoCompare,
            #[parse_flat(r, g, b)]
            color: PdfColorDeviceRgb,
        }),
        #[kw = b"rg"]
        SetNonStrokeRgb(PdfOperatorSetNonStrokeRgb {
            pos: PdfInputPositionNoCompare,
            #[parse_flat(r, g, b)]
            color: PdfColorDeviceRgb,
        }),
        #[kw = b"ri"]
        SetColorRenderingIntent(PdfOperatorSetColorRenderingIntent {
            pos: PdfInputPositionNoCompare,
            #[parse(intent)]
            intent: PdfRenderingIntent,
        }),
        #[kw = b"s"]
        CloseAndStrokePath(PdfOperatorCloseAndStrokePath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"S"]
        StrokePath(PdfOperatorStrokePath {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"SC"]
        SetStrokeColor(PdfOperatorSetStrokeColor {
            pos: PdfInputPositionNoCompare,
            #[parse_iter(...)]
            color: Arc<[f32]>,
        }),
        #[kw = b"sc"]
        SetNonStrokeColor(PdfOperatorSetNonStrokeColor {
            pos: PdfInputPositionNoCompare,
            #[parse_iter(...)]
            color: Arc<[f32]>,
        }),
        #[kw = b"SCN"]
        SetStrokeColorWithName(PdfOperatorSetStrokeColorWithName {
            pos: PdfInputPositionNoCompare,
            #[parse_iter(...)]
            color_and_name: Arc<[NameOr<f32>]>,
        }),
        #[kw = b"scn"]
        SetNonStrokeColorWithName(PdfOperatorSetNonStrokeColorWithName {
            pos: PdfInputPositionNoCompare,
            #[parse_iter(...)]
            color_and_name: Arc<[NameOr<f32>]>,
        }),
        #[kw = b"sh"]
        Shade(PdfOperatorShade {
            pos: PdfInputPositionNoCompare,
        }),
        // -- text positioning and showing --
        #[kw = b"T*"]
        TextNextLine(PdfOperatorTextNextLine {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"Tc"]
        SetCharacterSpacing(PdfOperatorSetCharacterSpacing {
            pos: PdfInputPositionNoCompare,
            #[parse(char_space)]
            char_space: f32,
        }),
        #[kw = b"Td"]
        TextNextLineWithOffset(PdfOperatorTextNextLineWithOffset {
            pos: PdfInputPositionNoCompare,
            #[parse(x, y)]
            offset: PdfVec2D,
        }),
        #[kw = b"TD"]
        TextNextLineWithOffsetAndLeading(PdfOperatorTextNextLineWithOffsetAndLeading {
            pos: PdfInputPositionNoCompare,
            #[parse(x, y)]
            offset: PdfVec2D,
        }),
        #[kw = b"Tf"]
        SetFontAndSize(PdfOperatorSetFontAndSize {
            pos: PdfInputPositionNoCompare,
            #[parse(font)]
            font: PdfName,
            #[parse(size)]
            size: f32,
        }),
        #[kw = b"Tj"]
        ShowText(PdfOperatorShowText {
            pos: PdfInputPositionNoCompare,
            #[parse(text)]
            text: PdfString,
        }),
        #[kw = b"TJ"]
        ShowTextWithGlyphPositioning(PdfOperatorShowTextWithGlyphPositioning {
            pos: PdfInputPositionNoCompare,
            #[parse(text_and_positioning)]
            text_and_positioning: Arc<[PdfStringOrNumber]>,
        }),
        #[kw = b"TL"]
        SetTextLeading(PdfOperatorSetTextLeading {
            pos: PdfInputPositionNoCompare,
            #[parse(leading)]
            leading: f32,
        }),
        #[kw = b"Tm"]
        SetTextMatrix(PdfOperatorSetTextMatrix {
            pos: PdfInputPositionNoCompare,
            #[parse_flat(a, b, c, d, e, f)]
            matrix: PdfMatrix,
        }),
        #[kw = b"Tr"]
        SetTextRenderingMode(PdfOperatorSetTextRenderingMode {
            pos: PdfInputPositionNoCompare,
            #[parse(rendering_mode)]
            rendering_mode: u8, // TODO: parse
        }),
        #[kw = b"Ts"]
        SetTextRise(PdfOperatorSetTextRise {
            pos: PdfInputPositionNoCompare,
            #[parse(rise)]
            rise: f32,
        }),
        #[kw = b"Tw"]
        SetWordSpacing(PdfOperatorSetWordSpacing {
            pos: PdfInputPositionNoCompare,
            #[parse(word_space)]
            word_space: f32,
        }),
        #[kw = b"Tz"]
        SetTextHorizontalScaling(PdfOperatorSetTextHorizontalScaling {
            pos: PdfInputPositionNoCompare,
            #[parse(scale_percent)]
            scale_percent: f32,
        }),
        // Curve variants whose operands are not parsed yet.
        #[kw = b"v"]
        CurveTo23(PdfOperatorCurveTo23 {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"w"]
        SetLineWidth(PdfOperatorSetLineWidth {
            pos: PdfInputPositionNoCompare,
            #[parse(line_width)]
            line_width: f32,
        }),
        #[kw = b"W"]
        Clip(PdfOperatorClip {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"W*"]
        ClipEvenOdd(PdfOperatorClipEvenOdd {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"y"]
        CurveTo13(PdfOperatorCurveTo13 {
            pos: PdfInputPositionNoCompare,
        }),
        #[kw = b"'"]
        TextNextLineAndShow(PdfOperatorTextNextLineAndShow {
            pos: PdfInputPositionNoCompare,
            #[parse(text)]
            text: PdfString,
        }),
        #[kw = b"\""]
        SetSpacingThenTextNextLineAndShow(PdfOperatorSetSpacingThenTextNextLineAndShow {
            pos: PdfInputPositionNoCompare,
            #[parse(word_space)]
            word_space: f32,
            #[parse(char_space)]
            char_space: f32,
            #[parse(text)]
            text: PdfString,
        }),
    }
}
impl GetPdfInputPosition for PdfOperator {
    /// Reports where the operator keyword was read from the input.
    fn get_pdf_input_position(&self) -> PdfInputPosition {
        self.pos()
    }
}
impl GetPdfInputPosition for PdfOperatorAndOperands {
    /// Reports the operator's input position (operands are not considered).
    fn get_pdf_input_position(&self) -> PdfInputPosition {
        self.pos()
    }
}
/// The parsed body of a PDF content stream: its operators in order, each
/// already paired with its typed operands.
#[derive(Clone)]
pub struct PdfContentStreamData {
    pub operators: Arc<[PdfOperatorAndOperands]>,
}
impl fmt::Debug for PdfContentStreamData {
    /// Formats as `PdfContentStreamData { operators: [...] }`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Destructure so adding a field later forces this impl to be updated.
        let Self { operators } = self;
        f.debug_struct("PdfContentStreamData")
            .field("operators", operators)
            .finish()
    }
}
impl PdfStreamContents for PdfContentStreamData {
    /// Tokenizes and parses decoded content-stream bytes into a list of
    /// operators with their operands.
    ///
    /// Content streams are postfix (`operand… operator`), so objects are
    /// accumulated until an operator token consumes them.
    fn parse(
        data: &[u8],
        stream_pos: PdfInputPosition,
        objects: Arc<PdfObjects>,
    ) -> Result<Self, PdfParseError> {
        let mut parser = PdfParser {
            objects,
            tokenizer: PdfTokenizer::new(
                data,
                PdfInputPositionKnown {
                    // Positions restart at 0 inside the stream; the stream's
                    // own position is recorded as the containing context.
                    pos: 0,
                    containing_streams_pos: stream_pos.get().map(|v| v.pos),
                },
            ),
        };
        let mut operands = Vec::new();
        let mut operators = Vec::new();
        loop {
            parser.skip_comments_and_whitespace();
            if parser.tokenizer.peek().is_none() {
                break;
            }
            match parser.parse_object_or_operator()? {
                // Nested `stream` objects are not legal in a content stream.
                PdfObjectOrStreamDictionaryOrOperator::StreamDictionary {
                    stream_kw_pos, ..
                } => return Err(PdfParseError::StreamNotAllowedHere { pos: stream_kw_pos }),
                PdfObjectOrStreamDictionaryOrOperator::Object(object) => operands.push(object),
                PdfObjectOrStreamDictionaryOrOperator::Operator(operator) => {
                    // An operator consumes every operand gathered so far.
                    operators.push(PdfOperator::from(operator).parse(operands.drain(..))?);
                }
            }
        }
        // Operands left at EOF mean the stream ended mid-instruction.
        if operands.is_empty() {
            Ok(Self {
                operators: operators.into(),
            })
        } else {
            Err(PdfParseError::MissingOperator {
                pos: parser.tokenizer.pos(),
            })
        }
    }
}
/// A PDF stream whose decoded bytes are parsed as a content stream.
pub type PdfContentStream = PdfStream<PdfDictionary, PdfContentStreamData>;

View file

@ -1,743 +0,0 @@
use crate::{
pdf::{
content_stream::PdfContentStream,
font::PdfFont,
object::{
IsPdfNull, MaybeArray, PdfDate, PdfDictionary, PdfInteger, PdfName, PdfObject,
PdfObjectDirect, PdfObjectIndirect, PdfRectangle, PdfStream, PdfString,
},
parse::{PdfParse, PdfParseError},
pdf_parse,
render::{PdfRenderOperator, PdfRenderState},
},
util::DagDebugState,
};
use rayon::iter::{FromParallelIterator, IntoParallelIterator, ParallelIterator};
use std::{borrow::Cow, fmt, sync::Arc};
// The `/Type` entry of the document catalog dictionary; `/Catalog` is the
// only valid value, so parsing any other name is an error.
pdf_parse! {
    #[pdf(name)]
    #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
    pub enum PdfDocumentCatalogType {
        #[pdf(name = "Catalog")]
        #[default]
        Catalog,
    }
}
// The document catalog (the root of a PDF's object graph, referenced by the
// trailer's /Root). Unmodeled entries are kept verbatim in `rest`.
pdf_parse! {
    #[pdf]
    #[derive(Clone)]
    pub struct PdfDocumentCatalog {
        #[pdf(name = "Type")]
        pub ty: PdfDocumentCatalogType,
        #[pdf(name = "Version")]
        pub version: Option<PdfName>,
        #[pdf(name = "Extensions")]
        pub extensions: Option<PdfDictionary>,
        #[pdf(name = "Pages")]
        pub pages: PdfPageTree,
        // TODO
        #[pdf(flatten)]
        pub rest: PdfDictionary,
    }
}
impl fmt::Debug for PdfDocumentCatalog {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // DagDebugState::scope guards the (possibly cyclic) object graph
        // while formatting — presumably to avoid infinite recursion; see
        // util::DagDebugState.
        DagDebugState::scope(|_state| {
            // Exhaustive destructure: adding a field breaks this impl loudly.
            let Self {
                ty,
                version,
                extensions,
                pages,
                rest,
            } = self;
            f.debug_struct("PdfDocumentCatalog")
                .field("ty", ty)
                .field("version", version)
                .field("extensions", extensions)
                .field("pages", pages)
                .field("rest", rest)
                .finish()
        })
    }
}
// A page's /Resources dictionary; only /Font is modeled, other resource
// categories stay in `rest`.
pdf_parse! {
    #[pdf]
    #[derive(Clone, Debug)]
    pub struct PdfResourcesDictionary {
        #[pdf(name = "Font")]
        pub fonts: PdfDictionary<PdfFont>,
        #[pdf(flatten)]
        pub rest: PdfDictionary,
    }
}
/// The document's page tree plus a flattened, fully-parsed page list in
/// document order (computed once at construction).
#[derive(Clone)]
pub struct PdfPageTree {
    // The raw /Pages tree as found in the file.
    page_tree: PdfPageTreeNode,
    // The leaves of `page_tree`, flattened and parsed, in order.
    pages: Arc<[PdfPage]>,
}
impl PdfPageTree {
    /// Depth-first walk of the page tree, appending every leaf (page) to
    /// `leaves` in document order; a kid that is neither an interior node
    /// nor a page is a type error.
    fn collect_leaves(
        node: &PdfPageTreeNode,
        leaves: &mut Vec<PdfPageTreeLeaf>,
    ) -> Result<(), PdfParseError> {
        for kid in node.kids.iter() {
            match kid {
                PdfPageTreeNodeOrLeaf::Node(node) => Self::collect_leaves(node, leaves)?,
                PdfPageTreeNodeOrLeaf::Leaf(leaf) => {
                    leaves.push(leaf.clone());
                }
                PdfPageTreeNodeOrLeaf::Other(v) => {
                    return Err(PdfParseError::InvalidType {
                        pos: v.pos(),
                        ty: "dictionary",
                        expected_ty: "PdfPageTreeNodeOrLeaf",
                    });
                }
            }
        }
        Ok(())
    }
    /// Builds a `PdfPageTree` from the root /Pages node: pushes inheritable
    /// attributes (resources, boxes, rotation) down to the leaves, then
    /// flattens and parses all pages — in parallel via rayon.
    pub fn try_from_page_tree_root(mut page_tree: PdfPageTreeNode) -> Result<Self, PdfParseError> {
        page_tree.propagate_inheritable_data_to_leaves();
        let mut leaves = Vec::new();
        Self::collect_leaves(&page_tree, &mut leaves)?;
        Ok(Self {
            page_tree,
            // panic_fuse stops the other workers early if one page panics.
            pages: Result::from_par_iter(
                leaves
                    .into_par_iter()
                    .map(PdfPage::parse_after_propagating_inheritable_data)
                    .panic_fuse(),
            )?,
        })
    }
    /// The raw page tree as read from the file.
    pub fn page_tree(&self) -> &PdfPageTreeNode {
        &self.page_tree
    }
    /// All pages, flattened into document order.
    pub fn pages(&self) -> &Arc<[PdfPage]> {
        &self.pages
    }
}
impl fmt::Debug for PdfPageTree {
    /// Formats only the flattened pages; the raw tree is skipped (it largely
    /// duplicates `pages` and can reference parents via indirect objects).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        DagDebugState::scope(|_state| {
            let Self {
                page_tree: _,
                pages,
            } = self;
            f.debug_struct("PdfPageTree")
                .field("pages", pages)
                .finish_non_exhaustive()
        })
    }
}
impl IsPdfNull for PdfPageTree {
fn is_pdf_null(&self) -> bool {
self.page_tree.is_pdf_null()
}
}
impl PdfParse for PdfPageTree {
fn type_name() -> Cow<'static, str> {
Cow::Borrowed("PdfPageTree")
}
fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
Self::try_from_page_tree_root(PdfParse::parse(object)?)
}
}
// Page attributes that may live on any ancestor page-tree node and are
// inherited by pages that do not set them themselves (see
// `PdfPageInheritableData::propagate_to`).
pdf_parse! {
    #[pdf]
    #[derive(Clone, Default, Debug)]
    pub struct PdfPageInheritableData {
        // "/Resources" used by the page's content streams.
        #[pdf(name = "Resources")]
        pub resources: Option<PdfResourcesDictionary>,
        // "/MediaBox" page boundary rectangle.
        #[pdf(name = "MediaBox")]
        pub media_box: Option<PdfRectangle>,
        // "/CropBox" visible-region rectangle.
        #[pdf(name = "CropBox")]
        pub crop_box: Option<PdfRectangle>,
        // "/Rotate": display rotation, a multiple of 90 degrees.
        #[pdf(name = "Rotate")]
        pub rotate: Option<PdfPageRotation>,
        // Remaining entries of the containing dictionary; NOT inherited.
        #[pdf(flatten)]
        pub rest: PdfDictionary,
    }
}
impl PdfPageInheritableData {
    /// Copies every inheritable attribute that `self` has and `target`
    /// lacks into `target`; attributes already set on `target` win.
    pub fn propagate_to(&self, target: &mut Self) {
        // fills `slot` from `value` only while `slot` is still empty
        fn fill<T: Clone>(value: &Option<T>, slot: &mut Option<T>) {
            if slot.is_none() {
                slot.clone_from(value);
            }
        }
        fill(&self.resources, &mut target.resources);
        fill(&self.media_box, &mut target.media_box);
        fill(&self.crop_box, &mut target.crop_box);
        fill(&self.rotate, &mut target.rotate);
        // `rest` is deliberately not inherited.
    }
}
// "/Type" of an interior page-tree node: always "/Pages".
pdf_parse! {
    #[pdf(name)]
    #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
    pub enum PdfPageTreeNodeType {
        #[pdf(name = "Pages")]
        #[default]
        Pages,
    }
}
// An interior node of the page tree ("/Type /Pages").
pdf_parse! {
    #[pdf]
    #[derive(Clone)]
    pub struct PdfPageTreeNode {
        #[pdf(name = "Type")]
        pub ty: PdfPageTreeNodeType,
        // "/Parent": kept as an unresolved indirect reference (resolving it
        // here would make the tree cyclic).
        #[pdf(name = "Parent")]
        pub parent: Option<PdfObjectIndirect>,
        // "/Kids": child nodes and/or leaf pages.
        #[pdf(name = "Kids")]
        pub kids: Arc<[PdfPageTreeNodeOrLeaf]>,
        // "/Count" — per the PDF spec this is the number of leaf pages under
        // this node; not validated here.
        #[pdf(name = "Count")]
        pub count: usize,
        // TODO
        // Inheritable attributes declared on this node, pushed to leaves by
        // `propagate_inheritable_data_to_leaves`.
        #[pdf(flatten)]
        pub inheritable: PdfPageInheritableData,
    }
}
/// Manual `Debug` impl so formatting runs inside a [`DagDebugState`]
/// scope (shared subtrees can be abbreviated).
impl fmt::Debug for PdfPageTreeNode {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        DagDebugState::scope(|_state| {
            let mut out = f.debug_struct("PdfPageTreeNode");
            out.field("ty", &self.ty);
            out.field("parent", &self.parent);
            out.field("kids", &self.kids);
            out.field("count", &self.count);
            out.field("inheritable", &self.inheritable);
            out.finish()
        })
    }
}
impl PdfPageTreeNode {
pub fn propagate_inheritable_data_to_leaves(&mut self) {
for kid in Arc::make_mut(&mut self.kids) {
if let Some(target) = kid.inheritable_data_mut() {
self.inheritable.propagate_to(target);
}
kid.propagate_inheritable_data_to_leaves();
}
}
}
// "/Type" of a leaf page object: always "/Page".
pdf_parse! {
    #[pdf(name)]
    #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
    pub enum PdfPageType {
        #[pdf(name = "Page")]
        #[default]
        Page,
    }
}
// A page's "/Tabs" entry: the order annotations receive keyboard focus.
// Unrecognized names are preserved rather than rejected.
pdf_parse! {
    #[pdf(name)]
    #[derive(Clone, PartialEq, Eq, Hash, Debug)]
    pub enum PdfPageAnnotationsTabOrder {
        #[pdf(name = "R")]
        RowOrder,
        #[pdf(name = "C")]
        ColumnOrder,
        #[pdf(name = "S")]
        StructureOrder,
        #[pdf(other)]
        Other(PdfName),
    }
}
// A leaf of the page tree ("/Type /Page") exactly as it appears in the
// file: optional boxes are still optional and inheritable attributes are
// not yet resolved. `PdfPage::parse_after_propagating_inheritable_data`
// turns this into a fully-resolved `PdfPage`.
pdf_parse! {
    #[pdf]
    #[derive(Clone)]
    pub struct PdfPageTreeLeaf {
        #[pdf(name = "Type")]
        pub ty: PdfPageType,
        // Parent page-tree node, as an unresolved indirect reference.
        #[pdf(name = "Parent")]
        pub parent: PdfObjectIndirect,
        #[pdf(name = "LastModified")]
        pub last_modified: Option<PdfDate>,
        // Page boundary boxes; missing ones get defaults during `PdfPage`
        // construction.
        #[pdf(name = "BleedBox")]
        pub bleed_box: Option<PdfRectangle>,
        #[pdf(name = "TrimBox")]
        pub trim_box: Option<PdfRectangle>,
        #[pdf(name = "ArtBox")]
        pub art_box: Option<PdfRectangle>,
        #[pdf(name = "BoxColorInfo")]
        pub box_color_info: Option<PdfDictionary>,
        // "/Contents": a single content stream or an array of them.
        #[pdf(name = "Contents")]
        pub contents: MaybeArray<PdfContentStream>,
        #[pdf(name = "Group")]
        pub group: Option<PdfDictionary>,
        #[pdf(name = "Thumb")]
        pub thumbnail: Option<PdfStream>,
        // "/B": article beads, kept unparsed.
        #[pdf(name = "B")]
        pub beads: Option<Arc<[PdfDictionary]>>,
        // "/Dur": presentation display duration.
        #[pdf(name = "Dur")]
        pub duration: Option<f32>,
        #[pdf(name = "Trans")]
        pub transition: Option<PdfDictionary>,
        #[pdf(name = "Annots")]
        pub annotations: Option<Arc<[PdfDictionary]>>,
        // "/AA": additional-actions dictionary.
        #[pdf(name = "AA")]
        pub additional_actions: Option<PdfDictionary>,
        #[pdf(name = "Metadata")]
        pub metadata: Option<PdfStream>,
        #[pdf(name = "PieceInfo")]
        pub piece_info: Option<PdfDictionary>,
        #[pdf(name = "StructParents")]
        pub structural_parents: Option<PdfInteger>,
        // "/ID" of the parent web-capture content set.
        #[pdf(name = "ID")]
        pub parent_web_capture_content_set_id: Option<PdfString>,
        // "/PZ": preferred zoom factor.
        #[pdf(name = "PZ")]
        pub preferred_zoom_factor: Option<f32>,
        #[pdf(name = "SeparationInfo")]
        pub separation_info: Option<PdfDictionary>,
        #[pdf(name = "Tabs")]
        pub annotations_tab_order: Option<PdfPageAnnotationsTabOrder>,
        #[pdf(name = "TemplateInstantiated")]
        pub template_instantiated: Option<PdfName>,
        #[pdf(name = "PresSteps")]
        pub pres_steps: Option<PdfDictionary>,
        // "/UserUnit": size of one user-space unit in multiples of 1/72 inch.
        #[pdf(name = "UserUnit")]
        pub user_unit: Option<f32>,
        // "/VP": viewport dictionaries.
        #[pdf(name = "VP")]
        pub viewports: Option<Arc<[PdfDictionary]>>,
        // Inheritable attributes set directly on this page, plus all
        // remaining unparsed entries in `inheritable.rest`.
        #[pdf(flatten)]
        pub inheritable: PdfPageInheritableData,
    }
}
/// Manual `Debug` so formatting runs inside a [`DagDebugState`] scope.
/// The exhaustive destructuring is deliberate: adding a field to
/// `PdfPageTreeLeaf` makes this impl fail to compile until it is listed.
impl fmt::Debug for PdfPageTreeLeaf {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        DagDebugState::scope(|_state| {
            let Self {
                ty,
                parent,
                last_modified,
                bleed_box,
                trim_box,
                art_box,
                box_color_info,
                contents,
                group,
                thumbnail,
                beads,
                duration,
                transition,
                annotations,
                additional_actions,
                metadata,
                piece_info,
                structural_parents,
                parent_web_capture_content_set_id,
                preferred_zoom_factor,
                separation_info,
                annotations_tab_order,
                template_instantiated,
                pres_steps,
                user_unit,
                viewports,
                inheritable,
            } = self;
            f.debug_struct("PdfPageTreeLeaf")
                .field("ty", ty)
                .field("parent", parent)
                .field("last_modified", last_modified)
                .field("bleed_box", bleed_box)
                .field("trim_box", trim_box)
                .field("art_box", art_box)
                .field("box_color_info", box_color_info)
                .field("contents", contents)
                .field("group", group)
                .field("thumbnail", thumbnail)
                .field("beads", beads)
                .field("duration", duration)
                .field("transition", transition)
                .field("annotations", annotations)
                .field("additional_actions", additional_actions)
                .field("metadata", metadata)
                .field("piece_info", piece_info)
                .field("structural_parents", structural_parents)
                .field(
                    "parent_web_capture_content_set_id",
                    parent_web_capture_content_set_id,
                )
                .field("preferred_zoom_factor", preferred_zoom_factor)
                .field("separation_info", separation_info)
                .field("annotations_tab_order", annotations_tab_order)
                .field("template_instantiated", template_instantiated)
                .field("pres_steps", pres_steps)
                .field("user_unit", user_unit)
                .field("viewports", viewports)
                .field("inheritable", inheritable)
                .finish()
        })
    }
}
// One entry of a page-tree node's "/Kids" array, discriminated by its
// "/Type" key: an interior node ("/Pages"), a leaf page ("/Page"), or —
// for any other "/Type" — the raw dictionary.
pdf_parse! {
    #[pdf(tag = "Type")]
    #[derive(Clone)]
    pub enum PdfPageTreeNodeOrLeaf {
        #[pdf(tag_value = "Pages")]
        Node(PdfPageTreeNode),
        #[pdf(tag_value = "Page")]
        Leaf(PdfPageTreeLeaf),
        #[pdf(other)]
        Other(PdfDictionary),
    }
}
impl PdfPageTreeNodeOrLeaf {
pub fn propagate_inheritable_data_to_leaves(&mut self) {
match self {
PdfPageTreeNodeOrLeaf::Node(v) => v.propagate_inheritable_data_to_leaves(),
PdfPageTreeNodeOrLeaf::Leaf(_) | PdfPageTreeNodeOrLeaf::Other(_) => {}
}
}
pub fn inheritable_data_mut(&mut self) -> Option<&mut PdfPageInheritableData> {
match self {
PdfPageTreeNodeOrLeaf::Node(v) => Some(&mut v.inheritable),
PdfPageTreeNodeOrLeaf::Leaf(v) => Some(&mut v.inheritable),
PdfPageTreeNodeOrLeaf::Other(_) => None,
}
}
pub fn inheritable_data(&self) -> Option<&PdfPageInheritableData> {
match self {
PdfPageTreeNodeOrLeaf::Node(v) => Some(&v.inheritable),
PdfPageTreeNodeOrLeaf::Leaf(v) => Some(&v.inheritable),
PdfPageTreeNodeOrLeaf::Other(_) => None,
}
}
}
/// Transparent `Debug`: delegates straight to the contained variant so the
/// enum wrapper never appears in output.
impl fmt::Debug for PdfPageTreeNodeOrLeaf {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Node(v) => fmt::Debug::fmt(v, f),
            Self::Leaf(v) => fmt::Debug::fmt(v, f),
            Self::Other(v) => fmt::Debug::fmt(v, f),
        }
    }
}
/// The amount by which the page is rotated clockwise when displaying or
/// printing; always a multiple of 90 degrees.
///
/// The discriminants equal the angle in degrees, so `rotation as i32`
/// (see the `From<PdfPageRotation> for i32` impl) yields 0/90/180/270.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub enum PdfPageRotation {
    /// 0 degrees — the PDF default when "/Rotate" is absent.
    #[default]
    NoRotation = 0,
    ClockwiseBy90Degrees = 90,
    By180Degrees = 180,
    ClockwiseBy270Degrees = 270,
}
impl PdfPageRotation {
    /// Maps a clockwise angle in degrees to a rotation. Any angle congruent
    /// to a multiple of 90 modulo 360 is accepted — negative angles
    /// included, thanks to `rem_euclid` — anything else yields `None`.
    pub fn from_clockwise_angle_in_degrees(angle: i32) -> Option<Self> {
        let normalized = angle.rem_euclid(360);
        match normalized {
            0 => Some(Self::NoRotation),
            90 => Some(Self::ClockwiseBy90Degrees),
            180 => Some(Self::By180Degrees),
            270 => Some(Self::ClockwiseBy270Degrees),
            _ => None,
        }
    }
    /// Wide-integer version: `angle % 360` always fits in `i32`, and any
    /// negative remainder is normalized by the `i32` version.
    pub fn from_clockwise_angle_in_degrees_i128(angle: i128) -> Option<Self> {
        Self::from_clockwise_angle_in_degrees((angle % 360) as i32)
    }
}
/// Recovers the clockwise angle in degrees (0/90/180/270); the enum's
/// discriminants are the angles themselves.
impl From<PdfPageRotation> for i32 {
    fn from(value: PdfPageRotation) -> Self {
        value as Self
    }
}
// A rotation value is never the PDF null object.
impl IsPdfNull for PdfPageRotation {
    fn is_pdf_null(&self) -> bool {
        false
    }
}
/// Parses "/Rotate": resolves the object, reads it as an integer, and
/// validates that it is a multiple of 90 degrees.
impl PdfParse for PdfPageRotation {
    fn type_name() -> Cow<'static, str> {
        "page rotation".into()
    }
    fn parse(object: PdfObject) -> Result<Self, PdfParseError> {
        let direct = PdfObjectDirect::from(object);
        let pos = direct.pos();
        let angle = PdfInteger::parse(direct.into())?;
        Self::from_clockwise_angle_in_degrees_i128(angle.value())
            .ok_or(PdfParseError::IntegerOutOfRange { pos })
    }
}
/// A fully-resolved page: inheritable attributes filled in, required
/// attributes validated, and optional boxes defaulted (see
/// [`PdfPage::parse_after_propagating_inheritable_data`]).
#[derive(Clone)]
pub struct PdfPage {
    pub ty: PdfPageType,
    /// Parent page-tree node, as an unresolved indirect reference.
    pub parent: PdfObjectIndirect,
    pub last_modified: Option<PdfDate>,
    /// Resources for the content streams (required — inherited or own).
    pub resources: PdfResourcesDictionary,
    /// "/MediaBox" (required — inherited or own).
    pub media_box: PdfRectangle,
    /// "/CropBox", defaulting to `media_box`.
    pub crop_box: PdfRectangle,
    /// "/BleedBox", defaulting to `crop_box`.
    pub bleed_box: PdfRectangle,
    /// "/TrimBox", defaulting to `crop_box`.
    pub trim_box: PdfRectangle,
    /// "/ArtBox", defaulting to `crop_box`.
    pub art_box: PdfRectangle,
    pub box_color_info: Option<PdfDictionary>,
    /// Content streams, normalized from the one-or-array "/Contents" form.
    pub contents: Arc<[PdfContentStream]>,
    /// "/Rotate", defaulting to no rotation.
    pub rotate: PdfPageRotation,
    pub group: Option<PdfDictionary>,
    pub thumbnail: Option<PdfStream>,
    pub beads: Option<Arc<[PdfDictionary]>>,
    pub duration: Option<f32>,
    pub transition: Option<PdfDictionary>,
    pub annotations: Option<Arc<[PdfDictionary]>>,
    pub additional_actions: Option<PdfDictionary>,
    pub metadata: Option<PdfStream>,
    pub piece_info: Option<PdfDictionary>,
    pub structural_parents: Option<PdfInteger>,
    pub parent_web_capture_content_set_id: Option<PdfString>,
    pub preferred_zoom_factor: Option<f32>,
    pub separation_info: Option<PdfDictionary>,
    pub annotations_tab_order: Option<PdfPageAnnotationsTabOrder>,
    pub template_instantiated: Option<PdfName>,
    pub pres_steps: Option<PdfDictionary>,
    /// "/UserUnit", defaulting to 1.0.
    pub user_unit: f32,
    pub viewports: Option<Arc<[PdfDictionary]>>,
    /// All page entries not parsed into a dedicated field.
    pub rest: PdfDictionary,
    // `Option` only to permit two-phase construction: the page is built
    // first, then rendered and the result stored. Always `Some` once
    // `parse_after_propagating_inheritable_data` has returned.
    rendered_objects: Option<PdfPageRenderedObjects>,
}
impl PdfPage {
    /// The objects produced by rendering this page's content streams.
    pub fn rendered_objects(&self) -> &PdfPageRenderedObjects {
        let Some(retval) = &self.rendered_objects else {
            // set at the end of `parse_after_propagating_inheritable_data`,
            // so this is always `Some` on a constructed page
            unreachable!();
        };
        retval
    }
    /// Converts a page-tree leaf (with inheritable attributes already
    /// propagated into it) into a fully-resolved [`PdfPage`]:
    /// - `Resources` and `MediaBox` become required,
    /// - `CropBox` defaults to `MediaBox`; `BleedBox`/`TrimBox`/`ArtBox`
    ///   default to the (resolved) `CropBox`,
    /// - `Rotate` defaults to no rotation and `UserUnit` to 1.0,
    /// - finally the content streams are rendered and the result cached.
    pub fn parse_after_propagating_inheritable_data(
        leaf: PdfPageTreeLeaf,
    ) -> Result<Self, PdfParseError> {
        let PdfPageTreeLeaf {
            ty,
            parent,
            last_modified,
            bleed_box,
            trim_box,
            art_box,
            box_color_info,
            contents,
            group,
            thumbnail,
            beads,
            duration,
            transition,
            annotations,
            additional_actions,
            metadata,
            piece_info,
            structural_parents,
            parent_web_capture_content_set_id,
            preferred_zoom_factor,
            separation_info,
            annotations_tab_order,
            template_instantiated,
            pres_steps,
            user_unit,
            viewports,
            inheritable:
                PdfPageInheritableData {
                    resources,
                    media_box,
                    crop_box,
                    rotate,
                    rest,
                },
        } = leaf;
        // error position: the page dictionary itself
        let pos = rest.pos();
        // required after inheritance: resources and MediaBox
        let resources = resources.ok_or(PdfParseError::InvalidType {
            pos,
            ty: "null",
            expected_ty: "page resources dictionary",
        })?;
        let media_box = media_box.ok_or(PdfParseError::InvalidType {
            pos,
            ty: "null",
            expected_ty: "page MediaBox rectangle",
        })?;
        let crop_box = crop_box.unwrap_or(media_box);
        let rotate = rotate.unwrap_or(PdfPageRotation::NoRotation);
        let mut retval = Self {
            ty,
            parent,
            last_modified,
            resources,
            media_box,
            crop_box,
            // the remaining boxes default to the resolved CropBox
            bleed_box: bleed_box.unwrap_or(crop_box),
            trim_box: trim_box.unwrap_or(crop_box),
            art_box: art_box.unwrap_or(crop_box),
            box_color_info,
            contents: contents.0,
            rotate,
            group,
            thumbnail,
            beads,
            duration,
            transition,
            annotations,
            additional_actions,
            metadata,
            piece_info,
            structural_parents,
            parent_web_capture_content_set_id,
            preferred_zoom_factor,
            separation_info,
            annotations_tab_order,
            template_instantiated,
            pres_steps,
            user_unit: user_unit.unwrap_or(1.0),
            viewports,
            rest,
            rendered_objects: None,
        };
        // second phase: rendering needs a complete `PdfPage` to borrow
        retval.rendered_objects = Some(PdfPageRenderedObjects::render_page(&retval)?);
        Ok(retval)
    }
}
/// Manual `Debug` so formatting runs inside a [`DagDebugState`] scope;
/// the exhaustive destructuring keeps this impl in sync with the struct.
impl fmt::Debug for PdfPage {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        DagDebugState::scope(|_state| {
            let Self {
                ty,
                parent,
                last_modified,
                resources,
                media_box,
                crop_box,
                bleed_box,
                trim_box,
                art_box,
                box_color_info,
                contents,
                rotate,
                group,
                thumbnail,
                beads,
                duration,
                transition,
                annotations,
                additional_actions,
                metadata,
                piece_info,
                structural_parents,
                parent_web_capture_content_set_id,
                preferred_zoom_factor,
                separation_info,
                annotations_tab_order,
                template_instantiated,
                pres_steps,
                user_unit,
                viewports,
                rest,
                rendered_objects,
            } = self;
            // placeholder printed for `rendered_objects` while it is `None`
            // (only possible mid-construction)
            struct Unparsed;
            impl fmt::Debug for Unparsed {
                fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                    f.write_str("<unparsed>")
                }
            }
            f.debug_struct("PdfPage")
                .field("ty", ty)
                .field("parent", parent)
                .field("last_modified", last_modified)
                .field("resources", resources)
                .field("media_box", media_box)
                .field("crop_box", crop_box)
                .field("bleed_box", bleed_box)
                .field("trim_box", trim_box)
                .field("art_box", art_box)
                .field("box_color_info", box_color_info)
                .field("contents", contents)
                .field("rotate", rotate)
                .field("group", group)
                .field("thumbnail", thumbnail)
                .field("beads", beads)
                .field("duration", duration)
                .field("transition", transition)
                .field("annotations", annotations)
                .field("additional_actions", additional_actions)
                .field("metadata", metadata)
                .field("piece_info", piece_info)
                .field("structural_parents", structural_parents)
                .field(
                    "parent_web_capture_content_set_id",
                    parent_web_capture_content_set_id,
                )
                .field("preferred_zoom_factor", preferred_zoom_factor)
                .field("separation_info", separation_info)
                .field("annotations_tab_order", annotations_tab_order)
                .field("template_instantiated", template_instantiated)
                .field("pres_steps", pres_steps)
                .field("user_unit", user_unit)
                .field("viewports", viewports)
                .field("rest", rest)
                .field(
                    "rendered_objects",
                    if let Some(rendered_objects) = rendered_objects {
                        rendered_objects
                    } else {
                        &Unparsed
                    },
                )
                .finish()
        })
    }
}
/// Result of rendering a page's content streams. Currently empty:
/// `render_page` replays all operators (validating them as it goes) but
/// does not yet retain any extracted objects.
#[derive(Clone, Debug)]
pub struct PdfPageRenderedObjects {}
impl PdfPageRenderedObjects {
    /// Replays every operator of each of the page's content streams
    /// through a fresh [`PdfRenderState`]. Decoding or operator errors
    /// abort the whole page; nothing is retained yet on success.
    fn render_page(page: &PdfPage) -> Result<Self, PdfParseError> {
        let mut state = PdfRenderState::new(page);
        for stream in page.contents.iter() {
            let decoded = stream.decoded_data();
            for op in decoded.as_ref()?.operators.iter() {
                op.render(&mut state)?;
            }
        }
        Ok(Self {})
    }
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,325 +0,0 @@
use std::{collections::BTreeMap, sync::Arc};
use crate::{
pdf::{
font::{
PdfFontToUnicode,
type_1_parse::{PsFile, Token},
},
object::{PdfName, PdfObjectDirect, PdfString},
parse::{PdfInputPosition, PdfParseError},
},
util::ArcOrRef,
};
/// Parser for a font's ToUnicode CMap stream, consuming tokens from a
/// PostScript tokenizer ([`PsFile`]).
pub(crate) struct ToUnicodeParser {
    tokenizer: PsFile,
}
/// Builds the "unexpected token in ToUnicode stream" parse error; the
/// token is captured via its `Debug` form (`None` for end of input).
#[track_caller]
fn invalid_token_err<T>(pos: PdfInputPosition, token: Option<Token>) -> Result<T, PdfParseError> {
    let token = format!("{token:?}");
    Err(PdfParseError::InvalidTokenInToUnicodeStream { pos, token })
}
/// Strict, token-by-token parser for the `begincmap` .. `endcmap` CMap
/// program found in a font's ToUnicode stream. Every `expect_*` helper
/// skips comments/whitespace, records the position for error reporting,
/// then consumes exactly one token.
impl ToUnicodeParser {
    /// Wraps an already-constructed tokenizer.
    pub(crate) fn new(tokenizer: PsFile) -> Self {
        Self { tokenizer }
    }
    /// Consumes the next token, which must be a string; returns its bytes.
    fn expect_any_string(&mut self) -> Result<Vec<u8>, PdfParseError> {
        self.tokenizer.skip_comments_and_whitespace();
        let pos = self.tokenizer.pos();
        match self.tokenizer.next_token()? {
            Some(Token::String(string)) => Ok(string),
            token => invalid_token_err(pos, token),
        }
    }
    /// Consumes the next token, which must be a string of exactly
    /// `expected_len` bytes.
    pub(crate) fn expect_string_with_len(
        &mut self,
        expected_len: usize,
    ) -> Result<Vec<u8>, PdfParseError> {
        self.tokenizer.skip_comments_and_whitespace();
        let pos = self.tokenizer.pos();
        match self.tokenizer.next_token()? {
            Some(Token::String(string)) if string.len() == expected_len => Ok(string),
            token => invalid_token_err(pos, token),
        }
    }
    /// Consumes the next token, which must be the literal name
    /// `/expected_name`.
    pub(crate) fn expect_literal_name(
        &mut self,
        expected_name: &[u8],
    ) -> Result<(), PdfParseError> {
        self.tokenizer.skip_comments_and_whitespace();
        let pos = self.tokenizer.pos();
        match self.tokenizer.next_token()? {
            Some(Token::LiteralName(name)) if name == expected_name => Ok(()),
            token => invalid_token_err(pos, token),
        }
    }
    /// Consumes the next token, which must be some literal name; returns it.
    pub(crate) fn expect_any_literal_name(&mut self) -> Result<Vec<u8>, PdfParseError> {
        self.tokenizer.skip_comments_and_whitespace();
        let pos = self.tokenizer.pos();
        match self.tokenizer.next_token()? {
            Some(Token::LiteralName(name)) => Ok(name),
            token => invalid_token_err(pos, token),
        }
    }
    /// Consumes the next token, which must be the executable (bare) name
    /// `expected_name`.
    pub(crate) fn expect_executable_name(
        &mut self,
        expected_name: &[u8],
    ) -> Result<(), PdfParseError> {
        self.tokenizer.skip_comments_and_whitespace();
        let pos = self.tokenizer.pos();
        match self.tokenizer.next_token()? {
            Some(Token::ExecutableName(name)) if name == expected_name => Ok(()),
            token => invalid_token_err(pos, token),
        }
    }
    /// Consumes the next token, which must equal `expected_token`.
    pub(crate) fn expect(&mut self, expected_token: Token) -> Result<(), PdfParseError> {
        self.tokenizer.skip_comments_and_whitespace();
        let pos = self.tokenizer.pos();
        match self.tokenizer.next_token()? {
            Some(token) if token == expected_token => Ok(()),
            token => invalid_token_err(pos, token),
        }
    }
    /// Consumes the next token, which must be an integer; returns its value.
    pub(crate) fn expect_integer(&mut self) -> Result<i128, PdfParseError> {
        self.tokenizer.skip_comments_and_whitespace();
        let pos = self.tokenizer.pos();
        match self.tokenizer.next_token()? {
            Some(Token::Integer(value)) => Ok(value),
            token => invalid_token_err(pos, token),
        }
    }
    /// Parses a `<< /Name value ... >>` dictionary, invoking
    /// `entry_callback(name, value_pos, value)` for each entry. Values must
    /// be single tokens (no nested composites are handled here).
    pub(crate) fn parse_dict(
        &mut self,
        mut entry_callback: impl FnMut(Vec<u8>, PdfInputPosition, Token) -> Result<(), PdfParseError>,
    ) -> Result<(), PdfParseError> {
        self.expect(Token::DictStart)?;
        loop {
            self.tokenizer.skip_comments_and_whitespace();
            let name_pos = self.tokenizer.pos();
            match self.tokenizer.next_token()? {
                Some(Token::DictEnd) => return Ok(()),
                Some(Token::LiteralName(name)) => {
                    self.tokenizer.skip_comments_and_whitespace();
                    let value_pos = self.tokenizer.pos();
                    let Some(value) = self.tokenizer.next_token()? else {
                        // EOF in the middle of a dictionary
                        return invalid_token_err(value_pos, None);
                    };
                    entry_callback(name, value_pos, value)?;
                }
                token => {
                    return invalid_token_err(name_pos, token);
                }
            }
        }
    }
    /// Parses the whole ToUnicode CMap program and builds the
    /// code-to-Unicode mapping.
    ///
    /// Expects the conventional fixed shape: the `/CIDInit /ProcSet
    /// findresource` prologue, a `/CIDSystemInfo` dict, `/CMapName` and
    /// `/CMapType 2`, exactly one `begincodespacerange` entry, then any
    /// number of `beginbfrange`/`beginbfchar` sections up to `endcmap`,
    /// followed by the standard epilogue and end of input. Anything else
    /// is an error (or a `todo!` for shapes not seen yet).
    ///
    /// `base_map` is stored unchanged in the resulting
    /// [`PdfFontToUnicode`].
    pub(crate) fn parse(
        mut self,
        base_map: Option<PdfObjectDirect>,
    ) -> Result<PdfFontToUnicode, PdfParseError> {
        self.tokenizer.skip_comments_and_whitespace();
        // prologue: /CIDInit /ProcSet findresource begin N dict begin begincmap
        self.expect_literal_name(b"CIDInit")?;
        self.expect_literal_name(b"ProcSet")?;
        self.expect_executable_name(b"findresource")?;
        self.expect_executable_name(b"begin")?;
        self.expect_integer()?;
        self.expect_executable_name(b"dict")?;
        self.expect_executable_name(b"begin")?;
        self.expect_executable_name(b"begincmap")?;
        // /CIDSystemInfo << /Registry (..) /Ordering (..) /Supplement n >> def
        self.expect_literal_name(b"CIDSystemInfo")?;
        let mut registry = None;
        let mut ordering = None;
        let mut supplement = None;
        // NOTE(review): registry/ordering/supplement are type-checked but
        // never read after this point.
        self.parse_dict(|name, value_pos, value| match &*name {
            b"Registry" => {
                let Token::String(v) = value else {
                    return invalid_token_err(value_pos, Some(value));
                };
                registry = Some(v);
                Ok(())
            }
            b"Ordering" => {
                let Token::String(v) = value else {
                    return invalid_token_err(value_pos, Some(value));
                };
                ordering = Some(v);
                Ok(())
            }
            b"Supplement" => {
                let Token::Integer(v) = value else {
                    return invalid_token_err(value_pos, Some(value));
                };
                supplement = Some(v);
                Ok(())
            }
            _ => todo!("{}: {value:?}", name.escape_ascii()),
        })?;
        self.expect_executable_name(b"def")?;
        // /CMapName /<name> def
        self.expect_literal_name(b"CMapName")?;
        self.tokenizer.skip_comments_and_whitespace();
        let char_map_name_pos = self.tokenizer.pos();
        let char_map_name = self.expect_any_literal_name()?;
        self.expect_executable_name(b"def")?;
        // /CMapType 2 def — only type 2 (ToUnicode) is accepted
        self.expect_literal_name(b"CMapType")?;
        self.expect(Token::Integer(2))?;
        self.expect_executable_name(b"def")?;
        // exactly one codespace range is supported; its strings fix the
        // source code width used by all bf sections below
        self.expect(Token::Integer(1))?;
        self.expect_executable_name(b"begincodespacerange")?;
        self.tokenizer.skip_comments_and_whitespace();
        let range_start_pos = self.tokenizer.pos();
        let range_start = self.expect_any_string()?;
        if range_start.is_empty() {
            return invalid_token_err(range_start_pos, Some(Token::String(range_start)));
        }
        self.tokenizer.skip_comments_and_whitespace();
        let range_end_pos = self.tokenizer.pos();
        let range_end = self.expect_string_with_len(range_start.len())?;
        self.expect_executable_name(b"endcodespacerange")?;
        let mut to_unicode_map: BTreeMap<PdfString, Arc<str>> = BTreeMap::new();
        // scratch buffer reused across all mappings
        let mut dest_str = String::new();
        // decodes `dest_utf16_be` (UTF-16BE) and records src -> text
        let mut insert_mapping = |src_pos: PdfInputPosition,
                                  src: &[u8],
                                  dest_pos: PdfInputPosition,
                                  dest_utf16_be: &[u8]|
         -> Result<(), PdfParseError> {
            dest_str.clear();
            for ch in char::decode_utf16(
                dest_utf16_be
                    .chunks(2)
                    .map(|chunk| u16::from_be_bytes([chunk[0], chunk[1]])),
            ) {
                match ch {
                    Ok(ch) => dest_str.push(ch),
                    Err(_) => {
                        return Err(PdfParseError::InvalidUtf16 { pos: dest_pos });
                    }
                }
            }
            to_unicode_map.insert(
                PdfString::new(src_pos, ArcOrRef::Arc(src.into())),
                dest_str.as_str().into(),
            );
            Ok(())
        };
        // body: alternating `N beginbfrange ... endbfrange` /
        // `N beginbfchar ... endbfchar` sections until `endcmap`
        loop {
            match self.tokenizer.next_token()? {
                Some(Token::Integer(size)) => match self.tokenizer.next_token()? {
                    Some(Token::ExecutableName(name)) if name == b"beginbfrange" => {
                        for _ in 0..size {
                            // each entry: <srcLow> <srcHigh> (dest | [dests])
                            self.tokenizer.skip_comments_and_whitespace();
                            let src_pos = self.tokenizer.pos();
                            let src_low = self.expect_string_with_len(range_start.len())?;
                            self.tokenizer.skip_comments_and_whitespace();
                            let src_high_pos = self.tokenizer.pos();
                            let src_high = self.expect_string_with_len(range_start.len())?;
                            // only the final byte may vary across the range
                            if src_low.split_last().map(|(_, prefix)| prefix)
                                != src_high.split_last().map(|(_, prefix)| prefix)
                            {
                                return invalid_token_err(
                                    src_high_pos,
                                    Some(Token::String(src_high)),
                                );
                            }
                            let src_last_range = *src_low.last().expect("known to be non-empty")
                                ..=*src_high.last().expect("known to be non-empty");
                            self.tokenizer.skip_comments_and_whitespace();
                            let dest_pos = self.tokenizer.pos();
                            match self.tokenizer.next_token()? {
                                // single string dest: maps the whole source
                                // range by incrementing the dest's last byte
                                Some(Token::String(dest))
                                    if dest.len() >= 2 && dest.len() % 2 == 0 =>
                                {
                                    let mut src = src_low;
                                    for (index, src_last_byte) in src_last_range.enumerate() {
                                        *src.last_mut().expect("known to be non-empty") =
                                            src_last_byte;
                                        let mut dest = dest.clone();
                                        let [.., last] = &mut *dest else {
                                            unreachable!();
                                        };
                                        // NOTE(review): can overflow u8
                                        // (panics in debug) if the range
                                        // crosses a 256-boundary of the
                                        // dest's last byte — confirm inputs
                                        *last += index as u8;
                                        insert_mapping(src_pos, &src, dest_pos, &dest)?;
                                    }
                                }
                                Some(token @ Token::String(_)) => {
                                    todo!("odd number of dest bytes: {token:?}");
                                }
                                // array dest: one explicit string per code
                                Some(Token::ArrayStart) => {
                                    let mut src = src_low;
                                    for src_last_byte in src_last_range {
                                        *src.last_mut().expect("known to be non-empty") =
                                            src_last_byte;
                                        self.tokenizer.skip_comments_and_whitespace();
                                        let dest_pos = self.tokenizer.pos();
                                        match self.tokenizer.next_token()? {
                                            Some(Token::String(dest))
                                                if dest.len() >= 2 && dest.len() % 2 == 0 =>
                                            {
                                                insert_mapping(src_pos, &src, dest_pos, &dest)?;
                                            }
                                            Some(token @ Token::String(_)) => {
                                                todo!("odd number of dest bytes: {token:?}");
                                            }
                                            token => return invalid_token_err(dest_pos, token),
                                        }
                                    }
                                    self.expect(Token::ArrayEnd)?;
                                }
                                token => return invalid_token_err(dest_pos, token),
                            }
                        }
                        self.expect_executable_name(b"endbfrange")?;
                    }
                    Some(Token::ExecutableName(name)) if name == b"beginbfchar" => {
                        // each entry: <src> <destUtf16Be>
                        for _ in 0..size {
                            self.tokenizer.skip_comments_and_whitespace();
                            let src_pos = self.tokenizer.pos();
                            let src = self.expect_string_with_len(range_start.len())?;
                            self.tokenizer.skip_comments_and_whitespace();
                            let dest_pos = self.tokenizer.pos();
                            match self.tokenizer.next_token()? {
                                Some(Token::String(dest)) if dest.len() % 2 == 0 => {
                                    insert_mapping(src_pos, &src, dest_pos, &dest)?;
                                }
                                Some(token @ Token::String(_)) => {
                                    todo!("odd number of dest bytes: {token:?}");
                                }
                                token => return invalid_token_err(dest_pos, token),
                            }
                        }
                        self.expect_executable_name(b"endbfchar")?;
                    }
                    token => todo!("{token:?}"),
                },
                Some(Token::ExecutableName(name)) if name == b"endcmap" => {
                    break;
                }
                token => todo!("{token:?}"),
            }
        }
        // epilogue: CMapName currentdict /CMap defineresource pop end end
        self.expect_executable_name(b"CMapName")?;
        self.expect_executable_name(b"currentdict")?;
        self.expect_literal_name(b"CMap")?;
        self.expect_executable_name(b"defineresource")?;
        self.expect_executable_name(b"pop")?;
        self.expect_executable_name(b"end")?;
        self.expect_executable_name(b"end")?;
        // anything after the epilogue is an error
        self.tokenizer.skip_comments_and_whitespace();
        let eof_pos = self.tokenizer.pos();
        if let token @ Some(_) = self.tokenizer.next_token()? {
            return invalid_token_err(eof_pos, token);
        }
        Ok(PdfFontToUnicode {
            base_map,
            char_map_name: PdfName::new(char_map_name_pos, Arc::<[u8]>::from(char_map_name)),
            src_ranges: Arc::new([
                PdfString::new(range_start_pos, ArcOrRef::Arc(range_start.into()))
                    ..=PdfString::new(range_end_pos, ArcOrRef::Arc(range_end.into())),
            ]),
            to_unicode_map: Arc::new(to_unicode_map),
        })
    }
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,66 +0,0 @@
use crate::pdf::{
object::{PdfDictionary, PdfName},
parse::{PdfInputPosition, PdfParse, PdfParseError},
pdf_parse,
};
pub mod flate;
// A stream "/Filter" name. The ten standard PDF filters get dedicated
// variants; any other name is preserved in `Unknown` so the error can
// report it.
pdf_parse! {
    #[pdf(name)]
    #[derive(Clone, Debug, PartialEq, Eq)]
    #[non_exhaustive]
    pub enum PdfStreamFilter {
        #[pdf(name = "ASCIIHexDecode")]
        AsciiHexDecode,
        #[pdf(name = "ASCII85Decode")]
        Ascii85Decode,
        #[pdf(name = "LZWDecode")]
        LzwDecode,
        #[pdf(name = "FlateDecode")]
        FlateDecode,
        #[pdf(name = "RunLengthDecode")]
        RunLengthDecode,
        #[pdf(name = "CCITTFaxDecode")]
        CcittFaxDecode,
        #[pdf(name = "JBIG2Decode")]
        Jbig2Decode,
        #[pdf(name = "DCTDecode")]
        DctDecode,
        #[pdf(name = "JPXDecode")]
        JpxDecode,
        #[pdf(name = "Crypt")]
        Crypt,
        #[pdf(other)]
        Unknown(PdfName),
    }
}
impl PdfStreamFilter {
    /// Decodes `encoded_data` with this filter, using `filter_parms` (the
    /// matching "/DecodeParms" dictionary) and `stream_pos` for error
    /// reporting.
    ///
    /// Only `FlateDecode` is implemented so far: the other standard
    /// filters hit `todo!()`, and an unrecognized filter name is reported
    /// as `UnknownStreamFilter`.
    pub fn decode_stream_data(
        &self,
        filter_parms: PdfDictionary,
        stream_pos: PdfInputPosition,
        encoded_data: &[u8],
    ) -> Result<Vec<u8>, PdfParseError> {
        match self {
            Self::FlateDecode => flate::PdfFilterParmsFlateDecode::parse(filter_parms.into())?
                .decode_stream_data(stream_pos, encoded_data),
            Self::Unknown(filter) => Err(PdfParseError::UnknownStreamFilter {
                pos: stream_pos,
                filter: filter.clone(),
            }),
            Self::AsciiHexDecode => todo!(),
            Self::Ascii85Decode => todo!(),
            Self::LzwDecode => todo!(),
            Self::RunLengthDecode => todo!(),
            Self::CcittFaxDecode => todo!(),
            Self::Jbig2Decode => todo!(),
            Self::DctDecode => todo!(),
            Self::JpxDecode => todo!(),
            Self::Crypt => todo!(),
        }
    }
}

View file

@ -1,74 +0,0 @@
use crate::pdf::{
object::PdfDictionary,
parse::{PdfInputPosition, PdfParseError},
pdf_parse,
stream_filters::PdfStreamFilter,
};
use std::{io::Read, num::NonZero};
// "/DecodeParms" for a FlateDecode filter. All keys are optional; the
// accessor methods on the impl below supply the defaults.
pdf_parse! {
    #[pdf]
    #[derive(Clone, Debug, Default)]
    pub struct PdfFilterParmsFlateDecode {
        // "/Predictor": 1 means no prediction (the only supported value so far).
        #[pdf(name = "Predictor")]
        pub predictor: Option<NonZero<u32>>,
        // "/Colors", "/BitsPerComponent", "/Columns": sample layout used by
        // the PNG/TIFF predictors.
        #[pdf(name = "Colors")]
        pub colors: Option<NonZero<u32>>,
        #[pdf(name = "BitsPerComponent")]
        pub bits_per_component: Option<NonZero<u32>>,
        #[pdf(name = "Columns")]
        pub columns: Option<NonZero<u32>>,
        // Remaining entries, unparsed.
        #[pdf(flatten)]
        pub rest: PdfDictionary,
    }
}
impl PdfFilterParmsFlateDecode {
    /// The filter these parameters belong to.
    pub const FILTER: PdfStreamFilter = PdfStreamFilter::FlateDecode;
    // Values used when the corresponding dictionary key is absent.
    pub const DEFAULT_PREDICTOR: NonZero<u32> = const { NonZero::new(1).unwrap() };
    pub const DEFAULT_COLORS: NonZero<u32> = const { NonZero::new(1).unwrap() };
    pub const DEFAULT_BITS_PER_COMPONENT: NonZero<u32> = const { NonZero::new(8).unwrap() };
    pub const DEFAULT_COLUMNS: NonZero<u32> = const { NonZero::new(1).unwrap() };
    /// "/Predictor" with its default applied.
    pub fn predictor(&self) -> NonZero<u32> {
        self.predictor.unwrap_or(Self::DEFAULT_PREDICTOR)
    }
    /// "/Colors" with its default applied.
    pub fn colors(&self) -> NonZero<u32> {
        self.colors.unwrap_or(Self::DEFAULT_COLORS)
    }
    /// "/BitsPerComponent" with its default applied.
    pub fn bits_per_component(&self) -> NonZero<u32> {
        self.bits_per_component
            .unwrap_or(Self::DEFAULT_BITS_PER_COMPONENT)
    }
    /// "/Columns" with its default applied.
    pub fn columns(&self) -> NonZero<u32> {
        self.columns.unwrap_or(Self::DEFAULT_COLUMNS)
    }
    /// Inflates `encoded_data` (zlib format) and applies the predictor.
    ///
    /// Only predictor 1 ("no prediction") is implemented; any other
    /// predictor hits `todo!()`. Decompression errors are reported as
    /// `StreamFilterError` at `stream_pos`.
    pub fn decode_stream_data(
        &self,
        stream_pos: PdfInputPosition,
        encoded_data: &[u8],
    ) -> Result<Vec<u8>, PdfParseError> {
        let mut decoded_data = vec![];
        flate2::bufread::ZlibDecoder::new(encoded_data)
            .read_to_end(&mut decoded_data)
            .map_err(|e| PdfParseError::StreamFilterError {
                pos: stream_pos,
                filter: Self::FILTER.into(),
                error: e.to_string(),
            })?;
        // `colors()`/`bits_per_component()`/`columns()` only matter for the
        // TIFF/PNG predictors, which are not implemented yet — so they are
        // not read here (previously they were bound to unused locals).
        let predictor = self.predictor();
        match predictor {
            Self::DEFAULT_PREDICTOR => Ok(decoded_data),
            _ => todo!("{predictor}"),
        }
    }
}

1115
src/quad_tree.rs Normal file

File diff suppressed because it is too large Load diff

View file

@ -1,382 +0,0 @@
use std::{
any::{Any, TypeId},
borrow::Borrow,
cell::Cell,
collections::HashMap,
fmt,
hash::{Hash, Hasher},
sync::Arc,
};
/// Either a shared [`Arc<T>`] or a plain borrow of `T`.
///
/// For comparison, hashing, and formatting it behaves exactly like the
/// pointee (every trait below delegates through `Deref`), while still
/// allowing cheap promotion to an owning [`Arc`] via
/// [`ArcOrRef::into_arc`] / [`ArcOrRef::make_arc`].
pub enum ArcOrRef<'a, T: ?Sized> {
    Arc(Arc<T>),
    Ref(&'a T),
}
impl<'a, T: ?Sized> AsRef<T> for ArcOrRef<'a, T> {
    fn as_ref(&self) -> &T {
        &**self
    }
}
impl<'a, T: ?Sized> Borrow<T> for ArcOrRef<'a, T> {
    fn borrow(&self) -> &T {
        &**self
    }
}
impl<'a, T: ?Sized> From<Arc<T>> for ArcOrRef<'a, T> {
    fn from(arc: Arc<T>) -> Self {
        Self::Arc(arc)
    }
}
impl<'a, T: ?Sized> From<&'a T> for ArcOrRef<'a, T> {
    fn from(r: &'a T) -> Self {
        Self::Ref(r)
    }
}
impl<'a, T: ?Sized> Default for ArcOrRef<'a, T>
where
    &'a T: Default,
{
    fn default() -> Self {
        Self::Ref(<&'a T>::default())
    }
}
impl<T: ?Sized> Clone for ArcOrRef<'_, T> {
    fn clone(&self) -> Self {
        // never copies the pointee: either an `Arc` refcount bump or a
        // copy of the borrow
        match self {
            Self::Arc(arc) => Self::Arc(Arc::clone(arc)),
            Self::Ref(r) => Self::Ref(r),
        }
    }
}
impl<T: ?Sized + Hash> Hash for ArcOrRef<'_, T> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        (**self).hash(state)
    }
}
impl<'a, 'b, T: ?Sized + PartialEq<U>, U: ?Sized> PartialEq<ArcOrRef<'b, U>> for ArcOrRef<'a, T> {
    fn eq(&self, other: &ArcOrRef<'b, U>) -> bool {
        **self == **other
    }
}
impl<T: ?Sized + Eq> Eq for ArcOrRef<'_, T> {}
impl<'a, 'b, T: ?Sized + PartialOrd<U>, U: ?Sized> PartialOrd<ArcOrRef<'b, U>> for ArcOrRef<'a, T> {
    fn partial_cmp(&self, other: &ArcOrRef<'b, U>) -> Option<std::cmp::Ordering> {
        (**self).partial_cmp(&**other)
    }
}
impl<T: ?Sized + Ord> Ord for ArcOrRef<'_, T> {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        (**self).cmp(&**other)
    }
}
impl<T: ?Sized> std::ops::Deref for ArcOrRef<'_, T> {
    type Target = T;
    fn deref(&self) -> &Self::Target {
        match self {
            Self::Arc(arc) => arc,
            Self::Ref(r) => r,
        }
    }
}
impl<T: ?Sized + fmt::Debug> fmt::Debug for ArcOrRef<'_, T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(&**self, f)
    }
}
impl<T: ?Sized + fmt::Display> fmt::Display for ArcOrRef<'_, T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(&**self, f)
    }
}
/// a stable alternative to `CloneToUninit` for `Arc`
pub trait ArcFromRef {
    /// like `Arc::new(Self::clone(self))` but works for unsized types too
    fn arc_from_ref(&self) -> Arc<Self>;
    /// generic version of `Arc::make_mut`
    fn make_mut(this: &mut Arc<Self>) -> &mut Self;
}
impl<T: Clone> ArcFromRef for T {
    fn arc_from_ref(&self) -> Arc<Self> {
        Arc::new(self.clone())
    }
    fn make_mut(this: &mut Arc<Self>) -> &mut Self {
        Arc::make_mut(this)
    }
}
impl<T: Clone> ArcFromRef for [T] {
    fn arc_from_ref(&self) -> Arc<Self> {
        Arc::from(self)
    }
    fn make_mut(this: &mut Arc<Self>) -> &mut Self {
        Arc::make_mut(this)
    }
}
impl ArcFromRef for str {
    fn arc_from_ref(&self) -> Arc<Self> {
        Arc::from(self)
    }
    fn make_mut(this: &mut Arc<Self>) -> &mut Self {
        Arc::make_mut(this)
    }
}
impl<'a, T: ?Sized + ArcFromRef> ArcOrRef<'a, T> {
    /// Converts into an owning `Arc`, cloning the pointee only in the
    /// `Ref` case.
    pub fn into_arc(this: Self) -> Arc<T> {
        match this {
            Self::Arc(arc) => arc,
            Self::Ref(r) => r.arc_from_ref(),
        }
    }
    /// Promotes `this` to the `Arc` variant in place (cloning the pointee
    /// if it was borrowed) and returns a mutable handle to the `Arc`.
    pub fn make_arc(this: &mut Self) -> &mut Arc<T> {
        if let Self::Ref(r) = *this {
            *this = Self::Arc(T::arc_from_ref(r));
        }
        match this {
            Self::Arc(arc) => arc,
            Self::Ref(_) => unreachable!("just promoted to the Arc variant"),
        }
    }
    /// Like [`Arc::make_mut`]: unique mutable access to the pointee,
    /// cloning as needed.
    pub fn make_mut(this: &mut Self) -> &mut T {
        T::make_mut(Self::make_arc(this))
    }
}
// Private supertrait preventing downstream impls of
// `SupportsDagDebugState` (sealed-trait pattern).
trait DagDebugStateSealed {}
/// Handle types that can participate in DAG-aware `Debug` printing:
/// cheaply clonable, with a stable identity key (here: the allocation's
/// address) so a node reached twice can be recognized and abbreviated.
#[expect(private_bounds)]
pub trait SupportsDagDebugState: DagDebugStateSealed + 'static + Clone {
    /// Identity key; equal for two handles to the same allocation.
    type Key: Clone + Hash + Eq + 'static;
    /// Returns the identity key of `this`.
    fn key(this: &Self) -> Self::Key;
}
// `Arc`s of sized types, of slices, and of `str` all key on the pointer
// returned by `Arc::as_ptr`.
impl<T: 'static> DagDebugStateSealed for Arc<T> {}
impl<T: 'static> SupportsDagDebugState for Arc<T> {
    type Key = *const T;
    fn key(this: &Self) -> Self::Key {
        Arc::as_ptr(this)
    }
}
impl<T: 'static> DagDebugStateSealed for Arc<[T]> {}
impl<T: 'static> SupportsDagDebugState for Arc<[T]> {
    type Key = *const [T];
    fn key(this: &Self) -> Self::Key {
        Arc::as_ptr(this)
    }
}
impl DagDebugStateSealed for Arc<str> {}
impl SupportsDagDebugState for Arc<str> {
    type Key = *const str;
    fn key(this: &Self) -> Self::Key {
        Arc::as_ptr(this)
    }
}
/// object-safe view of [`DagDebugStatePart<T>`] so parts for different `T`
/// can live together in one type-erased map
trait DagDebugStatePartTrait: 'static {
    /// drop all recorded ids (invoked when the outermost debug scope ends)
    fn reset(&mut self);
    /// upcast so callers can `downcast_mut` back to the concrete part type
    fn as_any_mut(&mut self) -> &mut dyn Any;
}
/// per-type id table used by `DagDebugState`
struct DagDebugStatePart<T: SupportsDagDebugState> {
    /// identity key -> (assigned debug id, clone of the value); storing the
    /// clone keeps the value's allocation — and thus its pointer key — alive
    table: HashMap<T::Key, (u64, T)>,
    /// id that will be handed to the next newly-seen value
    next_id: u64,
}
impl<T: SupportsDagDebugState> DagDebugStatePartTrait for DagDebugStatePart<T> {
    /// forget every recorded value so id numbering restarts from zero
    fn reset(&mut self) {
        self.table.clear();
        self.next_id = 0;
    }
    /// upcast used by `DagDebugState::with_part` to downcast back to `Self`
    fn as_any_mut(&mut self) -> &mut dyn Any {
        self
    }
}
impl<T: SupportsDagDebugState> DagDebugStatePart<T> {
    /// register `value`: hand back its existing id, or assign the next one
    fn insert(&mut self, value: &T) -> DagDebugStateInsertResult {
        use std::collections::hash_map::Entry;
        match self.table.entry(T::key(value)) {
            Entry::Vacant(slot) => {
                let id = self.next_id;
                self.next_id += 1;
                slot.insert((id, T::clone(value)));
                DagDebugStateInsertResult::New { id }
            }
            Entry::Occupied(slot) => DagDebugStateInsertResult::Old { id: slot.get().0 },
        }
    }
}
impl<T: SupportsDagDebugState> Default for DagDebugStatePart<T> {
    /// empty table, ids starting at zero
    fn default() -> Self {
        Self {
            next_id: 0,
            table: Default::default(),
        }
    }
}
/// bookkeeping used while debug-printing a DAG: remembers which nodes were
/// already printed in the current scope so shared subtrees can be
/// abbreviated to just their id
pub struct DagDebugState {
    /// one type-erased [`DagDebugStatePart`] per value type, keyed by the
    /// part's `TypeId`
    parts: std::cell::RefCell<HashMap<TypeId, Box<dyn DagDebugStatePartTrait>>>,
    /// nesting depth of [`DagDebugState::scope`] calls; tables reset when it
    /// returns to zero
    ref_count: Cell<usize>,
}
/// outcome of registering a node with [`DagDebugState::insert`]
#[derive(Clone, Copy, Debug)]
pub enum DagDebugStateInsertResult {
    /// first sighting of this node; it was assigned `id`
    New { id: u64 },
    /// the node was already registered under `id`
    Old { id: u64 },
}
impl DagDebugStateInsertResult {
    /// the node's id, whether newly assigned or previously recorded
    pub fn id(self) -> u64 {
        match self {
            Self::New { id } => id,
            Self::Old { id } => id,
        }
    }
}
impl DagDebugState {
    /// run `f` on the (lazily created) part that tracks values of type `T`
    fn with_part<T: SupportsDagDebugState, R>(
        &self,
        f: impl FnOnce(&mut DagDebugStatePart<T>) -> R,
    ) -> R {
        let mut parts = self.parts.borrow_mut();
        // each concrete `T` gets one boxed part, stored under the part's own
        // TypeId, so the downcast below can never fail
        let Some(part) = parts
            .entry(TypeId::of::<DagDebugStatePart<T>>())
            .or_insert_with(|| Box::new(DagDebugStatePart::<T>::default()))
            .as_any_mut()
            .downcast_mut::<DagDebugStatePart<T>>()
        else {
            unreachable!()
        };
        f(part)
    }
    /// record `value`, returning whether it was seen before plus its debug id
    pub fn insert<T: SupportsDagDebugState>(&self, value: &T) -> DagDebugStateInsertResult {
        self.with_part(|part: &mut DagDebugStatePart<T>| part.insert(value))
    }
    /// formatter that prints `value`'s full `Debug` output on its first
    /// sighting in this scope and `abbreviated` on later ones; both renderings
    /// are prefixed with `#<id> `
    pub fn debug_or_id<'a, T: SupportsDagDebugState + fmt::Debug, Abbreviated: fmt::Display>(
        &self,
        value: &'a T,
        abbreviated: Abbreviated,
    ) -> impl fmt::Debug + fmt::Display + use<'a, T, Abbreviated> {
        self.debug_or_id_with(value, fmt::Debug::fmt, move |f| abbreviated.fmt(f))
    }
    /// like [`DagDebugState::debug_or_id`] but with caller-supplied closures
    /// for both the full and the abbreviated rendering
    pub fn debug_or_id_with<
        'a,
        T: SupportsDagDebugState,
        DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result,
        DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result,
    >(
        &self,
        value: &'a T,
        debug_value: DebugValue,
        debug_abbreviated: DebugAbbreviated,
    ) -> impl fmt::Debug + fmt::Display + use<'a, T, DebugValue, DebugAbbreviated> {
        struct DebugOrIdWith<
            'a,
            T: SupportsDagDebugState,
            DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result,
            DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result,
        > {
            // new-vs-old is decided once, at construction, not at format time
            insert_result: DagDebugStateInsertResult,
            value: &'a T,
            debug_value: DebugValue,
            debug_abbreviated: DebugAbbreviated,
        }
        impl<
            'a,
            T: SupportsDagDebugState,
            DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result,
            DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result,
        > fmt::Debug for DebugOrIdWith<'a, T, DebugValue, DebugAbbreviated>
        {
            // Debug and Display render identically
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                fmt::Display::fmt(self, f)
            }
        }
        impl<
            'a,
            T: SupportsDagDebugState,
            DebugValue: Fn(&'a T, &mut fmt::Formatter<'_>) -> fmt::Result,
            DebugAbbreviated: Fn(&mut fmt::Formatter<'_>) -> fmt::Result,
        > fmt::Display for DebugOrIdWith<'a, T, DebugValue, DebugAbbreviated>
        {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                // `#<id> ` prefix, then either the full or abbreviated form
                write!(f, "#{} ", self.insert_result.id())?;
                match self.insert_result {
                    DagDebugStateInsertResult::New { id: _ } => (self.debug_value)(self.value, f),
                    DagDebugStateInsertResult::Old { id: _ } => (self.debug_abbreviated)(f),
                }
            }
        }
        DebugOrIdWith {
            insert_result: self.insert(value),
            value,
            debug_value,
            debug_abbreviated,
        }
    }
    /// increment `ref_count` and return a guard that decrements it on drop;
    /// when the count returns to zero every per-type table is cleared so id
    /// numbering starts over for the next top-level scope
    #[must_use]
    fn inc_ref_count_scope(&self) -> impl Sized {
        struct DecRefCountOnDrop<'a>(&'a DagDebugState);
        impl Drop for DecRefCountOnDrop<'_> {
            fn drop(&mut self) {
                self.0.ref_count.set(self.0.ref_count.get() - 1);
                if self.0.ref_count.get() == 0 {
                    self.0
                        .parts
                        .borrow_mut()
                        .values_mut()
                        .for_each(|v| v.reset());
                }
            }
        }
        self.ref_count.set(
            self.ref_count
                .get()
                .checked_add(1)
                .expect("too many nested calls"),
        );
        DecRefCountOnDrop(self)
    }
    /// run `f` with this thread's shared `DagDebugState`; nested calls on the
    /// same thread observe the same state, so ids stay stable until the
    /// outermost `scope` call returns
    pub fn scope<R>(f: impl FnOnce(&Self) -> R) -> R {
        thread_local! {
            static STATE: DagDebugState = DagDebugState { parts: Default::default(), ref_count: Cell::new(0) };
        }
        STATE.with(|state| {
            let _scope = state.inc_ref_count_scope();
            f(state)
        })
    }
}

232
src/xml_tree.rs Normal file
View file

@ -0,0 +1,232 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use quick_xml::{
Writer,
events::{BytesDecl, BytesStart, BytesText, Event},
};
use std::fmt;
/// adapts a [`fmt::Write`] sink for use where [`std::io::Write`] is needed,
/// re-validating the byte stream as UTF-8 along the way
struct FmtToIoAdaptor<W> {
    writer: W,
    // holds the bytes of a UTF-8 sequence that has not finished arriving
    partial_char: [u8; 4],
    // number of buffered bytes in `partial_char`
    partial_char_len: u8,
}
impl<W: fmt::Write> FmtToIoAdaptor<W> {
    fn new(writer: W) -> Self {
        Self {
            writer,
            partial_char: [0; 4],
            partial_char_len: 0,
        }
    }
    /// return the inner writer; fails if a partial UTF-8 sequence is pending
    fn finish(self) -> Result<W, fmt::Error> {
        if self.partial_char_len == 0 {
            Ok(self.writer)
        } else {
            Err(fmt::Error)
        }
    }
    /// buffer `b` until it completes a UTF-8 sequence, then forward the
    /// decoded text; a byte that can never become valid UTF-8 is an error
    fn write_byte(&mut self, b: u8) -> std::io::Result<()> {
        self.partial_char[usize::from(self.partial_char_len)] = b;
        self.partial_char_len += 1;
        let pending = &self.partial_char[..usize::from(self.partial_char_len)];
        match str::from_utf8(pending) {
            Ok(s) => {
                let result = self.writer.write_str(s).map_err(std::io::Error::other);
                self.partial_char_len = 0;
                result
            }
            Err(e) if e.error_len().is_some() => {
                // definitely invalid UTF-8, not merely incomplete
                self.partial_char_len = 0;
                Err(std::io::Error::new(std::io::ErrorKind::InvalidData, e))
            }
            // incomplete sequence: keep the bytes and wait for more
            Err(_) => Ok(()),
        }
    }
}
impl<W: fmt::Write> std::io::Write for FmtToIoAdaptor<W> {
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        buf.iter().try_for_each(|&b| self.write_byte(b))?;
        Ok(buf.len())
    }
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
/// XML node kind: either a comment node or an ordinary tagged element
#[derive(Clone, PartialEq, Eq, Hash)]
pub(crate) enum ElementTag {
    /// a comment node (python's `ElementTree.Comment`)
    Comment,
    /// an ordinary element with the given tag name
    Normal(String),
}
impl ElementTag {
    /// the tag name, or `None` for comment nodes
    pub(crate) fn normal(&self) -> Option<&str> {
        if let ElementTag::Normal(tag) = self {
            Some(tag)
        } else {
            None
        }
    }
}
impl fmt::Debug for ElementTag {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            ElementTag::Comment => write!(f, "<Comment>"),
            ElementTag::Normal(tag) => fmt::Debug::fmt(tag, f),
        }
    }
}
/// like python's xml.etree.ElementTree.Element
#[derive(Clone, Debug)]
pub(crate) struct Element {
    /// the element's tag name, or the comment marker
    pub(crate) tag: ElementTag,
    /// attributes as (name, value) pairs, serialized in this order
    pub(crate) attrib: Vec<(String, String)>,
    /// text contained in this element but before any children
    pub(crate) text: String,
    /// child elements, in document order
    pub(crate) children: Vec<Element>,
    /// text after the end of this element
    pub(crate) tail: String,
}
/// equivalent to python `xml.etree.ElementTree.tostring(self, encoding="unicode")`
impl fmt::Display for Element {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // serialize through quick-xml into the formatter; the adaptor
        // re-validates the emitted bytes as UTF-8
        let mut writer = Writer::new(FmtToIoAdaptor::new(f));
        self.write_to(&mut writer).map_err(|_| fmt::Error)?;
        // finish() errors if the output ended mid-UTF-8-sequence
        writer.into_inner().finish()?;
        Ok(())
    }
}
impl Element {
    /// equivalent of python's `xml.etree.ElementTree.Element()` constructor:
    /// no text, no children, no tail
    pub(crate) fn new(tag: String, attrib: impl IntoIterator<Item = (String, String)>) -> Self {
        Self {
            tag: ElementTag::Normal(tag),
            attrib: Vec::from_iter(attrib),
            text: String::new(),
            children: Vec::new(),
            tail: String::new(),
        }
    }
    /// equivalent of python's `xml.etree.ElementTree.Comment()`
    pub(crate) fn comment(text: String) -> Self {
        Self {
            tag: ElementTag::Comment,
            attrib: Vec::new(),
            text,
            children: Vec::new(),
            tail: String::new(),
        }
    }
    /// equivalent to python `"".join(self.itertext())`
    pub(crate) fn inner_text(&self) -> String {
        let mut retval = String::new();
        // depth-first walk: an element contributes its `text`, then each
        // child followed by that child's `tail`
        fn helper(element: &Element, retval: &mut String) {
            let Element {
                tag,
                attrib: _,
                text,
                children,
                tail: _,
            } = element;
            // comment nodes contribute nothing, matching python's itertext()
            let ElementTag::Normal(_) = tag else {
                return;
            };
            retval.push_str(text);
            for child in children {
                helper(child, retval);
                retval.push_str(&child.tail);
            }
        }
        helper(self, &mut retval);
        retval
    }
    /// equivalent of python's `xml.etree.ElementTree.SubElement()`:
    /// append a new empty child and return a handle to it
    pub(crate) fn sub_element(
        &mut self,
        tag: String,
        attrib: impl IntoIterator<Item = (String, String)>,
    ) -> &mut Self {
        self.children.push(Self::new(tag, attrib));
        self.children.last_mut().expect("just pushed")
    }
    /// serialize this element (and its trailing `tail` text) as XML events
    pub(crate) fn write_to(&self, writer: &mut Writer<impl std::io::Write>) -> std::io::Result<()> {
        let Element {
            tag,
            attrib,
            text,
            children,
            tail,
        } = self;
        match tag {
            ElementTag::Comment => {
                writer.write_event(Event::Comment(BytesText::new(text)))?;
            }
            // an empty tag name means "no element": emit only the text
            ElementTag::Normal(tag) if tag.is_empty() => {
                writer.write_event(Event::Text(BytesText::new(text)))?;
            }
            ElementTag::Normal(tag)
                if attrib.is_empty() && text.is_empty() && children.is_empty() =>
            {
                // write element like `<br />` to match python instead of like `<br/>`
                // (from_content's second argument marks where the name ends)
                writer.write_event(Event::Empty(BytesStart::from_content(
                    tag.clone() + " ",
                    tag.len(),
                )))?;
            }
            ElementTag::Normal(tag) => {
                let mut element_writer = writer.create_element(tag);
                for (name, value) in attrib {
                    element_writer = element_writer.with_attribute((name.as_str(), value.as_str()));
                }
                if text.is_empty() && children.is_empty() {
                    // NOTE(review): this path (attributes but no content) uses
                    // quick_xml's write_empty(), which presumably emits
                    // `<tag a="b"/>` without the space python puts before
                    // `/>` — verify against the python output
                    element_writer.write_empty()?;
                } else {
                    element_writer.write_inner_content(|writer| {
                        writer.write_event(Event::Text(BytesText::new(text)))?;
                        for child in children {
                            child.write_to(writer)?;
                        }
                        Ok(())
                    })?;
                }
            }
        }
        // the tail belongs after this element's close tag
        writer.write_event(Event::Text(BytesText::new(tail)))?;
        Ok(())
    }
    /// equivalent of python's `xml.etree.ElementTree(self).write(writer, encoding='utf-8', xml_declaration=xml_declaration)`
    pub(crate) fn write(
        &self,
        writer: impl std::io::Write,
        xml_declaration: bool,
    ) -> std::io::Result<()> {
        let mut writer = Writer::new(writer);
        if xml_declaration {
            // use specific string to match python
            writer.write_event(Event::Decl(BytesDecl::from_start(
                BytesStart::from_content("xml version='1.0' encoding='utf-8'", 3),
            )))?;
            writer.write_event(Event::Text(BytesText::new("\n")))?;
        }
        self.write_to(&mut writer)
    }
}