Compare commits

Branches compared: wip-experi ... master (33 commits)

Commits (SHA1 only; author and date were not captured in this view):
38a1fb328b, 4177a58c8d, bc550be122, b68cb274da, 76438b727c, 2636ab2518, 8fd55cdda8, 56ee3c5c43,
63698fd90f, 9bf15dc9d0, 73c45323c8, 040afcc435, a677cd8a33, 103f986bc0, f9a24f4c48, fcf1c63cb7,
c58bc23904, 3d66c853f6, e9830566c0, 442afe5f06, 45e8925d34, b1d83b1d84, c8cd234d8f, 718de40b09,
9e090a66a3, 7ecdbc0239, e1277bbb90, 3fc0e92f95, 104ee37933, 8643d47338, 944ae4bf41, da339ce00a,
2381421776

15 changed files with 6553 additions and 9 deletions
.forgejo/workflows/test.yml (new file, 46 lines)
@@ -0,0 +1,46 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
on: [push, pull_request]

env:
  PDF_HASH: 56372d23ece7e9e2c1b381a639443982a3e16e38109df1c141d655b779b61fdb
  OUTPUT_XML_HASH: c0b4592cbd0a3e59b9b2931a6a75a3d87ebf23bf453e8587a1522dd157f15ee9

jobs:
  test:
    runs-on: debian-12
    container:
      image: git.libre-chip.org/libre-chip/fayalite-deps:latest
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
      - run: |
          scripts/check-copyright.sh
      - uses: https://git.libre-chip.org/mirrors/rust-cache@v2
        with:
          save-if: ${{ github.ref == 'refs/heads/master' }}
      - run: |
          apt-get update -qq
          apt-get install -qq python3-venv wget
          # copy of https://files.openpower.foundation/s/9izgC5Rogi5Ywmm/download/OPF_PowerISA_v3.1C.pdf
          wget -O OPF_PowerISA_v3.1C.pdf https://libre-chip.org/OPF_PowerISA_v3.1C.pdf
          echo "$PDF_HASH OPF_PowerISA_v3.1C.pdf" | sha256sum -c
      - run: |
          cargo test
      - run: |
          cargo build --release
      - run: |
          cargo run --release -- OPF_PowerISA_v3.1C.pdf &> >(tee out.log | grep '^page ') || { tail -n1000 out.log; false; }
          echo "expected output (not all instructions are decoded yet, change when the output is improved):"
          echo "$OUTPUT_XML_HASH powerisa-instructions.xml" | sha256sum -c
          mv powerisa-instructions.xml powerisa-instructions-rust.xml
      - run: |
          python3 -m venv --upgrade-deps .venv
          . .venv/bin/activate
          pip install -e .
          parse_powerisa_pdf OPF_PowerISA_v3.1C.pdf &> >(tee out.log | grep '^page ') || { tail -n1000 out.log; false; }
          echo "expected output (not all instructions are decoded yet, change when the output is improved):"
          echo "$OUTPUT_XML_HASH powerisa-instructions.xml" | sha256sum -c
          mv powerisa-instructions.xml powerisa-instructions-python.xml

.gitignore (vendored, 6 changed lines)
@@ -1,6 +1,10 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
/.venv
/.vscode
*.egg-info
__pycache__
*.log
/powerisa-instructions.xml
/powerisa-instructions.xml
/*.pdf
/target

Cargo.lock (generated, new file, 305 lines)
@@ -0,0 +1,305 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4

[[package]]
name = "aho-corasick"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
dependencies = [
 "memchr",
]

[[package]]
name = "bindgen"
version = "0.71.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
dependencies = [
 "bitflags",
 "cexpr",
 "clang-sys",
 "itertools",
 "proc-macro2",
 "quote",
 "regex",
 "rustc-hash",
 "shlex",
 "syn",
]

[[package]]
name = "bitflags"
version = "2.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"

[[package]]
name = "cc"
version = "1.2.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a0aeaff4ff1a90589618835a598e545176939b97874f7abc7851caa0618f203"
dependencies = [
 "find-msvc-tools",
 "shlex",
]

[[package]]
name = "cexpr"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
dependencies = [
 "nom",
]

[[package]]
name = "cfg-if"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"

[[package]]
name = "clang-sys"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
dependencies = [
 "glob",
 "libc",
 "libloading",
]

[[package]]
name = "either"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"

[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"

[[package]]
name = "find-msvc-tools"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff"

[[package]]
name = "glob"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"

[[package]]
name = "hashbrown"
version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"

[[package]]
name = "indexmap"
version = "2.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2"
dependencies = [
 "equivalent",
 "hashbrown",
]

[[package]]
name = "itertools"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
dependencies = [
 "either",
]

[[package]]
name = "libc"
version = "0.2.178"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091"

[[package]]
name = "libloading"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
dependencies = [
 "cfg-if",
 "windows-link",
]

[[package]]
name = "libm"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de"

[[package]]
name = "memchr"
version = "2.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"

[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"

[[package]]
name = "mupdf-sys"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13e9a0d4e844ab50315d43312f3d62f72c77205b07c8ee21cbd4b52bdc2a9910"
dependencies = [
 "bindgen",
 "cc",
 "pkg-config",
 "regex",
 "zerocopy",
]

[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
 "memchr",
 "minimal-lexical",
]

[[package]]
name = "parse_powerisa_pdf"
version = "0.1.0"
dependencies = [
 "indexmap",
 "libm",
 "mupdf-sys",
 "quick-xml",
]

[[package]]
name = "pkg-config"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"

[[package]]
name = "proc-macro2"
version = "1.0.104"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9695f8df41bb4f3d222c95a67532365f569318332d03d5f3f67f37b20e6ebdf0"
dependencies = [
 "unicode-ident",
]

[[package]]
name = "quick-xml"
version = "0.38.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c"
dependencies = [
 "memchr",
]

[[package]]
name = "quote"
version = "1.0.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
dependencies = [
 "proc-macro2",
]

[[package]]
name = "regex"
version = "1.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
dependencies = [
 "aho-corasick",
 "memchr",
 "regex-automata",
 "regex-syntax",
]

[[package]]
name = "regex-automata"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
dependencies = [
 "aho-corasick",
 "memchr",
 "regex-syntax",
]

[[package]]
name = "regex-syntax"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"

[[package]]
name = "rustc-hash"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"

[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"

[[package]]
name = "syn"
version = "2.0.112"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21f182278bf2d2bcb3c88b1b08a37df029d71ce3d3ae26168e3c653b213b99d4"
dependencies = [
 "proc-macro2",
 "quote",
 "unicode-ident",
]

[[package]]
name = "unicode-ident"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"

[[package]]
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"

[[package]]
name = "zerocopy"
version = "0.8.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3"
dependencies = [
 "zerocopy-derive",
]

[[package]]
name = "zerocopy-derive"
version = "0.8.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a"
dependencies = [
 "proc-macro2",
 "quote",
 "syn",
]

Cargo.toml (new file, 17 lines)
@@ -0,0 +1,17 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
[package]
name = "parse_powerisa_pdf"
version = "0.1.0"
license = "LGPL-3.0-or-later"
edition = "2024"
repository = ""
keywords = []
categories = []
rust-version = "1.89.0"

[dependencies]
indexmap = "2.12.1"
libm = "0.2.15"
mupdf-sys = { version = "0.5.0", default-features = false }
quick-xml = "0.38.4"

README.md (33 changed lines)
@@ -1,5 +1,38 @@
<!--
SPDX-License-Identifier: LGPL-3.0-or-later
See Notices.txt for copyright information
-->
parser for the OPF PowerISA 3.1C pdf to attempt to extract all instructions' pseudo-code including subscripts/superscripts and other formatting

# Using the new Rust code:

Usage:
* Download the OPF PowerISA 3.1C pdf (yes you need that exact version) from <https://openpower.foundation/specifications/isa/>

* Install Rust -- you need version 1.89.0 or later.

  Getting it from https://rustup.rs/ is recommended.

* Install required build dependencies:

  On Debian 12:

  ```bash
  sudo apt update
  sudo apt install build-essential clang unzip
  ```

* Compile and run:

  ```bash
  cargo run -- path/to/downloaded/OPF_PowerISA_v3.1C.pdf > out.log
  ```

* This will spit out lots of errors and then successfully create
  the output file -- `powerisa-instructions.xml` in the current directory.

# Using the old Python code:

Usage:
* Download the OPF PowerISA 3.1C pdf (yes you need that exact version) from <https://openpower.foundation/specifications/isa/>
* Obtain CPython 3.11 (the default `python3` in [Debian Bookworm](https://www.debian.org/releases/bookworm/))

(Python source file; the file name was not captured in this view. Diff lines are shown as extracted, without their original indentation.)
@@ -1,3 +1,5 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
from __future__ import annotations
from collections import defaultdict
from collections.abc import Generator, Iterable, Iterator, Callable
@@ -763,7 +765,7 @@ class Page:
unprocessed_non_text: SetById[LTLine | LTRect]

@staticmethod
def from_lt_page(page_num: int, page: LTPage) -> Page:
def from_lt_page(page_num: int, page: LTPage, first_seen_fonts: defaultdict[str, set[float]]) -> Page:
qt: defaultdict[TextSection, QuadTree[Char | LTLine | LTRect]] = defaultdict(QuadTree)
unprocessed_chars = defaultdict(lambda: defaultdict(SetById[Char]))
unprocessed_non_text: SetById[LTLine | LTRect] = SetById()
@@ -802,20 +804,25 @@ class Page:
raise AssertionError(
f"char not in text section: {element}\npage_num={page_num}")
continue
font_size = round(element.size, 3)
char = Char(
text=element.get_text(),
font=Font(font_name=element.fontname, size=round(element.size, 3)),
font=Font(font_name=element.fontname, size=font_size),
adv=element.adv,
min_x=element.x0,
min_y=element.y0,
max_x=element.x1,
max_y=element.y1,
)
if font_size not in first_seen_fonts[element.fontname]:
first_seen_fonts[element.fontname].add(font_size)
print(f"first seen font: {element.fontname!r} {font_size}: page {page_num} {char!r}")
qt[text_section].insert(char.min_x, char.min_y, char)
unprocessed_chars[text_section][char.font].add(char)
for i in unprocessed_chars.values():
for j in i.values():
j.sort(key=Char.top_down_left_to_right_sort_key)
for text_section, i in unprocessed_chars.items():
for chars in i.values():
chars.sort(key=Char.top_down_left_to_right_sort_key)
print(f"first char: {text_section!r}: {next(iter(chars), None)!r}")
unknown_fonts=[]
unknown_font_errors=[]
for i in unprocessed_chars.values():
@@ -1179,13 +1186,14 @@ class Parser:
def __pages_gen(file: Path, page_numbers: Iterable[int] | None) -> Generator[Page, None, None]:
if page_numbers is not None:
page_numbers = sorted(i - 1 for i in page_numbers)
first_seen_fonts = defaultdict(set)
for i, page in enumerate(extract_pages(file, page_numbers=page_numbers)):
if page_numbers is not None:
page_num = page_numbers[i] + 1
else:
page_num = i + 1
print(f"page {page_num}")
yield Page.from_lt_page(page_num=page_num, page=page)
yield Page.from_lt_page(page_num=page_num, page=page, first_seen_fonts=first_seen_fonts)

def parse_pdf(self, file: Path, page_numbers: Iterable[int] | None = None):
self.pages = Pages(pages_gen=Parser.__pages_gen(
@@ -1501,7 +1509,7 @@ class Parser:
f"instruction bit fields box has wrong number of horizontal lines:\n{h_lines}")
if len(v_lines) < 2:
raise InsnParseError(
f"instruction bit fields box has too few vertical lines:\n{h_lines}")
f"instruction bit fields box has too few vertical lines:\n{v_lines}")
bottom_line, top_line = h_lines
box_min_x = v_lines[0].x0
box_max_x = v_lines[-1].x0

(Python source file; the file name was not captured in this view.)
@@ -1,3 +1,5 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
from __future__ import annotations
from typing import Callable, Generic, Iterable, Iterator, TypeVar
from math import frexp, isfinite, isnan, ldexp

(Python source file; the file name was not captured in this view.)
@@ -1,3 +1,5 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
from collections import abc
from typing import Callable, Generic, Iterable, Iterator, Protocol, TypeAlias, TypeVar, overload

(File name not captured in this view; the content is the project's Python build configuration, most likely pyproject.toml.)
@@ -1,3 +1,5 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
[build-system]
requires = ["setuptools >= 61.0"]
build-backend = "setuptools.build_meta"
@@ -11,4 +13,7 @@ dependencies = [
requires-python = ">= 3.11"

[project.scripts]
parse_powerisa_pdf = "parse_powerisa_pdf.parse_powerisa_pdf:main"
parse_powerisa_pdf = "parse_powerisa_pdf.parse_powerisa_pdf:main"

[tool.setuptools]
packages = ["parse_powerisa_pdf"]

scripts/check-copyright.sh (new executable file, 70 lines)
@@ -0,0 +1,70 @@
#!/bin/bash
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
set -e

function fail()
{
    local error="$1"
    echo "error: $error" >&2
    exit 1
}

function fail_file()
{
    local file="$1" line="$2" error="$3"
    fail "$file:$((line + 1)): $error"
}

function check_file()
{
    local file="$1" regexes=("${@:2}")
    local lines
    mapfile -t lines < "$file"
    if (("${#lines[@]}" == 0)); then
        return # empty file, no copyright needed
    fi
    local line
    for line in "${!regexes[@]}"; do
        eval '[[ "${lines[line]}" =~ '"${regexes[line]}"' ]]' ||
            fail_file "$file" "$line" "doesn't match regex: ${regexes[line]}"
    done
}

POUND_HEADER=('^"# SPDX-License-Identifier: LGPL-3.0-or-later"$' '^"# See Notices.txt for copyright information"$')
SLASH_HEADER=('^"// SPDX-License-Identifier: LGPL-3.0-or-later"$' '^"// See Notices.txt for copyright information"$')
MD_HEADER=('^"<!--"$' '^"SPDX-License-Identifier: LGPL-3.0-or-later"$' '^"See Notices.txt for copyright information"$')

function main()
{
    local IFS=$'\n'
    [[ -z "$(git status --porcelain)" ]] || fail "git repo is dirty"
    local file
    for file in $(git ls-tree --name-only --full-tree -r HEAD); do
        case "/$file" in
            /Cargo.lock)
                # generated file
                ;;
            */LICENSE.md|*/Notices.txt)
                # copyright file
                ;;
            /.forgejo/workflows/*.yml|*/.gitignore|*.toml|*.py)
                check_file "$file" "${POUND_HEADER[@]}"
                ;;
            *.md)
                check_file "$file" "${MD_HEADER[@]}"
                ;;
            *.sh)
                check_file "$file" '^'\''#!'\' "${POUND_HEADER[@]}"
                ;;
            *.rs)
                check_file "$file" "${SLASH_HEADER[@]}"
                ;;
            *)
                fail_file "$file" 0 "unimplemented file kind -- you need to add it to $0"
                ;;
        esac
    done
}

main

src/lib.rs (new file, 3828 lines)
File diff suppressed because it is too large.

src/main.rs (new file, 6 lines)
@@ -0,0 +1,6 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information

fn main() -> std::process::ExitCode {
    parse_powerisa_pdf::main()
}

src/mupdf_ffi.rs (new file, 871 lines)
@@ -0,0 +1,871 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information

use mupdf_sys::{
    fz_buffer, fz_buffer_storage, fz_clone_context, fz_color_params, fz_colorspace, fz_concat,
    fz_context, fz_device, fz_document, fz_drop_buffer, fz_drop_context, fz_drop_device,
    fz_drop_document, fz_drop_page, fz_drop_path, fz_drop_text, fz_error_type_FZ_ERROR_GENERIC,
    fz_font, fz_font_ascender, fz_font_descender, fz_font_is_bold, fz_font_is_italic, fz_font_name,
    fz_matrix, fz_matrix_expansion, fz_page, fz_path, fz_path_walker, fz_point, fz_rect,
    fz_stroke_state, fz_text, fz_text_item, fz_text_span, fz_transform_point,
    fz_transform_point_xy, fz_transform_vector, fz_walk_path, mupdf_document_page_count,
    mupdf_drop_error, mupdf_error_t, mupdf_load_page, mupdf_new_base_context,
    mupdf_new_derived_device, mupdf_open_document, mupdf_page_to_xml, mupdf_pdf_page_transform,
    mupdf_run_page, pdf_page, pdf_page_from_fz_page,
};
use std::{
    cell::{Cell, UnsafeCell},
    ffi::{CStr, c_int, c_void},
    fmt,
    marker::PhantomData,
    mem::ManuallyDrop,
    ptr::{self, NonNull},
    sync::{Mutex, OnceLock},
};

#[derive(Debug)]
pub(crate) struct MuPdfError {
    type_: c_int,
    message: String,
}

impl MuPdfError {
    fn new_generic(message: impl ToString) -> Self {
        Self {
            type_: fz_error_type_FZ_ERROR_GENERIC as _,
            message: message.to_string(),
        }
    }
}

impl fmt::Display for MuPdfError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "MuPDF error: type: {}, message: {}",
            self.type_, self.message
        )
    }
}

impl std::error::Error for MuPdfError {}

struct OwnedMuPdfError(NonNull<mupdf_error_t>);

impl Drop for OwnedMuPdfError {
    fn drop(&mut self) {
        unsafe {
            mupdf_drop_error(self.0.as_ptr());
        }
    }
}

unsafe fn mupdf_try<R>(f: impl FnOnce(&mut *mut mupdf_error_t) -> R) -> Result<R, MuPdfError> {
    let mut err = ptr::null_mut();
    let retval = f(&mut err);
    let Some(err) = NonNull::new(err).map(OwnedMuPdfError) else {
        return Ok(retval);
    };
    unsafe {
        Err(MuPdfError {
            type_: (*err.0.as_ptr()).type_,
            message: CStr::from_ptr((*err.0.as_ptr()).message)
                .to_string_lossy()
                .into_owned(),
        })
    }
}

pub(crate) struct Context(NonNull<fz_context>);

impl Context {
    fn new() -> Self {
        struct BaseContext(NonNull<fz_context>);
        unsafe impl Send for BaseContext {}
        static CTX: OnceLock<Mutex<BaseContext>> = OnceLock::new();
        let base = CTX
            .get_or_init(|| {
                let ctx = unsafe { mupdf_new_base_context() };
                let Some(ctx) = NonNull::new(ctx).map(BaseContext) else {
                    panic!("failed to allocate a MuPDF context");
                };
                Mutex::new(ctx)
            })
            .lock()
            .expect("not poisoned");
        let ctx = unsafe { fz_clone_context(base.0.as_ptr()) };
        let Some(ctx) = NonNull::new(ctx).map(Self) else {
            drop(base);
            panic!("failed to clone a MuPDF context");
        };
        ctx
    }
    pub(crate) fn with<R>(f: impl FnOnce(&Self) -> R) -> R {
        thread_local! {
            static CTX: Context = Context::new();
        }
        CTX.with(f)
    }
    pub(crate) fn as_ref(&self) -> ContextRef<'_> {
        unsafe { ContextRef::from_ptr(self.0.as_ptr()) }
    }
}

impl Drop for Context {
    fn drop(&mut self) {
        unsafe {
            fz_drop_context(self.0.as_ptr());
        }
    }
}

#[derive(Clone, Copy)]
pub(crate) struct ContextRef<'ctx>(&'ctx UnsafeCell<fz_context>);

impl<'ctx> ContextRef<'ctx> {
    unsafe fn from_ptr(ptr: *mut fz_context) -> Self {
        Self(unsafe { &*ptr.cast() })
    }
}

impl<'ctx> From<&'ctx Context> for ContextRef<'ctx> {
    fn from(value: &'ctx Context) -> Self {
        value.as_ref()
    }
}

pub(crate) struct Document<'ctx> {
    ptr: *mut fz_document,
    ctx: ContextRef<'ctx>,
}

impl<'ctx> Document<'ctx> {
    pub(crate) fn open(
        ctx: impl Into<ContextRef<'ctx>>,
        file_name: &CStr,
    ) -> Result<Document<'ctx>, MuPdfError> {
        let ctx = ctx.into();
        unsafe {
            mupdf_try(|errptr| mupdf_open_document(ctx.0.get(), file_name.as_ptr(), errptr))
                .map(|ptr| Document { ptr, ctx })
        }
    }
    pub(crate) fn page_count(&self) -> Result<usize, MuPdfError> {
        unsafe {
            mupdf_try(|errptr| mupdf_document_page_count(self.ctx.0.get(), self.ptr, errptr))?
                .try_into()
                .map_err(MuPdfError::new_generic)
        }
    }
    pub(crate) fn load_page(&self, page: usize) -> Result<Page<'ctx>, MuPdfError> {
        let page = page.try_into().map_err(MuPdfError::new_generic)?;
        unsafe {
            mupdf_try(|errptr| mupdf_load_page(self.ctx.0.get(), self.ptr, page, errptr))
                .map(|ptr| Page { ptr, ctx: self.ctx })
        }
    }
}

impl<'ctx> Drop for Document<'ctx> {
    fn drop(&mut self) {
        unsafe {
            fz_drop_document(self.ctx.0.get(), self.ptr);
        }
    }
}

struct Buffer<'ctx> {
    ptr: *mut fz_buffer,
    ctx: ContextRef<'ctx>,
}

impl<'ctx> Buffer<'ctx> {
    fn storage(&mut self) -> &mut [u8] {
        unsafe {
            let mut ptr = ptr::null_mut();
            let len = fz_buffer_storage(self.ctx.0.get(), self.ptr, &raw mut ptr);
            if len == 0 {
                &mut []
            } else {
                std::slice::from_raw_parts_mut(ptr, len)
            }
        }
    }
}

impl<'ctx> Drop for Buffer<'ctx> {
    fn drop(&mut self) {
        unsafe {
            fz_drop_buffer(self.ctx.0.get(), self.ptr);
        }
    }
}

pub(crate) struct Page<'ctx> {
    ptr: *mut fz_page,
    ctx: ContextRef<'ctx>,
}

impl<'ctx> Page<'ctx> {
    pub(crate) fn ctx(&self) -> ContextRef<'ctx> {
        self.ctx
    }
    pub(crate) fn run<T>(
        &self,
        device: &Device<'ctx, T>,
        ctm: fz_matrix,
    ) -> Result<(), MuPdfError> {
        unsafe {
            mupdf_try(|errptr| {
                mupdf_run_page(
                    self.ctx.0.get(),
                    self.ptr,
                    device.dev,
                    ctm,
                    ptr::null_mut(),
                    errptr,
                )
            })
        }
    }
    pub(crate) fn to_xml(&self) -> Result<String, MuPdfError> {
        unsafe {
            let mut buffer =
                mupdf_try(|errptr| mupdf_page_to_xml(self.ctx.0.get(), self.ptr, errptr))
                    .map(|ptr| Buffer { ptr, ctx: self.ctx })?;
            Ok(str::from_utf8(buffer.storage())
                .map_err(MuPdfError::new_generic)?
                .into())
        }
    }
    pub(crate) fn pdf_page<'a>(&'a self) -> Option<PdfPageRef<'a, 'ctx>> {
        unsafe {
            let ptr = pdf_page_from_fz_page(self.ctx.0.get(), self.ptr);
            NonNull::new(ptr).map(|ptr| PdfPageRef {
                ptr: &*ptr.as_ptr().cast(),
                ctx: self.ctx,
            })
        }
    }
}

impl<'ctx> Drop for Page<'ctx> {
    fn drop(&mut self) {
        unsafe {
            fz_drop_page(self.ctx.0.get(), self.ptr);
        }
    }
}

#[derive(Clone, Copy)]
pub(crate) struct PdfPageRef<'a, 'ctx> {
    ptr: &'a UnsafeCell<pdf_page>,
    ctx: ContextRef<'ctx>,
}

impl<'a, 'ctx> PdfPageRef<'a, 'ctx> {
    pub(crate) fn transform(self) -> Result<fz_matrix, MuPdfError> {
        unsafe {
            mupdf_try(|errptr| mupdf_pdf_page_transform(self.ctx.0.get(), self.ptr.get(), errptr))
        }
    }
}
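The Context, Document, and Page wrappers above already show the intended call pattern: clone a per-thread fz_context through Context::with, open a document, then load and inspect pages. The sketch below is illustrative only (it is not part of any commit in this compare view) and assumes it lives inside this crate, where the pub(crate) items are reachable; it dumps the first page of a PDF to MuPDF's XML text form.

```rust
// Illustrative sketch, not from the commits above: dump page 0 of a PDF to
// XML text using the wrappers defined in src/mupdf_ffi.rs.
fn dump_first_page(path: &std::ffi::CStr) -> Result<String, MuPdfError> {
    Context::with(|ctx| {
        let doc = Document::open(ctx, path)?; // mupdf_open_document under the hood
        assert!(doc.page_count()? > 0, "document has no pages");
        let page = doc.load_page(0)?; // page indices are zero-based
        page.to_xml() // mupdf_page_to_xml, returned as a UTF-8 String
    })
}
```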
pub(crate) struct Device<'ctx, T: 'ctx> {
    dev: *mut fz_device,
    ctx: ContextRef<'ctx>,
    _phantom: PhantomData<Box<Cell<T>>>,
}

pub(crate) trait DeviceCallbacks<'ctx> {
    fn fill_path(&self, ctx: ContextRef<'ctx>, path: &Path<'ctx>, even_odd: bool, ctm: fz_matrix);
    fn stroke_path(&self, ctx: ContextRef<'ctx>, path: &Path<'ctx>, ctm: fz_matrix);
    fn clip_path(
        &self,
        ctx: ContextRef<'ctx>,
        path: &Path<'ctx>,
        even_odd: bool,
        ctm: fz_matrix,
        scissor: fz_rect,
    );
    fn clip_stroke_path(
        &self,
        ctx: ContextRef<'ctx>,
        path: &Path<'ctx>,
        ctm: fz_matrix,
        scissor: fz_rect,
    );
    fn fill_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix);
    fn stroke_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix);
    fn clip_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix, scissor: fz_rect);
    fn clip_stroke_text(
        &self,
        ctx: ContextRef<'ctx>,
        text: &Text<'ctx>,
        ctm: fz_matrix,
        scissor: fz_rect,
    );
    fn ignore_text(&self, ctx: ContextRef<'ctx>, text: &Text<'ctx>, ctm: fz_matrix);
}

impl<'ctx, T: DeviceCallbacks<'ctx>> Device<'ctx, T> {
    pub(crate) fn new(ctx: impl Into<ContextRef<'ctx>>, value: Box<T>) -> Result<Self, MuPdfError> {
        let ctx = ctx.into();
        unsafe {
            let dev_ptr = mupdf_try(|errptr| {
                mupdf_new_derived_device::<DeviceStruct<T>>(
                    ctx.0.get(),
                    c"parse_powerisa_pdf::mupdf_ffi::Device",
                    errptr,
                )
            })?;
            let retval = Device {
                dev: dev_ptr.cast(),
                ctx,
                _phantom: PhantomData,
            };
            (&raw mut (*dev_ptr).value).write(value);
            let fz_device {
                drop_device,
                fill_path,
                stroke_path,
                clip_path,
                clip_stroke_path,
                fill_text,
                stroke_text,
                clip_text,
                clip_stroke_text,
                ignore_text,
                ..
            } = &mut (*dev_ptr).base;
            *drop_device = Some(Self::drop_device_fn);
            *fill_path = Some(Self::fill_path_fn);
            *stroke_path = Some(Self::stroke_path_fn);
            *clip_path = Some(Self::clip_path_fn);
            *clip_stroke_path = Some(Self::clip_stroke_path_fn);
            *fill_text = Some(Self::fill_text_fn);
            *stroke_text = Some(Self::stroke_text_fn);
            *clip_text = Some(Self::clip_text_fn);
            *clip_stroke_text = Some(Self::clip_stroke_text_fn);
            *ignore_text = Some(Self::ignore_text_fn);
            Ok(retval)
        }
    }
    pub(crate) fn get(&self) -> &T {
        unsafe { &(*self.dev.cast::<DeviceStruct<T>>()).value }
    }
    unsafe extern "C" fn drop_device_fn(_ctx: *mut fz_context, dev: *mut fz_device) {
        unsafe {
            (&raw mut (*dev.cast::<DeviceStruct<T>>()).value).drop_in_place();
        }
    }
    unsafe extern "C" fn fill_path_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        path: *const fz_path,
        even_odd: c_int,
        ctm: fz_matrix,
        _color_space: *mut fz_colorspace,
        _color: *const f32,
        _alpha: f32,
        _color_params: fz_color_params,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.fill_path(
            ctx,
            &ManuallyDrop::new(Path {
                ptr: path.cast_mut(),
                ctx,
            }),
            even_odd != 0,
            ctm,
        );
    }
    unsafe extern "C" fn stroke_path_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        path: *const fz_path,
        _stroke_state: *const fz_stroke_state,
        ctm: fz_matrix,
        _color_space: *mut fz_colorspace,
        _color: *const f32,
        _alpha: f32,
        _color_params: fz_color_params,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.stroke_path(
            ctx,
            &ManuallyDrop::new(Path {
                ptr: path.cast_mut(),
                ctx,
            }),
            ctm,
        );
    }
    unsafe extern "C" fn clip_path_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        path: *const fz_path,
        even_odd: ::std::os::raw::c_int,
        ctm: fz_matrix,
        scissor: fz_rect,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.clip_path(
            ctx,
            &ManuallyDrop::new(Path {
                ptr: path.cast_mut(),
                ctx,
            }),
            even_odd != 0,
            ctm,
            scissor,
        );
    }
    unsafe extern "C" fn clip_stroke_path_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        path: *const fz_path,
        _stroke_state: *const fz_stroke_state,
        ctm: fz_matrix,
        scissor: fz_rect,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.clip_stroke_path(
            ctx,
            &ManuallyDrop::new(Path {
                ptr: path.cast_mut(),
                ctx,
            }),
            ctm,
            scissor,
        );
    }
    unsafe extern "C" fn fill_text_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        text: *const fz_text,
        ctm: fz_matrix,
        _color_space: *mut fz_colorspace,
        _color: *const f32,
        _alpha: f32,
        _color_params: fz_color_params,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.fill_text(
            ctx,
            &ManuallyDrop::new(Text {
                ptr: text.cast_mut(),
                ctx,
            }),
            ctm,
        );
    }
    unsafe extern "C" fn stroke_text_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        text: *const fz_text,
        _stroke_state: *const fz_stroke_state,
        ctm: fz_matrix,
        _color_space: *mut fz_colorspace,
        _color: *const f32,
        _alpha: f32,
        _color_params: fz_color_params,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.stroke_text(
            ctx,
            &ManuallyDrop::new(Text {
                ptr: text.cast_mut(),
                ctx,
            }),
            ctm,
        );
    }
    unsafe extern "C" fn clip_text_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        text: *const fz_text,
        ctm: fz_matrix,
        scissor: fz_rect,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.clip_text(
            ctx,
            &ManuallyDrop::new(Text {
                ptr: text.cast_mut(),
                ctx,
            }),
            ctm,
            scissor,
        );
    }
    unsafe extern "C" fn clip_stroke_text_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        text: *const fz_text,
        _stroke_state: *const fz_stroke_state,
        ctm: fz_matrix,
        scissor: fz_rect,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.clip_stroke_text(
            ctx,
            &ManuallyDrop::new(Text {
                ptr: text.cast_mut(),
                ctx,
            }),
            ctm,
            scissor,
        );
    }
    unsafe extern "C" fn ignore_text_fn(
        ctx: *mut fz_context,
        dev: *mut fz_device,
        text: *const fz_text,
        ctm: fz_matrix,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut (*dev.cast::<DeviceStruct<T>>()).value };
        this.ignore_text(
            ctx,
            &ManuallyDrop::new(Text {
                ptr: text.cast_mut(),
                ctx,
            }),
            ctm,
        );
    }
}

impl<'ctx, T> Drop for Device<'ctx, T> {
    fn drop(&mut self) {
        unsafe {
            // FIXME: fz_close_device may throw exceptions
            // fz_close_device(self.ctx.0.get(), self.dev);
            fz_drop_device(self.ctx.0.get(), self.dev);
        }
    }
}

#[repr(C)]
struct DeviceStruct<T> {
    base: fz_device,
    value: Box<T>,
}
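Device::new allocates a derived fz_device, moves the boxed value into DeviceStruct<T>, and points each C callback slot at a shim that casts the device pointer back to DeviceStruct<T> and forwards to the DeviceCallbacks trait. A hypothetical implementor (not taken from this diff; the parser's real device lives in src/lib.rs, whose diff is suppressed above) could be as small as the counter below, which only reacts to fill_text. It assumes the mupdf_sys types used by the trait are in scope.

```rust
// Hypothetical DeviceCallbacks impl, not from the commits above: count
// fill_text calls and ignore every other drawing operation. The callbacks
// take &self, so interior mutability (Cell) is used for the counter.
struct TextCounter {
    fill_text_calls: std::cell::Cell<usize>,
}

impl<'ctx> DeviceCallbacks<'ctx> for TextCounter {
    fn fill_path(&self, _: ContextRef<'ctx>, _: &Path<'ctx>, _: bool, _: fz_matrix) {}
    fn stroke_path(&self, _: ContextRef<'ctx>, _: &Path<'ctx>, _: fz_matrix) {}
    fn clip_path(&self, _: ContextRef<'ctx>, _: &Path<'ctx>, _: bool, _: fz_matrix, _: fz_rect) {}
    fn clip_stroke_path(&self, _: ContextRef<'ctx>, _: &Path<'ctx>, _: fz_matrix, _: fz_rect) {}
    fn fill_text(&self, _: ContextRef<'ctx>, _text: &Text<'ctx>, _: fz_matrix) {
        self.fill_text_calls.set(self.fill_text_calls.get() + 1);
    }
    fn stroke_text(&self, _: ContextRef<'ctx>, _: &Text<'ctx>, _: fz_matrix) {}
    fn clip_text(&self, _: ContextRef<'ctx>, _: &Text<'ctx>, _: fz_matrix, _: fz_rect) {}
    fn clip_stroke_text(&self, _: ContextRef<'ctx>, _: &Text<'ctx>, _: fz_matrix, _: fz_rect) {}
    fn ignore_text(&self, _: ContextRef<'ctx>, _: &Text<'ctx>, _: fz_matrix) {}
}

// Usage inside Context::with, given a loaded `page` and some `ctm: fz_matrix`
// (for example the matrix from page.pdf_page() plus PdfPageRef::transform()):
//     let dev = Device::new(ctx, Box::new(TextCounter { fill_text_calls: Cell::new(0) }))?;
//     page.run(&dev, ctm)?;
//     let filled = dev.get().fill_text_calls.get();
```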
pub(crate) trait PathWalker<'ctx> {
    fn move_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32);
    fn line_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32);
    fn curve_to(
        &mut self,
        ctx: ContextRef<'ctx>,
        x1: f32,
        y1: f32,
        x2: f32,
        y2: f32,
        x3: f32,
        y3: f32,
    );
    fn close_path(&mut self, ctx: ContextRef<'ctx>);
    fn rect_to(&mut self, ctx: ContextRef<'ctx>, x1: f32, y1: f32, x2: f32, y2: f32) {
        self.move_to(ctx, x1, y1);
        self.move_to(ctx, x2, y1);
        self.move_to(ctx, x2, y2);
        self.move_to(ctx, x1, y2);
        self.close_path(ctx);
    }
}

impl<'ctx, T: ?Sized + PathWalker<'ctx>> PathWalker<'ctx> for &'_ mut T {
    fn move_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32) {
        T::move_to(self, ctx, x, y);
    }

    fn line_to(&mut self, ctx: ContextRef<'ctx>, x: f32, y: f32) {
        T::line_to(self, ctx, x, y);
    }

    fn curve_to(
        &mut self,
        ctx: ContextRef<'ctx>,
        x1: f32,
        y1: f32,
        x2: f32,
        y2: f32,
        x3: f32,
        y3: f32,
    ) {
        T::curve_to(self, ctx, x1, y1, x2, y2, x3, y3);
    }

    fn close_path(&mut self, ctx: ContextRef<'ctx>) {
        T::close_path(self, ctx);
    }

    fn rect_to(&mut self, ctx: ContextRef<'ctx>, x1: f32, y1: f32, x2: f32, y2: f32) {
        T::rect_to(self, ctx, x1, y1, x2, y2);
    }
}

pub(crate) struct Path<'ctx> {
    ptr: *mut fz_path,
    ctx: ContextRef<'ctx>,
}

impl<'ctx> Path<'ctx> {
    pub(crate) fn walk<W: PathWalker<'ctx>>(&self, mut walker: W) {
        unsafe {
            fz_walk_path(
                self.ctx.0.get(),
                self.ptr,
                const {
                    &fz_path_walker {
                        moveto: Some(Self::move_to_fn::<W>),
                        lineto: Some(Self::line_to_fn::<W>),
                        curveto: Some(Self::curve_to_fn::<W>),
                        closepath: Some(Self::close_path_fn::<W>),
                        quadto: None,
                        curvetov: None,
                        curvetoy: None,
                        rectto: Some(Self::rect_to_fn::<W>),
                    }
                },
                (&raw mut walker).cast(),
            );
        }
    }
    unsafe extern "C" fn move_to_fn<W: PathWalker<'ctx>>(
        ctx: *mut fz_context,
        arg: *mut c_void,
        x: f32,
        y: f32,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut *arg.cast::<W>() };
        this.move_to(ctx, x, y);
    }
    unsafe extern "C" fn line_to_fn<W: PathWalker<'ctx>>(
        ctx: *mut fz_context,
        arg: *mut c_void,
        x: f32,
        y: f32,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut *arg.cast::<W>() };
        this.line_to(ctx, x, y);
    }
    unsafe extern "C" fn curve_to_fn<W: PathWalker<'ctx>>(
        ctx: *mut fz_context,
        arg: *mut c_void,
        x1: f32,
        y1: f32,
        x2: f32,
        y2: f32,
        x3: f32,
        y3: f32,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut *arg.cast::<W>() };
        this.curve_to(ctx, x1, y1, x2, y2, x3, y3);
    }
    unsafe extern "C" fn close_path_fn<W: PathWalker<'ctx>>(
        ctx: *mut fz_context,
        arg: *mut c_void,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut *arg.cast::<W>() };
        this.close_path(ctx);
    }
    unsafe extern "C" fn rect_to_fn<W: PathWalker<'ctx>>(
        ctx: *mut fz_context,
        arg: *mut c_void,
        x1: f32,
        y1: f32,
        x2: f32,
        y2: f32,
    ) {
        let ctx = unsafe { ContextRef::from_ptr(ctx) };
        let this = unsafe { &mut *arg.cast::<W>() };
        this.rect_to(ctx, x1, y1, x2, y2);
    }
}

impl<'ctx> Drop for Path<'ctx> {
    fn drop(&mut self) {
        unsafe {
            fz_drop_path(self.ctx.0.get(), self.ptr);
        }
    }
}
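Path::walk bridges the other direction: it hands MuPDF a const fz_path_walker whose entry points are monomorphized per walker type W, with the walker itself passed through the void pointer argument. As an illustration only (the parser's real walker is in src/lib.rs, which is suppressed above), a walker that flattens a path into straight segments might look like this; curves are crudely replaced by a chord to their end point.

```rust
// Illustrative PathWalker impl, not from the commits above.
#[derive(Default)]
struct SegmentCollector {
    current: Option<(f32, f32)>, // last pen position
    start: Option<(f32, f32)>,   // start of the current subpath, for close_path
    segments: Vec<((f32, f32), (f32, f32))>,
}

impl<'ctx> PathWalker<'ctx> for SegmentCollector {
    fn move_to(&mut self, _ctx: ContextRef<'ctx>, x: f32, y: f32) {
        self.current = Some((x, y));
        self.start = Some((x, y));
    }
    fn line_to(&mut self, _ctx: ContextRef<'ctx>, x: f32, y: f32) {
        if let Some(from) = self.current {
            self.segments.push((from, (x, y)));
        }
        self.current = Some((x, y));
    }
    fn curve_to(
        &mut self,
        ctx: ContextRef<'ctx>,
        _x1: f32,
        _y1: f32,
        _x2: f32,
        _y2: f32,
        x3: f32,
        y3: f32,
    ) {
        // crude flattening: keep only the curve's end point
        self.line_to(ctx, x3, y3);
    }
    fn close_path(&mut self, ctx: ContextRef<'ctx>) {
        if let Some((x, y)) = self.start {
            self.line_to(ctx, x, y);
        }
    }
    // rect_to uses the trait's default body above, which reports the corners.
}

// The blanket impl for &mut T means it can also be passed by reference:
//     path.walk(&mut collector);
```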
pub(crate) struct Text<'ctx> {
    ptr: *mut fz_text,
    ctx: ContextRef<'ctx>,
}

impl<'ctx> Drop for Text<'ctx> {
    fn drop(&mut self) {
        unsafe {
            fz_drop_text(self.ctx.0.get(), self.ptr);
        }
    }
}

impl<'ctx> Text<'ctx> {
    pub(crate) fn spans<'a>(&'a self) -> TextSpanIter<'a, 'ctx> {
        TextSpanIter {
            ptr: unsafe { NonNull::new((*self.ptr).head).map(|ptr| &*ptr.as_ptr().cast()) },
            ctx: self.ctx,
            _phantom: PhantomData,
        }
    }
}

#[derive(Clone)]
pub(crate) struct TextSpanIter<'a, 'ctx> {
    ptr: Option<&'a UnsafeCell<fz_text_span>>,
    ctx: ContextRef<'ctx>,
    _phantom: PhantomData<&'a Text<'ctx>>,
}

impl<'a, 'ctx> Iterator for TextSpanIter<'a, 'ctx> {
    type Item = TextSpanRef<'a, 'ctx>;

    fn next(&mut self) -> Option<Self::Item> {
        let ptr = self.ptr?;
        self.ptr = unsafe { NonNull::new((*ptr.get()).next).map(|ptr| &*ptr.as_ptr().cast()) };
        Some(TextSpanRef {
            ptr,
            ctx: self.ctx,
            _phantom: PhantomData,
        })
    }
}

#[derive(Copy, Clone)]
pub(crate) struct TextSpanRef<'a, 'ctx> {
    ptr: &'a UnsafeCell<fz_text_span>,
    ctx: ContextRef<'ctx>,
    _phantom: PhantomData<&'a Text<'ctx>>,
}

#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub(crate) enum WriteMode {
    Horizontal,
    Vertical,
}

impl<'a, 'ctx> TextSpanRef<'a, 'ctx> {
    #[allow(dead_code)]
    pub(crate) fn get(self) -> &'a UnsafeCell<fz_text_span> {
        self.ptr
    }
    pub(crate) fn font(self) -> FontRef<'a, 'ctx> {
        FontRef {
            ptr: unsafe { &*(*self.ptr.get()).font.cast::<UnsafeCell<fz_font>>() },
            ctx: self.ctx,
            _phantom: PhantomData,
        }
    }
    pub(crate) fn trm(self) -> fz_matrix {
        unsafe { (*self.ptr.get()).trm }
    }
    pub(crate) fn write_mode(self) -> WriteMode {
        if unsafe { (*self.ptr.get()).wmode() != 0 } {
            WriteMode::Vertical
        } else {
            WriteMode::Horizontal
        }
    }
    pub(crate) fn items(self) -> &'a [fz_text_item] {
        let len = unsafe { (*self.ptr.get()).len } as usize;
        if len == 0 {
            return &[];
        }
        unsafe { std::slice::from_raw_parts((*self.ptr.get()).items, len) }
    }
}

#[derive(Clone, Copy)]
pub(crate) struct FontRef<'a, 'ctx> {
    ptr: &'a UnsafeCell<fz_font>,
    ctx: ContextRef<'ctx>,
    _phantom: PhantomData<&'a Text<'ctx>>,
}

impl<'a, 'ctx> FontRef<'a, 'ctx> {
    #[allow(dead_code)]
    pub(crate) fn get(self) -> &'a UnsafeCell<fz_font> {
        self.ptr
    }
    pub(crate) fn name(self) -> &'a str {
        unsafe { CStr::from_ptr(fz_font_name(self.ctx.0.get(), self.ptr.get())) }
            .to_str()
            .expect("font name isn't valid UTF-8")
    }
    #[allow(dead_code)]
    pub(crate) fn is_bold(self) -> bool {
        unsafe { fz_font_is_bold(self.ctx.0.get(), self.ptr.get()) != 0 }
    }
    #[allow(dead_code)]
    pub(crate) fn is_italic(self) -> bool {
        unsafe { fz_font_is_italic(self.ctx.0.get(), self.ptr.get()) != 0 }
    }
    pub(crate) fn ascender(self) -> f32 {
        unsafe { fz_font_ascender(self.ctx.0.get(), self.ptr.get()) }
    }
    pub(crate) fn descender(self) -> f32 {
        unsafe { fz_font_descender(self.ctx.0.get(), self.ptr.get()) }
    }
}

#[allow(dead_code)]
pub(crate) fn transform_point(point: fz_point, m: fz_matrix) -> fz_point {
    unsafe { fz_transform_point(point, m) }
}

pub(crate) fn transform_point_xy(x: f32, y: f32, m: fz_matrix) -> fz_point {
    unsafe { fz_transform_point_xy(x, y, m) }
}

pub(crate) fn transform_vector(vector: fz_point, m: fz_matrix) -> fz_point {
    unsafe { fz_transform_vector(vector, m) }
}

pub(crate) fn matrix_expansion(m: fz_matrix) -> f32 {
    unsafe { fz_matrix_expansion(m) }
}

pub(crate) fn concat(left: fz_matrix, right: fz_matrix) -> fz_matrix {
    unsafe { fz_concat(left, right) }
}

pub(crate) fn add_points(a: fz_point, b: fz_point) -> fz_point {
    fz_point {
        x: a.x + b.x,
        y: a.y + b.y,
    }
}

pub(crate) fn point_min_components(a: fz_point, b: fz_point) -> fz_point {
    fz_point {
        x: a.x.min(b.x),
        y: a.y.min(b.y),
    }
}

pub(crate) fn point_max_components(a: fz_point, b: fz_point) -> fz_point {
    fz_point {
        x: a.x.max(b.x),
        y: a.y.max(b.y),
    }
}

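Text, TextSpanIter, TextSpanRef, and FontRef are lifetime-bounded views over MuPDF's linked list of fz_text_span records, which is what the text callbacks receive. Below is a sketch of how a fill_text body might read them; it is illustrative only and assumes fz_text_item exposes x, y, and ucs as in the mupdf-sys bindings.

```rust
// Illustrative sketch, not from the commits above: walk the spans of a Text
// and print each span's font plus every glyph's code point and origin.
fn dump_text<'ctx>(text: &Text<'ctx>) {
    for span in text.spans() {
        let font = span.font();
        println!(
            "span: font {:?} (ascender {}, descender {}), {:?}",
            font.name(),
            font.ascender(),
            font.descender(),
            span.write_mode(),
        );
        for item in span.items() {
            // fz_text_item carries the glyph origin (x, y) and a Unicode code
            // point (ucs); span.trm() and the callback's ctm are available when
            // the caller needs to map these coordinates into page space.
            let ch = char::from_u32(item.ucs as u32).unwrap_or('\u{FFFD}');
            println!("  {:?} at ({}, {})", ch, item.x, item.y);
        }
    }
}
```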
src/quad_tree.rs (new file, 1115 lines)
File diff suppressed because it is too large.

src/xml_tree.rs (new file, 232 lines)
@@ -0,0 +1,232 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information

use quick_xml::{
    Writer,
    events::{BytesDecl, BytesStart, BytesText, Event},
};
use std::fmt;

struct FmtToIoAdaptor<W> {
    writer: W,
    partial_char: [u8; 4],
    partial_char_len: u8,
}

impl<W: fmt::Write> FmtToIoAdaptor<W> {
    fn new(writer: W) -> Self {
        Self {
            writer,
            partial_char: [0; 4],
            partial_char_len: 0,
        }
    }
    fn finish(self) -> Result<W, fmt::Error> {
        let Self {
            writer,
            partial_char: _,
            partial_char_len,
        } = self;
        if partial_char_len != 0 {
            Err(fmt::Error)
        } else {
            Ok(writer)
        }
    }
    fn write_byte(&mut self, b: u8) -> std::io::Result<()> {
        let Self {
            writer,
            partial_char,
            partial_char_len,
        } = self;
        partial_char[usize::from(*partial_char_len)] = b;
        *partial_char_len += 1;
        match str::from_utf8(&partial_char[..usize::from(*partial_char_len)]) {
            Ok(s) => {
                *partial_char_len = 0;
                writer.write_str(s).map_err(std::io::Error::other)
            }
            Err(e) => {
                if e.error_len().is_some() {
                    *partial_char_len = 0;
                    Err(std::io::Error::new(std::io::ErrorKind::InvalidData, e))
                } else {
                    Ok(())
                }
            }
        }
    }
}

impl<W: fmt::Write> std::io::Write for FmtToIoAdaptor<W> {
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        for &b in buf {
            self.write_byte(b)?;
        }
        Ok(buf.len())
    }
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}

#[derive(Clone, PartialEq, Eq, Hash)]
pub(crate) enum ElementTag {
    Comment,
    Normal(String),
}

impl ElementTag {
    pub(crate) fn normal(&self) -> Option<&str> {
        match self {
            ElementTag::Comment => None,
            ElementTag::Normal(v) => Some(v),
        }
    }
}

impl fmt::Debug for ElementTag {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Comment => f.write_str("<Comment>"),
            Self::Normal(v) => v.fmt(f),
        }
    }
}

/// like python's xml.etree.ElementTree.Element
#[derive(Clone, Debug)]
pub(crate) struct Element {
    pub(crate) tag: ElementTag,
    pub(crate) attrib: Vec<(String, String)>,
    /// text contained in this element but before any children
    pub(crate) text: String,
    pub(crate) children: Vec<Element>,
    /// text after the end of this element
    pub(crate) tail: String,
}

/// equivalent to python `xml.etree.ElementTree.tostring(self, encoding="unicode")`
impl fmt::Display for Element {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut writer = Writer::new(FmtToIoAdaptor::new(f));
        self.write_to(&mut writer).map_err(|_| fmt::Error)?;
        writer.into_inner().finish()?;
        Ok(())
    }
}

impl Element {
    pub(crate) fn new(tag: String, attrib: impl IntoIterator<Item = (String, String)>) -> Self {
        Self {
            tag: ElementTag::Normal(tag),
            attrib: Vec::from_iter(attrib),
            text: String::new(),
            children: Vec::new(),
            tail: String::new(),
        }
    }
    /// equivalent of python's `xml.etree.ElementTree.Comment()`
    pub(crate) fn comment(text: String) -> Self {
        Self {
            tag: ElementTag::Comment,
            attrib: Vec::new(),
            text,
            children: Vec::new(),
            tail: String::new(),
        }
    }
    /// equivalent to python `"".join(self.itertext())`
    pub(crate) fn inner_text(&self) -> String {
        let mut retval = String::new();
        fn helper(element: &Element, retval: &mut String) {
            let Element {
                tag,
                attrib: _,
                text,
                children,
                tail: _,
            } = element;
            let ElementTag::Normal(_) = tag else {
                return;
            };
            retval.push_str(text);
            for child in children {
                helper(child, retval);
                retval.push_str(&child.tail);
            }
        }
        helper(self, &mut retval);
        retval
    }
    /// equivalent of python's `xml.etree.ElementTree.SubElement()`
    pub(crate) fn sub_element(
        &mut self,
        tag: String,
        attrib: impl IntoIterator<Item = (String, String)>,
    ) -> &mut Self {
        self.children.push(Self::new(tag, attrib));
        self.children.last_mut().expect("just pushed")
    }
    pub(crate) fn write_to(&self, writer: &mut Writer<impl std::io::Write>) -> std::io::Result<()> {
        let Element {
            tag,
            attrib,
            text,
            children,
            tail,
        } = self;
        match tag {
            ElementTag::Comment => {
                writer.write_event(Event::Comment(BytesText::new(text)))?;
            }
            ElementTag::Normal(tag) if tag.is_empty() => {
                writer.write_event(Event::Text(BytesText::new(text)))?;
            }
            ElementTag::Normal(tag)
                if attrib.is_empty() && text.is_empty() && children.is_empty() =>
            {
                // write element like `<br />` to match python instead of like `<br/>`
                writer.write_event(Event::Empty(BytesStart::from_content(
                    tag.clone() + " ",
                    tag.len(),
                )))?;
            }
            ElementTag::Normal(tag) => {
                let mut element_writer = writer.create_element(tag);
                for (name, value) in attrib {
                    element_writer = element_writer.with_attribute((name.as_str(), value.as_str()));
                }
                if text.is_empty() && children.is_empty() {
                    element_writer.write_empty()?;
                } else {
                    element_writer.write_inner_content(|writer| {
                        writer.write_event(Event::Text(BytesText::new(text)))?;
                        for child in children {
                            child.write_to(writer)?;
                        }
                        Ok(())
                    })?;
                }
            }
        }
        writer.write_event(Event::Text(BytesText::new(tail)))?;
        Ok(())
    }
    /// equivalent of python's `xml.etree.ElementTree(self).write(writer, encoding='utf-8', xml_declaration=xml_declaration)`
    pub(crate) fn write(
        &self,
        writer: impl std::io::Write,
        xml_declaration: bool,
    ) -> std::io::Result<()> {
        let mut writer = Writer::new(writer);
        if xml_declaration {
            // use specific string to match python
            writer.write_event(Event::Decl(BytesDecl::from_start(
                BytesStart::from_content("xml version='1.0' encoding='utf-8'", 3),
            )))?;
            writer.write_event(Event::Text(BytesText::new("\n")))?;
        }
        self.write_to(&mut writer)
    }
}
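Element deliberately mirrors Python's xml.etree.ElementTree (the doc comments above call out the equivalences), which is presumably what lets the CI job check the Rust and Python outputs against the same OUTPUT_XML_HASH. A small usage sketch follows; it is not taken from the diff, and the element and attribute names are invented for the example rather than being the parser's actual output schema.

```rust
// Illustrative sketch, not from the commits above.
fn example() -> std::io::Result<()> {
    let mut root = Element::new("instruction".into(), [("name".into(), "addi".into())]);
    root.sub_element("pseudocode".into(), []).text = "RT <- (RA|0) + EXTS(SI)".into();
    root.children.push(Element::comment(" example only ".into()));

    // Display goes through FmtToIoAdaptor, matching
    // xml.etree.ElementTree.tostring(root, encoding="unicode") in Python.
    println!("{root}");

    // write() can emit the same "<?xml version='1.0' encoding='utf-8'?>"
    // declaration that Python's ElementTree.write() produces.
    let mut out = Vec::new();
    root.write(&mut out, true)?;
    assert!(out.starts_with(b"<?xml version='1.0' encoding='utf-8'?>"));
    Ok(())
}
```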