Compare commits

...

9 commits

Author SHA1 Message Date
c8cd234d8f
wip porting to rust
Some checks failed
/ test (push) Failing after 27s
2026-01-02 16:09:43 -08:00
718de40b09
wip 2026-01-02 07:34:40 -08:00
9e090a66a3
port more code 2026-01-02 04:50:51 -08:00
7ecdbc0239
porting more 2026-01-02 03:53:16 -08:00
e1277bbb90
add insn bit fields 2026-01-02 03:30:35 -08:00
3fc0e92f95
add ElementBodyBuilder 2026-01-02 03:09:49 -08:00
104ee37933
port more to rust 2026-01-02 02:47:21 -08:00
8643d47338
port Font to Rust 2026-01-01 23:14:08 -08:00
944ae4bf41
port QuadTree to rust 2026-01-01 08:41:26 -08:00
7 changed files with 4046 additions and 0 deletions

1
.gitignore vendored
View file

@ -7,3 +7,4 @@ __pycache__
*.log
/powerisa-instructions.xml
/*.pdf
/target

432
Cargo.lock generated Normal file
View file

@ -0,0 +1,432 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "aho-corasick"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
dependencies = [
"memchr",
]
[[package]]
name = "bindgen"
version = "0.71.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
dependencies = [
"bitflags",
"cexpr",
"clang-sys",
"itertools",
"proc-macro2",
"quote",
"regex",
"rustc-hash",
"shlex",
"syn",
]
[[package]]
name = "bitflags"
version = "2.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
[[package]]
name = "cc"
version = "1.2.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a0aeaff4ff1a90589618835a598e545176939b97874f7abc7851caa0618f203"
dependencies = [
"find-msvc-tools",
"shlex",
]
[[package]]
name = "cexpr"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
dependencies = [
"nom",
]
[[package]]
name = "cfg-if"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "clang-sys"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
dependencies = [
"glob",
"libc",
"libloading",
]
[[package]]
name = "either"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "find-msvc-tools"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "645cbb3a84e60b7531617d5ae4e57f7e27308f6445f5abf653209ea76dec8dff"
[[package]]
name = "glob"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
[[package]]
name = "hashbrown"
version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
[[package]]
name = "indexmap"
version = "2.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2"
dependencies = [
"equivalent",
"hashbrown",
]
[[package]]
name = "itertools"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
dependencies = [
"either",
]
[[package]]
name = "libc"
version = "0.2.178"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091"
[[package]]
name = "libloading"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
dependencies = [
"cfg-if",
"windows-link",
]
[[package]]
name = "libm"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de"
[[package]]
name = "memchr"
version = "2.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "mupdf"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a6499267155b9ae03ff8e53c456d0bfff988b2647d62ff1df038f39ebe93a0c"
dependencies = [
"bitflags",
"mupdf-sys",
"num_enum",
"once_cell",
"zerocopy",
]
[[package]]
name = "mupdf-sys"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13e9a0d4e844ab50315d43312f3d62f72c77205b07c8ee21cbd4b52bdc2a9910"
dependencies = [
"bindgen",
"cc",
"pkg-config",
"regex",
"zerocopy",
]
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "num_enum"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c"
dependencies = [
"num_enum_derive",
"rustversion",
]
[[package]]
name = "num_enum_derive"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7"
dependencies = [
"proc-macro-crate",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "once_cell"
version = "1.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
[[package]]
name = "parse_powerisa_pdf"
version = "0.1.0"
dependencies = [
"indexmap",
"libm",
"mupdf",
"quick-xml",
"serde",
]
[[package]]
name = "pkg-config"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "proc-macro-crate"
version = "3.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983"
dependencies = [
"toml_edit",
]
[[package]]
name = "proc-macro2"
version = "1.0.104"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9695f8df41bb4f3d222c95a67532365f569318332d03d5f3f67f37b20e6ebdf0"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quick-xml"
version = "0.38.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b66c2058c55a409d601666cffe35f04333cf1013010882cec174a7467cd4e21c"
dependencies = [
"memchr",
"serde",
]
[[package]]
name = "quote"
version = "1.0.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
[[package]]
name = "rustc-hash"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
[[package]]
name = "rustversion"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
[[package]]
name = "serde"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
dependencies = [
"serde_core",
"serde_derive",
]
[[package]]
name = "serde_core"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.228"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "syn"
version = "2.0.112"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21f182278bf2d2bcb3c88b1b08a37df029d71ce3d3ae26168e3c653b213b99d4"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "toml_datetime"
version = "0.7.5+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347"
dependencies = [
"serde_core",
]
[[package]]
name = "toml_edit"
version = "0.23.10+spec-1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84c8b9f757e028cee9fa244aea147aab2a9ec09d5325a9b01e0a49730c2b5269"
dependencies = [
"indexmap",
"toml_datetime",
"toml_parser",
"winnow",
]
[[package]]
name = "toml_parser"
version = "1.0.6+spec-1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44"
dependencies = [
"winnow",
]
[[package]]
name = "unicode-ident"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
[[package]]
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
[[package]]
name = "winnow"
version = "0.7.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829"
dependencies = [
"memchr",
]
[[package]]
name = "zerocopy"
version = "0.8.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.8.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a"
dependencies = [
"proc-macro2",
"quote",
"syn",
]

18
Cargo.toml Normal file
View file

@ -0,0 +1,18 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
# See Notices.txt for copyright information
[package]
name = "parse_powerisa_pdf"
version = "0.1.0"
license = "LGPL-3.0-or-later"
edition = "2024"
repository = ""
keywords = []
categories = []
rust-version = "1.89.0"
[dependencies]
indexmap = "2.12.1"
libm = "0.2.15"
mupdf = { version = "0.5.0", default-features = false }
quick-xml = { version = "0.38.4", features = ["serialize"] }
serde = { version = "1.0.228", features = ["derive"] }

View file

@ -32,6 +32,7 @@ function check_file()
}
POUND_HEADER=('^"# SPDX-License-Identifier: LGPL-3.0-or-later"$' '^"# See Notices.txt for copyright information"$')
SLASH_HEADER=('^"// SPDX-License-Identifier: LGPL-3.0-or-later"$' '^"// See Notices.txt for copyright information"$')
MD_HEADER=('^"<!--"$' '^"SPDX-License-Identifier: LGPL-3.0-or-later"$' '^"See Notices.txt for copyright information"$')
function main()
@ -41,6 +42,9 @@ function main()
local file
for file in $(git ls-tree --name-only --full-tree -r HEAD); do
case "/$file" in
/Cargo.lock)
# generated file
;;
*/LICENSE.md|*/Notices.txt)
# copyright file
;;
@ -53,6 +57,9 @@ function main()
*.sh)
check_file "$file" '^'\''#!'\' "${POUND_HEADER[@]}"
;;
*.rs)
check_file "$file" "${SLASH_HEADER[@]}"
;;
*)
fail_file "$file" 0 "unimplemented file kind -- you need to add it to $0"
;;

2253
src/main.rs Normal file

File diff suppressed because it is too large Load diff

1115
src/quad_tree.rs Normal file

File diff suppressed because it is too large Load diff

220
src/xml_tree.rs Normal file
View file

@ -0,0 +1,220 @@
// SPDX-License-Identifier: LGPL-3.0-or-later
// See Notices.txt for copyright information
use quick_xml::{
Writer,
events::{BytesDecl, BytesText, Event},
};
use std::fmt;
struct FmtToIoAdaptor<W> {
writer: W,
partial_char: [u8; 4],
partial_char_len: u8,
}
impl<W: fmt::Write> FmtToIoAdaptor<W> {
fn new(writer: W) -> Self {
Self {
writer,
partial_char: [0; 4],
partial_char_len: 0,
}
}
fn finish(self) -> Result<W, fmt::Error> {
let Self {
writer,
partial_char: _,
partial_char_len,
} = self;
if partial_char_len != 0 {
Err(fmt::Error)
} else {
Ok(writer)
}
}
fn write_byte(&mut self, b: u8) -> std::io::Result<()> {
let Self {
writer,
partial_char,
partial_char_len,
} = self;
partial_char[usize::from(*partial_char_len)] = b;
*partial_char_len += 1;
match str::from_utf8(&partial_char[..usize::from(*partial_char_len)]) {
Ok(s) => {
*partial_char_len = 0;
writer.write_str(s).map_err(std::io::Error::other)
}
Err(e) => {
if e.error_len().is_some() {
*partial_char_len = 0;
Err(std::io::Error::new(std::io::ErrorKind::InvalidData, e))
} else {
Ok(())
}
}
}
}
}
impl<W: fmt::Write> std::io::Write for FmtToIoAdaptor<W> {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
for &b in buf {
self.write_byte(b)?;
}
Ok(buf.len())
}
fn flush(&mut self) -> std::io::Result<()> {
Ok(())
}
}
#[derive(Clone, PartialEq, Eq, Hash)]
pub(crate) enum ElementTag {
Comment,
Normal(String),
}
impl ElementTag {
pub(crate) fn normal(&self) -> Option<&str> {
match self {
ElementTag::Comment => None,
ElementTag::Normal(v) => Some(v),
}
}
}
impl fmt::Debug for ElementTag {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Comment => f.write_str("<Comment>"),
Self::Normal(v) => v.fmt(f),
}
}
}
/// like python's xml.etree.ElementTree.Element
#[derive(Clone, Debug)]
pub(crate) struct Element {
pub(crate) tag: ElementTag,
pub(crate) attrib: Vec<(String, String)>,
/// text contained in this element but before any children
pub(crate) text: String,
pub(crate) children: Vec<Element>,
/// text after the end of this element
pub(crate) tail: String,
}
/// equivalent to python `xml.etree.ElementTree.tostring(self, encoding="unicode")`
impl fmt::Display for Element {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut writer = Writer::new(FmtToIoAdaptor::new(f));
self.write_to(&mut writer).map_err(|_| fmt::Error)?;
writer.into_inner().finish()?;
Ok(())
}
}
impl Element {
pub(crate) fn new(tag: String, attrib: impl IntoIterator<Item = (String, String)>) -> Self {
Self {
tag: ElementTag::Normal(tag),
attrib: Vec::from_iter(attrib),
text: String::new(),
children: Vec::new(),
tail: String::new(),
}
}
/// equivalent of python's `xml.etree.ElementTree.Comment()`
pub(crate) fn comment(text: String) -> Self {
Self {
tag: ElementTag::Comment,
attrib: Vec::new(),
text,
children: Vec::new(),
tail: String::new(),
}
}
/// equivalent to python `"".join(self.itertext())`
pub(crate) fn inner_text(&self) -> String {
let mut retval = String::new();
fn helper(element: &Element, retval: &mut String) {
let Element {
tag,
attrib: _,
text,
children,
tail: _,
} = element;
let ElementTag::Normal(_) = tag else {
return;
};
retval.push_str(text);
for child in children {
helper(child, retval);
retval.push_str(&child.tail);
}
}
helper(self, &mut retval);
retval
}
/// equivalent of python's `xml.etree.ElementTree.SubElement()`
pub(crate) fn sub_element(
&mut self,
tag: String,
attrib: impl IntoIterator<Item = (String, String)>,
) -> &mut Self {
self.children.push(Self::new(tag, attrib));
self.children.last_mut().expect("just pushed")
}
pub(crate) fn write_to(&self, writer: &mut Writer<impl std::io::Write>) -> std::io::Result<()> {
let Element {
tag,
attrib,
text,
children,
tail,
} = self;
match tag {
ElementTag::Comment => {
writer.write_event(Event::Comment(BytesText::new(text)))?;
}
ElementTag::Normal(tag) if tag.is_empty() => {
writer.write_event(Event::Text(BytesText::new(text)))?;
}
ElementTag::Normal(tag) => {
let mut element_writer = writer.create_element(tag);
for (name, value) in attrib {
element_writer = element_writer.with_attribute((name.as_str(), value.as_str()));
}
if text.is_empty() && children.is_empty() {
element_writer.write_empty()?;
} else {
element_writer.write_inner_content(|writer| {
writer.write_event(Event::Text(BytesText::new(text)))?;
for child in children {
child.write_to(writer)?;
}
Ok(())
})?;
}
}
}
writer.write_event(Event::Text(BytesText::new(tail)))?;
Ok(())
}
/// equivalent of python's `xml.etree.ElementTree(self).write(writer, encoding='utf-8', xml_declaration=xml_declaration)`
pub(crate) fn write(
&self,
writer: impl std::io::Write,
xml_declaration: bool,
) -> std::io::Result<()> {
let mut writer = Writer::new(writer);
if xml_declaration {
writer.write_event(Event::Decl(BytesDecl::new("1.0", Some("utf-8"), None)))?;
writer.write_event(Event::Text(BytesText::new("\n")))?;
}
self.write_to(&mut writer)
}
}