diff --git a/Cargo.lock b/Cargo.lock
index 2e1df0c..7cbebd6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -479,6 +479,14 @@ version = "2.7.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
 
+[[package]]
+name = "name_mangling_serde"
+version = "0.1.0"
+dependencies = [
+ "serde",
+ "serde_json",
+]
+
 [[package]]
 name = "num-bigint"
 version = "0.4.6"
diff --git a/Cargo.toml b/Cargo.toml
index 5f7aded..94355a7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,6 +15,8 @@ rust-version = "1.82.0"
 
 [workspace.dependencies]
 fayalite = { git = "https://git.libre-chip.org/libre-chip/fayalite.git", version = "0.3.0", branch = "master" }
+serde = { version = "1.0.202", features = ["derive"] }
+serde_json = { version = "1.0.117", features = ["preserve_order"] }
 
 [profile.dev]
 opt-level = 1
diff --git a/crates/name_mangling_serde/Cargo.toml b/crates/name_mangling_serde/Cargo.toml
new file mode 100644
index 0000000..c69574d
--- /dev/null
+++ b/crates/name_mangling_serde/Cargo.toml
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+# See Notices.txt for copyright information
+[package]
+name = "name_mangling_serde"
+description = "serde serializer/deserializer for name mangling"
+workspace = "../.."
+readme = "README.md"
+publish = false
+edition.workspace = true
+license.workspace = true
+repository.workspace = true
+rust-version.workspace = true
+version.workspace = true
+
+[dependencies]
+serde.workspace = true
+serde_json.workspace = true
+
+[lints.rust]
+unexpected_cfgs = { level = "warn", check-cfg = ['cfg(todo)'] }
diff --git a/crates/name_mangling_serde/LICENSE.md b/crates/name_mangling_serde/LICENSE.md
new file mode 120000
index 0000000..f0608a6
--- /dev/null
+++ b/crates/name_mangling_serde/LICENSE.md
@@ -0,0 +1 @@
+../../LICENSE.md
\ No newline at end of file
diff --git a/crates/name_mangling_serde/Notices.txt b/crates/name_mangling_serde/Notices.txt
new file mode 120000
index 0000000..9f3a306
--- /dev/null
+++ b/crates/name_mangling_serde/Notices.txt
@@ -0,0 +1 @@
+../../Notices.txt
\ No newline at end of file
diff --git a/crates/name_mangling_serde/src/lib.rs b/crates/name_mangling_serde/src/lib.rs
new file mode 100644
index 0000000..6667da7
--- /dev/null
+++ b/crates/name_mangling_serde/src/lib.rs
@@ -0,0 +1,489 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+// See Notices.txt for copyright information
+
+//! Encodes JSON values (and anything `serde` can turn into JSON) as names made only
+//! of ASCII letters, digits and `_`, and decodes such names back again.
+
+use serde::{de::DeserializeOwned, Serialize};
+use serde_json::{Map, Number, Value};
+use std::{
+    fmt::{self, Write},
+    num::ParseIntError,
+};
+
+// Declares a `#[repr(u8)]` enum together with `new` (byte -> variant) and `as_char`.
+macro_rules! byte_enum {
+    (
+        #[repr(u8)]
+        $(#[$meta:meta])*
+        $vis:vis enum $enum:ident {
+            $($Variant:ident = $value:expr,)*
+        }
+    ) => {
+        #[repr(u8)]
+        $(#[$meta])*
+        $vis enum $enum {
+            $($Variant = $value,)*
+        }
+
+        impl $enum {
+            $vis fn new(v: u8) -> Option<Self> {
+                struct Values;
+                #[allow(non_upper_case_globals)]
+                impl Values {
+                    $(const $Variant: u8 = $enum::$Variant as u8;)*
+                }
+                match v {
+                    $(Values::$Variant => Some(Self::$Variant),)*
+                    _ => None,
+                }
+            }
+            #[allow(dead_code)]
+            $vis fn as_char(self) -> char {
+                const {
+                    $(assert!((Self::$Variant as u8).is_ascii());)*
+                };
+                self as u8 as char
+            }
+        }
+    };
+}
+
+// Declares a key enum and a value enum with the same variants and `From` impls both ways.
+macro_rules! string_escapes {
+    (
+        $key_vis:vis enum $StringEscapeKey:ident {}
+        $value_vis:vis enum $StringEscapeValue:ident {
+            $(
+                #[key = $key:expr]
+                $Variant:ident = $value:expr,
+            )*
+        }
+    ) => {
+        byte_enum! {
+            #[repr(u8)]
+            #[derive(Clone, Copy, Debug)]
+            $key_vis enum $StringEscapeKey {
+                $($Variant = $key,)*
+            }
+        }
+
+        byte_enum! {
+            #[repr(u8)]
+            #[derive(Clone, Copy, Debug)]
+            $value_vis enum $StringEscapeValue {
+                $($Variant = $value,)*
+            }
+        }
+
+        impl From<$StringEscapeKey> for $StringEscapeValue {
+            fn from(v: $StringEscapeKey) -> Self {
+                match v {
+                    $($StringEscapeKey::$Variant => Self::$Variant,)*
+                }
+            }
+        }
+
+        impl From<$StringEscapeValue> for $StringEscapeKey {
+            fn from(v: $StringEscapeValue) -> Self {
+                match v {
+                    $($StringEscapeValue::$Variant => Self::$Variant,)*
+                }
+            }
+        }
+    };
+}
+
+string_escapes! {
+    enum StringEscapeKey {}
+    enum StringEscapeValue {
+        #[key = b's']
+        Space = b' ',
+        #[key = b't']
+        Tab = b'\t',
+        #[key = b'r']
+        CR = b'\r',
+        #[key = b'n']
+        NewLine = b'\n',
+        #[key = b'_']
+        Underline = b'_',
+    }
+}
+
+/// Appends `s`, the byte length of `value`, `_`, and then the escaped bytes of `value`.
+fn json_string_to_name_part(value: &str, out: &mut String) {
+    out.push(ValuePrefix::String.as_char());
+    write!(out, "{}_", value.len()).unwrap();
+    for b in value.bytes() {
+        if let Some(v) = StringEscapeValue::new(b) {
+            out.push('_');
+            out.push(StringEscapeKey::from(v).as_char());
+        } else if b.is_ascii_alphanumeric() {
+            out.push(b as char);
+        } else {
+            write!(out, "_{b:02x}").unwrap()
+        }
+    }
+}
+
+// One-byte tag written in front of every mangled value.
+byte_enum! {
+    #[repr(u8)]
+    #[derive(Clone, Copy, Debug)]
+    enum ValuePrefix {
+        Null = b'z',
+        False = b'f',
+        True = b't',
+        Number = b'n',
+        String = b's',
+        Array = b'a',
+        Object = b'o',
+    }
+}
+
+/// Appends the mangled form of `value` (prefix byte plus payload) to `out`.
+fn json_value_to_name_part(value: &Value, out: &mut String) {
+    match value {
+        Value::Null => out.push(ValuePrefix::Null.as_char()),
+        Value::Bool(false) => out.push(ValuePrefix::False.as_char()),
+        Value::Bool(true) => out.push(ValuePrefix::True.as_char()),
+        Value::Number(number) => {
+            out.push(ValuePrefix::Number.as_char());
+            // Map each character of the formatted number instead of editing `out` in
+            // place: dropping a `+` there would shift later indices out of bounds.
+            for ch in number.to_string().chars() {
+                match ch {
+                    '0'..='9' => out.push(ch),
+                    '+' => {}
+                    '-' => out.push('n'),
+                    '.' => out.push('p'),
+                    'e' | 'E' => out.push('e'),
+                    _ => unreachable!("invalid character in JSON number"),
+                }
+            }
+        }
+        Value::String(string) => json_string_to_name_part(string, out),
+        Value::Array(array) => {
+            out.push(ValuePrefix::Array.as_char());
+            write!(out, "{}", array.len()).unwrap();
+            for element in array {
+                json_value_to_name_part(element, out);
+            }
+        }
+        Value::Object(object) => {
+            out.push(ValuePrefix::Object.as_char());
+            write!(out, "{}", object.len()).unwrap();
+            for (k, v) in object {
+                json_string_to_name_part(k, out);
+                json_value_to_name_part(v, out);
+            }
+        }
+    }
+}
+
+/// Prefix that every mangled name starts with.
+pub const NAME_PREFIX: &str = "__HDL";
+
+/// Mangles `value` into a name starting with [`NAME_PREFIX`].
+pub fn json_value_to_name(value: &Value) -> String {
+    let mut retval = NAME_PREFIX.into();
+    json_value_to_name_part(value, &mut retval);
+    retval
+}
+
+/// Errors returned while mangling a value or parsing a mangled name.
+#[derive(Debug)]
+pub enum Error {
+    Serde(serde_json::Error),
+    NameDoesNotStartWithKnownPrefix,
+    UnknownValuePrefix,
+    MissingValuePrefix,
+    InvalidLength(ParseIntError),
+    TrailingCharacters,
+    KeyMustBeAString,
+    StringMissingUnderline,
+    StringTruncated,
+    InvalidEscape,
+    InvalidString,
+}
+
+impl From<serde_json::Error> for Error {
+    fn from(value: serde_json::Error) -> Self {
+        Self::Serde(value)
+    }
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::Serde(e) => e.fmt(f),
+            Self::NameDoesNotStartWithKnownPrefix => {
+                f.write_str("name does not start with the known prefix")
+            }
+            Self::UnknownValuePrefix => f.write_str("unknown value prefix"),
+            Self::MissingValuePrefix => f.write_str("missing value prefix"),
+            Self::InvalidLength(_) => f.write_str("invalid length"),
+            Self::TrailingCharacters => f.write_str("trailing characters"),
+            Self::KeyMustBeAString => f.write_str("key must be a string"),
+            Self::StringMissingUnderline => f.write_str("string missing `_` after length"),
+            Self::StringTruncated => f.write_str("string truncated"),
+            Self::InvalidEscape => f.write_str("invalid escape"),
+            Self::InvalidString => f.write_str("invalid string"),
+        }
+    }
+}
+
+impl std::error::Error for Error {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        match self {
+            Self::Serde(e) => e.source(),
+            Self::NameDoesNotStartWithKnownPrefix => None,
+            Self::UnknownValuePrefix => None,
+            Self::MissingValuePrefix => None,
+            Self::InvalidLength(e) => Some(e),
+            Self::TrailingCharacters => None,
+            Self::KeyMustBeAString => None,
+            Self::StringMissingUnderline => None,
+            Self::StringTruncated => None,
+            Self::InvalidEscape => None,
+            Self::InvalidString => None,
+        }
+    }
+}
+
+// Cursor over the not-yet-parsed tail of a mangled name.
+struct NameParser<'a> {
+    name_part: &'a str,
+    number_buf: String,
+}
+
+impl NameParser<'_> {
+    /// Consumes the leading run of ASCII digits and parses it as a length.
+    fn parse_len(&mut self) -> Result<usize, Error> {
+        let len_end = self
+            .name_part
+            .bytes()
+            .position(|b| !b.is_ascii_digit())
+            .unwrap_or(self.name_part.len());
+        let (len, rest) = self.name_part.split_at(len_end);
+        self.name_part = rest;
+        len.parse().map_err(Error::InvalidLength)
+    }
+    /// Parses `<len>_<escaped bytes>`, where `<len>` counts the decoded bytes.
+    fn parse_string_without_prefix(&mut self) -> Result<String, Error> {
+        let len = self.parse_len()?;
+        let Some(rest) = self.name_part.strip_prefix("_") else {
+            return Err(Error::StringMissingUnderline);
+        };
+        self.name_part = rest;
+        let mut bytes = Vec::new();
+        for _ in 0..len {
+            let b = self
+                .name_part
+                .bytes()
+                .next()
+                .ok_or(Error::StringTruncated)?;
+            if b.is_ascii_alphanumeric() {
+                bytes.push(b);
+                self.name_part = &self.name_part[1..];
+            } else if b == b'_' {
+                self.name_part = &self.name_part[1..];
+                let escape = self.name_part.bytes().next().ok_or(Error::InvalidEscape)?;
+                if !escape.is_ascii() {
+                    // Slicing one byte off would split a multi-byte character and panic.
+                    return Err(Error::InvalidEscape);
+                }
+                self.name_part = &self.name_part[1..];
+                if let Some(high) = (escape as char).to_digit(16) {
+                    let low = self
+                        .name_part
+                        .bytes()
+                        .next()
+                        .ok_or(Error::StringTruncated)?;
+                    let low = (low as char).to_digit(16).ok_or(Error::InvalidString)?;
+                    self.name_part = &self.name_part[1..];
+                    bytes.push((high * 16 + low) as u8);
+                } else {
+                    let escape = StringEscapeKey::new(escape).ok_or(Error::InvalidEscape)?;
+                    bytes.push(StringEscapeValue::from(escape) as u8);
+                }
+            } else {
+                return Err(Error::InvalidString);
+            }
+        }
+        String::from_utf8(bytes).map_err(|_| Error::InvalidString)
+    }
+    /// Parses a value that must be a string, as required for object keys.
+    fn parse_string(&mut self) -> Result<String, Error> {
+        if let ValuePrefix::String = self.parse_value_prefix()? {
+            self.parse_string_without_prefix()
+        } else {
+            Err(Error::KeyMustBeAString)
+        }
+    }
+    /// Rebuilds the JSON number text (`n` -> `-`, `p` -> `.`) and parses it.
+    fn parse_number_without_prefix(&mut self) -> Result<Number, Error> {
+        let mut bytes = self.name_part.as_bytes().iter();
+        self.number_buf.clear();
+        if let Some(b'n') = bytes.clone().next() {
+            bytes.next();
+            self.number_buf.push('-');
+        }
+        while let Some(&b @ b'0'..=b'9') = bytes.clone().next() {
+            bytes.next();
+            self.number_buf.push(b as char);
+        }
+        if let Some(b'p') = bytes.clone().next() {
+            bytes.next();
+            self.number_buf.push('.');
+            while let Some(&b @ b'0'..=b'9') = bytes.clone().next() {
+                bytes.next();
+                self.number_buf.push(b as char);
+            }
+        }
+        if let Some(b'e') = bytes.clone().next() {
+            bytes.next();
+            self.number_buf.push('e');
+            if let Some(b'n') = bytes.clone().next() {
+                bytes.next();
+                self.number_buf.push('-');
+            }
+            while let Some(&b @ b'0'..=b'9') = bytes.clone().next() {
+                bytes.next();
+                self.number_buf.push(b as char);
+            }
+        }
+        self.name_part = &self.name_part[self.name_part.len() - bytes.len()..];
+        Ok(self.number_buf.parse()?)
+    }
+    /// Consumes one byte and interprets it as a `ValuePrefix`.
+    fn parse_value_prefix(&mut self) -> Result<ValuePrefix, Error> {
+        let value_prefix = self
+            .name_part
+            .bytes()
+            .next()
+            .ok_or(Error::MissingValuePrefix)?;
+        let value_prefix = ValuePrefix::new(value_prefix).ok_or(Error::UnknownValuePrefix)?;
+        self.name_part = &self.name_part[1..];
+        Ok(value_prefix)
+    }
+    /// Parses one complete value, recursing into arrays and objects.
+    fn parse_value(&mut self) -> Result<Value, Error> {
+        Ok(match self.parse_value_prefix()? {
+            ValuePrefix::Null => Value::Null,
+            ValuePrefix::False => Value::Bool(false),
+            ValuePrefix::True => Value::Bool(true),
+            ValuePrefix::Number => Value::Number(self.parse_number_without_prefix()?),
+            ValuePrefix::String => Value::String(self.parse_string_without_prefix()?),
+            ValuePrefix::Array => {
+                let len = self.parse_len()?;
+                let mut array = Vec::new();
+                for _ in 0..len {
+                    array.push(self.parse_value()?);
+                }
+                Value::Array(array)
+            }
+            ValuePrefix::Object => {
+                let len = self.parse_len()?;
+                let mut object = Map::new();
+                for _ in 0..len {
+                    let key = self.parse_string()?;
+                    let value = self.parse_value()?;
+                    object.insert(key, value);
+                }
+                Value::Object(object)
+            }
+        })
+    }
+}
+
+/// Parses a name produced by [`json_value_to_name`] back into a JSON value.
+pub fn name_to_json_value(name: &str) -> Result<Value, Error> {
+    let Some(name_part) = name.strip_prefix(NAME_PREFIX) else {
+        return Err(Error::NameDoesNotStartWithKnownPrefix);
+    };
+    let mut parser = NameParser {
+        name_part,
+        number_buf: String::new(),
+    };
+    let retval = parser.parse_value()?;
+    if !parser.name_part.is_empty() {
+        Err(Error::TrailingCharacters)
+    } else {
+        Ok(retval)
+    }
+}
+
+/// Parses `name` and deserializes the resulting JSON value into `T`.
+pub fn from_name<T: DeserializeOwned>(name: &str) -> Result<T, Error> {
+    Ok(serde_json::from_value(name_to_json_value(name)?)?)
+}
+
+/// Serializes `value` to JSON and mangles the result into a name.
+pub fn to_name<T: Serialize>(value: T) -> Result<String, Error> {
+    Ok(json_value_to_name(&serde_json::to_value(value)?))
+}
+
+#[cfg(test)]
+mod tests {
+    use serde_json::json;
+
+    use super::*;
+
+    #[test]
+    fn test_from_to_name() {
+        #[track_caller]
+        fn check_from_to_name(value: Value, name: &str) {
+            assert_eq!(name, json_value_to_name(&value));
+            assert_eq!(
+                Ok(value),
+                name_to_json_value(name).map_err(|e| e.to_string())
+            );
+        }
+
+        check_from_to_name(json! { null }, "__HDLz");
+        check_from_to_name(json! { false }, "__HDLf");
+        check_from_to_name(json! { true }, "__HDLt");
+        check_from_to_name(json! { 0 }, "__HDLn0");
+        check_from_to_name(json! { 0.1 }, "__HDLn0p1");
+        check_from_to_name(json! { -0.1 }, "__HDLnn0p1");
+        check_from_to_name(json! { 1234567 }, "__HDLn1234567");
+        check_from_to_name(json! { -1.2345678e-20 }, "__HDLnn1p2345678en20");
+        check_from_to_name(json! { -1.2345e300 }, "__HDLnn1p2345e300");
+        check_from_to_name(json! { -5 }, "__HDLnn5");
+        check_from_to_name(json! { "" }, "__HDLs0_");
+        check_from_to_name(json! { "a" }, "__HDLs1_a");
+        check_from_to_name(json! { "A" }, "__HDLs1_A");
+        check_from_to_name(json! { "z" }, "__HDLs1_z");
+        check_from_to_name(json! { "Z" }, "__HDLs1_Z");
+        check_from_to_name(json! { "0" }, "__HDLs1_0");
+        check_from_to_name(json! { "9" }, "__HDLs1_9");
+        check_from_to_name(json! { "_" }, "__HDLs1___");
+        check_from_to_name(json! { " " }, "__HDLs1__s");
+        check_from_to_name(json! { "\t" }, "__HDLs1__t");
+        check_from_to_name(json! { "\r" }, "__HDLs1__r");
+        check_from_to_name(json! { "\n" }, "__HDLs1__n");
+        check_from_to_name(json! { "\u{25}" }, "__HDLs1__25");
+        check_from_to_name(json! { "\u{100}" }, "__HDLs2__c4_80");
+        check_from_to_name(json! { "\u{1000}" }, "__HDLs3__e1_80_80");
+        check_from_to_name(json! { "\u{10000}" }, "__HDLs4__f0_90_80_80");
+        check_from_to_name(json! { "foo" }, "__HDLs3_foo");
+        check_from_to_name(json! { { "foo": 123 } }, "__HDLo1s3_foon123");
+        check_from_to_name(
+            json! { { "foo": 123, "bar": null } },
+            "__HDLo2s3_foon123s3_barz",
+        );
+        check_from_to_name(json! { [1, 2, 3, 4] }, "__HDLa4n1n2n3n4");
+        check_from_to_name(
+            json! { { "a": [], "b": null, "c": 1234, "d": {} } },
+            "__HDLo4s1_aa0s1_bzs1_cn1234s1_do0",
+        );
+    }
+
+    #[test]
+    fn test_non_ascii_escape_is_rejected() {
+        // Used to panic by slicing in the middle of a multi-byte character.
+        assert!(name_to_json_value("__HDLs1__\u{e9}").is_err());
+        assert!(name_to_json_value("__HDLs1_\u{e9}").is_err());
+    }
+}