3
0
Fork 0
mirror of https://github.com/YosysHQ/yosys synced 2025-04-24 01:25:33 +00:00

fmt,cxxrtl: add UNICHAR format type.

This format type is used to print a Unicode character (code point) as
its UTF-8 serialization. To this end, two UTF-8 encoders (one for fmt,
one for cxxrtl) are added for rendering. When converted to a Verilog
format specifier, `UNICHAR` degrades to `%c` with the low 7 bits of
the code point, which has equivalent behavior for inputs not exceeding
ASCII. (SystemVerilog leaves source and display encodings completely
undefined.)
This commit is contained in:
Catherine 2024-03-28 07:55:46 +00:00 committed by Marcelina Kościelnicka
parent 1780e2eb1e
commit bf5a960668
3 changed files with 70 additions and 6 deletions

View file

@ -1013,13 +1013,14 @@ struct fmt_part {
LITERAL = 0,
INTEGER = 1,
STRING = 2,
VLOG_TIME = 3,
UNICHAR = 3,
VLOG_TIME = 4,
} type;
// LITERAL type
std::string str;
// INTEGER/STRING types
// INTEGER/STRING/UNICHAR types
// + value<Bits> val;
// INTEGER/STRING/VLOG_TIME types
@ -1073,6 +1074,25 @@ struct fmt_part {
break;
}
case UNICHAR: {
	// Append the UTF-8 serialization of the code point stored in `val` to `buf`.
	// The sequence length is chosen purely from the numeric magnitude; no check
	// is made for surrogates or for values beyond the 4-byte range.
	const uint32_t cp = val.template get<uint32_t>();
	if (cp < 0x80) {
		// Single byte: the value is emitted as-is.
		buf += (char)cp;
	} else if (cp < 0x800) {
		// Two bytes: 110xxxxx 10xxxxxx
		buf += (char)(0xc0 | (cp >> 6));
		buf += (char)(0x80 | ((cp >> 0) & 0x3f));
	} else if (cp < 0x10000) {
		// Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
		buf += (char)(0xe0 | (cp >> 12));
		buf += (char)(0x80 | ((cp >> 6) & 0x3f));
		buf += (char)(0x80 | ((cp >> 0) & 0x3f));
	} else {
		// Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		buf += (char)(0xf0 | (cp >> 18));
		buf += (char)(0x80 | ((cp >> 12) & 0x3f));
		buf += (char)(0x80 | ((cp >> 6) & 0x3f));
		buf += (char)(0x80 | ((cp >> 0) & 0x3f));
	}
	break;
}
case INTEGER: {
size_t width = Bits;
if (base != 10) {