3
0
Fork 0
mirror of https://github.com/YosysHQ/yosys synced 2025-10-26 09:24:37 +00:00

Merge pull request #5192 from garytwong/multiline-string

verilog: support newline and hex escapes in string literals
This commit is contained in:
KrystalDelusion 2025-07-08 10:27:01 +12:00 committed by GitHub
commit 1a215719e5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 386 additions and 47 deletions

View file

@ -381,3 +381,6 @@ from SystemVerilog:
will process conditionals using these keywords by annotating their will process conditionals using these keywords by annotating their
representation with the appropriate ``full_case`` and/or ``parallel_case`` representation with the appropriate ``full_case`` and/or ``parallel_case``
attributes, which are described above.) attributes, which are described above.)
- SystemVerilog string literals are supported (triple-quoted strings and
escape sequences such as line continuations and hex escapes).

View file

@ -112,6 +112,129 @@ static bool isUserType(std::string &s)
return false; return false;
} }
static bool is_hex_dig(char c, int *val)
{
if ('0' <= c && c <= '9') {
*val = c - '0';
return true;
} else if ('a' <= c && c <= 'f') {
*val = c - 'a' + 0xA;
return true;
} else if ('A' <= c && c <= 'F') {
*val = c - 'A' + 0xA;
return true;
} else if (c == 'x' || c == 'X' || c == 'z' || c == 'Z' || c == '?') {
log_file_warning(AST::current_filename.c_str(), frontend_verilog_yyget_lineno(), "'%c' not a valid digit in hex escape sequence.\n", c);
*val = 0; // not semantically valid in hex escape...
return true; // ...but still processed as part of hex token
}
return false;
}
static bool is_oct_dig(char c, int *val)
{
if ('0' <= c && c <= '7') {
*val = c - '0';
return true;
} else if (c == 'x' || c == 'X' || c == 'z' || c == 'Z' || c == '?') {
log_file_warning(AST::current_filename.c_str(), frontend_verilog_yyget_lineno(), "'%c' not a valid digit in octal escape sequence.\n", c);
*val = 0; // not semantically valid in octal escape...
return true; // ...but still processed as part of octal token
}
return false;
}
static std::string *process_str(char *str, int len, bool triple)
{
char *in, *out; // Overwrite input buffer: flex manual states "Actions
// are free to modify 'yytext' except for lengthening it".
for (in = str, out = str; in < str + len; in++)
switch (*in) {
case '\n':
case '\r':
if (in + 1 < str + len && (in[1] ^ *in) == ('\n' ^ '\r'))
in++;
if (!triple)
log_file_warning(AST::current_filename.c_str(), frontend_verilog_yyget_lineno(), "Multi-line string literals should be triple-quoted or escaped.\n");
*out++ = '\n';
break;
case '\\':
in++;
log_assert(in < str + len);
switch (*in) {
case 'a':
*out++ = '\a';
break;
case 'f':
*out++ = '\f';
break;
case 'n':
*out++ = '\n';
break;
case 'r': /* not part of IEEE-1800 2023, but seems
like a good idea to support it anyway */
*out++ = '\r';
break;
case 't':
*out++ = '\t';
break;
case 'v':
*out++ = '\v';
break;
case 'x':
int val;
if (in + 1 < str + len && is_hex_dig(in[1], &val)) {
*out = val;
in++;
if (in + 1 < str + len && is_hex_dig(in[1], &val)) {
*out = *out * 0x10 + val;
in++;
}
out++;
} else
log_file_warning(AST::current_filename.c_str(), frontend_verilog_yyget_lineno(), "ignoring invalid hex escape.\n");
break;
case '\\':
*out++ = '\\';
break;
case '"':
*out++ = '"';
break;
case '\n':
case '\r':
if (in + 1 < str + len && (in[1] ^ *in) == ('\n' ^ '\r'))
in++;
break;
default:
if ('0' <= *in && *in <= '7') {
int val;
*out = *in - '0';
if (in + 1 < str + len && is_oct_dig(in[1], &val)) {
*out = *out * 010 + val;
in++;
if (in + 1 < str + len && is_oct_dig(in[1], &val)) {
if (*out >= 040)
log_file_warning(AST::current_filename.c_str(), frontend_verilog_yyget_lineno(), "octal escape exceeds \\377\n");
*out = *out * 010 + val;
in++;
}
}
out++;
} else
*out++ = *in;
}
break;
default:
*out++ = *in;
}
return new std::string(str, out - str);
}
%} %}
%option yylineno %option yylineno
@ -122,7 +245,6 @@ static bool isUserType(std::string &s)
%option prefix="frontend_verilog_yy" %option prefix="frontend_verilog_yy"
%x COMMENT %x COMMENT
%x STRING
%x SYNOPSYS_TRANSLATE_OFF %x SYNOPSYS_TRANSLATE_OFF
%x SYNOPSYS_FLAGS %x SYNOPSYS_FLAGS
%x IMPORT_DPI %x IMPORT_DPI
@ -335,47 +457,9 @@ TIME_SCALE_SUFFIX [munpf]?s
return TOK_REALVAL; return TOK_REALVAL;
} }
\" { BEGIN(STRING); } \"([^\\"]|\\.|\\\n)*\" { yylval->string = process_str(yytext + 1, yyleng - 2, false); return TOK_STRING; }
<STRING>([^\\"]|\\.)+ { yymore(); real_location = old_location; }
<STRING>\" { \"{3}(\"{0,2}([^\\"]|\\.|\\\n))*\"{3} { yylval->string = process_str(yytext + 3, yyleng - 6, true); return TOK_STRING; }
BEGIN(0);
char *yystr = strdup(yytext);
yystr[strlen(yytext) - 1] = 0;
int i = 0, j = 0;
while (yystr[i]) {
if (yystr[i] == '\\' && yystr[i + 1]) {
i++;
if (yystr[i] == 'a')
yystr[i] = '\a';
else if (yystr[i] == 'f')
yystr[i] = '\f';
else if (yystr[i] == 'n')
yystr[i] = '\n';
else if (yystr[i] == 'r')
yystr[i] = '\r';
else if (yystr[i] == 't')
yystr[i] = '\t';
else if (yystr[i] == 'v')
yystr[i] = '\v';
else if ('0' <= yystr[i] && yystr[i] <= '7') {
yystr[i] = yystr[i] - '0';
if ('0' <= yystr[i + 1] && yystr[i + 1] <= '7') {
yystr[i + 1] = yystr[i] * 8 + yystr[i + 1] - '0';
i++;
}
if ('0' <= yystr[i + 1] && yystr[i + 1] <= '7') {
yystr[i + 1] = yystr[i] * 8 + yystr[i + 1] - '0';
i++;
}
}
}
yystr[j++] = yystr[i++];
}
yystr[j] = 0;
yylval->string = new std::string(yystr, j);
free(yystr);
return TOK_STRING;
}
and|nand|or|nor|xor|xnor|not|buf|bufif0|bufif1|notif0|notif1 { and|nand|or|nor|xor|xnor|not|buf|bufif0|bufif1|notif0|notif1 {
yylval->string = new std::string(yytext); yylval->string = new std::string(yytext);

View file

@ -1,5 +0,0 @@
// Regression test for bug mentioned in #5160:
// https://github.com/YosysHQ/yosys/pull/5160#issuecomment-2983643084
module top;
initial $display( "\\" );
endmodule

View file

@ -0,0 +1,257 @@
# Test valid escape sequences yield correct results:
logger -expect-no-warnings
read_verilog << EOF
module top;
wire[7:0] sp = "\ ";
wire[7:0] spval = 32;
wire[7:0] ex = "\!";
wire[7:0] exval = 33;
wire[7:0] dq = "\"";
wire[7:0] dqval = 34;
wire[7:0] ha = "\#";
wire[7:0] haval = 35;
wire[7:0] do = "\$";
wire[7:0] doval = 36;
wire[7:0] pc = "\%";
wire[7:0] pcval = 37;
wire[7:0] am = "\&";
wire[7:0] amval = 38;
wire[7:0] sq = "\'";
wire[7:0] sqval = 39;
wire[7:0] op = "\(";
wire[7:0] opval = 40;
wire[7:0] cp = "\)";
wire[7:0] cpval = 41;
wire[7:0] as = "\*";
wire[7:0] asval = 42;
wire[7:0] pl = "\+";
wire[7:0] plval = 43;
wire[7:0] co = "\,";
wire[7:0] coval = 44;
wire[7:0] mi = "\-";
wire[7:0] mival = 45;
wire[7:0] do = "\.";
wire[7:0] doval = 46;
wire[7:0] sl = "\/";
wire[7:0] slval = 47;
wire[7:0] dig0 = "\012";
wire[7:0] dig0val = 10;
wire[7:0] dig8 = "\8"; // not octal, a literal '8'
wire[7:0] dig8val = 56;
wire[7:0] dig9 = "\9"; // not octal, a literal '9'
wire[7:0] dig9val = 57;
wire[7:0] cl = "\:";
wire[7:0] clval = 58;
wire[7:0] sc = "\;";
wire[7:0] scval = 59;
wire[7:0] lt = "\<";
wire[7:0] ltval = 60;
wire[7:0] eq = "\=";
wire[7:0] eqval = 61;
wire[7:0] gt = "\>";
wire[7:0] gtval = 62;
wire[7:0] qu = "\?";
wire[7:0] quval = 63;
wire[7:0] at = "\@";
wire[7:0] atval = 64;
wire[7:0] A = "\A";
wire[7:0] Aval = 65; // etc. etc.
wire[7:0] os = "\[";
wire[7:0] osval = 91;
wire[7:0] bs = "\\";
wire[7:0] bsval = 92;
wire[7:0] cs = "\]";
wire[7:0] csval = 93;
wire[7:0] ca = "\^";
wire[7:0] caval = 94;
wire[7:0] us = "\_";
wire[7:0] usval = 95;
wire[7:0] bq = "\`";
wire[7:0] bqval = 96;
wire[7:0] a = "\a"; // alert, ASCII BEL=7
wire[7:0] aval = 7;
wire[7:0] b = "\b";
wire[7:0] bval = 98;
wire[7:0] c = "\c";
wire[7:0] cval = 99;
wire[7:0] d = "\d";
wire[7:0] dval = 100;
wire[7:0] e = "\e";
wire[7:0] eval = 101;
wire[7:0] f = "\f"; // form feed, ASCII FF=12
wire[7:0] fval = 12;
wire[7:0] g = "\g";
wire[7:0] gval = 103;
wire[7:0] h = "\h";
wire[7:0] hval = 104;
wire[7:0] i = "\i";
wire[7:0] ival = 105;
wire[7:0] j = "\j";
wire[7:0] jval = 106;
wire[7:0] k = "\k";
wire[7:0] kval = 107;
wire[7:0] l = "\l";
wire[7:0] lval = 108;
wire[7:0] m = "\m";
wire[7:0] mval = 109;
wire[7:0] n = "\n"; // new line, ASCII LF=10
wire[7:0] nval = 10;
wire[7:0] o = "\o";
wire[7:0] oval = 111;
wire[7:0] p = "\p";
wire[7:0] pval = 112;
wire[7:0] q = "\q";
wire[7:0] qval = 113;
wire[7:0] r = "\r"; // carriage return, ASCII CR=13, not IEEE 1800-2023
wire[7:0] rval = 13;
wire[7:0] s = "\s";
wire[7:0] sval = 115;
wire[7:0] t = "\t"; // tab, ASCII HT=9
wire[7:0] tval = 9;
wire[7:0] u = "\u";
wire[7:0] uval = 117;
wire[7:0] v = "\v"; // vertical tab, ASCII VT=11
wire[7:0] vval = 11;
wire[7:0] w = "\w";
wire[7:0] wval = 119;
wire[7:0] x = "\x2A"; // hex escape
wire[7:0] xval = 42;
wire[7:0] y = "\y";
wire[7:0] yval = 121;
wire[7:0] z = "\z";
wire[7:0] zval = 122;
wire[7:0] ob = "\{";
wire[7:0] obval = 123;
wire[7:0] vb = "\|";
wire[7:0] vbval = 124;
wire[7:0] cb = "\}";
wire[7:0] cbval = 125;
wire[7:0] ti = "\~";
wire[7:0] tival = 126;
endmodule
EOF
sat -prove sp spval -prove ex exval -prove dq dqval -prove ha haval -prove do doval -prove pc pcval -prove am amval -prove sq sqval -prove op opval -prove cp cpval -prove as asval -prove pl plval -prove co coval -prove mi mival -prove do doval -prove sl slval -verify
sat -prove dig0 dig0val -prove dig8 dig8val -prove dig9 dig9val -verify
sat -prove cl clval -prove sc scval -prove lt ltval -prove eq eqval -prove gt gtval -prove qu quval -prove at atval -prove A Aval -verify
sat -prove os osval -prove bs bsval -prove cs csval -prove ca caval -prove us usval -prove bq bqval -verify
sat -prove a aval -prove b bval -prove c cval -prove d dval -prove e eval -prove f fval -prove g gval -prove h hval -prove i ival -prove j jval -prove k kval -prove l lval -prove m mval -prove n nval -prove o oval -prove p pval -prove q qval -prove r rval -prove s sval -prove t tval -prove u uval -prove v vval -prove w wval -prove x xval -prove y yval -prove z zval -verify
sat -prove ob obval -prove vb vbval -prove cb cbval -prove ti tival -verify
logger -check-expected
design -reset
# Test octal escape out of range.
logger -expect warning "octal escape exceeds \\377" 1
read_verilog << EOF
module top;
wire[7:0] x = "\400";
endmodule
EOF
logger -check-expected
design -reset
# Test invalid octal digit.
logger -expect warning "'\?' not a valid digit in octal escape sequence" 1
read_verilog << EOF
module top;
wire[7:0] x = "\0?";
endmodule
EOF
logger -check-expected
design -reset
# Test invalid hex digit.
logger -expect warning "'X' not a valid digit in hex escape sequence" 1
read_verilog << EOF
module top;
wire[7:0] x = "\x0X";
endmodule
EOF
logger -check-expected
design -reset
# Test hex escape with no hex digits at all.
logger -expect warning "ignoring invalid hex escape" 1
read_verilog << EOF
module top;
wire[7:0] x = "\xy";
endmodule
EOF
logger -check-expected
design -reset
# Test hex escape interrupted by end of string.
logger -expect warning "ignoring invalid hex escape" 1
read_verilog << EOF
module top;
wire[7:0] x = "\x";
endmodule
EOF
logger -check-expected
design -reset
# Test multi-line string.
logger -expect warning "Multi-line string literals should be triple-quoted or escaped" 1
read_verilog << EOF
module top;
wire[31:0] x = "A
BC";
wire[31:0] xval = 32'h410A4243;
endmodule
EOF
logger -check-expected
design -reset
# Test multi-line triple-quoted string.
logger -expect-no-warnings
read_verilog << EOF
module top;
wire[31:0] x = """A
BC""";
wire[31:0] xval = 32'h410A4243;
endmodule
EOF
logger -check-expected
sat -prove x xval -verify
design -reset
# Test escaped multi-line string.
logger -expect-no-warnings
read_verilog << EOF
module top;
wire[31:0] x = "AB\
CD";
wire[31:0] xval = 32'h41424344;
endmodule
EOF
logger -check-expected
sat -prove x xval -verify
design -reset
# Test octal escape with surrounding data.
logger -expect-no-warnings
read_verilog << EOF
module top;
wire[31:0] x = "AB\234C";
wire[31:0] xval = 32'h41429C43;
endmodule
EOF
logger -check-expected
sat -prove x xval -verify
design -reset
# Test hex escape with surrounding data.
logger -expect-no-warnings
read_verilog << EOF
module top;
wire[31:0] x = "A\xBCDE";
wire[31:0] xval = 32'h41BC4445;
endmodule
EOF
logger -check-expected
sat -prove x xval -verify