diff --git a/docs/source/yosys_internals/verilog.rst b/docs/source/yosys_internals/verilog.rst
index 0039aaab7..d67553aa9 100644
--- a/docs/source/yosys_internals/verilog.rst
+++ b/docs/source/yosys_internals/verilog.rst
@@ -381,3 +381,6 @@ from SystemVerilog:
   will process conditionals using these keywords by annotating their
   representation with the appropriate ``full_case`` and/or ``parallel_case``
   attributes, which are described above.)
+
+- SystemVerilog string literals are supported (triple-quoted strings and
+  escape sequences such as line continuations and hex escapes).
diff --git a/frontends/verilog/verilog_lexer.l b/frontends/verilog/verilog_lexer.l
index 40162b8d3..e2d7a2cd9 100644
--- a/frontends/verilog/verilog_lexer.l
+++ b/frontends/verilog/verilog_lexer.l
@@ -112,6 +112,142 @@ static bool isUserType(std::string &s)
 	return false;
 }
 
+// Decode one character of a "\x" escape. On a hex digit, store its value
+// in *val and return true. The 4-state digits x/X/z/Z/? are also accepted
+// (with a warning) and decode as 0; any other character returns false and
+// leaves *val untouched.
+static bool is_hex_dig(char c, int *val)
+{
+	if ('0' <= c && c <= '9') {
+		*val = c - '0';
+		return true;
+	} else if ('a' <= c && c <= 'f') {
+		*val = c - 'a' + 0xA;
+		return true;
+	} else if ('A' <= c && c <= 'F') {
+		*val = c - 'A' + 0xA;
+		return true;
+	} else if (c == 'x' || c == 'X' || c == 'z' || c == 'Z' || c == '?') {
+		log_file_warning(AST::current_filename.c_str(), frontend_verilog_yyget_lineno(), "'%c' not a valid digit in hex escape sequence.\n", c);
+		*val = 0; // not semantically valid in hex escape...
+		return true; // ...but still processed as part of hex token
+	}
+
+	return false;
+}
+
+// Decode one character of an octal escape. On '0'..'7', store its value in
+// *val and return true. x/X/z/Z/? are accepted with a warning and decode
+// as 0; any other character returns false and leaves *val untouched.
+static bool is_oct_dig(char c, int *val)
+{
+	if ('0' <= c && c <= '7') {
+		*val = c - '0';
+		return true;
+	} else if (c == 'x' || c == 'X' || c == 'z' || c == 'Z' || c == '?') {
+		log_file_warning(AST::current_filename.c_str(), frontend_verilog_yyget_lineno(), "'%c' not a valid digit in octal escape sequence.\n", c);
+		*val = 0; // not semantically valid in octal escape...
+		return true; // ...but still processed as part of octal token
+	}
+
+	return false;
+}
+
+// Decode the body of a string literal in place and return the result as a
+// std::string allocated with new. 'str'/'len' delimit the raw text between
+// the quotes (the lexer rules strip the delimiters before calling);
+// 'triple' is true for a triple-quoted literal, where unescaped line breaks
+// are accepted without a warning. Every escape sequence decodes to at most
+// one byte, so the write pointer never overtakes the read pointer.
+static std::string *process_str(char *str, int len, bool triple)
+{
+	char *in, *out; // Overwrite input buffer: flex manual states "Actions
+	                // are free to modify 'yytext' except for lengthening it".
+
+	for (in = str, out = str; in < str + len; in++)
+		switch (*in) {
+		case '\n': // unescaped line break inside the literal
+		case '\r':
+			if (in + 1 < str + len && (in[1] ^ *in) == ('\n' ^ '\r')) // CR LF or LF CR pair counts as one break
+				in++;
+			if (!triple)
+				log_file_warning(AST::current_filename.c_str(), frontend_verilog_yyget_lineno(), "Multi-line string literals should be triple-quoted or escaped.\n");
+			*out++ = '\n';
+			break;
+		case '\\':
+			in++;
+			log_assert(in < str + len); // a backslash is never the last character of the body
+			switch (*in) {
+			case 'a':
+				*out++ = '\a';
+				break;
+			case 'f':
+				*out++ = '\f';
+				break;
+			case 'n':
+				*out++ = '\n';
+				break;
+			case 'r': /* not part of IEEE-1800 2023, but seems
+			             like a good idea to support it anyway */
+				*out++ = '\r';
+				break;
+			case 't':
+				*out++ = '\t';
+				break;
+			case 'v':
+				*out++ = '\v';
+				break;
+			case 'x': // hex escape: one or two digits
+				int val;
+				if (in + 1 < str + len && is_hex_dig(in[1], &val)) {
+					*out = val;
+					in++;
+					if (in + 1 < str + len && is_hex_dig(in[1], &val)) {
+						*out = *out * 0x10 + val;
+						in++;
+					}
+					out++;
+				} else // "\x" without digits emits nothing; the next character is then handled normally
+					log_file_warning(AST::current_filename.c_str(), frontend_verilog_yyget_lineno(), "ignoring invalid hex escape.\n");
+				break;
+			case '\\':
+				*out++ = '\\';
+				break;
+			case '"':
+				*out++ = '"';
+				break;
+			case '\n': // escaped line break is a line continuation: emit nothing
+			case '\r':
+				if (in + 1 < str + len && (in[1] ^ *in) == ('\n' ^ '\r'))
+					in++;
+				break;
+			default:
+				if ('0' <= *in && *in <= '7') { // octal escape: one to three digits
+					int val;
+
+					*out = *in - '0';
+					if (in + 1 < str + len && is_oct_dig(in[1], &val)) {
+						*out = *out * 010 + val;
+						in++;
+						if (in + 1 < str + len && is_oct_dig(in[1], &val)) {
+							if (*out >= 040) // two digits >= 040: a third digit no longer fits in 8 bits
+								log_file_warning(AST::current_filename.c_str(), frontend_verilog_yyget_lineno(), "octal escape exceeds \\377\n");
+							*out = *out * 010 + val;
+							in++;
+						}
+					}
+					out++;
+				} else // any other escaped character stands for itself
+					*out++ = *in;
+			}
+			break;
+		default:
+			*out++ = *in;
+		}
+
+	return new std::string(str, out - str);
+}
+
 %}
 
 %option yylineno
@@ -122,7 +258,6 @@ static bool isUserType(std::string &s)
 %option prefix="frontend_verilog_yy"
 
 %x COMMENT
-%x STRING
 %x SYNOPSYS_TRANSLATE_OFF
 %x SYNOPSYS_FLAGS
 %x IMPORT_DPI
@@ -335,47 +470,9 @@ TIME_SCALE_SUFFIX [munpf]?s
 	return TOK_REALVAL;
 }
 
-\" { BEGIN(STRING); }
-<STRING>([^\\"]|\\.)+ { yymore(); real_location = old_location; }
-<STRING>\" {
-	BEGIN(0);
-	char *yystr = strdup(yytext);
-	yystr[strlen(yytext) - 1] = 0;
-	int i = 0, j = 0;
-	while (yystr[i]) {
-		if (yystr[i] == '\\' && yystr[i + 1]) {
-			i++;
-			if (yystr[i] == 'a')
-				yystr[i] = '\a';
-			else if (yystr[i] == 'f')
-				yystr[i] = '\f';
-			else if (yystr[i] == 'n')
-				yystr[i] = '\n';
-			else if (yystr[i] == 'r')
-				yystr[i] = '\r';
-			else if (yystr[i] == 't')
-				yystr[i] = '\t';
-			else if (yystr[i] == 'v')
-				yystr[i] = '\v';
-			else if ('0' <= yystr[i] && yystr[i] <= '7') {
-				yystr[i] = yystr[i] - '0';
-				if ('0' <= yystr[i + 1] && yystr[i + 1] <= '7') {
-					yystr[i + 1] = yystr[i] * 8 + yystr[i + 1] - '0';
-					i++;
-				}
-				if ('0' <= yystr[i + 1] && yystr[i + 1] <= '7') {
-					yystr[i + 1] = yystr[i] * 8 + yystr[i + 1] - '0';
-					i++;
-				}
-			}
-		}
-		yystr[j++] = yystr[i++];
-	}
-	yystr[j] = 0;
-	yylval->string = new std::string(yystr, j);
-	free(yystr);
-	return TOK_STRING;
-}
+\"([^\\"]|\\.|\\\n)*\" { yylval->string = process_str(yytext + 1, yyleng - 2, false); return TOK_STRING; }
+
+\"{3}(\"{0,2}([^\\"]|\\.|\\\n))*\"{3} { yylval->string = process_str(yytext + 3, yyleng - 6, true); return TOK_STRING; }
 
 and|nand|or|nor|xor|xnor|not|buf|bufif0|bufif1|notif0|notif1 {
 	yylval->string = new std::string(yytext);
diff --git a/tests/verilog/bug5160.v b/tests/verilog/bug5160.v
deleted file mode 100644
index 5b141a360..000000000
--- a/tests/verilog/bug5160.v
+++ /dev/null
@@ -1,5 +0,0 @@
-// Regression test for bug mentioned in #5160:
-// https://github.com/YosysHQ/yosys/pull/5160#issuecomment-2983643084
-module top;
-	initial $display( "\\" );
-endmodule
diff --git a/tests/verilog/string-literals.ys b/tests/verilog/string-literals.ys
new file mode 100644
index 000000000..a0f0f0460
--- /dev/null
+++ b/tests/verilog/string-literals.ys
@@ -0,0 +1,258 @@
+# Test valid escape sequences yield correct results (each string wire is proven equal to the numeric wire next to it):
+logger -expect-no-warnings
+read_verilog << EOF
+module top;
+	wire[7:0] sp = "\ ";
+	wire[7:0] spval = 32;
+	wire[7:0] ex = "\!";
+	wire[7:0] exval = 33;
+	wire[7:0] dq = "\"";
+	wire[7:0] dqval = 34;
+	wire[7:0] ha = "\#";
+	wire[7:0] haval = 35;
+	wire[7:0] do = "\$";
+	wire[7:0] doval = 36;
+	wire[7:0] pc = "\%";
+	wire[7:0] pcval = 37;
+	wire[7:0] am = "\&";
+	wire[7:0] amval = 38;
+	wire[7:0] sq = "\'";
+	wire[7:0] sqval = 39;
+	wire[7:0] op = "\(";
+	wire[7:0] opval = 40;
+	wire[7:0] cp = "\)";
+	wire[7:0] cpval = 41;
+	wire[7:0] as = "\*";
+	wire[7:0] asval = 42;
+	wire[7:0] pl = "\+";
+	wire[7:0] plval = 43;
+	wire[7:0] co = "\,";
+	wire[7:0] coval = 44;
+	wire[7:0] mi = "\-";
+	wire[7:0] mival = 45;
+	wire[7:0] dt = "\."; // distinct name: "do" is already taken by the "\$" check
+	wire[7:0] dtval = 46;
+	wire[7:0] sl = "\/";
+	wire[7:0] slval = 47;
+
+	wire[7:0] dig0 = "\012";
+	wire[7:0] dig0val = 10;
+	wire[7:0] dig8 = "\8"; // not octal, a literal '8'
+	wire[7:0] dig8val = 56;
+	wire[7:0] dig9 = "\9"; // not octal, a literal '9'
+	wire[7:0] dig9val = 57;
+
+	wire[7:0] cl = "\:";
+	wire[7:0] clval = 58;
+	wire[7:0] sc = "\;";
+	wire[7:0] scval = 59;
+	wire[7:0] lt = "\<";
+	wire[7:0] ltval = 60;
+	wire[7:0] eq = "\=";
+	wire[7:0] eqval = 61;
+	wire[7:0] gt = "\>";
+	wire[7:0] gtval = 62;
+	wire[7:0] qu = "\?";
+	wire[7:0] quval = 63;
+	wire[7:0] at = "\@";
+	wire[7:0] atval = 64;
+
+	wire[7:0] A = "\A";
+	wire[7:0] Aval = 65; // etc. etc.
+
+	wire[7:0] os = "\[";
+	wire[7:0] osval = 91;
+	wire[7:0] bs = "\\";
+	wire[7:0] bsval = 92;
+	wire[7:0] cs = "\]";
+	wire[7:0] csval = 93;
+	wire[7:0] ca = "\^";
+	wire[7:0] caval = 94;
+	wire[7:0] us = "\_";
+	wire[7:0] usval = 95;
+	wire[7:0] bq = "\`";
+	wire[7:0] bqval = 96;
+
+	wire[7:0] a = "\a"; // alert, ASCII BEL=7
+	wire[7:0] aval = 7;
+	wire[7:0] b = "\b";
+	wire[7:0] bval = 98;
+	wire[7:0] c = "\c";
+	wire[7:0] cval = 99;
+	wire[7:0] d = "\d";
+	wire[7:0] dval = 100;
+	wire[7:0] e = "\e";
+	wire[7:0] eval = 101;
+	wire[7:0] f = "\f"; // form feed, ASCII FF=12
+	wire[7:0] fval = 12;
+	wire[7:0] g = "\g";
+	wire[7:0] gval = 103;
+	wire[7:0] h = "\h";
+	wire[7:0] hval = 104;
+	wire[7:0] i = "\i";
+	wire[7:0] ival = 105;
+	wire[7:0] j = "\j";
+	wire[7:0] jval = 106;
+	wire[7:0] k = "\k";
+	wire[7:0] kval = 107;
+	wire[7:0] l = "\l";
+	wire[7:0] lval = 108;
+	wire[7:0] m = "\m";
+	wire[7:0] mval = 109;
+	wire[7:0] n = "\n"; // new line, ASCII LF=10
+	wire[7:0] nval = 10;
+	wire[7:0] o = "\o";
+	wire[7:0] oval = 111;
+	wire[7:0] p = "\p";
+	wire[7:0] pval = 112;
+	wire[7:0] q = "\q";
+	wire[7:0] qval = 113;
+	wire[7:0] r = "\r"; // carriage return, ASCII CR=13, not IEEE 1800-2023
+	wire[7:0] rval = 13;
+	wire[7:0] s = "\s";
+	wire[7:0] sval = 115;
+	wire[7:0] t = "\t"; // tab, ASCII HT=9
+	wire[7:0] tval = 9;
+	wire[7:0] u = "\u";
+	wire[7:0] uval = 117;
+	wire[7:0] v = "\v"; // vertical tab, ASCII VT=11
+	wire[7:0] vval = 11;
+	wire[7:0] w = "\w";
+	wire[7:0] wval = 119;
+	wire[7:0] x = "\x2A"; // hex escape
+	wire[7:0] xval = 42;
+	wire[7:0] y = "\y";
+	wire[7:0] yval = 121;
+	wire[7:0] z = "\z";
+	wire[7:0] zval = 122;
+
+	wire[7:0] ob = "\{";
+	wire[7:0] obval = 123;
+	wire[7:0] vb = "\|";
+	wire[7:0] vbval = 124;
+	wire[7:0] cb = "\}";
+	wire[7:0] cbval = 125;
+	wire[7:0] ti = "\~";
+	wire[7:0] tival = 126;
+endmodule
+EOF
+sat -prove sp spval -prove ex exval -prove dq dqval -prove ha haval -prove do doval -prove pc pcval -prove am amval -prove sq sqval -prove op opval -prove cp cpval -prove as asval -prove pl plval -prove co coval -prove mi mival -prove dt dtval -prove sl slval -verify
+sat -prove dig0 dig0val -prove dig8 dig8val -prove dig9 dig9val -verify
+sat -prove cl clval -prove sc scval -prove lt ltval -prove eq eqval -prove gt gtval -prove qu quval -prove at atval -prove A Aval -verify
+sat -prove os osval -prove bs bsval -prove cs csval -prove ca caval -prove us usval -prove bq bqval -verify
+sat -prove a aval -prove b bval -prove c cval -prove d dval -prove e eval -prove f fval -prove g gval -prove h hval -prove i ival -prove j jval -prove k kval -prove l lval -prove m mval -prove n nval -prove o oval -prove p pval -prove q qval -prove r rval -prove s sval -prove t tval -prove u uval -prove v vval -prove w wval -prove x xval -prove y yval -prove z zval -verify
+sat -prove ob obval -prove vb vbval -prove cb cbval -prove ti tival -verify
+logger -check-expected
+design -reset
+
+# Test octal escape out of range.
+logger -expect warning "octal escape exceeds \\377" 1
+read_verilog << EOF
+module top;
+	wire[7:0] x = "\400";
+endmodule
+EOF
+logger -check-expected
+design -reset
+
+# Test invalid octal digit.
+logger -expect warning "'\?' not a valid digit in octal escape sequence" 1
+read_verilog << EOF
+module top;
+	wire[7:0] x = "\0?";
+endmodule
+EOF
+logger -check-expected
+design -reset
+
+# Test invalid hex digit.
+logger -expect warning "'X' not a valid digit in hex escape sequence" 1
+read_verilog << EOF
+module top;
+	wire[7:0] x = "\x0X";
+endmodule
+EOF
+logger -check-expected
+design -reset
+
+# Test hex escape with no hex digits at all.
+logger -expect warning "ignoring invalid hex escape" 1
+read_verilog << EOF
+module top;
+	wire[7:0] x = "\xy";
+endmodule
+EOF
+logger -check-expected
+design -reset
+
+# Test hex escape interrupted by end of string.
+logger -expect warning "ignoring invalid hex escape" 1
+read_verilog << EOF
+module top;
+	wire[7:0] x = "\x";
+endmodule
+EOF
+logger -check-expected
+design -reset
+
+# Test multi-line string: the unescaped line break warns once and must still decode to LF.
+logger -expect warning "Multi-line string literals should be triple-quoted or escaped" 1
+read_verilog << EOF
+module top;
+	wire[31:0] x = "A
+BC";
+	wire[31:0] xval = 32'h410A4243;
+endmodule
+EOF
+logger -check-expected
+sat -prove x xval -verify
+design -reset
+
+# Test multi-line triple-quoted string.
+logger -expect-no-warnings
+read_verilog << EOF
+module top;
+	wire[31:0] x = """A
+BC""";
+	wire[31:0] xval = 32'h410A4243;
+endmodule
+EOF
+logger -check-expected
+sat -prove x xval -verify
+design -reset
+
+# Test escaped multi-line string.
+logger -expect-no-warnings
+read_verilog << EOF
+module top;
+	wire[31:0] x = "AB\
+CD";
+	wire[31:0] xval = 32'h41424344;
+endmodule
+EOF
+logger -check-expected
+sat -prove x xval -verify
+design -reset
+
+# Test octal escape with surrounding data.
+logger -expect-no-warnings
+read_verilog << EOF
+module top;
+	wire[31:0] x = "AB\234C";
+	wire[31:0] xval = 32'h41429C43;
+endmodule
+EOF
+logger -check-expected
+sat -prove x xval -verify
+design -reset
+
+# Test hex escape with surrounding data.
+logger -expect-no-warnings
+read_verilog << EOF
+module top;
+	wire[31:0] x = "A\xBCDE";
+	wire[31:0] xval = 32'h41BC4445;
+endmodule
+EOF
+logger -check-expected
+sat -prove x xval -verify