mirror of
				https://github.com/YosysHQ/yosys
				synced 2025-10-30 19:22:31 +00:00 
			
		
		
		
	Merge pull request #5192 from garytwong/multiline-string
verilog: support newline and hex escapes in string literals
This commit is contained in:
		
						commit
						1a215719e5
					
				
					 4 changed files with 386 additions and 47 deletions
				
			
		|  | @ -381,3 +381,6 @@ from SystemVerilog: | |||
|   will process conditionals using these keywords by annotating their | ||||
|   representation with the appropriate ``full_case`` and/or ``parallel_case`` | ||||
|   attributes, which are described above.) | ||||
| 
 | ||||
| - SystemVerilog string literals are supported (triple-quoted strings and | ||||
|   escape sequences such as line continuations and hex escapes). | ||||
|  |  | |||
|  | @ -112,6 +112,129 @@ static bool isUserType(std::string &s) | |||
| 	return false; | ||||
| } | ||||
| 
 | ||||
| static bool is_hex_dig(char c, int *val) | ||||
| { | ||||
| 	if ('0' <= c && c <= '9') { | ||||
| 		*val = c - '0'; | ||||
| 		return true; | ||||
| 	} else if ('a' <= c && c <= 'f') { | ||||
| 		*val = c - 'a' + 0xA; | ||||
| 		return true; | ||||
| 	} else if ('A' <= c && c <= 'F') { | ||||
| 		*val = c - 'A' + 0xA; | ||||
| 		return true; | ||||
| 	} else if (c == 'x' || c == 'X' || c == 'z' || c == 'Z' || c == '?') { | ||||
| 		log_file_warning(AST::current_filename.c_str(), frontend_verilog_yyget_lineno(), "'%c' not a valid digit in hex escape sequence.\n", c); | ||||
| 		*val = 0; // not semantically valid in hex escape... | ||||
| 		return true; // ...but still processed as part of hex token | ||||
| 	} | ||||
| 
 | ||||
| 	return false; | ||||
| } | ||||
| 
 | ||||
| static bool is_oct_dig(char c, int *val) | ||||
| { | ||||
| 	if ('0' <= c && c <= '7') { | ||||
| 		*val = c - '0'; | ||||
| 		return true; | ||||
| 	} else if (c == 'x' || c == 'X' || c == 'z' || c == 'Z' || c == '?') { | ||||
| 		log_file_warning(AST::current_filename.c_str(), frontend_verilog_yyget_lineno(), "'%c' not a valid digit in octal escape sequence.\n", c); | ||||
| 		*val = 0; // not semantically valid in octal escape... | ||||
| 		return true; // ...but still processed as part of octal token | ||||
| 	} | ||||
| 
 | ||||
| 	return false; | ||||
| } | ||||
| 
 | ||||
| static std::string *process_str(char *str, int len, bool triple) | ||||
| { | ||||
| 	char *in, *out; // Overwrite input buffer: flex manual states "Actions | ||||
| 		// are free to modify 'yytext' except for lengthening it". | ||||
| 
 | ||||
| 	for (in = str, out = str; in < str + len; in++) | ||||
| 		switch (*in) { | ||||
| 		case '\n': | ||||
| 		case '\r': | ||||
| 			if (in + 1 < str + len && (in[1] ^ *in) == ('\n' ^ '\r')) | ||||
| 				in++; | ||||
| 			if (!triple) | ||||
| 				log_file_warning(AST::current_filename.c_str(), frontend_verilog_yyget_lineno(), "Multi-line string literals should be triple-quoted or escaped.\n"); | ||||
| 			*out++ = '\n'; | ||||
| 			break; | ||||
| 		case '\\': | ||||
| 			in++; | ||||
| 			log_assert(in < str + len); | ||||
| 			switch (*in) { | ||||
| 			case 'a': | ||||
| 				*out++ = '\a'; | ||||
| 				break; | ||||
| 			case 'f': | ||||
| 				*out++ = '\f'; | ||||
| 				break; | ||||
| 			case 'n': | ||||
| 				*out++ = '\n'; | ||||
| 				break; | ||||
| 			case 'r': /* not part of IEEE-1800 2023, but seems | ||||
| 				     like a good idea to support it anyway */ | ||||
| 				*out++ = '\r'; | ||||
| 				break; | ||||
| 			case 't': | ||||
| 				*out++ = '\t'; | ||||
| 				break; | ||||
| 			case 'v': | ||||
| 				*out++ = '\v'; | ||||
| 				break; | ||||
| 			case 'x': | ||||
| 				int val; | ||||
| 				if (in + 1 < str + len && is_hex_dig(in[1], &val)) { | ||||
| 					*out = val; | ||||
| 					in++; | ||||
| 					if (in + 1 < str + len && is_hex_dig(in[1], &val)) { | ||||
| 						*out = *out * 0x10 + val; | ||||
| 						in++; | ||||
| 					} | ||||
| 					out++; | ||||
| 				} else | ||||
| 					log_file_warning(AST::current_filename.c_str(), frontend_verilog_yyget_lineno(), "ignoring invalid hex escape.\n"); | ||||
| 				break; | ||||
| 			case '\\': | ||||
| 				*out++ = '\\'; | ||||
| 				break; | ||||
| 			case '"': | ||||
| 				*out++ = '"'; | ||||
| 				break; | ||||
| 			case '\n': | ||||
| 			case '\r': | ||||
| 				if (in + 1 < str + len && (in[1] ^ *in) == ('\n' ^ '\r')) | ||||
| 					in++; | ||||
| 				break; | ||||
| 			default: | ||||
| 				if ('0' <= *in && *in <= '7') { | ||||
| 					int val; | ||||
| 
 | ||||
| 					*out = *in - '0'; | ||||
| 					if (in + 1 < str + len && is_oct_dig(in[1], &val)) { | ||||
| 						*out = *out * 010 + val; | ||||
| 						in++; | ||||
| 						if (in + 1 < str + len && is_oct_dig(in[1], &val)) { | ||||
| 							if (*out >= 040) | ||||
| 								log_file_warning(AST::current_filename.c_str(), frontend_verilog_yyget_lineno(), "octal escape exceeds \\377\n"); | ||||
| 							*out = *out * 010 + val; | ||||
| 							in++; | ||||
| 						} | ||||
| 					} | ||||
| 					out++; | ||||
| 				} else | ||||
| 					*out++ = *in; | ||||
| 			} | ||||
| 			break; | ||||
| 		default: | ||||
| 			*out++ = *in; | ||||
| 		} | ||||
| 
 | ||||
| 	return new std::string(str, out - str); | ||||
| } | ||||
| 
 | ||||
| %} | ||||
| 
 | ||||
| %option yylineno | ||||
|  | @ -122,7 +245,6 @@ static bool isUserType(std::string &s) | |||
| %option prefix="frontend_verilog_yy" | ||||
| 
 | ||||
| %x COMMENT | ||||
| %x STRING | ||||
| %x SYNOPSYS_TRANSLATE_OFF | ||||
| %x SYNOPSYS_FLAGS | ||||
| %x IMPORT_DPI | ||||
|  | @ -335,47 +457,9 @@ TIME_SCALE_SUFFIX [munpf]?s | |||
| 	return TOK_REALVAL; | ||||
| } | ||||
| 
 | ||||
| \"		{ BEGIN(STRING); } | ||||
| <STRING>([^\\"]|\\.)+	{ yymore(); real_location = old_location; } | ||||
| <STRING>\"	{ | ||||
| 	BEGIN(0); | ||||
| 	char *yystr = strdup(yytext); | ||||
| 	yystr[strlen(yytext) - 1] = 0; | ||||
| 	int i = 0, j = 0; | ||||
| 	while (yystr[i]) { | ||||
| 		if (yystr[i] == '\\' && yystr[i + 1]) { | ||||
| 			i++; | ||||
| 			if (yystr[i] == 'a') | ||||
| 				yystr[i] = '\a'; | ||||
| 			else if (yystr[i] == 'f') | ||||
| 				yystr[i] = '\f'; | ||||
| 			else if (yystr[i] == 'n') | ||||
| 				yystr[i] = '\n'; | ||||
| 			else if (yystr[i] == 'r') | ||||
| 				yystr[i] = '\r'; | ||||
| 			else if (yystr[i] == 't') | ||||
| 				yystr[i] = '\t'; | ||||
| 			else if (yystr[i] == 'v') | ||||
| 				yystr[i] = '\v'; | ||||
| 			else if ('0' <= yystr[i] && yystr[i] <= '7') { | ||||
| 				yystr[i] = yystr[i] - '0'; | ||||
| 				if ('0' <= yystr[i + 1] && yystr[i + 1] <= '7') { | ||||
| 					yystr[i + 1] = yystr[i] * 8 + yystr[i + 1] - '0'; | ||||
| 					i++; | ||||
| 				} | ||||
| 				if ('0' <= yystr[i + 1] && yystr[i + 1] <= '7') { | ||||
| 					yystr[i + 1] = yystr[i] * 8 + yystr[i + 1] - '0'; | ||||
| 					i++; | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 		yystr[j++] = yystr[i++]; | ||||
| 	} | ||||
| 	yystr[j] = 0; | ||||
| 	yylval->string = new std::string(yystr, j); | ||||
| 	free(yystr); | ||||
| 	return TOK_STRING; | ||||
| } | ||||
| \"([^\\"]|\\.|\\\n)*\"			{ yylval->string = process_str(yytext + 1, yyleng - 2, false); return TOK_STRING; } | ||||
| 
 | ||||
| \"{3}(\"{0,2}([^\\"]|\\.|\\\n))*\"{3}	{ yylval->string = process_str(yytext + 3, yyleng - 6, true); return TOK_STRING; } | ||||
| 
 | ||||
| and|nand|or|nor|xor|xnor|not|buf|bufif0|bufif1|notif0|notif1 { | ||||
| 	yylval->string = new std::string(yytext); | ||||
|  |  | |||
|  | @ -1,5 +0,0 @@ | |||
| // Regression test for bug mentioned in #5160:
 | ||||
| // https://github.com/YosysHQ/yosys/pull/5160#issuecomment-2983643084
 | ||||
| module top; | ||||
|     initial $display( "\\" ); | ||||
| endmodule | ||||
							
								
								
									
										257
									
								
								tests/verilog/string-literals.ys
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										257
									
								
								tests/verilog/string-literals.ys
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,257 @@ | |||
# Tests for Verilog string literal decoding (escape sequences, multi-line
# and triple-quoted strings).  Every case reads a small module, checks the
# expected warnings with `logger`, and, where a value is defined, proves with
# `sat` that the literal equals the expected constant.  Each literal under
# test has its own uniquely named wire pair (<name>, <name>val) so that every
# -prove target checks exactly one literal.

# Test valid escape sequences yield correct results:
logger -expect-no-warnings
read_verilog << EOF
module top;
	wire[7:0] sp = "\ ";
	wire[7:0] spval = 32;
	wire[7:0] ex = "\!";
	wire[7:0] exval = 33;
	wire[7:0] dq = "\"";
	wire[7:0] dqval = 34;
	wire[7:0] ha = "\#";
	wire[7:0] haval = 35;
	wire[7:0] do = "\$";
	wire[7:0] doval = 36;
	wire[7:0] pc = "\%";
	wire[7:0] pcval = 37;
	wire[7:0] am = "\&";
	wire[7:0] amval = 38;
	wire[7:0] sq = "\'";
	wire[7:0] sqval = 39;
	wire[7:0] op = "\(";
	wire[7:0] opval = 40;
	wire[7:0] cp = "\)";
	wire[7:0] cpval = 41;
	wire[7:0] as = "\*";
	wire[7:0] asval = 42;
	wire[7:0] pl = "\+";
	wire[7:0] plval = 43;
	wire[7:0] co = "\,";
	wire[7:0] coval = 44;
	wire[7:0] mi = "\-";
	wire[7:0] mival = 45;
	wire[7:0] dt = "\.";
	wire[7:0] dtval = 46;
	wire[7:0] sl = "\/";
	wire[7:0] slval = 47;

	wire[7:0] dig0 = "\012";
	wire[7:0] dig0val = 10;
	wire[7:0] dig8 = "\8"; // not octal, a literal '8'
	wire[7:0] dig8val = 56;
	wire[7:0] dig9 = "\9"; // not octal, a literal '9'
	wire[7:0] dig9val = 57;

	wire[7:0] cl = "\:";
	wire[7:0] clval = 58;
	wire[7:0] sc = "\;";
	wire[7:0] scval = 59;
	wire[7:0] lt = "\<";
	wire[7:0] ltval = 60;
	wire[7:0] eq = "\=";
	wire[7:0] eqval = 61;
	wire[7:0] gt = "\>";
	wire[7:0] gtval = 62;
	wire[7:0] qu = "\?";
	wire[7:0] quval = 63;
	wire[7:0] at = "\@";
	wire[7:0] atval = 64;

	wire[7:0] A = "\A";
	wire[7:0] Aval = 65; // etc. etc.

	wire[7:0] os = "\[";
	wire[7:0] osval = 91;
	wire[7:0] bs = "\\";
	wire[7:0] bsval = 92;
	wire[7:0] cs = "\]";
	wire[7:0] csval = 93;
	wire[7:0] ca = "\^";
	wire[7:0] caval = 94;
	wire[7:0] us = "\_";
	wire[7:0] usval = 95;
	wire[7:0] bq = "\`";
	wire[7:0] bqval = 96;

	wire[7:0] a = "\a"; // alert, ASCII BEL=7
	wire[7:0] aval = 7;
	wire[7:0] b = "\b";
	wire[7:0] bval = 98;
	wire[7:0] c = "\c";
	wire[7:0] cval = 99;
	wire[7:0] d = "\d";
	wire[7:0] dval = 100;
	wire[7:0] e = "\e";
	wire[7:0] eval = 101;
	wire[7:0] f = "\f"; // form feed, ASCII FF=12
	wire[7:0] fval = 12;
	wire[7:0] g = "\g";
	wire[7:0] gval = 103;
	wire[7:0] h = "\h";
	wire[7:0] hval = 104;
	wire[7:0] i = "\i";
	wire[7:0] ival = 105;
	wire[7:0] j = "\j";
	wire[7:0] jval = 106;
	wire[7:0] k = "\k";
	wire[7:0] kval = 107;
	wire[7:0] l = "\l";
	wire[7:0] lval = 108;
	wire[7:0] m = "\m";
	wire[7:0] mval = 109;
	wire[7:0] n = "\n"; // new line, ASCII LF=10
	wire[7:0] nval = 10;
	wire[7:0] o = "\o";
	wire[7:0] oval = 111;
	wire[7:0] p = "\p";
	wire[7:0] pval = 112;
	wire[7:0] q = "\q";
	wire[7:0] qval = 113;
	wire[7:0] r = "\r"; // carriage return, ASCII CR=13, not IEEE 1800-2023
	wire[7:0] rval = 13;
	wire[7:0] s = "\s";
	wire[7:0] sval = 115;
	wire[7:0] t = "\t"; // tab, ASCII HT=9
	wire[7:0] tval = 9;
	wire[7:0] u = "\u";
	wire[7:0] uval = 117;
	wire[7:0] v = "\v"; // vertical tab, ASCII VT=11
	wire[7:0] vval = 11;
	wire[7:0] w = "\w";
	wire[7:0] wval = 119;
	wire[7:0] x = "\x2A"; // hex escape
	wire[7:0] xval = 42;
	wire[7:0] y = "\y";
	wire[7:0] yval = 121;
	wire[7:0] z = "\z";
	wire[7:0] zval = 122;

	wire[7:0] ob = "\{";
	wire[7:0] obval = 123;
	wire[7:0] vb = "\|";
	wire[7:0] vbval = 124;
	wire[7:0] cb = "\}";
	wire[7:0] cbval = 125;
	wire[7:0] ti = "\~";
	wire[7:0] tival = 126;
endmodule
EOF
sat -prove sp spval -prove ex exval -prove dq dqval -prove ha haval -prove do doval -prove pc pcval -prove am amval -prove sq sqval -prove op opval -prove cp cpval -prove as asval -prove pl plval -prove co coval -prove mi mival -prove dt dtval -prove sl slval -verify
sat -prove dig0 dig0val -prove dig8 dig8val -prove dig9 dig9val -verify
sat -prove cl clval -prove sc scval -prove lt ltval -prove eq eqval -prove gt gtval -prove qu quval -prove at atval -prove A Aval -verify
sat -prove os osval -prove bs bsval -prove cs csval -prove ca caval -prove us usval -prove bq bqval -verify
sat -prove a aval -prove b bval -prove c cval -prove d dval -prove e eval -prove f fval -prove g gval -prove h hval -prove i ival -prove j jval -prove k kval -prove l lval -prove m mval -prove n nval -prove o oval -prove p pval -prove q qval -prove r rval -prove s sval -prove t tval -prove u uval -prove v vval -prove w wval -prove x xval -prove y yval -prove z zval -verify
sat -prove ob obval -prove vb vbval -prove cb cbval -prove ti tival -verify
logger -check-expected
design -reset

# Test octal escape out of range.
logger -expect warning "octal escape exceeds \\377" 1
read_verilog << EOF
module top;
	wire[7:0] x = "\400";
endmodule
EOF
logger -check-expected
design -reset

# Test invalid octal digit.
logger -expect warning "'\?' not a valid digit in octal escape sequence" 1
read_verilog << EOF
module top;
	wire[7:0] x = "\0?";
endmodule
EOF
logger -check-expected
design -reset

# Test invalid hex digit.
logger -expect warning "'X' not a valid digit in hex escape sequence" 1
read_verilog << EOF
module top;
	wire[7:0] x = "\x0X";
endmodule
EOF
logger -check-expected
design -reset

# Test hex escape with no hex digits at all.
logger -expect warning "ignoring invalid hex escape" 1
read_verilog << EOF
module top;
	wire[7:0] x = "\xy";
endmodule
EOF
logger -check-expected
design -reset

# Test hex escape interrupted by end of string.
logger -expect warning "ignoring invalid hex escape" 1
read_verilog << EOF
module top;
	wire[7:0] x = "\x";
endmodule
EOF
logger -check-expected
design -reset

# Test multi-line string: a warning is expected, and the line break must
# still decode to a single LF (0x0A).
logger -expect warning "Multi-line string literals should be triple-quoted or escaped" 1
read_verilog << EOF
module top;
	wire[31:0] x = "A
BC";
	wire[31:0] xval = 32'h410A4243;
endmodule
EOF
logger -check-expected
sat -prove x xval -verify
design -reset

# Test multi-line triple-quoted string.
logger -expect-no-warnings
read_verilog << EOF
module top;
	wire[31:0] x = """A
BC""";
	wire[31:0] xval = 32'h410A4243;
endmodule
EOF
logger -check-expected
sat -prove x xval -verify
design -reset

# Test escaped multi-line string.
logger -expect-no-warnings
read_verilog << EOF
module top;
	wire[31:0] x = "AB\
CD";
	wire[31:0] xval = 32'h41424344;
endmodule
EOF
logger -check-expected
sat -prove x xval -verify
design -reset

# Test octal escape with surrounding data.
logger -expect-no-warnings
read_verilog << EOF
module top;
	wire[31:0] x = "AB\234C";
	wire[31:0] xval = 32'h41429C43;
endmodule
EOF
logger -check-expected
sat -prove x xval -verify
design -reset

# Test hex escape with surrounding data.
logger -expect-no-warnings
read_verilog << EOF
module top;
	wire[31:0] x = "A\xBCDE";
	wire[31:0] xval = 32'h41BC4445;
endmodule
EOF
logger -check-expected
sat -prove x xval -verify
design -reset
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue