From bc01468c7545b2362a52b48f11fa0e5cccb4e75a Mon Sep 17 00:00:00 2001 From: Jannis Harder Date: Tue, 1 Apr 2025 13:50:29 +0200 Subject: [PATCH] read_liberty: Faster std::string construction in the liberty lexer This extends the `LibertyInputStream` added in the previous commit to allow arbitrary lookahead. Then this uses the lookahead to find the total length of the token within the input buffer, instead of consuming the token byte by byte while appending to a std::string. Constructing the std::string once the total length is known avoids any reallocations from growing std::string's buffer. --- passes/techmap/libparse.cc | 55 ++++++++++++++++++++++++-------------- passes/techmap/libparse.h | 11 ++++++++ 2 files changed, 46 insertions(+), 20 deletions(-) diff --git a/passes/techmap/libparse.cc b/passes/techmap/libparse.cc index dbf191080..d3d5b7d57 100644 --- a/passes/techmap/libparse.cc +++ b/passes/techmap/libparse.cc @@ -77,6 +77,16 @@ int LibertyInputStream::get_cold() return c; } +int LibertyInputStream::peek_cold(size_t offset) +{ + if (buf_pos + offset >= buf_end) { + if (!extend_buffer_at_least(offset + 1)) + return EOF; + } + + return buffer[buf_pos + offset]; +} + LibertyAst::~LibertyAst() { for (auto child : children) @@ -282,15 +292,19 @@ int LibertyParser::lexer(std::string &str) // search for identifiers, numbers, plus or minus. 
if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9') || c == '_' || c == '-' || c == '+' || c == '.') { - str = static_cast<char>(c); - while (1) { - c = f.get(); + f.unget(); + size_t i = 1; + while (true) { + c = f.peek(i); if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9') || c == '_' || c == '-' || c == '+' || c == '.') - str += c; + i += 1; else break; } - f.unget(); + str.clear(); + str.append(f.buffered_data(), f.buffered_data() + i); + f.consume(i); + if (str == "+" || str == "-") { /* Single operator is not an identifier */ // fprintf(stderr, "LEX: char >>%s<<\n", str.c_str()); @@ -305,23 +319,24 @@ int LibertyParser::lexer(std::string &str) // if it wasn't an identifer, number of array range, // maybe it's a string? if (c == '"') { - str = ""; -#ifdef FILTERLIB - str += c; -#endif - while (1) { - c = f.get(); - if (c == '\n') - line++; - if (c == '"') { -#ifdef FILTERLIB - str += c; -#endif + size_t i = 0; + while (true) { + c = f.peek(i); + line += (c == '\n'); + if (c != '"') + i += 1; + else break; - } - str += c; } - // fprintf(stderr, "LEX: string >>%s<<\n", str.c_str()); + str.clear(); +#ifdef FILTERLIB + f.unget(); + str.append(f.buffered_data(), f.buffered_data() + i + 2); + f.consume(i + 2); +#else + str.append(f.buffered_data(), f.buffered_data() + i); + f.consume(i + 1); +#endif return 'v'; } diff --git a/passes/techmap/libparse.h b/passes/techmap/libparse.h index eb73e296d..1fcaaebee 100644 --- a/passes/techmap/libparse.h +++ b/passes/techmap/libparse.h @@ -101,6 +101,7 @@ namespace Yosys bool extend_buffer_at_least(size_t size = 1); YS_COLD int get_cold(); + YS_COLD int peek_cold(size_t offset); public: LibertyInputStream(std::istream &f) : f(f) {} @@ -116,6 +117,16 @@ namespace Yosys return c; } + int peek(size_t offset = 0) { + if (buf_pos + offset >= buf_end) + return peek_cold(offset); + return buffer[buf_pos + offset]; + } + + void consume(size_t n = 1) { + buf_pos += n; + } + void unget() 
{ buf_pos -= 1; }