From 75678fc2c2ce4ada7832837454bfb5c3736bc926 Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Wed, 2 Jul 2025 09:54:36 -0700 Subject: [PATCH] =?UTF-8?q?Fix=20O(n=C2=B2)=20performance=20issue=20in=20C?= =?UTF-8?q?LI=20datatype=20declaration=20processing=20(#7712)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Initial plan * Implement batch initialization fix for O(n²) datatype performance issue Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Fix the real O(n²) bottleneck with lazy hash table for constructor name lookups Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Optimize get_constructor_by_name: use func_decl* parameter, add linear search optimization for small datatypes, and ensure non-null postcondition Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/ast/datatype_decl_plugin.cpp | 27 ++++++++++++--------------- src/ast/datatype_decl_plugin.h | 27 +++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/src/ast/datatype_decl_plugin.cpp b/src/ast/datatype_decl_plugin.cpp index 33de8dd69..3daceb88c 100644 --- a/src/ast/datatype_decl_plugin.cpp +++ b/src/ast/datatype_decl_plugin.cpp @@ -1076,14 +1076,13 @@ namespace datatype { sort * datatype = con->get_range(); def const& d = get_def(datatype); - for (constructor const* c : d) { - if (c->name() == con->get_name()) { - for (accessor const* a : *c) { - func_decl_ref fn = a->instantiate(datatype); - res->push_back(fn); - plugin().add_ast(fn); - } - break; + // Use O(1) lookup instead of O(n) linear search + constructor* c = d.get_constructor_by_name(con); + if (c) { + for (accessor const* a : *c) { + func_decl_ref fn = a->instantiate(datatype); + res->push_back(fn); + plugin().add_ast(fn); } } return res; @@ -1105,13 +1104,11 @@ namespace datatype { sort * datatype = con->get_range(); def const& dd = get_def(datatype); symbol r; - // This should be fixed for perf. - // Option 1: hash-table in dd that maps to constructors instead of iterating over all constructors. - // initialize the hash-table lazily when dd is large. - // Option 2: initialize all calls to plugin() registration in a single pass. - for (constructor const* c : dd) - if (c->name() == con->get_name()) - r = c->recognizer(); + // Use O(1) lookup instead of O(n) linear search + constructor* c = dd.get_constructor_by_name(con); + if (c) { + r = c->recognizer(); + } parameter ps[2] = { parameter(con), parameter(r) }; d = m.mk_func_decl(fid(), OP_DT_RECOGNISER, 2, ps, 1, &datatype); SASSERT(d); diff --git a/src/ast/datatype_decl_plugin.h b/src/ast/datatype_decl_plugin.h index 4d91cbf40..7876f10c6 100644 --- a/src/ast/datatype_decl_plugin.h +++ b/src/ast/datatype_decl_plugin.h @@ -26,6 +26,7 @@ Revision History: #include "util/buffer.h" #include "util/symbol_table.h" #include "util/obj_hashtable.h" +#include "util/dictionary.h" enum sort_kind { @@ -164,6 +165,7 @@ namespace datatype { sort_ref_vector m_params; mutable sort_ref m_sort; ptr_vector m_constructors; + mutable dictionary m_name2constructor; public: def(ast_manager& m, util& u, symbol const& n, unsigned class_id, unsigned num_params, sort * const* params): m(m), @@ -195,6 +197,31 @@ namespace datatype { util& u() const { return m_util; } param_size::size* sort_size() { return m_sort_size; } void set_sort_size(param_size::size* p) { auto* q = m_sort_size; m_sort_size = p; if (p) p->inc_ref(); if (q) q->dec_ref(); m_sort = nullptr; } + constructor* get_constructor_by_name(func_decl* con) const { + symbol const& name = con->get_name(); + constructor* result = nullptr; + + // For small datatypes (< 10 constructors), use linear search instead of hash table + if (m_constructors.size() < 10) { + for (constructor* c : m_constructors) { + if (c->name() == name) { + result = c; + break; + } + } + } else { + // Lazy initialization of name-to-constructor map for O(1) lookups + if (m_name2constructor.empty() && !m_constructors.empty()) { + for (constructor* c : m_constructors) { + m_name2constructor.insert(c->name(), c); + } + } + m_name2constructor.find(name, result); + } + + SASSERT(result); // Post-condition: get_constructor_by_name returns a non-null result + return result; + } def* translate(ast_translation& tr, util& u); };