From fdec1489032eb4ce479db8fe911c43b291b7969a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 14 Mar 2026 23:15:50 +0000 Subject: [PATCH] Address NSB review comments in seq_model.cpp Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/ast/rewriter/seq_rewriter.cpp | 18 ++++++ src/ast/rewriter/seq_rewriter.h | 10 ++++ src/smt/seq_model.cpp | 98 ++++++------------------------- src/smt/seq_model.h | 12 +--- src/smt/theory_nseq.cpp | 2 +- 5 files changed, 49 insertions(+), 91 deletions(-) diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 86bb297e9..d90e31c11 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -6014,6 +6014,24 @@ void seq_rewriter::op_cache::cleanup() { } } +lbool seq_rewriter::some_seq_in_re(expr* r, expr_ref& result) { + sort* seq_sort = nullptr; + if (u().is_re(r, seq_sort) && u().is_string(seq_sort)) { + zstring s; + lbool res = some_string_in_re(r, s); + if (res == l_true) + result = str().mk_string(s); + return res; + } + // For non-string sequences: check if the regex accepts the empty sequence. + expr_ref is_null = is_nullable(r); + if (m().is_true(is_null)) { + result = str().mk_empty(seq_sort ? seq_sort : str().mk_string_sort()); + return l_true; + } + return l_undef; +} + lbool seq_rewriter::some_string_in_re(expr* r, zstring& s) { sort* rs; (void)rs; diff --git a/src/ast/rewriter/seq_rewriter.h b/src/ast/rewriter/seq_rewriter.h index 583720911..5612c5a69 100644 --- a/src/ast/rewriter/seq_rewriter.h +++ b/src/ast/rewriter/seq_rewriter.h @@ -417,6 +417,16 @@ public: /* Apply simplifications to the intersection to keep it normalized (r1 and r2 are not normalized)*/ expr_ref mk_regex_inter_normalize(expr* r1, expr* r2); + /* + * Extract some sequence that is a member of r. + * result is set to a concrete sequence expression if l_true is returned. + * For string-typed regexes, delegates to some_string_in_re. + * For other sequence types, checks nullability and returns the empty + * sequence if the regex accepts it; otherwise returns l_undef. + * Returns l_false if the regex is known to be empty. + */ + lbool some_seq_in_re(expr* r, expr_ref& result); + /* * Extract some string that is a member of r. * Return true if a valid string was extracted. diff --git a/src/smt/seq_model.cpp b/src/smt/seq_model.cpp index b3ca03890..8de5a7f5a 100644 --- a/src/smt/seq_model.cpp +++ b/src/smt/seq_model.cpp @@ -17,7 +17,6 @@ Author: --*/ #include "smt/seq_model.h" -#include "smt/seq/seq_regex.h" #include "smt/seq/seq_state.h" #include "smt/smt_context.h" #include "smt/smt_model_generator.h" @@ -27,8 +26,8 @@ Author: namespace smt { seq_model::seq_model(ast_manager& m, seq_util& seq, - seq_rewriter& rw, euf::sgraph& sg, seq::seq_regex& regex) - : m(m), m_seq(seq), m_rewriter(rw), m_sg(sg), m_regex(regex), m_trail(m) + seq_rewriter& rw, euf::sgraph& sg) + : m(m), m_seq(seq), m_rewriter(rw), m_sg(sg), m_trail(m) {} void seq_model::init(model_generator& mg, seq::nielsen_graph& nielsen, seq_state const& state) { @@ -173,7 +172,7 @@ namespace smt { return expr_ref(get_var_value(n), m); if (n->is_concat()) { - // NSB review: assert the sort is sequence and not a regex + SASSERT(n->get_sort() && m_seq.is_seq(n->get_sort())); expr_ref lhs = snode_to_value(n->arg(0)); expr_ref rhs = snode_to_value(n->arg(1)); if (lhs && rhs) @@ -232,44 +231,20 @@ namespace smt { exp_val = rational(0); // Build the repeated string: base^exp_val - // NSB review: use the sort of n to when calling mk_empty - if (exp_val.is_zero()) - return expr_ref(m_seq.str.mk_empty(m_seq.str.mk_string_sort()), m); + if (exp_val.is_zero()) { + sort* srt = n->get_sort(); + if (!srt) srt = m_seq.str.mk_string_sort(); + return expr_ref(m_seq.str.mk_empty(srt), m); + } if (exp_val.is_one()) return base_val; // For small exponents, concatenate directly; for large ones, - // build a concrete string constant to avoid enormous AST chains - // that cause cleanup_expr to diverge. - - // NSB review: for large n_val just return mk_power(base_val, arith.mk_int(n_val)) + // return mk_power to avoid enormous AST chains. unsigned n_val = exp_val.get_unsigned(); constexpr unsigned POWER_EXPAND_LIMIT = 1000; - if (n_val > POWER_EXPAND_LIMIT) { - // Try to extract a concrete character from the base (seq.unit(c)) - // and build a string literal directly (O(1) AST node). - unsigned ch = 0; - expr* unit_arg = nullptr; - if (m_seq.str.is_unit(base_val, unit_arg) && m_seq.is_const_char(unit_arg, ch)) { - svector buf(n_val, ch); - zstring result(buf.size(), buf.data()); - return expr_ref(m_seq.str.mk_string(result), m); - } - // Also handle if base is already a string constant - zstring base_str; - if (m_seq.str.is_string(base_val, base_str) && base_str.length() > 0) { - svector buf; - for (unsigned i = 0; i < n_val; ++i) - for (unsigned j = 0; j < base_str.length(); ++j) - buf.push_back(base_str[j]); - zstring result(buf.size(), buf.data()); - return expr_ref(m_seq.str.mk_string(result), m); - } - // Fallback: cap exponent to avoid divergence - - // NSB review: this goes away and instead we can use mk_power(base_val, arith.mk_int(n_val)) when n_val - n_val = POWER_EXPAND_LIMIT; - } + if (n_val > POWER_EXPAND_LIMIT) + return expr_ref(m_seq.str.mk_power(base_val, arith.mk_int(n_val)), m); expr_ref acc(base_val); for (unsigned i = 1; i < n_val; ++i) acc = m_seq.str.mk_concat(acc, base_val); @@ -281,43 +256,6 @@ namespace smt { return e ? expr_ref(e, m) : expr_ref(m); } - // NSB review: use seq_rewriter::some_string_in_re instead of this custome witness generator - // NSB review: add some_seq_in_re to seq_rewriter to create a witness sequence for types other than strings. - expr_ref seq_model::generate_regex_witness(euf::snode* regex, unsigned depth) { - if (!regex) - return expr_ref(m_seq.str.mk_empty(m_seq.str.mk_string_sort()), m); - - // depth bound to prevent stack overflow on deep regexes - if (depth > 1000) { - sort* srt = m_seq.str.mk_string_sort(); - expr* fresh = m_factory->get_fresh_value(srt); - return fresh ? expr_ref(fresh, m) : expr_ref(m_seq.str.mk_empty(srt), m); - } - - // nullable regex: empty string is a valid witness - if (m_regex.is_nullable(regex)) - return expr_ref(m_seq.str.mk_empty(m_seq.str.mk_string_sort()), m); - - // collect first-position characters - euf::snode_vector chars; - m_regex.collect_first_chars(regex, chars); - - if (!chars.empty()) { - // pick first concrete character, derive, and recurse - euf::snode* c = chars[0]; - euf::snode* deriv = m_regex.derivative(regex, c); - expr_ref tail = generate_regex_witness(deriv, depth + 1); - if (tail && c->get_expr()) - return expr_ref(m_seq.str.mk_concat(c->get_expr(), tail), m); - } - - // fallback: return fresh value from factory (may not satisfy the regex, - // but avoids returning empty string which definitely doesn't satisfy non-nullable regex) - sort* srt = m_seq.str.mk_string_sort(); - expr* fresh = m_factory->get_fresh_value(srt); - return fresh ? expr_ref(fresh, m) : expr_ref(m_seq.str.mk_empty(srt), m); - } - void seq_model::register_existing_values(seq::nielsen_graph& nielsen) { seq::nielsen_node const* root = nielsen.root(); if (!root) @@ -348,12 +286,14 @@ namespace smt { // check if this variable has regex constraints euf::snode* re = nullptr; if (m_var_regex.find(var->id(), re) && re) { - // generate a witness string satisfying the regex - expr_ref witness = generate_regex_witness(re); - if (witness) { - m_trail.push_back(witness); - m_factory->register_value(witness); - return witness; + expr* re_expr = re->get_expr(); + if (re_expr) { + expr_ref witness(m); + if (m_rewriter.some_seq_in_re(re_expr, witness) == l_true && witness) { + m_trail.push_back(witness); + m_factory->register_value(witness); + return witness; + } } } diff --git a/src/smt/seq_model.h b/src/smt/seq_model.h index 533d253aa..51bd8365b 100644 --- a/src/smt/seq_model.h +++ b/src/smt/seq_model.h @@ -36,10 +36,6 @@ Author: class proto_model; -namespace seq { - class seq_regex; -} - namespace smt { class enode; @@ -52,7 +48,6 @@ namespace smt { seq_util& m_seq; seq_rewriter& m_rewriter; euf::sgraph& m_sg; - seq::seq_regex& m_regex; // factory for generating fresh string/regex values seq_factory* m_factory = nullptr; @@ -74,7 +69,7 @@ namespace smt { public: seq_model(ast_manager& m, seq_util& seq, - seq_rewriter& rw, euf::sgraph& sg, seq::seq_regex& regex); + seq_rewriter& rw, euf::sgraph& sg); // Phase 1: initialize model construction. // Allocates seq_factory, registers it with mg, collects @@ -103,11 +98,6 @@ namespace smt { // Returns a concrete Z3 expression. expr_ref snode_to_value(euf::snode* n); - // generate a concrete witness string for a regex. - // Uses nullable check and first-char collection to build - // a minimal satisfying string. depth bounds recursion. - expr_ref generate_regex_witness(euf::snode* regex, unsigned depth = 0); - // register all string literals appearing in the constraint store // with the factory to avoid collisions with fresh values. void register_existing_values(seq::nielsen_graph& nielsen); diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index 78931e63f..f0676132e 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -37,7 +37,7 @@ namespace smt { m_nielsen(m_sgraph, m_context_solver), m_state(), m_regex(m_sgraph), - m_model(ctx.get_manager(), m_seq, m_rewriter, m_sgraph, m_regex) + m_model(ctx.get_manager(), m_seq, m_rewriter, m_sgraph) {} // -----------------------------------------------------------------------