From 84d371f2e9d2a9acda749bbf4ba91146d8a775d0 Mon Sep 17 00:00:00 2001 From: CEisenhofer Date: Mon, 16 Mar 2026 19:54:12 +0100 Subject: [PATCH] Bugfix in regex overapproximation --- src/smt/seq/seq_nielsen.cpp | 35 +++++++++++++++++++++-------------- src/smt/theory_nseq.cpp | 17 ++++------------- 2 files changed, 25 insertions(+), 27 deletions(-) diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index a747debf4..7de3e84c5 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -1789,7 +1789,8 @@ namespace seq { euf::snode* deriv = fwd ? sg.brzozowski_deriv(mem.m_regex, tok) : reverse_brzozowski_deriv(sg, mem.m_regex, tok); - if (!deriv) break; + if (!deriv) + break; if (deriv->is_fail()) { m_is_general_conflict = true; m_reason = backtrack_reason::regex; @@ -1833,11 +1834,15 @@ namespace seq { if (!mt || mt->is_fail()) continue; euf::snode* deriv = sg.brzozowski_deriv(mem.m_regex, mt); - if (!deriv) { is_uniform = false; break; } - if (!uniform) { + if (!deriv) { + is_uniform = false; + break; + } + if (!uniform) uniform = deriv; - } else if (uniform->id() != deriv->id()) { - is_uniform = false; break; + else if (uniform->id() != deriv->id()) { + is_uniform = false; + break; } } if (is_uniform && uniform) { @@ -4427,17 +4432,17 @@ namespace seq { if (tok->is_char()) { // Concrete character → to_re(unit(c)) expr* te = tok->get_expr(); - if (!te) return false; - expr_ref unit_s(seq.str.mk_unit(te), mgr); - expr_ref tre(seq.re.mk_to_re(unit_s), mgr); + if (!te) + return false; + expr_ref tre(seq.re.mk_to_re(te), mgr); tok_re = m_sg.mk(tre); } else if (tok->is_var()) { // Variable → intersection of primitive regex constraints, or Σ* euf::snode* x_range = m_seq_regex->collect_primitive_regex_intersection(tok, node); - if (x_range) { + if (x_range) tok_re = x_range; - } else { + else { // Unconstrained variable: approximate as Σ* sort* str_sort = seq.str.mk_string_sort(); expr_ref all_re(seq.re.mk_full_seq(seq.re.mk_re(str_sort)), mgr); @@ -4485,12 +4490,14 @@ namespace seq { if (!tok_re) return false; - if (!approx) { + if (!approx) approx = tok_re; - } else { + else { expr* ae = approx->get_expr(); - expr* te = tok_re->get_expr(); - if (!ae || !te) return false; + expr* te = tok_re->get_expr( + ); + if (!ae || !te) + return false; expr_ref cat(seq.re.mk_concat(ae, te), mgr); approx = m_sg.mk(cat); } diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index 8b64f811b..d62565fce 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -19,9 +19,6 @@ Author: #include "smt/theory_nseq.h" #include "smt/smt_context.h" #include "smt/smt_justification.h" -#include "smt/proto_model/proto_model.h" -#include "ast/array_decl_plugin.h" -#include "ast/ast_pp.h" #include "util/statistics.h" #include "util/trail.h" @@ -87,8 +84,9 @@ namespace smt { bool theory_nseq::internalize_term(app* term) { // ensure ALL children are internalized (following theory_seq pattern) - for (auto arg : *term) + for (auto arg : *term) { mk_var(ensure_enode(arg)); + } if (ctx.e_internalized(term)) { mk_var(ctx.get_enode(term)); @@ -102,12 +100,10 @@ namespace smt { } enode* en; - if (ctx.e_internalized(term)) { + if (ctx.e_internalized(term)) en = ctx.get_enode(term); - } - else { + else en = ctx.mk_enode(term, false, m.is_bool(term), true); - } mk_var(en); // register in our private sgraph @@ -859,15 +855,10 @@ namespace smt { // jointly unsatisfiable. Assert a conflict from all their literals. enode_pair_vector eqs; literal_vector lits; - std::cout << "CONFLICT:\n"; for (unsigned i : mem_indices) { mem_source const& src = m_state.get_mem_source(i); SASSERT(ctx.get_assignment(src.m_lit) == l_true); // we already stored the polarity of the literal lits.push_back(src.m_lit); - std::cout << "\t\t"; - std::cout << mk_pp(ctx.literal2expr(src.m_lit), m) << std::endl; - std::cout << "\t\t"; - std::cout << src.m_lit << std::endl; } TRACE(seq, tout << "nseq regex precheck: empty intersection for var " << var_id << ", conflict with " << lits.size() << " lits\n";);