From 636b3b0a6b50442b47e26d0da89e5cd947567350 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Sat, 28 Feb 2026 10:06:36 -0800 Subject: [PATCH] Add Phase 5: regex membership checking via derivatives - check_regex_memberships: iterate regex constraints in final_check - check_regex_mem: for ground strings, compute Brzozowski derivatives and check nullable - derive_regex: step-by-step derivative of regex w.r.t. string prefix - is_ground_string: resolve string value through e-graph roots - Handles positive (str.in_re) and negative (not str.in_re) memberships - Uses seq_rewriter::mk_derivative and is_nullable (existing Z3 infrastructure) - No regressions on string-concat.smt2 (36/62 correct, 0 wrong) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/smt/theory_nseq.cpp | 105 +++++++++++++++++++++++++++++++++++++++- src/smt/theory_nseq.h | 6 +++ 2 files changed, 110 insertions(+), 1 deletion(-) diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index bdf149e61..bfe9d9e9e 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -80,7 +80,9 @@ namespace smt { if (check_zero_length()) return FC_CONTINUE; - // TODO: implement regex membership checking + // Check regex membership constraints + if (check_regex_memberships()) + return FC_CONTINUE; if (all_eqs_solved() && m_state.mems().empty()) return FC_DONE; @@ -689,6 +691,107 @@ namespace smt { return false; } + // ------------------------------------------------------- + // Regex membership + // ------------------------------------------------------- + + bool theory_nseq::is_ground_string(expr* e, zstring& s) { + // Follow e-graph root and check if it's a ground string + if (!ctx.e_internalized(e)) + return false; + enode* n = ctx.get_enode(e); + expr* root = n->get_root()->get_expr(); + if (m_util.str.is_string(root, s)) + return true; + // Check if root is a concat of units/strings + expr_ref_vector units(m); + m_util.str.get_concat_units(root, units); + s = zstring(); + for (expr* u : units) { + if (ctx.e_internalized(u)) { + expr* ur = ctx.get_enode(u)->get_root()->get_expr(); + zstring us; + if (m_util.str.is_string(ur, us)) { + s = s + us; + continue; + } + unsigned ch; + expr* arg; + if (m_util.str.is_unit(ur, arg) && + ctx.e_internalized(arg) && + m_util.is_const_char(ctx.get_enode(arg)->get_root()->get_expr(), ch)) { + s = s + zstring(ch); + continue; + } + } + return false; + } + return true; + } + + expr_ref theory_nseq::derive_regex(expr* regex, zstring const& prefix) { + expr_ref r(regex, m); + for (unsigned i = 0; i < prefix.length(); ++i) { + expr_ref ch(m_util.mk_char(prefix[i]), m); + r = m_seq_rewrite.mk_derivative(ch, r); + } + return r; + } + + bool theory_nseq::check_regex_memberships() { + bool progress = false; + for (auto const& mem : m_state.mems()) { + if (check_regex_mem(mem)) + progress = true; + if (ctx.inconsistent()) + return true; + } + return progress; + } + + bool theory_nseq::check_regex_mem(nseq_mem const& mem) { + expr* s = mem.str(); + expr* r = mem.regex(); + bool sign = mem.sign(); + nseq_dependency* dep = mem.dep(); + + // Try to determine the string value from the e-graph + zstring sval; + if (!is_ground_string(s, sval)) + return false; + + // Compute derivatives for the known string + expr_ref derived = derive_regex(r, sval); + + // Check nullable: does the derived regex accept the empty word? + expr_ref nullable = m_seq_rewrite.is_nullable(derived); + m_rewrite(nullable); + + if (sign) { + // Positive membership: s must be in r + if (m.is_false(nullable)) { + // String is fully determined but regex rejects → conflict + set_conflict(dep); + return true; + } + } + else { + // Negative membership: s must NOT be in r + if (m.is_true(nullable)) { + // String is fully determined and regex accepts → conflict + set_conflict(dep); + return true; + } + } + + // Check if the derived regex is the empty set + if (sign && m_util.re.is_empty(derived)) { + set_conflict(dep); + return true; + } + return false; + } + // ------------------------------------------------------- // Length reasoning // ------------------------------------------------------- diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h index 75f6fd005..ffd7a27dc 100644 --- a/src/smt/theory_nseq.h +++ b/src/smt/theory_nseq.h @@ -100,6 +100,12 @@ namespace smt { bool all_eqs_solved(); bool check_length_conflict(expr* x, expr_ref_vector const& es, nseq_dependency* dep); + // Regex membership + bool check_regex_memberships(); + bool check_regex_mem(nseq_mem const& mem); + bool is_ground_string(expr* e, zstring& s); + expr_ref derive_regex(expr* regex, zstring const& prefix); + // Length reasoning void add_length_axiom(expr* n); bool check_zero_length();