3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-03-07 05:44:51 +00:00

Add Phase 5: regex membership checking via derivatives

- check_regex_memberships: iterate regex constraints in final_check
- check_regex_mem: for ground strings, compute Brzozowski derivatives and check nullable
- derive_regex: step-by-step derivative of regex w.r.t. string prefix
- is_ground_string: resolve string value through e-graph roots
- Handles positive (str.in_re) and negative (not str.in_re) memberships
- Uses seq_rewriter::mk_derivative and is_nullable (existing Z3 infrastructure)
- No regressions on string-concat.smt2 (36/62 correct, 0 wrong)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Nikolaj Bjorner 2026-02-28 10:06:36 -08:00
parent 4b58d57b61
commit 636b3b0a6b
2 changed files with 110 additions and 1 deletions

View file

@ -80,7 +80,9 @@ namespace smt {
if (check_zero_length())
return FC_CONTINUE;
// TODO: implement regex membership checking
// Check regex membership constraints
if (check_regex_memberships())
return FC_CONTINUE;
if (all_eqs_solved() && m_state.mems().empty())
return FC_DONE;
@ -689,6 +691,107 @@ namespace smt {
return false;
}
// -------------------------------------------------------
// Regex membership
// -------------------------------------------------------
bool theory_nseq::is_ground_string(expr* e, zstring& s) {
// Follow e-graph root and check if it's a ground string
if (!ctx.e_internalized(e))
return false;
enode* n = ctx.get_enode(e);
expr* root = n->get_root()->get_expr();
if (m_util.str.is_string(root, s))
return true;
// Check if root is a concat of units/strings
expr_ref_vector units(m);
m_util.str.get_concat_units(root, units);
s = zstring();
for (expr* u : units) {
if (ctx.e_internalized(u)) {
expr* ur = ctx.get_enode(u)->get_root()->get_expr();
zstring us;
if (m_util.str.is_string(ur, us)) {
s = s + us;
continue;
}
unsigned ch;
expr* arg;
if (m_util.str.is_unit(ur, arg) &&
ctx.e_internalized(arg) &&
m_util.is_const_char(ctx.get_enode(arg)->get_root()->get_expr(), ch)) {
s = s + zstring(ch);
continue;
}
}
return false;
}
return true;
}
expr_ref theory_nseq::derive_regex(expr* regex, zstring const& prefix) {
expr_ref r(regex, m);
for (unsigned i = 0; i < prefix.length(); ++i) {
expr_ref ch(m_util.mk_char(prefix[i]), m);
r = m_seq_rewrite.mk_derivative(ch, r);
}
return r;
}
bool theory_nseq::check_regex_memberships() {
bool progress = false;
for (auto const& mem : m_state.mems()) {
if (check_regex_mem(mem))
progress = true;
if (ctx.inconsistent())
return true;
}
return progress;
}
bool theory_nseq::check_regex_mem(nseq_mem const& mem) {
expr* s = mem.str();
expr* r = mem.regex();
bool sign = mem.sign();
nseq_dependency* dep = mem.dep();
// Try to determine the string value from the e-graph
zstring sval;
if (!is_ground_string(s, sval))
return false;
// Compute derivatives for the known string
expr_ref derived = derive_regex(r, sval);
// Check nullable: does the derived regex accept the empty word?
expr_ref nullable = m_seq_rewrite.is_nullable(derived);
m_rewrite(nullable);
if (sign) {
// Positive membership: s must be in r
if (m.is_false(nullable)) {
// String is fully determined but regex rejects → conflict
set_conflict(dep);
return true;
}
}
else {
// Negative membership: s must NOT be in r
if (m.is_true(nullable)) {
// String is fully determined and regex accepts → conflict
set_conflict(dep);
return true;
}
}
// Check if the derived regex is the empty set
if (sign && m_util.re.is_empty(derived)) {
set_conflict(dep);
return true;
}
return false;
}
// -------------------------------------------------------
// Length reasoning
// -------------------------------------------------------

View file

@ -100,6 +100,12 @@ namespace smt {
bool all_eqs_solved();
bool check_length_conflict(expr* x, expr_ref_vector const& es, nseq_dependency* dep);
// Regex membership
bool check_regex_memberships();
bool check_regex_mem(nseq_mem const& mem);
bool is_ground_string(expr* e, zstring& s);
expr_ref derive_regex(expr* regex, zstring const& prefix);
// Length reasoning
void add_length_axiom(expr* n);
bool check_zero_length();