mirror of
https://github.com/Z3Prover/z3
synced 2026-03-07 05:44:51 +00:00
Add Phase 5: regex membership checking via derivatives
- check_regex_memberships: iterate regex constraints in final_check - check_regex_mem: for ground strings, compute Brzozowski derivatives and check nullable - derive_regex: step-by-step derivative of regex w.r.t. string prefix - is_ground_string: resolve string value through e-graph roots - Handles positive (str.in_re) and negative (not str.in_re) memberships - Uses seq_rewriter::mk_derivative and is_nullable (existing Z3 infrastructure) - No regressions on string-concat.smt2 (36/62 correct, 0 wrong) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
4b58d57b61
commit
636b3b0a6b
2 changed files with 110 additions and 1 deletions
|
|
@ -80,7 +80,9 @@ namespace smt {
|
|||
if (check_zero_length())
|
||||
return FC_CONTINUE;
|
||||
|
||||
// TODO: implement regex membership checking
|
||||
// Check regex membership constraints
|
||||
if (check_regex_memberships())
|
||||
return FC_CONTINUE;
|
||||
|
||||
if (all_eqs_solved() && m_state.mems().empty())
|
||||
return FC_DONE;
|
||||
|
|
@ -689,6 +691,107 @@ namespace smt {
|
|||
return false;
|
||||
}
|
||||
|
||||
// -------------------------------------------------------
|
||||
// Regex membership
|
||||
// -------------------------------------------------------
|
||||
|
||||
bool theory_nseq::is_ground_string(expr* e, zstring& s) {
|
||||
// Follow e-graph root and check if it's a ground string
|
||||
if (!ctx.e_internalized(e))
|
||||
return false;
|
||||
enode* n = ctx.get_enode(e);
|
||||
expr* root = n->get_root()->get_expr();
|
||||
if (m_util.str.is_string(root, s))
|
||||
return true;
|
||||
// Check if root is a concat of units/strings
|
||||
expr_ref_vector units(m);
|
||||
m_util.str.get_concat_units(root, units);
|
||||
s = zstring();
|
||||
for (expr* u : units) {
|
||||
if (ctx.e_internalized(u)) {
|
||||
expr* ur = ctx.get_enode(u)->get_root()->get_expr();
|
||||
zstring us;
|
||||
if (m_util.str.is_string(ur, us)) {
|
||||
s = s + us;
|
||||
continue;
|
||||
}
|
||||
unsigned ch;
|
||||
expr* arg;
|
||||
if (m_util.str.is_unit(ur, arg) &&
|
||||
ctx.e_internalized(arg) &&
|
||||
m_util.is_const_char(ctx.get_enode(arg)->get_root()->get_expr(), ch)) {
|
||||
s = s + zstring(ch);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
expr_ref theory_nseq::derive_regex(expr* regex, zstring const& prefix) {
|
||||
expr_ref r(regex, m);
|
||||
for (unsigned i = 0; i < prefix.length(); ++i) {
|
||||
expr_ref ch(m_util.mk_char(prefix[i]), m);
|
||||
r = m_seq_rewrite.mk_derivative(ch, r);
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
bool theory_nseq::check_regex_memberships() {
|
||||
bool progress = false;
|
||||
for (auto const& mem : m_state.mems()) {
|
||||
if (check_regex_mem(mem))
|
||||
progress = true;
|
||||
if (ctx.inconsistent())
|
||||
return true;
|
||||
}
|
||||
return progress;
|
||||
}
|
||||
|
||||
bool theory_nseq::check_regex_mem(nseq_mem const& mem) {
|
||||
expr* s = mem.str();
|
||||
expr* r = mem.regex();
|
||||
bool sign = mem.sign();
|
||||
nseq_dependency* dep = mem.dep();
|
||||
|
||||
// Try to determine the string value from the e-graph
|
||||
zstring sval;
|
||||
if (!is_ground_string(s, sval))
|
||||
return false;
|
||||
|
||||
// Compute derivatives for the known string
|
||||
expr_ref derived = derive_regex(r, sval);
|
||||
|
||||
// Check nullable: does the derived regex accept the empty word?
|
||||
expr_ref nullable = m_seq_rewrite.is_nullable(derived);
|
||||
m_rewrite(nullable);
|
||||
|
||||
if (sign) {
|
||||
// Positive membership: s must be in r
|
||||
if (m.is_false(nullable)) {
|
||||
// String is fully determined but regex rejects → conflict
|
||||
set_conflict(dep);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Negative membership: s must NOT be in r
|
||||
if (m.is_true(nullable)) {
|
||||
// String is fully determined and regex accepts → conflict
|
||||
set_conflict(dep);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Check if the derived regex is the empty set
|
||||
if (sign && m_util.re.is_empty(derived)) {
|
||||
set_conflict(dep);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// -------------------------------------------------------
|
||||
// Length reasoning
|
||||
// -------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -100,6 +100,12 @@ namespace smt {
|
|||
bool all_eqs_solved();
|
||||
bool check_length_conflict(expr* x, expr_ref_vector const& es, nseq_dependency* dep);
|
||||
|
||||
// Regex membership
|
||||
bool check_regex_memberships();
|
||||
bool check_regex_mem(nseq_mem const& mem);
|
||||
bool is_ground_string(expr* e, zstring& s);
|
||||
expr_ref derive_regex(expr* regex, zstring const& prefix);
|
||||
|
||||
// Length reasoning
|
||||
void add_length_axiom(expr* n);
|
||||
bool check_zero_length();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue