mirror of
https://github.com/Z3Prover/z3
synced 2026-04-02 09:58:59 +00:00
Fix three nseq quick-win issues: non-greedy quantifiers, bool-eq normalization, stuck-node crash, precheck SAT soundness
Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/03c71d3f-a889-4b06-95ff-ed96a2c0615c
This commit is contained in:
parent
f518faac9b
commit
3c1b1da8ac
5 changed files with 88 additions and 7 deletions
|
|
@ -687,6 +687,10 @@ void seq_decl_plugin::get_op_names(svector<builtin_name> & op_names, symbol cons
|
|||
op_names.push_back(builtin_name("int.to.str", OP_STRING_ITOS));
|
||||
op_names.push_back(builtin_name("re.nostr", _OP_REGEXP_EMPTY));
|
||||
op_names.push_back(builtin_name("re.complement", OP_RE_COMPLEMENT));
|
||||
// Non-greedy quantifiers: semantically identical to greedy for satisfiability
|
||||
op_names.push_back(builtin_name("re.+?", OP_RE_PLUS));
|
||||
op_names.push_back(builtin_name("re.*?", OP_RE_STAR));
|
||||
op_names.push_back(builtin_name("re.??", OP_RE_OPTION));
|
||||
op_names.push_back(builtin_name("str.from_ubv", OP_STRING_UBVTOS));
|
||||
op_names.push_back(builtin_name("str.from_sbv", OP_STRING_SBVTOS));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2453,7 +2453,13 @@ namespace seq {
|
|||
if (!node->is_extended()) {
|
||||
bool ext = generate_extensions(node);
|
||||
IF_VERBOSE(0, display(verbose_stream(), node));
|
||||
VERIFY(ext);
|
||||
if (!ext) {
|
||||
// No modifier applies: the node is stuck (unsupported equation form).
|
||||
// Mark as extended with no children to avoid redundant re-exploration,
|
||||
// and return unknown so the DFS will report FC_GIVEUP.
|
||||
node->set_extended(true);
|
||||
return search_result::unknown;
|
||||
}
|
||||
node->set_extended(true);
|
||||
++m_stats.m_num_extensions;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -612,6 +612,37 @@ namespace seq {
|
|||
return is_empty_bfs(result, max_states);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Check if the language of re accepts at least one non-empty string.
|
||||
// Returns true if a non-empty string is definitely accepted.
|
||||
// Returns false conservatively (also false when inconclusive).
|
||||
// Used by the regex precheck SAT shortcut: only shortcut to SAT when the
|
||||
// intersection language contains a non-empty witness, avoiding false SAT
|
||||
// for intersections like [a-u]* ∩ v* = {""} when var ≠ "" is also asserted.
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
bool seq_regex::accepts_nonempty_string(euf::snode* re) {
|
||||
if (!re || re->is_fail()) return false;
|
||||
// Quick structural checks: full-seq, full-char, and range always
|
||||
// accept non-empty strings (they require at least one character).
|
||||
if (re->is_full_seq() || re->is_full_char()) return true;
|
||||
// Only explore ground regexes; non-ground could expand unpredictably.
|
||||
if (!re->is_ground()) return true; // conservative: assume non-empty exists
|
||||
// For s_other snodes (unrecognized kinds), be conservative.
|
||||
if (re->kind() == euf::snode_kind::s_other) return true;
|
||||
// Compute one level of derivatives: if any derivative is non-fail and
|
||||
// non-empty, then the regex accepts some string starting with that char.
|
||||
euf::snode_vector reps;
|
||||
if (!get_alphabet_representatives(re, reps))
|
||||
return true; // conservative
|
||||
for (euf::snode* ch : reps) {
|
||||
euf::snode* deriv = m_sg.brzozowski_deriv(re, ch);
|
||||
if (deriv && !deriv->is_fail() && !is_empty_regex(deriv))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Language subset check: L(A) ⊆ L(B)
|
||||
// via intersection(A, complement(B)) = ∅
|
||||
|
|
|
|||
|
|
@ -191,6 +191,13 @@ namespace seq {
|
|||
lbool check_intersection_emptiness(ptr_vector<euf::snode> const& regexes,
|
||||
unsigned max_states = 10000);
|
||||
|
||||
// Check if the language of re contains at least one non-empty string.
|
||||
// Returns true if a non-empty string is definitely accepted.
|
||||
// Returns false if no non-empty string is accepted, or inconclusive.
|
||||
// Used by the regex precheck to avoid false SAT shortcuts for languages
|
||||
// that only contain the empty string, e.g. [a-u]* ∩ v* = {""}.
|
||||
bool accepts_nonempty_string(euf::snode* re);
|
||||
|
||||
// Check if L(subset_re) ⊆ L(superset_re).
|
||||
// Computed as: subset_re ∩ complement(superset_re) = ∅.
|
||||
// Mirrors ZIPT NielsenNode.IsLanguageSubset (NielsenNode.cs:1382-1385)
|
||||
|
|
|
|||
|
|
@ -200,6 +200,19 @@ namespace smt {
|
|||
expr* e = ctx.bool_var2expr(v);
|
||||
expr* s = nullptr, *re = nullptr;
|
||||
TRACE(seq, tout << (is_true ? "" : "¬") << mk_bounded_pp(e, m, 3) << "\n";);
|
||||
// Normalize (= bool_seq_expr true/false) to a direct assignment.
|
||||
// Handles patterns like (not (= (str.contains "A" x) true)).
|
||||
{
|
||||
expr* eq_lhs = nullptr, *eq_rhs = nullptr;
|
||||
if (m.is_eq(e, eq_lhs, eq_rhs)) {
|
||||
if (m.is_true(eq_rhs))
|
||||
e = eq_lhs;
|
||||
else if (m.is_false(eq_rhs)) { e = eq_lhs; is_true = !is_true; }
|
||||
else if (m.is_true(eq_lhs))
|
||||
e = eq_rhs;
|
||||
else if (m.is_false(eq_lhs)) { e = eq_rhs; is_true = !is_true; }
|
||||
}
|
||||
}
|
||||
if (m_seq.str.is_in_re(e, s, re)) {
|
||||
euf::snode* sn_str = get_snode(s);
|
||||
euf::snode* sn_re = get_snode(re);
|
||||
|
|
@ -953,6 +966,10 @@ namespace smt {
|
|||
if (std::holds_alternative<eq_item>(item)) { has_eqs = true; break; }
|
||||
|
||||
bool any_undef = false;
|
||||
// Track whether any variable's intersection only accepts "" (nullable-only).
|
||||
// When true, we must not shortcut to SAT: the only witness is "", which may
|
||||
// be excluded by a disequality (e.g. var ≠ "").
|
||||
bool any_nullable_only = false;
|
||||
|
||||
// Check intersection emptiness for each variable.
|
||||
for (auto& kv : var_to_mems) {
|
||||
|
|
@ -985,19 +1002,35 @@ namespace smt {
|
|||
set_conflict(eqs, lits);
|
||||
return l_true; // conflict asserted
|
||||
}
|
||||
if (result == l_undef)
|
||||
if (result == l_undef) {
|
||||
any_undef = true;
|
||||
// l_false = non-empty intersection, this variable's constraints are satisfiable
|
||||
continue;
|
||||
}
|
||||
// l_false = intersection is non-empty; check if it has a non-empty witness.
|
||||
// Build the intersection snode to call accepts_nonempty_string.
|
||||
euf::snode* inter = nullptr;
|
||||
for (euf::snode* re : regexes) {
|
||||
if (!inter) { inter = re; continue; }
|
||||
expr* r1 = inter->get_expr();
|
||||
expr* r2 = re->get_expr();
|
||||
if (!r1 || !r2) { inter = nullptr; break; }
|
||||
expr_ref intersection(m_seq.re.mk_inter(r1, r2), m);
|
||||
inter = m_sgraph.mk(intersection);
|
||||
if (!inter) break;
|
||||
}
|
||||
if (!inter || !m_regex.accepts_nonempty_string(inter))
|
||||
any_nullable_only = true;
|
||||
}
|
||||
|
||||
if (any_undef)
|
||||
return l_undef; // cannot fully determine; let DFS decide
|
||||
|
||||
// All variables' regex intersections are non-empty.
|
||||
// If there are no word equations, variables are independent and
|
||||
// each can be assigned a witness string → SAT.
|
||||
if (all_primitive && !has_eqs && !has_unhandled_preds()) {
|
||||
TRACE(seq, tout << "nseq regex precheck: all intersections non-empty, "
|
||||
// Only shortcut to SAT when every variable's intersection accepts a non-empty
|
||||
// witness string. If any intersection is nullable-only (= {""}) we cannot
|
||||
// guarantee a witness consistent with disequalities such as var ≠ "".
|
||||
if (all_primitive && !has_eqs && !has_unhandled_preds() && !any_nullable_only) {
|
||||
TRACE(seq, tout << "nseq regex precheck: all intersections have non-empty witnesses, "
|
||||
<< "no word eqs → SAT\n";);
|
||||
return l_false; // signals SAT (non-empty / satisfiable)
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue