3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-04-02 09:58:59 +00:00

Fix three nseq quick-win issues: non-greedy quantifiers, bool-eq normalization, stuck-node crash, precheck SAT soundness

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/03c71d3f-a889-4b06-95ff-ed96a2c0615c
This commit is contained in:
copilot-swe-agent[bot] 2026-03-21 03:04:00 +00:00
parent f518faac9b
commit 3c1b1da8ac
5 changed files with 88 additions and 7 deletions

View file

@ -687,6 +687,10 @@ void seq_decl_plugin::get_op_names(svector<builtin_name> & op_names, symbol cons
op_names.push_back(builtin_name("int.to.str", OP_STRING_ITOS));
op_names.push_back(builtin_name("re.nostr", _OP_REGEXP_EMPTY));
op_names.push_back(builtin_name("re.complement", OP_RE_COMPLEMENT));
// Non-greedy quantifiers: semantically identical to greedy for satisfiability
op_names.push_back(builtin_name("re.+?", OP_RE_PLUS));
op_names.push_back(builtin_name("re.*?", OP_RE_STAR));
op_names.push_back(builtin_name("re.??", OP_RE_OPTION));
op_names.push_back(builtin_name("str.from_ubv", OP_STRING_UBVTOS));
op_names.push_back(builtin_name("str.from_sbv", OP_STRING_SBVTOS));
}

View file

@ -2453,7 +2453,13 @@ namespace seq {
if (!node->is_extended()) {
bool ext = generate_extensions(node);
IF_VERBOSE(0, display(verbose_stream(), node));
VERIFY(ext);
if (!ext) {
// No modifier applies: the node is stuck (unsupported equation form).
// Mark as extended with no children to avoid redundant re-exploration,
// and return unknown so the DFS will report FC_GIVEUP.
node->set_extended(true);
return search_result::unknown;
}
node->set_extended(true);
++m_stats.m_num_extensions;
}

View file

@ -612,6 +612,37 @@ namespace seq {
return is_empty_bfs(result, max_states);
}
// -----------------------------------------------------------------------
// Check if the language of re accepts at least one non-empty string.
// Returns true if a non-empty string is definitely accepted.
// Returns false conservatively (also false when inconclusive).
// Used by the regex precheck SAT shortcut: only shortcut to SAT when the
// intersection language contains a non-empty witness, avoiding false SAT
// for intersections like [a-u]* ∩ v* = {""} when var ≠ "" is also asserted.
// -----------------------------------------------------------------------
bool seq_regex::accepts_nonempty_string(euf::snode* re) {
if (!re || re->is_fail()) return false;
// Quick structural checks: full-seq, full-char, and range always
// accept non-empty strings (they require at least one character).
if (re->is_full_seq() || re->is_full_char()) return true;
// Only explore ground regexes; non-ground could expand unpredictably.
if (!re->is_ground()) return true; // conservative: assume non-empty exists
// For s_other snodes (unrecognized kinds), be conservative.
if (re->kind() == euf::snode_kind::s_other) return true;
// Compute one level of derivatives: if any derivative is non-fail and
// non-empty, then the regex accepts some string starting with that char.
euf::snode_vector reps;
if (!get_alphabet_representatives(re, reps))
return true; // conservative
for (euf::snode* ch : reps) {
euf::snode* deriv = m_sg.brzozowski_deriv(re, ch);
if (deriv && !deriv->is_fail() && !is_empty_regex(deriv))
return true;
}
return false;
}
// -----------------------------------------------------------------------
// Language subset check: L(A) ⊆ L(B)
// via intersection(A, complement(B)) = ∅

View file

@ -191,6 +191,13 @@ namespace seq {
lbool check_intersection_emptiness(ptr_vector<euf::snode> const& regexes,
unsigned max_states = 10000);
// Check if the language of re contains at least one non-empty string.
// Returns true if a non-empty string is definitely accepted.
// Returns false if no non-empty string is accepted, or inconclusive.
// Used by the regex precheck to avoid false SAT shortcuts for languages
// that only contain the empty string, e.g. [a-u]* ∩ v* = {""}.
bool accepts_nonempty_string(euf::snode* re);
// Check if L(subset_re) ⊆ L(superset_re).
// Computed as: subset_re ∩ complement(superset_re) = ∅.
// Mirrors ZIPT NielsenNode.IsLanguageSubset (NielsenNode.cs:1382-1385)

View file

@ -200,6 +200,19 @@ namespace smt {
expr* e = ctx.bool_var2expr(v);
expr* s = nullptr, *re = nullptr;
TRACE(seq, tout << (is_true ? "" : "¬") << mk_bounded_pp(e, m, 3) << "\n";);
// Normalize (= bool_seq_expr true/false) to a direct assignment.
// Handles patterns like (not (= (str.contains "A" x) true)).
{
expr* eq_lhs = nullptr, *eq_rhs = nullptr;
if (m.is_eq(e, eq_lhs, eq_rhs)) {
if (m.is_true(eq_rhs))
e = eq_lhs;
else if (m.is_false(eq_rhs)) { e = eq_lhs; is_true = !is_true; }
else if (m.is_true(eq_lhs))
e = eq_rhs;
else if (m.is_false(eq_lhs)) { e = eq_rhs; is_true = !is_true; }
}
}
if (m_seq.str.is_in_re(e, s, re)) {
euf::snode* sn_str = get_snode(s);
euf::snode* sn_re = get_snode(re);
@ -953,6 +966,10 @@ namespace smt {
if (std::holds_alternative<eq_item>(item)) { has_eqs = true; break; }
bool any_undef = false;
// Track whether any variable's intersection only accepts "" (nullable-only).
// When true, we must not shortcut to SAT: the only witness is "", which may
// be excluded by a disequality (e.g. var ≠ "").
bool any_nullable_only = false;
// Check intersection emptiness for each variable.
for (auto& kv : var_to_mems) {
@ -985,19 +1002,35 @@ namespace smt {
set_conflict(eqs, lits);
return l_true; // conflict asserted
}
if (result == l_undef)
if (result == l_undef) {
any_undef = true;
// l_false = non-empty intersection, this variable's constraints are satisfiable
continue;
}
// l_false = intersection is non-empty; check if it has a non-empty witness.
// Build the intersection snode to call accepts_nonempty_string.
euf::snode* inter = nullptr;
for (euf::snode* re : regexes) {
if (!inter) { inter = re; continue; }
expr* r1 = inter->get_expr();
expr* r2 = re->get_expr();
if (!r1 || !r2) { inter = nullptr; break; }
expr_ref intersection(m_seq.re.mk_inter(r1, r2), m);
inter = m_sgraph.mk(intersection);
if (!inter) break;
}
if (!inter || !m_regex.accepts_nonempty_string(inter))
any_nullable_only = true;
}
if (any_undef)
return l_undef; // cannot fully determine; let DFS decide
// All variables' regex intersections are non-empty.
// If there are no word equations, variables are independent and
// each can be assigned a witness string → SAT.
if (all_primitive && !has_eqs && !has_unhandled_preds()) {
TRACE(seq, tout << "nseq regex precheck: all intersections non-empty, "
// Only shortcut to SAT when every variable's intersection accepts a non-empty
// witness string. If any intersection is nullable-only (= {""}) we cannot
// guarantee a witness consistent with disequalities such as var ≠ "".
if (all_primitive && !has_eqs && !has_unhandled_preds() && !any_nullable_only) {
TRACE(seq, tout << "nseq regex precheck: all intersections have non-empty witnesses, "
<< "no word eqs → SAT\n";);
return l_false; // signals SAT (non-empty / satisfiable)
}