3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-03-07 13:54:53 +00:00

Add Phase 4: length reasoning and canonize fix

- Length axiom generation: concat decomposition, unit=1, empty=0, string=k
- Bidirectional len(x)=0 <-> x="" axioms
- check_zero_length: propagate zero-length variables to empty
- propagate_length_eqs: equate lengths of equation sides
- Fixed canonize: decompose structurally first, then follow e-graph roots
  Prevents both equation sides resolving to same root expression
- Fixed branch_var_prefix: skip cyclic candidates (containing variable itself)
  and ground candidates with wrong length
- check_length_conflict prevents e-graph corruption from incompatible merges
- Fresh value pinning for model generation (m_fresh_values)
- Regression: 36/62 correct, 0 wrong, 26 unknown on string-concat.smt2

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
Nikolaj Bjorner 2026-02-27 22:48:58 -08:00
parent f48040d809
commit 4b58d57b61
5 changed files with 259 additions and 25 deletions

0
spec/log.txt Normal file
View file

View file

@ -28,7 +28,8 @@ namespace smt {
m_eq_id(0),
m_axioms(m),
m_axioms_head(0),
m_length_apps(m) {
m_length_apps(m),
m_length_exprs(m) {
}
void nseq_state::push_scope() {
@ -56,6 +57,7 @@ namespace smt {
m_axioms_head = 0;
m_has_length.reset();
m_length_apps.reset();
m_length_exprs.reset();
m_eq_id = 0;
}
@ -99,8 +101,10 @@ namespace smt {
if (m_has_length.contains(e))
return;
m_length_apps.push_back(len_app);
m_length_exprs.push_back(e);
m_has_length.insert(e);
ts.push(push_back_vector<expr_ref_vector>(m_length_apps));
ts.push(push_back_vector<expr_ref_vector>(m_length_exprs));
ts.push(insert_obj_trail<expr>(m_has_length, e));
}

View file

@ -71,6 +71,7 @@ namespace smt {
// Length tracking
obj_hashtable<expr> m_has_length;
expr_ref_vector m_length_apps;
expr_ref_vector m_length_exprs; // corresponding string exprs
// Trail for undo
trail_stack m_trail;
@ -107,6 +108,27 @@ namespace smt {
// Length tracking
bool has_length(expr* e) const { return m_has_length.contains(e); }
void add_length(expr* len_app, expr* e, trail_stack& ts);
unsigned length_count() const { return m_length_apps.size(); }
struct length_pair { expr* len_app; expr* str_expr; };
class length_pair_iter {
expr_ref_vector const& m_apps;
expr_ref_vector const& m_exprs;
public:
length_pair_iter(expr_ref_vector const& a, expr_ref_vector const& e) : m_apps(a), m_exprs(e) {}
struct iterator {
expr_ref_vector const& m_apps;
expr_ref_vector const& m_exprs;
unsigned m_idx;
iterator(expr_ref_vector const& a, expr_ref_vector const& e, unsigned i) : m_apps(a), m_exprs(e), m_idx(i) {}
length_pair operator*() const { return { m_apps[m_idx], m_exprs[m_idx] }; }
iterator& operator++() { ++m_idx; return *this; }
bool operator!=(iterator const& o) const { return m_idx != o.m_idx; }
};
iterator begin() const { return iterator(m_apps, m_exprs, 0); }
iterator end() const { return iterator(m_apps, m_exprs, m_apps.size()); }
};
length_pair_iter length_pairs() const { return length_pair_iter(m_length_apps, m_length_exprs); }
// Accessors
scoped_vector<nseq_eq> const& eqs() const { return m_eqs; }

View file

@ -36,7 +36,8 @@ namespace smt {
m_nielsen(ctx.get_manager(), m_seq_rewrite),
m_find(*this),
m_has_seq(false),
m_new_propagation(false) {
m_new_propagation(false),
m_fresh_values(ctx.get_manager()) {
}
theory_nseq::~theory_nseq() {
@ -71,8 +72,15 @@ namespace smt {
if (solve_eqs())
return FC_CONTINUE;
// Propagate length equalities from equations
if (propagate_length_eqs())
return FC_CONTINUE;
// Check zero-length variables
if (check_zero_length())
return FC_CONTINUE;
// TODO: implement regex membership checking
// TODO: implement length/Parikh reasoning
if (all_eqs_solved() && m_state.mems().empty())
return FC_DONE;
@ -89,6 +97,7 @@ namespace smt {
}
bool theory_nseq::internalize_term(app* term) {
TRACE(seq, tout << "internalize_term: " << mk_bounded_pp(term, m, 3) << "\n";);
if (!m_has_seq) {
ctx.push_trail(value_trail<bool>(m_has_seq));
m_has_seq = true;
@ -152,6 +161,7 @@ namespace smt {
}
void theory_nseq::apply_sort_cnstr(enode* n, sort* s) {
TRACE(seq, tout << "apply_sort_cnstr: " << mk_bounded_pp(n->get_expr(), m, 3) << "\n";);
mk_var(n);
}
@ -255,8 +265,10 @@ namespace smt {
}
void theory_nseq::relevant_eh(app* n) {
if (m_util.str.is_length(n))
if (m_util.str.is_length(n)) {
add_length(n);
enque_axiom(n);
}
// Enqueue axioms for operations that need reduction
if (m_util.str.is_index(n) ||
@ -280,8 +292,9 @@ namespace smt {
}
void theory_nseq::deque_axiom(expr* e) {
// TODO: generate axioms for string operations
TRACE(seq, tout << "deque axiom: " << mk_bounded_pp(e, m) << "\n";);
if (m_util.str.is_length(e))
add_length_axiom(e);
}
void theory_nseq::add_axiom(literal l1, literal l2, literal l3, literal l4, literal l5) {
@ -374,30 +387,70 @@ namespace smt {
return expr_ref(m_util.str.mk_length(s), m);
}
// Ensure that a string expression has its length axiom.
// Called when equation solving creates or uses new string terms.
void theory_nseq::ensure_length_axiom(expr* e) {
if (!m_util.is_seq(e))
return;
expr_ref len(m_util.str.mk_length(e), m);
ensure_enode(len);
if (!m_state.has_length(e)) {
m_state.add_length(len, e, m_state.trail());
enque_axiom(len);
}
}
expr_ref theory_nseq::mk_concat(expr_ref_vector const& es, sort* s) {
SASSERT(!es.empty());
return expr_ref(m_util.str.mk_concat(es.size(), es.data(), s), m);
}
// Canonize an equation side using current e-graph equivalences.
// Check if a propagation x = t would create a length inconsistency.
// If x has a known fixed length that differs from t's minimum length, return true (conflict).
bool theory_nseq::check_length_conflict(expr* x, expr_ref_vector const& es, nseq_dependency* dep) {
rational x_lo, x_hi;
expr_ref x_len = mk_len(x);
if (!ctx.e_internalized(x_len))
return false;
if (!lower_bound(x_len, x_lo) || !upper_bound(x_len, x_hi))
return false;
// Compute minimum length of the other side
rational min_len(0);
for (expr* e : es) {
zstring s;
if (m_util.str.is_string(e, s))
min_len += rational(s.length());
else if (m_util.str.is_unit(e))
min_len += rational(1);
// Variables contribute ≥ 0, so min remains unchanged
}
if (x_hi < min_len) {
// x's max length is less than the minimum length of the term
set_conflict(dep);
return true;
}
return false;
}
// Replaces each element with its canonical representative, decomposing
// concatenations and string constants as needed.
bool theory_nseq::canonize(expr_ref_vector const& src, expr_ref_vector& dst,
nseq_dependency*& dep) {
dst.reset();
for (expr* e : src) {
if (!ctx.e_internalized(e)) {
dst.push_back(e);
// Step 1: structurally decompose each src expression
expr_ref_vector units(m);
for (expr* e : src)
m_util.str.get_concat_units(e, units);
// Step 2: follow e-graph roots for each atomic component
for (expr* u : units) {
if (!ctx.e_internalized(u)) {
dst.push_back(u);
continue;
}
enode* n = ctx.get_enode(e);
enode* n = ctx.get_enode(u);
enode* r = n->get_root();
expr* re = r->get_expr();
if (re != e) {
// Track the dependency for the equality
if (re != u)
dep = m_state.mk_join(dep, m_state.mk_dep(n, r));
}
// Decompose the canonical representative into concat components
m_util.str.get_concat_units(re, dst);
}
return true;
@ -408,14 +461,11 @@ namespace smt {
for (auto const& eq : m_state.eqs()) {
expr_ref_vector lhs(m), rhs(m);
nseq_dependency* dep = eq.dep();
if (!canonize(eq.lhs(), lhs, dep) || !canonize(eq.rhs(), rhs, dep))
if (!canonize(eq.lhs(), lhs, dep))
return false;
if (!canonize(eq.rhs(), rhs, dep))
return false;
seq::nielsen_result result = m_nielsen.simplify(lhs, rhs);
TRACE(seq, tout << "all_eqs_solved [" << eq.id() << "]: ";
for (auto* e : lhs) tout << mk_bounded_pp(e, m, 2) << " ";
tout << "= ";
for (auto* e : rhs) tout << mk_bounded_pp(e, m, 2) << " ";
tout << " -> " << (int)result << "\n";);
if (result != seq::nielsen_result::solved)
return false;
}
@ -441,6 +491,7 @@ namespace smt {
if (!canonize(eq.lhs(), lhs, dep) || !canonize(eq.rhs(), rhs, dep))
return false;
TRACE(seq, tout << "solve_eq [" << eq.id() << "]: ";
for (auto* e : lhs) tout << mk_bounded_pp(e, m, 2) << " ";
tout << "= ";
@ -455,13 +506,19 @@ namespace smt {
// Propagate solved form: either both empty, var = empty, or var = ground term
bool changed = false;
if (lhs.size() == 1 && m_nielsen.is_var(lhs.get(0)) && !rhs.empty()) {
if (check_length_conflict(lhs.get(0), rhs, dep))
return true;
sort* s = lhs.get(0)->get_sort();
expr_ref rhs_concat = mk_concat(rhs, s);
ensure_length_axiom(rhs_concat);
changed = propagate_eq(dep, lhs.get(0), rhs_concat);
}
else if (rhs.size() == 1 && m_nielsen.is_var(rhs.get(0)) && !lhs.empty()) {
if (check_length_conflict(rhs.get(0), lhs, dep))
return true;
sort* s = rhs.get(0)->get_sort();
expr_ref lhs_concat = mk_concat(lhs, s);
ensure_length_axiom(lhs_concat);
changed = propagate_eq(dep, rhs.get(0), lhs_concat);
}
else {
@ -490,6 +547,8 @@ namespace smt {
sort* s = lhs[0]->get_sort();
expr_ref l = mk_concat(lhs, s);
expr_ref r = mk_concat(rhs, s);
ensure_length_axiom(l);
ensure_length_axiom(r);
changed = propagate_eq(dep, l, r);
}
else {
@ -574,8 +633,37 @@ namespace smt {
// For x starting one side, try x = prefix of other side
// x = "" was already tried (assigned false)
// Now enumerate: x = other[0], x = other[0]·other[1], ...
rational x_len;
bool has_x_len = get_length(x, x_len);
// Find the range of valid prefix lengths
// Stop before we include x itself (cyclic candidate)
unsigned max_prefix = other.size();
enode* x_root = ctx.get_enode(x)->get_root();
for (unsigned i = 0; i < other.size(); ++i) {
if (ctx.e_internalized(other[i]) &&
ctx.get_enode(other[i])->get_root() == x_root) {
max_prefix = i;
break;
}
}
if (max_prefix == 0)
return false;
expr_ref candidate(m);
for (unsigned len = 1; len <= other.size(); ++len) {
unsigned min_len = 0;
bool has_var = false;
for (unsigned len = 1; len <= max_prefix; ++len) {
min_len += m_util.str.min_length(other[len - 1]);
has_var = has_var || m_nielsen.is_var(other[len - 1]);
// Skip if candidate length is incompatible with variable length
if (has_x_len) {
if (rational(min_len) > x_len)
continue;
// For ground candidates (no variables), exact length must match
if (!has_var && rational(min_len) != x_len)
continue;
}
if (len == 1)
candidate = other[0];
else
@ -584,12 +672,14 @@ namespace smt {
switch (ctx.get_assignment(eq)) {
case l_undef:
TRACE(seq, tout << "branch " << mk_bounded_pp(x, m) << " = " << mk_bounded_pp(candidate, m) << "\n";);
ensure_length_axiom(candidate);
ctx.force_phase(eq);
ctx.mark_as_relevant(eq);
m_new_propagation = true;
m_state.stats().m_num_splits++;
return true;
case l_true:
ensure_length_axiom(candidate);
propagate_eq(dep, x, candidate);
return true;
case l_false:
@ -599,6 +689,109 @@ namespace smt {
return false;
}
// -------------------------------------------------------
// Length reasoning
// -------------------------------------------------------
void theory_nseq::add_length_axiom(expr* n) {
expr* x = nullptr;
VERIFY(m_util.str.is_length(n, x));
if (m_util.str.is_concat(x) && to_app(x)->get_num_args() != 0) {
// len(x1 ++ x2 ++ ...) = len(x1) + len(x2) + ...
ptr_vector<expr> args;
for (auto arg : *to_app(x))
args.push_back(m_util.str.mk_length(arg));
expr_ref len_sum(m_autil.mk_add(args.size(), args.data()), m);
add_axiom(mk_eq(len_sum, n, false));
}
else if (m_util.str.is_unit(x)) {
// len(unit(c)) = 1
add_axiom(mk_eq(n, m_autil.mk_int(1), false));
}
else if (m_util.str.is_empty(x)) {
// len("") = 0
add_axiom(mk_eq(n, m_autil.mk_int(0), false));
}
else {
zstring s;
if (m_util.str.is_string(x, s)) {
// len("abc") = 3
add_axiom(mk_eq(n, m_autil.mk_int(s.length()), false));
}
else {
// len(x) >= 0
add_axiom(mk_literal(m_autil.mk_ge(n, m_autil.mk_int(0))));
// len(x) = 0 ↔ x = ""
expr_ref emp(m_util.str.mk_empty(x->get_sort()), m);
literal len_zero = mk_eq(n, m_autil.mk_int(0), false);
literal x_empty = mk_eq(x, emp, false);
add_axiom(~len_zero, x_empty); // len(x) = 0 → x = ""
add_axiom(len_zero, ~x_empty); // x = "" → len(x) = 0
}
}
}
bool theory_nseq::get_length(expr* e, rational& val) {
expr_ref len = mk_len(e);
return m_arith_value.get_value(len, val);
}
bool theory_nseq::lower_bound(expr* e, rational& lo) {
bool strict;
return m_arith_value.get_lo(e, lo, strict) && !strict;
}
bool theory_nseq::upper_bound(expr* e, rational& hi) {
bool strict;
return m_arith_value.get_up(e, hi, strict) && !strict;
}
bool theory_nseq::check_zero_length() {
bool progress = false;
for (auto const& [len_app, str_expr] : m_state.length_pairs()) {
rational lo, hi;
if (lower_bound(len_app, lo) && upper_bound(len_app, hi) &&
lo.is_zero() && hi.is_zero()) {
// len(e) = 0 → e = ""
expr* e = str_expr;
expr_ref emp(m_util.str.mk_empty(e->get_sort()), m);
enode* n = ensure_enode(e);
enode* n_emp = ensure_enode(emp);
if (n->get_root() != n_emp->get_root()) {
literal len_zero = mk_eq(len_app, m_autil.mk_int(0), false);
nseq_dependency* dep = m_state.mk_dep(len_zero);
propagate_eq(dep, e, emp);
progress = true;
}
}
}
return progress;
}
bool theory_nseq::propagate_length_eqs() {
bool progress = false;
for (auto const& eq : m_state.eqs()) {
expr_ref_vector const& lhs = eq.lhs();
expr_ref_vector const& rhs = eq.rhs();
if (lhs.empty() || rhs.empty())
continue;
sort* s = lhs[0]->get_sort();
expr_ref l = mk_concat(lhs, s);
expr_ref r = mk_concat(rhs, s);
expr_ref len_l = mk_len(l);
expr_ref len_r = mk_len(r);
if (ctx.e_internalized(len_l) && ctx.e_internalized(len_r)) {
enode* nl = ctx.get_enode(len_l);
enode* nr = ctx.get_enode(len_r);
if (nl->get_root() != nr->get_root()) {
propagate_eq(eq.dep(), len_l, len_r, false);
progress = true;
}
}
}
return progress;
}
// -------------------------------------------------------
// Display and statistics
// -------------------------------------------------------
@ -626,7 +819,6 @@ namespace smt {
model_value_proc* theory_nseq::mk_value(enode* n, model_generator& mg) {
app* e = n->get_expr();
TRACE(seq, tout << "mk_value: " << mk_bounded_pp(e, m) << "\n";);
if (m_util.is_seq(e)) {
// Walk the equivalence class looking for a concrete string value
@ -635,18 +827,22 @@ namespace smt {
do {
expr* ce = curr->get_expr();
zstring s;
if (m_util.str.is_string(ce, s))
if (m_util.str.is_string(ce, s)) {
return alloc(expr_wrapper_proc, to_app(ce));
if (m_util.str.is_empty(ce))
}
if (m_util.str.is_empty(ce)) {
return alloc(expr_wrapper_proc, to_app(ce));
}
curr = curr->get_next();
} while (curr != root);
// No concrete value found: use seq_factory to get a fresh value
expr_ref val(m);
val = mg.get_model().get_fresh_value(e->get_sort());
if (val)
if (val) {
m_fresh_values.push_back(val);
return alloc(expr_wrapper_proc, to_app(val));
}
// Fallback: empty string
return alloc(expr_wrapper_proc, to_app(m_util.str.mk_empty(e->get_sort())));
}
@ -662,6 +858,7 @@ namespace smt {
}
void theory_nseq::finalize_model(model_generator& mg) {
m_fresh_values.reset();
}
}

View file

@ -44,6 +44,7 @@ namespace smt {
nseq_union_find m_find;
bool m_has_seq;
bool m_new_propagation;
expr_ref_vector m_fresh_values; // keep model fresh values alive
// Theory interface
final_check_status final_check_eh(unsigned) override;
@ -83,6 +84,7 @@ namespace smt {
// Helpers
void add_length(expr* l);
void ensure_length_axiom(expr* e);
literal mk_literal(expr* e);
literal mk_eq_empty(expr* e, bool phase = true);
expr_ref mk_len(expr* s);
@ -96,6 +98,15 @@ namespace smt {
bool branch_var_prefix(expr* x, expr_ref_vector const& other, nseq_dependency* dep);
bool canonize(expr_ref_vector const& src, expr_ref_vector& dst, nseq_dependency*& dep);
bool all_eqs_solved();
bool check_length_conflict(expr* x, expr_ref_vector const& es, nseq_dependency* dep);
// Length reasoning
void add_length_axiom(expr* n);
bool check_zero_length();
bool propagate_length_eqs();
bool get_length(expr* e, rational& val);
bool lower_bound(expr* e, rational& lo);
bool upper_bound(expr* e, rational& hi);
public:
theory_nseq(context& ctx);