diff --git a/spec/log.txt b/spec/log.txt new file mode 100644 index 000000000..e69de29bb diff --git a/src/smt/nseq_state.cpp b/src/smt/nseq_state.cpp index 55a9b1930..c59ab3f5a 100644 --- a/src/smt/nseq_state.cpp +++ b/src/smt/nseq_state.cpp @@ -28,7 +28,8 @@ namespace smt { m_eq_id(0), m_axioms(m), m_axioms_head(0), - m_length_apps(m) { + m_length_apps(m), + m_length_exprs(m) { } void nseq_state::push_scope() { @@ -56,6 +57,7 @@ namespace smt { m_axioms_head = 0; m_has_length.reset(); m_length_apps.reset(); + m_length_exprs.reset(); m_eq_id = 0; } @@ -99,8 +101,10 @@ namespace smt { if (m_has_length.contains(e)) return; m_length_apps.push_back(len_app); + m_length_exprs.push_back(e); m_has_length.insert(e); ts.push(push_back_vector(m_length_apps)); + ts.push(push_back_vector(m_length_exprs)); ts.push(insert_obj_trail(m_has_length, e)); } diff --git a/src/smt/nseq_state.h b/src/smt/nseq_state.h index ce11f556b..90dd4ed25 100644 --- a/src/smt/nseq_state.h +++ b/src/smt/nseq_state.h @@ -71,6 +71,7 @@ namespace smt { // Length tracking obj_hashtable m_has_length; expr_ref_vector m_length_apps; + expr_ref_vector m_length_exprs; // corresponding string exprs // Trail for undo trail_stack m_trail; @@ -107,6 +108,27 @@ namespace smt { // Length tracking bool has_length(expr* e) const { return m_has_length.contains(e); } void add_length(expr* len_app, expr* e, trail_stack& ts); + unsigned length_count() const { return m_length_apps.size(); } + + struct length_pair { expr* len_app; expr* str_expr; }; + class length_pair_iter { + expr_ref_vector const& m_apps; + expr_ref_vector const& m_exprs; + public: + length_pair_iter(expr_ref_vector const& a, expr_ref_vector const& e) : m_apps(a), m_exprs(e) {} + struct iterator { + expr_ref_vector const& m_apps; + expr_ref_vector const& m_exprs; + unsigned m_idx; + iterator(expr_ref_vector const& a, expr_ref_vector const& e, unsigned i) : m_apps(a), m_exprs(e), m_idx(i) {} + length_pair operator*() const { return { m_apps[m_idx], m_exprs[m_idx] }; } + iterator& operator++() { ++m_idx; return *this; } + bool operator!=(iterator const& o) const { return m_idx != o.m_idx; } + }; + iterator begin() const { return iterator(m_apps, m_exprs, 0); } + iterator end() const { return iterator(m_apps, m_exprs, m_apps.size()); } + }; + length_pair_iter length_pairs() const { return length_pair_iter(m_length_apps, m_length_exprs); } // Accessors scoped_vector const& eqs() const { return m_eqs; } diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index 2f08b89a5..bdf149e61 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -36,7 +36,8 @@ namespace smt { m_nielsen(ctx.get_manager(), m_seq_rewrite), m_find(*this), m_has_seq(false), - m_new_propagation(false) { + m_new_propagation(false), + m_fresh_values(ctx.get_manager()) { } theory_nseq::~theory_nseq() { @@ -71,8 +72,15 @@ namespace smt { if (solve_eqs()) return FC_CONTINUE; + // Propagate length equalities from equations + if (propagate_length_eqs()) + return FC_CONTINUE; + + // Check zero-length variables + if (check_zero_length()) + return FC_CONTINUE; + // TODO: implement regex membership checking - // TODO: implement length/Parikh reasoning if (all_eqs_solved() && m_state.mems().empty()) return FC_DONE; @@ -89,6 +97,7 @@ namespace smt { } bool theory_nseq::internalize_term(app* term) { + TRACE(seq, tout << "internalize_term: " << mk_bounded_pp(term, m, 3) << "\n";); if (!m_has_seq) { ctx.push_trail(value_trail(m_has_seq)); m_has_seq = true; @@ -152,6 +161,7 @@ namespace smt { } void theory_nseq::apply_sort_cnstr(enode* n, sort* s) { + TRACE(seq, tout << "apply_sort_cnstr: " << mk_bounded_pp(n->get_expr(), m, 3) << "\n";); mk_var(n); } @@ -255,8 +265,10 @@ namespace smt { } void theory_nseq::relevant_eh(app* n) { - if (m_util.str.is_length(n)) + if (m_util.str.is_length(n)) { add_length(n); + enque_axiom(n); + } // Enqueue axioms for operations that need reduction if (m_util.str.is_index(n) || @@ -280,8 +292,9 @@ namespace smt { } void theory_nseq::deque_axiom(expr* e) { - // TODO: generate axioms for string operations TRACE(seq, tout << "deque axiom: " << mk_bounded_pp(e, m) << "\n";); + if (m_util.str.is_length(e)) + add_length_axiom(e); } void theory_nseq::add_axiom(literal l1, literal l2, literal l3, literal l4, literal l5) { @@ -374,30 +387,70 @@ namespace smt { return expr_ref(m_util.str.mk_length(s), m); } + // Ensure that a string expression has its length axiom. + // Called when equation solving creates or uses new string terms. + void theory_nseq::ensure_length_axiom(expr* e) { + if (!m_util.is_seq(e)) + return; + expr_ref len(m_util.str.mk_length(e), m); + ensure_enode(len); + if (!m_state.has_length(e)) { + m_state.add_length(len, e, m_state.trail()); + enque_axiom(len); + } + } + expr_ref theory_nseq::mk_concat(expr_ref_vector const& es, sort* s) { SASSERT(!es.empty()); return expr_ref(m_util.str.mk_concat(es.size(), es.data(), s), m); } - // Canonize an equation side using current e-graph equivalences. + // Check if a propagation x = t would create a length inconsistency. + // If x has a known fixed length that differs from t's minimum length, return true (conflict). + bool theory_nseq::check_length_conflict(expr* x, expr_ref_vector const& es, nseq_dependency* dep) { + rational x_lo, x_hi; + expr_ref x_len = mk_len(x); + if (!ctx.e_internalized(x_len)) + return false; + if (!lower_bound(x_len, x_lo) || !upper_bound(x_len, x_hi)) + return false; + // Compute minimum length of the other side + rational min_len(0); + for (expr* e : es) { + zstring s; + if (m_util.str.is_string(e, s)) + min_len += rational(s.length()); + else if (m_util.str.is_unit(e)) + min_len += rational(1); + // Variables contribute ≥ 0, so min remains unchanged + } + if (x_hi < min_len) { + // x's max length is less than the minimum length of the term + set_conflict(dep); + return true; + } + return false; + } // Replaces each element with its canonical representative, decomposing // concatenations and string constants as needed. bool theory_nseq::canonize(expr_ref_vector const& src, expr_ref_vector& dst, nseq_dependency*& dep) { dst.reset(); - for (expr* e : src) { - if (!ctx.e_internalized(e)) { - dst.push_back(e); + // Step 1: structurally decompose each src expression + expr_ref_vector units(m); + for (expr* e : src) + m_util.str.get_concat_units(e, units); + // Step 2: follow e-graph roots for each atomic component + for (expr* u : units) { + if (!ctx.e_internalized(u)) { + dst.push_back(u); continue; } - enode* n = ctx.get_enode(e); + enode* n = ctx.get_enode(u); enode* r = n->get_root(); expr* re = r->get_expr(); - if (re != e) { - // Track the dependency for the equality + if (re != u) dep = m_state.mk_join(dep, m_state.mk_dep(n, r)); - } - // Decompose the canonical representative into concat components m_util.str.get_concat_units(re, dst); } return true; @@ -408,14 +461,11 @@ namespace smt { for (auto const& eq : m_state.eqs()) { expr_ref_vector lhs(m), rhs(m); nseq_dependency* dep = eq.dep(); - if (!canonize(eq.lhs(), lhs, dep) || !canonize(eq.rhs(), rhs, dep)) + if (!canonize(eq.lhs(), lhs, dep)) + return false; + if (!canonize(eq.rhs(), rhs, dep)) return false; seq::nielsen_result result = m_nielsen.simplify(lhs, rhs); - TRACE(seq, tout << "all_eqs_solved [" << eq.id() << "]: "; - for (auto* e : lhs) tout << mk_bounded_pp(e, m, 2) << " "; - tout << "= "; - for (auto* e : rhs) tout << mk_bounded_pp(e, m, 2) << " "; - tout << " -> " << (int)result << "\n";); if (result != seq::nielsen_result::solved) return false; } @@ -441,6 +491,7 @@ namespace smt { if (!canonize(eq.lhs(), lhs, dep) || !canonize(eq.rhs(), rhs, dep)) return false; + TRACE(seq, tout << "solve_eq [" << eq.id() << "]: "; for (auto* e : lhs) tout << mk_bounded_pp(e, m, 2) << " "; tout << "= "; @@ -455,13 +506,19 @@ namespace smt { // Propagate solved form: either both empty, var = empty, or var = ground term bool changed = false; if (lhs.size() == 1 && m_nielsen.is_var(lhs.get(0)) && !rhs.empty()) { + if (check_length_conflict(lhs.get(0), rhs, dep)) + return true; sort* s = lhs.get(0)->get_sort(); expr_ref rhs_concat = mk_concat(rhs, s); + ensure_length_axiom(rhs_concat); changed = propagate_eq(dep, lhs.get(0), rhs_concat); } else if (rhs.size() == 1 && m_nielsen.is_var(rhs.get(0)) && !lhs.empty()) { + if (check_length_conflict(rhs.get(0), lhs, dep)) + return true; sort* s = rhs.get(0)->get_sort(); expr_ref lhs_concat = mk_concat(lhs, s); + ensure_length_axiom(lhs_concat); changed = propagate_eq(dep, rhs.get(0), lhs_concat); } else { @@ -490,6 +547,8 @@ namespace smt { sort* s = lhs[0]->get_sort(); expr_ref l = mk_concat(lhs, s); expr_ref r = mk_concat(rhs, s); + ensure_length_axiom(l); + ensure_length_axiom(r); changed = propagate_eq(dep, l, r); } else { @@ -574,8 +633,37 @@ namespace smt { // For x starting one side, try x = prefix of other side // x = "" was already tried (assigned false) // Now enumerate: x = other[0], x = other[0]·other[1], ... + rational x_len; + bool has_x_len = get_length(x, x_len); + + // Find the range of valid prefix lengths + // Stop before we include x itself (cyclic candidate) + unsigned max_prefix = other.size(); + enode* x_root = ctx.get_enode(x)->get_root(); + for (unsigned i = 0; i < other.size(); ++i) { + if (ctx.e_internalized(other[i]) && + ctx.get_enode(other[i])->get_root() == x_root) { + max_prefix = i; + break; + } + } + if (max_prefix == 0) + return false; + expr_ref candidate(m); - for (unsigned len = 1; len <= other.size(); ++len) { + unsigned min_len = 0; + bool has_var = false; + for (unsigned len = 1; len <= max_prefix; ++len) { + min_len += m_util.str.min_length(other[len - 1]); + has_var = has_var || m_nielsen.is_var(other[len - 1]); + // Skip if candidate length is incompatible with variable length + if (has_x_len) { + if (rational(min_len) > x_len) + continue; + // For ground candidates (no variables), exact length must match + if (!has_var && rational(min_len) != x_len) + continue; + } if (len == 1) candidate = other[0]; else @@ -584,12 +672,14 @@ namespace smt { switch (ctx.get_assignment(eq)) { case l_undef: TRACE(seq, tout << "branch " << mk_bounded_pp(x, m) << " = " << mk_bounded_pp(candidate, m) << "\n";); + ensure_length_axiom(candidate); ctx.force_phase(eq); ctx.mark_as_relevant(eq); m_new_propagation = true; m_state.stats().m_num_splits++; return true; case l_true: + ensure_length_axiom(candidate); propagate_eq(dep, x, candidate); return true; case l_false: @@ -599,6 +689,109 @@ namespace smt { return false; } + // ------------------------------------------------------- + // Length reasoning + // ------------------------------------------------------- + + void theory_nseq::add_length_axiom(expr* n) { + expr* x = nullptr; + VERIFY(m_util.str.is_length(n, x)); + if (m_util.str.is_concat(x) && to_app(x)->get_num_args() != 0) { + // len(x1 ++ x2 ++ ...) = len(x1) + len(x2) + ... + ptr_vector args; + for (auto arg : *to_app(x)) + args.push_back(m_util.str.mk_length(arg)); + expr_ref len_sum(m_autil.mk_add(args.size(), args.data()), m); + add_axiom(mk_eq(len_sum, n, false)); + } + else if (m_util.str.is_unit(x)) { + // len(unit(c)) = 1 + add_axiom(mk_eq(n, m_autil.mk_int(1), false)); + } + else if (m_util.str.is_empty(x)) { + // len("") = 0 + add_axiom(mk_eq(n, m_autil.mk_int(0), false)); + } + else { + zstring s; + if (m_util.str.is_string(x, s)) { + // len("abc") = 3 + add_axiom(mk_eq(n, m_autil.mk_int(s.length()), false)); + } + else { + // len(x) >= 0 + add_axiom(mk_literal(m_autil.mk_ge(n, m_autil.mk_int(0)))); + // len(x) = 0 ↔ x = "" + expr_ref emp(m_util.str.mk_empty(x->get_sort()), m); + literal len_zero = mk_eq(n, m_autil.mk_int(0), false); + literal x_empty = mk_eq(x, emp, false); + add_axiom(~len_zero, x_empty); // len(x) = 0 → x = "" + add_axiom(len_zero, ~x_empty); // x = "" → len(x) = 0 + } + } + } + + bool theory_nseq::get_length(expr* e, rational& val) { + expr_ref len = mk_len(e); + return m_arith_value.get_value(len, val); + } + + bool theory_nseq::lower_bound(expr* e, rational& lo) { + bool strict; + return m_arith_value.get_lo(e, lo, strict) && !strict; + } + + bool theory_nseq::upper_bound(expr* e, rational& hi) { + bool strict; + return m_arith_value.get_up(e, hi, strict) && !strict; + } + + bool theory_nseq::check_zero_length() { + bool progress = false; + for (auto const& [len_app, str_expr] : m_state.length_pairs()) { + rational lo, hi; + if (lower_bound(len_app, lo) && upper_bound(len_app, hi) && + lo.is_zero() && hi.is_zero()) { + // len(e) = 0 → e = "" + expr* e = str_expr; + expr_ref emp(m_util.str.mk_empty(e->get_sort()), m); + enode* n = ensure_enode(e); + enode* n_emp = ensure_enode(emp); + if (n->get_root() != n_emp->get_root()) { + literal len_zero = mk_eq(len_app, m_autil.mk_int(0), false); + nseq_dependency* dep = m_state.mk_dep(len_zero); + propagate_eq(dep, e, emp); + progress = true; + } + } + } + return progress; + } + + bool theory_nseq::propagate_length_eqs() { + bool progress = false; + for (auto const& eq : m_state.eqs()) { + expr_ref_vector const& lhs = eq.lhs(); + expr_ref_vector const& rhs = eq.rhs(); + if (lhs.empty() || rhs.empty()) + continue; + sort* s = lhs[0]->get_sort(); + expr_ref l = mk_concat(lhs, s); + expr_ref r = mk_concat(rhs, s); + expr_ref len_l = mk_len(l); + expr_ref len_r = mk_len(r); + if (ctx.e_internalized(len_l) && ctx.e_internalized(len_r)) { + enode* nl = ctx.get_enode(len_l); + enode* nr = ctx.get_enode(len_r); + if (nl->get_root() != nr->get_root()) { + propagate_eq(eq.dep(), len_l, len_r, false); + progress = true; + } + } + } + return progress; + } + // ------------------------------------------------------- // Display and statistics // ------------------------------------------------------- @@ -626,7 +819,6 @@ namespace smt { model_value_proc* theory_nseq::mk_value(enode* n, model_generator& mg) { app* e = n->get_expr(); - TRACE(seq, tout << "mk_value: " << mk_bounded_pp(e, m) << "\n";); if (m_util.is_seq(e)) { // Walk the equivalence class looking for a concrete string value @@ -635,18 +827,22 @@ namespace smt { do { expr* ce = curr->get_expr(); zstring s; - if (m_util.str.is_string(ce, s)) + if (m_util.str.is_string(ce, s)) { return alloc(expr_wrapper_proc, to_app(ce)); - if (m_util.str.is_empty(ce)) + } + if (m_util.str.is_empty(ce)) { return alloc(expr_wrapper_proc, to_app(ce)); + } curr = curr->get_next(); } while (curr != root); // No concrete value found: use seq_factory to get a fresh value expr_ref val(m); val = mg.get_model().get_fresh_value(e->get_sort()); - if (val) + if (val) { + m_fresh_values.push_back(val); return alloc(expr_wrapper_proc, to_app(val)); + } // Fallback: empty string return alloc(expr_wrapper_proc, to_app(m_util.str.mk_empty(e->get_sort()))); } @@ -662,6 +858,7 @@ namespace smt { } void theory_nseq::finalize_model(model_generator& mg) { + m_fresh_values.reset(); } } diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h index 2891c3ce0..75f6fd005 100644 --- a/src/smt/theory_nseq.h +++ b/src/smt/theory_nseq.h @@ -44,6 +44,7 @@ namespace smt { nseq_union_find m_find; bool m_has_seq; bool m_new_propagation; + expr_ref_vector m_fresh_values; // keep model fresh values alive // Theory interface final_check_status final_check_eh(unsigned) override; @@ -83,6 +84,7 @@ namespace smt { // Helpers void add_length(expr* l); + void ensure_length_axiom(expr* e); literal mk_literal(expr* e); literal mk_eq_empty(expr* e, bool phase = true); expr_ref mk_len(expr* s); @@ -96,6 +98,15 @@ namespace smt { bool branch_var_prefix(expr* x, expr_ref_vector const& other, nseq_dependency* dep); bool canonize(expr_ref_vector const& src, expr_ref_vector& dst, nseq_dependency*& dep); bool all_eqs_solved(); + bool check_length_conflict(expr* x, expr_ref_vector const& es, nseq_dependency* dep); + + // Length reasoning + void add_length_axiom(expr* n); + bool check_zero_length(); + bool propagate_length_eqs(); + bool get_length(expr* e, rational& val); + bool lower_bound(expr* e, rational& lo); + bool upper_bound(expr* e, rational& hi); public: theory_nseq(context& ctx);