diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 382371b23..fdf2e8c0f 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -42,14 +42,16 @@ NSB review: namespace seq { - void deps_to_lits(dep_tracker deps, svector &eqs, svector &lits) { + void deps_to_lits(dep_tracker deps, svector &eqs, svector &lits, vector& les) { vector vs; dep_manager::s_linearize(deps, vs); for (dep_source const &d : vs) { if (std::holds_alternative(d)) eqs.push_back(std::get(d)); - else + else if (std::holds_alternative(d)) lits.push_back(std::get(d)); + else + les.push_back(std::get(d)); } } @@ -373,22 +375,28 @@ namespace seq { add_constraint(constraint(m.mk_or(cases), dep, m)); } - bool nielsen_node::lower_bound(expr* e, rational& lo) const { - // TODO: Return dependencies + bool nielsen_node::lower_bound(expr* e, rational& lo, dep_tracker& dep) { SASSERT(e); - return m_graph.m_solver.lower_bound(e, lo); + if (!m_graph.m_solver.lower_bound(e, lo)) + return false; + expr_ref lo_expr(m_graph.a.mk_int(lo), m_graph.m); + m_graph.add_le_dependency(dep, this, lo_expr.get(), e); + return true; } - bool nielsen_node::upper_bound(expr* e, rational& up) const { - // TODO: Return dependencies + bool nielsen_node::upper_bound(expr* e, rational& up, dep_tracker& dep) { SASSERT(e); rational v; if (m_graph.a.is_numeral(e, v)) { up = v; return true; } - return m_graph.m_solver.upper_bound(e, up); + if (!m_graph.m_solver.upper_bound(e, up)) + return false; + expr_ref up_expr(m_graph.a.mk_int(up), m_graph.m); + m_graph.add_le_dependency(dep, this, e, up_expr.get()); + return true; } // ----------------------------------------------- @@ -503,6 +511,19 @@ namespace seq { SASSERT(m_sat_node == nullptr); } + void nielsen_graph::add_le_dependency(dep_tracker& dep, nielsen_node* n, expr* lhs, expr* rhs) { + SASSERT(lhs); + SASSERT(rhs); + expr_ref lhs_ref(lhs, m); + expr_ref rhs_ref(rhs, m); + // just assume it to be correct + dep_tracker d = m_dep_mgr.mk_leaf(le{ lhs_ref, rhs_ref }); + // Just add the constraint - we do not have to recompute it + // [also it is on the set of side-conditions if we assert a satisfied node] + n->add_constraint(constraint(a.mk_le(lhs_ref, rhs_ref), d, m)); + dep = m_dep_mgr.mk_join(dep, d); + } + // ----------------------------------------------------------------------- // nielsen_node: simplify_and_init // ----------------------------------------------------------------------- @@ -682,7 +703,8 @@ namespace seq { // Simplify constant-exponent powers: base^0 → ε, base^1 → base. // Returns new snode if any simplification happened, nullptr otherwise. - static euf::snode* simplify_const_powers(nielsen_node* node, euf::sgraph& sg, euf::snode* side) { + static euf::snode* simplify_const_powers(nielsen_node* node, euf::sgraph& sg, euf::snode* side, dep_tracker& dep) { + dep = nullptr; SASSERT(side); if (side->is_empty()) return nullptr; @@ -699,9 +721,11 @@ namespace seq { if (tok->is_power()) { expr* exp_e = get_power_exp_expr(tok, seq); rational ub; - if (exp_e && node->upper_bound(exp_e, ub)) { + dep_tracker ub_dep = nullptr; + if (exp_e && node->upper_bound(exp_e, ub, ub_dep)) { if (ub.is_zero()) { // base^0 → ε (skip this token entirely) + dep = node->graph().dep_mgr().mk_join(dep, ub_dep); simplified = true; continue; } @@ -709,6 +733,7 @@ namespace seq { // base^1 → base euf::snode* base_sn = tok->arg(0); if (base_sn) { + dep = node->graph().dep_mgr().mk_join(dep, ub_dep); result.push_back(base_sn); simplified = true; continue; @@ -915,10 +940,18 @@ namespace seq { continue; // 3a: simplify constant-exponent powers (base^0 → ε, base^1 → base) - if (euf::snode* s = simplify_const_powers(this, sg, eq.m_lhs)) - { eq.m_lhs = s; changed = true; } - if (euf::snode* s = simplify_const_powers(this, sg, eq.m_rhs)) - { eq.m_rhs = s; changed = true; } + dep_tracker lhs_pow_dep = nullptr; + if (euf::snode* s = simplify_const_powers(this, sg, eq.m_lhs, lhs_pow_dep)) { + eq.m_lhs = s; + eq.m_dep = m_graph.dep_mgr().mk_join(eq.m_dep, lhs_pow_dep); + changed = true; + } + dep_tracker rhs_pow_dep = nullptr; + if (euf::snode* s = simplify_const_powers(this, sg, eq.m_rhs, rhs_pow_dep)) { + eq.m_rhs = s; + eq.m_dep = m_graph.dep_mgr().mk_join(eq.m_dep, rhs_pow_dep); + changed = true; + } // 3b: merge adjacent same-base tokens into combined powers if (euf::snode* s = merge_adjacent_powers(sg, eq.m_lhs)) @@ -962,18 +995,22 @@ namespace seq { expr_ref norm_count = normalize_arith(m, count); bool pow_le_count = false, count_le_pow = false; + dep_tracker pow_le_dep = nullptr, count_le_dep = nullptr; rational diff; if (get_const_power_diff(norm_count, pow_exp, m_graph.a, diff)) { count_le_pow = diff.is_nonpos(); pow_le_count = diff.is_nonneg(); } else if (!cur_path.empty()) { - pow_le_count = m_graph.check_lp_le(pow_exp, norm_count); - count_le_pow = m_graph.check_lp_le(norm_count, pow_exp); + pow_le_count = m_graph.check_lp_le(pow_exp, norm_count, this, pow_le_dep); + count_le_pow = m_graph.check_lp_le(norm_count, pow_exp, this, count_le_dep); } if (!pow_le_count && !count_le_pow) continue; + eq.m_dep = m_graph.dep_mgr().mk_join(eq.m_dep, pow_le_dep); + eq.m_dep = m_graph.dep_mgr().mk_join(eq.m_dep, count_le_dep); + pow_side = dir_drop(sg, pow_side, 1, fwd); other_side = dir_drop(sg, other_side, consumed, fwd); expr* base_e = get_power_base_expr(end_tok, seq); @@ -1045,9 +1082,14 @@ namespace seq { } // 3e: LP-aware power directional elimination else if (lp && rp && !cur_path.empty()) { - bool lp_le_rp = m_graph.check_lp_le(lp, rp); - bool rp_le_lp = m_graph.check_lp_le(rp, lp); + dep_tracker lp_le_dep = nullptr, rp_le_dep = nullptr; + bool lp_le_rp = m_graph.check_lp_le(lp, rp, this, lp_le_dep); + bool rp_le_lp = m_graph.check_lp_le(rp, lp, this, rp_le_dep); if (lp_le_rp || rp_le_lp) { + if (lp_le_rp) + eq.m_dep = m_graph.dep_mgr().mk_join(eq.m_dep, lp_le_dep); + if (rp_le_lp) + eq.m_dep = m_graph.dep_mgr().mk_join(eq.m_dep, rp_le_dep); expr* smaller_exp = lp_le_rp ? lp : rp; expr* larger_exp = lp_le_rp ? rp : lp; eq.m_lhs = dir_drop(sg, eq.m_lhs, 1, fwd); @@ -1202,9 +1244,11 @@ namespace seq { // contradict the variable's current integer bounds? If so, mark this // node as a Parikh-image conflict immediately (avoids a solver call). str_mem const* confl_cnstr; - if (!node.is_currently_conflict() && (confl_cnstr = m_parikh->check_parikh_conflict(node)) != nullptr) { + dep_tracker parikh_dep = nullptr; + if (!node.is_currently_conflict() && + (confl_cnstr = m_parikh->check_parikh_conflict(node, parikh_dep)) != nullptr) { node.set_general_conflict(); - node.set_conflict(backtrack_reason::parikh_image, confl_cnstr->m_dep); + node.set_conflict(backtrack_reason::parikh_image, parikh_dep); } } @@ -3683,7 +3727,7 @@ namespace seq { for (dep_source const& d : vs) { if (std::holds_alternative(d)) eqs.push_back(std::get(d)); - else + else if (std::holds_alternative(d)) mem_literals.push_back(std::get(d)); } } @@ -4003,21 +4047,35 @@ namespace seq { return dep; } - bool nielsen_graph::check_lp_le(expr* lhs, expr* rhs) { - // TODO: We need the justification!! - rational lhs_lo, rhs_up; - if (m_solver.lower_bound(lhs, lhs_lo) && - m_solver.upper_bound(rhs, rhs_up)) { + bool nielsen_graph::check_lp_le(expr* lhs, expr* rhs, nielsen_node* n, dep_tracker& dep) { + dep = nullptr; + rational lhs_lo, rhs_up; + bool has_lhs_lo = false, has_rhs_up = false; + dep_tracker lhs_lo_dep = nullptr, rhs_up_dep = nullptr; + if (n->lower_bound(lhs, lhs_lo, lhs_lo_dep)) + has_lhs_lo = true; + if (has_lhs_lo && n->upper_bound(rhs, rhs_up, rhs_up_dep)) + has_rhs_up = true; + if (has_lhs_lo && has_rhs_up) { if (lhs_lo > rhs_up) + // NB: we only justify if we return true return false; // definitely infeasible } - rational rhs_lo, lhs_up; - if (m_solver.upper_bound(lhs, lhs_up) && - m_solver.lower_bound(rhs, rhs_lo)) { - if (lhs_up <= rhs_lo) + rational rhs_lo, lhs_up; + bool has_rhs_lo = false, has_lhs_up = false; + dep_tracker rhs_lo_dep = nullptr, lhs_up_dep = nullptr; + if (n->upper_bound(lhs, lhs_up, lhs_up_dep)) + has_lhs_up = true; + if (has_lhs_up && n->lower_bound(rhs, rhs_lo, rhs_lo_dep)) + has_rhs_lo = true; + if (has_lhs_up && has_rhs_lo) { + if (lhs_up <= rhs_lo) { + dep = m_dep_mgr.mk_join(dep, lhs_up_dep); + dep = m_dep_mgr.mk_join(dep, rhs_lo_dep); return true; // definitely feasible + } } // fall through - ask the solver [expensive] @@ -4033,7 +4091,11 @@ namespace seq { m_solver.assert_expr(a.mk_ge(lhs, rhs_plus_one)); lbool result = m_solver.check(); m_solver.pop(1); - return result == l_false; + if (result == l_false) { + add_le_dependency(dep, n, lhs, rhs); + return true; + } + return false; } constraint nielsen_graph::mk_constraint(expr* fml, dep_tracker const& dep) { diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index f2e3992ee..3212449af 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -338,7 +338,14 @@ namespace seq { // index is the 0-based position in the input eq or mem list respectively. using enode_pair = std::pair; - using dep_source = std::variant; + + // arithmetic implication dependency: lhs <= rhs + struct le { + expr_ref lhs; + expr_ref rhs; + }; + + using dep_source = std::variant; // Arena-based dependency manager: builds an immutable tree of dep_source @@ -350,10 +357,12 @@ namespace seq { // nullptr represents the empty dependency set. using dep_tracker = dep_manager::dependency*; - // partition dep_source leaves from deps into enode pairs and sat literals. + // partition dep_source leaves from deps into enode pairs, sat literals, + // and arithmetic <= dependencies. void deps_to_lits(dep_tracker deps, svector& eqs, - svector& lits); + svector& lits, + vector& les); // string equality constraint: lhs = rhs // mirrors ZIPT's StrEq (both sides are regex-free snode trees) @@ -607,8 +616,8 @@ namespace seq { // Query current bounds for a variable from the arithmetic subsolver. // Falls der Subsolver keinen Bound liefert, werden konservative Defaults // 0 / UINT_MAX verwendet. - bool lower_bound(expr* e, rational& lo) const; - bool upper_bound(expr* e, rational& up) const; + bool lower_bound(expr* e, rational& lo, dep_tracker& dep); + bool upper_bound(expr* e, rational& up, dep_tracker& dep); // character constraint access (mirrors ZIPT's CharRanges) u_map> char_ranges() const { return m_char_ranges; } @@ -827,7 +836,6 @@ namespace seq { // (e.g., explain_conflict) can call mk_join / linearize. mutable dep_manager m_dep_mgr; - std::ostream &display(std::ostream &out, nielsen_node const* n) const; public: @@ -972,6 +980,9 @@ namespace seq { dep_manager& dep_mgr() { return m_dep_mgr; } dep_manager const& dep_mgr() const { return m_dep_mgr; } + // Add a dependency leaf for lhs <= rhs and join it to dep. + void add_le_dependency(dep_tracker& dep, nielsen_node* n, expr* lhs, expr* rhs); + // Assert the constraints of `node` that are new relative to its // parent (indices [m_parent_ic_count..end)) into the current solver scope. // Called by search_dfs after simplify_and_init so that the newly derived @@ -1165,7 +1176,7 @@ namespace seq { // mirrors ZIPT's NielsenNode.IsLe(): temporarily asserts NOT(lhs <= rhs) // and returns true iff the result is unsatisfiable (i.e., lhs <= rhs is // entailed). Path constraints are already in the solver incrementally. - bool check_lp_le(expr* lhs, expr* rhs); + bool check_lp_le(expr* lhs, expr* rhs, nielsen_node* n, dep_tracker& dep); // create an integer constraint: lhs rhs constraint mk_constraint(expr* fml, dep_tracker const& dep); diff --git a/src/smt/seq/seq_parikh.cpp b/src/smt/seq/seq_parikh.cpp index b45a79ab9..383d82180 100644 --- a/src/smt/seq/seq_parikh.cpp +++ b/src/smt/seq/seq_parikh.cpp @@ -249,7 +249,8 @@ namespace seq { // // This check is lightweight — it uses only modular arithmetic on the already- // known regex min/max lengths and the per-variable bounds stored in the node. - str_mem const* seq_parikh::check_parikh_conflict(nielsen_node& node) { + str_mem const* seq_parikh::check_parikh_conflict(nielsen_node& node, dep_tracker& dep) { + dep = nullptr; for (str_mem const& mem : node.str_mems()) { if (!mem.m_str || !mem.m_regex || !mem.m_str->is_var()) continue; @@ -269,9 +270,15 @@ namespace seq { SASSERT(stride > 1); rational lb_r, ub_r; - if (!node.lower_bound(mem.m_str->get_expr(), lb_r) || !node.upper_bound(mem.m_str->get_expr(), ub_r)) + dep_tracker lb_dep = nullptr; + dep_tracker ub_dep = nullptr; + if (!node.lower_bound(mem.m_str->get_expr(), lb_r, lb_dep) || + !node.upper_bound(mem.m_str->get_expr(), ub_r, ub_dep)) continue; + dep_tracker cur_dep = node.graph().dep_mgr().mk_join(mem.m_dep, lb_dep); + cur_dep = node.graph().dep_mgr().mk_join(cur_dep, ub_dep); + SASSERT(lb_r <= ub_r); if (ub_r > INT_MAX) continue; @@ -294,6 +301,7 @@ namespace seq { // In that case k_min would be huge, and min_len + stride*k_min would // also overflow ub → treat as a conflict immediately. if (gap > UINT_MAX - (stride - 1)) { + dep = cur_dep; return &mem; // ceiling division would overflow → k_min too large } k_min = (gap + stride - 1) / stride; @@ -303,12 +311,15 @@ namespace seq { unsigned len_at_k_min; if (k_min > (UINT_MAX - min_len) / stride) { // Overflow: min_len + stride * k_min > UINT_MAX ≥ ub → conflict. + dep = cur_dep; return &mem; } len_at_k_min = min_len + stride * k_min; - if (ub != UINT_MAX && len_at_k_min > ub) + if (ub != UINT_MAX && len_at_k_min > ub) { + dep = cur_dep; return &mem; // no valid k exists → Parikh conflict + } } return nullptr; } diff --git a/src/smt/seq/seq_parikh.h b/src/smt/seq/seq_parikh.h index f12aabdc7..7b78bc90a 100644 --- a/src/smt/seq/seq_parikh.h +++ b/src/smt/seq/seq_parikh.h @@ -121,7 +121,7 @@ namespace seq { // This is a lightweight pre-check that avoids calling the integer // subsolver. It is sound (never returns true for a satisfiable node) // but incomplete (may miss conflicts that require the full solver). - str_mem const* check_parikh_conflict(nielsen_node& node); + str_mem const* check_parikh_conflict(nielsen_node& node, dep_tracker& dep); // Compute the length stride of a regex expression. // Exposed for testing and external callers. diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index 98b722e48..a7ef85ced 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -24,6 +24,19 @@ Author: namespace smt { + namespace { + literal mk_le_literal(context& ctx, ast_manager& m, arith_util& a, seq::le const& d) { + SASSERT(d.lhs); + SASSERT(d.rhs); + expr_ref le_expr(a.mk_le(d.lhs, d.rhs), m); + if (!ctx.b_internalized(le_expr)) + ctx.internalize(le_expr, true); + literal lit = ctx.get_literal(le_expr); + ctx.mark_as_relevant(lit); + return lit; + } + } + theory_nseq::theory_nseq(context& ctx) : theory(ctx, ctx.get_manager().mk_family_id("seq")), m_seq(m), @@ -767,6 +780,7 @@ namespace smt { break; case l_false: // this should not happen because nielsen checks for this before returning a satisfying path. + // or maybe it can happen if we have a "le" dependency TRACE(seq, tout << "nseq final_check: nielsen assumption " << c.fml << " is false; internalized - " << ctx.e_internalized(c.fml) << "\n"); std::cout << "False [" << lit << "]: " << mk_pp(c.fml, m) << std::endl; ctx.push_trail(value_trail(m_should_internalize)); @@ -786,8 +800,10 @@ namespace smt { for (seq::dep_source const& d : m_nielsen.conflict_sources()) { if (std::holds_alternative(d)) eqs.push_back(std::get(d)); - else + else if (std::holds_alternative(d)) lits.push_back(std::get(d)); + else + lits.push_back(mk_le_literal(ctx, m, m_autil, std::get(d))); } ++m_num_conflicts; set_conflict(eqs, lits); @@ -838,8 +854,12 @@ namespace smt { std::get(d).second->get_expr() ) ); - else + else if (std::holds_alternative(d)) kernel.assert_expr(ctx.literal2expr(std::get(d))); + else { + auto const& p = std::get(d); + kernel.assert_expr(m_autil.mk_le(p.lhs, p.rhs)); + } } auto res = kernel.check(); if (res == l_true) { @@ -915,10 +935,34 @@ namespace smt { for (auto lit : lits) tout << ctx.literal2expr(lit) << "\n"; for (auto [a, b] : eqs) tout << enode_pp(a, ctx) << " == " << enode_pp(b, ctx) << "\n"; ); - ctx.set_conflict( - ctx.mk_justification( - ext_theory_conflict_justification( - get_id(), ctx, lits.size(), lits.data(), eqs.size(), eqs.data(), 0, nullptr))); + + bool all_true = true; + literal_vector eq_lits; + for (literal lit : lits) { + ctx.mark_as_relevant(lit); + all_true &= (ctx.get_assignment(lit) == l_true); + } + for (auto [a, b] : eqs) { + literal lit_eq = mk_eq(a->get_expr(), b->get_expr(), false); + eq_lits.push_back(lit_eq); + ctx.mark_as_relevant(lit_eq); + all_true &= (ctx.get_assignment(lit_eq) == l_true); + } + + if (all_true) { + ctx.set_conflict( + ctx.mk_justification( + ext_theory_conflict_justification( + get_id(), ctx, lits.size(), lits.data(), eqs.size(), eqs.data(), 0, nullptr))); + return; + } + + literal_vector clause; + for (literal lit : lits) + clause.push_back(~lit); + for (literal lit : eq_lits) + clause.push_back(~lit); + ctx.mk_th_axiom(get_id(), clause.size(), clause.data()); } // ----------------------------------------------------------------------- @@ -1186,16 +1230,43 @@ namespace smt { // conditional constraints: propagate with justification from dep_tracker enode_pair_vector eqs; literal_vector lits; - seq::deps_to_lits(lc.m_dep, eqs, lits); + vector les; + seq::deps_to_lits(lc.m_dep, eqs, lits, les); + for (auto const& d : les) + lits.push_back(mk_le_literal(ctx, m, m_autil, d)); + + bool all_true = true; + literal_vector eq_lits; + for (auto [a, b] : eqs) { + literal lit_eq = mk_eq(a->get_expr(), b->get_expr(), false); + eq_lits.push_back(lit_eq); + ctx.mark_as_relevant(lit_eq); + all_true &= (ctx.get_assignment(lit_eq) == l_true); + } + for (literal dep_lit : lits) { + ctx.mark_as_relevant(dep_lit); + all_true &= (ctx.get_assignment(dep_lit) == l_true); + } ctx.mark_as_relevant(lit); - justification* js = ctx.mk_justification( - ext_theory_propagation_justification( - get_id(), ctx, - lits.size(), lits.data(), - eqs.size(), eqs.data(), - lit)); - ctx.assign(lit, js); + if (all_true) { + justification* js = ctx.mk_justification( + ext_theory_propagation_justification( + get_id(), ctx, + lits.size(), lits.data(), + eqs.size(), eqs.data(), + lit)); + ctx.assign(lit, js); + } + else { + literal_vector clause; + for (literal dep_lit : lits) + clause.push_back(~dep_lit); + for (literal lit_eq : eq_lits) + clause.push_back(~lit_eq); + clause.push_back(lit); + ctx.mk_th_axiom(get_id(), clause.size(), clause.data()); + } TRACE(seq, tout << "nseq length propagation: " << mk_pp(lc.m_expr, m) << " (" << eqs.size() << " eqs, " << lits.size() << " lits)\n"; @@ -1449,20 +1520,46 @@ namespace smt { enode_pair_vector eqs; literal_vector dep_lits; + vector dep_les; for (unsigned idx : mem_indices) { if (mems[idx].m_dep) - seq::deps_to_lits(mems[idx].m_dep, eqs, dep_lits); + seq::deps_to_lits(mems[idx].m_dep, eqs, dep_lits, dep_les); + } + for (auto const& d : dep_les) + dep_lits.push_back(mk_le_literal(ctx, m, m_autil, d)); + + bool all_true = true; + literal_vector eq_lits; + for (auto [a, b] : eqs) { + literal lit_eq = mk_eq(a->get_expr(), b->get_expr(), false); + eq_lits.push_back(lit_eq); + ctx.mark_as_relevant(lit_eq); + all_true &= (ctx.get_assignment(lit_eq) == l_true); + } + for (literal dep_lit : dep_lits) { + ctx.mark_as_relevant(dep_lit); + all_true &= (ctx.get_assignment(dep_lit) == l_true); } ctx.mark_as_relevant(lit_prop); - justification* js = ctx.mk_justification( - ext_theory_propagation_justification( - get_id(), ctx, - dep_lits.size(), dep_lits.data(), - eqs.size(), eqs.data(), - lit_prop)); - - ctx.assign(lit_prop, js); + if (all_true) { + justification* js = ctx.mk_justification( + ext_theory_propagation_justification( + get_id(), ctx, + dep_lits.size(), dep_lits.data(), + eqs.size(), eqs.data(), + lit_prop)); + ctx.assign(lit_prop, js); + } + else { + literal_vector clause; + for (literal dep_lit : dep_lits) + clause.push_back(~dep_lit); + for (literal lit_eq : eq_lits) + clause.push_back(~lit_eq); + clause.push_back(lit_prop); + ctx.mk_th_axiom(get_id(), clause.size(), clause.data()); + } TRACE(seq, tout << "nseq length coherence check: length " << l << " with gradient " << g << " is incompatible for " << mk_pp(s, m) << ", propagated " << mk_pp(prop_expr, m) << "\n";); IF_VERBOSE(1, verbose_stream() << "nseq length coherence check: length " << l << " with gradient " << g << " is incompatible for " << mk_pp(s, m) << ", propagated " << mk_pp(prop_expr, m) << "\n";);