diff --git a/src/smt/nseq_context_solver.h b/src/smt/nseq_context_solver.h index 60db2868a..5b2c9263a 100644 --- a/src/smt/nseq_context_solver.h +++ b/src/smt/nseq_context_solver.h @@ -49,6 +49,7 @@ namespace smt { } void assert_expr(expr* e) override { + // std::cout << "Asserting: " << mk_pp(e, m_kernel.m()) << std::endl; m_kernel.assert_expr(e); } diff --git a/src/smt/nseq_model.cpp b/src/smt/nseq_model.cpp index 95ed7c869..8eb8d5a59 100644 --- a/src/smt/nseq_model.cpp +++ b/src/smt/nseq_model.cpp @@ -233,8 +233,34 @@ namespace smt { if (exp_val.is_one()) return base_val; - // For small exponents, concatenate directly + // For small exponents, concatenate directly; for large ones, + // build a concrete string constant to avoid enormous AST chains + // that cause cleanup_expr to diverge. unsigned n_val = exp_val.get_unsigned(); + constexpr unsigned POWER_EXPAND_LIMIT = 1000; + if (n_val > POWER_EXPAND_LIMIT) { + // Try to extract a concrete character from the base (seq.unit(c)) + // and build a string literal directly (O(1) AST node). + unsigned ch = 0; + expr* unit_arg = nullptr; + if (m_seq.str.is_unit(base_val, unit_arg) && m_seq.is_const_char(unit_arg, ch)) { + svector buf(n_val, ch); + zstring result(buf.size(), buf.data()); + return expr_ref(m_seq.str.mk_string(result), m); + } + // Also handle if base is already a string constant + zstring base_str; + if (m_seq.str.is_string(base_val, base_str) && base_str.length() > 0) { + svector buf; + for (unsigned i = 0; i < n_val; ++i) + for (unsigned j = 0; j < base_str.length(); ++j) + buf.push_back(base_str[j]); + zstring result(buf.size(), buf.data()); + return expr_ref(m_seq.str.mk_string(result), m); + } + // Fallback: cap exponent to avoid divergence + n_val = POWER_EXPAND_LIMIT; + } expr_ref acc(base_val); for (unsigned i = 1; i < n_val; ++i) acc = m_seq.str.mk_concat(acc, base_val); diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 8fdf57d4d..6c2b4a299 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -442,7 +442,8 @@ namespace seq { nielsen_graph::nielsen_graph(euf::sgraph& sg, simple_solver& solver): m_sg(sg), - m_solver(solver) { + m_solver(solver), + m_len_vars(sg.get_manager()) { } nielsen_graph::~nielsen_graph() { @@ -519,6 +520,9 @@ namespace seq { m_num_input_eqs = 0; m_num_input_mems = 0; m_root_constraints_asserted = false; + m_mod_cnt.reset(); + m_len_var_cache.clear(); + m_len_vars.reset(); } std::ostream& nielsen_graph::display(std::ostream& out) const { @@ -1970,6 +1974,9 @@ namespace seq { // node into the current solver scope. Constraints inherited from the parent // (indices 0..m_parent_ic_count-1) are already present at the enclosing // scope level; only the newly-added tail needs to be asserted here. + // Also generate per-node |LHS| = |RHS| length constraints for descendant + // equations (root constraints are already at the base level). + generate_node_length_constraints(node); assert_node_new_int_constraints(node); // integer feasibility check: the solver now holds all path constraints @@ -2017,9 +2024,27 @@ namespace seq { // inside the recursive call (above). On return, pop the scope so // that backtracking removes those assertions. m_solver.push(); + + // Lazily compute substitution length constraints (|x| = |u|) on first + // traversal. This must happen before asserting side_int and before + // bumping mod counts, so that LHS uses the parent's counts and RHS + // uses the temporarily-bumped counts. + if (!e->len_constraints_computed()) { + add_subst_length_constraints(e); + e->set_len_constraints_computed(true); + } + for (auto const &ic : e->side_int()) m_solver.assert_expr(int_constraint_to_expr(ic)); + + // Bump modification counts for the child's context. + inc_edge_mod_counts(e); + search_result r = search_dfs(e->tgt(), e->is_progress() ? depth : depth + 1, cur_path); + + // Restore modification counts on backtrack. + dec_edge_mod_counts(e); + m_solver.pop(1); if (r == search_result::sat) return search_result::sat; @@ -3533,7 +3558,18 @@ namespace seq { return expr_ref(arith.mk_add(left, right), m); } - // for variables and other terms, use symbolic (str.len expr) + // For variables: consult modification counter. + // mod_count > 0 means the variable has been reused by a non-eliminating + // substitution; use a fresh integer variable to avoid the circular + // |x| = 1 + |x| problem. + if (n->is_var()) { + unsigned mc = 0; + m_mod_cnt.find(n->id(), mc); + if (mc > 0) + return get_or_create_len_var(n, mc); + } + + // for variables at mod_count 0 and other terms, use symbolic (str.len expr) return expr_ref(seq.str.mk_length(n->get_expr()), m); } @@ -3664,6 +3700,131 @@ namespace seq { return expr_ref(m.mk_true(), m); } + // ----------------------------------------------------------------------- + // Modification counter: substitution length tracking + // mirrors ZIPT's LocalInfo.CurrentModificationCnt + NielsenEdge.IncModCount/DecModCount + // + NielsenNode constructor length assertion logic + // ----------------------------------------------------------------------- + + expr_ref nielsen_graph::get_or_create_len_var(euf::snode* var, unsigned mod_count) { + ast_manager& m = m_sg.get_manager(); + SASSERT(var && var->is_var()); + SASSERT(mod_count > 0); + + auto key = std::make_pair(var->id(), mod_count); + auto it = m_len_var_cache.find(key); + if (it != m_len_var_cache.end()) + return expr_ref(it->second, m); + + // Create a fresh integer variable: len__ + arith_util arith(m); + std::string name = "len!" + std::to_string(var->id()) + "!" + std::to_string(mod_count); + expr_ref fresh(m.mk_fresh_const(name.c_str(), arith.mk_int()), m); + m_len_vars.push_back(fresh); + m_len_var_cache.insert({key, fresh.get()}); + return fresh; + } + + void nielsen_graph::add_subst_length_constraints(nielsen_edge* e) { + auto const& substs = e->subst(); + + // Quick check: any non-eliminating substitutions? + bool has_non_elim = false; + for (auto const& s : substs) + if (!s.is_eliminating()) { has_non_elim = true; break; } + if (!has_non_elim) return; + + ast_manager& m = m_sg.get_manager(); + arith_util arith(m); + + // Step 1: Compute LHS (|x|) for each non-eliminating substitution + // using current m_mod_cnt (before bumping). + // Also assert |x|_k >= 0 (mirrors ZIPT's NielsenNode constructor line 172). + svector> lhs_exprs; + for (unsigned i = 0; i < substs.size(); ++i) { + auto const& s = substs[i]; + if (s.is_eliminating()) continue; + SASSERT(s.m_var && s.m_var->is_var()); + expr_ref lhs = compute_length_expr(s.m_var); + lhs_exprs.push_back({i, lhs.get()}); + // Assert LHS >= 0 + e->add_side_int(int_constraint(lhs, arith.mk_int(0), + int_constraint_kind::ge, s.m_dep, m)); + } + + // Step 2: Bump mod counts for all non-eliminating variables at once. + for (auto const& s : substs) { + if (s.is_eliminating()) continue; + unsigned id = s.m_var->id(); + unsigned prev = 0; + m_mod_cnt.find(id, prev); + m_mod_cnt.insert(id, prev + 1); + } + + // Step 3: Compute RHS (|u|) with bumped mod counts and add |x| = |u|. + for (auto const& p : lhs_exprs) { + unsigned idx = p.first; + expr* lhs_expr = p.second; + auto const& s = substs[idx]; + expr_ref rhs = compute_length_expr(s.m_replacement); + e->add_side_int(int_constraint(lhs_expr, rhs, int_constraint_kind::eq, + s.m_dep, m)); + + // Assert non-negativity for any fresh length variables in the RHS + // (variables at mod_count > 0 that are newly created). + euf::snode_vector tokens; + s.m_replacement->collect_tokens(tokens); + for (euf::snode* tok : tokens) { + if (tok->is_var()) { + unsigned mc = 0; + m_mod_cnt.find(tok->id(), mc); + if (mc > 0) { + expr_ref len_var = get_or_create_len_var(tok, mc); + e->add_side_int(int_constraint(len_var, arith.mk_int(0), + int_constraint_kind::ge, s.m_dep, m)); + } + } + } + } + + // Step 4: Restore mod counts (temporary bump for computing RHS only). + for (auto const& s : substs) { + if (s.is_eliminating()) continue; + unsigned id = s.m_var->id(); + unsigned prev = 0; + m_mod_cnt.find(id, prev); + SASSERT(prev >= 1); + if (prev <= 1) + m_mod_cnt.remove(id); + else + m_mod_cnt.insert(id, prev - 1); + } + } + + void nielsen_graph::inc_edge_mod_counts(nielsen_edge* e) { + for (auto const& s : e->subst()) { + if (s.is_eliminating()) continue; + unsigned id = s.m_var->id(); + unsigned prev = 0; + m_mod_cnt.find(id, prev); + m_mod_cnt.insert(id, prev + 1); + } + } + + void nielsen_graph::dec_edge_mod_counts(nielsen_edge* e) { + for (auto const& s : e->subst()) { + if (s.is_eliminating()) continue; + unsigned id = s.m_var->id(); + unsigned prev = 0; + m_mod_cnt.find(id, prev); + SASSERT(prev >= 1); + if (prev <= 1) + m_mod_cnt.remove(id); + else + m_mod_cnt.insert(id, prev - 1); + } + } + void nielsen_graph::assert_node_new_int_constraints(nielsen_node* node) { // Assert only the int_constraints that are new to this node (beyond those // inherited from its parent via clone_from). The parent's constraints are @@ -3674,6 +3835,78 @@ namespace seq { m_solver.assert_expr(int_constraint_to_expr(node->int_constraints()[i])); } + void nielsen_graph::generate_node_length_constraints(nielsen_node* node) { + if (node->m_node_len_constraints_generated) + return; + node->m_node_len_constraints_generated = true; + + // Skip the root node — its length constraints are already asserted + // at the base solver level by assert_root_constraints_to_solver(). + if (node == m_root) + return; + + ast_manager& m = m_sg.get_manager(); + arith_util arith(m); + uint_set seen_vars; + + for (str_eq const& eq : node->str_eqs()) { + if (eq.is_trivial()) + continue; + + expr_ref len_lhs = compute_length_expr(eq.m_lhs); + expr_ref len_rhs = compute_length_expr(eq.m_rhs); + node->add_int_constraint(int_constraint(len_lhs, len_rhs, + int_constraint_kind::eq, eq.m_dep, m)); + + // non-negativity for each variable (mod-count-aware) + euf::snode_vector tokens; + eq.m_lhs->collect_tokens(tokens); + eq.m_rhs->collect_tokens(tokens); + for (euf::snode* tok : tokens) { + if (tok->is_var() && !seen_vars.contains(tok->id())) { + seen_vars.insert(tok->id()); + expr_ref len_var = compute_length_expr(tok); + node->add_int_constraint(int_constraint(len_var, arith.mk_int(0), + int_constraint_kind::ge, eq.m_dep, m)); + } + } + } + + // Parikh interval bounds for regex memberships at this node + seq_util& seq = m_sg.get_seq_util(); + for (str_mem const& mem : node->str_mems()) { + expr* re_expr = mem.m_regex->get_expr(); + if (!re_expr || !seq.is_re(re_expr)) + continue; + + unsigned min_len = 0, max_len = UINT_MAX; + compute_regex_length_interval(mem.m_regex, min_len, max_len); + + expr_ref len_str = compute_length_expr(mem.m_str); + + if (min_len > 0) { + node->add_int_constraint(int_constraint(len_str, arith.mk_int(min_len), + int_constraint_kind::ge, mem.m_dep, m)); + } + if (max_len < UINT_MAX) { + node->add_int_constraint(int_constraint(len_str, arith.mk_int(max_len), + int_constraint_kind::le, mem.m_dep, m)); + } + + // non-negativity for string-side variables + euf::snode_vector tokens; + mem.m_str->collect_tokens(tokens); + for (euf::snode* tok : tokens) { + if (tok->is_var() && !seen_vars.contains(tok->id())) { + seen_vars.insert(tok->id()); + expr_ref len_var = compute_length_expr(tok); + node->add_int_constraint(int_constraint(len_var, arith.mk_int(0), + int_constraint_kind::ge, mem.m_dep, m)); + } + } + } + } + bool nielsen_graph::check_int_feasibility(nielsen_node* node, svector const& cur_path) { // In incremental mode the solver already holds all path constraints // (root length constraints at the base level, edge side_int and node diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index a93de717c..cafe39be5 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -241,6 +241,7 @@ Author: #include "ast/seq_decl_plugin.h" #include "ast/euf/euf_sgraph.h" #include +#include #include "model/model.h" namespace seq { @@ -455,6 +456,7 @@ namespace seq { ptr_vector m_side_str_mem; // side constraints: regex memberships vector m_side_int; // side constraints: integer equalities/inequalities bool m_is_progress; // does this edge represent progress? + bool m_len_constraints_computed = false; // lazily computed substitution length constraints public: nielsen_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress); @@ -479,6 +481,9 @@ namespace seq { bool is_progress() const { return m_is_progress; } + bool len_constraints_computed() const { return m_len_constraints_computed; } + void set_len_constraints_computed(bool v) { m_len_constraints_computed = v; } + bool operator==(nielsen_edge const& other) const { return m_src == other.m_src && m_tgt == other.m_tgt; } @@ -518,6 +523,7 @@ namespace seq { bool m_is_extended = false; backtrack_reason m_reason = backtrack_reason::unevaluated; bool m_is_progress = false; + bool m_node_len_constraints_generated = false; // true after generate_node_length_constraints runs // evaluation index for run tracking unsigned m_eval_idx = 0; @@ -725,6 +731,26 @@ namespace seq { // Set to true after assert_root_constraints_to_solver() is first called. bool m_root_constraints_asserted = false; + // ----------------------------------------------- + // Modification counter for substitution length tracking. + // mirrors ZIPT's LocalInfo.CurrentModificationCnt + // ----------------------------------------------- + + // Maps snode id of string variable → current modification (reuse) count + // along the DFS path. When a non-eliminating substitution x/u is applied + // (x appears in u), x's count is bumped. This produces distinct length + // variables for x before and after substitution, avoiding the unsatisfiable + // |x| = 1 + |x| that results from reusing the same length symbol. + u_map m_mod_cnt; + + // Cache: (var snode id, modification count) → fresh integer variable. + // Variables at mod_count 0 use str.len(var_expr) (standard form). + // Variables at mod_count > 0 get a fresh Z3 integer constant. + std::map, expr*> m_len_var_cache; + + // Pins the fresh length variable expressions so they aren't garbage collected. + expr_ref_vector m_len_vars; + public: // Construct with a caller-supplied solver. Ownership is NOT transferred; // the caller is responsible for keeping the solver alive. @@ -966,6 +992,14 @@ namespace seq { // bounds become visible to subsequent check() and check_lp_le() calls. void assert_node_new_int_constraints(nielsen_node* node); + // Generate |LHS| = |RHS| length constraints for a non-root node's own + // string equalities and add them as int_constraints on the node. + // Called once per node (guarded by m_node_len_constraints_generated). + // Uses compute_length_expr (mod-count-aware) so that variables with + // non-zero modification counts get fresh length variables. + // Mirrors ZIPT's Constraint.Shared forwarding for per-node equations. + void generate_node_length_constraints(nielsen_node* node); + // check integer feasibility of the constraints along the current path. // returns true if feasible (including unknown), false only if l_false. // Precondition: all path constraints have been incrementally asserted to @@ -991,6 +1025,30 @@ namespace seq { // convert an int_constraint to an expr* assertion expr_ref int_constraint_to_expr(int_constraint const& ic); + + // ----------------------------------------------- + // Modification counter methods for substitution length tracking. + // mirrors ZIPT's NielsenEdge.IncModCount / DecModCount and + // NielsenNode constructor length assertion logic. + // ----------------------------------------------- + + // Get or create a fresh integer variable for len(var) at the given + // modification count. Returns str.len(var_expr) when mod_count == 0. + expr_ref get_or_create_len_var(euf::snode* var, unsigned mod_count); + + // Compute and add |x| = |u| length constraints to an edge for all + // its non-eliminating substitutions. Uses current m_mod_cnt. + // Temporarily bumps m_mod_cnt for RHS computation, then restores. + // Called lazily on first edge traversal in search_dfs. + void add_subst_length_constraints(nielsen_edge* e); + + // Bump modification counts for an edge's non-eliminating substitutions. + // Called when entering an edge during DFS. + void inc_edge_mod_counts(nielsen_edge* e); + + // Restore modification counts for an edge's non-eliminating substitutions. + // Called when backtracking from an edge during DFS. + void dec_edge_mod_counts(nielsen_edge* e); }; }