diff --git a/src/smt/nseq_model.cpp b/src/smt/nseq_model.cpp index fd9382219..f3b62d291 100644 --- a/src/smt/nseq_model.cpp +++ b/src/smt/nseq_model.cpp @@ -42,11 +42,12 @@ namespace smt { register_existing_values(nielsen); collect_var_regex_constraints(state); - // if the last solve returned sat, extract assignments from the - // satisfying leaf node found during DFS. - seq::nielsen_node const* root = nielsen.root(); - if (root && root->is_satisfied()) - extract_assignments(root); + // extract variable assignments from the satisfying leaf's substitution path + seq::nielsen_node const* sat = nielsen.sat_node(); + IF_VERBOSE(1, verbose_stream() << "nseq model init: sat_node=" << (sat ? "set" : "null") + << " path_len=" << nielsen.sat_path().size() << "\n";); + extract_assignments(nielsen.sat_path()); + IF_VERBOSE(1, verbose_stream() << "nseq model: m_var_values has " << m_var_values.size() << " entries\n";); } model_value_proc* nseq_model::mk_value(enode* n, model_generator& mg) { @@ -74,6 +75,12 @@ namespace smt { // look up snode for this expression euf::snode* sn = m_sg.find(e); + IF_VERBOSE(2, { + verbose_stream() << "nseq mk_value: expr=" << mk_bounded_pp(e, m, 2); + if (sn) verbose_stream() << " snode[" << sn->id() << "] kind=" << (int)sn->kind(); + else verbose_stream() << " snode=null"; + verbose_stream() << "\n"; + }); expr_ref val(m); if (sn) val = snode_to_value(sn); @@ -99,25 +106,46 @@ namespace smt { m_factory = nullptr; } - void nseq_model::extract_assignments(seq::nielsen_node const* node) { - if (!node) - return; - for (auto const& eq : node->str_eqs()) { - if (!eq.m_lhs || !eq.m_rhs) - continue; - if (eq.m_lhs->is_var() && !m_var_values.contains(eq.m_lhs->id())) { - expr_ref val = snode_to_value(eq.m_rhs); - if (val) { - m_trail.push_back(val); - m_var_values.insert(eq.m_lhs->id(), val); - } + void nseq_model::extract_assignments(svector const& sat_path) { + IF_VERBOSE(1, verbose_stream() << "nseq extract_assignments: path length=" << sat_path.size() << "\n";); + + // compose substitutions root-to-leaf. + // bindings[i] = (var_snode, current_value_snode). + // When a new substitution (s.m_var -> s.m_replacement) is applied, + // substitute s.m_var in all existing values, then record the new binding. + vector> bindings; + for (seq::nielsen_edge* e : sat_path) { + for (seq::nielsen_subst const& s : e->subst()) { + if (!s.m_var) continue; + IF_VERBOSE(1, { + verbose_stream() << " subst: snode[" << s.m_var->id() << "]"; + if (s.m_var->get_expr()) verbose_stream() << "=" << mk_bounded_pp(s.m_var->get_expr(), m, 2); + verbose_stream() << " -> snode[" << (s.m_replacement ? (int)s.m_replacement->id() : -1) << "]"; + if (s.m_replacement && s.m_replacement->get_expr()) verbose_stream() << "=" << mk_bounded_pp(s.m_replacement->get_expr(), m, 2); + verbose_stream() << "\n"; + }); + for (auto& b : bindings) + b.second = m_sg.subst(b.second, s.m_var, s.m_replacement); + bindings.push_back({s.m_var, s.m_replacement}); } - if (eq.m_rhs->is_var() && !m_var_values.contains(eq.m_rhs->id())) { - expr_ref val = snode_to_value(eq.m_lhs); - if (val) { - m_trail.push_back(val); - m_var_values.insert(eq.m_rhs->id(), val); - } + } + + IF_VERBOSE(1, verbose_stream() << "nseq extract_assignments: " << bindings.size() << " bindings\n";); + for (auto const& b : bindings) { + unsigned id = b.first->id(); + if (m_var_values.contains(id)) + continue; + expr_ref val = snode_to_value(b.second); + IF_VERBOSE(1, { + verbose_stream() << " var snode[" << id << "]"; + if (b.first->get_expr()) verbose_stream() << "=" << mk_bounded_pp(b.first->get_expr(), m, 2); + verbose_stream() << " -> "; + if (val) verbose_stream() << mk_bounded_pp(val, m, 3); else verbose_stream() << "(null)"; + verbose_stream() << "\n"; + }); + if (val) { + m_trail.push_back(val); + m_var_values.insert(id, val); } } } @@ -281,26 +309,6 @@ namespace smt { } } - // validate negative memberships: str ∉ regex - for (auto const& entry : state.neg_mems()) { - if (!entry.m_str || !entry.m_regex) - continue; - expr* s_expr = entry.m_str->get_expr(); - expr* r_expr = entry.m_regex->get_expr(); - if (!s_expr || !r_expr) - continue; - - expr_ref in_re(m_seq.re.mk_in_re(s_expr, r_expr), m); - expr_ref val(m); - mdl.eval(in_re, val, true); - if (val && m.is_true(val)) { - IF_VERBOSE(0, verbose_stream() << "nseq model: negative membership violated: " - << mk_bounded_pp(s_expr, m, 3) - << " not in " << mk_bounded_pp(r_expr, m, 3) << "\n";); - ok = false; - } - } - return ok; } diff --git a/src/smt/nseq_model.h b/src/smt/nseq_model.h index 359703fed..cd5d13082 100644 --- a/src/smt/nseq_model.h +++ b/src/smt/nseq_model.h @@ -88,9 +88,9 @@ namespace smt { bool validate_regex(nseq_state const& state, ::proto_model& mdl); private: - // extract variable assignments from a satisfying Nielsen node. - // Walks str_eqs looking for x = value patterns and records them. - void extract_assignments(seq::nielsen_node const* node); + // extract variable assignments from the sat path (root-to-leaf edges). + // Composes substitutions along the path to compute final var values. + void extract_assignments(svector const& sat_path); // recursively substitute known variable assignments into an snode tree. // Returns a concrete Z3 expression. diff --git a/src/smt/nseq_state.h b/src/smt/nseq_state.h index dfe28c1b3..3ebb81427 100644 --- a/src/smt/nseq_state.h +++ b/src/smt/nseq_state.h @@ -44,13 +44,6 @@ namespace smt { enode* m_n2; }; - // negative regex membership: ¬(str in regex) - struct neg_mem_entry { - euf::snode* m_str; - euf::snode* m_regex; - literal m_lit; - }; - class nseq_state { euf::sgraph& m_sg; vector m_str_eqs; @@ -58,11 +51,9 @@ namespace smt { vector m_eq_sources; vector m_mem_sources; vector m_diseqs; - vector m_neg_mems; unsigned_vector m_str_eq_lim; unsigned_vector m_str_mem_lim; unsigned_vector m_diseq_lim; - unsigned_vector m_neg_mem_lim; unsigned m_next_mem_id = 0; public: @@ -72,7 +63,6 @@ namespace smt { m_str_eq_lim.push_back(m_str_eqs.size()); m_str_mem_lim.push_back(m_str_mems.size()); m_diseq_lim.push_back(m_diseqs.size()); - m_neg_mem_lim.push_back(m_neg_mems.size()); } void pop(unsigned n) { @@ -85,8 +75,6 @@ namespace smt { m_str_mem_lim.pop_back(); m_diseqs.shrink(m_diseq_lim.back()); m_diseq_lim.pop_back(); - m_neg_mems.shrink(m_neg_mem_lim.back()); - m_neg_mem_lim.pop_back(); } } @@ -106,21 +94,15 @@ namespace smt { m_diseqs.push_back({n1, n2}); } - void add_neg_mem(euf::snode* str, euf::snode* regex, literal lit) { - m_neg_mems.push_back({str, regex, lit}); - } - vector const& str_eqs() const { return m_str_eqs; } vector const& str_mems() const { return m_str_mems; } vector const& diseqs() const { return m_diseqs; } - vector const& neg_mems() const { return m_neg_mems; } eq_source const& get_eq_source(unsigned i) const { return m_eq_sources[i]; } mem_source const& get_mem_source(unsigned i) const { return m_mem_sources[i]; } diseq_source const& get_diseq(unsigned i) const { return m_diseqs[i]; } - neg_mem_entry const& get_neg_mem(unsigned i) const { return m_neg_mems[i]; } - bool empty() const { return m_str_eqs.empty() && m_str_mems.empty() && m_neg_mems.empty() && m_diseqs.empty(); } + bool empty() const { return m_str_eqs.empty() && m_str_mems.empty() && m_diseqs.empty(); } void reset() { m_str_eqs.reset(); @@ -128,11 +110,9 @@ namespace smt { m_eq_sources.reset(); m_mem_sources.reset(); m_diseqs.reset(); - m_neg_mems.reset(); m_str_eq_lim.reset(); m_str_mem_lim.reset(); m_diseq_lim.reset(); - m_neg_mem_lim.reset(); } }; diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 087da4c5e..d25c64958 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -260,6 +260,8 @@ namespace seq { m_nodes.reset(); m_edges.reset(); m_root = nullptr; + m_sat_node = nullptr; + m_sat_path.reset(); m_run_idx = 0; m_depth_bound = 0; m_next_mem_id = 0; @@ -533,6 +535,8 @@ namespace seq { return search_result::sat; ++m_stats.m_num_solve_calls; + m_sat_node = nullptr; + m_sat_path.reset(); // Iterative deepening: start at depth 3, increment by 1 on each failure. // m_max_search_depth == 0 means unlimited; otherwise stop when bound exceeds it. @@ -541,7 +545,8 @@ namespace seq { if (m_max_search_depth > 0 && m_depth_bound > m_max_search_depth) break; inc_run_idx(); - search_result r = search_dfs(m_root, 0); + svector cur_path; + search_result r = search_dfs(m_root, 0, cur_path); if (r == search_result::sat) { ++m_stats.m_num_sat; return r; @@ -559,7 +564,7 @@ namespace seq { return search_result::unknown; } - nielsen_graph::search_result nielsen_graph::search_dfs(nielsen_node* node, unsigned depth) { + nielsen_graph::search_result nielsen_graph::search_dfs(nielsen_node* node, unsigned depth, svector& cur_path) { ++m_stats.m_num_dfs_nodes; if (depth > m_stats.m_max_depth) m_stats.m_max_depth = depth; @@ -567,8 +572,11 @@ namespace seq { // cycle/revisit detection: if already visited this run, return cached status. // mirrors ZIPT's NielsenNode.GraphExpansion() evalIdx check. if (node->eval_idx() == m_run_idx) { - if (node->is_satisfied()) + if (node->is_satisfied()) { + m_sat_node = node; + m_sat_path = cur_path; return search_result::sat; + } if (node->is_currently_conflict()) return search_result::unsat; return search_result::unknown; @@ -583,8 +591,11 @@ namespace seq { node->set_general_conflict(true); return search_result::unsat; } - if (sr == simplify_result::satisfied || node->is_satisfied()) + if (sr == simplify_result::satisfied || node->is_satisfied()) { + m_sat_node = node; + m_sat_path = cur_path; return search_result::sat; + } // depth bound check if (depth >= m_depth_bound) @@ -611,10 +622,11 @@ namespace seq { // explore children bool any_unknown = false; for (nielsen_edge* e : node->outgoing()) { - nielsen_node* child = e->tgt(); - search_result r = search_dfs(child, depth + 1); + cur_path.push_back(e); + search_result r = search_dfs(e->tgt(), depth + 1, cur_path); if (r == search_result::sat) return search_result::sat; + cur_path.pop_back(); if (r == search_result::unknown) any_unknown = true; } diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 8630ef5c8..2939cd80e 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -425,6 +425,7 @@ namespace seq { // edges ptr_vector m_outgoing; nielsen_node* m_backedge = nullptr; + nielsen_edge* m_parent_edge = nullptr; // status flags bool m_is_general_conflict = false; @@ -457,6 +458,9 @@ namespace seq { nielsen_node* backedge() const { return m_backedge; } void set_backedge(nielsen_node* n) { m_backedge = n; } + nielsen_edge* parent_edge() const { return m_parent_edge; } + void set_parent_edge(nielsen_edge* e) { m_parent_edge = e; } + // status bool is_general_conflict() const { return m_is_general_conflict; } void set_general_conflict(bool v) { m_is_general_conflict = v; } @@ -532,6 +536,8 @@ namespace seq { ptr_vector m_nodes; ptr_vector m_edges; nielsen_node* m_root = nullptr; + nielsen_node* m_sat_node = nullptr; + svector m_sat_path; unsigned m_run_idx = 0; unsigned m_depth_bound = 0; unsigned m_max_search_depth = 0; @@ -558,6 +564,11 @@ namespace seq { nielsen_node* root() const { return m_root; } void set_root(nielsen_node* n) { m_root = n; } + // satisfying leaf node (set by solve() when result is sat) + nielsen_node* sat_node() const { return m_sat_node; } + // path of edges from root to sat_node (set when sat_node is set) + svector const& sat_path() const { return m_sat_path; } + // add constraints to the root node from external solver void add_str_eq(euf::snode* lhs, euf::snode* rhs); void add_str_mem(euf::snode* str, euf::snode* regex); @@ -625,7 +636,7 @@ namespace seq { void generate_length_constraints(vector& constraints); private: - search_result search_dfs(nielsen_node* node, unsigned depth); + search_result search_dfs(nielsen_node* node, unsigned depth, svector& cur_path); // create a fresh variable with a unique name euf::snode* mk_fresh_var(); diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index b91dd7455..03d8e5d98 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -193,10 +193,15 @@ namespace smt { m_prop_queue.push_back({prop_item::pos_mem_prop, idx}); } else { - unsigned idx = m_state.neg_mems().size(); + // ¬(str ∈ R) ≡ str ∈ complement(R): store as a positive membership + // so the Nielsen graph sees it uniformly; the original negative literal + // is kept in mem_source for conflict reporting. + expr_ref re_compl(m_seq.re.mk_complement(re), get_manager()); + euf::snode* sn_re_compl = get_snode(re_compl.get()); + unsigned idx = m_state.str_mems().size(); literal lit(v, true); - m_state.add_neg_mem(sn_str, sn_re, lit); - m_prop_queue.push_back({prop_item::neg_mem_prop, idx}); + m_state.add_str_mem(sn_str, sn_re_compl, lit); + m_prop_queue.push_back({prop_item::pos_mem_prop, idx}); } TRACE(seq, tout << "nseq assign_eh: " << (is_true ? "" : "¬") @@ -256,9 +261,6 @@ namespace smt { case prop_item::pos_mem_prop: propagate_pos_mem(item.m_idx); break; - case prop_item::neg_mem_prop: - propagate_neg_mem(item.m_idx); - break; } } } @@ -313,31 +315,6 @@ namespace smt { ensure_length_var(s_expr); } - void theory_nseq::propagate_neg_mem(unsigned idx) { - auto const& entry = m_state.get_neg_mem(idx); - - if (!entry.m_str || !entry.m_regex) - return; - - // ¬(s in Σ*) is always false → conflict - if (m_regex.is_full_regex(entry.m_regex)) { - enode_pair_vector eqs; - literal_vector lits; - lits.push_back(entry.m_lit); - set_conflict(eqs, lits); - return; - } - - // ¬(ε in R) where R is nullable → conflict - if (entry.m_str->is_empty() && entry.m_regex->is_nullable()) { - enode_pair_vector eqs; - literal_vector lits; - lits.push_back(entry.m_lit); - set_conflict(eqs, lits); - return; - } - } - void theory_nseq::ensure_length_var(expr* e) { if (!e || !m_seq.is_seq(e)) return; @@ -397,49 +374,67 @@ namespace smt { final_check_status theory_nseq::final_check_eh(unsigned /*final_check_round*/) { // Always assert non-negativity for all string theory vars, // even when there are no string equations/memberships. - if (assert_nonneg_for_all_vars()) + if (assert_nonneg_for_all_vars()) { + IF_VERBOSE(1, verbose_stream() << "nseq final_check: nonneg assertions added, FC_CONTINUE\n";); return FC_CONTINUE; + } // If there are unhandled boolean string predicates (prefixof, contains, etc.) // we cannot declare sat — return unknown. - if (has_unhandled_preds()) + if (has_unhandled_preds()) { + IF_VERBOSE(1, verbose_stream() << "nseq final_check: unhandled preds, FC_GIVEUP\n";); return FC_GIVEUP; + } - if (m_state.empty() && m_ho_terms.empty()) + if (m_state.empty() && m_ho_terms.empty()) { + IF_VERBOSE(1, verbose_stream() << "nseq final_check: empty state+ho, FC_DONE (no solve)\n";); return FC_DONE; + } // unfold higher-order terms when sequence structure is known - if (unfold_ho_terms()) + if (unfold_ho_terms()) { + IF_VERBOSE(1, verbose_stream() << "nseq final_check: unfolded ho_terms, FC_CONTINUE\n";); return FC_CONTINUE; + } - if (m_state.empty()) + if (m_state.empty()) { + IF_VERBOSE(1, verbose_stream() << "nseq final_check: empty state (after ho), FC_DONE (no solve)\n";); return FC_DONE; + } + IF_VERBOSE(1, verbose_stream() << "nseq final_check: populating graph with " + << m_state.str_eqs().size() << " eqs, " << m_state.str_mems().size() << " mems\n";); populate_nielsen_graph(); // assert length constraints derived from string equalities - if (assert_length_constraints()) + if (assert_length_constraints()) { + IF_VERBOSE(1, verbose_stream() << "nseq final_check: length constraints asserted, FC_CONTINUE\n";); return FC_CONTINUE; + } ++m_num_final_checks; m_nielsen.set_max_search_depth(get_fparams().m_nseq_max_depth); + IF_VERBOSE(1, verbose_stream() << "nseq final_check: calling solve()\n";); auto result = m_nielsen.solve(); if (result == seq::nielsen_graph::search_result::sat) { + IF_VERBOSE(1, verbose_stream() << "nseq final_check: solve SAT, sat_node=" + << (m_nielsen.sat_node() ? "set" : "null") << "\n";); // Nielsen found a consistent assignment for positive constraints. - // If there are negative memberships or disequalities we haven't verified, - // we cannot soundly declare sat. - if (!m_state.neg_mems().empty() || !m_state.diseqs().empty()) + // If there are disequalities we haven't verified, we cannot soundly declare sat. + if (!m_state.diseqs().empty()) return FC_GIVEUP; return FC_DONE; } if (result == seq::nielsen_graph::search_result::unsat) { + IF_VERBOSE(1, verbose_stream() << "nseq final_check: solve UNSAT\n";); explain_nielsen_conflict(); return FC_CONTINUE; } + IF_VERBOSE(1, verbose_stream() << "nseq final_check: solve UNKNOWN, FC_GIVEUP\n";); return FC_GIVEUP; } @@ -556,7 +551,6 @@ namespace smt { out << " str_eqs: " << m_state.str_eqs().size() << "\n"; out << " str_mems: " << m_state.str_mems().size() << "\n"; out << " diseqs: " << m_state.diseqs().size() << "\n"; - out << " neg_mems: " << m_state.neg_mems().size() << "\n"; out << " prop_queue: " << m_prop_qhead << "/" << m_prop_queue.size() << "\n"; out << " ho_terms: " << m_ho_terms.size() << "\n"; } diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h index 0f4e94a87..6e7be11b9 100644 --- a/src/smt/theory_nseq.h +++ b/src/smt/theory_nseq.h @@ -45,7 +45,7 @@ namespace smt { // propagation queue struct prop_item { - enum kind_t { eq_prop, diseq_prop, pos_mem_prop, neg_mem_prop } m_kind; + enum kind_t { eq_prop, diseq_prop, pos_mem_prop } m_kind; unsigned m_idx; }; svector m_prop_queue; @@ -112,7 +112,6 @@ namespace smt { void propagate_eq(unsigned idx); void propagate_diseq(unsigned idx); void propagate_pos_mem(unsigned idx); - void propagate_neg_mem(unsigned idx); void ensure_length_var(expr* e); // higher-order term unfolding