diff --git a/src/params/smt_params.cpp b/src/params/smt_params.cpp index a80483d0f..29a0d72a5 100644 --- a/src/params/smt_params.cpp +++ b/src/params/smt_params.cpp @@ -187,9 +187,9 @@ void smt_params::display(std::ostream & out) const { } void smt_params::validate_string_solver(symbol const& s) const { - if (s == "z3str3" || s == "seq" || s == "empty" || s == "auto" || s == "none") + if (s == "z3str3" || s == "seq" || s == "empty" || s == "auto" || s == "none" || s == "nseq") return; - throw default_exception("Invalid string solver value. Legal values are z3str3, seq, empty, auto, none"); + throw default_exception("Invalid string solver value. Legal values are z3str3, seq, empty, auto, none, nseq"); } void smt_params::setup_QF_UF() { diff --git a/src/params/smt_params_helper.pyg b/src/params/smt_params_helper.pyg index 451a07964..58d5fbf8c 100644 --- a/src/params/smt_params_helper.pyg +++ b/src/params/smt_params_helper.pyg @@ -122,7 +122,7 @@ def_module_params(module_name='smt', ('dack.gc_inv_decay', DOUBLE, 0.8, 'Dynamic ackermannization garbage collection decay'), ('dack.threshold', UINT, 10, ' number of times the congruence rule must be used before Leibniz\'s axiom is expanded'), ('theory_case_split', BOOL, False, 'Allow the context to use heuristics involving theory case splits, which are a set of literals of which exactly one can be assigned True. If this option is false, the context will generate extra axioms to enforce this instead.'), - ('string_solver', SYMBOL, 'seq', 'solver for string/sequence theories. options are: \'z3str3\' (specialized string solver), \'seq\' (sequence solver), \'auto\' (use static features to choose best solver), \'empty\' (a no-op solver that forces an answer unknown if strings were used), \'none\' (no solver)'), + ('string_solver', SYMBOL, 'seq', 'solver for string/sequence theories. options are: \'z3str3\' (specialized string solver), \'seq\' (sequence solver), \'auto\' (use static features to choose best solver), \'empty\' (a no-op solver that forces an answer unknown if strings were used), \'none\' (no solver), \'nseq\' (Nielsen-based string solver)'), ('core.validate', BOOL, False, '[internal] validate unsat core produced by SMT context. This option is intended for debugging'), ('seq.split_w_len', BOOL, True, 'enable splitting guided by length constraints'), ('seq.validate', BOOL, False, 'enable self-validation of theory axioms created by seq theory'), diff --git a/src/smt/CMakeLists.txt b/src/smt/CMakeLists.txt index a35b809e1..119845726 100644 --- a/src/smt/CMakeLists.txt +++ b/src/smt/CMakeLists.txt @@ -64,6 +64,10 @@ z3_add_component(smt theory_fpa.cpp theory_intblast.cpp theory_lra.cpp + theory_nseq.cpp + nseq_state.cpp + nseq_regex.cpp + nseq_model.cpp theory_opt.cpp theory_pb.cpp theory_recfun.cpp @@ -91,4 +95,5 @@ z3_add_component(smt proto_model simplex substitution + smt_seq ) diff --git a/src/smt/nseq_model.cpp b/src/smt/nseq_model.cpp new file mode 100644 index 000000000..feb0d002d --- /dev/null +++ b/src/smt/nseq_model.cpp @@ -0,0 +1,17 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + nseq_model.cpp + +Abstract: + + Implementation of nseq_model. + +Author: + + Nikolaj Bjorner (nbjorner) 2026-03-01 + +--*/ +#include "smt/nseq_model.h" diff --git a/src/smt/nseq_model.h b/src/smt/nseq_model.h new file mode 100644 index 000000000..b804cca31 --- /dev/null +++ b/src/smt/nseq_model.h @@ -0,0 +1,72 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + nseq_model.h + +Abstract: + + Model generation from solved Nielsen graph. + +Author: + + Nikolaj Bjorner (nbjorner) 2026-03-01 + +--*/ +#pragma once + +#include "ast/ast.h" +#include "ast/seq_decl_plugin.h" +#include "util/zstring.h" +#include "ast/euf/euf_sgraph.h" +#include "smt/seq/seq_nielsen.h" +#include +#include + +namespace smt { + + class nseq_model { + ast_manager& m; + seq_util m_seq; + euf::sgraph& m_sg; + unsigned m_fresh_counter = 0; + + public: + nseq_model(ast_manager& m, euf::sgraph& sg) : m(m), m_seq(m), m_sg(sg) {} + + // generate a fresh string value (used when a variable is unconstrained) + expr_ref mk_fresh_value() { + std::string name = "s!" + std::to_string(m_fresh_counter++); + zstring zs(name.c_str()); + return expr_ref(m_seq.str.mk_string(zs), m); + } + + // extract variable assignments from a satisfied leaf node + // Returns true if all variables got a valid assignment + bool extract_assignments(seq::nielsen_node* node, + std::vector>& assignment) { + if (!node) + return false; + for (auto const& eq : node->str_eqs()) { + if (!eq.m_lhs || !eq.m_rhs) + continue; + if (eq.m_lhs->is_var() && eq.m_rhs->get_expr()) { + assignment.emplace_back(eq.m_lhs, eq.m_rhs->get_expr()); + } + else if (eq.m_rhs->is_var() && eq.m_lhs->get_expr()) { + assignment.emplace_back(eq.m_rhs, eq.m_lhs->get_expr()); + } + } + return true; + } + + // validate that a regex membership constraint is satisfied by the assignment + bool validate_regex(seq::str_mem const& mem, + obj_map const& assignment) { + // stub: assume valid for now + return true; + } + }; + +} diff --git a/src/smt/nseq_regex.cpp b/src/smt/nseq_regex.cpp new file mode 100644 index 000000000..7a36a26e7 --- /dev/null +++ b/src/smt/nseq_regex.cpp @@ -0,0 +1,17 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + nseq_regex.cpp + +Abstract: + + Implementation of nseq_regex. + +Author: + + Nikolaj Bjorner (nbjorner) 2026-03-01 + +--*/ +#include "smt/nseq_regex.h" diff --git a/src/smt/nseq_regex.h b/src/smt/nseq_regex.h new file mode 100644 index 000000000..67598b36a --- /dev/null +++ b/src/smt/nseq_regex.h @@ -0,0 +1,66 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + nseq_regex.h + +Abstract: + + Regex membership handling using Brzozowski derivatives. + Processes str_mem constraints after character consumption. + +Author: + + Nikolaj Bjorner (nbjorner) 2026-03-01 + +--*/ +#pragma once + +#include "ast/euf/euf_sgraph.h" +#include "smt/seq/seq_nielsen.h" + +namespace smt { + + class nseq_regex { + euf::sgraph& m_sg; + + public: + nseq_regex(euf::sgraph& sg) : m_sg(sg) {} + + // check if a regex snode represents the empty language + bool is_empty_regex(euf::snode* re) const { + return re && re->is_fail(); + } + + // compute derivative of regex re with respect to char elem and + // return a new str_mem for the resulting constraint + seq::str_mem derive(seq::str_mem const& mem, euf::snode* elem) { + euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, elem); + euf::snode* new_str = m_sg.drop_first(mem.m_str); + return seq::str_mem(new_str, deriv, mem.m_history, mem.m_id, mem.m_dep); + } + + // process a regex membership constraint after one character has been consumed + // returns false if the resulting regex is empty (conflict) + bool process_str_mem(seq::str_mem const& mem, + vector& out_mems) { + if (!mem.m_str || !mem.m_regex) + return true; + // if regex does not accept the empty string and the string side is empty, conflict + if (mem.m_str->is_empty()) { + return mem.m_regex->is_nullable(); + } + // compute minterms for the regex + euf::snode_vector minterms; + m_sg.compute_minterms(mem.m_regex, minterms); + for (euf::snode* ch : minterms) { + seq::str_mem new_mem = derive(mem, ch); + if (!is_empty_regex(new_mem.m_regex)) + out_mems.push_back(new_mem); + } + return true; + } + }; + +} diff --git a/src/smt/nseq_state.cpp b/src/smt/nseq_state.cpp new file mode 100644 index 000000000..1b6fc2f23 --- /dev/null +++ b/src/smt/nseq_state.cpp @@ -0,0 +1,17 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + nseq_state.cpp + +Abstract: + + Implementation of nseq_state. + +Author: + + Nikolaj Bjorner (nbjorner) 2026-03-01 + +--*/ +#include "smt/nseq_state.h" diff --git a/src/smt/nseq_state.h b/src/smt/nseq_state.h new file mode 100644 index 000000000..41306384c --- /dev/null +++ b/src/smt/nseq_state.h @@ -0,0 +1,75 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + nseq_state.h + +Abstract: + + Constraint store bridging the SMT context to the Nielsen graph. + Holds word equations and regex membership constraints with + push/pop support for backtracking. + +Author: + + Nikolaj Bjorner (nbjorner) 2026-03-01 + +--*/ +#pragma once + +#include "util/vector.h" +#include "ast/euf/euf_sgraph.h" +#include "smt/seq/seq_nielsen.h" + +namespace smt { + + class nseq_state { + euf::sgraph& m_sg; + vector m_str_eqs; + vector m_str_mems; + unsigned_vector m_str_eq_lim; + unsigned_vector m_str_mem_lim; + unsigned m_next_mem_id = 0; + + public: + nseq_state(euf::sgraph& sg) : m_sg(sg) {} + + void push() { + m_str_eq_lim.push_back(m_str_eqs.size()); + m_str_mem_lim.push_back(m_str_mems.size()); + } + + void pop(unsigned n) { + for (unsigned i = 0; i < n; ++i) { + m_str_eqs.shrink(m_str_eq_lim.back()); + m_str_eq_lim.pop_back(); + m_str_mems.shrink(m_str_mem_lim.back()); + m_str_mem_lim.pop_back(); + } + } + + void add_str_eq(euf::snode* lhs, euf::snode* rhs) { + seq::dep_tracker dep; + m_str_eqs.push_back(seq::str_eq(lhs, rhs, dep)); + } + + void add_str_mem(euf::snode* str, euf::snode* regex) { + seq::dep_tracker dep; + m_str_mems.push_back(seq::str_mem(str, regex, nullptr, m_next_mem_id++, dep)); + } + + vector const& str_eqs() const { return m_str_eqs; } + vector const& str_mems() const { return m_str_mems; } + + bool empty() const { return m_str_eqs.empty() && m_str_mems.empty(); } + + void reset() { + m_str_eqs.reset(); + m_str_mems.reset(); + m_str_eq_lim.reset(); + m_str_mem_lim.reset(); + } + }; + +} diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 90b1ffa2a..6b3a3efe5 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -308,4 +308,325 @@ namespace seq { return out; } + // ----------------------------------------------------------------------- + // nielsen_node: simplify_and_init + // ----------------------------------------------------------------------- + + simplify_result nielsen_node::simplify_and_init(nielsen_graph& g) { + euf::sgraph& sg = g.sg(); + bool changed = true; + + while (changed) { + changed = false; + + // pass 1: remove trivially satisfied equalities + unsigned wi = 0; + for (unsigned i = 0; i < m_str_eq.size(); ++i) { + str_eq& eq = m_str_eq[i]; + if (eq.is_trivial()) + continue; + m_str_eq[wi++] = eq; + } + if (wi < m_str_eq.size()) { + m_str_eq.shrink(wi); + changed = true; + } + + // pass 2: detect symbol clashes and empty-propagation + for (str_eq& eq : m_str_eq) { + if (!eq.m_lhs || !eq.m_rhs) + continue; + + // both sides start with a concrete character: check match + if (eq.m_lhs->is_char() && eq.m_rhs->is_char()) { + if (eq.m_lhs->id() != eq.m_rhs->id()) { + // symbol clash + m_is_general_conflict = true; + m_reason = backtrack_reason::symbol_clash; + return simplify_result::conflict; + } + // same char: drop from both sides + eq.m_lhs = sg.drop_first(eq.m_lhs); + eq.m_rhs = sg.drop_first(eq.m_rhs); + changed = true; + continue; + } + + // one side empty, the other not empty => conflict or substitution + if (eq.m_lhs->is_empty() && !eq.m_rhs->is_empty()) { + // rhs must also be empty; if it is a concrete non-empty string => conflict + if (eq.m_rhs->is_char() || eq.m_rhs->is_concat()) { + // check if rhs has any non-variable tokens + euf::snode_vector tokens; + eq.m_rhs->collect_tokens(tokens); + bool all_vars = true; + for (euf::snode* t : tokens) + if (!t->is_var()) { all_vars = false; break; } + if (!all_vars) { + m_is_general_conflict = true; + m_reason = backtrack_reason::symbol_clash; + return simplify_result::conflict; + } + // substitute: every variable in rhs -> empty + for (euf::snode* t : tokens) { + if (t->is_var()) { + nielsen_subst s(t, sg.mk_empty(), eq.m_dep); + apply_subst(sg, s); + changed = true; + } + } + } + continue; + } + if (eq.m_rhs->is_empty() && !eq.m_lhs->is_empty()) { + euf::snode_vector tokens; + eq.m_lhs->collect_tokens(tokens); + bool all_vars = true; + for (euf::snode* t : tokens) + if (!t->is_var()) { all_vars = false; break; } + if (!all_vars) { + m_is_general_conflict = true; + m_reason = backtrack_reason::symbol_clash; + return simplify_result::conflict; + } + for (euf::snode* t : tokens) { + if (t->is_var()) { + nielsen_subst s(t, sg.mk_empty(), eq.m_dep); + apply_subst(sg, s); + changed = true; + } + } + continue; + } + + // prefix matching: lhs and rhs both start with the same char => cancel + { + euf::snode_vector lhs_toks, rhs_toks; + eq.m_lhs->collect_tokens(lhs_toks); + eq.m_rhs->collect_tokens(rhs_toks); + unsigned prefix = 0; + while (prefix < lhs_toks.size() && prefix < rhs_toks.size() && + lhs_toks[prefix]->is_char() && rhs_toks[prefix]->is_char()) { + if (lhs_toks[prefix]->id() != rhs_toks[prefix]->id()) { + m_is_general_conflict = true; + m_reason = backtrack_reason::symbol_clash; + return simplify_result::conflict; + } + ++prefix; + } + if (prefix > 0) { + eq.m_lhs = sg.drop_left(eq.m_lhs, prefix); + eq.m_rhs = sg.drop_left(eq.m_rhs, prefix); + changed = true; + } + } + } + } + + // check for regex memberships that are immediately infeasible + for (str_mem& mem : m_str_mem) { + if (!mem.m_str || !mem.m_regex) + continue; + if (mem.m_str->is_empty() && !mem.m_regex->is_nullable()) { + m_is_general_conflict = true; + m_reason = backtrack_reason::regex; + return simplify_result::conflict; + } + } + + if (is_satisfied()) + return simplify_result::satisfied; + + return simplify_result::proceed; + } + + bool nielsen_node::is_satisfied() const { + for (str_eq const& eq : m_str_eq) + if (!eq.is_trivial()) return false; + return m_str_mem.empty(); + } + + bool nielsen_node::is_subsumed_by(nielsen_node const& other) const { + // check if every constraint in 'other' also appears in 'this' + for (str_eq const& oeq : other.m_str_eq) { + bool found = false; + for (str_eq const& teq : m_str_eq) + if (teq == oeq) { found = true; break; } + if (!found) return false; + } + for (str_mem const& omem : other.m_str_mem) { + bool found = false; + for (str_mem const& tmem : m_str_mem) + if (tmem == omem) { found = true; break; } + if (!found) return false; + } + return true; + } + + // ----------------------------------------------------------------------- + // nielsen_graph: search + // ----------------------------------------------------------------------- + + nielsen_graph::search_result nielsen_graph::solve() { + if (!m_root) + return search_result::sat; + + m_depth_bound = 10; + for (unsigned iter = 0; iter < 6; ++iter, m_depth_bound *= 2) { + inc_run_idx(); + search_result r = search_dfs(m_root, 0); + if (r != search_result::unknown) + return r; + // depth limit hit – increase bound + } + return search_result::unknown; + } + + nielsen_graph::search_result nielsen_graph::search_dfs(nielsen_node* node, unsigned depth) { + simplify_result sr = node->simplify_and_init(*this); + + if (sr == simplify_result::conflict) + return search_result::unsat; + if (sr == simplify_result::satisfied || node->is_satisfied()) + return search_result::sat; + + if (depth >= m_depth_bound) + return search_result::unknown; + + if (!generate_extensions(node, depth)) + return search_result::unsat; + + bool any_unknown = false; + for (nielsen_edge* e : node->outgoing()) { + nielsen_node* child = e->tgt(); + search_result r = search_dfs(child, depth + 1); + if (r == search_result::sat) + return search_result::sat; + if (r == search_result::unknown) + any_unknown = true; + } + return any_unknown ? search_result::unknown : search_result::unsat; + } + + simplify_result nielsen_graph::simplify_node(nielsen_node* node) { + return node->simplify_and_init(*this); + } + + bool nielsen_graph::generate_extensions(nielsen_node* node, unsigned /*depth*/) { + // find the first non-trivial string equality to split on + for (str_eq const& eq : node->str_eqs()) { + if (eq.is_trivial()) + continue; + if (!eq.m_lhs || !eq.m_rhs) + continue; + + euf::snode_vector lhs_toks, rhs_toks; + eq.m_lhs->collect_tokens(lhs_toks); + eq.m_rhs->collect_tokens(rhs_toks); + + if (lhs_toks.empty() || rhs_toks.empty()) + continue; + + euf::snode* lhead = lhs_toks[0]; + euf::snode* rhead = rhs_toks[0]; + + // Det modifier: if one side starts with a variable whose other side is empty + if (lhead->is_var() && eq.m_rhs->is_empty()) { + // substitute lhead -> empty + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(lhead, m_sg.mk_empty(), eq.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + return true; + } + if (rhead->is_var() && eq.m_lhs->is_empty()) { + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(rhead, m_sg.mk_empty(), eq.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + return true; + } + + // Const Nielsen modifier: lhs starts with char, rhs starts with var + // -> substitute rhs_var = char . fresh_var + if (lhead->is_char() && rhead->is_var()) { + symbol fresh_name(("v!" + std::to_string(node->id())).c_str()); + euf::snode* fresh = m_sg.mk_var(fresh_name); + euf::snode* replacement = m_sg.mk_concat(lhead, fresh); + + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(rhead, replacement, eq.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + return true; + } + + if (rhead->is_char() && lhead->is_var()) { + symbol fresh_name(("v!" + std::to_string(node->id())).c_str()); + euf::snode* fresh = m_sg.mk_var(fresh_name); + euf::snode* replacement = m_sg.mk_concat(rhead, fresh); + + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(lhead, replacement, eq.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + return true; + } + + // Eq split modifier: both sides start with variables x.A = y.B + // Produce three children: + // 1) x = eps, A = y.B + // 2) x = y, A = B (when len(x) = len(y)) + // 3) y = eps, x.A = B + if (lhead->is_var() && rhead->is_var()) { + // child 1: lhead -> eps + { + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(lhead, m_sg.mk_empty(), eq.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + } + // child 2: lhead = rhead (if different vars, substitute lhead -> rhead) + if (lhead->id() != rhead->id()) { + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, false); + nielsen_subst s(lhead, rhead, eq.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + } + // child 3: rhead -> eps + { + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(rhead, m_sg.mk_empty(), eq.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + } + return true; + } + + // no applicable modifier for this equality; try next + } + + // no extension was generated + return false; + } + + void nielsen_graph::collect_conflict_deps(dep_tracker& deps) const { + for (nielsen_node const* n : m_nodes) { + if (!n->is_currently_conflict()) + continue; + for (str_eq const& eq : n->str_eqs()) + deps.merge(eq.m_dep); + for (str_mem const& mem : n->str_mems()) + deps.merge(mem.m_dep); + } + } + } + diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 09e925115..3834a56b3 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -459,6 +459,16 @@ namespace seq { // apply a substitution to all constraints void apply_subst(euf::sgraph& sg, nielsen_subst const& s); + + // simplify all constraints at this node and initialize status. + // Returns proceed, conflict, satisfied, or restart. + simplify_result simplify_and_init(nielsen_graph& g); + + // true if all str_eqs are trivial and there are no str_mems + bool is_satisfied() const; + + // true if other's constraint set is a subset of this node's + bool is_subsumed_by(nielsen_node const& other) const; }; // the overall Nielsen transformation graph @@ -514,6 +524,25 @@ namespace seq { // reset all nodes and state void reset(); + + // search result returned by solve() / search_dfs() + enum class search_result { sat, unsat, unknown }; + + // main search entry point: iterative deepening DFS + search_result solve(); + + // simplify a node's constraints (delegate to node) + simplify_result simplify_node(nielsen_node* node); + + // generate child nodes by applying modifier rules + // returns true if at least one child was generated + bool generate_extensions(nielsen_node* node, unsigned depth); + + // collect dependency information from conflicting constraints + void collect_conflict_deps(dep_tracker& deps) const; + + private: + search_result search_dfs(nielsen_node* node, unsigned depth); }; } diff --git a/src/smt/smt_setup.cpp b/src/smt/smt_setup.cpp index 69dec1348..0798ffb16 100644 --- a/src/smt/smt_setup.cpp +++ b/src/smt/smt_setup.cpp @@ -35,6 +35,7 @@ Revision History: #include "smt/theory_seq_empty.h" #include "smt/theory_seq.h" #include "smt/theory_char.h" +#include "smt/theory_nseq.h" #include "smt/theory_special_relations.h" #include "smt/theory_sls.h" #include "smt/theory_pb.h" @@ -571,7 +572,9 @@ namespace smt { else if (m_params.m_string_solver == "auto") { setup_unknown(); } - + else if (m_params.m_string_solver == "nseq") { + setup_nseq(); + } else if (m_params.m_string_solver == "empty") { setup_seq(); } @@ -579,7 +582,7 @@ namespace smt { // don't register any solver. } else { - throw default_exception("invalid parameter for smt.string_solver, valid options are 'seq', 'auto'"); + throw default_exception("invalid parameter for smt.string_solver, valid options are 'seq', 'auto', 'nseq'"); } } @@ -754,11 +757,14 @@ namespace smt { else if (m_params.m_string_solver == "none") { // don't register any solver. } + else if (m_params.m_string_solver == "nseq") { + setup_nseq(); + } else if (m_params.m_string_solver == "auto") { setup_seq(); } else { - throw default_exception("invalid parameter for smt.string_solver, valid options are 'seq', 'auto'"); + throw default_exception("invalid parameter for smt.string_solver, valid options are 'seq', 'auto', 'nseq'"); } } @@ -785,6 +791,11 @@ namespace smt { m_context.register_plugin(alloc(smt::theory_char, m_context)); } + void setup::setup_nseq() { + m_context.register_plugin(alloc(smt::theory_nseq, m_context)); + setup_char(); + } + void setup::setup_finite_set() { m_context.register_plugin(alloc(smt::theory_finite_set, m_context)); } diff --git a/src/smt/smt_setup.h b/src/smt/smt_setup.h index 897755ef7..8fcb351de 100644 --- a/src/smt/smt_setup.h +++ b/src/smt/smt_setup.h @@ -102,6 +102,7 @@ namespace smt { void setup_seq_str(static_features const & st); void setup_seq(); void setup_char(); + void setup_nseq(); void setup_finite_set(); void setup_card(); void setup_sls(); diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp new file mode 100644 index 000000000..d41b2e7f6 --- /dev/null +++ b/src/smt/theory_nseq.cpp @@ -0,0 +1,171 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + theory_nseq.cpp + +Abstract: + + ZIPT string solver theory for Z3. + Implementation of theory_nseq. + +Author: + + Nikolaj Bjorner (nbjorner) 2026-03-01 + +--*/ +#include "smt/theory_nseq.h" +#include "smt/smt_context.h" +#include "util/statistics.h" + +namespace smt { + + theory_nseq::theory_nseq(context& ctx) : + theory(ctx, ctx.get_manager().mk_family_id("seq")), + m_seq(ctx.get_manager()), + m_autil(ctx.get_manager()), + m_rewriter(ctx.get_manager()), + m_egraph(ctx.get_manager()), + m_sgraph(ctx.get_manager(), m_egraph), + m_nielsen(m_sgraph), + m_state(m_sgraph) + {} + + // ----------------------------------------------------------------------- + // Internalization + // ----------------------------------------------------------------------- + + bool theory_nseq::internalize_atom(app* atom, bool /*gate_ctx*/) { + return internalize_term(atom); + } + + bool theory_nseq::internalize_term(app* term) { + context& ctx = get_context(); + ast_manager& m = get_manager(); + + // ensure children are internalized first + for (expr* arg : *term) { + if (is_app(arg) && m_seq.is_seq(arg)) { + ctx.internalize(arg, false); + } + } + + if (!ctx.e_internalized(term)) { + ctx.mk_enode(term, false, m.is_bool(term), true); + } + + enode* en = ctx.get_enode(term); + if (!is_attached_to_var(en)) { + theory_var v = mk_var(en); + (void)v; + } + + // register in our private sgraph + get_snode(term); + return true; + } + + // ----------------------------------------------------------------------- + // Equality / disequality notifications + // ----------------------------------------------------------------------- + + void theory_nseq::new_eq_eh(theory_var v1, theory_var v2) { + expr* e1 = get_enode(v1)->get_expr(); + expr* e2 = get_enode(v2)->get_expr(); + if (!m_seq.is_seq(e1) || !m_seq.is_seq(e2)) + return; + euf::snode* s1 = get_snode(e1); + euf::snode* s2 = get_snode(e2); + if (s1 && s2) + m_state.add_str_eq(s1, s2); + } + + void theory_nseq::new_diseq_eh(theory_var /*v1*/, theory_var /*v2*/) { + // not handled in this initial skeleton + } + + // ----------------------------------------------------------------------- + // Scope management + // ----------------------------------------------------------------------- + + void theory_nseq::push_scope_eh() { + theory::push_scope_eh(); + m_state.push(); + m_sgraph.push(); + } + + void theory_nseq::pop_scope_eh(unsigned num_scopes) { + theory::pop_scope_eh(num_scopes); + m_state.pop(num_scopes); + m_sgraph.pop(num_scopes); + } + + // ----------------------------------------------------------------------- + // Final check: build Nielsen graph and search + // ----------------------------------------------------------------------- + + void theory_nseq::populate_nielsen_graph() { + m_nielsen.reset(); + seq::nielsen_node* root = m_nielsen.mk_node(); + m_nielsen.set_root(root); + for (auto const& eq : m_state.str_eqs()) + root->add_str_eq(eq); + for (auto const& mem : m_state.str_mems()) + root->add_str_mem(mem); + } + + final_check_status theory_nseq::final_check_eh(unsigned /*final_check_round*/) { + if (m_state.empty()) + return FC_DONE; + // For now, give up if there are string constraints. + // The full search will be wired in once the Nielsen algorithms are complete. + populate_nielsen_graph(); + ++m_num_nodes_explored; + return FC_GIVEUP; + } + + // ----------------------------------------------------------------------- + // Model generation + // ----------------------------------------------------------------------- + + void theory_nseq::init_model(model_generator& /*mg*/) { + // stub – no model assignment for now + } + + // ----------------------------------------------------------------------- + // Statistics / display + // ----------------------------------------------------------------------- + + void theory_nseq::collect_statistics(::statistics& st) const { + st.update("nseq conflicts", m_num_conflicts); + st.update("nseq nodes explored", m_num_nodes_explored); + st.update("nseq depth increases", m_num_depth_increases); + } + + void theory_nseq::display(std::ostream& out) const { + out << "theory_nseq\n"; + out << " str_eqs: " << m_state.str_eqs().size() << "\n"; + out << " str_mems: " << m_state.str_mems().size() << "\n"; + } + + // ----------------------------------------------------------------------- + // Factory / clone + // ----------------------------------------------------------------------- + + theory* theory_nseq::mk_fresh(context* ctx) { + return alloc(theory_nseq, *ctx); + } + + // ----------------------------------------------------------------------- + // Helpers + // ----------------------------------------------------------------------- + + euf::snode* theory_nseq::get_snode(expr* e) { + euf::snode* s = m_sgraph.find(e); + if (!s) + s = m_sgraph.mk(e); + return s; + } + +} diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h new file mode 100644 index 000000000..c123a055a --- /dev/null +++ b/src/smt/theory_nseq.h @@ -0,0 +1,78 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + theory_nseq.h + +Abstract: + + ZIPT string solver theory for Z3. + Implements theory_nseq, a theory plugin for string/sequence constraints + using the Nielsen transformation graph (Nielsen graph). + +Author: + + Nikolaj Bjorner (nbjorner) 2026-03-01 + +--*/ +#pragma once + +#include "ast/seq_decl_plugin.h" +#include "ast/rewriter/seq_rewriter.h" +#include "ast/euf/euf_egraph.h" +#include "ast/euf/euf_sgraph.h" +#include "smt/smt_theory.h" +#include "smt/smt_arith_value.h" +#include "smt/seq/seq_nielsen.h" +#include "smt/nseq_state.h" +#include "smt/nseq_regex.h" +#include "smt/nseq_model.h" + +namespace smt { + + class theory_nseq : public theory { + seq_util m_seq; + arith_util m_autil; + seq_rewriter m_rewriter; + euf::egraph m_egraph; // private egraph (not shared with smt context) + euf::sgraph m_sgraph; // private sgraph + seq::nielsen_graph m_nielsen; + nseq_state m_state; + + // statistics + unsigned m_num_conflicts = 0; + unsigned m_num_nodes_explored = 0; + unsigned m_num_depth_increases = 0; + + // map from context enode to private sgraph snode + obj_map m_expr2snode; + + // required virtual methods + bool internalize_atom(app* a, bool gate_ctx) override; + bool internalize_term(app* term) override; + void new_eq_eh(theory_var v1, theory_var v2) override; + void new_diseq_eh(theory_var v1, theory_var v2) override; + theory* mk_fresh(context* ctx) override; + void display(std::ostream& out) const override; + + // optional overrides + bool can_propagate() override { return false; } + void propagate() override {} + final_check_status final_check_eh(unsigned) override; + void push_scope_eh() override; + void pop_scope_eh(unsigned num_scopes) override; + void init_model(model_generator& mg) override; + void collect_statistics(::statistics& st) const override; + + char const* get_name() const override { return "nseq"; } + + // private helpers + void populate_nielsen_graph(); + euf::snode* get_snode(expr* e); + + public: + theory_nseq(context& ctx); + }; + +} diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index b0d0a6a1e..c0d3ff6f7 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -132,6 +132,7 @@ add_executable(test-z3 sls_test.cpp sls_seq_plugin.cpp seq_nielsen.cpp + nseq_basic.cpp small_object_allocator.cpp smt2print_parse.cpp smt_context.cpp diff --git a/src/test/main.cpp b/src/test/main.cpp index 734dfe4b7..10c0269f7 100644 --- a/src/test/main.cpp +++ b/src/test/main.cpp @@ -287,6 +287,7 @@ int main(int argc, char ** argv) { TST(scoped_vector); TST(sls_seq_plugin); TST(seq_nielsen); + TST(nseq_basic); TST(ho_matcher); TST(finite_set); TST(finite_set_rewriter); diff --git a/src/test/nseq_basic.cpp b/src/test/nseq_basic.cpp new file mode 100644 index 000000000..f7a102e98 --- /dev/null +++ b/src/test/nseq_basic.cpp @@ -0,0 +1,109 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + nseq_basic.cpp + +Abstract: + + Basic unit tests for theory_nseq and supporting infrastructure. + +--*/ +#include "util/util.h" +#include "ast/reg_decl_plugins.h" +#include "ast/euf/euf_egraph.h" +#include "ast/euf/euf_sgraph.h" +#include "smt/seq/seq_nielsen.h" +#include "params/smt_params.h" +#include + +// Test 1: instantiation of nielsen_graph compiles and doesn't crash +static void test_nseq_instantiation() { + std::cout << "test_nseq_instantiation\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq::nielsen_graph ng(sg); + SASSERT(ng.root() == nullptr); + SASSERT(ng.num_nodes() == 0); + std::cout << " ok\n"; +} + +// Test 2: parameter validation accepts "nseq" +static void test_nseq_param_validation() { + std::cout << "test_nseq_param_validation\n"; + smt_params p; + // Should not throw + try { + p.validate_string_solver(symbol("nseq")); + std::cout << " ok: nseq accepted\n"; + } catch (...) { + SASSERT(false && "nseq should be accepted as a valid string_solver value"); + } + // Should not throw for legacy values + try { + p.validate_string_solver(symbol("seq")); + p.validate_string_solver(symbol("auto")); + p.validate_string_solver(symbol("none")); + std::cout << " ok: legacy values still accepted\n"; + } catch (...) { + SASSERT(false && "legacy values should still be accepted"); + } +} + +// Test 3: nielsen graph simplification (trivial case) +static void test_nseq_simplification() { + std::cout << "test_nseq_simplification\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq::nielsen_graph ng(sg); + + // Add a trivial equality: empty = empty + euf::snode* empty1 = sg.mk_empty(); + euf::snode* empty2 = sg.mk_empty(); + + ng.add_str_eq(empty1, empty2); + + seq::nielsen_graph::search_result r = ng.solve(); + // empty = empty is trivially satisfied + SASSERT(r == seq::nielsen_graph::search_result::sat); + std::cout << " ok: trivial equality solved as sat\n"; +} + +// Test 4: node is_satisfied check +static void test_nseq_node_satisfied() { + std::cout << "test_nseq_node_satisfied\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq::nielsen_graph ng(sg); + + seq::nielsen_node* node = ng.mk_node(); + // empty node has no constraints => satisfied + SASSERT(node->is_satisfied()); + + // add a trivial equality + euf::snode* empty = sg.mk_empty(); + seq::dep_tracker dep; + seq::str_eq eq(empty, empty, dep); + node->add_str_eq(eq); + SASSERT(node->str_eqs().size() == 1); + SASSERT(!node->str_eqs()[0].is_trivial() || node->str_eqs()[0].m_lhs == node->str_eqs()[0].m_rhs); + // After simplification, trivial equalities should be removed + seq::simplify_result sr = node->simplify_and_init(ng); + VERIFY(sr == seq::simplify_result::satisfied || sr == seq::simplify_result::proceed); + std::cout << " ok\n"; +} + +void tst_nseq_basic() { + test_nseq_instantiation(); + test_nseq_param_validation(); + test_nseq_simplification(); + test_nseq_node_satisfied(); + std::cout << "nseq_basic: all tests passed\n"; +}