From c325c56de4ed13464f54d524765c9acde99df6fb Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Fri, 27 Feb 2026 17:00:59 -0800 Subject: [PATCH] Add theory_nseq skeleton: new string solver selectable via smt.string_solver=nseq Phase 1 of the theory_nseq implementation: a stub theory solver for strings based on Nielsen transformations and lazy regex membership. - New files: theory_nseq.h/cpp with all required theory virtual methods - Add 'nseq' to smt.string_solver parameter validation and documentation - Wire into smt_setup.cpp dispatch (setup_QF_S, setup_seq_str, setup_nseq) - Currently returns FC_GIVEUP for non-trivial string constraints Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/params/smt_params.cpp | 4 +- src/params/smt_params_helper.pyg | 2 +- src/smt/CMakeLists.txt | 1 + src/smt/smt_setup.cpp | 18 ++++- src/smt/smt_setup.h | 1 + src/smt/theory_nseq.cpp | 121 +++++++++++++++++++++++++++++++ src/smt/theory_nseq.h | 70 ++++++++++++++++++ 7 files changed, 211 insertions(+), 6 deletions(-) create mode 100644 src/smt/theory_nseq.cpp create mode 100644 src/smt/theory_nseq.h diff --git a/src/params/smt_params.cpp b/src/params/smt_params.cpp index a80483d0f..82ae1485e 100644 --- a/src/params/smt_params.cpp +++ b/src/params/smt_params.cpp @@ -187,9 +187,9 @@ void smt_params::display(std::ostream & out) const { } void smt_params::validate_string_solver(symbol const& s) const { - if (s == "z3str3" || s == "seq" || s == "empty" || s == "auto" || s == "none") + if (s == "z3str3" || s == "seq" || s == "nseq" || s == "empty" || s == "auto" || s == "none") return; - throw default_exception("Invalid string solver value. Legal values are z3str3, seq, empty, auto, none"); + throw default_exception("Invalid string solver value. Legal values are z3str3, seq, nseq, empty, auto, none"); } void smt_params::setup_QF_UF() { diff --git a/src/params/smt_params_helper.pyg b/src/params/smt_params_helper.pyg index 451a07964..118112dd7 100644 --- a/src/params/smt_params_helper.pyg +++ b/src/params/smt_params_helper.pyg @@ -122,7 +122,7 @@ def_module_params(module_name='smt', ('dack.gc_inv_decay', DOUBLE, 0.8, 'Dynamic ackermannization garbage collection decay'), ('dack.threshold', UINT, 10, ' number of times the congruence rule must be used before Leibniz\'s axiom is expanded'), ('theory_case_split', BOOL, False, 'Allow the context to use heuristics involving theory case splits, which are a set of literals of which exactly one can be assigned True. If this option is false, the context will generate extra axioms to enforce this instead.'), - ('string_solver', SYMBOL, 'seq', 'solver for string/sequence theories. options are: \'z3str3\' (specialized string solver), \'seq\' (sequence solver), \'auto\' (use static features to choose best solver), \'empty\' (a no-op solver that forces an answer unknown if strings were used), \'none\' (no solver)'), + ('string_solver', SYMBOL, 'seq', 'solver for string/sequence theories. options are: \'z3str3\' (specialized string solver), \'seq\' (sequence solver), \'nseq\' (Nielsen-based sequence solver), \'auto\' (use static features to choose best solver), \'empty\' (a no-op solver that forces an answer unknown if strings were used), \'none\' (no solver)'), ('core.validate', BOOL, False, '[internal] validate unsat core produced by SMT context. This option is intended for debugging'), ('seq.split_w_len', BOOL, True, 'enable splitting guided by length constraints'), ('seq.validate', BOOL, False, 'enable self-validation of theory axioms created by seq theory'), diff --git a/src/smt/CMakeLists.txt b/src/smt/CMakeLists.txt index a35b809e1..f5c35c441 100644 --- a/src/smt/CMakeLists.txt +++ b/src/smt/CMakeLists.txt @@ -68,6 +68,7 @@ z3_add_component(smt theory_pb.cpp theory_recfun.cpp theory_seq.cpp + theory_nseq.cpp theory_sls.cpp theory_special_relations.cpp theory_user_propagator.cpp diff --git a/src/smt/smt_setup.cpp b/src/smt/smt_setup.cpp index 69dec1348..abaf4fbdf 100644 --- a/src/smt/smt_setup.cpp +++ b/src/smt/smt_setup.cpp @@ -34,6 +34,7 @@ Revision History: #include "smt/theory_dl.h" #include "smt/theory_seq_empty.h" #include "smt/theory_seq.h" +#include "smt/theory_nseq.h" #include "smt/theory_char.h" #include "smt/theory_special_relations.h" #include "smt/theory_sls.h" @@ -564,6 +565,9 @@ namespace smt { if (m_params.m_string_solver == "seq") { setup_unknown(); } + else if (m_params.m_string_solver == "nseq") { + setup_unknown(); + } else if (m_params.m_string_solver == "char") { setup_QF_BV(); setup_char(); @@ -579,7 +583,7 @@ namespace smt { // don't register any solver. } else { - throw default_exception("invalid parameter for smt.string_solver, valid options are 'seq', 'auto'"); + throw default_exception("invalid parameter for smt.string_solver, valid options are 'seq', 'nseq', 'auto'"); } } @@ -747,7 +751,10 @@ namespace smt { // check params for what to do here when it's ambiguous if (m_params.m_string_solver == "seq") { setup_seq(); - } + } + else if (m_params.m_string_solver == "nseq") { + setup_nseq(); + } else if (m_params.m_string_solver == "empty") { setup_seq(); } @@ -758,7 +765,7 @@ namespace smt { setup_seq(); } else { - throw default_exception("invalid parameter for smt.string_solver, valid options are 'seq', 'auto'"); + throw default_exception("invalid parameter for smt.string_solver, valid options are 'seq', 'nseq', 'auto'"); } } @@ -781,6 +788,11 @@ namespace smt { setup_char(); } + void setup::setup_nseq() { + m_context.register_plugin(alloc(smt::theory_nseq, m_context)); + setup_char(); + } + void setup::setup_char() { m_context.register_plugin(alloc(smt::theory_char, m_context)); } diff --git a/src/smt/smt_setup.h b/src/smt/smt_setup.h index 897755ef7..f9852c90b 100644 --- a/src/smt/smt_setup.h +++ b/src/smt/smt_setup.h @@ -101,6 +101,7 @@ namespace smt { void setup_dl(); void setup_seq_str(static_features const & st); void setup_seq(); + void setup_nseq(); void setup_char(); void setup_finite_set(); void setup_card(); diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp new file mode 100644 index 000000000..6ce242d26 --- /dev/null +++ b/src/smt/theory_nseq.cpp @@ -0,0 +1,121 @@ +/*++ +Copyright (c) 2025 Microsoft Corporation + +Module Name: + + theory_nseq.cpp + +Abstract: + + Theory solver for strings based on Nielsen transformations + and lazy regex membership (ZIPT algorithm). + +Author: + + Clemens Eisenhofer + Nikolaj Bjorner (nbjorner) 2025-2-28 + +--*/ + +#include "smt/theory_nseq.h" +#include "smt/smt_context.h" + +namespace smt { + + theory_nseq::theory_nseq(context& ctx) + : theory(ctx, ctx.get_manager().mk_family_id("seq")), + m_util(ctx.get_manager()), + m_autil(ctx.get_manager()), + m_seq_rewrite(ctx.get_manager()), + m_rewrite(ctx.get_manager()), + m_sk(ctx.get_manager(), m_rewrite), + m_arith_value(ctx.get_manager()), + m_has_seq(false) { + } + + theory_nseq::~theory_nseq() { + } + + void theory_nseq::init() { + } + + final_check_status theory_nseq::final_check_eh(unsigned) { + if (m_has_seq) + return FC_GIVEUP; + return FC_DONE; + } + + bool theory_nseq::internalize_atom(app* atom, bool) { + if (!m_has_seq) { + get_context().push_trail(value_trail(m_has_seq)); + m_has_seq = true; + } + return false; + } + + bool theory_nseq::internalize_term(app* term) { + if (!m_has_seq) { + get_context().push_trail(value_trail(m_has_seq)); + m_has_seq = true; + } + return false; + } + + void theory_nseq::internalize_eq_eh(app* atom, bool_var v) { + } + + void theory_nseq::new_eq_eh(theory_var v1, theory_var v2) { + } + + void theory_nseq::new_diseq_eh(theory_var v1, theory_var v2) { + } + + void theory_nseq::assign_eh(bool_var v, bool is_true) { + } + + bool theory_nseq::can_propagate() { + return false; + } + + void theory_nseq::propagate() { + } + + void theory_nseq::push_scope_eh() { + theory::push_scope_eh(); + } + + void theory_nseq::pop_scope_eh(unsigned num_scopes) { + theory::pop_scope_eh(num_scopes); + } + + void theory_nseq::restart_eh() { + } + + void theory_nseq::relevant_eh(app* n) { + } + + theory_var theory_nseq::mk_var(enode* n) { + return theory::mk_var(n); + } + + void theory_nseq::apply_sort_cnstr(enode* n, sort* s) { + } + + void theory_nseq::display(std::ostream& out) const { + out << "theory_nseq\n"; + } + + void theory_nseq::collect_statistics(::statistics& st) const { + } + + model_value_proc* theory_nseq::mk_value(enode* n, model_generator& mg) { + return nullptr; + } + + void theory_nseq::init_model(model_generator& mg) { + mg.register_factory(alloc(seq_factory, get_manager(), get_family_id(), mg.get_model())); + } + + void theory_nseq::finalize_model(model_generator& mg) { + } +} diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h new file mode 100644 index 000000000..791d91038 --- /dev/null +++ b/src/smt/theory_nseq.h @@ -0,0 +1,70 @@ +/*++ +Copyright (c) 2025 Microsoft Corporation + +Module Name: + + theory_nseq.h + +Abstract: + + Theory solver for strings based on Nielsen transformations + and lazy regex membership (ZIPT algorithm). + +Author: + + Clemens Eisenhofer + Nikolaj Bjorner (nbjorner) 2025-2-28 + +--*/ +#pragma once + +#include "ast/seq_decl_plugin.h" +#include "ast/arith_decl_plugin.h" +#include "ast/rewriter/seq_rewriter.h" +#include "ast/rewriter/seq_skolem.h" +#include "ast/rewriter/th_rewriter.h" +#include "model/seq_factory.h" +#include "smt/smt_theory.h" +#include "smt/smt_arith_value.h" +#include "smt/smt_model_generator.h" + +namespace smt { + + class theory_nseq : public theory { + seq_util m_util; + arith_util m_autil; + seq_rewriter m_seq_rewrite; + th_rewriter m_rewrite; + seq::skolem m_sk; + arith_value m_arith_value; + bool m_has_seq; + + final_check_status final_check_eh(unsigned) override; + bool internalize_atom(app* atom, bool) override; + bool internalize_term(app*) override; + void internalize_eq_eh(app* atom, bool_var v) override; + void new_eq_eh(theory_var, theory_var) override; + void new_diseq_eh(theory_var, theory_var) override; + void assign_eh(bool_var v, bool is_true) override; + bool can_propagate() override; + void propagate() override; + void push_scope_eh() override; + void pop_scope_eh(unsigned num_scopes) override; + void restart_eh() override; + void relevant_eh(app* n) override; + theory* mk_fresh(context* new_ctx) override { return alloc(theory_nseq, *new_ctx); } + char const* get_name() const override { return "nseq"; } + void display(std::ostream& out) const override; + void collect_statistics(::statistics& st) const override; + model_value_proc* mk_value(enode* n, model_generator& mg) override; + void init_model(model_generator& mg) override; + void finalize_model(model_generator& mg) override; + theory_var mk_var(enode* n) override; + void apply_sort_cnstr(enode* n, sort* s) override; + + public: + theory_nseq(context& ctx); + ~theory_nseq() override; + void init() override; + }; +}