diff --git a/src/smt/CMakeLists.txt b/src/smt/CMakeLists.txt index 04aadac54..00072f4e3 100644 --- a/src/smt/CMakeLists.txt +++ b/src/smt/CMakeLists.txt @@ -65,9 +65,7 @@ z3_add_component(smt theory_intblast.cpp theory_lra.cpp theory_nseq.cpp - nseq_state.cpp - nseq_regex.cpp - nseq_model.cpp + seq_model.cpp theory_opt.cpp theory_pb.cpp theory_recfun.cpp diff --git a/src/smt/seq/CMakeLists.txt b/src/smt/seq/CMakeLists.txt index e6ed34c34..d05d2c9af 100644 --- a/src/smt/seq/CMakeLists.txt +++ b/src/smt/seq/CMakeLists.txt @@ -1,5 +1,7 @@ z3_add_component(smt_seq SOURCES + seq_regex.cpp + seq_state.cpp seq_parikh.cpp seq_nielsen.cpp COMPONENT_DEPENDENCIES diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 52d3ec36b..0bc4df202 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -21,7 +21,7 @@ Author: #include "smt/seq/seq_nielsen.h" #include "smt/seq/seq_parikh.h" -#include "smt/nseq_regex.h" +#include "smt/seq/seq_regex.h" #include "ast/arith_decl_plugin.h" #include "ast/ast_pp.h" #include "ast/rewriter/seq_rewriter.h" @@ -513,13 +513,13 @@ namespace seq { m_sg(sg), m_solver(solver), m_parikh(alloc(seq_parikh, sg)), - m_nseq_regex(alloc(smt::nseq_regex, sg)), + m_seq_regex(alloc(seq::seq_regex, sg)), m_len_vars(sg.get_manager()) { } nielsen_graph::~nielsen_graph() { dealloc(m_parikh); - dealloc(m_nseq_regex); + dealloc(m_seq_regex); reset(); } @@ -1798,7 +1798,7 @@ namespace seq { // and check if the result intersected with the target regex is empty. // Detects infeasible constraints that would otherwise require // expensive exploration. Mirrors ZIPT NielsenNode.CheckRegexWidening. - if (g.m_nseq_regex) { + if (g.m_seq_regex) { for (str_mem const& mem : m_str_mem) { if (!mem.m_str || !mem.m_regex) continue; @@ -3290,11 +3290,11 @@ namespace seq { // 3. Fall back to simple star(R) euf::snode* stab_base = nullptr; - if (m_nseq_regex && m_nseq_regex->is_self_stabilizing(mem.m_regex)) { + if (m_seq_regex && m_seq_regex->is_self_stabilizing(mem.m_regex)) { stab_base = mem.m_regex; } - else if (m_nseq_regex && mem.m_history) { - stab_base = m_nseq_regex->strengthened_stabilizer(mem.m_regex, mem.m_history); + else if (m_seq_regex && mem.m_history) { + stab_base = m_seq_regex->strengthened_stabilizer(mem.m_regex, mem.m_history); } if (!stab_base) { @@ -3303,12 +3303,12 @@ namespace seq { } // Register the stabilizer - if (m_nseq_regex) - m_nseq_regex->add_stabilizer(mem.m_regex, stab_base); + if (m_seq_regex) + m_seq_regex->add_stabilizer(mem.m_regex, stab_base); // Check if stabilizer equals regex → self-stabilizing - if (m_nseq_regex && stab_base == mem.m_regex) - m_nseq_regex->set_self_stabilizing(mem.m_regex); + if (m_seq_regex && stab_base == mem.m_regex) + m_seq_regex->set_self_stabilizing(mem.m_regex); // Build stab_star = star(stab_base) expr* stab_expr = stab_base->get_expr(); @@ -3320,7 +3320,7 @@ namespace seq { continue; // Try subsumption: if L(x_range) ⊆ L(stab_star), drop the constraint - if (m_nseq_regex && m_nseq_regex->try_subsume(mem, *node)) + if (m_seq_regex && m_seq_regex->try_subsume(mem, *node)) continue; // Create child: x → pr · po @@ -4281,7 +4281,7 @@ namespace seq { bool nielsen_graph::check_regex_widening(nielsen_node const& node, euf::snode* str, euf::snode* regex) { - if (!str || !regex || !m_nseq_regex) + if (!str || !regex || !m_seq_regex) return false; // Only apply to ground regexes with non-trivial strings if (!regex->is_ground()) @@ -4313,7 +4313,7 @@ namespace seq { } else if (tok->is_var()) { // Variable → intersection of primitive regex constraints, or Σ* - euf::snode* x_range = m_nseq_regex->collect_primitive_regex_intersection(tok, node); + euf::snode* x_range = m_seq_regex->collect_primitive_regex_intersection(tok, node); if (x_range) { tok_re = x_range; } else { @@ -4389,7 +4389,7 @@ namespace seq { if (!inter_sn) return false; - lbool result = m_nseq_regex->is_empty_bfs(inter_sn, 5000); + lbool result = m_seq_regex->is_empty_bfs(inter_sn, 5000); return result == l_true; } @@ -4399,7 +4399,7 @@ namespace seq { // ----------------------------------------------------------------------- bool nielsen_graph::check_leaf_regex(nielsen_node const& node) { - if (!m_nseq_regex) + if (!m_seq_regex) return true; // Group str_mem constraints by variable (primitive constraints only) @@ -4421,7 +4421,7 @@ namespace seq { for (auto& [var_id, regexes] : var_regexes) { if (regexes.size() < 2) continue; - lbool result = m_nseq_regex->check_intersection_emptiness(regexes, 5000); + lbool result = m_seq_regex->check_intersection_emptiness(regexes, 5000); if (result == l_true) { // Intersection is empty — infeasible return false; diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 987350924..79fc91d34 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -244,8 +244,8 @@ Author: #include #include "model/model.h" -namespace smt { - class nseq_regex; // forward declaration (defined in smt/nseq_regex.h) +namespace seq { + class seq_regex; // forward declaration (defined in smt/seq/seq_regex.h) } namespace seq { @@ -761,7 +761,7 @@ namespace seq { // Regex membership module: stabilizers, emptiness checks, language // inclusion, derivatives. Allocated in the constructor; owned by this graph. - smt::nseq_regex* m_nseq_regex = nullptr; + seq::seq_regex* m_seq_regex = nullptr; // ----------------------------------------------- // Modification counter for substitution length tracking. @@ -895,8 +895,8 @@ namespace seq { // Caller takes ownership of the returned model pointer. bool solve_sat_path_ints(model_ref& mdl); - // accessor for the nseq_regex module - smt::nseq_regex* nseq_regex_module() const { return m_nseq_regex; } + // accessor for the seq_regex module + seq::seq_regex* seq_regex_module() const { return m_seq_regex; } private: search_result search_dfs(nielsen_node* node, unsigned depth, svector& cur_path); diff --git a/src/smt/nseq_regex.cpp b/src/smt/seq/seq_regex.cpp similarity index 94% rename from src/smt/nseq_regex.cpp rename to src/smt/seq/seq_regex.cpp index 743b74ef6..37f009eef 100644 --- a/src/smt/nseq_regex.cpp +++ b/src/smt/seq/seq_regex.cpp @@ -3,7 +3,7 @@ Copyright (c) 2026 Microsoft Corporation Module Name: - nseq_regex.cpp + seq_regex.cpp Abstract: @@ -11,24 +11,25 @@ Abstract: Author: + Clemens Eisenhofer 2026-03-01 Nikolaj Bjorner (nbjorner) 2026-03-01 --*/ -#include "smt/nseq_regex.h" +#include "smt/seq/seq_regex.h" #include -namespace smt { +namespace seq { // ----------------------------------------------------------------------- // Stabilizer store // ----------------------------------------------------------------------- - void nseq_regex::reset_stabilizers() { + void seq_regex::reset_stabilizers() { m_stabilizers.reset(); m_self_stabilizing.reset(); } - void nseq_regex::add_stabilizer(euf::snode* regex, euf::snode* stabilizer) { + void seq_regex::add_stabilizer(euf::snode* regex, euf::snode* stabilizer) { if (!regex || !stabilizer) return; @@ -43,7 +44,7 @@ namespace smt { stabs.push_back(stabilizer); } - euf::snode* nseq_regex::get_stabilizer_union(euf::snode* regex) { + euf::snode* seq_regex::get_stabilizer_union(euf::snode* regex) { if (!regex) return nullptr; @@ -73,7 +74,7 @@ namespace smt { return result; } - bool nseq_regex::has_stabilizers(euf::snode* regex) const { + bool seq_regex::has_stabilizers(euf::snode* regex) const { if (!regex) return false; if (!m_stabilizers.contains(regex->id())) @@ -81,7 +82,7 @@ namespace smt { return !m_stabilizers[regex->id()].empty(); } - ptr_vector const* nseq_regex::get_stabilizers(euf::snode* regex) const { + ptr_vector const* seq_regex::get_stabilizers(euf::snode* regex) const { if (!regex) return nullptr; if (!m_stabilizers.contains(regex->id())) @@ -89,13 +90,13 @@ namespace smt { return &m_stabilizers[regex->id()]; } - void nseq_regex::set_self_stabilizing(euf::snode* regex) { + void seq_regex::set_self_stabilizing(euf::snode* regex) { if (!regex) return; m_self_stabilizing.insert(regex->id()); } - bool nseq_regex::is_self_stabilizing(euf::snode* regex) const { + bool seq_regex::is_self_stabilizing(euf::snode* regex) const { if (!regex) return false; return m_self_stabilizing.contains(regex->id()); @@ -105,7 +106,7 @@ namespace smt { // Self-stabilizing auto-detection // ----------------------------------------------------------------------- - bool nseq_regex::compute_self_stabilizing(euf::snode* regex) const { + bool seq_regex::compute_self_stabilizing(euf::snode* regex) const { if (!regex) return false; @@ -147,7 +148,7 @@ namespace smt { // Self-stabilizing propagation through derivatives // ----------------------------------------------------------------------- - void nseq_regex::propagate_self_stabilizing(euf::snode* parent, euf::snode* deriv) { + void seq_regex::propagate_self_stabilizing(euf::snode* parent, euf::snode* deriv) { if (!parent || !deriv) return; @@ -248,7 +249,7 @@ namespace smt { // Derivative with propagation // ----------------------------------------------------------------------- - euf::snode* nseq_regex::derivative_with_propagation(euf::snode* re, euf::snode* elem) { + euf::snode* seq_regex::derivative_with_propagation(euf::snode* re, euf::snode* elem) { if (!re || !elem) return nullptr; euf::snode* deriv = derivative(re, elem); @@ -261,7 +262,7 @@ namespace smt { // Uniform derivative (symbolic character consumption) // ----------------------------------------------------------------------- - euf::snode* nseq_regex::try_uniform_derivative(euf::snode* regex) { + euf::snode* seq_regex::try_uniform_derivative(euf::snode* regex) { if (!regex) return nullptr; @@ -305,7 +306,7 @@ namespace smt { // Ground prefix consumption // ----------------------------------------------------------------------- - bool nseq_regex::is_empty_regex(euf::snode* re) const { + bool seq_regex::is_empty_regex(euf::snode* re) const { if (!re) return false; // direct empty language constant @@ -355,7 +356,7 @@ namespace smt { // BFS regex emptiness check — helper: collect character boundaries // ----------------------------------------------------------------------- - void nseq_regex::collect_char_boundaries(euf::snode* re, unsigned_vector& bounds) const { + void seq_regex::collect_char_boundaries(euf::snode* re, unsigned_vector& bounds) const { if (!re || !re->get_expr()) return; @@ -401,7 +402,7 @@ namespace smt { // BFS regex emptiness check — helper: alphabet representatives // ----------------------------------------------------------------------- - void nseq_regex::get_alphabet_representatives(euf::snode* re, euf::snode_vector& reps) { + void seq_regex::get_alphabet_representatives(euf::snode* re, euf::snode_vector& reps) { unsigned_vector bounds; bounds.push_back(0); // always include character 0 collect_char_boundaries(re, bounds); @@ -421,7 +422,7 @@ namespace smt { // BFS regex emptiness check // ----------------------------------------------------------------------- - lbool nseq_regex::is_empty_bfs(euf::snode* re, unsigned max_states) { + lbool seq_regex::is_empty_bfs(euf::snode* re, unsigned max_states) { if (!re || !re->get_expr()) return l_undef; if (re->is_fail()) @@ -507,7 +508,7 @@ namespace smt { // Mirrors ZIPT NielsenNode.CheckEmptiness (NielsenNode.cs:1429-1469) // ----------------------------------------------------------------------- - lbool nseq_regex::check_intersection_emptiness(ptr_vector const& regexes, + lbool seq_regex::check_intersection_emptiness(ptr_vector const& regexes, unsigned max_states) { if (regexes.empty()) return l_false; // empty intersection = full language (vacuously non-empty) @@ -649,7 +650,7 @@ namespace smt { // Mirrors ZIPT NielsenNode.IsLanguageSubset (NielsenNode.cs:1382-1385) // ----------------------------------------------------------------------- - lbool nseq_regex::is_language_subset(euf::snode* subset_re, euf::snode* superset_re) { + lbool seq_regex::is_language_subset(euf::snode* subset_re, euf::snode* superset_re) { if (!subset_re || !superset_re) return l_undef; @@ -689,7 +690,7 @@ namespace smt { // Collect primitive regex intersection for a variable // ----------------------------------------------------------------------- - euf::snode* nseq_regex::collect_primitive_regex_intersection( + euf::snode* seq_regex::collect_primitive_regex_intersection( euf::snode* var, seq::nielsen_node const& node) { if (!var) return nullptr; @@ -726,7 +727,7 @@ namespace smt { // Cycle detection // ----------------------------------------------------------------------- - bool nseq_regex::detect_cycle(seq::str_mem const& mem) const { + bool seq_regex::detect_cycle(seq::str_mem const& mem) const { return extract_cycle(mem) != nullptr; } @@ -734,7 +735,7 @@ namespace smt { // Ground prefix consumption // ----------------------------------------------------------------------- - nseq_regex::simplify_status nseq_regex::simplify_ground_prefix(seq::str_mem& mem) { + seq_regex::simplify_status seq_regex::simplify_ground_prefix(seq::str_mem& mem) { if (!mem.m_str || !mem.m_regex) return simplify_status::ok; @@ -768,7 +769,7 @@ namespace smt { // Ground suffix consumption (best-effort) // ----------------------------------------------------------------------- - nseq_regex::simplify_status nseq_regex::simplify_ground_suffix(seq::str_mem& mem) { + seq_regex::simplify_status seq_regex::simplify_ground_suffix(seq::str_mem& mem) { // Suffix consumption via reverse derivatives is complex. // For now, only handle the case where the entire string is ground: // consume all characters from the front (which covers trailing chars @@ -786,7 +787,7 @@ namespace smt { // Trivial checks // ----------------------------------------------------------------------- - int nseq_regex::check_trivial(seq::str_mem const& mem) const { + int seq_regex::check_trivial(seq::str_mem const& mem) const { if (!mem.m_str || !mem.m_regex) return 0; // regex is ∅ => always conflict @@ -808,7 +809,7 @@ namespace smt { // Minterm computation with filtering // ----------------------------------------------------------------------- - void nseq_regex::get_minterms(euf::snode* regex, euf::snode_vector& minterms) { + void seq_regex::get_minterms(euf::snode* regex, euf::snode_vector& minterms) { if (!regex) return; @@ -831,7 +832,7 @@ namespace smt { // Collect first characters // ----------------------------------------------------------------------- - void nseq_regex::collect_first_chars(euf::snode* re, euf::snode_vector& chars) { + void seq_regex::collect_first_chars(euf::snode* re, euf::snode_vector& chars) { if (!re) return; @@ -907,7 +908,7 @@ namespace smt { // Membership processing // ----------------------------------------------------------------------- - bool nseq_regex::process_str_mem(seq::str_mem const& mem, + bool seq_regex::process_str_mem(seq::str_mem const& mem, vector& out_mems) { if (!mem.m_str || !mem.m_regex) return true; @@ -946,7 +947,7 @@ namespace smt { // History recording // ----------------------------------------------------------------------- - seq::str_mem nseq_regex::record_history(seq::str_mem const& mem, euf::snode* history_re) { + seq::str_mem seq_regex::record_history(seq::str_mem const& mem, euf::snode* history_re) { // Build a history chain by prepending the new regex entry to the // existing history. Uses regex-concat as a cons cell: // new_history = re.concat(history_re, old_history) @@ -969,7 +970,7 @@ namespace smt { // Cycle detection // ----------------------------------------------------------------------- - euf::snode* nseq_regex::extract_cycle(seq::str_mem const& mem) const { + euf::snode* seq_regex::extract_cycle(seq::str_mem const& mem) const { // Walk the history chain looking for a repeated regex. // A cycle exists when the current regex matches a regex in the history. if (!mem.m_regex || !mem.m_history) @@ -1013,7 +1014,7 @@ namespace smt { // Stabilizer from cycle // ----------------------------------------------------------------------- - euf::snode* nseq_regex::stabilizer_from_cycle(euf::snode* cycle_regex, + euf::snode* seq_regex::stabilizer_from_cycle(euf::snode* cycle_regex, euf::snode* current_regex) { if (!cycle_regex || !current_regex) return nullptr; @@ -1031,7 +1032,7 @@ namespace smt { // Extract cycle history tokens // ----------------------------------------------------------------------- - euf::snode* nseq_regex::extract_cycle_history(seq::str_mem const& current, + euf::snode* seq_regex::extract_cycle_history(seq::str_mem const& current, seq::str_mem const& ancestor) { // The history is built by simplify_and_init as a left-associative // string concat chain: concat(concat(concat(nil, c1), c2), c3). @@ -1056,7 +1057,7 @@ namespace smt { // Mirrors ZIPT StrMem.GetFilteredStabilizerStar (StrMem.cs:228-243) // ----------------------------------------------------------------------- - euf::snode* nseq_regex::get_filtered_stabilizer_star(euf::snode* re, + euf::snode* seq_regex::get_filtered_stabilizer_star(euf::snode* re, euf::snode* excluded_char) { if (!re) return nullptr; @@ -1103,7 +1104,7 @@ namespace smt { // Mirrors ZIPT StrMem.StabilizerFromCycle (StrMem.cs:163-225) // ----------------------------------------------------------------------- - euf::snode* nseq_regex::strengthened_stabilizer(euf::snode* cycle_regex, + euf::snode* seq_regex::strengthened_stabilizer(euf::snode* cycle_regex, euf::snode* cycle_history) { if (!cycle_regex || !cycle_history) return nullptr; @@ -1239,7 +1240,7 @@ namespace smt { // Mirrors ZIPT StrMem.TrySubsume (StrMem.cs:354-386) // ----------------------------------------------------------------------- - bool nseq_regex::try_subsume(seq::str_mem const& mem, seq::nielsen_node const& node) { + bool seq_regex::try_subsume(seq::str_mem const& mem, seq::nielsen_node const& node) { if (!mem.m_str || !mem.m_regex) return false; diff --git a/src/smt/nseq_regex.h b/src/smt/seq/seq_regex.h similarity index 99% rename from src/smt/nseq_regex.h rename to src/smt/seq/seq_regex.h index 6d354f99e..1a2f13936 100644 --- a/src/smt/nseq_regex.h +++ b/src/smt/seq/seq_regex.h @@ -3,7 +3,7 @@ Copyright (c) 2026 Microsoft Corporation Module Name: - nseq_regex.h + seq_regex.h Abstract: @@ -24,6 +24,7 @@ Abstract: Author: + Clemens Eisenhofer 2026-03-01 Nikolaj Bjorner (nbjorner) 2026-03-01 --*/ @@ -34,9 +35,9 @@ Author: #include "util/uint_set.h" #include "util/lbool.h" -namespace smt { +namespace seq { - class nseq_regex { + class seq_regex { euf::sgraph& m_sg; // ----------------------------------------------------------------- @@ -71,7 +72,7 @@ namespace smt { void get_alphabet_representatives(euf::snode* re, euf::snode_vector& reps); public: - nseq_regex(euf::sgraph& sg) : m_sg(sg) {} + seq_regex(euf::sgraph& sg) : m_sg(sg) {} euf::sgraph& sg() { return m_sg; } diff --git a/src/smt/nseq_state.cpp b/src/smt/seq/seq_state.cpp similarity index 53% rename from src/smt/nseq_state.cpp rename to src/smt/seq/seq_state.cpp index 1b6fc2f23..9f4e52899 100644 --- a/src/smt/nseq_state.cpp +++ b/src/smt/seq/seq_state.cpp @@ -3,15 +3,16 @@ Copyright (c) 2026 Microsoft Corporation Module Name: - nseq_state.cpp + seq_state.cpp Abstract: - Implementation of nseq_state. + Implementation of seq_state. Author: + Clemens Eisenhofer 2026-03-01 Nikolaj Bjorner (nbjorner) 2026-03-01 --*/ -#include "smt/nseq_state.h" +#include "smt/seq/seq_state.h" diff --git a/src/smt/nseq_state.h b/src/smt/seq/seq_state.h similarity index 92% rename from src/smt/nseq_state.h rename to src/smt/seq/seq_state.h index 3ebb81427..29ace6308 100644 --- a/src/smt/nseq_state.h +++ b/src/smt/seq/seq_state.h @@ -3,7 +3,7 @@ Copyright (c) 2026 Microsoft Corporation Module Name: - nseq_state.h + seq_state.h Abstract: @@ -13,15 +13,15 @@ Abstract: Author: + Clemens Eisenhofer 2026-03-01 Nikolaj Bjorner (nbjorner) 2026-03-01 --*/ #pragma once #include "util/vector.h" -#include "ast/euf/euf_sgraph.h" #include "smt/seq/seq_nielsen.h" -#include "smt/smt_literal.h" +#include "util/sat_literal.h" namespace smt { @@ -35,7 +35,7 @@ namespace smt { // source info for a regex membership (the literal that asserted it) struct mem_source { - literal m_lit; + sat::literal m_lit; }; // source info for a string disequality @@ -44,8 +44,7 @@ namespace smt { enode* m_n2; }; - class nseq_state { - euf::sgraph& m_sg; + class seq_state { vector m_str_eqs; vector m_str_mems; vector m_eq_sources; @@ -57,7 +56,7 @@ namespace smt { unsigned m_next_mem_id = 0; public: - nseq_state(euf::sgraph& sg) : m_sg(sg) {} + seq_state() = default; void push() { m_str_eq_lim.push_back(m_str_eqs.size()); @@ -84,7 +83,7 @@ namespace smt { m_eq_sources.push_back({n1, n2}); } - void add_str_mem(euf::snode* str, euf::snode* regex, literal lit) { + void add_str_mem(euf::snode* str, euf::snode* regex, sat::literal lit) { seq::dep_tracker dep; m_str_mems.push_back(seq::str_mem(str, regex, nullptr, m_next_mem_id++, dep)); m_mem_sources.push_back({lit}); diff --git a/src/smt/nseq_model.cpp b/src/smt/seq_model.cpp similarity index 90% rename from src/smt/nseq_model.cpp rename to src/smt/seq_model.cpp index 8eb8d5a59..7177ec05f 100644 --- a/src/smt/nseq_model.cpp +++ b/src/smt/seq_model.cpp @@ -3,22 +3,22 @@ Copyright (c) 2026 Microsoft Corporation Module Name: - nseq_model.cpp + seq_model.cpp Abstract: - Implementation of nseq_model: model construction for the + Implementation of seq_model: model construction for the Nielsen-based string solver. Author: + Clemens Eisenhofer 2026-03-01 Nikolaj Bjorner (nbjorner) 2026-03-01 --*/ -#include "smt/nseq_model.h" -#include "smt/theory_nseq.h" -#include "smt/nseq_regex.h" -#include "smt/nseq_state.h" +#include "smt/seq_model.h" +#include "smt/seq/seq_regex.h" +#include "smt/seq/seq_state.h" #include "smt/smt_context.h" #include "smt/smt_model_generator.h" #include "smt/proto_model/proto_model.h" @@ -26,19 +26,19 @@ Author: namespace smt { - nseq_model::nseq_model(theory_nseq& th, ast_manager& m, seq_util& seq, - seq_rewriter& rw, euf::sgraph& sg, nseq_regex& regex) - : m_th(th), m(m), m_seq(seq), m_rewriter(rw), m_sg(sg), m_regex(regex), m_trail(m) + seq_model::seq_model(ast_manager& m, seq_util& seq, + seq_rewriter& rw, euf::sgraph& sg, seq::seq_regex& regex) + : m(m), m_seq(seq), m_rewriter(rw), m_sg(sg), m_regex(regex), m_trail(m) {} - void nseq_model::init(model_generator& mg, seq::nielsen_graph& nielsen, nseq_state const& state) { + void seq_model::init(model_generator& mg, seq::nielsen_graph& nielsen, seq_state const& state) { m_var_values.reset(); m_var_regex.reset(); m_trail.reset(); m_int_model = nullptr; m_mg = &mg; - m_factory = alloc(seq_factory, m, m_th.get_family_id(), mg.get_model()); + m_factory = alloc(seq_factory, m, m_seq.get_family_id(), mg.get_model()); mg.register_factory(m_factory); register_existing_values(nielsen); @@ -52,7 +52,7 @@ namespace smt { extract_assignments(nielsen.sat_path()); } - model_value_proc* nseq_model::mk_value(enode* n, model_generator& mg) { + model_value_proc* seq_model::mk_value(enode* n, model_generator& mg) { app* e = n->get_expr(); if (!m_seq.is_seq(e) && !m_seq.is_re(e) && !m_seq.str.is_nth_u(e)) return nullptr; @@ -101,7 +101,7 @@ namespace smt { return alloc(expr_wrapper_proc, to_app(m_seq.str.mk_empty(e->get_sort()))); } - void nseq_model::finalize(model_generator& mg) { + void seq_model::finalize(model_generator& mg) { m_var_values.reset(); m_var_regex.reset(); m_trail.reset(); @@ -110,7 +110,7 @@ namespace smt { m_factory = nullptr; } - void nseq_model::extract_assignments(svector const& sat_path) { + void seq_model::extract_assignments(svector const& sat_path) { IF_VERBOSE(1, verbose_stream() << "nseq extract_assignments: path length=" << sat_path.size() << "\n";); // compose substitutions root-to-leaf. @@ -154,12 +154,15 @@ namespace smt { } } - expr_ref nseq_model::snode_to_value(euf::snode* n) { + expr_ref seq_model::snode_to_value(euf::snode* n) { if (!n) return expr_ref(m); - if (n->is_empty()) - return expr_ref(m_seq.str.mk_empty(m_seq.str.mk_string_sort()), m); + if (n->is_empty()) { + sort* srt = n->get_sort(); + if (!srt) srt = m_seq.str.mk_string_sort(); + return expr_ref(m_seq.str.mk_empty(srt), m); + } if (n->is_char() || n->is_unit()) { expr* e = n->get_expr(); @@ -272,7 +275,7 @@ namespace smt { return e ? expr_ref(e, m) : expr_ref(m); } - expr_ref nseq_model::generate_regex_witness(euf::snode* regex, unsigned depth) { + expr_ref seq_model::generate_regex_witness(euf::snode* regex, unsigned depth) { if (!regex) return expr_ref(m_seq.str.mk_empty(m_seq.str.mk_string_sort()), m); @@ -307,7 +310,7 @@ namespace smt { return fresh ? expr_ref(fresh, m) : expr_ref(m_seq.str.mk_empty(srt), m); } - void nseq_model::register_existing_values(seq::nielsen_graph& nielsen) { + void seq_model::register_existing_values(seq::nielsen_graph& nielsen) { seq::nielsen_node const* root = nielsen.root(); if (!root) return; @@ -319,7 +322,7 @@ namespace smt { } } - expr* nseq_model::get_var_value(euf::snode* var) { + expr* seq_model::get_var_value(euf::snode* var) { expr* val = nullptr; if (m_var_values.find(var->id(), val)) return val; @@ -333,7 +336,7 @@ namespace smt { return val; } - expr* nseq_model::mk_fresh_value(euf::snode* var) { + expr* seq_model::mk_fresh_value(euf::snode* var) { // check if this variable has regex constraints euf::snode* re = nullptr; if (m_var_regex.find(var->id(), re) && re) { @@ -353,7 +356,7 @@ namespace smt { return m_seq.str.mk_empty(srt); } - void nseq_model::collect_var_regex_constraints(nseq_state const& state) { + void seq_model::collect_var_regex_constraints(seq_state const& state) { for (auto const& mem : state.str_mems()) { if (!mem.m_str || !mem.m_regex) continue; @@ -380,7 +383,7 @@ namespace smt { } } - bool nseq_model::validate_regex(nseq_state const& state, ::proto_model& mdl) { + bool seq_model::validate_regex(seq_state const& state, ::proto_model& mdl) { bool ok = true; // validate positive memberships: str ∈ regex diff --git a/src/smt/nseq_model.h b/src/smt/seq_model.h similarity index 89% rename from src/smt/nseq_model.h rename to src/smt/seq_model.h index 14baa40cf..533d253aa 100644 --- a/src/smt/nseq_model.h +++ b/src/smt/seq_model.h @@ -3,7 +3,7 @@ Copyright (c) 2026 Microsoft Corporation Module Name: - nseq_model.h + seq_model.h Abstract: @@ -22,6 +22,7 @@ Abstract: Author: + Clemens Eisenhofer 2026-03-01 Nikolaj Bjorner (nbjorner) 2026-03-01 --*/ @@ -30,26 +31,28 @@ Author: #include "ast/seq_decl_plugin.h" #include "ast/rewriter/seq_rewriter.h" #include "ast/euf/euf_sgraph.h" -#include "smt/smt_types.h" #include "smt/seq/seq_nielsen.h" #include "model/seq_factory.h" class proto_model; +namespace seq { + class seq_regex; +} + namespace smt { - class theory_nseq; - class nseq_regex; - class nseq_state; + class enode; + class model_generator; + class seq_state; class model_value_proc; - class nseq_model { - theory_nseq& m_th; + class seq_model { ast_manager& m; seq_util& m_seq; seq_rewriter& m_rewriter; euf::sgraph& m_sg; - nseq_regex& m_regex; + seq::seq_regex& m_regex; // factory for generating fresh string/regex values seq_factory* m_factory = nullptr; @@ -70,14 +73,14 @@ namespace smt { u_map m_var_regex; public: - nseq_model(theory_nseq& th, ast_manager& m, seq_util& seq, - seq_rewriter& rw, euf::sgraph& sg, nseq_regex& regex); + seq_model(ast_manager& m, seq_util& seq, + seq_rewriter& rw, euf::sgraph& sg, seq::seq_regex& regex); // Phase 1: initialize model construction. // Allocates seq_factory, registers it with mg, collects // existing string literals, and extracts variable assignments // from the satisfying Nielsen leaf node. - void init(model_generator& mg, seq::nielsen_graph& nielsen, nseq_state const& state); + void init(model_generator& mg, seq::nielsen_graph& nielsen, seq_state const& state); // Phase 2: build a model_value_proc for the given enode. // Returns nullptr if the enode is not a sequence/string sort. @@ -89,7 +92,7 @@ namespace smt { // Validate that model assignments satisfy all regex membership // constraints from the state. Checks positive and negative // memberships. Returns true if all constraints pass. - bool validate_regex(nseq_state const& state, ::proto_model& mdl); + bool validate_regex(seq_state const& state, ::proto_model& mdl); private: // extract variable assignments from the sat path (root-to-leaf edges). @@ -123,7 +126,7 @@ namespace smt { // collect per-variable regex constraints from the state. // For each positive str_mem, records the regex (or intersects // with existing) into m_var_regex keyed by the string snode id. - void collect_var_regex_constraints(nseq_state const& state); + void collect_var_regex_constraints(seq_state const& state); }; } diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index fdda483cd..78931e63f 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -35,9 +35,9 @@ namespace smt { m_sgraph(ctx.get_manager(), m_egraph), m_context_solver(ctx.get_manager()), m_nielsen(m_sgraph, m_context_solver), - m_state(m_sgraph), + m_state(), m_regex(m_sgraph), - m_model(*this, ctx.get_manager(), m_seq, m_rewriter, m_sgraph, m_regex) + m_model(ctx.get_manager(), m_seq, m_rewriter, m_sgraph, m_regex) {} // ----------------------------------------------------------------------- @@ -339,7 +339,7 @@ namespace smt { m_nielsen.add_str_eq(eq.m_lhs, eq.m_rhs); } - // transfer regex memberships, pre-processing through nseq_regex + // transfer regex memberships, pre-processing through seq_regex // to consume ground prefixes via Brzozowski derivatives for (unsigned state_idx = 0; state_idx < m_state.str_mems().size(); ++state_idx) { auto const& mem = m_state.str_mems()[state_idx]; diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h index 24f5ad566..d44c6d67c 100644 --- a/src/smt/theory_nseq.h +++ b/src/smt/theory_nseq.h @@ -25,9 +25,9 @@ Author: #include "smt/smt_theory.h" #include "smt/smt_arith_value.h" #include "smt/seq/seq_nielsen.h" -#include "smt/nseq_state.h" -#include "smt/nseq_regex.h" -#include "smt/nseq_model.h" +#include "smt/seq/seq_state.h" +#include "smt/seq/seq_regex.h" +#include "smt/seq_model.h" #include "smt/nseq_context_solver.h" namespace smt { @@ -43,9 +43,9 @@ namespace smt { // to the m_nielsen constructor and must remain stable for the object's lifetime. context_solver m_context_solver; seq::nielsen_graph m_nielsen; - nseq_state m_state; - nseq_regex m_regex; // regex membership pre-processing - nseq_model m_model; // model construction helper + seq_state m_state; + seq::seq_regex m_regex; // regex membership pre-processing + seq_model m_model; // model construction helper // propagation queue struct prop_item { diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 1620bd790..a85bae7c5 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -134,7 +134,7 @@ add_executable(test-z3 seq_nielsen.cpp seq_parikh.cpp nseq_basic.cpp - nseq_regex.cpp + seq_regex.cpp nseq_zipt.cpp small_object_allocator.cpp smt2print_parse.cpp diff --git a/src/test/main.cpp b/src/test/main.cpp index 0f378df67..f2a7adcc3 100644 --- a/src/test/main.cpp +++ b/src/test/main.cpp @@ -289,7 +289,7 @@ int main(int argc, char ** argv) { TST(seq_nielsen); TST(seq_parikh); TST(nseq_basic); - TST(nseq_regex); + TST(seq_regex); TST(nseq_zipt); TST(ho_matcher); TST(finite_set); diff --git a/src/test/nseq_regex.cpp b/src/test/seq_regex.cpp similarity index 94% rename from src/test/nseq_regex.cpp rename to src/test/seq_regex.cpp index 6970413b0..3e6ee9138 100644 --- a/src/test/nseq_regex.cpp +++ b/src/test/seq_regex.cpp @@ -3,11 +3,11 @@ Copyright (c) 2026 Microsoft Corporation Module Name: - nseq_regex.cpp + seq_regex.cpp Abstract: - Unit tests for nseq_regex: lazy regex membership processing + Unit tests for seq_regex: lazy regex membership processing for the Nielsen-based string solver. --*/ @@ -15,32 +15,32 @@ Abstract: #include "ast/reg_decl_plugins.h" #include "ast/euf/euf_egraph.h" #include "ast/euf/euf_sgraph.h" -#include "smt/nseq_regex.h" +#include "smt/seq/seq_regex.h" #include "smt/seq/seq_nielsen.h" #include "util/lbool.h" #include "util/zstring.h" #include -// Test 1: nseq_regex instantiation -static void test_nseq_regex_instantiation() { - std::cout << "test_nseq_regex_instantiation\n"; +// Test 1: seq_regex instantiation +static void test_seq_regex_instantiation() { + std::cout << "test_seq_regex_instantiation\n"; ast_manager m; reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); SASSERT(&nr.sg() == &sg); std::cout << " ok\n"; } // Test 2: is_empty_regex on an empty-language node -static void test_nseq_regex_is_empty() { - std::cout << "test_nseq_regex_is_empty\n"; +static void test_seq_regex_is_empty() { + std::cout << "test_seq_regex_is_empty\n"; ast_manager m; reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); seq_util su(m); sort* str_sort = su.str.mk_string_sort(); @@ -52,13 +52,13 @@ static void test_nseq_regex_is_empty() { } // Test 3: is_empty_regex on a full-match regex (not empty) -static void test_nseq_regex_is_full() { - std::cout << "test_nseq_regex_is_full\n"; +static void test_seq_regex_is_full() { + std::cout << "test_seq_regex_is_full\n"; ast_manager m; reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); seq_util su(m); sort* str_sort = su.str.mk_string_sort(); @@ -76,7 +76,7 @@ static void test_strengthened_stabilizer_null() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); SASSERT(nr.strengthened_stabilizer(nullptr, nullptr) == nullptr); @@ -100,7 +100,7 @@ static void test_strengthened_stabilizer_single_char() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); seq_util su(m); // Build a* @@ -126,7 +126,7 @@ static void test_strengthened_stabilizer_two_char() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); seq_util su(m); // Build (ab)* @@ -152,7 +152,7 @@ static void test_filtered_stabilizer_star_empty() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); seq_util su(m); sort* str_sort = su.str.mk_string_sort(); @@ -173,7 +173,7 @@ static void test_filtered_stabilizer_star_with_stab() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); seq_util su(m); // Build a* as the regex state @@ -201,7 +201,7 @@ static void test_filtered_stabilizer_star_filtered() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); seq_util su(m); // Build a* as the regex state @@ -227,7 +227,7 @@ static void test_extract_cycle_history_basic() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); seq_util su(m); sort* str_sort = su.str.mk_string_sort(); @@ -265,7 +265,7 @@ static void test_extract_cycle_history_null_ancestor() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); seq_util su(m); sort* str_sort = su.str.mk_string_sort(); @@ -297,7 +297,7 @@ static void test_bfs_empty_none() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); seq_util su(m); sort* str_sort = su.str.mk_string_sort(); sort* re_sort = su.re.mk_re(str_sort); @@ -316,7 +316,7 @@ static void test_bfs_nonempty_full() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); seq_util su(m); sort* str_sort = su.str.mk_string_sort(); sort* re_sort = su.re.mk_re(str_sort); @@ -335,7 +335,7 @@ static void test_bfs_nonempty_to_re() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); seq_util su(m); expr_ref to_re_abc(su.re.mk_to_re(su.str.mk_string("abc")), m); @@ -352,7 +352,7 @@ static void test_bfs_nonempty_star() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); seq_util su(m); expr_ref star_a(su.re.mk_star(su.re.mk_to_re(su.str.mk_string("a"))), m); @@ -369,7 +369,7 @@ static void test_bfs_empty_union_of_empties() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); seq_util su(m); sort* str_sort = su.str.mk_string_sort(); sort* re_sort = su.re.mk_re(str_sort); @@ -390,7 +390,7 @@ static void test_bfs_nonempty_range() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); seq_util su(m); sort* str_sort = su.str.mk_string_sort(); @@ -410,7 +410,7 @@ static void test_bfs_empty_complement_full() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); seq_util su(m); sort* str_sort = su.str.mk_string_sort(); sort* re_sort = su.re.mk_re(str_sort); @@ -429,7 +429,7 @@ static void test_bfs_null_safety() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); lbool result = nr.is_empty_bfs(nullptr); SASSERT(result == l_undef); @@ -443,7 +443,7 @@ static void test_bfs_bounded() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); seq_util su(m); // (a|b)+ requires at least one char; with max_states=1 should bail @@ -495,7 +495,7 @@ static void test_stabilizer_store_basic() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); seq_util su(m); expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m); @@ -527,7 +527,7 @@ static void test_self_stabilizing() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); seq_util su(m); expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m); @@ -552,7 +552,7 @@ static void test_check_intersection_sat() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); seq_util su(m); // a* ∩ (a|b)* should be non-empty (both accept "a") @@ -580,7 +580,7 @@ static void test_check_intersection_unsat() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); seq_util su(m); sort* str_sort = su.str.mk_string_sort(); @@ -605,7 +605,7 @@ static void test_is_language_subset_true() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); seq_util su(m); // a* ⊆ (a|b)* should be true @@ -630,7 +630,7 @@ static void test_is_language_subset_false() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); seq_util su(m); // (a|b)* ⊄ a* should be false (b ∈ (a|b)* but b ∉ a*) @@ -655,7 +655,7 @@ static void test_is_language_subset_trivial() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - smt::nseq_regex nr(sg); + seq::seq_regex nr(sg); seq_util su(m); sort* str_sort = su.str.mk_string_sort(); @@ -677,10 +677,10 @@ static void test_is_language_subset_trivial() { std::cout << " ok\n"; } -void tst_nseq_regex() { - test_nseq_regex_instantiation(); - test_nseq_regex_is_empty(); - test_nseq_regex_is_full(); +void tst_seq_regex() { + test_seq_regex_instantiation(); + test_seq_regex_is_empty(); + test_seq_regex_is_full(); test_strengthened_stabilizer_null(); test_strengthened_stabilizer_single_char(); test_strengthened_stabilizer_two_char(); @@ -707,5 +707,5 @@ void tst_nseq_regex() { test_is_language_subset_true(); test_is_language_subset_false(); test_is_language_subset_trivial(); - std::cout << "nseq_regex: all tests passed\n"; + std::cout << "seq_regex: all tests passed\n"; }