3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-03-17 18:43:45 +00:00

Move nseq_regex/state into smt/seq and seq_model into smt/; rename to seq_* prefix (#8984)

* Initial plan

* build verified: nseq_regex moved to smt/seq

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>

* rename nseq_regex/state/model to seq_regex/state/model in smt/seq; add Clemens Eisenhofer as co-author

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>

* seq_model: remove theory_nseq dependency; get family_id from seq_util

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>

* Add comments for regex enhancements in seq_model

Added comments regarding future improvements for regex handling.

* Add comments for large exponent handling

Add comments for handling large exponents in seq_model.cpp

* Revise comments for clarity on sort usage

Updated comments to reflect review suggestions regarding the use of the sort of 'n'.

* seq_state: remove sgraph dep; seq_model: use snode sort for is_empty; remove NSB review comments

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>

* Update seq_state.h

* Remove unnecessary include for smt_context.h

* move seq_model from smt/seq/ to smt/; fix seq_state.h add_str_mem typo

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
Co-authored-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
Copilot 2026-03-14 11:45:32 -07:00 committed by GitHub
parent 27f5541b0b
commit 5a3dbaf9f3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 172 additions and 164 deletions

View file

@ -65,9 +65,7 @@ z3_add_component(smt
theory_intblast.cpp
theory_lra.cpp
theory_nseq.cpp
nseq_state.cpp
nseq_regex.cpp
nseq_model.cpp
seq_model.cpp
theory_opt.cpp
theory_pb.cpp
theory_recfun.cpp

View file

@ -1,5 +1,7 @@
z3_add_component(smt_seq
SOURCES
seq_regex.cpp
seq_state.cpp
seq_parikh.cpp
seq_nielsen.cpp
COMPONENT_DEPENDENCIES

View file

@ -21,7 +21,7 @@ Author:
#include "smt/seq/seq_nielsen.h"
#include "smt/seq/seq_parikh.h"
#include "smt/nseq_regex.h"
#include "smt/seq/seq_regex.h"
#include "ast/arith_decl_plugin.h"
#include "ast/ast_pp.h"
#include "ast/rewriter/seq_rewriter.h"
@ -513,13 +513,13 @@ namespace seq {
m_sg(sg),
m_solver(solver),
m_parikh(alloc(seq_parikh, sg)),
m_nseq_regex(alloc(smt::nseq_regex, sg)),
m_seq_regex(alloc(seq::seq_regex, sg)),
m_len_vars(sg.get_manager()) {
}
nielsen_graph::~nielsen_graph() {
dealloc(m_parikh);
dealloc(m_nseq_regex);
dealloc(m_seq_regex);
reset();
}
@ -1798,7 +1798,7 @@ namespace seq {
// and check if the result intersected with the target regex is empty.
// Detects infeasible constraints that would otherwise require
// expensive exploration. Mirrors ZIPT NielsenNode.CheckRegexWidening.
if (g.m_nseq_regex) {
if (g.m_seq_regex) {
for (str_mem const& mem : m_str_mem) {
if (!mem.m_str || !mem.m_regex)
continue;
@ -3290,11 +3290,11 @@ namespace seq {
// 3. Fall back to simple star(R)
euf::snode* stab_base = nullptr;
if (m_nseq_regex && m_nseq_regex->is_self_stabilizing(mem.m_regex)) {
if (m_seq_regex && m_seq_regex->is_self_stabilizing(mem.m_regex)) {
stab_base = mem.m_regex;
}
else if (m_nseq_regex && mem.m_history) {
stab_base = m_nseq_regex->strengthened_stabilizer(mem.m_regex, mem.m_history);
else if (m_seq_regex && mem.m_history) {
stab_base = m_seq_regex->strengthened_stabilizer(mem.m_regex, mem.m_history);
}
if (!stab_base) {
@ -3303,12 +3303,12 @@ namespace seq {
}
// Register the stabilizer
if (m_nseq_regex)
m_nseq_regex->add_stabilizer(mem.m_regex, stab_base);
if (m_seq_regex)
m_seq_regex->add_stabilizer(mem.m_regex, stab_base);
// Check if stabilizer equals regex → self-stabilizing
if (m_nseq_regex && stab_base == mem.m_regex)
m_nseq_regex->set_self_stabilizing(mem.m_regex);
if (m_seq_regex && stab_base == mem.m_regex)
m_seq_regex->set_self_stabilizing(mem.m_regex);
// Build stab_star = star(stab_base)
expr* stab_expr = stab_base->get_expr();
@ -3320,7 +3320,7 @@ namespace seq {
continue;
// Try subsumption: if L(x_range) ⊆ L(stab_star), drop the constraint
if (m_nseq_regex && m_nseq_regex->try_subsume(mem, *node))
if (m_seq_regex && m_seq_regex->try_subsume(mem, *node))
continue;
// Create child: x → pr · po
@ -4281,7 +4281,7 @@ namespace seq {
bool nielsen_graph::check_regex_widening(nielsen_node const& node,
euf::snode* str, euf::snode* regex) {
if (!str || !regex || !m_nseq_regex)
if (!str || !regex || !m_seq_regex)
return false;
// Only apply to ground regexes with non-trivial strings
if (!regex->is_ground())
@ -4313,7 +4313,7 @@ namespace seq {
}
else if (tok->is_var()) {
// Variable → intersection of primitive regex constraints, or Σ*
euf::snode* x_range = m_nseq_regex->collect_primitive_regex_intersection(tok, node);
euf::snode* x_range = m_seq_regex->collect_primitive_regex_intersection(tok, node);
if (x_range) {
tok_re = x_range;
} else {
@ -4389,7 +4389,7 @@ namespace seq {
if (!inter_sn)
return false;
lbool result = m_nseq_regex->is_empty_bfs(inter_sn, 5000);
lbool result = m_seq_regex->is_empty_bfs(inter_sn, 5000);
return result == l_true;
}
@ -4399,7 +4399,7 @@ namespace seq {
// -----------------------------------------------------------------------
bool nielsen_graph::check_leaf_regex(nielsen_node const& node) {
if (!m_nseq_regex)
if (!m_seq_regex)
return true;
// Group str_mem constraints by variable (primitive constraints only)
@ -4421,7 +4421,7 @@ namespace seq {
for (auto& [var_id, regexes] : var_regexes) {
if (regexes.size() < 2)
continue;
lbool result = m_nseq_regex->check_intersection_emptiness(regexes, 5000);
lbool result = m_seq_regex->check_intersection_emptiness(regexes, 5000);
if (result == l_true) {
// Intersection is empty — infeasible
return false;

View file

@ -244,8 +244,8 @@ Author:
#include <map>
#include "model/model.h"
namespace smt {
class nseq_regex; // forward declaration (defined in smt/nseq_regex.h)
namespace seq {
class seq_regex; // forward declaration (defined in smt/seq/seq_regex.h)
}
namespace seq {
@ -761,7 +761,7 @@ namespace seq {
// Regex membership module: stabilizers, emptiness checks, language
// inclusion, derivatives. Allocated in the constructor; owned by this graph.
smt::nseq_regex* m_nseq_regex = nullptr;
seq::seq_regex* m_seq_regex = nullptr;
// -----------------------------------------------
// Modification counter for substitution length tracking.
@ -895,8 +895,8 @@ namespace seq {
// Caller takes ownership of the returned model pointer.
bool solve_sat_path_ints(model_ref& mdl);
// accessor for the nseq_regex module
smt::nseq_regex* nseq_regex_module() const { return m_nseq_regex; }
// accessor for the seq_regex module
seq::seq_regex* seq_regex_module() const { return m_seq_regex; }
private:
search_result search_dfs(nielsen_node* node, unsigned depth, svector<nielsen_edge*>& cur_path);

View file

@ -3,7 +3,7 @@ Copyright (c) 2026 Microsoft Corporation
Module Name:
nseq_regex.cpp
seq_regex.cpp
Abstract:
@ -11,24 +11,25 @@ Abstract:
Author:
Clemens Eisenhofer 2026-03-01
Nikolaj Bjorner (nbjorner) 2026-03-01
--*/
#include "smt/nseq_regex.h"
#include "smt/seq/seq_regex.h"
#include <unordered_set>
namespace smt {
namespace seq {
// -----------------------------------------------------------------------
// Stabilizer store
// -----------------------------------------------------------------------
void nseq_regex::reset_stabilizers() {
void seq_regex::reset_stabilizers() {
m_stabilizers.reset();
m_self_stabilizing.reset();
}
void nseq_regex::add_stabilizer(euf::snode* regex, euf::snode* stabilizer) {
void seq_regex::add_stabilizer(euf::snode* regex, euf::snode* stabilizer) {
if (!regex || !stabilizer)
return;
@ -43,7 +44,7 @@ namespace smt {
stabs.push_back(stabilizer);
}
euf::snode* nseq_regex::get_stabilizer_union(euf::snode* regex) {
euf::snode* seq_regex::get_stabilizer_union(euf::snode* regex) {
if (!regex)
return nullptr;
@ -73,7 +74,7 @@ namespace smt {
return result;
}
bool nseq_regex::has_stabilizers(euf::snode* regex) const {
bool seq_regex::has_stabilizers(euf::snode* regex) const {
if (!regex)
return false;
if (!m_stabilizers.contains(regex->id()))
@ -81,7 +82,7 @@ namespace smt {
return !m_stabilizers[regex->id()].empty();
}
ptr_vector<euf::snode> const* nseq_regex::get_stabilizers(euf::snode* regex) const {
ptr_vector<euf::snode> const* seq_regex::get_stabilizers(euf::snode* regex) const {
if (!regex)
return nullptr;
if (!m_stabilizers.contains(regex->id()))
@ -89,13 +90,13 @@ namespace smt {
return &m_stabilizers[regex->id()];
}
void nseq_regex::set_self_stabilizing(euf::snode* regex) {
void seq_regex::set_self_stabilizing(euf::snode* regex) {
if (!regex)
return;
m_self_stabilizing.insert(regex->id());
}
bool nseq_regex::is_self_stabilizing(euf::snode* regex) const {
bool seq_regex::is_self_stabilizing(euf::snode* regex) const {
if (!regex)
return false;
return m_self_stabilizing.contains(regex->id());
@ -105,7 +106,7 @@ namespace smt {
// Self-stabilizing auto-detection
// -----------------------------------------------------------------------
bool nseq_regex::compute_self_stabilizing(euf::snode* regex) const {
bool seq_regex::compute_self_stabilizing(euf::snode* regex) const {
if (!regex)
return false;
@ -147,7 +148,7 @@ namespace smt {
// Self-stabilizing propagation through derivatives
// -----------------------------------------------------------------------
void nseq_regex::propagate_self_stabilizing(euf::snode* parent, euf::snode* deriv) {
void seq_regex::propagate_self_stabilizing(euf::snode* parent, euf::snode* deriv) {
if (!parent || !deriv)
return;
@ -248,7 +249,7 @@ namespace smt {
// Derivative with propagation
// -----------------------------------------------------------------------
euf::snode* nseq_regex::derivative_with_propagation(euf::snode* re, euf::snode* elem) {
euf::snode* seq_regex::derivative_with_propagation(euf::snode* re, euf::snode* elem) {
if (!re || !elem)
return nullptr;
euf::snode* deriv = derivative(re, elem);
@ -261,7 +262,7 @@ namespace smt {
// Uniform derivative (symbolic character consumption)
// -----------------------------------------------------------------------
euf::snode* nseq_regex::try_uniform_derivative(euf::snode* regex) {
euf::snode* seq_regex::try_uniform_derivative(euf::snode* regex) {
if (!regex)
return nullptr;
@ -305,7 +306,7 @@ namespace smt {
// Ground prefix consumption
// -----------------------------------------------------------------------
bool nseq_regex::is_empty_regex(euf::snode* re) const {
bool seq_regex::is_empty_regex(euf::snode* re) const {
if (!re)
return false;
// direct empty language constant
@ -355,7 +356,7 @@ namespace smt {
// BFS regex emptiness check — helper: collect character boundaries
// -----------------------------------------------------------------------
void nseq_regex::collect_char_boundaries(euf::snode* re, unsigned_vector& bounds) const {
void seq_regex::collect_char_boundaries(euf::snode* re, unsigned_vector& bounds) const {
if (!re || !re->get_expr())
return;
@ -401,7 +402,7 @@ namespace smt {
// BFS regex emptiness check — helper: alphabet representatives
// -----------------------------------------------------------------------
void nseq_regex::get_alphabet_representatives(euf::snode* re, euf::snode_vector& reps) {
void seq_regex::get_alphabet_representatives(euf::snode* re, euf::snode_vector& reps) {
unsigned_vector bounds;
bounds.push_back(0); // always include character 0
collect_char_boundaries(re, bounds);
@ -421,7 +422,7 @@ namespace smt {
// BFS regex emptiness check
// -----------------------------------------------------------------------
lbool nseq_regex::is_empty_bfs(euf::snode* re, unsigned max_states) {
lbool seq_regex::is_empty_bfs(euf::snode* re, unsigned max_states) {
if (!re || !re->get_expr())
return l_undef;
if (re->is_fail())
@ -507,7 +508,7 @@ namespace smt {
// Mirrors ZIPT NielsenNode.CheckEmptiness (NielsenNode.cs:1429-1469)
// -----------------------------------------------------------------------
lbool nseq_regex::check_intersection_emptiness(ptr_vector<euf::snode> const& regexes,
lbool seq_regex::check_intersection_emptiness(ptr_vector<euf::snode> const& regexes,
unsigned max_states) {
if (regexes.empty())
return l_false; // empty intersection = full language (vacuously non-empty)
@ -649,7 +650,7 @@ namespace smt {
// Mirrors ZIPT NielsenNode.IsLanguageSubset (NielsenNode.cs:1382-1385)
// -----------------------------------------------------------------------
lbool nseq_regex::is_language_subset(euf::snode* subset_re, euf::snode* superset_re) {
lbool seq_regex::is_language_subset(euf::snode* subset_re, euf::snode* superset_re) {
if (!subset_re || !superset_re)
return l_undef;
@ -689,7 +690,7 @@ namespace smt {
// Collect primitive regex intersection for a variable
// -----------------------------------------------------------------------
euf::snode* nseq_regex::collect_primitive_regex_intersection(
euf::snode* seq_regex::collect_primitive_regex_intersection(
euf::snode* var, seq::nielsen_node const& node) {
if (!var)
return nullptr;
@ -726,7 +727,7 @@ namespace smt {
// Cycle detection
// -----------------------------------------------------------------------
bool nseq_regex::detect_cycle(seq::str_mem const& mem) const {
bool seq_regex::detect_cycle(seq::str_mem const& mem) const {
return extract_cycle(mem) != nullptr;
}
@ -734,7 +735,7 @@ namespace smt {
// Ground prefix consumption
// -----------------------------------------------------------------------
nseq_regex::simplify_status nseq_regex::simplify_ground_prefix(seq::str_mem& mem) {
seq_regex::simplify_status seq_regex::simplify_ground_prefix(seq::str_mem& mem) {
if (!mem.m_str || !mem.m_regex)
return simplify_status::ok;
@ -768,7 +769,7 @@ namespace smt {
// Ground suffix consumption (best-effort)
// -----------------------------------------------------------------------
nseq_regex::simplify_status nseq_regex::simplify_ground_suffix(seq::str_mem& mem) {
seq_regex::simplify_status seq_regex::simplify_ground_suffix(seq::str_mem& mem) {
// Suffix consumption via reverse derivatives is complex.
// For now, only handle the case where the entire string is ground:
// consume all characters from the front (which covers trailing chars
@ -786,7 +787,7 @@ namespace smt {
// Trivial checks
// -----------------------------------------------------------------------
int nseq_regex::check_trivial(seq::str_mem const& mem) const {
int seq_regex::check_trivial(seq::str_mem const& mem) const {
if (!mem.m_str || !mem.m_regex)
return 0;
// regex is ∅ => always conflict
@ -808,7 +809,7 @@ namespace smt {
// Minterm computation with filtering
// -----------------------------------------------------------------------
void nseq_regex::get_minterms(euf::snode* regex, euf::snode_vector& minterms) {
void seq_regex::get_minterms(euf::snode* regex, euf::snode_vector& minterms) {
if (!regex)
return;
@ -831,7 +832,7 @@ namespace smt {
// Collect first characters
// -----------------------------------------------------------------------
void nseq_regex::collect_first_chars(euf::snode* re, euf::snode_vector& chars) {
void seq_regex::collect_first_chars(euf::snode* re, euf::snode_vector& chars) {
if (!re)
return;
@ -907,7 +908,7 @@ namespace smt {
// Membership processing
// -----------------------------------------------------------------------
bool nseq_regex::process_str_mem(seq::str_mem const& mem,
bool seq_regex::process_str_mem(seq::str_mem const& mem,
vector<seq::str_mem>& out_mems) {
if (!mem.m_str || !mem.m_regex)
return true;
@ -946,7 +947,7 @@ namespace smt {
// History recording
// -----------------------------------------------------------------------
seq::str_mem nseq_regex::record_history(seq::str_mem const& mem, euf::snode* history_re) {
seq::str_mem seq_regex::record_history(seq::str_mem const& mem, euf::snode* history_re) {
// Build a history chain by prepending the new regex entry to the
// existing history. Uses regex-concat as a cons cell:
// new_history = re.concat(history_re, old_history)
@ -969,7 +970,7 @@ namespace smt {
// Cycle detection
// -----------------------------------------------------------------------
euf::snode* nseq_regex::extract_cycle(seq::str_mem const& mem) const {
euf::snode* seq_regex::extract_cycle(seq::str_mem const& mem) const {
// Walk the history chain looking for a repeated regex.
// A cycle exists when the current regex matches a regex in the history.
if (!mem.m_regex || !mem.m_history)
@ -1013,7 +1014,7 @@ namespace smt {
// Stabilizer from cycle
// -----------------------------------------------------------------------
euf::snode* nseq_regex::stabilizer_from_cycle(euf::snode* cycle_regex,
euf::snode* seq_regex::stabilizer_from_cycle(euf::snode* cycle_regex,
euf::snode* current_regex) {
if (!cycle_regex || !current_regex)
return nullptr;
@ -1031,7 +1032,7 @@ namespace smt {
// Extract cycle history tokens
// -----------------------------------------------------------------------
euf::snode* nseq_regex::extract_cycle_history(seq::str_mem const& current,
euf::snode* seq_regex::extract_cycle_history(seq::str_mem const& current,
seq::str_mem const& ancestor) {
// The history is built by simplify_and_init as a left-associative
// string concat chain: concat(concat(concat(nil, c1), c2), c3).
@ -1056,7 +1057,7 @@ namespace smt {
// Mirrors ZIPT StrMem.GetFilteredStabilizerStar (StrMem.cs:228-243)
// -----------------------------------------------------------------------
euf::snode* nseq_regex::get_filtered_stabilizer_star(euf::snode* re,
euf::snode* seq_regex::get_filtered_stabilizer_star(euf::snode* re,
euf::snode* excluded_char) {
if (!re)
return nullptr;
@ -1103,7 +1104,7 @@ namespace smt {
// Mirrors ZIPT StrMem.StabilizerFromCycle (StrMem.cs:163-225)
// -----------------------------------------------------------------------
euf::snode* nseq_regex::strengthened_stabilizer(euf::snode* cycle_regex,
euf::snode* seq_regex::strengthened_stabilizer(euf::snode* cycle_regex,
euf::snode* cycle_history) {
if (!cycle_regex || !cycle_history)
return nullptr;
@ -1239,7 +1240,7 @@ namespace smt {
// Mirrors ZIPT StrMem.TrySubsume (StrMem.cs:354-386)
// -----------------------------------------------------------------------
bool nseq_regex::try_subsume(seq::str_mem const& mem, seq::nielsen_node const& node) {
bool seq_regex::try_subsume(seq::str_mem const& mem, seq::nielsen_node const& node) {
if (!mem.m_str || !mem.m_regex)
return false;

View file

@ -3,7 +3,7 @@ Copyright (c) 2026 Microsoft Corporation
Module Name:
nseq_regex.h
seq_regex.h
Abstract:
@ -24,6 +24,7 @@ Abstract:
Author:
Clemens Eisenhofer 2026-03-01
Nikolaj Bjorner (nbjorner) 2026-03-01
--*/
@ -34,9 +35,9 @@ Author:
#include "util/uint_set.h"
#include "util/lbool.h"
namespace smt {
namespace seq {
class nseq_regex {
class seq_regex {
euf::sgraph& m_sg;
// -----------------------------------------------------------------
@ -71,7 +72,7 @@ namespace smt {
void get_alphabet_representatives(euf::snode* re, euf::snode_vector& reps);
public:
nseq_regex(euf::sgraph& sg) : m_sg(sg) {}
seq_regex(euf::sgraph& sg) : m_sg(sg) {}
euf::sgraph& sg() { return m_sg; }

View file

@ -3,15 +3,16 @@ Copyright (c) 2026 Microsoft Corporation
Module Name:
nseq_state.cpp
seq_state.cpp
Abstract:
Implementation of nseq_state.
Implementation of seq_state.
Author:
Clemens Eisenhofer 2026-03-01
Nikolaj Bjorner (nbjorner) 2026-03-01
--*/
#include "smt/nseq_state.h"
#include "smt/seq/seq_state.h"

View file

@ -3,7 +3,7 @@ Copyright (c) 2026 Microsoft Corporation
Module Name:
nseq_state.h
seq_state.h
Abstract:
@ -13,15 +13,15 @@ Abstract:
Author:
Clemens Eisenhofer 2026-03-01
Nikolaj Bjorner (nbjorner) 2026-03-01
--*/
#pragma once
#include "util/vector.h"
#include "ast/euf/euf_sgraph.h"
#include "smt/seq/seq_nielsen.h"
#include "smt/smt_literal.h"
#include "util/sat_literal.h"
namespace smt {
@ -35,7 +35,7 @@ namespace smt {
// source info for a regex membership (the literal that asserted it)
struct mem_source {
literal m_lit;
sat::literal m_lit;
};
// source info for a string disequality
@ -44,8 +44,7 @@ namespace smt {
enode* m_n2;
};
class nseq_state {
euf::sgraph& m_sg;
class seq_state {
vector<seq::str_eq> m_str_eqs;
vector<seq::str_mem> m_str_mems;
vector<eq_source> m_eq_sources;
@ -57,7 +56,7 @@ namespace smt {
unsigned m_next_mem_id = 0;
public:
nseq_state(euf::sgraph& sg) : m_sg(sg) {}
seq_state() = default;
void push() {
m_str_eq_lim.push_back(m_str_eqs.size());
@ -84,7 +83,7 @@ namespace smt {
m_eq_sources.push_back({n1, n2});
}
void add_str_mem(euf::snode* str, euf::snode* regex, literal lit) {
void add_str_mem(euf::snode* str, euf::snode* regex, sat::literal lit) {
seq::dep_tracker dep;
m_str_mems.push_back(seq::str_mem(str, regex, nullptr, m_next_mem_id++, dep));
m_mem_sources.push_back({lit});

View file

@ -3,22 +3,22 @@ Copyright (c) 2026 Microsoft Corporation
Module Name:
nseq_model.cpp
seq_model.cpp
Abstract:
Implementation of nseq_model: model construction for the
Implementation of seq_model: model construction for the
Nielsen-based string solver.
Author:
Clemens Eisenhofer 2026-03-01
Nikolaj Bjorner (nbjorner) 2026-03-01
--*/
#include "smt/nseq_model.h"
#include "smt/theory_nseq.h"
#include "smt/nseq_regex.h"
#include "smt/nseq_state.h"
#include "smt/seq_model.h"
#include "smt/seq/seq_regex.h"
#include "smt/seq/seq_state.h"
#include "smt/smt_context.h"
#include "smt/smt_model_generator.h"
#include "smt/proto_model/proto_model.h"
@ -26,19 +26,19 @@ Author:
namespace smt {
nseq_model::nseq_model(theory_nseq& th, ast_manager& m, seq_util& seq,
seq_rewriter& rw, euf::sgraph& sg, nseq_regex& regex)
: m_th(th), m(m), m_seq(seq), m_rewriter(rw), m_sg(sg), m_regex(regex), m_trail(m)
seq_model::seq_model(ast_manager& m, seq_util& seq,
seq_rewriter& rw, euf::sgraph& sg, seq::seq_regex& regex)
: m(m), m_seq(seq), m_rewriter(rw), m_sg(sg), m_regex(regex), m_trail(m)
{}
void nseq_model::init(model_generator& mg, seq::nielsen_graph& nielsen, nseq_state const& state) {
void seq_model::init(model_generator& mg, seq::nielsen_graph& nielsen, seq_state const& state) {
m_var_values.reset();
m_var_regex.reset();
m_trail.reset();
m_int_model = nullptr;
m_mg = &mg;
m_factory = alloc(seq_factory, m, m_th.get_family_id(), mg.get_model());
m_factory = alloc(seq_factory, m, m_seq.get_family_id(), mg.get_model());
mg.register_factory(m_factory);
register_existing_values(nielsen);
@ -52,7 +52,7 @@ namespace smt {
extract_assignments(nielsen.sat_path());
}
model_value_proc* nseq_model::mk_value(enode* n, model_generator& mg) {
model_value_proc* seq_model::mk_value(enode* n, model_generator& mg) {
app* e = n->get_expr();
if (!m_seq.is_seq(e) && !m_seq.is_re(e) && !m_seq.str.is_nth_u(e))
return nullptr;
@ -101,7 +101,7 @@ namespace smt {
return alloc(expr_wrapper_proc, to_app(m_seq.str.mk_empty(e->get_sort())));
}
void nseq_model::finalize(model_generator& mg) {
void seq_model::finalize(model_generator& mg) {
m_var_values.reset();
m_var_regex.reset();
m_trail.reset();
@ -110,7 +110,7 @@ namespace smt {
m_factory = nullptr;
}
void nseq_model::extract_assignments(svector<seq::nielsen_edge*> const& sat_path) {
void seq_model::extract_assignments(svector<seq::nielsen_edge*> const& sat_path) {
IF_VERBOSE(1, verbose_stream() << "nseq extract_assignments: path length=" << sat_path.size() << "\n";);
// compose substitutions root-to-leaf.
@ -154,12 +154,15 @@ namespace smt {
}
}
expr_ref nseq_model::snode_to_value(euf::snode* n) {
expr_ref seq_model::snode_to_value(euf::snode* n) {
if (!n)
return expr_ref(m);
if (n->is_empty())
return expr_ref(m_seq.str.mk_empty(m_seq.str.mk_string_sort()), m);
if (n->is_empty()) {
sort* srt = n->get_sort();
if (!srt) srt = m_seq.str.mk_string_sort();
return expr_ref(m_seq.str.mk_empty(srt), m);
}
if (n->is_char() || n->is_unit()) {
expr* e = n->get_expr();
@ -272,7 +275,7 @@ namespace smt {
return e ? expr_ref(e, m) : expr_ref(m);
}
expr_ref nseq_model::generate_regex_witness(euf::snode* regex, unsigned depth) {
expr_ref seq_model::generate_regex_witness(euf::snode* regex, unsigned depth) {
if (!regex)
return expr_ref(m_seq.str.mk_empty(m_seq.str.mk_string_sort()), m);
@ -307,7 +310,7 @@ namespace smt {
return fresh ? expr_ref(fresh, m) : expr_ref(m_seq.str.mk_empty(srt), m);
}
void nseq_model::register_existing_values(seq::nielsen_graph& nielsen) {
void seq_model::register_existing_values(seq::nielsen_graph& nielsen) {
seq::nielsen_node const* root = nielsen.root();
if (!root)
return;
@ -319,7 +322,7 @@ namespace smt {
}
}
expr* nseq_model::get_var_value(euf::snode* var) {
expr* seq_model::get_var_value(euf::snode* var) {
expr* val = nullptr;
if (m_var_values.find(var->id(), val))
return val;
@ -333,7 +336,7 @@ namespace smt {
return val;
}
expr* nseq_model::mk_fresh_value(euf::snode* var) {
expr* seq_model::mk_fresh_value(euf::snode* var) {
// check if this variable has regex constraints
euf::snode* re = nullptr;
if (m_var_regex.find(var->id(), re) && re) {
@ -353,7 +356,7 @@ namespace smt {
return m_seq.str.mk_empty(srt);
}
void nseq_model::collect_var_regex_constraints(nseq_state const& state) {
void seq_model::collect_var_regex_constraints(seq_state const& state) {
for (auto const& mem : state.str_mems()) {
if (!mem.m_str || !mem.m_regex)
continue;
@ -380,7 +383,7 @@ namespace smt {
}
}
bool nseq_model::validate_regex(nseq_state const& state, ::proto_model& mdl) {
bool seq_model::validate_regex(seq_state const& state, ::proto_model& mdl) {
bool ok = true;
// validate positive memberships: str ∈ regex

View file

@ -3,7 +3,7 @@ Copyright (c) 2026 Microsoft Corporation
Module Name:
nseq_model.h
seq_model.h
Abstract:
@ -22,6 +22,7 @@ Abstract:
Author:
Clemens Eisenhofer 2026-03-01
Nikolaj Bjorner (nbjorner) 2026-03-01
--*/
@ -30,26 +31,28 @@ Author:
#include "ast/seq_decl_plugin.h"
#include "ast/rewriter/seq_rewriter.h"
#include "ast/euf/euf_sgraph.h"
#include "smt/smt_types.h"
#include "smt/seq/seq_nielsen.h"
#include "model/seq_factory.h"
class proto_model;
namespace seq {
class seq_regex;
}
namespace smt {
class theory_nseq;
class nseq_regex;
class nseq_state;
class enode;
class model_generator;
class seq_state;
class model_value_proc;
class nseq_model {
theory_nseq& m_th;
class seq_model {
ast_manager& m;
seq_util& m_seq;
seq_rewriter& m_rewriter;
euf::sgraph& m_sg;
nseq_regex& m_regex;
seq::seq_regex& m_regex;
// factory for generating fresh string/regex values
seq_factory* m_factory = nullptr;
@ -70,14 +73,14 @@ namespace smt {
u_map<euf::snode*> m_var_regex;
public:
nseq_model(theory_nseq& th, ast_manager& m, seq_util& seq,
seq_rewriter& rw, euf::sgraph& sg, nseq_regex& regex);
seq_model(ast_manager& m, seq_util& seq,
seq_rewriter& rw, euf::sgraph& sg, seq::seq_regex& regex);
// Phase 1: initialize model construction.
// Allocates seq_factory, registers it with mg, collects
// existing string literals, and extracts variable assignments
// from the satisfying Nielsen leaf node.
void init(model_generator& mg, seq::nielsen_graph& nielsen, nseq_state const& state);
void init(model_generator& mg, seq::nielsen_graph& nielsen, seq_state const& state);
// Phase 2: build a model_value_proc for the given enode.
// Returns nullptr if the enode is not a sequence/string sort.
@ -89,7 +92,7 @@ namespace smt {
// Validate that model assignments satisfy all regex membership
// constraints from the state. Checks positive and negative
// memberships. Returns true if all constraints pass.
bool validate_regex(nseq_state const& state, ::proto_model& mdl);
bool validate_regex(seq_state const& state, ::proto_model& mdl);
private:
// extract variable assignments from the sat path (root-to-leaf edges).
@ -123,7 +126,7 @@ namespace smt {
// collect per-variable regex constraints from the state.
// For each positive str_mem, records the regex (or intersects
// with existing) into m_var_regex keyed by the string snode id.
void collect_var_regex_constraints(nseq_state const& state);
void collect_var_regex_constraints(seq_state const& state);
};
}

View file

@ -35,9 +35,9 @@ namespace smt {
m_sgraph(ctx.get_manager(), m_egraph),
m_context_solver(ctx.get_manager()),
m_nielsen(m_sgraph, m_context_solver),
m_state(m_sgraph),
m_state(),
m_regex(m_sgraph),
m_model(*this, ctx.get_manager(), m_seq, m_rewriter, m_sgraph, m_regex)
m_model(ctx.get_manager(), m_seq, m_rewriter, m_sgraph, m_regex)
{}
// -----------------------------------------------------------------------
@ -339,7 +339,7 @@ namespace smt {
m_nielsen.add_str_eq(eq.m_lhs, eq.m_rhs);
}
// transfer regex memberships, pre-processing through nseq_regex
// transfer regex memberships, pre-processing through seq_regex
// to consume ground prefixes via Brzozowski derivatives
for (unsigned state_idx = 0; state_idx < m_state.str_mems().size(); ++state_idx) {
auto const& mem = m_state.str_mems()[state_idx];

View file

@ -25,9 +25,9 @@ Author:
#include "smt/smt_theory.h"
#include "smt/smt_arith_value.h"
#include "smt/seq/seq_nielsen.h"
#include "smt/nseq_state.h"
#include "smt/nseq_regex.h"
#include "smt/nseq_model.h"
#include "smt/seq/seq_state.h"
#include "smt/seq/seq_regex.h"
#include "smt/seq_model.h"
#include "smt/nseq_context_solver.h"
namespace smt {
@ -43,9 +43,9 @@ namespace smt {
// to the m_nielsen constructor and must remain stable for the object's lifetime.
context_solver m_context_solver;
seq::nielsen_graph m_nielsen;
nseq_state m_state;
nseq_regex m_regex; // regex membership pre-processing
nseq_model m_model; // model construction helper
seq_state m_state;
seq::seq_regex m_regex; // regex membership pre-processing
seq_model m_model; // model construction helper
// propagation queue
struct prop_item {

View file

@ -134,7 +134,7 @@ add_executable(test-z3
seq_nielsen.cpp
seq_parikh.cpp
nseq_basic.cpp
nseq_regex.cpp
seq_regex.cpp
nseq_zipt.cpp
small_object_allocator.cpp
smt2print_parse.cpp

View file

@ -289,7 +289,7 @@ int main(int argc, char ** argv) {
TST(seq_nielsen);
TST(seq_parikh);
TST(nseq_basic);
TST(nseq_regex);
TST(seq_regex);
TST(nseq_zipt);
TST(ho_matcher);
TST(finite_set);

View file

@ -3,11 +3,11 @@ Copyright (c) 2026 Microsoft Corporation
Module Name:
nseq_regex.cpp
seq_regex.cpp
Abstract:
Unit tests for nseq_regex: lazy regex membership processing
Unit tests for seq_regex: lazy regex membership processing
for the Nielsen-based string solver.
--*/
@ -15,32 +15,32 @@ Abstract:
#include "ast/reg_decl_plugins.h"
#include "ast/euf/euf_egraph.h"
#include "ast/euf/euf_sgraph.h"
#include "smt/nseq_regex.h"
#include "smt/seq/seq_regex.h"
#include "smt/seq/seq_nielsen.h"
#include "util/lbool.h"
#include "util/zstring.h"
#include <iostream>
// Test 1: nseq_regex instantiation
static void test_nseq_regex_instantiation() {
std::cout << "test_nseq_regex_instantiation\n";
// Test 1: seq_regex instantiation
static void test_seq_regex_instantiation() {
std::cout << "test_seq_regex_instantiation\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
SASSERT(&nr.sg() == &sg);
std::cout << " ok\n";
}
// Test 2: is_empty_regex on an empty-language node
static void test_nseq_regex_is_empty() {
std::cout << "test_nseq_regex_is_empty\n";
static void test_seq_regex_is_empty() {
std::cout << "test_seq_regex_is_empty\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
seq_util su(m);
sort* str_sort = su.str.mk_string_sort();
@ -52,13 +52,13 @@ static void test_nseq_regex_is_empty() {
}
// Test 3: is_empty_regex on a full-match regex (not empty)
static void test_nseq_regex_is_full() {
std::cout << "test_nseq_regex_is_full\n";
static void test_seq_regex_is_full() {
std::cout << "test_seq_regex_is_full\n";
ast_manager m;
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
seq_util su(m);
sort* str_sort = su.str.mk_string_sort();
@ -76,7 +76,7 @@ static void test_strengthened_stabilizer_null() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
SASSERT(nr.strengthened_stabilizer(nullptr, nullptr) == nullptr);
@ -100,7 +100,7 @@ static void test_strengthened_stabilizer_single_char() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
seq_util su(m);
// Build a*
@ -126,7 +126,7 @@ static void test_strengthened_stabilizer_two_char() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
seq_util su(m);
// Build (ab)*
@ -152,7 +152,7 @@ static void test_filtered_stabilizer_star_empty() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
seq_util su(m);
sort* str_sort = su.str.mk_string_sort();
@ -173,7 +173,7 @@ static void test_filtered_stabilizer_star_with_stab() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
seq_util su(m);
// Build a* as the regex state
@ -201,7 +201,7 @@ static void test_filtered_stabilizer_star_filtered() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
seq_util su(m);
// Build a* as the regex state
@ -227,7 +227,7 @@ static void test_extract_cycle_history_basic() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
seq_util su(m);
sort* str_sort = su.str.mk_string_sort();
@ -265,7 +265,7 @@ static void test_extract_cycle_history_null_ancestor() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
seq_util su(m);
sort* str_sort = su.str.mk_string_sort();
@ -297,7 +297,7 @@ static void test_bfs_empty_none() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
seq_util su(m);
sort* str_sort = su.str.mk_string_sort();
sort* re_sort = su.re.mk_re(str_sort);
@ -316,7 +316,7 @@ static void test_bfs_nonempty_full() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
seq_util su(m);
sort* str_sort = su.str.mk_string_sort();
sort* re_sort = su.re.mk_re(str_sort);
@ -335,7 +335,7 @@ static void test_bfs_nonempty_to_re() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
seq_util su(m);
expr_ref to_re_abc(su.re.mk_to_re(su.str.mk_string("abc")), m);
@ -352,7 +352,7 @@ static void test_bfs_nonempty_star() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
seq_util su(m);
expr_ref star_a(su.re.mk_star(su.re.mk_to_re(su.str.mk_string("a"))), m);
@ -369,7 +369,7 @@ static void test_bfs_empty_union_of_empties() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
seq_util su(m);
sort* str_sort = su.str.mk_string_sort();
sort* re_sort = su.re.mk_re(str_sort);
@ -390,7 +390,7 @@ static void test_bfs_nonempty_range() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
seq_util su(m);
sort* str_sort = su.str.mk_string_sort();
@ -410,7 +410,7 @@ static void test_bfs_empty_complement_full() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
seq_util su(m);
sort* str_sort = su.str.mk_string_sort();
sort* re_sort = su.re.mk_re(str_sort);
@ -429,7 +429,7 @@ static void test_bfs_null_safety() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
lbool result = nr.is_empty_bfs(nullptr);
SASSERT(result == l_undef);
@ -443,7 +443,7 @@ static void test_bfs_bounded() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
seq_util su(m);
// (a|b)+ requires at least one char; with max_states=1 should bail
@ -495,7 +495,7 @@ static void test_stabilizer_store_basic() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
seq_util su(m);
expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m);
@ -527,7 +527,7 @@ static void test_self_stabilizing() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
seq_util su(m);
expr_ref a_re(su.re.mk_to_re(su.str.mk_string("a")), m);
@ -552,7 +552,7 @@ static void test_check_intersection_sat() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
seq_util su(m);
// a* ∩ (a|b)* should be non-empty (both accept "a")
@ -580,7 +580,7 @@ static void test_check_intersection_unsat() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
seq_util su(m);
sort* str_sort = su.str.mk_string_sort();
@ -605,7 +605,7 @@ static void test_is_language_subset_true() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
seq_util su(m);
// a* ⊆ (a|b)* should be true
@ -630,7 +630,7 @@ static void test_is_language_subset_false() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
seq_util su(m);
// (a|b)* ⊄ a* should be false (b ∈ (a|b)* but b ∉ a*)
@ -655,7 +655,7 @@ static void test_is_language_subset_trivial() {
reg_decl_plugins(m);
euf::egraph eg(m);
euf::sgraph sg(m, eg);
smt::nseq_regex nr(sg);
seq::seq_regex nr(sg);
seq_util su(m);
sort* str_sort = su.str.mk_string_sort();
@ -677,10 +677,10 @@ static void test_is_language_subset_trivial() {
std::cout << " ok\n";
}
void tst_nseq_regex() {
test_nseq_regex_instantiation();
test_nseq_regex_is_empty();
test_nseq_regex_is_full();
void tst_seq_regex() {
test_seq_regex_instantiation();
test_seq_regex_is_empty();
test_seq_regex_is_full();
test_strengthened_stabilizer_null();
test_strengthened_stabilizer_single_char();
test_strengthened_stabilizer_two_char();
@ -707,5 +707,5 @@ void tst_nseq_regex() {
test_is_language_subset_true();
test_is_language_subset_false();
test_is_language_subset_trivial();
std::cout << "nseq_regex: all tests passed\n";
std::cout << "seq_regex: all tests passed\n";
}