3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-03-07 05:44:51 +00:00

Use correct parameters for iterative deepening

Updated spec
This commit is contained in:
CEisenhofer 2026-03-04 17:37:04 +01:00
parent 2d44a4dbf3
commit e4787e57f6
4 changed files with 73 additions and 82 deletions

View file

@ -24,26 +24,28 @@ The ZIPT algorithm (reference C# implementation at [github.com/CEisenhofer/ZIPT]
- ✅ `seq_plugin` — Egraph plugin for associativity, identity, absorption, star merge (`src/ast/euf/euf_seq_plugin.h/.cpp`)
- ✅ Nielsen graph data structures — `str_eq`, `str_mem`, `nielsen_subst`, `nielsen_edge`, `nielsen_node`, `nielsen_graph`, `dep_tracker` (`src/smt/seq/seq_nielsen.h/.cpp`)
- ✅ 38 unit tests (18 sgraph + 7 seq_plugin + 13 nielsen)
- ✅ `theory_nseq` skeleton and SMT integration (`src/smt/theory_nseq.h/.cpp`)
- ✅ Parameter integration — `smt.string_solver=nseq` and `smt.nseq.max_depth` (`src/params/smt_params*.{cpp,h,pyg}`, `src/smt/smt_setup.cpp`)
- ✅ Build system — `theory_nseq.cpp`, `nseq_state.cpp`, `nseq_regex.cpp`, `nseq_model.cpp` in `src/smt/CMakeLists.txt`
- ✅ All 13 Nielsen modifiers (`apply_det_modifier`, `apply_const_nielsen`, `apply_var_nielsen`, `apply_eq_split`, `apply_regex_char_split`, `apply_power_epsilon`, `apply_num_cmp`, `apply_const_num_unwinding`, `apply_star_intr`, `apply_gpower_intr`, `apply_regex_var_split`, `apply_power_split`, `apply_var_num_unwinding`)
- ✅ Graph expansion / iterative deepening DFS — `nielsen_graph::solve()` + `search_dfs()` (depth 3, +1/failure, bounded by `smt.nseq.max_depth`)
- ✅ Constraint simplification engine — `nielsen_node::simplify_and_init()`
- ✅ Conflict detection and explanation — `dep_tracker`-based conflict clauses
- ✅ Propagation engine — `theory_nseq::propagate()`, eq/diseq/regex membership wiring
- ✅ Model generation — `nseq_model` (`src/smt/nseq_model.h/.cpp`), wired via `init_model()` / `mk_value()` / `finalize_model()`
- ✅ Regex membership processing — `nseq_regex` (`src/smt/nseq_regex.h/.cpp`)
- ✅ Constraint state management — `nseq_state` (`src/smt/nseq_state.h/.cpp`) with push/pop backtracking
- ✅ Integration unit tests — `tst_nseq_basic` (`src/test/nseq_basic.cpp`), `tst_nseq_regex` (`src/test/nseq_regex.cpp`)
- ✅ Length constraint generation — `nielsen_graph::generate_length_constraints()` (length equalities, power non-negativity, regex bounds)
**Not yet implemented:**
- ❌ `theory_nseq` skeleton and SMT integration
- ❌ Parameter integration (`smt.string_solver=nseq`)
- ❌ Nielsen transformation algorithms (modifier chain, 13 modifiers)
- ❌ Graph expansion / iterative deepening DFS
- ❌ Constraint simplification engine
- ❌ Conflict detection and explanation
- ❌ Subsumption checking (not needed)
- ❌ Parikh image / PDD / Interval reasoning
- ❌ Regex splitting modifiers
- ❌ Propagation engine
- ❌ Model generation
**Not yet implemented / incomplete:**
- ❌ Integer constraint reasoning via `lp::int_solver` — length constraints are generated but not yet discharged through `lp::int_solver`; currently propagated as theory lemmas via `arith_value`
- ❌ Correct results on Z3's full string regression suite (`C:\z3test\regressions\smt2\string*.smt2`)
### 2.2 The Problem
- **Algorithmic gap**: The core solving algorithms — the "brain" of ZIPT — have not been ported. Only the "bones" (data structures) exist.
- **No solver entry point**: There is no `theory_nseq` class, so the ZIPT algorithm cannot be invoked from Z3.
- **Missing search**: Iterative deepening and conflict detection are absent.
- **Missing reasoning**: Parikh image (length constraints via polynomial decision diagrams) and regex splitting modifiers are absent.
- **Algorithmic gap**: The core solving algorithms are implemented. The main remaining gap is full `lp::int_solver` integration for integer subproblems.
- **Regression coverage**: The solver has not yet been validated against Z3's full string regression suite.
### 2.3 Reference Documents
@ -56,14 +58,14 @@ The ZIPT algorithm (reference C# implementation at [github.com/CEisenhofer/ZIPT]
### 3.1 Functional Goals
- [ ] `theory_nseq` theory solver class implementing `smt::theory` interface
- [ ] Selectable via `smt.string_solver=nseq` parameter
- [ ] Nielsen transformation-based word equation solving
- [ ] Lazy regex membership via Brzozowski derivatives (reusing `sgraph::brzozowski_deriv`)
- [x] `theory_nseq` theory solver class implementing `smt::theory` interface
- [x] Selectable via `smt.string_solver=nseq` parameter
- [x] Nielsen transformation-based word equation solving
- [x] Lazy regex membership via Brzozowski derivatives (reusing `sgraph::brzozowski_deriv`)
- [ ] Integer constraint reasoning via `lp::int_solver` (length, power terms, Parikh image)
- [ ] Iterative deepening search: initial depth 3, +1 per failure, upper bound via `smt.nseq.max_depth` (0 = infinity)
- [ ] Conflict detection with dependency-tracked explanations
- [ ] Model generation for satisfiable instances
- [x] Iterative deepening search: initial depth 3, +1 per failure, upper bound via `smt.nseq.max_depth` (0 = infinity)
- [x] Conflict detection with dependency-tracked explanations
- [x] Model generation for satisfiable instances
- [ ] Correct results on Z3's existing string regression suite (`C:\z3test\regressions\smt2\string*.smt2`)
### 3.2 Non-Goals (Out of Scope)

View file

@ -554,16 +554,12 @@ namespace seq {
++m_stats.m_num_solve_calls;
// iterative deepening: 6 iterations starting at depth 10, doubling
// mirrors ZIPT's NielsenGraph.Check()
// If m_max_search_depth is set, clamp the initial bound and stop
// once the bound has hit the cap (further iterations are identical).
m_depth_bound = 10;
if (m_max_search_depth > 0 && m_depth_bound > m_max_search_depth)
m_depth_bound = m_max_search_depth;
for (unsigned iter = 0; iter < 6; ++iter, m_depth_bound *= 2) {
// Iterative deepening: start at depth 3, increment by 1 on each failure.
// m_max_search_depth == 0 means unlimited; otherwise stop when bound exceeds it.
m_depth_bound = 3;
while (true) {
if (m_max_search_depth > 0 && m_depth_bound > m_max_search_depth)
m_depth_bound = m_max_search_depth;
break;
inc_run_idx();
search_result r = search_dfs(m_root, 0);
if (r == search_result::sat) {
@ -574,10 +570,10 @@ namespace seq {
++m_stats.m_num_unsat;
return r;
}
// depth limit hit increase bound and retry
// if already at max, no further growth is possible
// depth limit hit increment bound by 1 and retry
if (m_max_search_depth > 0 && m_depth_bound >= m_max_search_depth)
break;
++m_depth_bound;
}
++m_stats.m_num_unknown;
return search_result::unknown;
@ -631,7 +627,7 @@ namespace seq {
// generate extensions only once per node; children persist across runs
if (!node->is_extended()) {
bool ext = generate_extensions(node, depth);
bool ext = generate_extensions(node);
if (!ext) {
node->set_extended(true);
// No extensions could be generated. If the node still has
@ -1039,7 +1035,7 @@ namespace seq {
return false;
}
bool nielsen_graph::generate_extensions(nielsen_node* node, unsigned /*depth*/) {
bool nielsen_graph::generate_extensions(nielsen_node *node) {
// Modifier priority chain mirrors ZIPT's ModifierBase.TypeOrder.
// The first modifier that produces edges is used and returned immediately.

View file

@ -113,8 +113,7 @@ Abstract:
3. Node constraint containers: ZIPT's NielsenNode stores str_eq constraints
in NList<StrEq> (a sorted list for O(log n) subsumption lookup) and str_mem
constraints in Dictionary<uint, StrMem> (keyed by id for O(1) cycle lookup).
Z3 uses plain vector<str_eq> and vector<str_mem>, which is simpler but
loses the sorted-list subsumption candidate structure.
Z3 uses plain vector<str_eq> and vector<str_mem>, which is simpler.
4. nielsen_edge substitution list: ZIPT's NielsenEdge carries two substitution
lists -- Subst (string-level, mapping string variables to strings) and
@ -125,8 +124,7 @@ Abstract:
5. nielsen_graph node registry: ZIPT keeps nodes in a HashSet<NielsenNode> plus
a Dictionary<NList<StrEq>, List<NielsenNode>> for subsumption candidate
lookup. Z3 uses a ptr_vector<nielsen_node> without any subsumption map,
simplifying memory management at the cost of subsumption checking.
lookup. Z3 uses a ptr_vector<nielsen_node>, simplifying memory management.
6. nielsen_graph::display() vs NielsenGraph.ToDot(): ZIPT outputs a DOT-format
graph with color highlighting for the current satisfying path. Z3 outputs
@ -155,8 +153,7 @@ Abstract:
equation-splitting heuristic used to choose the best split point.
- StrMem.SimplifyCharRegex() / SimplifyDir(): Brzozowski derivative-based
simplification consuming ground prefixes/suffixes of the string.
- StrMem.TrySubsume(): stabilizer-based subsumption that drops leading
variables already covered by regex stabilizers.
- StrMem.TrySubsume(): stabilizer-based subsumption (not ported, not needed).
- StrMem.ExtractCycle() / StabilizerFromCycle(): cycle detection over the
search path and extraction of a Kleene-star stabilizer to generalize the
cycle. This is the key termination argument for regex membership.
@ -197,15 +194,15 @@ Abstract:
Search procedure:
- NielsenGraph.Check() / NielsenNode.GraphExpansion(): ported as
nielsen_graph::solve() (iterative deepening, 6 rounds starting at
depth 10, doubling) and search_dfs() (depth-bounded DFS with
eval_idx cycle detection and node status tracking). The inner solver
setup and subsumption-node lookup within Check() are not ported.
nielsen_graph::solve() (iterative deepening, starting at depth 3,
incrementing by 1 per failure, bounded by smt.nseq.max_depth) and
search_dfs() (depth-bounded DFS with eval_idx cycle detection and
node status tracking).
- NielsenNode.SimplifyAndInit(): ported as
nielsen_node::simplify_and_init() with prefix matching, symbol clash,
empty propagation, and Brzozowski derivative consumption.
- NielsenGraph.FindExisting(): the subsumption cache lookup over
subsumptionCandidates is not ported.
- NielsenGraph.FindExisting() / subsumption cache lookup: not ported,
not needed.
Auxiliary infrastructure:
- LocalInfo: thread-local search bookkeeping (current path, modification
@ -264,7 +261,7 @@ namespace seq {
extended,
symbol_clash,
parikh_image,
subsumption,
subsumption, // not used; retained for enum completeness
arithmetic,
regex,
regex_widening,
@ -494,9 +491,6 @@ namespace seq {
// true if all str_eqs are trivial and there are no str_mems
bool is_satisfied() const;
// true if other's constraint set is a subset of this node's
bool is_subsumed_by(nielsen_node const& other) const;
// true if any constraint has opaque (s_other) terms that
// the Nielsen graph cannot decompose
bool has_opaque_terms() const;
@ -510,7 +504,6 @@ namespace seq {
unsigned m_num_unsat = 0;
unsigned m_num_unknown = 0;
unsigned m_num_simplify_conflict = 0;
unsigned m_num_subsumptions = 0;
unsigned m_num_extensions = 0;
unsigned m_num_fresh_vars = 0;
unsigned m_max_depth = 0;
@ -609,7 +602,7 @@ namespace seq {
// generate child nodes by applying modifier rules
// returns true if at least one child was generated
bool generate_extensions(nielsen_node* node, unsigned depth);
bool generate_extensions(nielsen_node *node);
// collect dependency information from conflicting constraints
void collect_conflict_deps(dep_tracker& deps) const;

View file

@ -486,7 +486,7 @@ static void test_eq_split_basic() {
// eq_split fires: both heads are distinct vars
// produces 3 children: x→ε (progress), x→y·z (non-progress), y→x·z (non-progress)
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
SASSERT(root->outgoing().size() == 3);
@ -597,7 +597,7 @@ static void test_const_nielsen_char_var() {
ng.add_str_eq(a, y);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
SASSERT(root->outgoing().size() == 2);
// both branches are progress
@ -623,7 +623,7 @@ static void test_const_nielsen_var_char() {
ng.add_str_eq(x, by);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
SASSERT(root->outgoing().size() == 2);
SASSERT(root->outgoing()[0]->is_progress());
@ -694,7 +694,7 @@ static void test_const_nielsen_priority_over_eq_split() {
ng.add_str_eq(ax, yb);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
// const_nielsen produces 2 children, not eq_split's 3
SASSERT(root->outgoing().size() == 2);
@ -721,7 +721,7 @@ static void test_const_nielsen_not_applicable_both_vars() {
ng.add_str_eq(xa, yb);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
SASSERT(root->outgoing().size() == 3);
}
@ -780,7 +780,7 @@ static void test_regex_char_split_basic() {
auto sr = ng.root()->simplify_and_init(ng);
SASSERT(sr != seq::simplify_result::conflict);
bool extended = ng.generate_extensions(ng.root(), 0);
bool extended = ng.generate_extensions(ng.root());
SASSERT(extended);
// should have at least 2 children: x→'A'·z and x→ε
SASSERT(ng.root()->outgoing().size() >= 2);
@ -994,7 +994,7 @@ static void test_var_nielsen_basic() {
ng.add_str_eq(x, y);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
SASSERT(root->outgoing().size() == 3);
SASSERT(root->outgoing()[0]->is_progress());
@ -1039,7 +1039,7 @@ static void test_var_nielsen_not_applicable_char() {
ng.add_str_eq(a, y);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
SASSERT(root->outgoing().size() == 2);
}
@ -1126,7 +1126,7 @@ static void test_var_nielsen_priority() {
ng.add_str_eq(x, y);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
// eq_split produces 3 children
SASSERT(root->outgoing().size() == 3);
@ -1155,7 +1155,7 @@ static void test_generate_extensions_det_priority() {
ng.add_str_eq(xa, xy);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
// det modifier produces 1 child (head cancel), not 2 (const) or 3 (var)
SASSERT(root->outgoing().size() == 1);
@ -1181,7 +1181,7 @@ static void test_generate_extensions_no_applicable() {
ng.add_str_eq(a, b);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(!extended);
SASSERT(root->outgoing().empty());
}
@ -1211,7 +1211,7 @@ static void test_generate_extensions_regex_only() {
root->simplify_and_init(ng);
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
// at least 1 child (epsilon branch) + possibly char branches
SASSERT(root->outgoing().size() >= 1);
@ -1246,7 +1246,7 @@ static void test_generate_extensions_mixed_det_first() {
ng.add_str_mem(y, re_node);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
// det modifier (same-head x cancel) produces 1 child
SASSERT(root->outgoing().size() == 1);
@ -2017,7 +2017,7 @@ static void test_det_cancel_child_eq() {
ng.add_str_eq(xa, xb);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
SASSERT(root->outgoing().size() == 1);
@ -2044,7 +2044,7 @@ static void test_const_nielsen_child_substitutions() {
ng.add_str_eq(a, y);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
SASSERT(root->outgoing().size() == 2);
@ -2076,7 +2076,7 @@ static void test_var_nielsen_substitution_types() {
ng.add_str_eq(x, y);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
SASSERT(root->outgoing().size() == 3);
@ -2554,7 +2554,7 @@ static void test_power_epsilon_no_power() {
// det fires (x = single char → const_nielsen fires eventually),
// but power_epsilon (priority 2) should not fire; det (priority 1) fires.
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
// det catches x = A first (single token eq, but actually ConstNielsen fires):
// x is var, A is char → ConstNielsen: 2 children (x→ε, x→A)
@ -2578,7 +2578,7 @@ static void test_num_cmp_no_power() {
ng.add_str_eq(x, y);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
// eq_split fires (priority 5): 3 children
SASSERT(root->outgoing().size() == 3);
@ -2611,7 +2611,7 @@ static void test_star_intr_no_backedge() {
auto sr = root->simplify_and_init(ng);
SASSERT(sr != seq::simplify_result::conflict);
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
// regex_char_split fires (priority 9): at least 2 children (x→A·z, x→ε)
SASSERT(root->outgoing().size() >= 2);
@ -2650,7 +2650,7 @@ static void test_star_intr_with_backedge() {
return; // OK, the regex is nullable so it was removed
}
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
if (extended) {
// star_intr should have generated at least 1 child
SASSERT(root->outgoing().size() >= 1);
@ -2678,7 +2678,7 @@ static void test_gpower_intr_repeated_chars() {
ng.add_str_eq(x, aab);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
// gpower_intr should fire (priority 7), producing 1 child: x = fresh_power · fresh_suffix
SASSERT(root->outgoing().size() == 1);
@ -2704,7 +2704,7 @@ static void test_gpower_intr_no_repeat() {
ng.add_str_eq(x, ab);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
// gpower_intr should NOT fire (< 2 repeats)
// const_nielsen (priority 8) fires for var vs char: 2 children
@ -2741,7 +2741,7 @@ static void test_regex_var_split_basic() {
auto sr = root->simplify_and_init(ng);
SASSERT(sr != seq::simplify_result::conflict);
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
// Should produce children via regex_char_split or regex_var_split
SASSERT(root->outgoing().size() >= 2);
@ -2767,7 +2767,7 @@ static void test_power_split_no_power() {
ng.add_str_eq(xa, y);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
// eq_split or const_nielsen fires
SASSERT(root->outgoing().size() >= 2);
@ -2790,7 +2790,7 @@ static void test_var_num_unwinding_no_power() {
ng.add_str_eq(x, y);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
// eq_split fires: 3 children
SASSERT(root->outgoing().size() == 3);
@ -2839,7 +2839,7 @@ static void test_priority_chain_order() {
ng.add_str_eq(xa, xb);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
SASSERT(root->outgoing().size() == 1); // Det: single child (cancel)
}
@ -2855,7 +2855,7 @@ static void test_priority_chain_order() {
ng.add_str_eq(x, y);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
SASSERT(root->outgoing().size() == 3); // EqSplit: 3 children
}
@ -2871,7 +2871,7 @@ static void test_priority_chain_order() {
ng.add_str_eq(a, y);
seq::nielsen_node* root = ng.root();
bool extended = ng.generate_extensions(root, 0);
bool extended = ng.generate_extensions(root);
SASSERT(extended);
SASSERT(root->outgoing().size() == 2); // ConstNielsen: 2 children
}