From e4787e57f6fcf6d317e2ad057ee3997507e04e94 Mon Sep 17 00:00:00 2001 From: CEisenhofer Date: Wed, 4 Mar 2026 17:37:04 +0100 Subject: [PATCH] Use correct parameters for iterative deepening Updated spec --- specs/theory-nseq-zipt-port.md | 48 +++++++++++++++-------------- src/smt/seq/seq_nielsen.cpp | 22 ++++++------- src/smt/seq/seq_nielsen.h | 29 +++++++----------- src/test/seq_nielsen.cpp | 56 +++++++++++++++++----------------- 4 files changed, 73 insertions(+), 82 deletions(-) diff --git a/specs/theory-nseq-zipt-port.md b/specs/theory-nseq-zipt-port.md index be333789e..b68b6876d 100644 --- a/specs/theory-nseq-zipt-port.md +++ b/specs/theory-nseq-zipt-port.md @@ -24,26 +24,28 @@ The ZIPT algorithm (reference C# implementation at [github.com/CEisenhofer/ZIPT] - ✅ `seq_plugin` — Egraph plugin for associativity, identity, absorption, star merge (`src/ast/euf/euf_seq_plugin.h/.cpp`) - ✅ Nielsen graph data structures — `str_eq`, `str_mem`, `nielsen_subst`, `nielsen_edge`, `nielsen_node`, `nielsen_graph`, `dep_tracker` (`src/smt/seq/seq_nielsen.h/.cpp`) - ✅ 38 unit tests (18 sgraph + 7 seq_plugin + 13 nielsen) +- ✅ `theory_nseq` skeleton and SMT integration (`src/smt/theory_nseq.h/.cpp`) +- ✅ Parameter integration — `smt.string_solver=nseq` and `smt.nseq.max_depth` (`src/params/smt_params*.{cpp,h,pyg}`, `src/smt/smt_setup.cpp`) +- ✅ Build system — `theory_nseq.cpp`, `nseq_state.cpp`, `nseq_regex.cpp`, `nseq_model.cpp` in `src/smt/CMakeLists.txt` +- ✅ All 13 Nielsen modifiers (`apply_det_modifier`, `apply_const_nielsen`, `apply_var_nielsen`, `apply_eq_split`, `apply_regex_char_split`, `apply_power_epsilon`, `apply_num_cmp`, `apply_const_num_unwinding`, `apply_star_intr`, `apply_gpower_intr`, `apply_regex_var_split`, `apply_power_split`, `apply_var_num_unwinding`) +- ✅ Graph expansion / iterative deepening DFS — `nielsen_graph::solve()` + `search_dfs()` (depth 3, +1/failure, bounded by `smt.nseq.max_depth`) +- ✅ Constraint simplification engine — `nielsen_node::simplify_and_init()` +- ✅ Conflict detection and explanation — `dep_tracker`-based conflict clauses +- ✅ Propagation engine — `theory_nseq::propagate()`, eq/diseq/regex membership wiring +- ✅ Model generation — `nseq_model` (`src/smt/nseq_model.h/.cpp`), wired via `init_model()` / `mk_value()` / `finalize_model()` +- ✅ Regex membership processing — `nseq_regex` (`src/smt/nseq_regex.h/.cpp`) +- ✅ Constraint state management — `nseq_state` (`src/smt/nseq_state.h/.cpp`) with push/pop backtracking +- ✅ Integration unit tests — `tst_nseq_basic` (`src/test/nseq_basic.cpp`), `tst_nseq_regex` (`src/test/nseq_regex.cpp`) +- ✅ Length constraint generation — `nielsen_graph::generate_length_constraints()` (length equalities, power non-negativity, regex bounds) -**Not yet implemented:** -- ❌ `theory_nseq` skeleton and SMT integration -- ❌ Parameter integration (`smt.string_solver=nseq`) -- ❌ Nielsen transformation algorithms (modifier chain, 13 modifiers) -- ❌ Graph expansion / iterative deepening DFS -- ❌ Constraint simplification engine -- ❌ Conflict detection and explanation -- ❌ Subsumption checking (not needed) -- ❌ Parikh image / PDD / Interval reasoning -- ❌ Regex splitting modifiers -- ❌ Propagation engine -- ❌ Model generation +**Not yet implemented / incomplete:** +- ❌ Integer constraint reasoning via `lp::int_solver` — length constraints are generated but not yet discharged through `lp::int_solver`; currently propagated as theory lemmas via `arith_value` +- ❌ Correct results on Z3's full string regression suite (`C:\z3test\regressions\smt2\string*.smt2`) ### 2.2 The Problem -- **Algorithmic gap**: The core solving algorithms — the "brain" of ZIPT — have not been ported. Only the "bones" (data structures) exist. -- **No solver entry point**: There is no `theory_nseq` class, so the ZIPT algorithm cannot be invoked from Z3. -- **Missing search**: Iterative deepening and conflict detection are absent. -- **Missing reasoning**: Parikh image (length constraints via polynomial decision diagrams) and regex splitting modifiers are absent. +- **Algorithmic gap**: The core solving algorithms are implemented. The main remaining gap is full `lp::int_solver` integration for integer subproblems. +- **Regression coverage**: The solver has not yet been validated against Z3's full string regression suite. ### 2.3 Reference Documents @@ -56,14 +58,14 @@ The ZIPT algorithm (reference C# implementation at [github.com/CEisenhofer/ZIPT] ### 3.1 Functional Goals -- [ ] `theory_nseq` theory solver class implementing `smt::theory` interface -- [ ] Selectable via `smt.string_solver=nseq` parameter -- [ ] Nielsen transformation-based word equation solving -- [ ] Lazy regex membership via Brzozowski derivatives (reusing `sgraph::brzozowski_deriv`) +- [x] `theory_nseq` theory solver class implementing `smt::theory` interface +- [x] Selectable via `smt.string_solver=nseq` parameter +- [x] Nielsen transformation-based word equation solving +- [x] Lazy regex membership via Brzozowski derivatives (reusing `sgraph::brzozowski_deriv`) - [ ] Integer constraint reasoning via `lp::int_solver` (length, power terms, Parikh image) -- [ ] Iterative deepening search: initial depth 3, +1 per failure, upper bound via `smt.nseq.max_depth` (0 = infinity) -- [ ] Conflict detection with dependency-tracked explanations -- [ ] Model generation for satisfiable instances +- [x] Iterative deepening search: initial depth 3, +1 per failure, upper bound via `smt.nseq.max_depth` (0 = infinity) +- [x] Conflict detection with dependency-tracked explanations +- [x] Model generation for satisfiable instances - [ ] Correct results on Z3's existing string regression suite (`C:\z3test\regressions\smt2\string*.smt2`) ### 3.2 Non-Goals (Out of Scope) diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index f7957c24a..88b21598b 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -554,16 +554,12 @@ namespace seq { ++m_stats.m_num_solve_calls; - // iterative deepening: 6 iterations starting at depth 10, doubling - // mirrors ZIPT's NielsenGraph.Check() - // If m_max_search_depth is set, clamp the initial bound and stop - // once the bound has hit the cap (further iterations are identical). - m_depth_bound = 10; - if (m_max_search_depth > 0 && m_depth_bound > m_max_search_depth) - m_depth_bound = m_max_search_depth; - for (unsigned iter = 0; iter < 6; ++iter, m_depth_bound *= 2) { + // Iterative deepening: start at depth 3, increment by 1 on each failure. + // m_max_search_depth == 0 means unlimited; otherwise stop when bound exceeds it. + m_depth_bound = 3; + while (true) { if (m_max_search_depth > 0 && m_depth_bound > m_max_search_depth) - m_depth_bound = m_max_search_depth; + break; inc_run_idx(); search_result r = search_dfs(m_root, 0); if (r == search_result::sat) { @@ -574,10 +570,10 @@ namespace seq { ++m_stats.m_num_unsat; return r; } - // depth limit hit – increase bound and retry - // if already at max, no further growth is possible + // depth limit hit – increment bound by 1 and retry if (m_max_search_depth > 0 && m_depth_bound >= m_max_search_depth) break; + ++m_depth_bound; } ++m_stats.m_num_unknown; return search_result::unknown; @@ -631,7 +627,7 @@ namespace seq { // generate extensions only once per node; children persist across runs if (!node->is_extended()) { - bool ext = generate_extensions(node, depth); + bool ext = generate_extensions(node); if (!ext) { node->set_extended(true); // No extensions could be generated. If the node still has @@ -1039,7 +1035,7 @@ namespace seq { return false; } - bool nielsen_graph::generate_extensions(nielsen_node* node, unsigned /*depth*/) { + bool nielsen_graph::generate_extensions(nielsen_node *node) { // Modifier priority chain mirrors ZIPT's ModifierBase.TypeOrder. // The first modifier that produces edges is used and returned immediately. diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 79eabb51e..8630ef5c8 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -113,8 +113,7 @@ Abstract: 3. Node constraint containers: ZIPT's NielsenNode stores str_eq constraints in NList (a sorted list for O(log n) subsumption lookup) and str_mem constraints in Dictionary (keyed by id for O(1) cycle lookup). - Z3 uses plain vector and vector, which is simpler but - loses the sorted-list subsumption candidate structure. + Z3 uses plain vector and vector, which is simpler. 4. nielsen_edge substitution list: ZIPT's NielsenEdge carries two substitution lists -- Subst (string-level, mapping string variables to strings) and @@ -125,8 +124,7 @@ Abstract: 5. nielsen_graph node registry: ZIPT keeps nodes in a HashSet plus a Dictionary, List> for subsumption candidate - lookup. Z3 uses a ptr_vector without any subsumption map, - simplifying memory management at the cost of subsumption checking. + lookup. Z3 uses a ptr_vector, simplifying memory management. 6. nielsen_graph::display() vs NielsenGraph.ToDot(): ZIPT outputs a DOT-format graph with color highlighting for the current satisfying path. Z3 outputs @@ -155,8 +153,7 @@ Abstract: equation-splitting heuristic used to choose the best split point. - StrMem.SimplifyCharRegex() / SimplifyDir(): Brzozowski derivative-based simplification consuming ground prefixes/suffixes of the string. - - StrMem.TrySubsume(): stabilizer-based subsumption that drops leading - variables already covered by regex stabilizers. + - StrMem.TrySubsume(): stabilizer-based subsumption (not ported, not needed). - StrMem.ExtractCycle() / StabilizerFromCycle(): cycle detection over the search path and extraction of a Kleene-star stabilizer to generalize the cycle. This is the key termination argument for regex membership. @@ -197,15 +194,15 @@ Abstract: Search procedure: - NielsenGraph.Check() / NielsenNode.GraphExpansion(): ported as - nielsen_graph::solve() (iterative deepening, 6 rounds starting at - depth 10, doubling) and search_dfs() (depth-bounded DFS with - eval_idx cycle detection and node status tracking). The inner solver - setup and subsumption-node lookup within Check() are not ported. + nielsen_graph::solve() (iterative deepening, starting at depth 3, + incrementing by 1 per failure, bounded by smt.nseq.max_depth) and + search_dfs() (depth-bounded DFS with eval_idx cycle detection and + node status tracking). - NielsenNode.SimplifyAndInit(): ported as nielsen_node::simplify_and_init() with prefix matching, symbol clash, empty propagation, and Brzozowski derivative consumption. - - NielsenGraph.FindExisting(): the subsumption cache lookup over - subsumptionCandidates is not ported. + - NielsenGraph.FindExisting() / subsumption cache lookup: not ported, + not needed. Auxiliary infrastructure: - LocalInfo: thread-local search bookkeeping (current path, modification @@ -264,7 +261,7 @@ namespace seq { extended, symbol_clash, parikh_image, - subsumption, + subsumption, // not used; retained for enum completeness arithmetic, regex, regex_widening, @@ -494,9 +491,6 @@ namespace seq { // true if all str_eqs are trivial and there are no str_mems bool is_satisfied() const; - // true if other's constraint set is a subset of this node's - bool is_subsumed_by(nielsen_node const& other) const; - // true if any constraint has opaque (s_other) terms that // the Nielsen graph cannot decompose bool has_opaque_terms() const; @@ -510,7 +504,6 @@ namespace seq { unsigned m_num_unsat = 0; unsigned m_num_unknown = 0; unsigned m_num_simplify_conflict = 0; - unsigned m_num_subsumptions = 0; unsigned m_num_extensions = 0; unsigned m_num_fresh_vars = 0; unsigned m_max_depth = 0; @@ -609,7 +602,7 @@ namespace seq { // generate child nodes by applying modifier rules // returns true if at least one child was generated - bool generate_extensions(nielsen_node* node, unsigned depth); + bool generate_extensions(nielsen_node *node); // collect dependency information from conflicting constraints void collect_conflict_deps(dep_tracker& deps) const; diff --git a/src/test/seq_nielsen.cpp b/src/test/seq_nielsen.cpp index 4d645fe7c..2eb0e740b 100644 --- a/src/test/seq_nielsen.cpp +++ b/src/test/seq_nielsen.cpp @@ -486,7 +486,7 @@ static void test_eq_split_basic() { // eq_split fires: both heads are distinct vars // produces 3 children: x→ε (progress), x→y·z (non-progress), y→x·z (non-progress) - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); SASSERT(root->outgoing().size() == 3); @@ -597,7 +597,7 @@ static void test_const_nielsen_char_var() { ng.add_str_eq(a, y); seq::nielsen_node* root = ng.root(); - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); SASSERT(root->outgoing().size() == 2); // both branches are progress @@ -623,7 +623,7 @@ static void test_const_nielsen_var_char() { ng.add_str_eq(x, by); seq::nielsen_node* root = ng.root(); - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); SASSERT(root->outgoing().size() == 2); SASSERT(root->outgoing()[0]->is_progress()); @@ -694,7 +694,7 @@ static void test_const_nielsen_priority_over_eq_split() { ng.add_str_eq(ax, yb); seq::nielsen_node* root = ng.root(); - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); // const_nielsen produces 2 children, not eq_split's 3 SASSERT(root->outgoing().size() == 2); @@ -721,7 +721,7 @@ static void test_const_nielsen_not_applicable_both_vars() { ng.add_str_eq(xa, yb); seq::nielsen_node* root = ng.root(); - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); SASSERT(root->outgoing().size() == 3); } @@ -780,7 +780,7 @@ static void test_regex_char_split_basic() { auto sr = ng.root()->simplify_and_init(ng); SASSERT(sr != seq::simplify_result::conflict); - bool extended = ng.generate_extensions(ng.root(), 0); + bool extended = ng.generate_extensions(ng.root()); SASSERT(extended); // should have at least 2 children: x→'A'·z and x→ε SASSERT(ng.root()->outgoing().size() >= 2); @@ -994,7 +994,7 @@ static void test_var_nielsen_basic() { ng.add_str_eq(x, y); seq::nielsen_node* root = ng.root(); - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); SASSERT(root->outgoing().size() == 3); SASSERT(root->outgoing()[0]->is_progress()); @@ -1039,7 +1039,7 @@ static void test_var_nielsen_not_applicable_char() { ng.add_str_eq(a, y); seq::nielsen_node* root = ng.root(); - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); SASSERT(root->outgoing().size() == 2); } @@ -1126,7 +1126,7 @@ static void test_var_nielsen_priority() { ng.add_str_eq(x, y); seq::nielsen_node* root = ng.root(); - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); // eq_split produces 3 children SASSERT(root->outgoing().size() == 3); @@ -1155,7 +1155,7 @@ static void test_generate_extensions_det_priority() { ng.add_str_eq(xa, xy); seq::nielsen_node* root = ng.root(); - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); // det modifier produces 1 child (head cancel), not 2 (const) or 3 (var) SASSERT(root->outgoing().size() == 1); @@ -1181,7 +1181,7 @@ static void test_generate_extensions_no_applicable() { ng.add_str_eq(a, b); seq::nielsen_node* root = ng.root(); - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(!extended); SASSERT(root->outgoing().empty()); } @@ -1211,7 +1211,7 @@ static void test_generate_extensions_regex_only() { root->simplify_and_init(ng); - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); // at least 1 child (epsilon branch) + possibly char branches SASSERT(root->outgoing().size() >= 1); @@ -1246,7 +1246,7 @@ static void test_generate_extensions_mixed_det_first() { ng.add_str_mem(y, re_node); seq::nielsen_node* root = ng.root(); - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); // det modifier (same-head x cancel) produces 1 child SASSERT(root->outgoing().size() == 1); @@ -2017,7 +2017,7 @@ static void test_det_cancel_child_eq() { ng.add_str_eq(xa, xb); seq::nielsen_node* root = ng.root(); - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); SASSERT(root->outgoing().size() == 1); @@ -2044,7 +2044,7 @@ static void test_const_nielsen_child_substitutions() { ng.add_str_eq(a, y); seq::nielsen_node* root = ng.root(); - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); SASSERT(root->outgoing().size() == 2); @@ -2076,7 +2076,7 @@ static void test_var_nielsen_substitution_types() { ng.add_str_eq(x, y); seq::nielsen_node* root = ng.root(); - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); SASSERT(root->outgoing().size() == 3); @@ -2554,7 +2554,7 @@ static void test_power_epsilon_no_power() { // det fires (x = single char → const_nielsen fires eventually), // but power_epsilon (priority 2) should not fire; det (priority 1) fires. - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); // det catches x = A first (single token eq, but actually ConstNielsen fires): // x is var, A is char → ConstNielsen: 2 children (x→ε, x→A) @@ -2578,7 +2578,7 @@ static void test_num_cmp_no_power() { ng.add_str_eq(x, y); seq::nielsen_node* root = ng.root(); - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); // eq_split fires (priority 5): 3 children SASSERT(root->outgoing().size() == 3); @@ -2611,7 +2611,7 @@ static void test_star_intr_no_backedge() { auto sr = root->simplify_and_init(ng); SASSERT(sr != seq::simplify_result::conflict); - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); // regex_char_split fires (priority 9): at least 2 children (x→A·z, x→ε) SASSERT(root->outgoing().size() >= 2); @@ -2650,7 +2650,7 @@ static void test_star_intr_with_backedge() { return; // OK, the regex is nullable so it was removed } - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); if (extended) { // star_intr should have generated at least 1 child SASSERT(root->outgoing().size() >= 1); @@ -2678,7 +2678,7 @@ static void test_gpower_intr_repeated_chars() { ng.add_str_eq(x, aab); seq::nielsen_node* root = ng.root(); - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); // gpower_intr should fire (priority 7), producing 1 child: x = fresh_power · fresh_suffix SASSERT(root->outgoing().size() == 1); @@ -2704,7 +2704,7 @@ static void test_gpower_intr_no_repeat() { ng.add_str_eq(x, ab); seq::nielsen_node* root = ng.root(); - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); // gpower_intr should NOT fire (< 2 repeats) // const_nielsen (priority 8) fires for var vs char: 2 children @@ -2741,7 +2741,7 @@ static void test_regex_var_split_basic() { auto sr = root->simplify_and_init(ng); SASSERT(sr != seq::simplify_result::conflict); - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); // Should produce children via regex_char_split or regex_var_split SASSERT(root->outgoing().size() >= 2); @@ -2767,7 +2767,7 @@ static void test_power_split_no_power() { ng.add_str_eq(xa, y); seq::nielsen_node* root = ng.root(); - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); // eq_split or const_nielsen fires SASSERT(root->outgoing().size() >= 2); @@ -2790,7 +2790,7 @@ static void test_var_num_unwinding_no_power() { ng.add_str_eq(x, y); seq::nielsen_node* root = ng.root(); - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); // eq_split fires: 3 children SASSERT(root->outgoing().size() == 3); @@ -2839,7 +2839,7 @@ static void test_priority_chain_order() { ng.add_str_eq(xa, xb); seq::nielsen_node* root = ng.root(); - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); SASSERT(root->outgoing().size() == 1); // Det: single child (cancel) } @@ -2855,7 +2855,7 @@ static void test_priority_chain_order() { ng.add_str_eq(x, y); seq::nielsen_node* root = ng.root(); - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); SASSERT(root->outgoing().size() == 3); // EqSplit: 3 children } @@ -2871,7 +2871,7 @@ static void test_priority_chain_order() { ng.add_str_eq(a, y); seq::nielsen_node* root = ng.root(); - bool extended = ng.generate_extensions(root, 0); + bool extended = ng.generate_extensions(root); SASSERT(extended); SASSERT(root->outgoing().size() == 2); // ConstNielsen: 2 children }