From ee02c1c8f3f3e4e2932da36f56739669ebda640c Mon Sep 17 00:00:00 2001
From: CEisenhofer <clemens.eisenhofer@tuwien.ac.at>
Date: Wed, 4 Mar 2026 15:53:49 +0100
Subject: [PATCH] Changed specifications

---
 specs/theory-nseq-zipt-port.md | 52 ++++++++++++++++------------------
 src/test/nseq_basic.cpp        | 51 +++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+), 28 deletions(-)

diff --git a/specs/theory-nseq-zipt-port.md b/specs/theory-nseq-zipt-port.md
index bb2f964cc..1ac2f049e 100644
--- a/specs/theory-nseq-zipt-port.md
+++ b/specs/theory-nseq-zipt-port.md
@@ -32,7 +32,7 @@ The ZIPT algorithm (reference C# implementation at [github.com/CEisenhofer/ZIPT]
 - ❌ Graph expansion / iterative deepening DFS
 - ❌ Constraint simplification engine
 - ❌ Conflict detection and explanation
-- ❌ Subsumption checking
+- ❌ Subsumption checking (not needed)
 - ❌ Parikh image / PDD / Interval reasoning
 - ❌ Regex splitting modifiers
 - ❌ Propagation engine
@@ -42,7 +42,7 @@ The ZIPT algorithm (reference C# implementation at [github.com/CEisenhofer/ZIPT]
 
 - **Algorithmic gap**: The core solving algorithms — the "brain" of ZIPT — have not been ported. Only the "bones" (data structures) exist.
 - **No solver entry point**: There is no `theory_nseq` class, so the ZIPT algorithm cannot be invoked from Z3.
-- **Missing search**: Iterative deepening, subsumption, and conflict detection are absent.
+- **Missing search**: Iterative deepening and conflict detection are absent.
 - **Missing reasoning**: Parikh image (length constraints via polynomial decision diagrams) and regex splitting modifiers are absent.
 
 ### 2.3 Reference Documents
@@ -61,8 +61,7 @@ The ZIPT algorithm (reference C# implementation at [github.com/CEisenhofer/ZIPT]
 - [ ] Nielsen transformation-based word equation solving
 - [ ] Lazy regex membership via Brzozowski derivatives (reusing `sgraph::brzozowski_deriv`)
 - [ ] Parikh image / length constraint reasoning
-- [ ] Iterative deepening search with depth bounds
-- [ ] Subsumption-based pruning
+- [ ] Iterative deepening search: initial depth 3, +1 per failure, upper bound via `smt.nseq.max_depth` (0 = infinity)
 - [ ] Conflict detection with dependency-tracked explanations
 - [ ] Model generation for satisfiable instances
 - [ ] Correct results on Z3's existing string regression suite (`C:\z3test\regressions\smt2\string*.smt2`)
@@ -73,6 +72,9 @@ The ZIPT algorithm (reference C# implementation at [github.com/CEisenhofer/ZIPT]
 - [ ] Polymorphic sequences — focus on strings (`Seq[Char]`) initially; generic `Seq[T]` can follow later
 - [ ] Higher-order sequence ops (`seq.map`, `seq.foldl`) — defer to `theory_seq` or axiomatize minimally
 - [ ] Bitvector-to-string conversions (`ubv2s`, `sbv2s`) — defer
+- [ ] Integer-to-string / string-to-integer conversions (`str.to_int`, `int.to_str`) — defer
+- [ ] Extended string predicates: containment (`str.contains`), prefix/suffix (`str.prefixof`, `str.suffixof`) — defer
+- [ ] String ordering (`str.<`, `str.<=`) — defer
 - [ ] New Z3 API surface — `theory_nseq` is internal only, selected by parameter
 - [ ] Performance parity with `theory_seq` on all benchmarks in Phase 1 — correctness first
 
@@ -95,10 +97,10 @@ The ZIPT algorithm (reference C# implementation at [github.com/CEisenhofer/ZIPT]
 │  │         │             │                            │  │
 │  │  ┌──────┴─────────────┴────────────────────────┐  │  │
 │  │  │          nielsen_graph (search engine)       │  │  │
-│  │  │  ┌──────────┐ ┌──────────┐ ┌─────────────┐ │  │  │
-│  │  │  │ modifier │ │ simplify │ │ subsumption │ │  │  │
-│  │  │  │  chain   │ │  engine  │ │   checker   │ │  │  │
-│  │  │  └──────────┘ └──────────┘ └─────────────┘ │  │  │
+│  │  │  ┌──────────┐ ┌──────────┐                       │  │  │
+│  │  │  │ modifier │ │ simplify │                       │  │  │
+│  │  │  │  chain   │ │  engine  │                       │  │  │
+│  │  │  └──────────┘ └──────────┘                       │  │  │
 │  │  └───────────────────┬─────────────────────────┘  │  │
 │  │                      │                             │  │
 │  │  ┌───────────────────┴─────────────────────────┐  │  │
@@ -119,7 +121,7 @@ The ZIPT algorithm (reference C# implementation at [github.com/CEisenhofer/ZIPT]
 1. **Internalization**: String terms/atoms are registered, creating theory variables and snode representations.
 2. **Propagation**: Equality/disequality merges and boolean assignments update the constraint store.
 3. **Final check**: Build a nielsen_graph from active constraints, run iterative deepening search.
-4. **Search**: At each node, apply the modifier chain (13 modifiers in priority order) to generate child nodes. Check for conflicts, subsumption, and satisfaction.
+4. **Search**: At each node, apply the modifier chain (13 modifiers in priority order) to generate child nodes. Check for conflicts and satisfaction.
 5. **Conflict/Satisfaction**: If all leaves are satisfied → FC_DONE. If a conflict is found → generate explanation clause and return FC_CONTINUE. If stuck → increase depth bound and retry.
 
 ### 4.3 Key Components
@@ -187,7 +189,7 @@ class theory_nseq : public theory {
 
 **Internalization pattern** (following `theory_seq`):
 - `internalize_term`: For string-sorted terms, create theory variable, register with egraph/sgraph. For `str.++`, `str.len`, `str.in_re`, etc., recursively internalize children.
-- `internalize_atom`: For `str.in_re`, `str.contains`, `str.prefixof`, `str.suffixof`, `str.<`, `str.<=` — create boolean variable.
+- `internalize_atom`: For `str.in_re` — create boolean variable. (`str.contains`, `str.prefixof`, `str.suffixof`, `str.<`, `str.<=` are out of scope for `theory_nseq`.)
 
 #### 5.1.2 Parameter integration
 
@@ -217,7 +219,11 @@ void setup::setup_nseq() {
 
 **`src/smt/smt_setup.h`** — Declare `setup_nseq()`.
 
-**`src/params/smt_params_helper.pyg`** — Document `nseq` in the `string_solver` parameter description.
+**`src/params/smt_params_helper.pyg`** — Document `nseq` in the `string_solver` parameter description, and add:
+```python
+def_param("nseq.max_depth", UINT, 0,
+    "Maximum Nielsen search depth for theory_nseq (0 = unlimited)")
+```
 
 #### 5.1.3 Build system
 
@@ -339,24 +345,14 @@ search_result expand_node(nielsen_node* node, unsigned depth);
 ```
 
 **Algorithm** (matching ZIPT's `NielsenGraph.Run()`):
-1. Set initial depth bound (e.g., 10)
-2. Run DFS from root with depth bound
+1. Set initial depth bound to **3**
+2. Run DFS from root with current depth bound
 3. If all branches conflict → UNSAT
 4. If a leaf is satisfied (all constraints trivial) → SAT
-5. If depth bound hit → increase bound, restart (iterative deepening)
-6. Cap total iterations to prevent non-termination
+5. If depth bound hit → increase bound by **1**, restart (iterative deepening)
+6. If `smt.nseq.max_depth > 0` and bound exceeds it → give up (`FC_GIVEUP`); if `smt.nseq.max_depth == 0` (default) → no upper limit
 
-#### 5.3.4 Subsumption Checking
-
-Add to `nielsen_node`:
-
-```cpp
-bool is_subsumed_by(nielsen_node const& other) const;
-```
-
-A node N₁ is subsumed by N₂ if every constraint in N₂ also appears in N₁ (via snode pointer equality after canonicalization). Subsumption prevents exploring redundant branches.
-
-#### 5.3.5 Conflict Detection & Explanation
+#### 5.3.4 Conflict Detection & Explanation
 
 Add to `nielsen_graph`:
 
@@ -638,7 +634,7 @@ Create focused test files in `tests/nseq/`:
 
 | File | Phase | Change |
 |------|-------|--------|
-| `src/smt/seq/seq_nielsen.h` | 3 | Add simplification, modifier chain, search, subsumption methods |
+| `src/smt/seq/seq_nielsen.h` | 3 | Add simplification, modifier chain, search methods |
 | `src/smt/seq/seq_nielsen.cpp` | 3 | Implement above methods |
 
 ### Files to Modify (Integration)
@@ -657,7 +653,7 @@ Create focused test files in `tests/nseq/`:
 
 1. **Phase 1** — Skeleton (`theory_nseq` class, parameter integration, build system) — compilable stub returning `FC_GIVEUP`
 2. **Phase 2** — State management (`nseq_state`) — constraint store with backtracking
-3. **Phase 3** — Nielsen algorithms (simplification, modifiers, search, subsumption, conflict) — core solver
+3. **Phase 3** — Nielsen algorithms (simplification, modifiers, search, conflict) — core solver
 4. **Phase 4** — Length/Parikh reasoning — arithmetic integration
 5. **Phase 5** — Regex membership (`nseq_regex`) — lazy derivatives
 6. **Phase 6** — Propagation wiring — connect all components in `final_check_eh`
diff --git a/src/test/nseq_basic.cpp b/src/test/nseq_basic.cpp
index d71204f53..7472755fb 100644
--- a/src/test/nseq_basic.cpp
+++ b/src/test/nseq_basic.cpp
@@ -16,6 +16,9 @@ Abstract:
 #include "ast/euf/euf_sgraph.h"
 #include "smt/seq/seq_nielsen.h"
 #include "params/smt_params.h"
+#include "ast/seq_decl_plugin.h"
+#include "smt/smt_context.h"
+#include "smt/theory_nseq.h"
 #include <iostream>
 
 // Test 1: instantiation of nielsen_graph compiles and doesn't crash
@@ -53,6 +56,26 @@ static void test_nseq_param_validation() {
     }
 }
 
+// Test 2b: parameter validation rejects invalid variants of "nseq"
+static void test_nseq_param_validation_rejects_invalid() {
+    std::cout << "test_nseq_param_validation_rejects_invalid\n";
+    smt_params p;
+    static const char* invalid_variants[] = { "nseq2", "NSEQ", "nseqq", "nse", "Nseq", "nseq ", "" };
+    for (auto s : invalid_variants) {
+        bool threw = false;
+        try {
+            p.validate_string_solver(symbol(s));
+        } catch (...) {
+            threw = true;
+        }
+        if (!threw) {
+            std::cerr << "  FAIL: '" << s << "' should have been rejected\n";
+            SASSERT(false && "invalid string solver variant was accepted");
+        }
+    }
+    std::cout << "  ok: all invalid variants rejected\n";
+}
+
 // Test 3: nielsen graph simplification (trivial case)
 static void test_nseq_simplification() {
     std::cout << "test_nseq_simplification\n";
@@ -204,9 +227,36 @@ static void test_nseq_length_mismatch() {
     std::cout << "  ok: ab = a detected as unsat\n";
 }
 
+// Test 10: setup_seq_str dispatches to setup_nseq() when string_solver == "nseq"
+static void test_setup_seq_str_dispatches_nseq() {
+    std::cout << "test_setup_seq_str_dispatches_nseq\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+
+    smt_params params;
+    params.m_string_solver = symbol("nseq");
+
+    smt::context ctx(m, params);
+
+    // Assert a string equality to trigger string theory setup during check()
+    seq_util su(m);
+    sort* str_sort = su.str.mk_string_sort();
+    app_ref x(m.mk_const(symbol("x_setup_test"), str_sort), m);
+    app_ref eq(m.mk_eq(x.get(), x.get()), m);
+    ctx.assert_expr(eq);
+    ctx.check();
+
+    // Verify that theory_nseq (not theory_seq) was registered for the "seq" family
+    family_id seq_fid = m.mk_family_id("seq");
+    SASSERT(ctx.get_theory(seq_fid) != nullptr);
+    SASSERT(dynamic_cast<smt::theory_nseq*>(ctx.get_theory(seq_fid)) != nullptr);
+    std::cout << "  ok: setup_seq_str dispatched to setup_nseq for 'nseq'\n";
+}
+
 void tst_nseq_basic() {
     test_nseq_instantiation();
     test_nseq_param_validation();
+    test_nseq_param_validation_rejects_invalid();
     test_nseq_simplification();
     test_nseq_node_satisfied();
     test_nseq_symbol_clash();
@@ -214,5 +264,6 @@ void tst_nseq_basic() {
     test_nseq_prefix_clash();
     test_nseq_const_nielsen_solvable();
     test_nseq_length_mismatch();
+    test_setup_seq_str_dispatches_nseq();
     std::cout << "nseq_basic: all tests passed\n";
 }