From 5003cece9d770466d44c164bfc5322c5fbc2e2fc Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 4 Mar 2026 15:45:55 +0000
Subject: [PATCH] Implement Parameter integration for theory_nseq
 (smt.nseq.max_depth)

Co-authored-by: CEisenhofer <56730610+CEisenhofer@users.noreply.github.com>
---
 src/params/smt_params.cpp        |  1 +
 src/params/smt_params.h          |  1 +
 src/params/smt_params_helper.pyg |  1 +
 src/smt/seq/seq_nielsen.cpp      |  9 ++++
 src/smt/seq/seq_nielsen.h        |  5 +++
 src/smt/theory_nseq.cpp          |  1 +
 src/test/nseq_regex.cpp          | 74 ++++++++++++++++++++++++++++++++
 7 files changed, 92 insertions(+)
 create mode 100644 src/test/nseq_regex.cpp

diff --git a/src/params/smt_params.cpp b/src/params/smt_params.cpp
index 29a0d72a5..54695899a 100644
--- a/src/params/smt_params.cpp
+++ b/src/params/smt_params.cpp
@@ -53,6 +53,7 @@ void smt_params::updt_local_params(params_ref const & _p) {
     m_sls_parallel = p.sls_parallel();
     m_logic = _p.get_sym("logic", m_logic);
     m_string_solver = p.string_solver();
+    m_nseq_max_depth = p.nseq_max_depth();
     m_up_persist_clauses = p.up_persist_clauses();
     validate_string_solver(m_string_solver);
     if (_p.get_bool("arith.greatest_error_pivot", false))
diff --git a/src/params/smt_params.h b/src/params/smt_params.h
index 68ab50ffe..b40de83e5 100644
--- a/src/params/smt_params.h
+++ b/src/params/smt_params.h
@@ -248,6 +248,7 @@ struct smt_params : public preprocessor_params,
     //
     // -----------------------------------
     symbol m_string_solver;
+    unsigned m_nseq_max_depth = 0;
 
     smt_params(params_ref const & p = params_ref()):
         m_string_solver(symbol("auto")){
diff --git a/src/params/smt_params_helper.pyg b/src/params/smt_params_helper.pyg
index 58d5fbf8c..287ed0098 100644
--- a/src/params/smt_params_helper.pyg
+++ b/src/params/smt_params_helper.pyg
@@ -123,6 +123,7 @@ def_module_params(module_name='smt',
                           ('dack.threshold', UINT, 10, ' number of times the congruence rule must be used before Leibniz\'s axiom is expanded'),
                           ('theory_case_split', BOOL, False, 'Allow the context to use heuristics involving theory case splits, which are a set of literals of which exactly one can be assigned True. If this option is false, the context will generate extra axioms to enforce this instead.'),
                           ('string_solver', SYMBOL, 'seq', 'solver for string/sequence theories. options are: \'z3str3\' (specialized string solver), \'seq\' (sequence solver), \'auto\' (use static features to choose best solver), \'empty\' (a no-op solver that forces an answer unknown if strings were used), \'none\' (no solver), \'nseq\' (Nielsen-based string solver)'),
+                          ('nseq.max_depth', UINT, 0, 'maximum Nielsen search depth for theory_nseq (0 = unlimited)'),
                           ('core.validate', BOOL, False, '[internal] validate unsat core produced by SMT context. This option is intended for debugging'),
                           ('seq.split_w_len', BOOL, True, 'enable splitting guided by length constraints'),
                           ('seq.validate', BOOL, False, 'enable self-validation of theory axioms created by seq theory'),
diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp
index d5d5a3e24..f7957c24a 100644
--- a/src/smt/seq/seq_nielsen.cpp
+++ b/src/smt/seq/seq_nielsen.cpp
@@ -556,8 +556,14 @@ namespace seq {
 
         // iterative deepening: 6 iterations starting at depth 10, doubling
         // mirrors ZIPT's NielsenGraph.Check()
+        // If m_max_search_depth is set, clamp the initial bound and stop
+        // once the bound has hit the cap (further iterations are identical).
         m_depth_bound = 10;
+        if (m_max_search_depth > 0 && m_depth_bound > m_max_search_depth)
+            m_depth_bound = m_max_search_depth;
         for (unsigned iter = 0; iter < 6; ++iter, m_depth_bound *= 2) {
+            if (m_max_search_depth > 0 && m_depth_bound > m_max_search_depth)
+                m_depth_bound = m_max_search_depth;
             inc_run_idx();
             search_result r = search_dfs(m_root, 0);
             if (r == search_result::sat) {
@@ -569,6 +575,9 @@ namespace seq {
                 return r;
             }
             // depth limit hit – increase bound and retry
+            // if already at max, no further growth is possible
+            if (m_max_search_depth > 0 && m_depth_bound >= m_max_search_depth)
+                break;
         }
         ++m_stats.m_num_unknown;
         return search_result::unknown;
diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h
index 09ca10f65..79eabb51e 100644
--- a/src/smt/seq/seq_nielsen.h
+++ b/src/smt/seq/seq_nielsen.h
@@ -541,6 +541,7 @@ namespace seq {
         nielsen_node*                 m_root = nullptr;
         unsigned                      m_run_idx = 0;
         unsigned                      m_depth_bound = 0;
+        unsigned                      m_max_search_depth = 0;
         unsigned                      m_next_mem_id = 0;
         unsigned                      m_fresh_cnt = 0;
         unsigned                      m_num_input_eqs = 0;
@@ -580,6 +581,10 @@ namespace seq {
         unsigned depth_bound() const { return m_depth_bound; }
         void set_depth_bound(unsigned d) { m_depth_bound = d; }
 
+        // maximum overall search depth (0 = unlimited)
+        unsigned max_search_depth() const { return m_max_search_depth; }
+        void set_max_search_depth(unsigned d) { m_max_search_depth = d; }
+
         // generate next unique regex membership id
         unsigned next_mem_id() { return m_next_mem_id++; }
 
diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp
index 9b9c68b62..2781d59b0 100644
--- a/src/smt/theory_nseq.cpp
+++ b/src/smt/theory_nseq.cpp
@@ -423,6 +423,7 @@ namespace smt {
 
         ++m_num_final_checks;
 
+        m_nielsen.set_max_search_depth(get_fparams().m_nseq_max_depth);
         auto result = m_nielsen.solve();
 
         if (result == seq::nielsen_graph::search_result::sat) {
diff --git a/src/test/nseq_regex.cpp b/src/test/nseq_regex.cpp
new file mode 100644
index 000000000..41a58f6b2
--- /dev/null
+++ b/src/test/nseq_regex.cpp
@@ -0,0 +1,74 @@
+/*++
+Copyright (c) 2026 Microsoft Corporation
+
+Module Name:
+
+    nseq_regex.cpp
+
+Abstract:
+
+    Unit tests for nseq_regex: lazy regex membership processing
+    for the Nielsen-based string solver.
+
+--*/
+#include "util/util.h"
+#include "ast/reg_decl_plugins.h"
+#include "ast/euf/euf_egraph.h"
+#include "ast/euf/euf_sgraph.h"
+#include "smt/nseq_regex.h"
+#include <iostream>
+
+// Test 1: nseq_regex instantiation
+static void test_nseq_regex_instantiation() {
+    std::cout << "test_nseq_regex_instantiation\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    smt::nseq_regex nr(sg);
+    SASSERT(&nr.sg() == &sg);
+    std::cout << "  ok\n";
+}
+
+// Test 2: is_empty_regex on an empty-language node
+static void test_nseq_regex_is_empty() {
+    std::cout << "test_nseq_regex_is_empty\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    smt::nseq_regex nr(sg);
+
+    seq_util su(m);
+    sort* str_sort = su.str.mk_string_sort();
+    // re.none is the empty language
+    expr_ref none_e(su.re.mk_empty(su.re.mk_re(str_sort)), m);
+    euf::snode* none_n = sg.mk(none_e.get());
+    SASSERT(nr.is_empty_regex(none_n));
+    std::cout << "  ok: re.none recognized as empty\n";
+}
+
+// Test 3: is_empty_regex on a full-match regex (not empty)
+static void test_nseq_regex_is_full() {
+    std::cout << "test_nseq_regex_is_full\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    euf::egraph eg(m);
+    euf::sgraph sg(m, eg);
+    smt::nseq_regex nr(sg);
+
+    seq_util su(m);
+    sort* str_sort = su.str.mk_string_sort();
+    // re.all (full sequence regex) is not empty
+    expr_ref full_e(su.re.mk_full_seq(su.re.mk_re(str_sort)), m);
+    euf::snode* full_n = sg.mk(full_e.get());
+    SASSERT(!nr.is_empty_regex(full_n));
+    std::cout << "  ok: re.all not recognized as empty\n";
+}
+
+void tst_nseq_regex() {
+    test_nseq_regex_instantiation();
+    test_nseq_regex_is_empty();
+    test_nseq_regex_is_full();
+    std::cout << "nseq_regex: all tests passed\n";
+}