From 5003cece9d770466d44c164bfc5322c5fbc2e2fc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 4 Mar 2026 15:45:55 +0000 Subject: [PATCH] Implement Parameter integration for theory_nseq (smt.nseq.max_depth) Co-authored-by: CEisenhofer <56730610+CEisenhofer@users.noreply.github.com> --- src/params/smt_params.cpp | 1 + src/params/smt_params.h | 1 + src/params/smt_params_helper.pyg | 1 + src/smt/seq/seq_nielsen.cpp | 9 ++++ src/smt/seq/seq_nielsen.h | 5 +++ src/smt/theory_nseq.cpp | 1 + src/test/nseq_regex.cpp | 74 ++++++++++++++++++++++++++++++++ 7 files changed, 92 insertions(+) create mode 100644 src/test/nseq_regex.cpp diff --git a/src/params/smt_params.cpp b/src/params/smt_params.cpp index 29a0d72a5..54695899a 100644 --- a/src/params/smt_params.cpp +++ b/src/params/smt_params.cpp @@ -53,6 +53,7 @@ void smt_params::updt_local_params(params_ref const & _p) { m_sls_parallel = p.sls_parallel(); m_logic = _p.get_sym("logic", m_logic); m_string_solver = p.string_solver(); + m_nseq_max_depth = p.nseq_max_depth(); m_up_persist_clauses = p.up_persist_clauses(); validate_string_solver(m_string_solver); if (_p.get_bool("arith.greatest_error_pivot", false)) diff --git a/src/params/smt_params.h b/src/params/smt_params.h index 68ab50ffe..b40de83e5 100644 --- a/src/params/smt_params.h +++ b/src/params/smt_params.h @@ -248,6 +248,7 @@ struct smt_params : public preprocessor_params, // // ----------------------------------- symbol m_string_solver; + unsigned m_nseq_max_depth = 0; smt_params(params_ref const & p = params_ref()): m_string_solver(symbol("auto")){ diff --git a/src/params/smt_params_helper.pyg b/src/params/smt_params_helper.pyg index 58d5fbf8c..287ed0098 100644 --- a/src/params/smt_params_helper.pyg +++ b/src/params/smt_params_helper.pyg @@ -123,6 +123,7 @@ def_module_params(module_name='smt', ('dack.threshold', UINT, 10, ' number of times the congruence rule must be used before Leibniz\'s axiom is expanded'), ('theory_case_split', BOOL, False, 'Allow the context to use heuristics involving theory case splits, which are a set of literals of which exactly one can be assigned True. If this option is false, the context will generate extra axioms to enforce this instead.'), ('string_solver', SYMBOL, 'seq', 'solver for string/sequence theories. options are: \'z3str3\' (specialized string solver), \'seq\' (sequence solver), \'auto\' (use static features to choose best solver), \'empty\' (a no-op solver that forces an answer unknown if strings were used), \'none\' (no solver), \'nseq\' (Nielsen-based string solver)'), + ('nseq.max_depth', UINT, 0, 'maximum Nielsen search depth for theory_nseq (0 = unlimited)'), ('core.validate', BOOL, False, '[internal] validate unsat core produced by SMT context. This option is intended for debugging'), ('seq.split_w_len', BOOL, True, 'enable splitting guided by length constraints'), ('seq.validate', BOOL, False, 'enable self-validation of theory axioms created by seq theory'), diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index d5d5a3e24..f7957c24a 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -556,8 +556,14 @@ namespace seq { // iterative deepening: 6 iterations starting at depth 10, doubling // mirrors ZIPT's NielsenGraph.Check() + // If m_max_search_depth is set, clamp the initial bound and stop + // once the bound has hit the cap (further iterations are identical). m_depth_bound = 10; + if (m_max_search_depth > 0 && m_depth_bound > m_max_search_depth) + m_depth_bound = m_max_search_depth; for (unsigned iter = 0; iter < 6; ++iter, m_depth_bound *= 2) { + if (m_max_search_depth > 0 && m_depth_bound > m_max_search_depth) + m_depth_bound = m_max_search_depth; inc_run_idx(); search_result r = search_dfs(m_root, 0); if (r == search_result::sat) { @@ -569,6 +575,9 @@ namespace seq { return r; } // depth limit hit – increase bound and retry + // if already at max, no further growth is possible + if (m_max_search_depth > 0 && m_depth_bound >= m_max_search_depth) + break; } ++m_stats.m_num_unknown; return search_result::unknown; diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 09ca10f65..79eabb51e 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -541,6 +541,7 @@ namespace seq { nielsen_node* m_root = nullptr; unsigned m_run_idx = 0; unsigned m_depth_bound = 0; + unsigned m_max_search_depth = 0; unsigned m_next_mem_id = 0; unsigned m_fresh_cnt = 0; unsigned m_num_input_eqs = 0; @@ -580,6 +581,10 @@ namespace seq { unsigned depth_bound() const { return m_depth_bound; } void set_depth_bound(unsigned d) { m_depth_bound = d; } + // maximum overall search depth (0 = unlimited) + unsigned max_search_depth() const { return m_max_search_depth; } + void set_max_search_depth(unsigned d) { m_max_search_depth = d; } + // generate next unique regex membership id unsigned next_mem_id() { return m_next_mem_id++; } diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index 9b9c68b62..2781d59b0 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -423,6 +423,7 @@ namespace smt { ++m_num_final_checks; + m_nielsen.set_max_search_depth(get_fparams().m_nseq_max_depth); auto result = m_nielsen.solve(); if (result == seq::nielsen_graph::search_result::sat) { diff --git a/src/test/nseq_regex.cpp b/src/test/nseq_regex.cpp new file mode 100644 index 000000000..41a58f6b2 --- /dev/null +++ b/src/test/nseq_regex.cpp @@ -0,0 +1,74 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + nseq_regex.cpp + +Abstract: + + Unit tests for nseq_regex: lazy regex membership processing + for the Nielsen-based string solver. + +--*/ +#include "util/util.h" +#include "ast/reg_decl_plugins.h" +#include "ast/euf/euf_egraph.h" +#include "ast/euf/euf_sgraph.h" +#include "smt/nseq_regex.h" +#include + +// Test 1: nseq_regex instantiation +static void test_nseq_regex_instantiation() { + std::cout << "test_nseq_regex_instantiation\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + SASSERT(&nr.sg() == &sg); + std::cout << " ok\n"; +} + +// Test 2: is_empty_regex on an empty-language node +static void test_nseq_regex_is_empty() { + std::cout << "test_nseq_regex_is_empty\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + + seq_util su(m); + sort* str_sort = su.str.mk_string_sort(); + // re.none is the empty language + expr_ref none_e(su.re.mk_empty(su.re.mk_re(str_sort)), m); + euf::snode* none_n = sg.mk(none_e.get()); + SASSERT(nr.is_empty_regex(none_n)); + std::cout << " ok: re.none recognized as empty\n"; +} + +// Test 3: is_empty_regex on a full-match regex (not empty) +static void test_nseq_regex_is_full() { + std::cout << "test_nseq_regex_is_full\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + smt::nseq_regex nr(sg); + + seq_util su(m); + sort* str_sort = su.str.mk_string_sort(); + // re.all (full sequence regex) is not empty + expr_ref full_e(su.re.mk_full_seq(su.re.mk_re(str_sort)), m); + euf::snode* full_n = sg.mk(full_e.get()); + SASSERT(!nr.is_empty_regex(full_n)); + std::cout << " ok: re.all not recognized as empty\n"; +} + +void tst_nseq_regex() { + test_nseq_regex_instantiation(); + test_nseq_regex_is_empty(); + test_nseq_regex_is_full(); + std::cout << "nseq_regex: all tests passed\n"; +}