diff --git a/src/params/smt_params.cpp b/src/params/smt_params.cpp index daabdf66b..55ae9b2f8 100644 --- a/src/params/smt_params.cpp +++ b/src/params/smt_params.cpp @@ -54,7 +54,9 @@ void smt_params::updt_local_params(params_ref const & _p) { m_logic = _p.get_sym("logic", m_logic); m_string_solver = p.string_solver(); m_nseq_max_depth = p.nseq_max_depth(); + m_nseq_max_nodes = p.nseq_max_nodes(); m_nseq_parikh = p.nseq_parikh(); + m_nseq_regex_precheck = p.nseq_regex_precheck(); m_up_persist_clauses = p.up_persist_clauses(); validate_string_solver(m_string_solver); if (_p.get_bool("arith.greatest_error_pivot", false)) diff --git a/src/params/smt_params.h b/src/params/smt_params.h index 7d96863eb..950a05fb3 100644 --- a/src/params/smt_params.h +++ b/src/params/smt_params.h @@ -249,7 +249,9 @@ struct smt_params : public preprocessor_params, // ----------------------------------- symbol m_string_solver; unsigned m_nseq_max_depth = 0; + unsigned m_nseq_max_nodes = 0; bool m_nseq_parikh = true; + bool m_nseq_regex_precheck = true; smt_params(params_ref const & p = params_ref()): m_string_solver(symbol("auto")){ diff --git a/src/params/smt_params_helper.pyg b/src/params/smt_params_helper.pyg index 086b7eed3..b5b132834 100644 --- a/src/params/smt_params_helper.pyg +++ b/src/params/smt_params_helper.pyg @@ -124,7 +124,9 @@ def_module_params(module_name='smt', ('theory_case_split', BOOL, False, 'Allow the context to use heuristics involving theory case splits, which are a set of literals of which exactly one can be assigned True. If this option is false, the context will generate extra axioms to enforce this instead.'), ('string_solver', SYMBOL, 'seq', 'solver for string/sequence theories. options are: \'z3str3\' (specialized string solver), \'seq\' (sequence solver), \'auto\' (use static features to choose best solver), \'empty\' (a no-op solver that forces an answer unknown if strings were used), \'none\' (no solver), \'nseq\' (Nielsen-based string solver)'), ('nseq.max_depth', UINT, 0, 'maximum Nielsen search depth for theory_nseq (0 = unlimited)'), + ('nseq.max_nodes', UINT, 0, 'maximum number of DFS nodes explored by theory_nseq per solve() call (0 = unlimited)'), ('nseq.parikh', BOOL, True, 'enable Parikh image checks in nseq solver'), + ('nseq.regex_precheck', BOOL, True, 'enable regex membership pre-check before DFS in theory_nseq: checks intersection emptiness per-variable and short-circuits SAT/UNSAT for regex-only problems'), ('core.validate', BOOL, False, '[internal] validate unsat core produced by SMT context. This option is intended for debugging'), ('seq.split_w_len', BOOL, True, 'enable splitting guided by length constraints'), ('seq.validate', BOOL, False, 'enable self-validation of theory axioms created by seq theory'), diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 79d175f9e..180e1daa5 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -2172,6 +2172,10 @@ namespace seq { if (m_cancel_fn && m_cancel_fn()) return search_result::unknown; + // check DFS node budget (0 = unlimited) + if (m_max_nodes > 0 && m_stats.m_num_dfs_nodes > m_max_nodes) + return search_result::unknown; + // revisit detection: if already visited this run, return cached status. // mirrors ZIPT's NielsenNode.GraphExpansion() evalIdx check. if (node->eval_idx() == m_run_idx) { diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 79fc91d34..8a2fe520a 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -734,6 +734,7 @@ namespace seq { unsigned m_run_idx = 0; unsigned m_depth_bound = 0; unsigned m_max_search_depth = 0; + unsigned m_max_nodes = 0; // 0 = unlimited bool m_parikh_enabled = true; unsigned m_next_mem_id = 0; unsigned m_fresh_cnt = 0; @@ -821,6 +822,9 @@ namespace seq { // maximum overall search depth (0 = unlimited) void set_max_search_depth(unsigned d) { m_max_search_depth = d; } + + // maximum total DFS nodes per solve() call (0 = unlimited) + void set_max_nodes(unsigned n) { m_max_nodes = n; } // enable/disable Parikh image verification constraints void set_parikh_enabled(bool e) { m_parikh_enabled = e; } diff --git a/src/smt/seq/seq_regex.cpp b/src/smt/seq/seq_regex.cpp index 37f009eef..252991e6a 100644 --- a/src/smt/seq/seq_regex.cpp +++ b/src/smt/seq/seq_regex.cpp @@ -438,6 +438,12 @@ namespace seq { // Only handle ground regexes; non-ground can't be fully explored if (!re->is_ground()) return l_undef; + // s_other snodes (unrecognized regex kinds, e.g. re.+) cannot be + // efficiently explored: the alphabet partition is trivially {∅} and + // derivative computations may be slow. Report l_undef and let the + // caller fall back to a more capable procedure. + if (re->kind() == euf::snode_kind::s_other) + return l_undef; // BFS over the Brzozowski derivative automaton. // Each state is a derivative regex snode identified by its id. diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index 78931e63f..bce2f1432 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -416,7 +416,27 @@ namespace smt { ++m_num_final_checks; m_nielsen.set_max_search_depth(get_fparams().m_nseq_max_depth); + m_nielsen.set_max_nodes(get_fparams().m_nseq_max_nodes); m_nielsen.set_parikh_enabled(get_fparams().m_nseq_parikh); + + // Regex membership pre-check: before running DFS, check intersection + // emptiness for each variable's regex constraints. This handles + // regex-only problems that the DFS cannot efficiently solve. + if (get_fparams().m_nseq_regex_precheck) { + lbool precheck = check_regex_memberships_precheck(); + if (precheck == l_true) { + // conflict was asserted inside check_regex_memberships_precheck + IF_VERBOSE(1, verbose_stream() << "nseq final_check: regex precheck UNSAT\n";); + return FC_CONTINUE; + } + if (precheck == l_false) { + // all regex constraints satisfiable, no word eqs/diseqs → SAT + IF_VERBOSE(1, verbose_stream() << "nseq final_check: regex precheck SAT\n";); + return FC_DONE; + } + // l_undef: inconclusive, fall through to DFS + } + IF_VERBOSE(1, verbose_stream() << "nseq final_check: calling solve()\n";); // here the actual Nielsen solving happens @@ -829,4 +849,96 @@ namespace smt { return new_axiom; } + // ----------------------------------------------------------------------- + // Regex membership pre-check + // For each variable with regex membership constraints, check intersection + // emptiness before DFS. Mirrors ZIPT's per-variable regex evaluation. + // + // Returns: + // l_true — conflict asserted (empty intersection for some variable) + // l_false — all variables satisfiable and no word eqs/diseqs → SAT + // l_undef — inconclusive, proceed to DFS + // ----------------------------------------------------------------------- + + lbool theory_nseq::check_regex_memberships_precheck() { + auto const& mems = m_state.str_mems(); + if (mems.empty()) + return l_undef; + + // Group membership indices by variable snode id. + // Only consider memberships whose string snode is a plain variable (s_var). + u_map var_to_mems; + bool all_var_str = true; + + for (unsigned i = 0; i < mems.size(); ++i) { + auto const& mem = mems[i]; + if (!mem.m_str || !mem.m_regex) + continue; + if (mem.m_str->is_var()) { + auto& vec = var_to_mems.insert_if_not_there(mem.m_str->id(), unsigned_vector()); + vec.push_back(i); + } + else { + all_var_str = false; + } + } + + if (var_to_mems.empty()) + return l_undef; + + bool any_undef = false; + + // Check intersection emptiness for each variable. + for (auto& kv : var_to_mems) { + unsigned var_id = kv.m_key; + unsigned_vector const& mem_indices = kv.m_value; + ptr_vector regexes; + for (unsigned i : mem_indices) { + euf::snode* re = mems[i].m_regex; + if (re) + regexes.push_back(re); + } + if (regexes.empty()) + continue; + + // Use a bounded BFS (50 states) for the pre-check to keep it fast. + // If the BFS times out (l_undef), we fall through to DFS. + lbool result = m_regex.check_intersection_emptiness(regexes, 50); + + if (result == l_true) { + // Intersection is empty → the memberships on this variable are + // jointly unsatisfiable. Assert a conflict from all their literals. + enode_pair_vector eqs; + literal_vector lits; + for (unsigned i : mem_indices) { + mem_source const& src = m_state.get_mem_source(i); + if (get_context().get_assignment(src.m_lit) == l_true) + lits.push_back(src.m_lit); + } + TRACE(seq, tout << "nseq regex precheck: empty intersection for var " + << var_id << ", conflict with " << lits.size() << " lits\n";); + set_conflict(eqs, lits); + return l_true; // conflict asserted + } + else if (result == l_undef) { + any_undef = true; + } + // l_false = non-empty intersection, this variable's constraints are satisfiable + } + + if (any_undef) + return l_undef; // cannot fully determine; let DFS decide + + // All variables' regex intersections are non-empty. + // If there are no word equations and no disequalities, variables are + // independent and each can be assigned a witness string → SAT. + if (all_var_str && m_state.str_eqs().empty() && m_state.diseqs().empty()) { + TRACE(seq, tout << "nseq regex precheck: all intersections non-empty, " + << "no word eqs/diseqs → SAT\n";); + return l_false; // signals SAT (non-empty / satisfiable) + } + + return l_undef; // mixed constraints; let DFS decide + } + } diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h index d44c6d67c..36b86439b 100644 --- a/src/smt/theory_nseq.h +++ b/src/smt/theory_nseq.h @@ -131,6 +131,14 @@ namespace smt { bool assert_nonneg_for_all_vars(); bool assert_length_constraints(); + // Regex membership pre-check: for each variable with regex constraints, + // check intersection emptiness before running DFS. + // l_true → found empty intersection, conflict asserted, return FC_CONTINUE + // l_false → all variables' regex constraints satisfiable and no word + // equations / disequalities, return FC_DONE (SAT) + // l_undef → inconclusive, proceed to DFS + lbool check_regex_memberships_precheck(); + public: theory_nseq(context& ctx); };