3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-03-17 18:43:45 +00:00

nseq: port ZIPT regex pre-check to fix benchmark discrepancy on regex-only problems (#8994)

* Initial plan

* Port ZIPT regex pre-check and DFS node budget to address nseq benchmark discrepancy

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
This commit is contained in:
Copilot 2026-03-15 10:10:53 -07:00 committed by GitHub
parent 2212f59704
commit d53846d501
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 140 additions and 0 deletions

View file

@ -54,7 +54,9 @@ void smt_params::updt_local_params(params_ref const & _p) {
m_logic = _p.get_sym("logic", m_logic);
m_string_solver = p.string_solver();
m_nseq_max_depth = p.nseq_max_depth();
m_nseq_max_nodes = p.nseq_max_nodes();
m_nseq_parikh = p.nseq_parikh();
m_nseq_regex_precheck = p.nseq_regex_precheck();
m_up_persist_clauses = p.up_persist_clauses();
validate_string_solver(m_string_solver);
if (_p.get_bool("arith.greatest_error_pivot", false))

View file

@ -249,7 +249,9 @@ struct smt_params : public preprocessor_params,
// -----------------------------------
symbol m_string_solver;
unsigned m_nseq_max_depth = 0;
unsigned m_nseq_max_nodes = 0;
bool m_nseq_parikh = true;
bool m_nseq_regex_precheck = true;
smt_params(params_ref const & p = params_ref()):
m_string_solver(symbol("auto")){

View file

@ -124,7 +124,9 @@ def_module_params(module_name='smt',
('theory_case_split', BOOL, False, 'Allow the context to use heuristics involving theory case splits, which are a set of literals of which exactly one can be assigned True. If this option is false, the context will generate extra axioms to enforce this instead.'),
('string_solver', SYMBOL, 'seq', 'solver for string/sequence theories. options are: \'z3str3\' (specialized string solver), \'seq\' (sequence solver), \'auto\' (use static features to choose best solver), \'empty\' (a no-op solver that forces an answer unknown if strings were used), \'none\' (no solver), \'nseq\' (Nielsen-based string solver)'),
('nseq.max_depth', UINT, 0, 'maximum Nielsen search depth for theory_nseq (0 = unlimited)'),
('nseq.max_nodes', UINT, 0, 'maximum number of DFS nodes explored by theory_nseq per solve() call (0 = unlimited)'),
('nseq.parikh', BOOL, True, 'enable Parikh image checks in nseq solver'),
('nseq.regex_precheck', BOOL, True, 'enable regex membership pre-check before DFS in theory_nseq: checks intersection emptiness per-variable and short-circuits SAT/UNSAT for regex-only problems'),
('core.validate', BOOL, False, '[internal] validate unsat core produced by SMT context. This option is intended for debugging'),
('seq.split_w_len', BOOL, True, 'enable splitting guided by length constraints'),
('seq.validate', BOOL, False, 'enable self-validation of theory axioms created by seq theory'),

View file

@ -2172,6 +2172,10 @@ namespace seq {
if (m_cancel_fn && m_cancel_fn())
return search_result::unknown;
// check DFS node budget (0 = unlimited)
if (m_max_nodes > 0 && m_stats.m_num_dfs_nodes > m_max_nodes)
return search_result::unknown;
// revisit detection: if already visited this run, return cached status.
// mirrors ZIPT's NielsenNode.GraphExpansion() evalIdx check.
if (node->eval_idx() == m_run_idx) {

View file

@ -734,6 +734,7 @@ namespace seq {
unsigned m_run_idx = 0;
unsigned m_depth_bound = 0;
unsigned m_max_search_depth = 0;
unsigned m_max_nodes = 0; // 0 = unlimited
bool m_parikh_enabled = true;
unsigned m_next_mem_id = 0;
unsigned m_fresh_cnt = 0;
@ -821,6 +822,9 @@ namespace seq {
// maximum overall search depth (0 = unlimited)
void set_max_search_depth(unsigned d) { m_max_search_depth = d; }
// maximum total DFS nodes per solve() call (0 = unlimited)
void set_max_nodes(unsigned n) { m_max_nodes = n; }
// enable/disable Parikh image verification constraints
void set_parikh_enabled(bool e) { m_parikh_enabled = e; }

View file

@ -438,6 +438,12 @@ namespace seq {
// Only handle ground regexes; non-ground can't be fully explored
if (!re->is_ground())
return l_undef;
// s_other snodes (unrecognized regex kinds, e.g. re.+) cannot be
// efficiently explored: the alphabet partition is trivially {∅} and
// derivative computations may be slow. Report l_undef and let the
// caller fall back to a more capable procedure.
if (re->kind() == euf::snode_kind::s_other)
return l_undef;
// BFS over the Brzozowski derivative automaton.
// Each state is a derivative regex snode identified by its id.

View file

@ -416,7 +416,27 @@ namespace smt {
++m_num_final_checks;
m_nielsen.set_max_search_depth(get_fparams().m_nseq_max_depth);
m_nielsen.set_max_nodes(get_fparams().m_nseq_max_nodes);
m_nielsen.set_parikh_enabled(get_fparams().m_nseq_parikh);
// Regex membership pre-check: before running DFS, check intersection
// emptiness for each variable's regex constraints. This handles
// regex-only problems that the DFS cannot efficiently solve.
if (get_fparams().m_nseq_regex_precheck) {
lbool precheck = check_regex_memberships_precheck();
if (precheck == l_true) {
// conflict was asserted inside check_regex_memberships_precheck
IF_VERBOSE(1, verbose_stream() << "nseq final_check: regex precheck UNSAT\n";);
return FC_CONTINUE;
}
if (precheck == l_false) {
// all regex constraints satisfiable, no word eqs/diseqs → SAT
IF_VERBOSE(1, verbose_stream() << "nseq final_check: regex precheck SAT\n";);
return FC_DONE;
}
// l_undef: inconclusive, fall through to DFS
}
IF_VERBOSE(1, verbose_stream() << "nseq final_check: calling solve()\n";);
// here the actual Nielsen solving happens
@ -829,4 +849,96 @@ namespace smt {
return new_axiom;
}
// -----------------------------------------------------------------------
// Regex membership pre-check
// For each variable with regex membership constraints, check intersection
// emptiness before DFS. Mirrors ZIPT's per-variable regex evaluation.
//
// Returns:
// l_true — conflict asserted (empty intersection for some variable)
// l_false — all variables satisfiable and no word eqs/diseqs → SAT
// l_undef — inconclusive, proceed to DFS
// -----------------------------------------------------------------------
lbool theory_nseq::check_regex_memberships_precheck() {
auto const& mems = m_state.str_mems();
if (mems.empty())
return l_undef;
// Group membership indices by variable snode id.
// Only consider memberships whose string snode is a plain variable (s_var).
u_map<unsigned_vector> var_to_mems;
bool all_var_str = true;
for (unsigned i = 0; i < mems.size(); ++i) {
auto const& mem = mems[i];
if (!mem.m_str || !mem.m_regex)
continue;
if (mem.m_str->is_var()) {
auto& vec = var_to_mems.insert_if_not_there(mem.m_str->id(), unsigned_vector());
vec.push_back(i);
}
else {
all_var_str = false;
}
}
if (var_to_mems.empty())
return l_undef;
bool any_undef = false;
// Check intersection emptiness for each variable.
for (auto& kv : var_to_mems) {
unsigned var_id = kv.m_key;
unsigned_vector const& mem_indices = kv.m_value;
ptr_vector<euf::snode> regexes;
for (unsigned i : mem_indices) {
euf::snode* re = mems[i].m_regex;
if (re)
regexes.push_back(re);
}
if (regexes.empty())
continue;
// Use a bounded BFS (50 states) for the pre-check to keep it fast.
// If the BFS times out (l_undef), we fall through to DFS.
lbool result = m_regex.check_intersection_emptiness(regexes, 50);
if (result == l_true) {
// Intersection is empty → the memberships on this variable are
// jointly unsatisfiable. Assert a conflict from all their literals.
enode_pair_vector eqs;
literal_vector lits;
for (unsigned i : mem_indices) {
mem_source const& src = m_state.get_mem_source(i);
if (get_context().get_assignment(src.m_lit) == l_true)
lits.push_back(src.m_lit);
}
TRACE(seq, tout << "nseq regex precheck: empty intersection for var "
<< var_id << ", conflict with " << lits.size() << " lits\n";);
set_conflict(eqs, lits);
return l_true; // conflict asserted
}
else if (result == l_undef) {
any_undef = true;
}
// l_false = non-empty intersection, this variable's constraints are satisfiable
}
if (any_undef)
return l_undef; // cannot fully determine; let DFS decide
// All variables' regex intersections are non-empty.
// If there are no word equations and no disequalities, variables are
// independent and each can be assigned a witness string → SAT.
if (all_var_str && m_state.str_eqs().empty() && m_state.diseqs().empty()) {
TRACE(seq, tout << "nseq regex precheck: all intersections non-empty, "
<< "no word eqs/diseqs → SAT\n";);
return l_false; // signals SAT (non-empty / satisfiable)
}
return l_undef; // mixed constraints; let DFS decide
}
}

View file

@ -131,6 +131,14 @@ namespace smt {
bool assert_nonneg_for_all_vars();
bool assert_length_constraints();
// Regex membership pre-check: for each variable with regex constraints,
// check intersection emptiness before running DFS.
// l_true → found empty intersection, conflict asserted, return FC_CONTINUE
// l_false → all variables' regex constraints satisfiable and no word
// equations / disequalities, return FC_DONE (SAT)
// l_undef → inconclusive, proceed to DFS
lbool check_regex_memberships_precheck();
public:
theory_nseq(context& ctx);
};