mirror of
https://github.com/Z3Prover/z3
synced 2026-03-17 18:43:45 +00:00
Port ZIPT regex pre-check and DFS node budget to address nseq benchmark discrepancy
Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
This commit is contained in:
parent
988baeba05
commit
7b747a4d5f
8 changed files with 140 additions and 0 deletions
|
|
@ -54,7 +54,9 @@ void smt_params::updt_local_params(params_ref const & _p) {
|
|||
m_logic = _p.get_sym("logic", m_logic);
|
||||
m_string_solver = p.string_solver();
|
||||
m_nseq_max_depth = p.nseq_max_depth();
|
||||
m_nseq_max_nodes = p.nseq_max_nodes();
|
||||
m_nseq_parikh = p.nseq_parikh();
|
||||
m_nseq_regex_precheck = p.nseq_regex_precheck();
|
||||
m_up_persist_clauses = p.up_persist_clauses();
|
||||
validate_string_solver(m_string_solver);
|
||||
if (_p.get_bool("arith.greatest_error_pivot", false))
|
||||
|
|
|
|||
|
|
@ -249,7 +249,9 @@ struct smt_params : public preprocessor_params,
|
|||
// -----------------------------------
|
||||
symbol m_string_solver;
|
||||
unsigned m_nseq_max_depth = 0;
|
||||
unsigned m_nseq_max_nodes = 0;
|
||||
bool m_nseq_parikh = true;
|
||||
bool m_nseq_regex_precheck = true;
|
||||
|
||||
smt_params(params_ref const & p = params_ref()):
|
||||
m_string_solver(symbol("auto")){
|
||||
|
|
|
|||
|
|
@ -124,7 +124,9 @@ def_module_params(module_name='smt',
|
|||
('theory_case_split', BOOL, False, 'Allow the context to use heuristics involving theory case splits, which are a set of literals of which exactly one can be assigned True. If this option is false, the context will generate extra axioms to enforce this instead.'),
|
||||
('string_solver', SYMBOL, 'seq', 'solver for string/sequence theories. options are: \'z3str3\' (specialized string solver), \'seq\' (sequence solver), \'auto\' (use static features to choose best solver), \'empty\' (a no-op solver that forces an answer unknown if strings were used), \'none\' (no solver), \'nseq\' (Nielsen-based string solver)'),
|
||||
('nseq.max_depth', UINT, 0, 'maximum Nielsen search depth for theory_nseq (0 = unlimited)'),
|
||||
('nseq.max_nodes', UINT, 0, 'maximum number of DFS nodes explored by theory_nseq per solve() call (0 = unlimited)'),
|
||||
('nseq.parikh', BOOL, True, 'enable Parikh image checks in nseq solver'),
|
||||
('nseq.regex_precheck', BOOL, True, 'enable regex membership pre-check before DFS in theory_nseq: checks intersection emptiness per-variable and short-circuits SAT/UNSAT for regex-only problems'),
|
||||
('core.validate', BOOL, False, '[internal] validate unsat core produced by SMT context. This option is intended for debugging'),
|
||||
('seq.split_w_len', BOOL, True, 'enable splitting guided by length constraints'),
|
||||
('seq.validate', BOOL, False, 'enable self-validation of theory axioms created by seq theory'),
|
||||
|
|
|
|||
|
|
@ -2172,6 +2172,10 @@ namespace seq {
|
|||
if (m_cancel_fn && m_cancel_fn())
|
||||
return search_result::unknown;
|
||||
|
||||
// check DFS node budget (0 = unlimited)
|
||||
if (m_max_nodes > 0 && m_stats.m_num_dfs_nodes > m_max_nodes)
|
||||
return search_result::unknown;
|
||||
|
||||
// revisit detection: if already visited this run, return cached status.
|
||||
// mirrors ZIPT's NielsenNode.GraphExpansion() evalIdx check.
|
||||
if (node->eval_idx() == m_run_idx) {
|
||||
|
|
|
|||
|
|
@ -734,6 +734,7 @@ namespace seq {
|
|||
unsigned m_run_idx = 0;
|
||||
unsigned m_depth_bound = 0;
|
||||
unsigned m_max_search_depth = 0;
|
||||
unsigned m_max_nodes = 0; // 0 = unlimited
|
||||
bool m_parikh_enabled = true;
|
||||
unsigned m_next_mem_id = 0;
|
||||
unsigned m_fresh_cnt = 0;
|
||||
|
|
@ -821,6 +822,9 @@ namespace seq {
|
|||
|
||||
// maximum overall search depth (0 = unlimited)
|
||||
void set_max_search_depth(unsigned d) { m_max_search_depth = d; }
|
||||
|
||||
// maximum total DFS nodes per solve() call (0 = unlimited)
|
||||
void set_max_nodes(unsigned n) { m_max_nodes = n; }
|
||||
|
||||
// enable/disable Parikh image verification constraints
|
||||
void set_parikh_enabled(bool e) { m_parikh_enabled = e; }
|
||||
|
|
|
|||
|
|
@ -438,6 +438,12 @@ namespace seq {
|
|||
// Only handle ground regexes; non-ground can't be fully explored
|
||||
if (!re->is_ground())
|
||||
return l_undef;
|
||||
// s_other snodes (unrecognized regex kinds, e.g. re.+) cannot be
|
||||
// efficiently explored: the alphabet partition is trivially {∅} and
|
||||
// derivative computations may be slow. Report l_undef and let the
|
||||
// caller fall back to a more capable procedure.
|
||||
if (re->kind() == euf::snode_kind::s_other)
|
||||
return l_undef;
|
||||
|
||||
// BFS over the Brzozowski derivative automaton.
|
||||
// Each state is a derivative regex snode identified by its id.
|
||||
|
|
|
|||
|
|
@ -416,7 +416,27 @@ namespace smt {
|
|||
++m_num_final_checks;
|
||||
|
||||
m_nielsen.set_max_search_depth(get_fparams().m_nseq_max_depth);
|
||||
m_nielsen.set_max_nodes(get_fparams().m_nseq_max_nodes);
|
||||
m_nielsen.set_parikh_enabled(get_fparams().m_nseq_parikh);
|
||||
|
||||
// Regex membership pre-check: before running DFS, check intersection
|
||||
// emptiness for each variable's regex constraints. This handles
|
||||
// regex-only problems that the DFS cannot efficiently solve.
|
||||
if (get_fparams().m_nseq_regex_precheck) {
|
||||
lbool precheck = check_regex_memberships_precheck();
|
||||
if (precheck == l_true) {
|
||||
// conflict was asserted inside check_regex_memberships_precheck
|
||||
IF_VERBOSE(1, verbose_stream() << "nseq final_check: regex precheck UNSAT\n";);
|
||||
return FC_CONTINUE;
|
||||
}
|
||||
if (precheck == l_false) {
|
||||
// all regex constraints satisfiable, no word eqs/diseqs → SAT
|
||||
IF_VERBOSE(1, verbose_stream() << "nseq final_check: regex precheck SAT\n";);
|
||||
return FC_DONE;
|
||||
}
|
||||
// l_undef: inconclusive, fall through to DFS
|
||||
}
|
||||
|
||||
IF_VERBOSE(1, verbose_stream() << "nseq final_check: calling solve()\n";);
|
||||
|
||||
// here the actual Nielsen solving happens
|
||||
|
|
@ -829,4 +849,96 @@ namespace smt {
|
|||
return new_axiom;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Regex membership pre-check
|
||||
// For each variable with regex membership constraints, check intersection
|
||||
// emptiness before DFS. Mirrors ZIPT's per-variable regex evaluation.
|
||||
//
|
||||
// Returns:
|
||||
// l_true — conflict asserted (empty intersection for some variable)
|
||||
// l_false — all variables satisfiable and no word eqs/diseqs → SAT
|
||||
// l_undef — inconclusive, proceed to DFS
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
lbool theory_nseq::check_regex_memberships_precheck() {
|
||||
auto const& mems = m_state.str_mems();
|
||||
if (mems.empty())
|
||||
return l_undef;
|
||||
|
||||
// Group membership indices by variable snode id.
|
||||
// Only consider memberships whose string snode is a plain variable (s_var).
|
||||
u_map<unsigned_vector> var_to_mems;
|
||||
bool all_var_str = true;
|
||||
|
||||
for (unsigned i = 0; i < mems.size(); ++i) {
|
||||
auto const& mem = mems[i];
|
||||
if (!mem.m_str || !mem.m_regex)
|
||||
continue;
|
||||
if (mem.m_str->is_var()) {
|
||||
auto& vec = var_to_mems.insert_if_not_there(mem.m_str->id(), unsigned_vector());
|
||||
vec.push_back(i);
|
||||
}
|
||||
else {
|
||||
all_var_str = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (var_to_mems.empty())
|
||||
return l_undef;
|
||||
|
||||
bool any_undef = false;
|
||||
|
||||
// Check intersection emptiness for each variable.
|
||||
for (auto& kv : var_to_mems) {
|
||||
unsigned var_id = kv.m_key;
|
||||
unsigned_vector const& mem_indices = kv.m_value;
|
||||
ptr_vector<euf::snode> regexes;
|
||||
for (unsigned i : mem_indices) {
|
||||
euf::snode* re = mems[i].m_regex;
|
||||
if (re)
|
||||
regexes.push_back(re);
|
||||
}
|
||||
if (regexes.empty())
|
||||
continue;
|
||||
|
||||
// Use a bounded BFS (50 states) for the pre-check to keep it fast.
|
||||
// If the BFS times out (l_undef), we fall through to DFS.
|
||||
lbool result = m_regex.check_intersection_emptiness(regexes, 50);
|
||||
|
||||
if (result == l_true) {
|
||||
// Intersection is empty → the memberships on this variable are
|
||||
// jointly unsatisfiable. Assert a conflict from all their literals.
|
||||
enode_pair_vector eqs;
|
||||
literal_vector lits;
|
||||
for (unsigned i : mem_indices) {
|
||||
mem_source const& src = m_state.get_mem_source(i);
|
||||
if (get_context().get_assignment(src.m_lit) == l_true)
|
||||
lits.push_back(src.m_lit);
|
||||
}
|
||||
TRACE(seq, tout << "nseq regex precheck: empty intersection for var "
|
||||
<< var_id << ", conflict with " << lits.size() << " lits\n";);
|
||||
set_conflict(eqs, lits);
|
||||
return l_true; // conflict asserted
|
||||
}
|
||||
else if (result == l_undef) {
|
||||
any_undef = true;
|
||||
}
|
||||
// l_false = non-empty intersection, this variable's constraints are satisfiable
|
||||
}
|
||||
|
||||
if (any_undef)
|
||||
return l_undef; // cannot fully determine; let DFS decide
|
||||
|
||||
// All variables' regex intersections are non-empty.
|
||||
// If there are no word equations and no disequalities, variables are
|
||||
// independent and each can be assigned a witness string → SAT.
|
||||
if (all_var_str && m_state.str_eqs().empty() && m_state.diseqs().empty()) {
|
||||
TRACE(seq, tout << "nseq regex precheck: all intersections non-empty, "
|
||||
<< "no word eqs/diseqs → SAT\n";);
|
||||
return l_false; // signals SAT (non-empty / satisfiable)
|
||||
}
|
||||
|
||||
return l_undef; // mixed constraints; let DFS decide
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -131,6 +131,14 @@ namespace smt {
|
|||
bool assert_nonneg_for_all_vars();
|
||||
bool assert_length_constraints();
|
||||
|
||||
// Regex membership pre-check: for each variable with regex constraints,
|
||||
// check intersection emptiness before running DFS.
|
||||
// l_true → found empty intersection, conflict asserted, return FC_CONTINUE
|
||||
// l_false → all variables' regex constraints satisfiable and no word
|
||||
// equations / disequalities, return FC_DONE (SAT)
|
||||
// l_undef → inconclusive, proceed to DFS
|
||||
lbool check_regex_memberships_precheck();
|
||||
|
||||
public:
|
||||
theory_nseq(context& ctx);
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue