From 2aa1d1ee011f5f1c5b88eaa9d01d2455556ae79c Mon Sep 17 00:00:00 2001 From: CEisenhofer Date: Thu, 25 Jun 2026 15:40:53 +0200 Subject: [PATCH] Memorize previous conflicts --- scripts/compare_seq_solvers.py | 121 +++++++++++++--------- src/smt/seq/seq_nielsen.cpp | 179 ++++++++++++++++++++++++++++----- src/smt/seq/seq_nielsen.h | 33 +++++- 3 files changed, 261 insertions(+), 72 deletions(-) diff --git a/scripts/compare_seq_solvers.py b/scripts/compare_seq_solvers.py index bf027f1659..35ee46d70a 100644 --- a/scripts/compare_seq_solvers.py +++ b/scripts/compare_seq_solvers.py @@ -1,14 +1,21 @@ #!/usr/bin/env python3 """ -Compare z3 string solver configurations, and optionally against an external ZIPT solver. +Compare z3 string solver configurations, and optionally against external solvers. We always run three z3 configurations: nseq_md monadic decomposition (parikh off, eager regex factorization) nseq_pa parikh (parikh on, no regex factorization) seq the old/baseline string solver +Optionally compare against external solvers: + zipt ZIPT solver (--zipt /path/to/zipt) + ostrich Ostrich solver (--ostrich /path/to/ostrich) + noodler z3-noodler solver (--noodler /path/to/z3noodler) + Usage: - python compare_seq_solvers.py --z3 /path/to/z3 [--zipt /path/to/zipt] [--ext .smt2] + python compare_seq_solvers.py --z3 /path/to/z3 + [--zipt /path/to/zipt] [--ostrich /path/to/ostrich] [--noodler /path/to/noodler] + [--ext .smt2] Finds all .smt2 files under the given path, runs the solvers with a configurable timeout, and reports timeouts, divergences, and summary statistics. @@ -106,17 +113,21 @@ def run_z3(z3_bin: str, smt_file: Path, solver_args: list[str], timeout_s: int = return f"error:{e}", elapsed -def run_zipt(zipt_bin: str, smt_file: Path, timeout_s: int = DEFAULT_TIMEOUT) -> tuple[str, float]: - """Run ZIPT on a file. Returns (result, elapsed).""" +def run_external_solver(binary: str, smt_file: Path, timeout_s: int = DEFAULT_TIMEOUT, + extra_args: list[str] | None = None) -> tuple[str, float]: + """Run an external solver on a file. Returns (result, elapsed). + + Passes -t:{timeout_ms} as the first argument, matching the Z3/ZIPT convention. + Supply extra_args for any solver-specific flags that precede the file path. + """ timeout_ms = timeout_s * 1000 - cmd = [zipt_bin, f"-t:{timeout_ms}", str(smt_file)] + cmd = [binary, f"-t:{timeout_ms}"] + (extra_args or []) + [str(smt_file)] start = time.monotonic() try: proc = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout_s + 5) elapsed = time.monotonic() - start - out = proc.stdout.strip() - return _parse_result(out), elapsed + return _parse_result(proc.stdout.strip()), elapsed except subprocess.TimeoutExpired: elapsed = time.monotonic() - start return "timeout", elapsed @@ -151,7 +162,11 @@ def classify(solver_results: dict[str, str]) -> str: def process_file(z3_bin: str, smt_file: Path, timeout_s: int = DEFAULT_TIMEOUT, - zipt_bin: str | None = None) -> dict: + external_bins: dict[str, str] | None = None) -> dict: + """Run all z3 configurations and any external solvers on smt_file. + + external_bins maps solver label (e.g. 'zipt', 'ostrich', 'noodler') to binary path. + """ solver_results: dict[str, str] = {} solver_times: dict[str, float] = {} for name in SOLVER_NAMES: @@ -162,21 +177,21 @@ def process_file(z3_bin: str, smt_file: Path, timeout_s: int = DEFAULT_TIMEOUT, cat = classify(solver_results) smtlib_status = read_smtlib_status(smt_file) status = determine_status(solver_results, smtlib_status) - result = { - "file": smt_file, - "category": cat, - "smtlib_status": smtlib_status, - "status": status, - "zipt": None, - "t_zipt": None, + result: dict = { + "file": smt_file, + "category": cat, + "smtlib_status": smtlib_status, + "status": status, } for name in SOLVER_NAMES: result[name] = solver_results[name] result[f"t_{name}"] = solver_times[name] - if zipt_bin: - res_zipt, t_zipt = run_zipt(zipt_bin, smt_file, timeout_s) - result["zipt"] = res_zipt - result["t_zipt"] = t_zipt + + for label, binary in (external_bins or {}).items(): + res, t = run_external_solver(binary, smt_file, timeout_s) + result[label] = res + result[f"t_{label}"] = t + return result @@ -193,14 +208,24 @@ def main(): parser.add_argument("--timeout", type=int, default=DEFAULT_TIMEOUT, metavar="SEC", help=f"Per-solver timeout in seconds (default: {DEFAULT_TIMEOUT})") parser.add_argument("--zipt", metavar="PATH", default=None, - help="Path to ZIPT binary (optional; if omitted, ZIPT is not benchmarked)") + help="Path to ZIPT binary (optional)") + parser.add_argument("--ostrich", metavar="PATH", default=None, + help="Path to Ostrich binary (optional)") + parser.add_argument("--noodler", metavar="PATH", default=None, + help="Path to z3-noodler binary (optional)") parser.add_argument("--csv", metavar="FILE", help="Also write results to a CSV file") args = parser.parse_args() z3_bin = args.z3 - zipt_bin = args.zipt timeout_s = args.timeout + # Collect external solvers in declaration order so output columns are stable. + external_bins: dict[str, str] = {} + if args.zipt: external_bins["zipt"] = args.zipt + if args.ostrich: external_bins["ostrich"] = args.ostrich + if args.noodler: external_bins["noodler"] = args.noodler + ext_names = list(external_bins.keys()) + root = Path(args.path) if not root.exists(): print(f"Error: path does not exist: {root}", file=sys.stderr) @@ -225,14 +250,13 @@ def main(): print(f"Sampling: {len(files)} files selected " f"(max {args.max_per_folder} per subfolder, {len(by_folder)} subfolder(s))") - solvers_label = ", ".join(SOLVER_NAMES) - if zipt_bin: solvers_label += ", zipt" - print(f"Found {len(files)} files. Solvers: {solvers_label}. " + all_solver_labels = SOLVER_NAMES + ext_names + print(f"Found {len(files)} files. Solvers: {', '.join(all_solver_labels)}. " f"Workers: {args.jobs}, timeout: {timeout_s}s\n") results = [] pool = ThreadPoolExecutor(max_workers=args.jobs) - futures = {pool.submit(process_file, z3_bin, f, timeout_s, zipt_bin): f for f in files} + futures = {pool.submit(process_file, z3_bin, f, timeout_s, external_bins): f for f in files} done = 0 try: for fut in as_completed(futures): @@ -240,10 +264,8 @@ def main(): r = fut.result() results.append(r) solver_part = " ".join(f"{name}={r[name]:8s} ({r[f't_{name}']:.1f}s)" for name in SOLVER_NAMES) - zipt_part = "" - if zipt_bin: - zipt_part = f" zipt={r['zipt']:8s} ({r['t_zipt']:.1f}s)" - print(f"[{done:4d}/{len(files)}] {r['category']:20s} {solver_part}{zipt_part} {r['file'].parent.name}/{r['file'].name}") + ext_part = "".join(f" {label}={r[label]:8s} ({r[f't_{label}']:.1f}s)" for label in ext_names) + print(f"[{done:4d}/{len(files)}] {r['category']:20s} {solver_part}{ext_part} {r['file'].parent.name}/{r['file'].name}") except KeyboardInterrupt: print("\nInterrupted — cancelling pending tasks.", file=sys.stderr) pool.shutdown(wait=False, cancel_futures=True) @@ -283,10 +305,11 @@ def main(): timeouts = [r for r in results if r[name] == "timeout"] if timeouts: _print_file_list(f"{name.upper()} TIMES OUT", timeouts) - if zipt_bin: - zipt_timeouts = [r for r in results if r["zipt"] == "timeout"] - if zipt_timeouts: - _print_file_list("ZIPT TIMES OUT", zipt_timeouts) + + for label in ext_names: + timeouts = [r for r in results if r[label] == "timeout"] + if timeouts: + _print_file_list(f"{label.upper()} TIMES OUT", timeouts) all_to = categories["all_timeout"] if all_to: @@ -294,20 +317,24 @@ def main(): invalid_models = categories.get("invalid_model", []) if invalid_models: _print_file_list("INVALID MODEL GENERATED", invalid_models) - if zipt_bin: - all_three_to = [r for r in results - if all(r[name] == "timeout" for name in SOLVER_NAMES) and r["zipt"] == "timeout"] - if all_three_to: - _print_file_list("ALL SOLVERS (INCL. ZIPT) TIME OUT", all_three_to) + + if ext_names: + all_ext_to = [r for r in results + if all(r[name] == "timeout" for name in SOLVER_NAMES) + and all(r[label] == "timeout" for label in ext_names)] + if all_ext_to: + _print_file_list("ALL SOLVERS TIME OUT", all_ext_to) + if diverged: _print_file_list("DIVERGE among z3 configurations (sat vs unsat)", diverged) - if zipt_bin: - definite = {"sat", "unsat"} - zipt_diverged = [r for r in results - if r["zipt"] in definite - and any(r[name] in definite and r[name] != r["zipt"] for name in SOLVER_NAMES)] - if zipt_diverged: - _print_file_list("DIVERGE involving ZIPT", zipt_diverged) + + definite = {"sat", "unsat"} + for label in ext_names: + ext_diverged = [r for r in results + if r[label] in definite + and any(r[name] in definite and r[name] != r[label] for name in SOLVER_NAMES)] + if ext_diverged: + _print_file_list(f"DIVERGE involving {label.upper()}", ext_diverged) print() @@ -331,8 +358,8 @@ def main(): csv_path = Path(args.csv) fieldnames = ["file"] + SOLVER_NAMES + [f"t_{name}" for name in SOLVER_NAMES] fieldnames.extend(["category", "smtlib_status", "status"]) - if zipt_bin: - fieldnames.extend(["zipt", "t_zipt"]) + for label in ext_names: + fieldnames.extend([label, f"t_{label}"]) with csv_path.open("w", newline="", encoding="utf-8") as f: writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction="ignore") writer.writeheader() diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 9314355a68..e152bd01ff 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -684,6 +684,9 @@ namespace seq { m_partial_dfa_edge_index.clear(); m_partial_dfa_pin.reset(); m_projection_extract_idx = 0; + m_explored_automaton.reset(); + m_unsat_node_cache.clear(); + m_num_cache_hits = 0; // m_length_trail.reset(); // m_length_info.reset(); m_dep_mgr.reset(); @@ -2003,6 +2006,87 @@ namespace seq { } } + // ---- Transposition-table helpers (node memoization) ---------------------- + + static bool reason_is_string_only(backtrack_reason r) { + switch (r) { + case backtrack_reason::regex: + case backtrack_reason::regex_widening: + case backtrack_reason::symbol_clash: + case backtrack_reason::character_range: + return true; + default: + // arithmetic, parikh_image, external, children_failed, unevaluated + return false; + } + } + + bool nielsen_graph::node_unsat_string_only(nielsen_node const* n) const { + if (n->m_reason == backtrack_reason::children_failed) + return n->m_unsat_cacheable; // set when the children_failed result was formed + return reason_is_string_only(n->m_reason); + } + + std::vector nielsen_graph::compute_node_signature(nielsen_node const* n) const { + std::vector sig; + // string equalities (order-independent) + { + std::vector> v; + for (auto const& e : n->str_eqs()) + v.emplace_back(e.m_lhs->id(), e.m_rhs->id()); + std::sort(v.begin(), v.end()); + sig.push_back(static_cast(v.size())); + for (auto const& [a,b] : v) { sig.push_back(a); sig.push_back(b); } + } + sig.push_back(UINT_MAX); // section separator + // string disequalities + { + std::vector> v; + for (auto const& d : n->str_deqs()) + v.emplace_back(d.m_lhs->id(), d.m_rhs->id()); + std::sort(v.begin(), v.end()); + sig.push_back(static_cast(v.size())); + for (auto const& [a,b] : v) { sig.push_back(a); sig.push_back(b); } + } + sig.push_back(UINT_MAX); + // regex memberships incl. view/guard metadata + { + std::vector> v; + for (auto const& mm : n->str_mems()) + v.push_back({ mm.m_str->id(), mm.m_regex->id(), + static_cast(mm.m_kind), + mm.m_root ? mm.m_root->id() : UINT_MAX, + mm.m_nu, mm.m_discharged ? 1u : 0u }); + std::sort(v.begin(), v.end()); + sig.push_back(static_cast(v.size())); + for (auto const& a : v) for (unsigned x : a) sig.push_back(x); + } + sig.push_back(UINT_MAX); + // character-range constraints (per symbolic unit) + { + std::vector> v; + for (auto const& [uid, cr] : n->char_ranges()) { + std::vector entry; + entry.push_back(uid); + for (auto const& rg : cr.first.ranges()) { entry.push_back(rg.m_lo); entry.push_back(rg.m_hi); } + v.push_back(std::move(entry)); + } + std::sort(v.begin(), v.end()); + sig.push_back(static_cast(v.size())); + for (auto const& e : v) { sig.push_back(static_cast(e.size())); for (unsigned x : e) sig.push_back(x); } + } + return sig; + } + + dep_tracker nielsen_graph::node_all_deps(nielsen_node const* n) { + dep_tracker d = nullptr; + for (auto const& e : n->str_eqs()) d = m_dep_mgr.mk_join(d, e.m_dep); + for (auto const& q : n->str_deqs()) d = m_dep_mgr.mk_join(d, q.m_dep); + for (auto const& mm : n->str_mems()) d = m_dep_mgr.mk_join(d, mm.m_dep); + for (auto const& [uid, cr] : n->char_ranges()) d = m_dep_mgr.mk_join(d, cr.second); + return d; + } + nielsen_graph::search_result nielsen_graph::search_dfs(nielsen_node* node, ptr_vector& cur_path, const unsigned depth) { @@ -2053,6 +2137,23 @@ namespace seq { return search_result::unsat; } + // Transposition-table lookup. The node is now canonical (post-simplify). + // If an equivalent node was already proven UNSAT for string/regex-only + // reasons, this node is unsat too — independently of its (integer) side + // constraints — so we prune without re-exploring its subtree. We derive + // the conflict from this node's own constraint deps (a sound over-approx). + { + const std::vector sig = compute_node_signature(node); + if (m_unsat_node_cache.contains(sig)) { + node->set_conflict(backtrack_reason::regex, node_all_deps(node)); + node->set_general_conflict(); + node->m_unsat_cacheable = true; + ++m_stats.m_num_simplify_conflict; + ++m_num_cache_hits; + return search_result::unsat; + } + } + // Apply Parikh image filter: generate modular length constraints and // perform a lightweight feasibility pre-check. The filter is guarded // internally (m_parikh_applied) so it only runs once per node. @@ -2102,6 +2203,9 @@ namespace seq { if (!check_leaf_regex(*node, dep)) { node->set_general_conflict(); node->set_conflict(backtrack_reason::regex, dep); + // string-only conflict (empty intersection) → memoize. + node->m_unsat_cacheable = true; + m_unsat_node_cache.insert(compute_node_signature(node)); return search_result::unsat; } assert_node_side_constraints(node); @@ -2189,6 +2293,20 @@ namespace seq { } if (!any_unknown) { node->set_child_conflict(); + // Memoize this internal UNSAT iff its whole subtree closed for + // string/regex-only reasons (no child relied on arithmetic / Parikh / + // external context). Then a same-signature node found via any other + // path is pruned by the lookup above. + bool all_string_only = true; + for (nielsen_edge* e : node->outgoing()) { + if (!node_unsat_string_only(e->tgt())) { + all_string_only = false; + break; + } + } + node->m_unsat_cacheable = all_string_only; + if (all_string_only) + m_unsat_node_cache.insert(compute_node_signature(node)); return search_result::unsat; } return search_result::unknown; @@ -3368,43 +3486,55 @@ namespace seq { } // ----------------------------------------------------------------------- - // Helper: precompute_partial_dfa - // BFS of Brzozowski derivatives from root_re up to `depth` steps, recording - // all concrete minterm edges in the partial DFA. Called eagerly from - // apply_cycle_decomposition (priority 6) so that SCC detection can fire - // before apply_regex_var_split (priority 10) creates a symbolic child whose - // ite-derivative would otherwise delay SCC detection by one more level. + // Helper: ensure_automaton_explored (lazy, on-demand, cached) + // Records the *complete* reachable Brzozowski automaton of root_re into the + // partial DFA — but only once per regex component. This is the lazy + // Q-completion: instead of re-running a fixed-depth BFS on every cycle + // decomposition (the old precompute_partial_dfa(R, depth), which re-explored + // R's automaton on each of the ~hundreds of decompositions and, with a + // shallow depth, left Q incomplete so guards discharged prematurely and laps + // never closed), we explore each component once to saturation and remember + // every state we visited. A later decomposition whose head was reached by an + // earlier exploration finds it already covered and does no work. + // + // Completeness matters: the cycle guard / stabilizer view gate on whether the + // current state lies in Q (projection_state_in_Q), and that is only sound as + // a *bound* when Q contains the whole SCC of the head. The BFS is bounded by + // the finite reachable automaton (Brzozowski states modulo ACI), so it + // terminates; m.inc() keeps it responsive to the resource limit. // ----------------------------------------------------------------------- - void nielsen_graph::precompute_partial_dfa(euf::snode const* root_re, const unsigned depth) { + void nielsen_graph::ensure_automaton_explored(euf::snode const* root_re) { SASSERT(root_re); if (!root_re->is_ground()) return; + if (m_explored_automaton.contains(root_re->get_expr()->get_id())) + return; - struct work_item { euf::snode const* re; unsigned d; }; - svector queue; - queue.push_back({root_re, depth}); - uint_set visited; - visited.insert(root_re->id()); + svector queue; + queue.push_back(root_re); while (!queue.empty()) { - auto [re, d] = queue.back(); + if (!m.inc()) + return; // resource limit: leave Q partial (sound under-approx) + euf::snode const* re = queue.back(); queue.pop_back(); + const unsigned re_eid = re->get_expr()->get_id(); + if (m_explored_automaton.contains(re_eid)) + continue; // already explored (here or in a previous component) + m_explored_automaton.insert(re_eid); + euf::snode_vector mts; m_sg.compute_minterms(re, mts); for (euf::snode const* mt : mts) { - // std::cout << "minterm: " << mk_pp(mt->get_expr(), m) << std::endl; euf::snode const* deriv = m_sg.brzozowski_deriv(re, mt); if (!deriv || deriv->is_fail()) continue; record_partial_derivative_edge(re, mt, deriv); - if (d > 0 && deriv->is_ground() && !visited.contains(deriv->id())) { - visited.insert(deriv->id()); - queue.push_back({deriv, d - 1}); - } + if (deriv->is_ground() && !m_explored_automaton.contains(deriv->get_expr()->get_id())) + queue.push_back(deriv); } } - //std::string s = partial_dfa_to_dot(root_re, true); } // ----------------------------------------------------------------------- @@ -3500,10 +3630,10 @@ namespace seq { euf::snode const* x = first; euf::snode const* R = mem.m_regex; - // Eagerly precompute partial DFA edges from this regex so that - // collect_scc_for_projection can detect cycles without waiting - // for apply_regex_var_split to create per-minterm children. - precompute_partial_dfa(R, 64); + // Lazily explore R's full automaton (once, cached) so that + // collect_scc_for_projection sees the complete SCC of R and the + // stabilizer view / cycle guard gate on a complete Q. + ensure_automaton_explored(R); // R must sit on a cycle (an SCC of the partial DFA). uint_set scc; @@ -3515,7 +3645,6 @@ namespace seq { const unsigned nu = m_projection_extract_idx; if (nu == 0) continue; - fprintf(stderr, "DEC R=%u nu=%u sccsz=%u x=%u nedges=%u\n", R->id(), nu, scc.num_elems(), x->id(), (unsigned)m_partial_dfa_edges.size()); fflush(stderr); // Trigger condition: x must not already carry a cycle guard for the // current SCC snapshot ν. All states of one SCC share a single ν, so @@ -5139,6 +5268,8 @@ namespace seq { st.update("nseq mod var num unwind (eq)", m_stats.m_mod_var_num_unwinding_eq); st.update("nseq mod var num unwind (mem)", m_stats.m_mod_var_num_unwinding_mem); st.update("nseq mod axiomatized disequalities", m_stats.m_ax_diseq); + st.update("nseq unsat-cache size", (unsigned) m_unsat_node_cache.size()); + st.update("nseq unsat-cache hits", m_num_cache_hits); } } diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 707c3a8641..880e87298d 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -49,6 +49,8 @@ Author: #include "util/uint_set.h" #include "util/vector.h" #include +#include +#include namespace smt { class enode; } @@ -512,6 +514,11 @@ namespace seq { backtrack_reason m_reason = backtrack_reason::unevaluated; bool m_is_progress = false; bool m_node_len_constraints_generated = false; // true after generate_node_length_constraints runs + // true once this node has been proven UNSAT for reasons that depend only + // on its string/regex constraints (not on arithmetic / Parikh / external + // context). Such an unsat is a property of the node's string signature + // and is safe to memoize in the transposition table (m_unsat_node_cache). + bool m_unsat_cacheable = false; // Parikh filter: set to true once apply_parikh_to_node has been applied // to this node. Prevents duplicate constraint generation across DFS runs. @@ -829,6 +836,18 @@ namespace seq { // when the explored SCC's edge set actually grows; identifies which // partial-DFA edges (m_projection_idx ≤ ν) belong to a projection's Q. unsigned m_projection_extract_idx = 0; + // Expr ids of regex states whose full reachable automaton has already + // been recorded into the partial DFA (lazy, once-per-component Q growth; + // see ensure_automaton_explored). + uint_set m_explored_automaton; + + // Transposition table: structural signatures of nodes already proven + // UNSAT for string/regex-only reasons. A node whose signature is present + // is unsatisfiable regardless of how it was reached, so the DFS can prune + // it without re-exploring its subtree (turns the search tree into the + // finite DAG the termination proof bounds). See compute_node_signature. + std::set> m_unsat_node_cache; + unsigned m_num_cache_hits = 0; public: @@ -1010,6 +1029,16 @@ namespace seq { search_result search_dfs(nielsen_node *node, ptr_vector& path, unsigned depth = 0); + // Transposition table helpers (node memoization of string-only UNSAT). + // Canonical structural signature of a node (string equalities, + // disequalities, memberships incl. view/guard metadata, char ranges). + // Two nodes with equal signatures have identical string constraints. + std::vector compute_node_signature(nielsen_node const* n) const; + // Union of all constraint deps of a node (sound over-approx conflict). + dep_tracker node_all_deps(nielsen_node const* n); + // True iff the node's UNSAT depends only on string/regex constraints. + bool node_unsat_string_only(nielsen_node const* n) const; + // Regex widening: overapproximate `str` by replacing variables with // the intersection of their primitive regex constraints (or Σ* if // unconstrained), replacing symbolic chars with their char ranges, @@ -1180,7 +1209,9 @@ namespace seq { // eagerly recording concrete minterm edges in the partial DFA so that // collect_scc_for_projection can find cycles without first waiting for // concrete children to record them one level at a time. - void precompute_partial_dfa(euf::snode const* root_re, unsigned depth); + // Lazily record the complete reachable automaton of root_re into the + // partial DFA, once per regex component (cached in m_explored_automaton). + void ensure_automaton_explored(euf::snode const* root_re); // generalized power introduction: for an equation where one head is // a variable v and the other side has ground prefix + a variable x