3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-27 10:58:48 +00:00

Memorize previous conflicts

This commit is contained in:
CEisenhofer 2026-06-25 15:40:53 +02:00
parent d0c0c1b573
commit 2aa1d1ee01
3 changed files with 261 additions and 72 deletions

View file

@ -1,14 +1,21 @@
#!/usr/bin/env python3
"""
Compare z3 string solver configurations, and optionally against an external ZIPT solver.
Compare z3 string solver configurations, and optionally against external solvers.
We always run three z3 configurations:
nseq_md monadic decomposition (parikh off, eager regex factorization)
nseq_pa parikh (parikh on, no regex factorization)
seq the old/baseline string solver
Optionally compare against external solvers:
zipt ZIPT solver (--zipt /path/to/zipt)
ostrich Ostrich solver (--ostrich /path/to/ostrich)
noodler z3-noodler solver (--noodler /path/to/z3noodler)
Usage:
python compare_seq_solvers.py <path-to-smtlib-files> --z3 /path/to/z3 [--zipt /path/to/zipt] [--ext .smt2]
python compare_seq_solvers.py <path-to-smtlib-files> --z3 /path/to/z3
[--zipt /path/to/zipt] [--ostrich /path/to/ostrich] [--noodler /path/to/noodler]
[--ext .smt2]
Finds all .smt2 files under the given path, runs the solvers with a configurable timeout,
and reports timeouts, divergences, and summary statistics.
@ -106,17 +113,21 @@ def run_z3(z3_bin: str, smt_file: Path, solver_args: list[str], timeout_s: int =
return f"error:{e}", elapsed
def run_zipt(zipt_bin: str, smt_file: Path, timeout_s: int = DEFAULT_TIMEOUT) -> tuple[str, float]:
"""Run ZIPT on a file. Returns (result, elapsed)."""
def run_external_solver(binary: str, smt_file: Path, timeout_s: int = DEFAULT_TIMEOUT,
extra_args: list[str] | None = None) -> tuple[str, float]:
"""Run an external solver on a file. Returns (result, elapsed).
Passes -t:{timeout_ms} as the first argument, matching the Z3/ZIPT convention.
Supply extra_args for any solver-specific flags that precede the file path.
"""
timeout_ms = timeout_s * 1000
cmd = [zipt_bin, f"-t:{timeout_ms}", str(smt_file)]
cmd = [binary, f"-t:{timeout_ms}"] + (extra_args or []) + [str(smt_file)]
start = time.monotonic()
try:
proc = subprocess.run(cmd, capture_output=True, text=True,
timeout=timeout_s + 5)
elapsed = time.monotonic() - start
out = proc.stdout.strip()
return _parse_result(out), elapsed
return _parse_result(proc.stdout.strip()), elapsed
except subprocess.TimeoutExpired:
elapsed = time.monotonic() - start
return "timeout", elapsed
@ -151,7 +162,11 @@ def classify(solver_results: dict[str, str]) -> str:
def process_file(z3_bin: str, smt_file: Path, timeout_s: int = DEFAULT_TIMEOUT,
zipt_bin: str | None = None) -> dict:
external_bins: dict[str, str] | None = None) -> dict:
"""Run all z3 configurations and any external solvers on smt_file.
external_bins maps solver label (e.g. 'zipt', 'ostrich', 'noodler') to binary path.
"""
solver_results: dict[str, str] = {}
solver_times: dict[str, float] = {}
for name in SOLVER_NAMES:
@ -162,21 +177,21 @@ def process_file(z3_bin: str, smt_file: Path, timeout_s: int = DEFAULT_TIMEOUT,
cat = classify(solver_results)
smtlib_status = read_smtlib_status(smt_file)
status = determine_status(solver_results, smtlib_status)
result = {
"file": smt_file,
"category": cat,
"smtlib_status": smtlib_status,
"status": status,
"zipt": None,
"t_zipt": None,
result: dict = {
"file": smt_file,
"category": cat,
"smtlib_status": smtlib_status,
"status": status,
}
for name in SOLVER_NAMES:
result[name] = solver_results[name]
result[f"t_{name}"] = solver_times[name]
if zipt_bin:
res_zipt, t_zipt = run_zipt(zipt_bin, smt_file, timeout_s)
result["zipt"] = res_zipt
result["t_zipt"] = t_zipt
for label, binary in (external_bins or {}).items():
res, t = run_external_solver(binary, smt_file, timeout_s)
result[label] = res
result[f"t_{label}"] = t
return result
@ -193,14 +208,24 @@ def main():
parser.add_argument("--timeout", type=int, default=DEFAULT_TIMEOUT, metavar="SEC",
help=f"Per-solver timeout in seconds (default: {DEFAULT_TIMEOUT})")
parser.add_argument("--zipt", metavar="PATH", default=None,
help="Path to ZIPT binary (optional; if omitted, ZIPT is not benchmarked)")
help="Path to ZIPT binary (optional)")
parser.add_argument("--ostrich", metavar="PATH", default=None,
help="Path to Ostrich binary (optional)")
parser.add_argument("--noodler", metavar="PATH", default=None,
help="Path to z3-noodler binary (optional)")
parser.add_argument("--csv", metavar="FILE", help="Also write results to a CSV file")
args = parser.parse_args()
z3_bin = args.z3
zipt_bin = args.zipt
timeout_s = args.timeout
# Collect external solvers in declaration order so output columns are stable.
external_bins: dict[str, str] = {}
if args.zipt: external_bins["zipt"] = args.zipt
if args.ostrich: external_bins["ostrich"] = args.ostrich
if args.noodler: external_bins["noodler"] = args.noodler
ext_names = list(external_bins.keys())
root = Path(args.path)
if not root.exists():
print(f"Error: path does not exist: {root}", file=sys.stderr)
@ -225,14 +250,13 @@ def main():
print(f"Sampling: {len(files)} files selected "
f"(max {args.max_per_folder} per subfolder, {len(by_folder)} subfolder(s))")
solvers_label = ", ".join(SOLVER_NAMES)
if zipt_bin: solvers_label += ", zipt"
print(f"Found {len(files)} files. Solvers: {solvers_label}. "
all_solver_labels = SOLVER_NAMES + ext_names
print(f"Found {len(files)} files. Solvers: {', '.join(all_solver_labels)}. "
f"Workers: {args.jobs}, timeout: {timeout_s}s\n")
results = []
pool = ThreadPoolExecutor(max_workers=args.jobs)
futures = {pool.submit(process_file, z3_bin, f, timeout_s, zipt_bin): f for f in files}
futures = {pool.submit(process_file, z3_bin, f, timeout_s, external_bins): f for f in files}
done = 0
try:
for fut in as_completed(futures):
@ -240,10 +264,8 @@ def main():
r = fut.result()
results.append(r)
solver_part = " ".join(f"{name}={r[name]:8s} ({r[f't_{name}']:.1f}s)" for name in SOLVER_NAMES)
zipt_part = ""
if zipt_bin:
zipt_part = f" zipt={r['zipt']:8s} ({r['t_zipt']:.1f}s)"
print(f"[{done:4d}/{len(files)}] {r['category']:20s} {solver_part}{zipt_part} {r['file'].parent.name}/{r['file'].name}")
ext_part = "".join(f" {label}={r[label]:8s} ({r[f't_{label}']:.1f}s)" for label in ext_names)
print(f"[{done:4d}/{len(files)}] {r['category']:20s} {solver_part}{ext_part} {r['file'].parent.name}/{r['file'].name}")
except KeyboardInterrupt:
print("\nInterrupted — cancelling pending tasks.", file=sys.stderr)
pool.shutdown(wait=False, cancel_futures=True)
@ -283,10 +305,11 @@ def main():
timeouts = [r for r in results if r[name] == "timeout"]
if timeouts:
_print_file_list(f"{name.upper()} TIMES OUT", timeouts)
if zipt_bin:
zipt_timeouts = [r for r in results if r["zipt"] == "timeout"]
if zipt_timeouts:
_print_file_list("ZIPT TIMES OUT", zipt_timeouts)
for label in ext_names:
timeouts = [r for r in results if r[label] == "timeout"]
if timeouts:
_print_file_list(f"{label.upper()} TIMES OUT", timeouts)
all_to = categories["all_timeout"]
if all_to:
@ -294,20 +317,24 @@ def main():
invalid_models = categories.get("invalid_model", [])
if invalid_models:
_print_file_list("INVALID MODEL GENERATED", invalid_models)
if zipt_bin:
all_three_to = [r for r in results
if all(r[name] == "timeout" for name in SOLVER_NAMES) and r["zipt"] == "timeout"]
if all_three_to:
_print_file_list("ALL SOLVERS (INCL. ZIPT) TIME OUT", all_three_to)
if ext_names:
all_ext_to = [r for r in results
if all(r[name] == "timeout" for name in SOLVER_NAMES)
and all(r[label] == "timeout" for label in ext_names)]
if all_ext_to:
_print_file_list("ALL SOLVERS TIME OUT", all_ext_to)
if diverged:
_print_file_list("DIVERGE among z3 configurations (sat vs unsat)", diverged)
if zipt_bin:
definite = {"sat", "unsat"}
zipt_diverged = [r for r in results
if r["zipt"] in definite
and any(r[name] in definite and r[name] != r["zipt"] for name in SOLVER_NAMES)]
if zipt_diverged:
_print_file_list("DIVERGE involving ZIPT", zipt_diverged)
definite = {"sat", "unsat"}
for label in ext_names:
ext_diverged = [r for r in results
if r[label] in definite
and any(r[name] in definite and r[name] != r[label] for name in SOLVER_NAMES)]
if ext_diverged:
_print_file_list(f"DIVERGE involving {label.upper()}", ext_diverged)
print()
@ -331,8 +358,8 @@ def main():
csv_path = Path(args.csv)
fieldnames = ["file"] + SOLVER_NAMES + [f"t_{name}" for name in SOLVER_NAMES]
fieldnames.extend(["category", "smtlib_status", "status"])
if zipt_bin:
fieldnames.extend(["zipt", "t_zipt"])
for label in ext_names:
fieldnames.extend([label, f"t_{label}"])
with csv_path.open("w", newline="", encoding="utf-8") as f:
writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction="ignore")
writer.writeheader()

View file

@ -684,6 +684,9 @@ namespace seq {
m_partial_dfa_edge_index.clear();
m_partial_dfa_pin.reset();
m_projection_extract_idx = 0;
m_explored_automaton.reset();
m_unsat_node_cache.clear();
m_num_cache_hits = 0;
// m_length_trail.reset();
// m_length_info.reset();
m_dep_mgr.reset();
@ -2003,6 +2006,87 @@ namespace seq {
}
}
// ---- Transposition-table helpers (node memoization) ----------------------
static bool reason_is_string_only(backtrack_reason r) {
switch (r) {
case backtrack_reason::regex:
case backtrack_reason::regex_widening:
case backtrack_reason::symbol_clash:
case backtrack_reason::character_range:
return true;
default:
// arithmetic, parikh_image, external, children_failed, unevaluated
return false;
}
}
bool nielsen_graph::node_unsat_string_only(nielsen_node const* n) const {
if (n->m_reason == backtrack_reason::children_failed)
return n->m_unsat_cacheable; // set when the children_failed result was formed
return reason_is_string_only(n->m_reason);
}
std::vector<unsigned> nielsen_graph::compute_node_signature(nielsen_node const* n) const {
std::vector<unsigned> sig;
// string equalities (order-independent)
{
std::vector<std::pair<unsigned,unsigned>> v;
for (auto const& e : n->str_eqs())
v.emplace_back(e.m_lhs->id(), e.m_rhs->id());
std::sort(v.begin(), v.end());
sig.push_back(static_cast<unsigned>(v.size()));
for (auto const& [a,b] : v) { sig.push_back(a); sig.push_back(b); }
}
sig.push_back(UINT_MAX); // section separator
// string disequalities
{
std::vector<std::pair<unsigned,unsigned>> v;
for (auto const& d : n->str_deqs())
v.emplace_back(d.m_lhs->id(), d.m_rhs->id());
std::sort(v.begin(), v.end());
sig.push_back(static_cast<unsigned>(v.size()));
for (auto const& [a,b] : v) { sig.push_back(a); sig.push_back(b); }
}
sig.push_back(UINT_MAX);
// regex memberships incl. view/guard metadata
{
std::vector<std::vector<unsigned>> v;
for (auto const& mm : n->str_mems())
v.push_back({ mm.m_str->id(), mm.m_regex->id(),
static_cast<unsigned>(mm.m_kind),
mm.m_root ? mm.m_root->id() : UINT_MAX,
mm.m_nu, mm.m_discharged ? 1u : 0u });
std::sort(v.begin(), v.end());
sig.push_back(static_cast<unsigned>(v.size()));
for (auto const& a : v) for (unsigned x : a) sig.push_back(x);
}
sig.push_back(UINT_MAX);
// character-range constraints (per symbolic unit)
{
std::vector<std::vector<unsigned>> v;
for (auto const& [uid, cr] : n->char_ranges()) {
std::vector<unsigned> entry;
entry.push_back(uid);
for (auto const& rg : cr.first.ranges()) { entry.push_back(rg.m_lo); entry.push_back(rg.m_hi); }
v.push_back(std::move(entry));
}
std::sort(v.begin(), v.end());
sig.push_back(static_cast<unsigned>(v.size()));
for (auto const& e : v) { sig.push_back(static_cast<unsigned>(e.size())); for (unsigned x : e) sig.push_back(x); }
}
return sig;
}
dep_tracker nielsen_graph::node_all_deps(nielsen_node const* n) {
dep_tracker d = nullptr;
for (auto const& e : n->str_eqs()) d = m_dep_mgr.mk_join(d, e.m_dep);
for (auto const& q : n->str_deqs()) d = m_dep_mgr.mk_join(d, q.m_dep);
for (auto const& mm : n->str_mems()) d = m_dep_mgr.mk_join(d, mm.m_dep);
for (auto const& [uid, cr] : n->char_ranges()) d = m_dep_mgr.mk_join(d, cr.second);
return d;
}
nielsen_graph::search_result nielsen_graph::search_dfs(nielsen_node* node,
ptr_vector<nielsen_edge>& cur_path, const unsigned depth) {
@ -2053,6 +2137,23 @@ namespace seq {
return search_result::unsat;
}
// Transposition-table lookup. The node is now canonical (post-simplify).
// If an equivalent node was already proven UNSAT for string/regex-only
// reasons, this node is unsat too — independently of its (integer) side
// constraints — so we prune without re-exploring its subtree. We derive
// the conflict from this node's own constraint deps (a sound over-approx).
{
const std::vector<unsigned> sig = compute_node_signature(node);
if (m_unsat_node_cache.contains(sig)) {
node->set_conflict(backtrack_reason::regex, node_all_deps(node));
node->set_general_conflict();
node->m_unsat_cacheable = true;
++m_stats.m_num_simplify_conflict;
++m_num_cache_hits;
return search_result::unsat;
}
}
// Apply Parikh image filter: generate modular length constraints and
// perform a lightweight feasibility pre-check. The filter is guarded
// internally (m_parikh_applied) so it only runs once per node.
@ -2102,6 +2203,9 @@ namespace seq {
if (!check_leaf_regex(*node, dep)) {
node->set_general_conflict();
node->set_conflict(backtrack_reason::regex, dep);
// string-only conflict (empty intersection) → memoize.
node->m_unsat_cacheable = true;
m_unsat_node_cache.insert(compute_node_signature(node));
return search_result::unsat;
}
assert_node_side_constraints(node);
@ -2189,6 +2293,20 @@ namespace seq {
}
if (!any_unknown) {
node->set_child_conflict();
// Memoize this internal UNSAT iff its whole subtree closed for
// string/regex-only reasons (no child relied on arithmetic / Parikh /
// external context). Then a same-signature node found via any other
// path is pruned by the lookup above.
bool all_string_only = true;
for (nielsen_edge* e : node->outgoing()) {
if (!node_unsat_string_only(e->tgt())) {
all_string_only = false;
break;
}
}
node->m_unsat_cacheable = all_string_only;
if (all_string_only)
m_unsat_node_cache.insert(compute_node_signature(node));
return search_result::unsat;
}
return search_result::unknown;
@ -3368,43 +3486,55 @@ namespace seq {
}
// -----------------------------------------------------------------------
// Helper: precompute_partial_dfa
// BFS of Brzozowski derivatives from root_re up to `depth` steps, recording
// all concrete minterm edges in the partial DFA. Called eagerly from
// apply_cycle_decomposition (priority 6) so that SCC detection can fire
// before apply_regex_var_split (priority 10) creates a symbolic child whose
// ite-derivative would otherwise delay SCC detection by one more level.
// Helper: ensure_automaton_explored (lazy, on-demand, cached)
// Records the *complete* reachable Brzozowski automaton of root_re into the
// partial DFA — but only once per regex component. This is the lazy
// Q-completion: instead of re-running a fixed-depth BFS on every cycle
// decomposition (the old precompute_partial_dfa(R, depth), which re-explored
// R's automaton on each of the ~hundreds of decompositions and, with a
// shallow depth, left Q incomplete so guards discharged prematurely and laps
// never closed), we explore each component once to saturation and remember
// every state we visited. A later decomposition whose head was reached by an
// earlier exploration finds it already covered and does no work.
//
// Completeness matters: the cycle guard / stabilizer view gate on whether the
// current state lies in Q (projection_state_in_Q), and that is only sound as
// a *bound* when Q contains the whole SCC of the head. The BFS is bounded by
// the finite reachable automaton (Brzozowski states modulo ACI), so it
// terminates; m.inc() keeps it responsive to the resource limit.
// -----------------------------------------------------------------------
void nielsen_graph::precompute_partial_dfa(euf::snode const* root_re, const unsigned depth) {
void nielsen_graph::ensure_automaton_explored(euf::snode const* root_re) {
SASSERT(root_re);
if (!root_re->is_ground())
return;
if (m_explored_automaton.contains(root_re->get_expr()->get_id()))
return;
struct work_item { euf::snode const* re; unsigned d; };
svector<work_item> queue;
queue.push_back({root_re, depth});
uint_set visited;
visited.insert(root_re->id());
svector<euf::snode const*> queue;
queue.push_back(root_re);
while (!queue.empty()) {
auto [re, d] = queue.back();
if (!m.inc())
return; // resource limit: leave Q partial (sound under-approx)
euf::snode const* re = queue.back();
queue.pop_back();
const unsigned re_eid = re->get_expr()->get_id();
if (m_explored_automaton.contains(re_eid))
continue; // already explored (here or in a previous component)
m_explored_automaton.insert(re_eid);
euf::snode_vector mts;
m_sg.compute_minterms(re, mts);
for (euf::snode const* mt : mts) {
// std::cout << "minterm: " << mk_pp(mt->get_expr(), m) << std::endl;
euf::snode const* deriv = m_sg.brzozowski_deriv(re, mt);
if (!deriv || deriv->is_fail())
continue;
record_partial_derivative_edge(re, mt, deriv);
if (d > 0 && deriv->is_ground() && !visited.contains(deriv->id())) {
visited.insert(deriv->id());
queue.push_back({deriv, d - 1});
}
if (deriv->is_ground() && !m_explored_automaton.contains(deriv->get_expr()->get_id()))
queue.push_back(deriv);
}
}
//std::string s = partial_dfa_to_dot(root_re, true);
}
// -----------------------------------------------------------------------
@ -3500,10 +3630,10 @@ namespace seq {
euf::snode const* x = first;
euf::snode const* R = mem.m_regex;
// Eagerly precompute partial DFA edges from this regex so that
// collect_scc_for_projection can detect cycles without waiting
// for apply_regex_var_split to create per-minterm children.
precompute_partial_dfa(R, 64);
// Lazily explore R's full automaton (once, cached) so that
// collect_scc_for_projection sees the complete SCC of R and the
// stabilizer view / cycle guard gate on a complete Q.
ensure_automaton_explored(R);
// R must sit on a cycle (an SCC of the partial DFA).
uint_set scc;
@ -3515,7 +3645,6 @@ namespace seq {
const unsigned nu = m_projection_extract_idx;
if (nu == 0)
continue;
fprintf(stderr, "DEC R=%u nu=%u sccsz=%u x=%u nedges=%u\n", R->id(), nu, scc.num_elems(), x->id(), (unsigned)m_partial_dfa_edges.size()); fflush(stderr);
// Trigger condition: x must not already carry a cycle guard for the
// current SCC snapshot ν. All states of one SCC share a single ν, so
@ -5139,6 +5268,8 @@ namespace seq {
st.update("nseq mod var num unwind (eq)", m_stats.m_mod_var_num_unwinding_eq);
st.update("nseq mod var num unwind (mem)", m_stats.m_mod_var_num_unwinding_mem);
st.update("nseq mod axiomatized disequalities", m_stats.m_ax_diseq);
st.update("nseq unsat-cache size", (unsigned) m_unsat_node_cache.size());
st.update("nseq unsat-cache hits", m_num_cache_hits);
}
}

View file

@ -49,6 +49,8 @@ Author:
#include "util/uint_set.h"
#include "util/vector.h"
#include <map>
#include <set>
#include <vector>
namespace smt { class enode; }
@ -512,6 +514,11 @@ namespace seq {
backtrack_reason m_reason = backtrack_reason::unevaluated;
bool m_is_progress = false;
bool m_node_len_constraints_generated = false; // true after generate_node_length_constraints runs
// true once this node has been proven UNSAT for reasons that depend only
// on its string/regex constraints (not on arithmetic / Parikh / external
// context). Such an unsat is a property of the node's string signature
// and is safe to memoize in the transposition table (m_unsat_node_cache).
bool m_unsat_cacheable = false;
// Parikh filter: set to true once apply_parikh_to_node has been applied
// to this node. Prevents duplicate constraint generation across DFS runs.
@ -829,6 +836,18 @@ namespace seq {
// when the explored SCC's edge set actually grows; identifies which
// partial-DFA edges (m_projection_idx ≤ ν) belong to a projection's Q.
unsigned m_projection_extract_idx = 0;
// Expr ids of regex states whose full reachable automaton has already
// been recorded into the partial DFA (lazy, once-per-component Q growth;
// see ensure_automaton_explored).
uint_set m_explored_automaton;
// Transposition table: structural signatures of nodes already proven
// UNSAT for string/regex-only reasons. A node whose signature is present
// is unsatisfiable regardless of how it was reached, so the DFS can prune
// it without re-exploring its subtree (turns the search tree into the
// finite DAG the termination proof bounds). See compute_node_signature.
std::set<std::vector<unsigned>> m_unsat_node_cache;
unsigned m_num_cache_hits = 0;
public:
@ -1010,6 +1029,16 @@ namespace seq {
search_result search_dfs(nielsen_node *node, ptr_vector<nielsen_edge>& path, unsigned depth = 0);
// Transposition table helpers (node memoization of string-only UNSAT).
// Canonical structural signature of a node (string equalities,
// disequalities, memberships incl. view/guard metadata, char ranges).
// Two nodes with equal signatures have identical string constraints.
std::vector<unsigned> compute_node_signature(nielsen_node const* n) const;
// Union of all constraint deps of a node (sound over-approx conflict).
dep_tracker node_all_deps(nielsen_node const* n);
// True iff the node's UNSAT depends only on string/regex constraints.
bool node_unsat_string_only(nielsen_node const* n) const;
// Regex widening: overapproximate `str` by replacing variables with
// the intersection of their primitive regex constraints (or Σ* if
// unconstrained), replacing symbolic chars with their char ranges,
@ -1180,7 +1209,9 @@ namespace seq {
// eagerly recording concrete minterm edges in the partial DFA so that
// collect_scc_for_projection can find cycles without first waiting for
// concrete children to record them one level at a time.
void precompute_partial_dfa(euf::snode const* root_re, unsigned depth);
// Lazily record the complete reachable automaton of root_re into the
// partial DFA, once per regex component (cached in m_explored_automaton).
void ensure_automaton_explored(euf::snode const* root_re);
// generalized power introduction: for an equation where one head is
// a variable v and the other side has ground prefix + a variable x