From e87aaa69246289eb5e1248017f261382cf884c43 Mon Sep 17 00:00:00 2001 From: Lev Nachmanson <5377127+levnach@users.noreply.github.com> Date: Sun, 28 Jun 2026 10:20:32 -0700 Subject: [PATCH] [snapshot-regression-fix] Fix psmt infinite loop on theory-incomplete cubes (#3044) (#9983) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Fixes a `psmt` (parallel SMT tactic) regression where the solver hangs to a wall-clock timeout instead of returning `unknown` on formulas whose root cube is genuinely undetermined by an incomplete theory. - **Originating discussion:** https://github.com/Z3Prover/bench/discussions/2735 - **Benchmark:** `iss-3044/bug-1.smt2` (from [Z3 issue #3044](https://github.com/Z3Prover/z3/issues/3044)) ```smt2 (declare-fun a (Int) Bool) (declare-fun b (Int) Bool) (assert (distinct a b)) (check-sat-using psmt) ``` ## Divergence The recorded oracle (expected) vs. current z3 (combined stdout+stderr, `-T:20`): ```diff -(incomplete (theory array)) -unknown +timeout ``` ## Root cause The rewritten parallel tactic (`src/solver/parallel_tactical.cpp`, introduced in #9824/#9825) hangs on this input. In the worker `run()` loop, every `l_undef` cube result was treated as if the per-cube **conflict limit** had been reached: the worker escalated the per-thread conflict budget (`update_max_thread_conflicts`) and re-checked / re-split the same cube. When the `l_undef` actually comes from **theory incompleteness** (here, the array theory cannot decide `(distinct a b)` over `Int -> Bool`) rather than the conflict limit, the verdict never changes, so the worker re-checks the same cube forever. Compounding this, the `batch_manager` state machine had **no terminal `unknown` state** — the only way to finish was for some worker to prove `sat`/`unsat`, which is impossible for a root-level theory-incomplete formula. The combination produced an infinite loop and a wall-clock timeout. The pre-rewrite parallel tactic avoided this: its `giveup()` detected reasons starting with `(incomplete` / `(sat.giveup`, reported a soft undef, and echoed the reason to `verbose_stream()`. ## Fix All changes are confined to `src/solver/parallel_tactical.cpp` (47 insertions, 4 deletions): 1. **Distinguish genuine incompleteness from conflict-limit exhaustion.** In the worker `l_undef` case, only `reason_unknown() == "max-conflicts-reached"` benefits from escalating the budget / splitting. For any other reason (incomplete theory, quantifiers, lambdas, resource limits, ...) re-checking is futile, so the worker records a sound `unknown` and stops working the branch. 2. **Add a terminal `is_unknown` batch-manager state** (`set_unknown`, `get_result() -> l_undef`, reason storage). It is a *soft* result: it does not cancel the other workers, and a definitive `sat`/`unsat` verdict from another branch may still override it (the `set_sat`/`set_unsat` guards now permit overriding `is_unknown`). All `set_unsat` call sites are global formula-unsat (core ⊆ assumptions, or independent of the tested backbone literal), so the override is sound; tree-closure unsat remains guarded by `is_running` and cannot fire because the undef leaf stays open. 3. **Restore the reason output.** The captured `reason_unknown` is propagated to the result goal and echoed to `verbose_stream()`, reproducing the `(incomplete (theory array))` line that the sequential path / old parallel tactic emitted. ## Validation Rebuilt the `./z3` checkout (`./configure && make -C build -j16`) and re-ran the benchmark with the freshly built binary using the same options the snapshot capture uses (`-T:20`, combined stdout+stderr): ``` $ z3 inputs/issues/iss-3044/bug-1.smt2 -T:20 (incomplete (theory array)) unknown ``` This matches the recorded `bug-1.expected.out` oracle **byte-for-byte**, and the benchmark now completes in ~0.5s (was: timeout). Verified stable across 8 consecutive runs. Basic `psmt` `sat`/`unsat` checks continue to produce correct results. Opened as a **draft** for human review. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> > Generated by [Fix a Z3 snapshot-regression divergence](https://github.com/Z3Prover/bench/actions/runs/28313246856) · 5.7K AIC · ⌖ 85.8 AIC · ⊞ 41.2K · [◷](https://github.com/search?q=repo%3AZ3Prover%2Fz3+%22gh-aw-workflow-id%3A+snapshot-regression-fixer%22&type=pullrequests) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/solver/parallel_tactical.cpp | 51 +++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/src/solver/parallel_tactical.cpp b/src/solver/parallel_tactical.cpp index c5f4949bee..7191197767 100644 --- a/src/solver/parallel_tactical.cpp +++ b/src/solver/parallel_tactical.cpp @@ -133,6 +133,7 @@ class parallel_solver { is_running, is_sat, is_unsat, + is_unknown, is_exception_msg, is_exception_code }; @@ -187,6 +188,7 @@ class parallel_solver { unsigned m_exception_code = 0; std::string m_exception_msg; + std::string m_reason_unknown; model_ref m_model; expr_ref_vector m_unsat_core; std::atomic m_canceled = false; @@ -485,13 +487,14 @@ class parallel_solver { m_core_min_jobs.reset(); m_model = nullptr; m_unsat_core.reset(); + m_reason_unknown.clear(); m_canceled = false; } void set_sat(ast_translation& l2g, model& mdl) { std::scoped_lock lock(mux); IF_VERBOSE(1, verbose_stream() << "Batch manager setting SAT.\n"); - if (m_state != state::is_running) return; + if (m_state != state::is_running && m_state != state::is_unknown) return; m_state = state::is_sat; m_model = mdl.translate(l2g); cancel_background_threads(); @@ -506,14 +509,27 @@ class parallel_solver { expr_ref_vector const& core) { std::scoped_lock lock(mux); IF_VERBOSE(1, verbose_stream() << "Batch manager setting UNSAT.\n"); - if (m_state != state::is_running) return; + if (m_state != state::is_running && m_state != state::is_unknown) return; m_state = state::is_unsat; - SASSERT(m_unsat_core.empty()); + m_unsat_core.reset(); for (expr* c : core) m_unsat_core.push_back(l2g(c)); cancel_background_threads(); } + // A worker found a cube whose result is genuinely undetermined (e.g. the + // theory solver is incomplete) and that cannot be refined any further. + // Record a sound 'unknown' verdict. This is a soft result: it does not + // cancel the remaining workers, so a definitive sat/unsat verdict from + // another branch may still arrive and override it. + void set_unknown(std::string const& reason) { + std::scoped_lock lock(mux); + IF_VERBOSE(1, verbose_stream() << "Batch manager setting UNKNOWN: " << reason << ".\n"); + if (m_state != state::is_running) return; + m_state = state::is_unknown; + m_reason_unknown = reason; + } + void set_exception(std::string const& msg) { std::scoped_lock lock(mux); IF_VERBOSE(1, verbose_stream() << "Batch manager setting exception msg: " << msg << ".\n"); @@ -941,6 +957,7 @@ class parallel_solver { throw default_exception("par2: inconsistent end state"); case state::is_sat: return l_true; case state::is_unsat: return l_false; + case state::is_unknown: return l_undef; case state::is_exception_msg: throw default_exception(m_exception_msg.c_str()); case state::is_exception_code: @@ -951,6 +968,8 @@ class parallel_solver { } } + std::string const& get_reason_unknown() const { return m_reason_unknown; } + model_ref& get_model() { return m_model; } expr_ref_vector const& get_unsat_core() const { return m_unsat_core; } @@ -1215,8 +1234,21 @@ class parallel_solver { switch (r) { case l_undef: { - update_max_thread_conflicts(); LOG_WORKER(1, " found undef cube\n"); + // Escalating the conflict budget (update_max_thread_conflicts) + // or splitting the cube only helps when the cube was abandoned + // because the per-cube conflict limit was reached. For any other + // source of incompleteness (an incomplete theory, quantifiers, + // lambdas, resource limits, ...) the verdict will not change, so + // re-checking the same cube would spin forever. Record a sound + // 'unknown' verdict and stop working this branch instead. + std::string reason = s->reason_unknown(); + if (reason != "max-conflicts-reached") { + LOG_WORKER(1, " undef cube is not conflict-limited (" << reason << "); reporting unknown\n"); + b.set_unknown(reason); + return; + } + update_max_thread_conflicts(); if (m_config.m_max_cube_depth <= cube.size()) goto check_cube_start; expr_ref atom = get_split_atom(lease, cube); @@ -2046,6 +2078,10 @@ public: st.copy(m_stats); } + std::string reason_unknown() const { + return m_batch_manager.get_reason_unknown(); + } + void reset_statistics() { m_stats.reset(); } @@ -2128,6 +2164,13 @@ public: case l_undef: if (!m.inc()) throw tactic_exception(Z3_CANCELED_MSG); + { + std::string reason = ps.reason_unknown(); + if (!reason.empty()) { + g->set_reason_unknown(reason); + IF_VERBOSE(0, verbose_stream() << reason << "\n"); + } + } break; }