3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-29 03:48:51 +00:00

Parallel tactic (#9824) (#9825)

Add new parallel algorithm as a tactic (parallel_tactical2.cpp)
Don't port over old experiments from smt_parallel that we aren't using
(sls, inprocessing, failed_literal_mode for bb detection)
Fix bugs: lease cancellation/reslimit race condition; the fix involves
changing the lease epoch to a simple boolean flag
Also, now there is a single shared set of params for the tactic and
smt_parallel

**Test runs for the parallel_tactical2 vs old smt_parallel version:**
run-2747-Z3-threads-4-qflia-30s-stats.md
run-2746-Z3-threads-4-qflia-30s-parallel_tactic-stats.md
run-2745-Z3-threads-1-qfbv-30s-stats.md
run-3013-Z3-threads-4-qfbv-30s-parallel_tactic-stats.md --> note this is
indeed run-3013, I reran after a bugfix in inc_sat_solver
run-2743-Z3-threads-4-qfnia-30s-stats.md
run-2742-Z3-threads-4-qfnia-30s-parallel_tactic-stats.md

**Test runs for the new smt_parallel with bugfixes:**
run-2801-Z3-threads-4-qflia-30s-smtparallel-bugfixes-stats.md,
run-2800-Z3-threads-4-qflia-30s-smtparallel-bugfixes-stats.md
run-2797-Z3-threads-4-qfnia-30s-smtparallel-bugfixes-stats.md
compare to old smt_parallel:
run-2747-Z3-threads-4-qflia-30s-stats.md
run-2743-Z3-threads-4-qfnia-30s-stats.md

Note that there is a slight regression on lia in run-2800. The source of
this appears to be the new LP largest-cube LIA heuristic param,
which is enabled by default. Disabling this param in run-2801 restored
performance (I didn't change this in this PR though, just something to
note).

http://mtzguido.tplinkdns.com:8081/z3/compare_stats.html

---------

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
Co-authored-by: Ilana Shapiro <ilanashapiro@Ilanas-MacBook-Pro.local>
Co-authored-by: Ilana Shapiro <ilanashapiro@Ilanas-MBP.localdomain>
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
This commit is contained in:
Nikolaj Bjorner 2026-06-26 09:36:15 -07:00 committed by GitHub
parent 15f33f458d
commit 612fab1c9a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
29 changed files with 2694 additions and 1796 deletions

View file

@ -25,7 +25,7 @@ Author:
#include "smt/smt_parallel.h"
#include "smt/smt_lookahead.h"
#include "solver/solver_preprocess.h"
#include "params/smt_parallel_params.hpp"
#include "solver/parallel_params.hpp"
#include <cmath>
#include <mutex>
@ -550,7 +550,7 @@ namespace smt {
if (m_ablate_backtracking) {
// Ablation: for each target, pass the entire path from root to that node
for (auto const& target : targets) {
if (m_search_tree.is_lease_canceled(target.leased_node, target.cancel_epoch))
if (m_search_tree.is_lease_canceled(target.leased_node))
continue;
// Reconstruct the full path from root to this target node
@ -626,7 +626,7 @@ namespace smt {
ctx->set_logic(p.ctx.m_setup.get_logic());
context::copy(p.ctx, *ctx, true);
ctx->pop_to_base_lvl();
ctx->get_fparams().m_preprocess = false;
ctx->get_fparams().m_preprocess = false; // avoid preprocessing lemmas that are exchanged
}
void parallel::core_minimizer_worker::cancel() {
@ -763,22 +763,25 @@ namespace smt {
if (m_config.m_global_backbones) {
bb_candidates local_candidates = find_backbone_candidates();
b.collect_backbone_candidates(m_l2g, local_candidates);
if (!m.inc())
bool lease_canceled = false;
if (!b.checkpoint_worker(id, lease, lease_canceled))
return;
if (lease_canceled) {
LOG_WORKER(1, " abandoning canceled lease\n");
continue;
}
}
lbool r = check_cube(cube);
if (b.lease_canceled(lease)) {
bool lease_canceled = false;
if (!b.checkpoint_worker(id, lease, lease_canceled))
return;
if (lease_canceled) {
LOG_WORKER(1, " abandoning canceled lease\n");
lease = {};
m.limit().dec_cancel();
continue;
}
if (!m.inc())
return;
switch (r) {
case l_undef: {
update_max_thread_conflicts();
@ -790,7 +793,6 @@ namespace smt {
if (!atom)
goto check_cube_start;
b.try_split(m_l2g, id, lease, atom, m_config.m_threads_max_conflicts);
lease = {};
simplify();
break;
}
@ -825,7 +827,6 @@ namespace smt {
b.backtrack(m_l2g, id, core_to_use, lease);
if (m_config.m_core_minimize)
b.enqueue_core_minimization(m_l2g, source, unsat_core);
lease = {};
if (m_config.m_share_conflicts)
b.collect_clause(m_l2g, id, mk_not(mk_and(unsat_core)));
@ -854,10 +855,10 @@ namespace smt {
m_num_initial_atoms = ctx->get_num_bool_vars();
ctx->get_fparams().m_preprocess = false; // avoid preprocessing lemmas that are exchanged
smt_parallel_params pp(p.ctx.m_params);
m_config.m_inprocessing = pp.inprocessing();
m_config.m_global_backbones = pp.num_global_bb_batch_threads() > 0 || pp.num_global_bb_fl_threads() > 0;
m_config.m_local_backbones = pp.local_backbones();
parallel_params pp(p.ctx.m_params);
m_config.m_inprocessing = false;
m_config.m_global_backbones = pp.num_bb_threads() > 0;
m_config.m_local_backbones = false;
m_config.m_core_minimize = pp.core_minimize();
m_config.m_ablate_backtracking = pp.ablate_backtracking();
@ -887,9 +888,9 @@ namespace smt {
ctx->pop_to_base_lvl();
m_shared_units_prefix = ctx->assigned_literals().size();
m_num_initial_atoms = ctx->get_num_bool_vars();
ctx->get_fparams().m_preprocess = false; // avoid preprocessing lemmas that are exchanged
smt_parallel_params pp(p.ctx.m_params);
m_use_failed_literal_test = pp.num_global_bb_fl_threads() > 0;
m_use_failed_literal_test = false;
}
parallel::bb_candidates parallel::worker::find_backbone_candidates(unsigned k) {
@ -1105,14 +1106,48 @@ namespace smt {
return r;
}
void parallel::batch_manager::release_lease_unlocked(unsigned worker_id, node* n) {
if (worker_id >= m_worker_leases.size())
void parallel::batch_manager::set_canceled_unlocked() {
if (m_state != state::is_running)
return;
auto &lease = m_worker_leases[worker_id];
if (!lease.leased_node || lease.leased_node != n)
cancel_background_threads();
}
// Locking entry point for cancellation: acquires `mux` and then delegates to
// set_canceled_unlocked(), which expects the lock to already be held.
void parallel::batch_manager::set_canceled() {
    std::scoped_lock guard{mux};
    set_canceled_unlocked();
}
void parallel::batch_manager::release_worker_lease_unlocked(unsigned worker_id, node_lease& lease) {
if (worker_id >= m_worker_leases.size()) {
lease = {};
return;
m_search_tree.dec_active_workers(lease.leased_node);
}
auto& stored_lease = m_worker_leases[worker_id];
if (!stored_lease.leased_node || stored_lease.leased_node != lease.leased_node) {
lease = {};
return;
}
bool cancel_signaled = stored_lease.cancel_signaled;
m_search_tree.dec_active_workers(stored_lease.leased_node);
stored_lease = {};
lease = {};
if (cancel_signaled)
p.m_workers[worker_id]->limit().dec_cancel();
}
// Releases the lease held by `worker_id` if, and only if, that lease has been canceled.
//
// Returns true when the lease was released through release_worker_lease_unlocked();
// returns false, leaving `lease` untouched, when any of the following holds:
//   - the batch manager is no longer running,
//   - `lease` does not reference a node,
//   - `worker_id` has no slot in m_worker_leases,
//   - the lease recorded for the worker refers to a different node than `lease`,
//   - the search tree does not report the recorded node as canceled.
//
// The "_unlocked" suffix follows the file's convention for helpers that do not
// take `mux` themselves; the visible callers hold it while calling.
bool parallel::batch_manager::attempt_release_canceled_lease_unlocked(unsigned worker_id, node_lease& lease) {
    if (m_state != state::is_running)
        return false;
    if (!lease.leased_node)
        return false;
    if (worker_id >= m_worker_leases.size())
        return false;

    auto& recorded = m_worker_leases[worker_id];
    bool const same_node = recorded.leased_node == lease.leased_node;
    if (!same_node || !m_search_tree.is_lease_canceled(recorded.leased_node))
        return false;

    release_worker_lease_unlocked(worker_id, lease);
    return true;
}
void parallel::batch_manager::cancel_closed_leases_unlocked(unsigned source_worker_id) {
@ -1124,7 +1159,7 @@ namespace smt {
// only cancel workers that currently hold a lease, whose lease is canceled,
// and haven't already been signaled (prevents multiple inc_cancel() for same lease)
if (lease.leased_node && !lease.cancel_signaled && m_search_tree.is_lease_canceled(lease.leased_node, lease.cancel_epoch)) {
if (lease.leased_node && !lease.cancel_signaled && m_search_tree.is_lease_canceled(lease.leased_node)) {
p.m_workers[worker_id]->cancel_lease();
m_worker_leases[worker_id].cancel_signaled = true;
}
@ -1132,7 +1167,7 @@ namespace smt {
}
void parallel::batch_manager::backtrack(ast_translation &l2g, unsigned worker_id, expr_ref_vector const &core,
node_lease const &lease) {
node_lease& lease) {
std::scoped_lock lock(mux);
vector<cube_config::literal> g_core;
for (auto c : core)
@ -1277,7 +1312,7 @@ namespace smt {
if (!g_core.empty()) {
collect_matching_targets_unlocked(source, g_core[0].get(), g_core, targets);
for (auto const& target : targets) {
if (!m_search_tree.is_lease_canceled(target.leased_node, target.cancel_epoch))
if (!m_search_tree.is_lease_canceled(target.leased_node))
m_search_tree.backtrack(target.leased_node, g_core);
}
}
@ -1331,7 +1366,7 @@ namespace smt {
for (node* t : matches) {
if (!t || t == source)
continue;
if (m_search_tree.is_lease_canceled(t, t->get_cancel_epoch()))
if (m_search_tree.is_lease_canceled(t))
continue;
// When source is provided, keep only external matches. Nodes in the
@ -1358,12 +1393,12 @@ namespace smt {
if (!is_highest_ancestor)
continue;
targets.push_back({ t, t->get_cancel_epoch() });
targets.push_back({t});
}
}
void parallel::batch_manager::backtrack_unlocked(ast_translation& l2g, unsigned worker_id, expr_ref_vector const& core,
node_lease const* lease, vector<node_lease> const* targets) {
node_lease* lease, vector<node_lease> const* targets) {
if (m_state != state::is_running)
return;
@ -1374,17 +1409,25 @@ namespace smt {
SASSERT(lease != nullptr || targets != nullptr);
bool did_backtrack = false;
if (lease && !m_search_tree.is_lease_canceled(lease->leased_node, lease->cancel_epoch)) {
// we close/backtrack regardless of whether this lease is stale or not, as long as the lease isn't canceled
// i.e. worker 1 splits this node, but then worker 2 determines UNSAT --> worker 2 is stale but we still close this node and backtrack
did_backtrack = true;
IF_VERBOSE(1, verbose_stream() << "Batch manager backtracking.\n");
release_lease_unlocked(worker_id, lease->leased_node);
m_search_tree.backtrack(lease->leased_node, g_core);
if (lease) {
if (!m_search_tree.is_lease_canceled(lease->leased_node)) {
// we close/backtrack regardless of whether this lease is stale or not, as long as the lease isn't canceled
// i.e. worker 1 splits this node, but then worker 2 determines UNSAT --> worker 2 is stale but we still close this node and backtrack
did_backtrack = true;
IF_VERBOSE(1, verbose_stream() << "Batch manager backtracking.\n");
node* leased_node = lease->leased_node;
release_worker_lease_unlocked(worker_id, *lease);
m_search_tree.backtrack(leased_node, g_core);
}
else {
// The lease was canceled by another worker, so don't backtrack on this node with whatever new core this thread just found.
// However, we still proceed to the external targets, since the new core may have exposed new external targets we can close/backtrack.
attempt_release_canceled_lease_unlocked(worker_id, *lease);
}
}
if (targets) {
for (auto const& target : *targets) {
if (m_search_tree.is_lease_canceled(target.leased_node, target.cancel_epoch))
if (m_search_tree.is_lease_canceled(target.leased_node))
continue;
did_backtrack = true;
@ -1410,37 +1453,59 @@ namespace smt {
}
void parallel::batch_manager::try_split(ast_translation &l2g, unsigned worker_id,
node_lease const &lease, expr *atom, unsigned effort) {
node_lease& lease, expr *atom, unsigned effort) {
std::scoped_lock lock(mux);
if (m_state != state::is_running)
return;
if (m_search_tree.is_lease_canceled(lease.leased_node, lease.cancel_epoch))
if (m_search_tree.is_lease_canceled(lease.leased_node)) {
attempt_release_canceled_lease_unlocked(worker_id, lease);
return;
}
expr_ref lit(m), nlit(m);
lit = l2g(atom);
nlit = mk_not(m, lit);
bool did_split = m_search_tree.try_split(lease.leased_node, lease.cancel_epoch, lit, nlit, effort);
node* leased_node = lease.leased_node;
VERIFY(!leased_node->path_contains_atom(lit));
VERIFY(!leased_node->path_contains_atom(nlit));
bool did_split = m_search_tree.try_split(leased_node, lit, nlit, effort);
release_lease_unlocked(worker_id, lease.leased_node);
release_worker_lease_unlocked(worker_id, lease);
if (did_split) {
++m_stats.m_num_cubes;
m_stats.m_max_cube_depth = std::max(m_stats.m_max_cube_depth, lease.leased_node->depth() + 1);
m_stats.m_max_cube_depth = std::max(m_stats.m_max_cube_depth, leased_node->depth() + 1);
IF_VERBOSE(1, verbose_stream() << "Batch manager splitting on literal: " << mk_bounded_pp(lit, m, 3) << "\n");
}
}
void parallel::batch_manager::release_lease(unsigned worker_id, node_lease const &lease) {
bool parallel::batch_manager::checkpoint_worker(unsigned worker_id, node_lease& lease, bool& lease_canceled) {
std::scoped_lock lock(mux);
release_lease_unlocked(worker_id, lease.leased_node);
lease_canceled = false;
SASSERT(worker_id < p.m_workers.size());
if (attempt_release_canceled_lease_unlocked(worker_id, lease)) {
lease_canceled = true;
return true;
}
if (p.m_workers[worker_id]->limit().inc())
return true;
if (attempt_release_canceled_lease_unlocked(worker_id, lease)) {
lease_canceled = true;
return true;
}
set_canceled_unlocked();
return false;
}
bool parallel::batch_manager::lease_canceled(node_lease const &lease) {
std::scoped_lock lock(mux);
return m_state == state::is_running && m_search_tree.is_lease_canceled(lease.leased_node, lease.cancel_epoch);
return m_state == state::is_running && m_search_tree.is_lease_canceled(lease.leased_node);
}
void parallel::batch_manager::collect_clause(ast_translation &l2g, unsigned source_worker_id, expr *clause) {
@ -1745,7 +1810,6 @@ namespace smt {
IF_VERBOSE(2, m_search_tree.display(verbose_stream()); verbose_stream() << "\n";);
lease.leased_node = t;
lease.cancel_epoch = t->get_cancel_epoch();
if (id >= m_worker_leases.size())
m_worker_leases.resize(id + 1);
m_worker_leases[id] = lease;
@ -1779,8 +1843,9 @@ namespace smt {
m_worker_leases.reset();
m_worker_leases.resize(p.m_workers.size());
smt_parallel_params pp(p.ctx.m_params);
parallel_params pp(p.ctx.m_params);
m_ablate_backtracking = pp.ablate_backtracking();
m_canceled = false;
}
void parallel::batch_manager::collect_statistics(::statistics &st) const {
@ -1794,19 +1859,14 @@ namespace smt {
}
lbool parallel::operator()(expr_ref_vector const &asms) {
smt_parallel_params pp(ctx.m_params);
unsigned num_global_bb_batch_threads = pp.num_global_bb_batch_threads();
parallel_params pp(ctx.m_params);
unsigned num_global_bb_batch_threads = pp.num_bb_threads();
if (num_global_bb_batch_threads > 2)
throw default_exception("smt_parallel.num_global_bb_batch_threads must be 0, 1, or 2");
throw default_exception("parallel.num_bb_threads must be 0, 1, or 2");
unsigned num_workers = std::min((unsigned)std::thread::hardware_concurrency(), ctx.get_fparams().m_threads);
unsigned num_sls_threads = (pp.sls() ? 1 : 0);
unsigned num_sls_threads = 0;
unsigned num_core_min_threads = (pp.core_minimize() ? 1 : 0);
unsigned num_global_bb_fl_threads = pp.num_global_bb_fl_threads();
if (num_global_bb_fl_threads > 2)
throw default_exception("smt_parallel.num_global_bb_fl_threads must be 0, 1, or 2");
if (num_global_bb_fl_threads > 0 && num_global_bb_batch_threads > 0)
throw default_exception("smt_parallel.num_global_bb_fl_threads and smt_parallel.num_global_bb_batch_threads cannot both be enabled");
unsigned num_global_bb_threads = num_global_bb_fl_threads > 0 ? num_global_bb_fl_threads : num_global_bb_batch_threads;
unsigned num_global_bb_threads = num_global_bb_batch_threads;
unsigned total_threads = num_workers + num_sls_threads + num_core_min_threads + num_global_bb_threads;
IF_VERBOSE(1, verbose_stream() << "Parallel SMT with " << total_threads << " threads\n";);
@ -1856,18 +1916,52 @@ namespace smt {
<< m_global_backbones_workers.size() << " global backbone threads.\n";);
m_batch_manager.initialize(num_global_bb_threads);
auto safe_run = [&](auto&& run_fn, reslimit& lim) {
try {
run_fn();
if (lim.is_canceled())
m_batch_manager.set_canceled();
} catch (z3_error &err) {
IF_VERBOSE(0, verbose_stream() << "Exception in parallel solver: " << err.what() << "\n");
if (!lim.is_canceled())
m_batch_manager.set_exception(err.error_code());
else
m_batch_manager.set_canceled();
} catch (z3_exception &ex) {
IF_VERBOSE(0, verbose_stream() << "Exception in parallel solver: " << ex.what() << "\n");
if (!lim.is_canceled() && !is_cancellation_exception(ex.what()))
m_batch_manager.set_exception(ex.what());
else
m_batch_manager.set_canceled();
} catch (...) {
IF_VERBOSE(0, verbose_stream() << "Unknown exception in parallel solver\n");
if (!lim.is_canceled())
m_batch_manager.set_exception("unknown exception");
else
m_batch_manager.set_canceled();
}
};
// Launch threads
vector<std::thread> threads(total_threads);
unsigned thread_idx = 0;
for (auto* w : m_workers)
threads[thread_idx++] = std::thread([&, w]() { w->run(); });
threads[thread_idx++] = std::thread([w, &safe_run]() {
safe_run([w]() { w->run(); }, w->limit());
});
if (m_sls_worker)
threads[thread_idx++] = std::thread([&]() { m_sls_worker->run(); });
threads[thread_idx++] = std::thread([this, &safe_run]() {
safe_run([this]() { m_sls_worker->run(); }, m_sls_worker->limit());
});
if (m_core_minimizer_worker)
threads[thread_idx++] = std::thread([&]() { m_core_minimizer_worker->run(); });
threads[thread_idx++] = std::thread([this, &safe_run]() {
safe_run([this]() { m_core_minimizer_worker->run(); }, m_core_minimizer_worker->limit());
});
for (auto* w : m_global_backbones_workers)
threads[thread_idx++] = std::thread([&, w]() { w->run(); });
threads[thread_idx++] = std::thread([w, &safe_run]() {
safe_run([w]() { w->run(); }, w->limit());
});
// Wait for all threads to finish