mirror of
https://github.com/Z3Prover/z3
synced 2026-06-29 03:48:51 +00:00
Add new parallel algorithm as a tactic (parallel_tactical2.cpp). Don't port over old experiments from smt_parallel that we aren't using (sls, inprocessing, failed_literal_mode for bb detection). Fix bugs: lease cancellation/reslimit race condition; this involves changing the lease epoch to a simple boolean flag. Also, there is now a single shared set of params for the tactic and smt_parallel. **Test runs for the parallel_tactical2 vs old smt_parallel version:** run-2747-Z3-threads-4-qflia-30s-stats.md run-2746-Z3-threads-4-qflia-30s-parallel_tactic-stats.md run-2745-Z3-threads-1-qfbv-30s-stats.md run-3013-Z3-threads-4-qfbv-30s-parallel_tactic-stats.md --> note this is indeed run-3013, I reran after a bugfix in inc_sat_solver run-2743-Z3-threads-4-qfnia-30s-stats.md run-2742-Z3-threads-4-qfnia-30s-parallel_tactic-stats.md **Test runs for the new smt_parallel with bugfixes:** run-2801-Z3-threads-4-qflia-30s-smtparallel-bugfixes-stats.md, run-2800-Z3-threads-4-qflia-30s-smtparallel-bugfixes-stats.md run-2797-Z3-threads-4-qfnia-30s-smtparallel-bugfixes-stats.md compare to old smt_parallel: run-2747-Z3-threads-4-qflia-30s-stats.md run-2743-Z3-threads-4-qfnia-30s-stats.md Note that there is a slight regression on lia in run-2800. The source of this appears to be the new LP largest-cube LIA heuristic param, which is enabled by default. Disabling this param in run-2801 restored performance (I didn't change this in this PR though, just something to note). http://mtzguido.tplinkdns.com:8081/z3/compare_stats.html --------- Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com> Co-authored-by: Ilana Shapiro <ilanashapiro@Ilanas-MacBook-Pro.local> Co-authored-by: Ilana Shapiro <ilanashapiro@Ilanas-MBP.localdomain> Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
This commit is contained in:
parent
15f33f458d
commit
612fab1c9a
29 changed files with 2694 additions and 1796 deletions
|
|
@ -25,7 +25,7 @@ Author:
|
|||
#include "smt/smt_parallel.h"
|
||||
#include "smt/smt_lookahead.h"
|
||||
#include "solver/solver_preprocess.h"
|
||||
#include "params/smt_parallel_params.hpp"
|
||||
#include "solver/parallel_params.hpp"
|
||||
|
||||
#include <cmath>
|
||||
#include <mutex>
|
||||
|
|
@ -550,7 +550,7 @@ namespace smt {
|
|||
if (m_ablate_backtracking) {
|
||||
// Ablation: for each target, pass the entire path from root to that node
|
||||
for (auto const& target : targets) {
|
||||
if (m_search_tree.is_lease_canceled(target.leased_node, target.cancel_epoch))
|
||||
if (m_search_tree.is_lease_canceled(target.leased_node))
|
||||
continue;
|
||||
|
||||
// Reconstruct the full path from root to this target node
|
||||
|
|
@ -626,7 +626,7 @@ namespace smt {
|
|||
ctx->set_logic(p.ctx.m_setup.get_logic());
|
||||
context::copy(p.ctx, *ctx, true);
|
||||
ctx->pop_to_base_lvl();
|
||||
ctx->get_fparams().m_preprocess = false;
|
||||
ctx->get_fparams().m_preprocess = false; // avoid preprocessing lemmas that are exchanged
|
||||
}
|
||||
|
||||
void parallel::core_minimizer_worker::cancel() {
|
||||
|
|
@ -763,22 +763,25 @@ namespace smt {
|
|||
if (m_config.m_global_backbones) {
|
||||
bb_candidates local_candidates = find_backbone_candidates();
|
||||
b.collect_backbone_candidates(m_l2g, local_candidates);
|
||||
if (!m.inc())
|
||||
bool lease_canceled = false;
|
||||
if (!b.checkpoint_worker(id, lease, lease_canceled))
|
||||
return;
|
||||
if (lease_canceled) {
|
||||
LOG_WORKER(1, " abandoning canceled lease\n");
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
lbool r = check_cube(cube);
|
||||
|
||||
if (b.lease_canceled(lease)) {
|
||||
bool lease_canceled = false;
|
||||
if (!b.checkpoint_worker(id, lease, lease_canceled))
|
||||
return;
|
||||
if (lease_canceled) {
|
||||
LOG_WORKER(1, " abandoning canceled lease\n");
|
||||
lease = {};
|
||||
m.limit().dec_cancel();
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!m.inc())
|
||||
return;
|
||||
|
||||
switch (r) {
|
||||
case l_undef: {
|
||||
update_max_thread_conflicts();
|
||||
|
|
@ -790,7 +793,6 @@ namespace smt {
|
|||
if (!atom)
|
||||
goto check_cube_start;
|
||||
b.try_split(m_l2g, id, lease, atom, m_config.m_threads_max_conflicts);
|
||||
lease = {};
|
||||
simplify();
|
||||
break;
|
||||
}
|
||||
|
|
@ -825,7 +827,6 @@ namespace smt {
|
|||
b.backtrack(m_l2g, id, core_to_use, lease);
|
||||
if (m_config.m_core_minimize)
|
||||
b.enqueue_core_minimization(m_l2g, source, unsat_core);
|
||||
lease = {};
|
||||
|
||||
if (m_config.m_share_conflicts)
|
||||
b.collect_clause(m_l2g, id, mk_not(mk_and(unsat_core)));
|
||||
|
|
@ -854,10 +855,10 @@ namespace smt {
|
|||
m_num_initial_atoms = ctx->get_num_bool_vars();
|
||||
ctx->get_fparams().m_preprocess = false; // avoid preprocessing lemmas that are exchanged
|
||||
|
||||
smt_parallel_params pp(p.ctx.m_params);
|
||||
m_config.m_inprocessing = pp.inprocessing();
|
||||
m_config.m_global_backbones = pp.num_global_bb_batch_threads() > 0 || pp.num_global_bb_fl_threads() > 0;
|
||||
m_config.m_local_backbones = pp.local_backbones();
|
||||
parallel_params pp(p.ctx.m_params);
|
||||
m_config.m_inprocessing = false;
|
||||
m_config.m_global_backbones = pp.num_bb_threads() > 0;
|
||||
m_config.m_local_backbones = false;
|
||||
m_config.m_core_minimize = pp.core_minimize();
|
||||
m_config.m_ablate_backtracking = pp.ablate_backtracking();
|
||||
|
||||
|
|
@ -887,9 +888,9 @@ namespace smt {
|
|||
ctx->pop_to_base_lvl();
|
||||
m_shared_units_prefix = ctx->assigned_literals().size();
|
||||
m_num_initial_atoms = ctx->get_num_bool_vars();
|
||||
ctx->get_fparams().m_preprocess = false; // avoid preprocessing lemmas that are exchanged
|
||||
|
||||
smt_parallel_params pp(p.ctx.m_params);
|
||||
m_use_failed_literal_test = pp.num_global_bb_fl_threads() > 0;
|
||||
m_use_failed_literal_test = false;
|
||||
}
|
||||
|
||||
parallel::bb_candidates parallel::worker::find_backbone_candidates(unsigned k) {
|
||||
|
|
@ -1105,14 +1106,48 @@ namespace smt {
|
|||
return r;
|
||||
}
|
||||
|
||||
void parallel::batch_manager::release_lease_unlocked(unsigned worker_id, node* n) {
|
||||
if (worker_id >= m_worker_leases.size())
|
||||
void parallel::batch_manager::set_canceled_unlocked() {
|
||||
if (m_state != state::is_running)
|
||||
return;
|
||||
auto &lease = m_worker_leases[worker_id];
|
||||
if (!lease.leased_node || lease.leased_node != n)
|
||||
cancel_background_threads();
|
||||
}
|
||||
|
||||
// Locking wrapper around set_canceled_unlocked(): acquires `mux` for the
// duration of the call and then delegates to the unlocked variant.
void parallel::batch_manager::set_canceled() {
|
||||
std::scoped_lock lock(mux);
|
||||
set_canceled_unlocked();
|
||||
}
|
||||
|
||||
void parallel::batch_manager::release_worker_lease_unlocked(unsigned worker_id, node_lease& lease) {
|
||||
if (worker_id >= m_worker_leases.size()) {
|
||||
lease = {};
|
||||
return;
|
||||
m_search_tree.dec_active_workers(lease.leased_node);
|
||||
}
|
||||
auto& stored_lease = m_worker_leases[worker_id];
|
||||
if (!stored_lease.leased_node || stored_lease.leased_node != lease.leased_node) {
|
||||
lease = {};
|
||||
return;
|
||||
}
|
||||
bool cancel_signaled = stored_lease.cancel_signaled;
|
||||
m_search_tree.dec_active_workers(stored_lease.leased_node);
|
||||
stored_lease = {};
|
||||
lease = {};
|
||||
if (cancel_signaled)
|
||||
p.m_workers[worker_id]->limit().dec_cancel();
|
||||
}
|
||||
|
||||
// Releases `lease` for worker `worker_id` only if the search tree reports the
// leased node as canceled.
//
// Returns false without touching anything when:
//   - the batch manager is not in state::is_running,
//   - `lease` holds no node, or `worker_id` is outside m_worker_leases,
//   - the lease stored for this worker refers to a different node than `lease`,
//   - the search tree does not report the stored node's lease as canceled.
// Otherwise it hands the lease to release_worker_lease_unlocked() and returns
// true.
//
// Does not take `mux` itself; the callers visible in this file
// (checkpoint_worker, try_split) acquire it before calling.
bool parallel::batch_manager::attempt_release_canceled_lease_unlocked(unsigned worker_id, node_lease& lease) {
|
||||
if (m_state != state::is_running || !lease.leased_node || worker_id >= m_worker_leases.size())
|
||||
return false;
|
||||
|
||||
auto& stored_lease = m_worker_leases[worker_id];
|
||||
if (stored_lease.leased_node != lease.leased_node)
|
||||
return false;
|
||||
|
||||
if (!m_search_tree.is_lease_canceled(stored_lease.leased_node))
|
||||
return false;
|
||||
|
||||
release_worker_lease_unlocked(worker_id, lease);
|
||||
return true;
|
||||
}
|
||||
|
||||
void parallel::batch_manager::cancel_closed_leases_unlocked(unsigned source_worker_id) {
|
||||
|
|
@ -1124,7 +1159,7 @@ namespace smt {
|
|||
|
||||
// only cancel workers that currently hold a lease, whose lease is canceled,
|
||||
// and haven't already been signaled (prevents multiple inc_cancel() for same lease)
|
||||
if (lease.leased_node && !lease.cancel_signaled && m_search_tree.is_lease_canceled(lease.leased_node, lease.cancel_epoch)) {
|
||||
if (lease.leased_node && !lease.cancel_signaled && m_search_tree.is_lease_canceled(lease.leased_node)) {
|
||||
p.m_workers[worker_id]->cancel_lease();
|
||||
m_worker_leases[worker_id].cancel_signaled = true;
|
||||
}
|
||||
|
|
@ -1132,7 +1167,7 @@ namespace smt {
|
|||
}
|
||||
|
||||
void parallel::batch_manager::backtrack(ast_translation &l2g, unsigned worker_id, expr_ref_vector const &core,
|
||||
node_lease const &lease) {
|
||||
node_lease& lease) {
|
||||
std::scoped_lock lock(mux);
|
||||
vector<cube_config::literal> g_core;
|
||||
for (auto c : core)
|
||||
|
|
@ -1277,7 +1312,7 @@ namespace smt {
|
|||
if (!g_core.empty()) {
|
||||
collect_matching_targets_unlocked(source, g_core[0].get(), g_core, targets);
|
||||
for (auto const& target : targets) {
|
||||
if (!m_search_tree.is_lease_canceled(target.leased_node, target.cancel_epoch))
|
||||
if (!m_search_tree.is_lease_canceled(target.leased_node))
|
||||
m_search_tree.backtrack(target.leased_node, g_core);
|
||||
}
|
||||
}
|
||||
|
|
@ -1331,7 +1366,7 @@ namespace smt {
|
|||
for (node* t : matches) {
|
||||
if (!t || t == source)
|
||||
continue;
|
||||
if (m_search_tree.is_lease_canceled(t, t->get_cancel_epoch()))
|
||||
if (m_search_tree.is_lease_canceled(t))
|
||||
continue;
|
||||
|
||||
// When source is provided, keep only external matches. Nodes in the
|
||||
|
|
@ -1358,12 +1393,12 @@ namespace smt {
|
|||
if (!is_highest_ancestor)
|
||||
continue;
|
||||
|
||||
targets.push_back({ t, t->get_cancel_epoch() });
|
||||
targets.push_back({t});
|
||||
}
|
||||
}
|
||||
|
||||
void parallel::batch_manager::backtrack_unlocked(ast_translation& l2g, unsigned worker_id, expr_ref_vector const& core,
|
||||
node_lease const* lease, vector<node_lease> const* targets) {
|
||||
node_lease* lease, vector<node_lease> const* targets) {
|
||||
if (m_state != state::is_running)
|
||||
return;
|
||||
|
||||
|
|
@ -1374,17 +1409,25 @@ namespace smt {
|
|||
SASSERT(lease != nullptr || targets != nullptr);
|
||||
bool did_backtrack = false;
|
||||
|
||||
if (lease && !m_search_tree.is_lease_canceled(lease->leased_node, lease->cancel_epoch)) {
|
||||
// we close/backtrack regardless of whether this lease is stale or not, as long as the lease isn't canceled
|
||||
// i.e. worker 1 splits this node, but then worker 2 determines UNSAT --> worker 2 is stale but we still close this node and backtrack
|
||||
did_backtrack = true;
|
||||
IF_VERBOSE(1, verbose_stream() << "Batch manager backtracking.\n");
|
||||
release_lease_unlocked(worker_id, lease->leased_node);
|
||||
m_search_tree.backtrack(lease->leased_node, g_core);
|
||||
if (lease) {
|
||||
if (!m_search_tree.is_lease_canceled(lease->leased_node)) {
|
||||
// we close/backtrack regardless of whether this lease is stale or not, as long as the lease isn't canceled
|
||||
// i.e. worker 1 splits this node, but then worker 2 determines UNSAT --> worker 2 is stale but we still close this node and backtrack
|
||||
did_backtrack = true;
|
||||
IF_VERBOSE(1, verbose_stream() << "Batch manager backtracking.\n");
|
||||
node* leased_node = lease->leased_node;
|
||||
release_worker_lease_unlocked(worker_id, *lease);
|
||||
m_search_tree.backtrack(leased_node, g_core);
|
||||
}
|
||||
else {
|
||||
// the lease was canceled by another worker. don't backtrack on this node with whatever new core we just found with this thread
|
||||
// however, we do proceed to external targets, since the new code may have exposed new external targets we can close/backtrack
|
||||
attempt_release_canceled_lease_unlocked(worker_id, *lease);
|
||||
}
|
||||
}
|
||||
if (targets) {
|
||||
for (auto const& target : *targets) {
|
||||
if (m_search_tree.is_lease_canceled(target.leased_node, target.cancel_epoch))
|
||||
if (m_search_tree.is_lease_canceled(target.leased_node))
|
||||
continue;
|
||||
|
||||
did_backtrack = true;
|
||||
|
|
@ -1410,37 +1453,59 @@ namespace smt {
|
|||
}
|
||||
|
||||
void parallel::batch_manager::try_split(ast_translation &l2g, unsigned worker_id,
|
||||
node_lease const &lease, expr *atom, unsigned effort) {
|
||||
node_lease& lease, expr *atom, unsigned effort) {
|
||||
std::scoped_lock lock(mux);
|
||||
|
||||
if (m_state != state::is_running)
|
||||
return;
|
||||
|
||||
if (m_search_tree.is_lease_canceled(lease.leased_node, lease.cancel_epoch))
|
||||
if (m_search_tree.is_lease_canceled(lease.leased_node)) {
|
||||
attempt_release_canceled_lease_unlocked(worker_id, lease);
|
||||
return;
|
||||
}
|
||||
|
||||
expr_ref lit(m), nlit(m);
|
||||
lit = l2g(atom);
|
||||
nlit = mk_not(m, lit);
|
||||
bool did_split = m_search_tree.try_split(lease.leased_node, lease.cancel_epoch, lit, nlit, effort);
|
||||
node* leased_node = lease.leased_node;
|
||||
VERIFY(!leased_node->path_contains_atom(lit));
|
||||
VERIFY(!leased_node->path_contains_atom(nlit));
|
||||
bool did_split = m_search_tree.try_split(leased_node, lit, nlit, effort);
|
||||
|
||||
release_lease_unlocked(worker_id, lease.leased_node);
|
||||
release_worker_lease_unlocked(worker_id, lease);
|
||||
|
||||
if (did_split) {
|
||||
++m_stats.m_num_cubes;
|
||||
m_stats.m_max_cube_depth = std::max(m_stats.m_max_cube_depth, lease.leased_node->depth() + 1);
|
||||
m_stats.m_max_cube_depth = std::max(m_stats.m_max_cube_depth, leased_node->depth() + 1);
|
||||
IF_VERBOSE(1, verbose_stream() << "Batch manager splitting on literal: " << mk_bounded_pp(lit, m, 3) << "\n");
|
||||
}
|
||||
}
|
||||
|
||||
void parallel::batch_manager::release_lease(unsigned worker_id, node_lease const &lease) {
|
||||
bool parallel::batch_manager::checkpoint_worker(unsigned worker_id, node_lease& lease, bool& lease_canceled) {
|
||||
std::scoped_lock lock(mux);
|
||||
release_lease_unlocked(worker_id, lease.leased_node);
|
||||
lease_canceled = false;
|
||||
SASSERT(worker_id < p.m_workers.size());
|
||||
|
||||
if (attempt_release_canceled_lease_unlocked(worker_id, lease)) {
|
||||
lease_canceled = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (p.m_workers[worker_id]->limit().inc())
|
||||
return true;
|
||||
|
||||
if (attempt_release_canceled_lease_unlocked(worker_id, lease)) {
|
||||
lease_canceled = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
set_canceled_unlocked();
|
||||
return false;
|
||||
}
|
||||
|
||||
bool parallel::batch_manager::lease_canceled(node_lease const &lease) {
|
||||
std::scoped_lock lock(mux);
|
||||
return m_state == state::is_running && m_search_tree.is_lease_canceled(lease.leased_node, lease.cancel_epoch);
|
||||
return m_state == state::is_running && m_search_tree.is_lease_canceled(lease.leased_node);
|
||||
}
|
||||
|
||||
void parallel::batch_manager::collect_clause(ast_translation &l2g, unsigned source_worker_id, expr *clause) {
|
||||
|
|
@ -1745,7 +1810,6 @@ namespace smt {
|
|||
IF_VERBOSE(2, m_search_tree.display(verbose_stream()); verbose_stream() << "\n";);
|
||||
|
||||
lease.leased_node = t;
|
||||
lease.cancel_epoch = t->get_cancel_epoch();
|
||||
if (id >= m_worker_leases.size())
|
||||
m_worker_leases.resize(id + 1);
|
||||
m_worker_leases[id] = lease;
|
||||
|
|
@ -1779,8 +1843,9 @@ namespace smt {
|
|||
m_worker_leases.reset();
|
||||
m_worker_leases.resize(p.m_workers.size());
|
||||
|
||||
smt_parallel_params pp(p.ctx.m_params);
|
||||
parallel_params pp(p.ctx.m_params);
|
||||
m_ablate_backtracking = pp.ablate_backtracking();
|
||||
m_canceled = false;
|
||||
}
|
||||
|
||||
void parallel::batch_manager::collect_statistics(::statistics &st) const {
|
||||
|
|
@ -1794,19 +1859,14 @@ namespace smt {
|
|||
}
|
||||
|
||||
lbool parallel::operator()(expr_ref_vector const &asms) {
|
||||
smt_parallel_params pp(ctx.m_params);
|
||||
unsigned num_global_bb_batch_threads = pp.num_global_bb_batch_threads();
|
||||
parallel_params pp(ctx.m_params);
|
||||
unsigned num_global_bb_batch_threads = pp.num_bb_threads();
|
||||
if (num_global_bb_batch_threads > 2)
|
||||
throw default_exception("smt_parallel.num_global_bb_batch_threads must be 0, 1, or 2");
|
||||
throw default_exception("parallel.num_bb_threads must be 0, 1, or 2");
|
||||
unsigned num_workers = std::min((unsigned)std::thread::hardware_concurrency(), ctx.get_fparams().m_threads);
|
||||
unsigned num_sls_threads = (pp.sls() ? 1 : 0);
|
||||
unsigned num_sls_threads = 0;
|
||||
unsigned num_core_min_threads = (pp.core_minimize() ? 1 : 0);
|
||||
unsigned num_global_bb_fl_threads = pp.num_global_bb_fl_threads();
|
||||
if (num_global_bb_fl_threads > 2)
|
||||
throw default_exception("smt_parallel.num_global_bb_fl_threads must be 0, 1, or 2");
|
||||
if (num_global_bb_fl_threads > 0 && num_global_bb_batch_threads > 0)
|
||||
throw default_exception("smt_parallel.num_global_bb_fl_threads and smt_parallel.num_global_bb_batch_threads cannot both be enabled");
|
||||
unsigned num_global_bb_threads = num_global_bb_fl_threads > 0 ? num_global_bb_fl_threads : num_global_bb_batch_threads;
|
||||
unsigned num_global_bb_threads = num_global_bb_batch_threads;
|
||||
unsigned total_threads = num_workers + num_sls_threads + num_core_min_threads + num_global_bb_threads;
|
||||
|
||||
IF_VERBOSE(1, verbose_stream() << "Parallel SMT with " << total_threads << " threads\n";);
|
||||
|
|
@ -1856,18 +1916,52 @@ namespace smt {
|
|||
<< m_global_backbones_workers.size() << " global backbone threads.\n";);
|
||||
|
||||
m_batch_manager.initialize(num_global_bb_threads);
|
||||
|
||||
auto safe_run = [&](auto&& run_fn, reslimit& lim) {
|
||||
try {
|
||||
run_fn();
|
||||
if (lim.is_canceled())
|
||||
m_batch_manager.set_canceled();
|
||||
} catch (z3_error &err) {
|
||||
IF_VERBOSE(0, verbose_stream() << "Exception in parallel solver: " << err.what() << "\n");
|
||||
if (!lim.is_canceled())
|
||||
m_batch_manager.set_exception(err.error_code());
|
||||
else
|
||||
m_batch_manager.set_canceled();
|
||||
} catch (z3_exception &ex) {
|
||||
IF_VERBOSE(0, verbose_stream() << "Exception in parallel solver: " << ex.what() << "\n");
|
||||
if (!lim.is_canceled() && !is_cancellation_exception(ex.what()))
|
||||
m_batch_manager.set_exception(ex.what());
|
||||
else
|
||||
m_batch_manager.set_canceled();
|
||||
} catch (...) {
|
||||
IF_VERBOSE(0, verbose_stream() << "Unknown exception in parallel solver\n");
|
||||
if (!lim.is_canceled())
|
||||
m_batch_manager.set_exception("unknown exception");
|
||||
else
|
||||
m_batch_manager.set_canceled();
|
||||
}
|
||||
};
|
||||
|
||||
// Launch threads
|
||||
vector<std::thread> threads(total_threads);
|
||||
unsigned thread_idx = 0;
|
||||
for (auto* w : m_workers)
|
||||
threads[thread_idx++] = std::thread([&, w]() { w->run(); });
|
||||
threads[thread_idx++] = std::thread([w, &safe_run]() {
|
||||
safe_run([w]() { w->run(); }, w->limit());
|
||||
});
|
||||
if (m_sls_worker)
|
||||
threads[thread_idx++] = std::thread([&]() { m_sls_worker->run(); });
|
||||
threads[thread_idx++] = std::thread([this, &safe_run]() {
|
||||
safe_run([this]() { m_sls_worker->run(); }, m_sls_worker->limit());
|
||||
});
|
||||
if (m_core_minimizer_worker)
|
||||
threads[thread_idx++] = std::thread([&]() { m_core_minimizer_worker->run(); });
|
||||
threads[thread_idx++] = std::thread([this, &safe_run]() {
|
||||
safe_run([this]() { m_core_minimizer_worker->run(); }, m_core_minimizer_worker->limit());
|
||||
});
|
||||
for (auto* w : m_global_backbones_workers)
|
||||
threads[thread_idx++] = std::thread([&, w]() { w->run(); });
|
||||
threads[thread_idx++] = std::thread([w, &safe_run]() {
|
||||
safe_run([w]() { w->run(); }, w->limit());
|
||||
});
|
||||
|
||||
|
||||
// Wait for all threads to finish
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue