3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-19 07:06:28 +00:00

attempt at lease policy patch

This commit is contained in:
Ilana Shapiro 2026-06-15 23:14:53 -07:00
parent fc5e4d7557
commit ab523fce54
4 changed files with 113 additions and 115 deletions

View file

@ -80,6 +80,7 @@ namespace smt {
try {
if (!m.inc()) {
b.set_canceled();
// b.notify_cv_waiters;
return;
}
res = m_sls->check();
@ -227,7 +228,8 @@ namespace smt {
bb_candidates.reset();
}
if (!m.inc())
b.set_canceled();
b.set_canceled();
// b.notify_cv_waiters;
}
lbool parallel::backbones_worker::probe_literal(bool_var v, expr *e, bool is_retry) {
@ -397,6 +399,7 @@ namespace smt {
if (!m.inc()) {
b.set_canceled();
// b.notify_cv_waiters;
return;
}
if (canceled())
@ -524,6 +527,7 @@ namespace smt {
}
if (!m.inc())
b.set_canceled();
// b.notify_cv_waiters;
}
void parallel::backbones_worker::cancel() {
@ -559,7 +563,7 @@ namespace smt {
if (m_ablate_backtracking) {
// Ablation: for each target, pass the entire path from root to that node
for (auto const& target : targets) {
if (m_search_tree.is_lease_canceled(target.leased_node, target.cancel_epoch))
if (m_search_tree.is_lease_canceled(target.leased_node))
continue;
// Reconstruct the full path from root to this target node
@ -752,6 +756,7 @@ namespace smt {
b.publish_minimized_core(m_l2g, asms, source, original_size, minimized);
}
b.set_canceled();
// b.notify_cv_waiters;
}
void parallel::worker::run() {
@ -773,21 +778,29 @@ namespace smt {
if (m_config.m_global_backbones) {
bb_candidates local_candidates = find_backbone_candidates();
b.collect_backbone_candidates(m_l2g, local_candidates);
if (!m.inc())
if (!m.inc()) {
b.set_canceled();
// b.notify_cv_waiters;
break;
}
}
lbool r = check_cube(cube);
if (b.lease_canceled(lease)) {
bool cancel_signaled = false;
if (b.release_canceled_lease(id, lease, cancel_signaled)) {
LOG_WORKER(1, " abandoning canceled lease\n");
lease = {};
m.limit().dec_cancel();
if (cancel_signaled)
m.limit().dec_cancel();
continue;
}
if (!m.inc())
if (!m.inc()) {
b.set_canceled();
// b.notify_cv_waiters;
break;
}
switch (r) {
case l_undef: {
@ -846,8 +859,11 @@ namespace smt {
if (m_config.m_share_units)
share_units();
}
if (!m.inc())
if (!m.inc()) {
b.set_canceled();
// b.notify_cv_waiters;
return;
}
}
parallel::worker::worker(unsigned id, parallel &p, expr_ref_vector const &_asms)
@ -1135,7 +1151,7 @@ namespace smt {
// only cancel workers that currently hold a lease, whose lease is canceled,
// and haven't already been signaled (prevents multiple inc_cancel() for same lease)
if (lease.leased_node && !lease.cancel_signaled && m_search_tree.is_lease_canceled(lease.leased_node, lease.cancel_epoch)) {
if (lease.leased_node && !lease.cancel_signaled && m_search_tree.is_lease_canceled(lease.leased_node)) {
p.m_workers[worker_id]->cancel_lease();
m_worker_leases[worker_id].cancel_signaled = true;
}
@ -1190,14 +1206,13 @@ namespace smt {
unsigned best_idx = select_best_core_min_job_unlocked();
m_core_min_jobs.swap(best_idx, m_core_min_jobs.size() - 1);
core_min_job* job = m_core_min_jobs.detach_back();
scoped_ptr<core_min_job> job = m_core_min_jobs.detach_back();
m_core_min_jobs.pop_back();
SASSERT(job);
source = job->source;
core.reset();
for (expr* c : job->core)
core.push_back(g2l(c));
dealloc(job);
return source != nullptr;
}
@ -1288,7 +1303,7 @@ namespace smt {
if (!g_core.empty()) {
collect_matching_targets_unlocked(source, g_core[0].get(), g_core, targets);
for (auto const& target : targets) {
if (!m_search_tree.is_lease_canceled(target.leased_node, target.cancel_epoch))
if (!m_search_tree.is_lease_canceled(target.leased_node))
m_search_tree.backtrack(target.leased_node, g_core);
}
}
@ -1342,7 +1357,7 @@ namespace smt {
for (node* t : matches) {
if (!t || t == source)
continue;
if (m_search_tree.is_lease_canceled(t, t->get_cancel_epoch()))
if (m_search_tree.is_lease_canceled(t))
continue;
// When source is provided, keep only external matches. Nodes in the
@ -1369,7 +1384,7 @@ namespace smt {
if (!is_highest_ancestor)
continue;
targets.push_back({ t, t->get_cancel_epoch() });
targets.push_back({ t });
}
}
@ -1385,7 +1400,7 @@ namespace smt {
SASSERT(lease != nullptr || targets != nullptr);
bool did_backtrack = false;
if (lease && !m_search_tree.is_lease_canceled(lease->leased_node, lease->cancel_epoch)) {
if (lease && !m_search_tree.is_lease_canceled(lease->leased_node)) {
// we close/backtrack regardless of whether this lease is stale or not, as long as the lease isn't canceled
// i.e. worker 1 splits this node, but then worker 2 determines UNSAT --> worker 2 is stale but we still close this node and backtrack
did_backtrack = true;
@ -1395,7 +1410,7 @@ namespace smt {
}
if (targets) {
for (auto const& target : *targets) {
if (m_search_tree.is_lease_canceled(target.leased_node, target.cancel_epoch))
if (m_search_tree.is_lease_canceled(target.leased_node))
continue;
did_backtrack = true;
@ -1427,13 +1442,13 @@ namespace smt {
if (m_state != state::is_running)
return;
if (m_search_tree.is_lease_canceled(lease.leased_node, lease.cancel_epoch))
if (m_search_tree.is_lease_canceled(lease.leased_node))
return;
expr_ref lit(m), nlit(m);
lit = l2g(atom);
nlit = mk_not(m, lit);
bool did_split = m_search_tree.try_split(lease.leased_node, lease.cancel_epoch, lit, nlit, effort);
bool did_split = m_search_tree.try_split(lease.leased_node, lit, nlit, effort);
release_lease_unlocked(worker_id, lease.leased_node);
@ -1449,9 +1464,22 @@ namespace smt {
release_lease_unlocked(worker_id, lease.leased_node);
}
bool parallel::batch_manager::lease_canceled(node_lease const &lease) {
bool parallel::batch_manager::release_canceled_lease(unsigned worker_id, node_lease const &lease, bool& cancel_signaled) {
std::scoped_lock lock(mux);
return m_state == state::is_running && m_search_tree.is_lease_canceled(lease.leased_node, lease.cancel_epoch);
cancel_signaled = false;
if (m_state != state::is_running || !lease.leased_node || worker_id >= m_worker_leases.size())
return false;
auto& stored = m_worker_leases[worker_id];
if (stored.leased_node != lease.leased_node)
return false;
if (!m_search_tree.is_lease_canceled(stored.leased_node))
return false;
cancel_signaled = stored.cancel_signaled;
release_lease_unlocked(worker_id, stored.leased_node);
return true;
}
void parallel::batch_manager::collect_clause(ast_translation &l2g, unsigned source_worker_id, expr *clause) {
@ -1701,6 +1729,12 @@ namespace smt {
cancel_background_threads();
}
// void parallel::batch_manager::notify_cv_waiters {
// std::scoped_lock lock(mux);
// m_bb_cv.notify_all();
// m_core_min_cv.notify_all();
// }
void parallel::batch_manager::set_exception(unsigned error_code) {
std::scoped_lock lock(mux);
IF_VERBOSE(1, verbose_stream() << "Batch manager setting exception code: " << error_code << ".\n");
@ -1763,7 +1797,6 @@ namespace smt {
IF_VERBOSE(2, m_search_tree.display(verbose_stream()); verbose_stream() << "\n";);
lease.leased_node = t;
lease.cancel_epoch = t->get_cancel_epoch();
if (id >= m_worker_leases.size())
m_worker_leases.resize(id + 1);
m_worker_leases[id] = lease;
@ -1878,20 +1911,14 @@ namespace smt {
auto safe_run = [&](auto* w) {
try {
w->run();
if (w->limit().is_canceled())
m_batch_manager.set_canceled();
}
catch (z3_error& err) {
if (!w->limit().is_canceled())
m_batch_manager.set_exception(err.error_code());
else
m_batch_manager.set_canceled();
}
catch (z3_exception& ex) {
if (!w->limit().is_canceled() && !is_cancellation_exception(ex.what()))
m_batch_manager.set_exception(ex.what());
else
m_batch_manager.set_canceled();
}
};

View file

@ -64,12 +64,6 @@ namespace smt {
struct node_lease {
node* leased_node = nullptr;
// Cancellation generation counter for this node/subtree.
// Incremented when the node is closed; used to signal that all
// workers holding leases on this node (or its descendants)
// must abandon work immediately.
unsigned cancel_epoch = 0;
// Guards against multiple inc_cancel() calls for the same lease.
// Set when cancel_lease() is signaled; cleared when a new lease is assigned.
bool cancel_signaled = false;
@ -197,6 +191,7 @@ namespace smt {
void set_exception(std::string const& msg);
void set_exception(unsigned error_code);
void set_canceled();
void notify_cv_waiters();
void collect_statistics(::statistics& st) const;
void collect_backbone_candidates(ast_translation& l2g, bb_candidates& bb_candidates);
@ -226,7 +221,7 @@ namespace smt {
unsigned original_core_size, expr_ref_vector const& minimized_core);
void try_split(ast_translation& l2g, unsigned worker_id, node_lease const& lease, expr* atom, unsigned effort);
void release_lease(unsigned worker_id, node_lease const& lease);
bool lease_canceled(node_lease const& lease);
bool release_canceled_lease(unsigned worker_id, node_lease const& lease, bool& cancel_signaled);
void collect_clause(ast_translation& l2g, unsigned source_worker_id, expr* clause);
expr_ref_vector return_shared_clauses(ast_translation& g2l, unsigned& worker_limit, unsigned worker_id);

View file

@ -127,12 +127,6 @@ class parallel_solver {
struct node_lease {
search_tree::node<solver_cube_config>* leased_node = nullptr;
// Cancellation generation counter for this node/subtree.
// Incremented when the node is closed; used to signal that all
// workers holding leases on this node (or its descendants)
// must abandon work immediately.
unsigned cancel_epoch = 0;
// Guards against multiple inc_cancel() calls for the same lease.
// Set when cancel_lease() is signaled; cleared when a new lease is assigned.
bool cancel_signaled = false;
@ -218,7 +212,7 @@ class parallel_solver {
expr_ref_vector m_unsat_core;
// called from batch manager to cancel other workers if we've reached a verdict
void cancel_workers_unlocked() {
void cancel_background_threads() {
IF_VERBOSE(1, verbose_stream() << "Canceling workers\n");
for (auto* w : p.m_workers)
w->cancel();
@ -248,7 +242,7 @@ class parallel_solver {
if (id == source_worker_id) continue;
auto const& lease = m_worker_leases[id];
if (lease.leased_node && !lease.cancel_signaled &&
m_search_tree.is_lease_canceled(lease.leased_node, lease.cancel_epoch)) {
m_search_tree.is_lease_canceled(lease.leased_node)) {
p.m_workers[id]->cancel_lease();
m_worker_leases[id].cancel_signaled = true;
}
@ -316,7 +310,7 @@ class parallel_solver {
for (auto* t : matches) {
if (!t || t == source)
continue;
if (m_search_tree.is_lease_canceled(t, t->get_cancel_epoch()))
if (m_search_tree.is_lease_canceled(t))
continue;
// When source is provided, keep only external matches. Nodes in the
@ -343,7 +337,7 @@ class parallel_solver {
if (!is_highest_ancestor)
continue;
targets.push_back({ t, t->get_cancel_epoch() });
targets.push_back({ t });
}
}
@ -405,7 +399,7 @@ class parallel_solver {
SASSERT(lease != nullptr || targets != nullptr);
bool did_backtrack = false;
if (lease && !m_search_tree.is_lease_canceled(lease->leased_node, lease->cancel_epoch)) {
if (lease && !m_search_tree.is_lease_canceled(lease->leased_node)) {
// we close/backtrack regardless of whether this lease is stale or not, as long as the lease isn't canceled
// i.e. worker 1 splits this node, but then worker 2 determines UNSAT --> worker 2 is stale but we still close this node and backtrack
did_backtrack = true;
@ -414,7 +408,7 @@ class parallel_solver {
}
if (targets) {
for (auto const& target : *targets) {
if (m_search_tree.is_lease_canceled(target.leased_node, target.cancel_epoch))
if (m_search_tree.is_lease_canceled(target.leased_node))
continue;
did_backtrack = true;
m_search_tree.backtrack(target.leased_node, g_core);
@ -438,7 +432,7 @@ class parallel_solver {
SASSERT(m_unsat_core.empty());
for (auto& e : m_search_tree.get_core_from_root())
m_unsat_core.push_back(e.get());
cancel_workers_unlocked();
cancel_background_threads();
}
}
@ -477,16 +471,23 @@ class parallel_solver {
if (m_state != state::is_running) return;
m_state = state::is_sat;
m_model = mdl.translate(l2g);
cancel_workers_unlocked();
cancel_background_threads();
}
void set_cancel() {
void set_canceled() {
std::scoped_lock lock(mux);
if (m_state != state::is_running)
return;
cancel_workers_unlocked();
cancel_background_threads();
}
// CODE IS LOCKED WHEN WE ADD THIS FUNCTION -- NEED TO FIX
// void notify_cv_waiters() {
// std::scoped_lock lock(mux);
// m_bb_cv.notify_all();
// m_core_min_cv.notify_all();
// }
void set_unsat(ast_translation& l2g,
expr_ref_vector const& core) {
std::scoped_lock lock(mux);
@ -496,7 +497,7 @@ class parallel_solver {
SASSERT(m_unsat_core.empty());
for (expr* c : core)
m_unsat_core.push_back(l2g(c));
cancel_workers_unlocked();
cancel_background_threads();
}
void set_exception(std::string const& msg) {
@ -505,7 +506,7 @@ class parallel_solver {
if (m_state != state::is_running) return;
m_state = state::is_exception_msg;
m_exception_msg = msg;
cancel_workers_unlocked();
cancel_background_threads();
}
void set_exception(unsigned error_code) {
@ -514,7 +515,7 @@ class parallel_solver {
if (m_state != state::is_running) return;
m_state = state::is_exception_code;
m_exception_code = error_code;
cancel_workers_unlocked();
cancel_background_threads();
}
bool get_cube(ast_translation& g2l, unsigned id,
@ -531,7 +532,6 @@ class parallel_solver {
if (!t) return false;
lease.leased_node = t;
lease.cancel_epoch = t->get_cancel_epoch();
if (id >= m_worker_leases.size())
m_worker_leases.resize(id + 1);
m_worker_leases[id] = lease;
@ -638,7 +638,7 @@ class parallel_solver {
m_unsat_core.push_back(l2g(e));
++m_stats.m_core_min_jobs_published;
++m_stats.m_core_min_global_unsat;
cancel_workers_unlocked();
cancel_background_threads();
return;
}
@ -651,7 +651,7 @@ class parallel_solver {
if (!g_core.empty()) {
collect_matching_targets_unlocked(source, g_core[0].get(), g_core, targets);
for (auto const& target : targets) {
if (!m_search_tree.is_lease_canceled(target.leased_node, target.cancel_epoch))
if (!m_search_tree.is_lease_canceled(target.leased_node))
m_search_tree.backtrack(target.leased_node, g_core);
}
}
@ -672,7 +672,7 @@ class parallel_solver {
SASSERT(m_unsat_core.empty());
for (auto& e : m_search_tree.get_core_from_root())
m_unsat_core.push_back(e.get());
cancel_workers_unlocked();
cancel_background_threads();
}
}
@ -682,7 +682,7 @@ class parallel_solver {
return false;
if (m_state != state::is_running)
return false;
if (m_search_tree.is_lease_canceled(lease.leased_node, lease.cancel_epoch))
if (m_search_tree.is_lease_canceled(lease.leased_node))
return false;
expr_ref _atom(l2g(atom), m);
@ -694,8 +694,7 @@ class parallel_solver {
expr* atom, unsigned effort) {
std::scoped_lock lock(mux);
if (m_state != state::is_running) return;
if (m_search_tree.is_lease_canceled(
lease.leased_node, lease.cancel_epoch)) return;
if (m_search_tree.is_lease_canceled(lease.leased_node)) return;
expr_ref lit(m), nlit(m);
lit = l2g(atom);
@ -704,8 +703,7 @@ class parallel_solver {
VERIFY(!lease.leased_node->path_contains_atom(nlit));
bool did_split = m_search_tree.try_split(
lease.leased_node, lease.cancel_epoch,
lit, nlit, effort);
lease.leased_node, lit, nlit, effort);
release_lease_unlocked(worker_id, lease.leased_node);
@ -723,11 +721,22 @@ class parallel_solver {
release_lease_unlocked(worker_id, lease.leased_node);
}
bool lease_canceled(node_lease const& lease) {
bool release_canceled_lease(unsigned worker_id, node_lease const& lease, bool& cancel_signaled) {
std::scoped_lock lock(mux);
return m_state == state::is_running &&
m_search_tree.is_lease_canceled(
lease.leased_node, lease.cancel_epoch);
cancel_signaled = false;
if (m_state != state::is_running || !lease.leased_node || worker_id >= m_worker_leases.size())
return false;
auto& stored = m_worker_leases[worker_id];
if (stored.leased_node != lease.leased_node)
return false;
if (!m_search_tree.is_lease_canceled(stored.leased_node))
return false;
cancel_signaled = stored.cancel_signaled;
release_lease_unlocked(worker_id, stored.leased_node);
return true;
}
void collect_clause(ast_translation& l2g,
@ -1174,17 +1183,20 @@ class parallel_solver {
}
lbool r = check_cube(cube);
if (b.lease_canceled(lease)) {
bool cancel_signaled = false;
if (b.release_canceled_lease(id, lease, cancel_signaled)) {
LOG_WORKER(1, " abandoning canceled lease\n");
lease = {};
m.limit().dec_cancel();
if (cancel_signaled)
m.limit().dec_cancel();
continue;
}
// NSB - at this point it shouldn't be possible to call inc_cancel.
// Is this ensured? I am not sure.
if (!m.inc()) {
b.set_cancel();
b.set_canceled();
// b.notify_cv_waiters();
return;
}
@ -1502,7 +1514,8 @@ class parallel_solver {
while (true) {
if (!m.inc()) {
b.set_cancel();
b.set_canceled();
// b.notify_cv_waiters();
return;
}
if (canceled())
@ -1630,8 +1643,6 @@ class parallel_solver {
curr_batch.reset();
}
if (!m.inc())
b.set_cancel();
}
public:
@ -1831,8 +1842,8 @@ class parallel_solver {
if (minimized.size() < original_size)
b.publish_minimized_core(m_l2g, asms, source, original_size, minimized);
}
if (!m.inc())
b.set_cancel();
b.set_canceled();
// b.notify_cv_waiters();
}
void cancel() {
@ -1892,8 +1903,6 @@ public:
static_cast<unsigned>(std::thread::hardware_concurrency()),
pp.threads_max());
bool core_minimize = pp.core_minimize();
if (pp.ablate_backtracking())
core_minimize = false;
unsigned num_bb_threads = pp.num_bb_threads();
if (num_bb_threads > 2)
throw default_exception("parallel.num_bb_threads must be 0, 1, or 2");
@ -1941,39 +1950,14 @@ public:
m_batch_manager.initialize(num_bb_threads);
auto safe_run = [&](std::function<void()> run_fn, reslimit& lim) {
try {
run_fn();
if (lim.is_canceled())
m_batch_manager.set_cancel();
} catch (z3_error &err) {
IF_VERBOSE(0, verbose_stream() << "Exception in parallel solver: " << err.what() << "\n");
if (!lim.is_canceled())
m_batch_manager.set_exception(err.error_code());
else
m_batch_manager.set_cancel();
} catch (z3_exception &ex) {
IF_VERBOSE(0, verbose_stream() << "Exception in parallel solver: " << ex.what() << "\n");
if (!lim.is_canceled() && !is_cancellation_exception(ex.what()))
m_batch_manager.set_exception(ex.what());
else
m_batch_manager.set_cancel();
}
};
/* Launch threads. */
vector<std::thread> threads;
for (auto *w : m_workers)
threads.push_back(std::thread([w, &safe_run]() {
safe_run([w]() { w->run(); }, w->limit());
}));
threads.push_back(std::thread([w]() { w->run(); }));
if (m_core_minimizer_worker)
threads.push_back(std::thread([this, &safe_run]() {
safe_run([this]() { m_core_minimizer_worker->run(); }, m_core_minimizer_worker->limit());
}));
threads.push_back(std::thread([this]() { m_core_minimizer_worker->run(); }));
for (auto* w : m_global_backbones_workers)
threads.push_back(std::thread([w, &safe_run]() {
safe_run([w]() { w->run(); }, w->limit());
}));
threads.push_back(std::thread([w]() { w->run(); }));
for (auto& t : threads)
t.join();

View file

@ -50,7 +50,6 @@ namespace search_tree {
unsigned m_effort_spent = 0;
unsigned m_round_max_effort = 0;
unsigned m_active_workers = 0;
unsigned m_cancel_epoch = 0;
public:
node(literal const &l, node *parent) : m_literal(l), m_parent(parent), m_status(status::open) {}
@ -156,12 +155,6 @@ namespace search_tree {
m_round_max_effort = effort;
m_effort_spent += m_round_max_effort;
}
unsigned get_cancel_epoch() const {
return m_cancel_epoch;
}
void inc_cancel_epoch() {
++m_cancel_epoch;
}
};
template <typename Config> class tree {
@ -348,7 +341,6 @@ namespace search_tree {
void close(node<Config> *n, vector<literal> const &C) {
if (!n || n->get_status() == status::closed)
return;
n->inc_cancel_epoch();
n->set_status(status::closed);
n->set_core(C);
close(n->left(), C);
@ -452,8 +444,8 @@ namespace search_tree {
// On timeout, either expand the current leaf or reopen the node for a
// later revisit, depending on the tree-expansion heuristic.
bool try_split(node<Config> *n, unsigned cancel_epoch, literal const &a, literal const &b, unsigned effort) {
if (is_lease_canceled(n, cancel_epoch))
bool try_split(node<Config> *n, literal const &a, literal const &b, unsigned effort) {
if (is_lease_canceled(n))
return false;
// Record at most one effort contribution per concurrent round on this node.
@ -552,8 +544,8 @@ namespace search_tree {
n->dec_active_workers();
}
bool is_lease_canceled(node<Config>* n, unsigned cancel_epoch) const {
return !n || n->get_status() == status::closed || n->get_cancel_epoch() != cancel_epoch;
bool is_lease_canceled(node<Config>* n) const {
return !n || n->get_status() == status::closed;
}
vector<literal> const &get_core_from_root() const {