diff --git a/src/smt/smt_parallel.cpp b/src/smt/smt_parallel.cpp index 8a3910590..5497d7ec0 100644 --- a/src/smt/smt_parallel.cpp +++ b/src/smt/smt_parallel.cpp @@ -80,6 +80,7 @@ namespace smt { try { if (!m.inc()) { b.set_canceled(); + // b.notify_cv_waiters; return; } res = m_sls->check(); @@ -227,7 +228,8 @@ namespace smt { bb_candidates.reset(); } if (!m.inc()) - b.set_canceled(); + b.set_canceled(); + // b.notify_cv_waiters; } lbool parallel::backbones_worker::probe_literal(bool_var v, expr *e, bool is_retry) { @@ -397,6 +399,7 @@ namespace smt { if (!m.inc()) { b.set_canceled(); + // b.notify_cv_waiters; return; } if (canceled()) @@ -524,6 +527,7 @@ namespace smt { } if (!m.inc()) b.set_canceled(); + // b.notify_cv_waiters; } void parallel::backbones_worker::cancel() { @@ -559,7 +563,7 @@ namespace smt { if (m_ablate_backtracking) { // Ablation: for each target, pass the entire path from root to that node for (auto const& target : targets) { - if (m_search_tree.is_lease_canceled(target.leased_node, target.cancel_epoch)) + if (m_search_tree.is_lease_canceled(target.leased_node)) continue; // Reconstruct the full path from root to this target node @@ -752,6 +756,7 @@ namespace smt { b.publish_minimized_core(m_l2g, asms, source, original_size, minimized); } b.set_canceled(); + // b.notify_cv_waiters; } void parallel::worker::run() { @@ -773,21 +778,29 @@ namespace smt { if (m_config.m_global_backbones) { bb_candidates local_candidates = find_backbone_candidates(); b.collect_backbone_candidates(m_l2g, local_candidates); - if (!m.inc()) + if (!m.inc()) { + b.set_canceled(); + // b.notify_cv_waiters; break; + } } lbool r = check_cube(cube); - if (b.lease_canceled(lease)) { + bool cancel_signaled = false; + if (b.release_canceled_lease(id, lease, cancel_signaled)) { LOG_WORKER(1, " abandoning canceled lease\n"); lease = {}; - m.limit().dec_cancel(); + if (cancel_signaled) + m.limit().dec_cancel(); continue; } - if (!m.inc()) + if (!m.inc()) { + b.set_canceled(); + // b.notify_cv_waiters; break; + } switch (r) { case l_undef: { @@ -846,8 +859,11 @@ namespace smt { if (m_config.m_share_units) share_units(); } - if (!m.inc()) + if (!m.inc()) { b.set_canceled(); + // b.notify_cv_waiters; + return; + } } parallel::worker::worker(unsigned id, parallel &p, expr_ref_vector const &_asms) @@ -1135,7 +1151,7 @@ namespace smt { // only cancel workers that currently hold a lease, whose lease is canceled, // and haven't already been signaled (prevents multiple inc_cancel() for same lease) - if (lease.leased_node && !lease.cancel_signaled && m_search_tree.is_lease_canceled(lease.leased_node, lease.cancel_epoch)) { + if (lease.leased_node && !lease.cancel_signaled && m_search_tree.is_lease_canceled(lease.leased_node)) { p.m_workers[worker_id]->cancel_lease(); m_worker_leases[worker_id].cancel_signaled = true; } @@ -1190,14 +1206,13 @@ namespace smt { unsigned best_idx = select_best_core_min_job_unlocked(); m_core_min_jobs.swap(best_idx, m_core_min_jobs.size() - 1); - core_min_job* job = m_core_min_jobs.detach_back(); + scoped_ptr job = m_core_min_jobs.detach_back(); m_core_min_jobs.pop_back(); SASSERT(job); source = job->source; core.reset(); for (expr* c : job->core) core.push_back(g2l(c)); - dealloc(job); return source != nullptr; } @@ -1288,7 +1303,7 @@ namespace smt { if (!g_core.empty()) { collect_matching_targets_unlocked(source, g_core[0].get(), g_core, targets); for (auto const& target : targets) { - if (!m_search_tree.is_lease_canceled(target.leased_node, target.cancel_epoch)) + if (!m_search_tree.is_lease_canceled(target.leased_node)) m_search_tree.backtrack(target.leased_node, g_core); } } @@ -1342,7 +1357,7 @@ namespace smt { for (node* t : matches) { if (!t || t == source) continue; - if (m_search_tree.is_lease_canceled(t, t->get_cancel_epoch())) + if (m_search_tree.is_lease_canceled(t)) continue; // When source is provided, keep only external matches. Nodes in the @@ -1369,7 +1384,7 @@ namespace smt { if (!is_highest_ancestor) continue; - targets.push_back({ t, t->get_cancel_epoch() }); + targets.push_back({ t }); } } @@ -1385,7 +1400,7 @@ namespace smt { SASSERT(lease != nullptr || targets != nullptr); bool did_backtrack = false; - if (lease && !m_search_tree.is_lease_canceled(lease->leased_node, lease->cancel_epoch)) { + if (lease && !m_search_tree.is_lease_canceled(lease->leased_node)) { // we close/backtrack regardless of whether this lease is stale or not, as long as the lease isn't canceled // i.e. worker 1 splits this node, but then worker 2 determines UNSAT --> worker 2 is stale but we still close this node and backtrack did_backtrack = true; @@ -1395,7 +1410,7 @@ namespace smt { } if (targets) { for (auto const& target : *targets) { - if (m_search_tree.is_lease_canceled(target.leased_node, target.cancel_epoch)) + if (m_search_tree.is_lease_canceled(target.leased_node)) continue; did_backtrack = true; @@ -1427,13 +1442,13 @@ namespace smt { if (m_state != state::is_running) return; - if (m_search_tree.is_lease_canceled(lease.leased_node, lease.cancel_epoch)) + if (m_search_tree.is_lease_canceled(lease.leased_node)) return; expr_ref lit(m), nlit(m); lit = l2g(atom); nlit = mk_not(m, lit); - bool did_split = m_search_tree.try_split(lease.leased_node, lease.cancel_epoch, lit, nlit, effort); + bool did_split = m_search_tree.try_split(lease.leased_node, lit, nlit, effort); release_lease_unlocked(worker_id, lease.leased_node); @@ -1449,9 +1464,22 @@ namespace smt { release_lease_unlocked(worker_id, lease.leased_node); } - bool parallel::batch_manager::lease_canceled(node_lease const &lease) { + bool parallel::batch_manager::release_canceled_lease(unsigned worker_id, node_lease const &lease, bool& cancel_signaled) { std::scoped_lock lock(mux); - return m_state == state::is_running && m_search_tree.is_lease_canceled(lease.leased_node, lease.cancel_epoch); + cancel_signaled = false; + if (m_state != state::is_running || !lease.leased_node || worker_id >= m_worker_leases.size()) + return false; + + auto& stored = m_worker_leases[worker_id]; + if (stored.leased_node != lease.leased_node) + return false; + + if (!m_search_tree.is_lease_canceled(stored.leased_node)) + return false; + + cancel_signaled = stored.cancel_signaled; + release_lease_unlocked(worker_id, stored.leased_node); + return true; } void parallel::batch_manager::collect_clause(ast_translation &l2g, unsigned source_worker_id, expr *clause) { @@ -1701,6 +1729,12 @@ namespace smt { cancel_background_threads(); } + // void parallel::batch_manager::notify_cv_waiters { + // std::scoped_lock lock(mux); + // m_bb_cv.notify_all(); + // m_core_min_cv.notify_all(); + // } + void parallel::batch_manager::set_exception(unsigned error_code) { std::scoped_lock lock(mux); IF_VERBOSE(1, verbose_stream() << "Batch manager setting exception code: " << error_code << ".\n"); @@ -1763,7 +1797,6 @@ namespace smt { IF_VERBOSE(2, m_search_tree.display(verbose_stream()); verbose_stream() << "\n";); lease.leased_node = t; - lease.cancel_epoch = t->get_cancel_epoch(); if (id >= m_worker_leases.size()) m_worker_leases.resize(id + 1); m_worker_leases[id] = lease; @@ -1878,20 +1911,14 @@ namespace smt { auto safe_run = [&](auto* w) { try { w->run(); - if (w->limit().is_canceled()) - m_batch_manager.set_canceled(); } catch (z3_error& err) { if (!w->limit().is_canceled()) m_batch_manager.set_exception(err.error_code()); - else - m_batch_manager.set_canceled(); } catch (z3_exception& ex) { if (!w->limit().is_canceled() && !is_cancellation_exception(ex.what())) m_batch_manager.set_exception(ex.what()); - else - m_batch_manager.set_canceled(); } }; diff --git a/src/smt/smt_parallel.h b/src/smt/smt_parallel.h index 79b326889..2e0632aed 100644 --- a/src/smt/smt_parallel.h +++ b/src/smt/smt_parallel.h @@ -64,12 +64,6 @@ namespace smt { struct node_lease { node* leased_node = nullptr; - // Cancellation generation counter for this node/subtree. - // Incremented when the node is closed; used to signal that all - // workers holding leases on this node (or its descendants) - // must abandon work immediately. - unsigned cancel_epoch = 0; - // Guards against multiple inc_cancel() calls for the same lease. // Set when cancel_lease() is signaled; cleared when a new lease is assigned. bool cancel_signaled = false; @@ -197,6 +191,7 @@ namespace smt { void set_exception(std::string const& msg); void set_exception(unsigned error_code); void set_canceled(); + void notify_cv_waiters(); void collect_statistics(::statistics& st) const; void collect_backbone_candidates(ast_translation& l2g, bb_candidates& bb_candidates); @@ -226,7 +221,7 @@ namespace smt { unsigned original_core_size, expr_ref_vector const& minimized_core); void try_split(ast_translation& l2g, unsigned worker_id, node_lease const& lease, expr* atom, unsigned effort); void release_lease(unsigned worker_id, node_lease const& lease); - bool lease_canceled(node_lease const& lease); + bool release_canceled_lease(unsigned worker_id, node_lease const& lease, bool& cancel_signaled); void collect_clause(ast_translation& l2g, unsigned source_worker_id, expr* clause); expr_ref_vector return_shared_clauses(ast_translation& g2l, unsigned& worker_limit, unsigned worker_id); diff --git a/src/solver/parallel_tactical2.cpp b/src/solver/parallel_tactical2.cpp index ec27f176f..601f8f476 100644 --- a/src/solver/parallel_tactical2.cpp +++ b/src/solver/parallel_tactical2.cpp @@ -127,12 +127,6 @@ class parallel_solver { struct node_lease { search_tree::node* leased_node = nullptr; - // Cancellation generation counter for this node/subtree. - // Incremented when the node is closed; used to signal that all - // workers holding leases on this node (or its descendants) - // must abandon work immediately. - unsigned cancel_epoch = 0; - // Guards against multiple inc_cancel() calls for the same lease. // Set when cancel_lease() is signaled; cleared when a new lease is assigned. bool cancel_signaled = false; @@ -218,7 +212,7 @@ class parallel_solver { expr_ref_vector m_unsat_core; // called from batch manager to cancel other workers if we've reached a verdict - void cancel_workers_unlocked() { + void cancel_background_threads() { IF_VERBOSE(1, verbose_stream() << "Canceling workers\n"); for (auto* w : p.m_workers) w->cancel(); @@ -248,7 +242,7 @@ class parallel_solver { if (id == source_worker_id) continue; auto const& lease = m_worker_leases[id]; if (lease.leased_node && !lease.cancel_signaled && - m_search_tree.is_lease_canceled(lease.leased_node, lease.cancel_epoch)) { + m_search_tree.is_lease_canceled(lease.leased_node)) { p.m_workers[id]->cancel_lease(); m_worker_leases[id].cancel_signaled = true; } @@ -316,7 +310,7 @@ class parallel_solver { for (auto* t : matches) { if (!t || t == source) continue; - if (m_search_tree.is_lease_canceled(t, t->get_cancel_epoch())) + if (m_search_tree.is_lease_canceled(t)) continue; // When source is provided, keep only external matches. Nodes in the @@ -343,7 +337,7 @@ class parallel_solver { if (!is_highest_ancestor) continue; - targets.push_back({ t, t->get_cancel_epoch() }); + targets.push_back({ t }); } } @@ -405,7 +399,7 @@ class parallel_solver { SASSERT(lease != nullptr || targets != nullptr); bool did_backtrack = false; - if (lease && !m_search_tree.is_lease_canceled(lease->leased_node, lease->cancel_epoch)) { + if (lease && !m_search_tree.is_lease_canceled(lease->leased_node)) { // we close/backtrack regardless of whether this lease is stale or not, as long as the lease isn't canceled // i.e. worker 1 splits this node, but then worker 2 determines UNSAT --> worker 2 is stale but we still close this node and backtrack did_backtrack = true; @@ -414,7 +408,7 @@ class parallel_solver { } if (targets) { for (auto const& target : *targets) { - if (m_search_tree.is_lease_canceled(target.leased_node, target.cancel_epoch)) + if (m_search_tree.is_lease_canceled(target.leased_node)) continue; did_backtrack = true; m_search_tree.backtrack(target.leased_node, g_core); @@ -438,7 +432,7 @@ class parallel_solver { SASSERT(m_unsat_core.empty()); for (auto& e : m_search_tree.get_core_from_root()) m_unsat_core.push_back(e.get()); - cancel_workers_unlocked(); + cancel_background_threads(); } } @@ -477,16 +471,23 @@ class parallel_solver { if (m_state != state::is_running) return; m_state = state::is_sat; m_model = mdl.translate(l2g); - cancel_workers_unlocked(); + cancel_background_threads(); } - void set_cancel() { + void set_canceled() { std::scoped_lock lock(mux); if (m_state != state::is_running) return; - cancel_workers_unlocked(); + cancel_background_threads(); } + // CODE IS LOCKED WHEN WE ADD THIS FUNCTION -- NEED TO FIX + // void notify_cv_waiters() { + // std::scoped_lock lock(mux); + // m_bb_cv.notify_all(); + // m_core_min_cv.notify_all(); + // } + void set_unsat(ast_translation& l2g, expr_ref_vector const& core) { std::scoped_lock lock(mux); @@ -496,7 +497,7 @@ class parallel_solver { SASSERT(m_unsat_core.empty()); for (expr* c : core) m_unsat_core.push_back(l2g(c)); - cancel_workers_unlocked(); + cancel_background_threads(); } void set_exception(std::string const& msg) { @@ -505,7 +506,7 @@ class parallel_solver { if (m_state != state::is_running) return; m_state = state::is_exception_msg; m_exception_msg = msg; - cancel_workers_unlocked(); + cancel_background_threads(); } void set_exception(unsigned error_code) { @@ -514,7 +515,7 @@ class parallel_solver { if (m_state != state::is_running) return; m_state = state::is_exception_code; m_exception_code = error_code; - cancel_workers_unlocked(); + cancel_background_threads(); } bool get_cube(ast_translation& g2l, unsigned id, @@ -531,7 +532,6 @@ class parallel_solver { if (!t) return false; lease.leased_node = t; - lease.cancel_epoch = t->get_cancel_epoch(); if (id >= m_worker_leases.size()) m_worker_leases.resize(id + 1); m_worker_leases[id] = lease; @@ -638,7 +638,7 @@ class parallel_solver { m_unsat_core.push_back(l2g(e)); ++m_stats.m_core_min_jobs_published; ++m_stats.m_core_min_global_unsat; - cancel_workers_unlocked(); + cancel_background_threads(); return; } @@ -651,7 +651,7 @@ class parallel_solver { if (!g_core.empty()) { collect_matching_targets_unlocked(source, g_core[0].get(), g_core, targets); for (auto const& target : targets) { - if (!m_search_tree.is_lease_canceled(target.leased_node, target.cancel_epoch)) + if (!m_search_tree.is_lease_canceled(target.leased_node)) m_search_tree.backtrack(target.leased_node, g_core); } } @@ -672,7 +672,7 @@ class parallel_solver { SASSERT(m_unsat_core.empty()); for (auto& e : m_search_tree.get_core_from_root()) m_unsat_core.push_back(e.get()); - cancel_workers_unlocked(); + cancel_background_threads(); } } @@ -682,7 +682,7 @@ class parallel_solver { return false; if (m_state != state::is_running) return false; - if (m_search_tree.is_lease_canceled(lease.leased_node, lease.cancel_epoch)) + if (m_search_tree.is_lease_canceled(lease.leased_node)) return false; expr_ref _atom(l2g(atom), m); @@ -694,8 +694,7 @@ class parallel_solver { expr* atom, unsigned effort) { std::scoped_lock lock(mux); if (m_state != state::is_running) return; - if (m_search_tree.is_lease_canceled( - lease.leased_node, lease.cancel_epoch)) return; + if (m_search_tree.is_lease_canceled(lease.leased_node)) return; expr_ref lit(m), nlit(m); lit = l2g(atom); @@ -704,8 +703,7 @@ class parallel_solver { VERIFY(!lease.leased_node->path_contains_atom(nlit)); bool did_split = m_search_tree.try_split( - lease.leased_node, lease.cancel_epoch, - lit, nlit, effort); + lease.leased_node, lit, nlit, effort); release_lease_unlocked(worker_id, lease.leased_node); @@ -723,11 +721,22 @@ class parallel_solver { release_lease_unlocked(worker_id, lease.leased_node); } - bool lease_canceled(node_lease const& lease) { + bool release_canceled_lease(unsigned worker_id, node_lease const& lease, bool& cancel_signaled) { std::scoped_lock lock(mux); - return m_state == state::is_running && - m_search_tree.is_lease_canceled( - lease.leased_node, lease.cancel_epoch); + cancel_signaled = false; + if (m_state != state::is_running || !lease.leased_node || worker_id >= m_worker_leases.size()) + return false; + + auto& stored = m_worker_leases[worker_id]; + if (stored.leased_node != lease.leased_node) + return false; + + if (!m_search_tree.is_lease_canceled(stored.leased_node)) + return false; + + cancel_signaled = stored.cancel_signaled; + release_lease_unlocked(worker_id, stored.leased_node); + return true; } void collect_clause(ast_translation& l2g, @@ -1174,17 +1183,20 @@ class parallel_solver { } lbool r = check_cube(cube); - if (b.lease_canceled(lease)) { + bool cancel_signaled = false; + if (b.release_canceled_lease(id, lease, cancel_signaled)) { LOG_WORKER(1, " abandoning canceled lease\n"); lease = {}; - m.limit().dec_cancel(); + if (cancel_signaled) + m.limit().dec_cancel(); continue; } // NSB - at this point it shouldn't be possible to call inc_cancel. // Is this ensured? I am not sure. if (!m.inc()) { - b.set_cancel(); + b.set_canceled(); + // b.notify_cv_waiters(); return; } @@ -1502,7 +1514,8 @@ class parallel_solver { while (true) { if (!m.inc()) { - b.set_cancel(); + b.set_canceled(); + // b.notify_cv_waiters(); return; } if (canceled()) @@ -1630,8 +1643,6 @@ class parallel_solver { curr_batch.reset(); } - if (!m.inc()) - b.set_cancel(); } public: @@ -1831,8 +1842,8 @@ class parallel_solver { if (minimized.size() < original_size) b.publish_minimized_core(m_l2g, asms, source, original_size, minimized); } - if (!m.inc()) - b.set_cancel(); + b.set_canceled(); + // b.notify_cv_waiters(); } void cancel() { @@ -1892,8 +1903,6 @@ public: static_cast(std::thread::hardware_concurrency()), pp.threads_max()); bool core_minimize = pp.core_minimize(); - if (pp.ablate_backtracking()) - core_minimize = false; unsigned num_bb_threads = pp.num_bb_threads(); if (num_bb_threads > 2) throw default_exception("parallel.num_bb_threads must be 0, 1, or 2"); @@ -1941,39 +1950,14 @@ public: m_batch_manager.initialize(num_bb_threads); - auto safe_run = [&](std::function run_fn, reslimit& lim) { - try { - run_fn(); - if (lim.is_canceled()) - m_batch_manager.set_cancel(); - } catch (z3_error &err) { - IF_VERBOSE(0, verbose_stream() << "Exception in parallel solver: " << err.what() << "\n"); - if (!lim.is_canceled()) - m_batch_manager.set_exception(err.error_code()); - else - m_batch_manager.set_cancel(); - } catch (z3_exception &ex) { - IF_VERBOSE(0, verbose_stream() << "Exception in parallel solver: " << ex.what() << "\n"); - if (!lim.is_canceled() && !is_cancellation_exception(ex.what())) - m_batch_manager.set_exception(ex.what()); - else - m_batch_manager.set_cancel(); - } - }; /* Launch threads. */ vector threads; for (auto *w : m_workers) - threads.push_back(std::thread([w, &safe_run]() { - safe_run([w]() { w->run(); }, w->limit()); - })); + threads.push_back(std::thread([w]() { w->run(); })); if (m_core_minimizer_worker) - threads.push_back(std::thread([this, &safe_run]() { - safe_run([this]() { m_core_minimizer_worker->run(); }, m_core_minimizer_worker->limit()); - })); + threads.push_back(std::thread([this]() { m_core_minimizer_worker->run(); })); for (auto* w : m_global_backbones_workers) - threads.push_back(std::thread([w, &safe_run]() { - safe_run([w]() { w->run(); }, w->limit()); - })); + threads.push_back(std::thread([w]() { w->run(); })); for (auto& t : threads) t.join(); diff --git a/src/util/search_tree.h b/src/util/search_tree.h index 701c2fe8d..9b9f15064 100644 --- a/src/util/search_tree.h +++ b/src/util/search_tree.h @@ -50,7 +50,6 @@ namespace search_tree { unsigned m_effort_spent = 0; unsigned m_round_max_effort = 0; unsigned m_active_workers = 0; - unsigned m_cancel_epoch = 0; public: node(literal const &l, node *parent) : m_literal(l), m_parent(parent), m_status(status::open) {} @@ -156,12 +155,6 @@ namespace search_tree { m_round_max_effort = effort; m_effort_spent += m_round_max_effort; } - unsigned get_cancel_epoch() const { - return m_cancel_epoch; - } - void inc_cancel_epoch() { - ++m_cancel_epoch; - } }; template class tree { @@ -348,7 +341,6 @@ namespace search_tree { void close(node *n, vector const &C) { if (!n || n->get_status() == status::closed) return; - n->inc_cancel_epoch(); n->set_status(status::closed); n->set_core(C); close(n->left(), C); @@ -452,8 +444,8 @@ namespace search_tree { // On timeout, either expand the current leaf or reopen the node for a // later revisit, depending on the tree-expansion heuristic. - bool try_split(node *n, unsigned cancel_epoch, literal const &a, literal const &b, unsigned effort) { - if (is_lease_canceled(n, cancel_epoch)) + bool try_split(node *n, literal const &a, literal const &b, unsigned effort) { + if (is_lease_canceled(n)) return false; // Record at most one effort contribution per concurrent round on this node. @@ -552,8 +544,8 @@ namespace search_tree { n->dec_active_workers(); } - bool is_lease_canceled(node* n, unsigned cancel_epoch) const { - return !n || n->get_status() == status::closed || n->get_cancel_epoch() != cancel_epoch; + bool is_lease_canceled(node* n) const { + return !n || n->get_status() == status::closed; } vector const &get_core_from_root() const {