diff --git a/src/smt/smt_parallel.cpp b/src/smt/smt_parallel.cpp index d74b71df2..c19a274a8 100644 --- a/src/smt/smt_parallel.cpp +++ b/src/smt/smt_parallel.cpp @@ -111,11 +111,11 @@ namespace smt { } void parallel::worker::run() { - search_tree::node *node = nullptr; + node_lease lease; expr_ref_vector cube(m); while (true) { - if (!b.get_cube(m_g2l, id, cube, node)) { + if (!b.get_cube(m_g2l, id, cube, lease)) { LOG_WORKER(1, " no more cubes\n"); return; } @@ -125,8 +125,18 @@ namespace smt { LOG_WORKER(1, " CUBE SIZE IN MAIN LOOP: " << cube.size() << "\n"); lbool r = check_cube(cube); + if (b.lease_canceled(lease)) { + LOG_WORKER(1, " abandoning canceled lease\n"); + b.abandon_lease(id, lease); + m.limit().reset_cancel(); + lease = {}; + continue; + } + if (!m.inc()) { - b.set_exception("context cancelled"); + if (b.is_batch_running()) { + b.set_exception("context cancelled"); + } return; } @@ -140,7 +150,8 @@ namespace smt { auto atom = get_split_atom(); if (!atom) goto check_cube_start; - b.try_split(m_l2g, id, node, atom, m_config.m_threads_max_conflicts); + b.try_split(m_l2g, id, lease, atom, m_config.m_threads_max_conflicts); + lease = {}; simplify(); break; } @@ -164,7 +175,8 @@ namespace smt { } LOG_WORKER(1, " found unsat cube\n"); - b.backtrack(m_l2g, unsat_core, node); + b.backtrack(m_l2g, id, unsat_core, lease); + lease = {}; if (m_config.m_share_conflicts) b.collect_clause(m_l2g, id, mk_not(mk_and(unsat_core))); @@ -314,18 +326,44 @@ namespace smt { m.limit().cancel(); } - void parallel::batch_manager::backtrack(ast_translation &l2g, expr_ref_vector const &core, - search_tree::node *node) { + void parallel::batch_manager::release_lease_unlocked(unsigned worker_id, node* n, unsigned epoch) { + if (worker_id >= m_worker_leases.size()) + return; + auto &lease = m_worker_leases[worker_id]; + if (!lease.node || lease.node != n || lease.epoch != epoch) + return; + m_search_tree.release_worker(lease.node); + lease = {}; + } + + void parallel::batch_manager::cancel_closed_leases_unlocked(unsigned source_worker_id) { + for (unsigned worker_id = 0; worker_id < m_worker_leases.size(); ++worker_id) { + if (worker_id == source_worker_id) + continue; + auto const& lease = m_worker_leases[worker_id]; + if (!lease.node || !m_search_tree.is_lease_canceled(lease.node, lease.cancel_epoch)) + continue; + p.m_workers[worker_id]->cancel(); + } + } + + void parallel::batch_manager::backtrack(ast_translation &l2g, unsigned worker_id, expr_ref_vector const &core, + node_lease const &lease) { std::scoped_lock lock(mux); IF_VERBOSE(1, verbose_stream() << "Batch manager backtracking.\n"); if (m_state != state::is_running) return; vector g_core; for (auto c : core) { - expr_ref g_c(l2g(c), m); g_core.push_back(expr_ref(l2g(c), m)); } - m_search_tree.backtrack(node, g_core); + release_lease_unlocked(worker_id, lease.node, lease.epoch); + if (lease.node && !m_search_tree.is_lease_canceled(lease.node, lease.cancel_epoch)) + m_search_tree.backtrack(lease.node, g_core); + else if (lease.node && lease.node->get_status() == search_tree::status::closed) + m_search_tree.backtrack(lease.node, g_core); + + cancel_closed_leases_unlocked(worker_id); IF_VERBOSE(1, m_search_tree.display(verbose_stream() << bounded_pp_exprs(core) << "\n");); if (m_search_tree.is_closed()) { @@ -338,8 +376,8 @@ namespace smt { } } - void parallel::batch_manager::try_split(ast_translation &l2g, unsigned source_worker_id, - search_tree::node *node, expr *atom, unsigned effort) { + void parallel::batch_manager::try_split(ast_translation &l2g, unsigned worker_id, + node_lease const &lease, expr *atom, unsigned effort) { std::scoped_lock lock(mux); expr_ref lit(m), nlit(m); lit = l2g(atom); @@ -348,15 +386,34 @@ namespace smt { if (m_state != state::is_running) return; - bool did_split = m_search_tree.try_split(node, lit, nlit, effort); + release_lease_unlocked(worker_id, lease.node, lease.epoch); + if (!lease.node || m_search_tree.is_lease_canceled(lease.node, lease.cancel_epoch)) + return; + + bool did_split = m_search_tree.try_split(lease.node, lit, nlit, effort); if (did_split) { ++m_stats.m_num_cubes; - m_stats.m_max_cube_depth = std::max(m_stats.m_max_cube_depth, node->depth() + 1); + m_stats.m_max_cube_depth = std::max(m_stats.m_max_cube_depth, lease.node->depth() + 1); IF_VERBOSE(1, verbose_stream() << "Batch manager splitting on literal: " << mk_bounded_pp(lit, m, 3) << "\n"); } } + void parallel::batch_manager::abandon_lease(unsigned worker_id, node_lease const &lease) { + std::scoped_lock lock(mux); + release_lease_unlocked(worker_id, lease.node, lease.epoch); + } + + bool parallel::batch_manager::lease_canceled(node_lease const &lease) { + std::scoped_lock lock(mux); + return m_search_tree.is_lease_canceled(lease.node, lease.cancel_epoch); + } + + bool parallel::batch_manager::is_batch_running() { + std::scoped_lock lock(mux); + return m_state == state::is_running; + } + void parallel::batch_manager::collect_clause(ast_translation &l2g, unsigned source_worker_id, expr *clause) { std::scoped_lock lock(mux); expr *g_clause = l2g(clause); @@ -400,11 +457,14 @@ namespace smt { try { r = ctx->check(asms.size(), asms.data()); } catch (z3_error &err) { - b.set_exception(err.error_code()); + if (!m.limit().is_canceled()) + b.set_exception(err.error_code()); } catch (z3_exception &ex) { - b.set_exception(ex.what()); + if (!m.limit().is_canceled()) + b.set_exception(ex.what()); } catch (...) { - b.set_exception("unknown exception"); + if (!m.limit().is_canceled()) + b.set_exception("unknown exception"); } asms.shrink(asms.size() - cube.size()); LOG_WORKER(1, " DONE checking cube " << r << "\n";); @@ -501,7 +561,7 @@ namespace smt { } } - bool parallel::batch_manager::get_cube(ast_translation &g2l, unsigned id, expr_ref_vector &cube, node *&n) { + bool parallel::batch_manager::get_cube(ast_translation &g2l, unsigned id, expr_ref_vector &cube, node_lease &lease) { cube.reset(); std::unique_lock lock(mux); if (m_search_tree.is_closed()) { @@ -512,11 +572,16 @@ namespace smt { IF_VERBOSE(1, verbose_stream() << "aborting get_cube\n";); return false; } - node *t = m_search_tree.activate_node(n); + node *t = m_search_tree.activate_node(lease.node); if (!t) return false; IF_VERBOSE(1, m_search_tree.display(verbose_stream()); verbose_stream() << "\n";); - n = t; + lease.node = t; + lease.epoch = t->epoch(); + lease.cancel_epoch = t->cancel_epoch(); + if (id >= m_worker_leases.size()) + m_worker_leases.resize(id + 1); + m_worker_leases[id] = lease; while (t) { if (cube_config::literal_is_null(t->get_literal())) break; @@ -532,6 +597,8 @@ namespace smt { m_state = state::is_running; m_search_tree.reset(); m_search_tree.set_effort_unit(initial_max_thread_conflicts); + m_worker_leases.reset(); + m_worker_leases.resize(p.num_threads); } void parallel::batch_manager::collect_statistics(::statistics &st) const { @@ -556,7 +623,6 @@ namespace smt { }; scoped_clear clear(*this); - m_batch_manager.initialize(); m_workers.reset(); smt_parallel_params pp(ctx.m_params); @@ -574,6 +640,8 @@ namespace smt { sl.push_child(&(m_sls_worker->limit())); } + m_batch_manager.initialize(); + // Launch threads vector threads; threads.resize(m_should_run_sls ? num_threads + 1 : num_threads); // +1 for sls worker diff --git a/src/smt/smt_parallel.h b/src/smt/smt_parallel.h index bddb10b63..42b68484d 100644 --- a/src/smt/smt_parallel.h +++ b/src/smt/smt_parallel.h @@ -43,6 +43,12 @@ namespace smt { expr_ref clause; }; + struct node_lease { + search_tree::node* node = nullptr; + unsigned epoch = 0; + unsigned cancel_epoch = 0; + }; + class batch_manager { enum state { @@ -66,6 +72,7 @@ namespace smt { stats m_stats; using node = search_tree::node; search_tree::tree m_search_tree; + vector m_worker_leases; unsigned m_exception_code = 0; std::string m_exception_msg; @@ -92,6 +99,8 @@ namespace smt { } void init_parameters_state(); + void release_lease_unlocked(unsigned worker_id, node* n, unsigned epoch); + void cancel_closed_leases_unlocked(unsigned source_worker_id); public: batch_manager(ast_manager& m, parallel& p) : m(m), p(p), m_search_tree(expr_ref(m)) { } @@ -104,9 +113,12 @@ namespace smt { void set_exception(unsigned error_code); void collect_statistics(::statistics& st) const; - bool get_cube(ast_translation& g2l, unsigned id, expr_ref_vector& cube, node*& n); - void backtrack(ast_translation& l2g, expr_ref_vector const& core, node* n); - void try_split(ast_translation& l2g, unsigned id, node* n, expr* atom, unsigned effort); + bool get_cube(ast_translation& g2l, unsigned id, expr_ref_vector& cube, node_lease& lease); + void backtrack(ast_translation& l2g, unsigned worker_id, expr_ref_vector const& core, node_lease const& lease); + void try_split(ast_translation& l2g, unsigned worker_id, node_lease const& lease, expr* atom, unsigned effort); + void abandon_lease(unsigned worker_id, node_lease const& lease); + bool lease_canceled(node_lease const& lease); + bool is_batch_running(); void collect_clause(ast_translation& l2g, unsigned source_worker_id, expr* clause); expr_ref_vector return_shared_clauses(ast_translation& g2l, unsigned& worker_limit, unsigned worker_id); diff --git a/src/util/search_tree.h b/src/util/search_tree.h index bac6ce0d0..de508678b 100644 --- a/src/util/search_tree.h +++ b/src/util/search_tree.h @@ -48,6 +48,9 @@ namespace search_tree { vector m_core; unsigned m_num_activations = 0; unsigned m_effort_spent = 0; + unsigned m_active_workers = 0; + unsigned m_epoch = 0; + unsigned m_cancel_epoch = 0; public: node(literal const &l, node *parent) : m_literal(l), m_parent(parent), m_status(status::open) {} @@ -137,6 +140,17 @@ namespace search_tree { void mark_new_activation() { set_status(status::active); ++m_num_activations; + ++m_active_workers; + } + void release_worker() { + if (m_active_workers > 0) + --m_active_workers; + } + unsigned active_workers() const { + return m_active_workers; + } + bool has_active_workers() const { + return m_active_workers > 0; } unsigned effort_spent() const { return m_effort_spent; @@ -144,6 +158,18 @@ namespace search_tree { void add_effort(unsigned effort) { m_effort_spent += effort; } + unsigned epoch() const { + return m_epoch; + } + void bump_epoch() { + ++m_epoch; + } + unsigned cancel_epoch() const { + return m_cancel_epoch; + } + void bump_cancel_epoch() { + ++m_cancel_epoch; + } }; template class tree { @@ -233,7 +259,7 @@ namespace search_tree { unsigned count_active_nodes(node* cur) const { if (!cur || cur->get_status() == status::closed) return 0; - return (cur->get_status() == status::active ? 1 : 0) + + return (cur->has_active_workers() ? 1 : 0) + count_active_nodes(cur->left()) + count_active_nodes(cur->right()); } @@ -344,6 +370,8 @@ namespace search_tree { void close(node *n, vector const &C) { if (!n || n->get_status() == status::closed) return; + n->bump_epoch(); + n->bump_cancel_epoch(); n->set_status(status::closed); n->set_core(C); close(n->left(), C); @@ -447,7 +475,9 @@ namespace search_tree { // On timeout, either expand the current leaf or reopen the node for a // later revisit, depending on the tree-expansion heuristic. bool try_split(node *n, literal const &a, literal const &b, unsigned effort) { - if (!n || n->get_status() != status::active) + // the node could have been marked open by another thread that finished first and split + // we still want to add the current thread's effort in this case, but not if the node was closed + if (!n || n->get_status() == status::closed) return false; n->add_effort(effort); @@ -475,6 +505,8 @@ namespace search_tree { // conflict is given by a set of literals. // they are subsets of the literals on the path from root to n AND the external assumption literals void backtrack(node *n, vector const &conflict) { + if (!n) + return; if (conflict.empty()) { close_with_core(m_root.get(), conflict); return; @@ -520,6 +552,20 @@ namespace search_tree { return activate_best_node(); } + void release_worker(node* n) { + if (!n) + return; + n->release_worker(); + } + + bool is_lease_valid(node* n, unsigned epoch) const { + return n && n->get_status() != status::closed && n->epoch() == epoch; + } + + bool is_lease_canceled(node* n, unsigned cancel_epoch) const { + return !n || n->get_status() == status::closed || n->cancel_epoch() != cancel_epoch; + } + vector const &get_core_from_root() const { return m_root->get_core(); }