From b341c61fbc65e5f4a330641d116d2e796f7cfb5d Mon Sep 17 00:00:00 2001 From: Ilana Shapiro Date: Sat, 11 Apr 2026 22:09:41 -0700 Subject: [PATCH 1/5] first attempt with codex. Codex notes: What changed: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Each tree node now tracks: - active worker count - lease epoch - cancel epoch - get_cube() now hands each worker an explicit lease: (node, epoch, cancel_epoch). - try_split() and backtrack() now operate on that lease, and the batch manager releases the worker’s lease under the tree lock before mutating the node. - If another worker closes the leased node or subtree, the batch manager cancels only the workers whose current leased nodes are now closed. - Workers detect canceled leases after check(), reset their local cancel flag, abandon the stale lease, and continue instead of turning that into a global exception. - The “reopen immediately into the open queue” policy is preserved. I did not add a barrier waiting for all workers on a node to finish. - Active-worker accounting is now separate from the open/active/closed scheduling status, so reopening a node no longer erases the fact that other workers are still on it. I also updated search_tree bookkeeping so: - closure bumps node cancel/lease epochs - active-node counting uses actual active-worker presence, not just status == active --- src/smt/smt_parallel.cpp | 108 +++++++++++++++++++++++++++++++-------- src/smt/smt_parallel.h | 18 +++++-- src/util/search_tree.h | 50 +++++++++++++++++- 3 files changed, 151 insertions(+), 25 deletions(-) diff --git a/src/smt/smt_parallel.cpp b/src/smt/smt_parallel.cpp index d74b71df2..c19a274a8 100644 --- a/src/smt/smt_parallel.cpp +++ b/src/smt/smt_parallel.cpp @@ -111,11 +111,11 @@ namespace smt { } void parallel::worker::run() { - search_tree::node *node = nullptr; + node_lease lease; expr_ref_vector cube(m); while (true) { - if (!b.get_cube(m_g2l, id, cube, node)) { + if (!b.get_cube(m_g2l, id, cube, lease)) { LOG_WORKER(1, " no more cubes\n"); return; } @@ -125,8 +125,18 @@ namespace smt { LOG_WORKER(1, " CUBE SIZE IN MAIN LOOP: " << cube.size() << "\n"); lbool r = check_cube(cube); + if (b.lease_canceled(lease)) { + LOG_WORKER(1, " abandoning canceled lease\n"); + b.abandon_lease(id, lease); + m.limit().reset_cancel(); + lease = {}; + continue; + } + if (!m.inc()) { - b.set_exception("context cancelled"); + if (b.is_batch_running()) { + b.set_exception("context cancelled"); + } return; } @@ -140,7 +150,8 @@ namespace smt { auto atom = get_split_atom(); if (!atom) goto check_cube_start; - b.try_split(m_l2g, id, node, atom, m_config.m_threads_max_conflicts); + b.try_split(m_l2g, id, lease, atom, m_config.m_threads_max_conflicts); + lease = {}; simplify(); break; } @@ -164,7 +175,8 @@ namespace smt { } LOG_WORKER(1, " found unsat cube\n"); - b.backtrack(m_l2g, unsat_core, node); + b.backtrack(m_l2g, id, unsat_core, lease); + lease = {}; if (m_config.m_share_conflicts) b.collect_clause(m_l2g, id, mk_not(mk_and(unsat_core))); @@ -314,18 +326,44 @@ namespace smt { m.limit().cancel(); } - void parallel::batch_manager::backtrack(ast_translation &l2g, expr_ref_vector const &core, - search_tree::node *node) { + void parallel::batch_manager::release_lease_unlocked(unsigned worker_id, node* n, unsigned epoch) { + if (worker_id >= m_worker_leases.size()) + return; + auto &lease = m_worker_leases[worker_id]; + if (!lease.node || lease.node != n || lease.epoch != epoch) + return; + m_search_tree.release_worker(lease.node); + lease = {}; + } + + void parallel::batch_manager::cancel_closed_leases_unlocked(unsigned source_worker_id) { + for (unsigned worker_id = 0; worker_id < m_worker_leases.size(); ++worker_id) { + if (worker_id == source_worker_id) + continue; + auto const& lease = m_worker_leases[worker_id]; + if (!lease.node || !m_search_tree.is_lease_canceled(lease.node, lease.cancel_epoch)) + continue; + p.m_workers[worker_id]->cancel(); + } + } + + void parallel::batch_manager::backtrack(ast_translation &l2g, unsigned worker_id, expr_ref_vector const &core, + node_lease const &lease) { std::scoped_lock lock(mux); IF_VERBOSE(1, verbose_stream() << "Batch manager backtracking.\n"); if (m_state != state::is_running) return; vector g_core; for (auto c : core) { - expr_ref g_c(l2g(c), m); g_core.push_back(expr_ref(l2g(c), m)); } - m_search_tree.backtrack(node, g_core); + release_lease_unlocked(worker_id, lease.node, lease.epoch); + if (lease.node && !m_search_tree.is_lease_canceled(lease.node, lease.cancel_epoch)) + m_search_tree.backtrack(lease.node, g_core); + else if (lease.node && lease.node->get_status() == search_tree::status::closed) + m_search_tree.backtrack(lease.node, g_core); + + cancel_closed_leases_unlocked(worker_id); IF_VERBOSE(1, m_search_tree.display(verbose_stream() << bounded_pp_exprs(core) << "\n");); if (m_search_tree.is_closed()) { @@ -338,8 +376,8 @@ namespace smt { } } - void parallel::batch_manager::try_split(ast_translation &l2g, unsigned source_worker_id, - search_tree::node *node, expr *atom, unsigned effort) { + void parallel::batch_manager::try_split(ast_translation &l2g, unsigned worker_id, + node_lease const &lease, expr *atom, unsigned effort) { std::scoped_lock lock(mux); expr_ref lit(m), nlit(m); lit = l2g(atom); @@ -348,15 +386,34 @@ namespace smt { if (m_state != state::is_running) return; - bool did_split = m_search_tree.try_split(node, lit, nlit, effort); + release_lease_unlocked(worker_id, lease.node, lease.epoch); + if (!lease.node || m_search_tree.is_lease_canceled(lease.node, lease.cancel_epoch)) + return; + + bool did_split = m_search_tree.try_split(lease.node, lit, nlit, effort); if (did_split) { ++m_stats.m_num_cubes; - m_stats.m_max_cube_depth = std::max(m_stats.m_max_cube_depth, node->depth() + 1); + m_stats.m_max_cube_depth = std::max(m_stats.m_max_cube_depth, lease.node->depth() + 1); IF_VERBOSE(1, verbose_stream() << "Batch manager splitting on literal: " << mk_bounded_pp(lit, m, 3) << "\n"); } } + void parallel::batch_manager::abandon_lease(unsigned worker_id, node_lease const &lease) { + std::scoped_lock lock(mux); + release_lease_unlocked(worker_id, lease.node, lease.epoch); + } + + bool parallel::batch_manager::lease_canceled(node_lease const &lease) { + std::scoped_lock lock(mux); + return m_search_tree.is_lease_canceled(lease.node, lease.cancel_epoch); + } + + bool parallel::batch_manager::is_batch_running() { + std::scoped_lock lock(mux); + return m_state == state::is_running; + } + void parallel::batch_manager::collect_clause(ast_translation &l2g, unsigned source_worker_id, expr *clause) { std::scoped_lock lock(mux); expr *g_clause = l2g(clause); @@ -400,11 +457,14 @@ namespace smt { try { r = ctx->check(asms.size(), asms.data()); } catch (z3_error &err) { - b.set_exception(err.error_code()); + if (!m.limit().is_canceled()) + b.set_exception(err.error_code()); } catch (z3_exception &ex) { - b.set_exception(ex.what()); + if (!m.limit().is_canceled()) + b.set_exception(ex.what()); } catch (...) { - b.set_exception("unknown exception"); + if (!m.limit().is_canceled()) + b.set_exception("unknown exception"); } asms.shrink(asms.size() - cube.size()); LOG_WORKER(1, " DONE checking cube " << r << "\n";); @@ -501,7 +561,7 @@ namespace smt { } } - bool parallel::batch_manager::get_cube(ast_translation &g2l, unsigned id, expr_ref_vector &cube, node *&n) { + bool parallel::batch_manager::get_cube(ast_translation &g2l, unsigned id, expr_ref_vector &cube, node_lease &lease) { cube.reset(); std::unique_lock lock(mux); if (m_search_tree.is_closed()) { @@ -512,11 +572,16 @@ namespace smt { IF_VERBOSE(1, verbose_stream() << "aborting get_cube\n";); return false; } - node *t = m_search_tree.activate_node(n); + node *t = m_search_tree.activate_node(lease.node); if (!t) return false; IF_VERBOSE(1, m_search_tree.display(verbose_stream()); verbose_stream() << "\n";); - n = t; + lease.node = t; + lease.epoch = t->epoch(); + lease.cancel_epoch = t->cancel_epoch(); + if (id >= m_worker_leases.size()) + m_worker_leases.resize(id + 1); + m_worker_leases[id] = lease; while (t) { if (cube_config::literal_is_null(t->get_literal())) break; @@ -532,6 +597,8 @@ namespace smt { m_state = state::is_running; m_search_tree.reset(); m_search_tree.set_effort_unit(initial_max_thread_conflicts); + m_worker_leases.reset(); + m_worker_leases.resize(p.num_threads); } void parallel::batch_manager::collect_statistics(::statistics &st) const { @@ -556,7 +623,6 @@ namespace smt { }; scoped_clear clear(*this); - m_batch_manager.initialize(); m_workers.reset(); smt_parallel_params pp(ctx.m_params); @@ -574,6 +640,8 @@ namespace smt { sl.push_child(&(m_sls_worker->limit())); } + m_batch_manager.initialize(); + // Launch threads vector threads; threads.resize(m_should_run_sls ? num_threads + 1 : num_threads); // +1 for sls worker diff --git a/src/smt/smt_parallel.h b/src/smt/smt_parallel.h index bddb10b63..42b68484d 100644 --- a/src/smt/smt_parallel.h +++ b/src/smt/smt_parallel.h @@ -43,6 +43,12 @@ namespace smt { expr_ref clause; }; + struct node_lease { + search_tree::node* node = nullptr; + unsigned epoch = 0; + unsigned cancel_epoch = 0; + }; + class batch_manager { enum state { @@ -66,6 +72,7 @@ namespace smt { stats m_stats; using node = search_tree::node; search_tree::tree m_search_tree; + vector m_worker_leases; unsigned m_exception_code = 0; std::string m_exception_msg; @@ -92,6 +99,8 @@ namespace smt { } void init_parameters_state(); + void release_lease_unlocked(unsigned worker_id, node* n, unsigned epoch); + void cancel_closed_leases_unlocked(unsigned source_worker_id); public: batch_manager(ast_manager& m, parallel& p) : m(m), p(p), m_search_tree(expr_ref(m)) { } @@ -104,9 +113,12 @@ namespace smt { void set_exception(unsigned error_code); void collect_statistics(::statistics& st) const; - bool get_cube(ast_translation& g2l, unsigned id, expr_ref_vector& cube, node*& n); - void backtrack(ast_translation& l2g, expr_ref_vector const& core, node* n); - void try_split(ast_translation& l2g, unsigned id, node* n, expr* atom, unsigned effort); + bool get_cube(ast_translation& g2l, unsigned id, expr_ref_vector& cube, node_lease& lease); + void backtrack(ast_translation& l2g, unsigned worker_id, expr_ref_vector const& core, node_lease const& lease); + void try_split(ast_translation& l2g, unsigned worker_id, node_lease const& lease, expr* atom, unsigned effort); + void abandon_lease(unsigned worker_id, node_lease const& lease); + bool lease_canceled(node_lease const& lease); + bool is_batch_running(); void collect_clause(ast_translation& l2g, unsigned source_worker_id, expr* clause); expr_ref_vector return_shared_clauses(ast_translation& g2l, unsigned& worker_limit, unsigned worker_id); diff --git a/src/util/search_tree.h b/src/util/search_tree.h index bac6ce0d0..de508678b 100644 --- a/src/util/search_tree.h +++ b/src/util/search_tree.h @@ -48,6 +48,9 @@ namespace search_tree { vector m_core; unsigned m_num_activations = 0; unsigned m_effort_spent = 0; + unsigned m_active_workers = 0; + unsigned m_epoch = 0; + unsigned m_cancel_epoch = 0; public: node(literal const &l, node *parent) : m_literal(l), m_parent(parent), m_status(status::open) {} @@ -137,6 +140,17 @@ namespace search_tree { void mark_new_activation() { set_status(status::active); ++m_num_activations; + ++m_active_workers; + } + void release_worker() { + if (m_active_workers > 0) + --m_active_workers; + } + unsigned active_workers() const { + return m_active_workers; + } + bool has_active_workers() const { + return m_active_workers > 0; } unsigned effort_spent() const { return m_effort_spent; @@ -144,6 +158,18 @@ namespace search_tree { void add_effort(unsigned effort) { m_effort_spent += effort; } + unsigned epoch() const { + return m_epoch; + } + void bump_epoch() { + ++m_epoch; + } + unsigned cancel_epoch() const { + return m_cancel_epoch; + } + void bump_cancel_epoch() { + ++m_cancel_epoch; + } }; template class tree { @@ -233,7 +259,7 @@ namespace search_tree { unsigned count_active_nodes(node* cur) const { if (!cur || cur->get_status() == status::closed) return 0; - return (cur->get_status() == status::active ? 1 : 0) + + return (cur->has_active_workers() ? 1 : 0) + count_active_nodes(cur->left()) + count_active_nodes(cur->right()); } @@ -344,6 +370,8 @@ namespace search_tree { void close(node *n, vector const &C) { if (!n || n->get_status() == status::closed) return; + n->bump_epoch(); + n->bump_cancel_epoch(); n->set_status(status::closed); n->set_core(C); close(n->left(), C); @@ -447,7 +475,9 @@ namespace search_tree { // On timeout, either expand the current leaf or reopen the node for a // later revisit, depending on the tree-expansion heuristic. bool try_split(node *n, literal const &a, literal const &b, unsigned effort) { - if (!n || n->get_status() != status::active) + // the node could have been marked open by another thread that finished first and split + // we still want to add the current thread's effort in this case, but not if the node was closed + if (!n || n->get_status() == status::closed) return false; n->add_effort(effort); @@ -475,6 +505,8 @@ namespace search_tree { // conflict is given by a set of literals. // they are subsets of the literals on the path from root to n AND the external assumption literals void backtrack(node *n, vector const &conflict) { + if (!n) + return; if (conflict.empty()) { close_with_core(m_root.get(), conflict); return; @@ -520,6 +552,20 @@ namespace search_tree { return activate_best_node(); } + void release_worker(node* n) { + if (!n) + return; + n->release_worker(); + } + + bool is_lease_valid(node* n, unsigned epoch) const { + return n && n->get_status() != status::closed && n->epoch() == epoch; + } + + bool is_lease_canceled(node* n, unsigned cancel_epoch) const { + return !n || n->get_status() == status::closed || n->cancel_epoch() != cancel_epoch; + } + vector const &get_core_from_root() const { return m_root->get_core(); } From 013f1db73934fb28d27c1bd4dae50fd5dbd2524a Mon Sep 17 00:00:00 2001 From: Ilana Shapiro Date: Mon, 13 Apr 2026 11:59:00 -0700 Subject: [PATCH 2/5] fix(parallel-smt): gate split/backtrack by lease epoch What it changes: - util/search_tree.h - bumps node epoch on split - threads epoch through should_split(...) and try_split(...) - always records effort, but only split/reopen if the lease epoch still matches - smt/smt_parallel.cpp - requires is_lease_valid(..., lease.epoch) before backtrack(...) - passes lease.epoch into m_search_tree.try_split(...) --- src/smt/smt_parallel.cpp | 8 ++++---- src/util/search_tree.h | 12 +++++++----- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/smt/smt_parallel.cpp b/src/smt/smt_parallel.cpp index c19a274a8..38c3d15e7 100644 --- a/src/smt/smt_parallel.cpp +++ b/src/smt/smt_parallel.cpp @@ -358,9 +358,9 @@ namespace smt { g_core.push_back(expr_ref(l2g(c), m)); } release_lease_unlocked(worker_id, lease.node, lease.epoch); - if (lease.node && !m_search_tree.is_lease_canceled(lease.node, lease.cancel_epoch)) - m_search_tree.backtrack(lease.node, g_core); - else if (lease.node && lease.node->get_status() == search_tree::status::closed) + if (lease.node && + m_search_tree.is_lease_valid(lease.node, lease.epoch) && + !m_search_tree.is_lease_canceled(lease.node, lease.cancel_epoch)) m_search_tree.backtrack(lease.node, g_core); cancel_closed_leases_unlocked(worker_id); @@ -390,7 +390,7 @@ namespace smt { if (!lease.node || m_search_tree.is_lease_canceled(lease.node, lease.cancel_epoch)) return; - bool did_split = m_search_tree.try_split(lease.node, lit, nlit, effort); + bool did_split = m_search_tree.try_split(lease.node, lit, nlit, effort, lease.epoch); if (did_split) { ++m_stats.m_num_cubes; diff --git a/src/util/search_tree.h b/src/util/search_tree.h index de508678b..158b5a226 100644 --- a/src/util/search_tree.h +++ b/src/util/search_tree.h @@ -80,6 +80,7 @@ namespace search_tree { SASSERT(!m_right); m_left = alloc(node, a, this); m_right = alloc(node, b, this); + bump_epoch(); } node* left() const { return m_left; } @@ -277,8 +278,8 @@ namespace search_tree { find_shallowest_timed_out_leaf_depth(cur->right(), best_depth); } - bool should_split(node* n) { - if (!n || n->get_status() != status::active || !n->is_leaf()) + bool should_split(node* n, unsigned epoch) { + if (!n || n->epoch() != epoch || n->get_status() != status::active || !n->is_leaf()) return false; unsigned num_active_nodes = count_active_nodes(m_root.get()); @@ -474,7 +475,7 @@ namespace search_tree { // On timeout, either expand the current leaf or reopen the node for a // later revisit, depending on the tree-expansion heuristic. - bool try_split(node *n, literal const &a, literal const &b, unsigned effort) { + bool try_split(node *n, literal const &a, literal const &b, unsigned effort, unsigned epoch) { // the node could have been marked open by another thread that finished first and split // we still want to add the current thread's effort in this case, but not if the node was closed if (!n || n->get_status() == status::closed) @@ -483,7 +484,7 @@ namespace search_tree { n->add_effort(effort); bool did_split = false; - if (should_split(n)) { + if (should_split(n, epoch)) { n->split(a, b); did_split = true; } @@ -497,7 +498,8 @@ namespace search_tree { // // Waiting for all workers would introduce per-node synchronization, delay // diversification, and let a slow worker stall progress. - n->set_status(status::open); + if (n->epoch() == epoch) + n->set_status(status::open); return did_split; } From ae1a4563728f611c580f128e36e911342bfe0d85 Mon Sep 17 00:00:00 2001 From: Ilana Shapiro Date: Mon, 13 Apr 2026 13:56:06 -0700 Subject: [PATCH 3/5] clean up code and add some comments --- src/smt/smt_parallel.cpp | 11 +++++++++-- src/smt/smt_parallel.h | 12 ++++++++++-- src/util/search_tree.h | 8 ++++---- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/smt/smt_parallel.cpp b/src/smt/smt_parallel.cpp index 38c3d15e7..2c155ba29 100644 --- a/src/smt/smt_parallel.cpp +++ b/src/smt/smt_parallel.cpp @@ -563,7 +563,8 @@ namespace smt { bool parallel::batch_manager::get_cube(ast_translation &g2l, unsigned id, expr_ref_vector &cube, node_lease &lease) { cube.reset(); - std::unique_lock lock(mux); + std::scoped_lock lock(mux); + if (m_search_tree.is_closed()) { IF_VERBOSE(1, verbose_stream() << "all done\n";); return false; @@ -572,16 +573,21 @@ namespace smt { IF_VERBOSE(1, verbose_stream() << "aborting get_cube\n";); return false; } + node *t = m_search_tree.activate_node(lease.node); + if (!t) return false; + IF_VERBOSE(1, m_search_tree.display(verbose_stream()); verbose_stream() << "\n";); + lease.node = t; lease.epoch = t->epoch(); - lease.cancel_epoch = t->cancel_epoch(); + lease.cancel_epoch = t->get_cancel_epoch(); if (id >= m_worker_leases.size()) m_worker_leases.resize(id + 1); m_worker_leases[id] = lease; + while (t) { if (cube_config::literal_is_null(t->get_literal())) break; @@ -590,6 +596,7 @@ namespace smt { cube.push_back(std::move(lit)); t = t->parent(); } + return true; } diff --git a/src/smt/smt_parallel.h b/src/smt/smt_parallel.h index 42b68484d..93f3214e2 100644 --- a/src/smt/smt_parallel.h +++ b/src/smt/smt_parallel.h @@ -45,8 +45,16 @@ namespace smt { struct node_lease { search_tree::node* node = nullptr; - unsigned epoch = 0; - unsigned cancel_epoch = 0; + // Version counter for structural mutations of this node (e.g., split/close). + // Used to detect stale leases: if a worker's lease.epoch != node.epoch, + // the node has changed since it was acquired and must not be mutated. + unsigned epoch = 0; + + // Cancellation generation counter for this node/subtree. + // Incremented when the node is closed; used to signal that all + // workers holding leases on this node (or its descendants) + // must abandon work immediately. + unsigned cancel_epoch = 0; }; class batch_manager { diff --git a/src/util/search_tree.h b/src/util/search_tree.h index 158b5a226..9d93c77e7 100644 --- a/src/util/search_tree.h +++ b/src/util/search_tree.h @@ -165,7 +165,7 @@ namespace search_tree { void bump_epoch() { ++m_epoch; } - unsigned cancel_epoch() const { + unsigned get_cancel_epoch() const { return m_cancel_epoch; } void bump_cancel_epoch() { @@ -229,6 +229,8 @@ namespace search_tree { select_next_node(cur->right(), target_status, best); } + // Try to select an open node using the select_next_node policy + // If there are no open nodes, try to select an active node for portfolio solving node* activate_best_node() { candidate best; select_next_node(m_root.get(), status::open, best); @@ -542,8 +544,6 @@ namespace search_tree { } // return an active node in the tree, or nullptr if there is none - // first check if there is a node to activate under n, - // if not, go up the tree and try to activate a sibling subtree node *activate_node(node *n) { if (!n) { if (m_root->get_status() == status::active) { @@ -565,7 +565,7 @@ namespace search_tree { } bool is_lease_canceled(node* n, unsigned cancel_epoch) const { - return !n || n->get_status() == status::closed || n->cancel_epoch() != cancel_epoch; + return !n || n->get_status() == status::closed || n->get_cancel_epoch() != cancel_epoch; } vector const &get_core_from_root() const { From dec2f2ec9953182d57d1c6a8a776c713e3b64bf2 Mon Sep 17 00:00:00 2001 From: Ilana Shapiro Date: Mon, 13 Apr 2026 14:15:18 -0700 Subject: [PATCH 4/5] fix bug about backtracking condition being too strict: The epoch guard should not block backtrack(...) the same way it blocks try_split(...). A stale worker that proves UNSAT for n should still be able to close n, and that closure should then cancel the other workers on n and its subtree. I changed smt/smt_parallel.cpp accordingly: - try_split(...) still uses epoch to reject stale structural splits - backtrack(...) no longer requires is_lease_valid(..., epoch); it only requires that the lease is not already canceled So the intended asymmetry is now restored: - stale split: reject - stale unsat/backtrack: allow closure, then cancel affected workers --- src/smt/smt_parallel.cpp | 6 ++---- src/util/search_tree.h | 13 +++---------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/src/smt/smt_parallel.cpp b/src/smt/smt_parallel.cpp index 2c155ba29..f87e6944c 100644 --- a/src/smt/smt_parallel.cpp +++ b/src/smt/smt_parallel.cpp @@ -332,7 +332,7 @@ namespace smt { auto &lease = m_worker_leases[worker_id]; if (!lease.node || lease.node != n || lease.epoch != epoch) return; - m_search_tree.release_worker(lease.node); + m_search_tree.dec_active_workers(lease.node); lease = {}; } @@ -358,9 +358,7 @@ namespace smt { g_core.push_back(expr_ref(l2g(c), m)); } release_lease_unlocked(worker_id, lease.node, lease.epoch); - if (lease.node && - m_search_tree.is_lease_valid(lease.node, lease.epoch) && - !m_search_tree.is_lease_canceled(lease.node, lease.cancel_epoch)) + if (lease.node && !m_search_tree.is_lease_canceled(lease.node, lease.cancel_epoch)) m_search_tree.backtrack(lease.node, g_core); cancel_closed_leases_unlocked(worker_id); diff --git a/src/util/search_tree.h b/src/util/search_tree.h index 9d93c77e7..56a9bac77 100644 --- a/src/util/search_tree.h +++ b/src/util/search_tree.h @@ -143,13 +143,10 @@ namespace search_tree { ++m_num_activations; ++m_active_workers; } - void release_worker() { + void dec_active_workers() { if (m_active_workers > 0) --m_active_workers; } - unsigned active_workers() const { - return m_active_workers; - } bool has_active_workers() const { return m_active_workers > 0; } @@ -554,14 +551,10 @@ namespace search_tree { return activate_best_node(); } - void release_worker(node* n) { + void dec_active_workers(node* n) { if (!n) return; - n->release_worker(); - } - - bool is_lease_valid(node* n, unsigned epoch) const { - return n && n->get_status() != status::closed && n->epoch() == epoch; + n->dec_active_workers(); } bool is_lease_canceled(node* n, unsigned cancel_epoch) const { From 60b4dff4287c202bbe2cf14d601a8be64c63d247 Mon Sep 17 00:00:00 2001 From: Ilana Shapiro Date: Mon, 13 Apr 2026 15:49:54 -0700 Subject: [PATCH 5/5] ablate to no backtracking on stale leases --- src/smt/smt_parallel.cpp | 8 +++++++- src/util/search_tree.h | 4 ++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/smt/smt_parallel.cpp b/src/smt/smt_parallel.cpp index f87e6944c..07a29d777 100644 --- a/src/smt/smt_parallel.cpp +++ b/src/smt/smt_parallel.cpp @@ -358,7 +358,13 @@ namespace smt { g_core.push_back(expr_ref(l2g(c), m)); } release_lease_unlocked(worker_id, lease.node, lease.epoch); - if (lease.node && !m_search_tree.is_lease_canceled(lease.node, lease.cancel_epoch)) + + // only backtrack if the lease is still valid (i.e., the worker has not been given a new node to work on or the lease has not been canceled by another worker) + // this means we can potentially delay backtracking for some unsat cubes until another worker (whose lease is valid) determines it UNSAT. + // Empirically, this is better than aggressively backtracking on every unsat cube, for now. + if (lease.node && + m_search_tree.is_lease_valid(lease.node, lease.epoch) && + !m_search_tree.is_lease_canceled(lease.node, lease.cancel_epoch)) m_search_tree.backtrack(lease.node, g_core); cancel_closed_leases_unlocked(worker_id); diff --git a/src/util/search_tree.h b/src/util/search_tree.h index 56a9bac77..15f86eafa 100644 --- a/src/util/search_tree.h +++ b/src/util/search_tree.h @@ -557,6 +557,10 @@ namespace search_tree { n->dec_active_workers(); } + bool is_lease_valid(node* n, unsigned epoch) const { + return n && n->get_status() != status::closed && n->epoch() == epoch; + } + bool is_lease_canceled(node* n, unsigned cancel_epoch) const { return !n || n->get_status() == status::closed || n->get_cancel_epoch() != cancel_epoch; }