Backtrack/backjump based on accumulated lemmas

2025-08-22 11:07:51 +00:00 · 2022-11-23 12:49:36 +01:00 · 2022-11-23 12:49:36 +01:00 · 2787a22007
commit 2787a22007
parent fdc186b204
4 changed files with 202 additions and 134 deletions
--- a/src/math/polysat/conflict.cpp
+++ b/src/math/polysat/conflict.cpp
@ -153,7 +153,6 @@ namespace polysat {
            SASSERT(m_vars_occurring.empty());
            SASSERT(m_lemmas.empty());
            SASSERT(m_narrow_queue.empty());
-            SASSERT_EQ(m_max_jump_level, UINT_MAX);
        }
        return is_empty;
    }
@ -166,7 +165,6 @@ namespace polysat {
        m_lemmas.reset();
        m_narrow_queue.reset();
        m_level = UINT_MAX;
-        m_max_jump_level = UINT_MAX;
        SASSERT(empty());
    }

@ -314,50 +312,13 @@ namespace polysat {
    void conflict::add_lemma(clause_ref lemma) {
        SASSERT(lemma);
        lemma->set_redundant(true);
-        LOG_H3("Side lemma: " << *lemma);
+        LOG_H3("Lemma: " << *lemma);
        for (sat::literal lit : *lemma) {
            LOG(lit_pp(s, lit));
            SASSERT(s.m_bvars.value(lit) != l_true);
        }
-        // TODO: call clause simplifier here?
-        //       maybe it reduces the level we have to consider.
-
-        // Two kinds of side lemmas:
-        // 1. If all constraints are false, then the side lemma is an alternative conflict lemma.
-        //      => we should at least jump back to the second-highest level in the lemma (could be lower, if so justified by another lemma)
-        // 2. If there is an undef constraint, then it is is a justification for this new constraint.
-        //    (Can there be more than one undef constraint? Should not happen for these lemmas.)
-        //      => TODO: (unclear) jump at least to the max level in the lemma and hope the propagation helps there? or ignore it for jump level computation?
-        unsigned max_level = 0;  // highest level in lemma
-        unsigned jump_level = 0;  // second-highest level in lemma
-        bool has_unassigned = false;
-        for (sat::literal lit : *lemma) {
-            if (!s.m_bvars.is_assigned(lit)) {
-                has_unassigned = true;
-                continue;
-            }
-            unsigned const lit_level = s.m_bvars.level(lit);
-            if (lit_level > max_level) {
-                jump_level = max_level;
-                max_level = lit_level;
-            } else if (max_level > lit_level && lit_level > jump_level) {
-                jump_level = lit_level;
-            }
-        }
-        if (has_unassigned)
-            jump_level = max_level;
-        LOG("Jump level: " << jump_level);
-        m_max_jump_level = std::min(m_max_jump_level, jump_level);
        m_lemmas.push_back(std::move(lemma));
-        // If possible, we should set the new constraint to l_true;
-        // and re-enable the assertions marked with "tag:true_by_side_lemma".
-        // Or we adjust the conflict invariant:
-        // - l_true constraints is the default case as before,
-        // - l_undef constraints are new and justified by some side lemma, but
-        //   should be treated by the conflict resolution methods like l_true
-        //   constraints,
-        // - l_false constraints are disallowed in the conflict (as before).
-    }
+   }

    void conflict::remove(signed_constraint c) {
        SASSERT(contains(c));
@ -475,7 +436,7 @@ namespace polysat {
        return lemma.build();
    }

-    clause_ref_vector conflict::take_side_lemmas() {
+    clause_ref_vector conflict::take_lemmas() {
 #ifndef NDEBUG
        on_scope_exit check_empty([this]() {
            SASSERT(m_lemmas.empty());
@ -536,7 +497,7 @@ namespace polysat {
        for (auto v : m_vars)
            out << " v" << v;
        if (!m_lemmas.empty())
-            out << " side lemmas";
+            out << " lemmas";
        for (clause const* lemma : m_lemmas)
            out << " " << show_deref(lemma);
        return out;
--- a/src/math/polysat/conflict.h
+++ b/src/math/polysat/conflict.h
@ -91,11 +91,8 @@ namespace polysat {
        unsigned_vector m_var_occurrences;  // for each variable, the number of constraints in m_literals that contain it
        uint_set m_vars_occurring;          // set of variables that occur in at least one of the constraints in m_literals

-        // Additional lemmas that justify new constraints generated during conflict resolution
+        // Lemmas that been accumulated during conflict resolution
        clause_ref_vector m_lemmas;
-        // The maximal level on which none of the side lemmas is falsified.
-        // (If we backjump to a level higher than max_jump_level, at least one side lemma will be false.)
-        unsigned m_max_jump_level = UINT_MAX;

        // Store constraints that should be narrowed after backjumping.
        // This allows us to perform propagations that are missed by the two-watched-variables scheme,
@ -189,15 +186,10 @@ namespace polysat {
        /** Convert the core into a lemma to be learned. */
        clause_ref build_lemma();

-        /** Move the accumulated side lemmas out of the conflict */
-        clause_ref_vector take_side_lemmas();
-        /**
-         * Backjump at least to this level (or possibly to a lower level),
-         * to ensure all side lemmas can be propagated.
-         */
-        unsigned max_jump_level() const { return m_max_jump_level; }
+        /** Move the accumulated lemmas out of the conflict */
+        clause_ref_vector take_lemmas();

-        clause_ref_vector const& side_lemmas() const { return m_lemmas; }
+        clause_ref_vector const& lemmas() const { return m_lemmas; }

        /** Move the literals to be narrowed out of the conflict */
        sat::literal_vector take_narrow_queue();
--- a/src/math/polysat/solver.cpp
+++ b/src/math/polysat/solver.cpp
@ -462,7 +462,6 @@ namespace polysat {
            }
            case trail_instr_t::assign_bool_i: {
                sat::literal lit = m_search.back().lit();
-                signed_constraint c = lit2cnstr(lit);
                LOG_V("Undo assign_bool_i: " << lit);
                unsigned active_level = m_bvars.level(lit);

@ -693,7 +692,7 @@ namespace polysat {
                LOG_H2("Working on " << search_item_pp(m_search, item));
                LOG(m_justification[v]);
                LOG("Conflict: " << m_conflict);
-                justification& j = m_justification[v];
+                justification const& j = m_justification[v];
                if (j.is_decision()) {
                    if (j.level() <= base_level())
                        break;
@ -703,10 +702,6 @@ namespace polysat {
                if (j.level() <= base_level())  // propagation level may be out of order (cf. replay)
                    continue;
                m_conflict.resolve_value(v);
-                // if (j.level() > base_level() &&  /* !m_conflict.resolve_value(v) && */ j.is_decision()) {
-                //     revert_decision(v);
-                //     return;
-                // }
            }
            else {
                // Resolve over boolean literal
@ -719,21 +714,23 @@ namespace polysat {
                LOG(bool_justification_pp(m_bvars, lit));
                LOG("Literal " << lit << " is " << lit2cnstr(lit));
                LOG("Conflict: " << m_conflict);
+                SASSERT(!m_bvars.is_assumption(var));
+                if (m_bvars.is_decision(var)) {
+                    if (m_bvars.level(var) <= base_level())
+                        break;
+                    revert_bool_decision(lit);
+                    return;
+                }
                // NOTE: the levels of boolean literals on the stack aren't always ordered by level (cf. replay functionality in pop_levels).
                //       Thus we can only skip base level literals here, instead of aborting the loop.
                if (m_bvars.level(var) <= base_level())
                    continue;
-                SASSERT(!m_bvars.is_assumption(var));
-                if (m_bvars.is_decision(var)) {
-                    revert_bool_decision(lit);
-                    return;
-                }
                if (m_bvars.is_bool_propagation(var))
+                    // TODO: this could be a propagation at an earlier level.
+                    //       do we really want to resolve these eagerly?
                    m_conflict.resolve_bool(lit, *m_bvars.reason(lit));
-                else {
-                    SASSERT(m_bvars.is_evaluation(var));
+                else
                    m_conflict.resolve_with_assignment(lit);
-                }
            }
        }
        LOG("End of resolve_conflict loop");
@ -784,6 +781,7 @@ namespace polysat {
     *
     */
    void solver::revert_decision(pvar v) {
+#if 0
        rational val = m_value[v];
        LOG_H2("Reverting decision: pvar " << v << " := " << val);
        SASSERT(m_justification[v].is_decision());
@ -798,9 +796,13 @@ namespace polysat {

        unsigned jump_level = get_level(v) - 1;
        backjump_and_learn(jump_level, *lemma);
+#endif
+        unsigned max_jump_level = get_level(v) - 1;
+        backjump_and_learn(max_jump_level);
    }

    void solver::revert_bool_decision(sat::literal const lit) {
+#if 0
        LOG_H2("Reverting decision: " << lit_pp(*this, lit));
        sat::bool_var const var = lit.var();

@ -823,16 +825,136 @@ namespace polysat {
        // If there is more than one undef choice left in that lemma,
        // then the next bdecide will take care of that (after all outstanding propagations).
        SASSERT(can_bdecide());
+#endif
+        unsigned max_jump_level = m_bvars.level(lit) - 1;
+        backjump_and_learn(max_jump_level);
    }

+    std::optional<lemma_score> solver::compute_lemma_score(clause const& lemma) {
+        unsigned max_level = 0;     // highest level in lemma
+        unsigned at_max_level = 0;  // how many literals at the highest level in lemma
+        unsigned snd_level = 0;     // second-highest level in lemma
+        for (sat::literal lit : lemma) {
+            SASSERT(m_bvars.is_assigned(lit));  // any new constraints should have been assign_eval'd
+            if (m_bvars.is_true(lit))  // may happen if we only use the clause to justify a new constraint; it is not a real lemma
+                return std::nullopt;
+
+            unsigned const lit_level = m_bvars.level(lit);
+            if (lit_level > max_level) {
+                snd_level = max_level;
+                max_level = lit_level;
+                at_max_level = 1;
+            } else if (lit_level == max_level) {
+                at_max_level++;
+            } else if (max_level > lit_level && lit_level > snd_level) {
+                snd_level = lit_level;
+            }
+        }
+        SASSERT(lemma.empty() || at_max_level > 0);
+        // The MCSAT paper distinguishes between "UIP clauses" and "semantic split clauses".
+        // It is the same as our distinction between "asserting" and "non-asserting" lemmas.
+        // - UIP clause: a single literal on the highest decision level in the lemma.
+        //               Do the standard backjumping known from SAT solving (back to second-highest level in the lemma, propagate it there).
+        // - Semantic split clause: multiple literals on the highest level in the lemma.
+        //                          Backtrack to "highest level - 1" and split on the lemma there.
+        // For now, we follow the same convention for computing the jump levels.
+        unsigned jump_level;
+        if (at_max_level <= 1)
+            jump_level = snd_level;
+        else
+            jump_level = (max_level == 0) ? 0 : (max_level - 1);
+        return {{jump_level, at_max_level}};
+    }
+
+    void solver::backjump_and_learn(unsigned max_jump_level) {
+        sat::literal_vector narrow_queue = m_conflict.take_narrow_queue();
+        clause_ref_vector lemmas = m_conflict.take_lemmas();
+
+        // Select the "best" lemma
+        // - lowest jump level
+        // - lowest number of literals at the highest level
+        // We must do so before backjump() when the search stack is still intact.
+        lemma_score best_score = lemma_score::max();
+        clause* best_lemma = nullptr;
+        for (clause* lemma : lemmas) {
+            m_simplify_clause.apply(*lemma);
+            auto score = compute_lemma_score(*lemma);
+            if (!score)
+                continue;
+            if (*score < best_score) {
+                best_score = *score;
+                best_lemma = lemma;
+            }
+        }
+        // In case no (good) lemma has been found, build the fallback lemma from the conflict state.
+        if (!best_lemma || best_score.jump_level() > max_jump_level) {
+            lemmas.push_back(m_conflict.build_lemma());
+            clause* lemma = lemmas.back();
+            m_simplify_clause.apply(*lemma);
+            auto score = compute_lemma_score(*lemma);
+            SASSERT(score);
+            best_score = *score;
+            best_lemma = lemma;
+        }
+        unsigned const jump_level = best_score.jump_level();
+
+        m_conflict.reset();
+        backjump(jump_level);
+
+        for (sat::literal lit : narrow_queue) {
+            LOG("Narrow queue: " << lit_pp(*this, lit));
+            switch (m_bvars.value(lit)) {
+            case l_true:
+                lit2cnstr(lit).narrow(*this, false);
+                break;
+            case l_false:
+                lit2cnstr(~lit).narrow(*this, false);
+                break;
+            case l_undef:
+                /* do nothing */
+                break;
+            default:
+                UNREACHABLE();
+            }
+        }
+
+        for (clause* lemma : lemmas) {
+            add_clause(*lemma);
+            // NOTE: currently, the backjump level is an overapproximation,
+            //       since the level of evaluated constraints may not be exact (see TODO in assign_eval).
+            // For this reason, we may actually get a conflict at this point
+            // (because the actual jump_level of the lemma may be lower that best_level.)
+            if (is_conflict()) {
+                // until this is fixed (if possible; and there may be other causes of conflict at this point),
+                // we just forget about the remaining lemmas and restart conflict analysis.
+                return;
+            }
+            SASSERT(!is_conflict());  // TODO: is this true in general? No lemma by itself should lead to a conflict here. But can there be conflicting asserting lemmas?
+        }
+
+        LOG("best_score: " << best_score);
+        LOG("best_lemma: " << *best_lemma);
+
+        if (best_score.literals_at_max_level() > 1) {
+            LOG("Main lemma is not asserting: " << *best_lemma);
+            SASSERT(!all_of(*best_lemma, [this](sat::literal lit) { return m_bvars.is_assigned(lit); }));
+            m_lemmas.push_back(best_lemma);
+            m_trail.push_back(trail_instr_t::add_lemma_i);
+            // TODO: currently we forget non-asserting lemmas when backjumping over them.
+            //       We surely don't want to keep them in m_lemmas because then we will start doing case splits
+            //       even if the lemma should instead be waiting for propagations.
+            //       We could instead watch its pvars and re-insert into m_lemmas when all but one are assigned.
+            //       The same could even be done in general for all lemmas, instead of distinguishing between
+            //       asserting and non-asserting lemmas.
+            //       (Note that the same lemma can be asserting in one branch of the search but non-asserting in another,
+            //       depending on which pvars are assigned.)
+            SASSERT(can_bdecide());
+        }
+    }  // backjump_and_learn
+
+#if 0
    void solver::backjump_and_learn(unsigned jump_level, clause& lemma) {
-#ifndef NDEBUG
-        polysat::assignment old_assignment = assignment().clone();
-        sat::literal_vector lemma_invariant_todo;
-        SASSERT(lemma_invariant_part1(lemma, old_assignment, lemma_invariant_todo));
-        // SASSERT(lemma_invariant(lemma, old_assignment));
-#endif
-        clause_ref_vector side_lemmas = m_conflict.take_side_lemmas();
+        clause_ref_vector lemmas = m_conflict.take_lemmas();
        sat::literal_vector narrow_queue = m_conflict.take_narrow_queue();

        m_conflict.reset();
@ -853,14 +975,15 @@ namespace polysat {
                UNREACHABLE();
            }
        }
-        for (auto cl : side_lemmas) {
+        for (clause* cl : lemmas) {
            m_simplify_clause.apply(*cl);
            add_clause(*cl);
        }
-        SASSERT(lemma_invariant_part2(lemma_invariant_todo));
        learn_lemma(lemma);
    }
+#endif

+#if 0
    void solver::learn_lemma(clause& lemma) {
        SASSERT(!lemma.empty());
        m_simplify_clause.apply(lemma);
@ -883,60 +1006,7 @@ namespace polysat {
            SASSERT(can_bdecide());
        }
    }
-
-    bool solver::lemma_invariant_part1(clause const& lemma, polysat::assignment const& a, sat::literal_vector& out_todo) {
-        SASSERT(out_todo.empty());
-        LOG("Lemma: " << lemma);
-        LOG("assignment: " << a);
-        // TODO: fix
-#if 0
-        for (sat::literal lit : lemma) {
-            auto const c = lit2cnstr(lit);
-            bool const currently_false = c.is_currently_false(*this, assignment);
-            LOG("  " << lit_pp(*this, lit) << "    currently_false? " << currently_false);
-            if (!currently_false && m_bvars.value(lit) == l_undef)
-                out_todo.push_back(lit);  // undefs might only be set false after the side lemmas are propagated, so check them later.
-            else
-                SASSERT(m_bvars.value(lit) == l_false || currently_false);
-        }
 #endif
-        return true;
-    }
-
-    bool solver::lemma_invariant_part2(sat::literal_vector const& todo) {
-        LOG("todo: " << todo);
-        // Check that undef literals are now propagated by the side lemmas.
-        //
-        // Unfortunately, this isn't always possible.
-        // Consider if the first side lemma contains a constraint that comes from a boolean decision:
-        //
-        //      76: v10 + v7 + -1*v0 + -1 == 0      [ l_true decide@5 pwatched ]
-        //
-        // When we now backtrack behind the decision level of the literal, then we cannot propagate the side lemma,
-        // and some literals of the main lemma may still be undef at this point.
-        //
-        // So it seems that using constraints from a non-asserting lemma makes the new lemma also non-asserting (if it isn't already).
-#if 1
-        for (sat::literal lit : todo)
-            SASSERT(m_bvars.value(lit) == l_false);
-#endif
-        return true;
-    }
-
-    bool solver::lemma_invariant(clause const& lemma, polysat::assignment const& old_assignment) {
-        LOG("Lemma: " << lemma);
-        // LOG("old_assignment: " << old_assignment);
-        // TODO: fix
-#if 0
-        for (sat::literal lit : lemma) {
-            auto const c = lit2cnstr(lit);
-            bool const currently_false = c.is_currently_false(*this, old_assignment);
-            LOG("  " << lit_pp(*this, lit) << "    currently_false? " << currently_false);
-            SASSERT(m_bvars.value(lit) == l_false || currently_false);
-        }
-#endif
-        return true;
-    }

    unsigned solver::level(sat::literal lit0, clause const& cl) {
        unsigned lvl = 0;
--- a/src/math/polysat/solver.h
+++ b/src/math/polysat/solver.h
@ -46,6 +46,52 @@ namespace polysat {
        bool        m_log_conflicts = false;
    };

+    /**
+     * A metric to evaluate lemmas from conflict analysis.
+     * Lower is better.
+     *
+     * Comparison criterion:
+     * - Lowest jump level has priority, because otherwise, some of the accumulated lemmas may still be false after backjumping.
+     * - To break ties on jump level, choose clause with the fewest literals at its highest decision level;
+     *   to limit case splits.
+     */
+    class lemma_score {
+        unsigned m_jump_level;
+        unsigned m_literals_at_max_level;
+
+    public:
+        lemma_score(unsigned jump_level, unsigned at_max_level)
+            : m_jump_level(jump_level), m_literals_at_max_level(at_max_level)
+        { }
+
+        unsigned jump_level() const { return m_jump_level; }
+        unsigned literals_at_max_level() const { return m_literals_at_max_level; }
+
+        static lemma_score max() {
+            return {UINT_MAX, UINT_MAX};
+        }
+
+        bool operator==(lemma_score const& other) const {
+            return m_jump_level == other.m_jump_level
+                && m_literals_at_max_level == other.m_literals_at_max_level;
+        }
+        bool operator!=(lemma_score const& other) const { return !operator==(other); }
+
+        bool operator<(lemma_score const& other) const {
+            return m_jump_level < other.m_jump_level
+                || (m_jump_level == other.m_jump_level && m_literals_at_max_level < other.m_literals_at_max_level);
+        }
+        bool operator>(lemma_score const& other) const { return other.operator<(*this); }
+        bool operator<=(lemma_score const& other) const { return operator==(other) || operator<(other); }
+        bool operator>=(lemma_score const& other) const { return operator==(other) || operator>(other); }
+
+        std::ostream& display(std::ostream& out) const {
+            return out << "jump_level=" << m_jump_level << " at_max_level=" << m_literals_at_max_level;
+        }
+    };
+
+    inline std::ostream& operator<<(std::ostream& out, lemma_score const& ls) { return ls.display(out); }
+
    class solver {

        struct stats {
@ -214,6 +260,8 @@ namespace polysat {
        void revert_decision(pvar v);
        void revert_bool_decision(sat::literal lit);
        void backjump_and_learn(unsigned jump_level, clause& lemma);
+        void backjump_and_learn(unsigned max_jump_level);
+        std::optional<lemma_score> compute_lemma_score(clause const& lemma);

        // activity of variables based on standard VSIDS
        unsigned m_activity_inc = 128;
@ -237,9 +285,6 @@ namespace polysat {

        bool invariant();
        static bool invariant(signed_constraints const& cs);
-        bool lemma_invariant(clause const& lemma, polysat::assignment const& a);
-        bool lemma_invariant_part1(clause const& lemma, polysat::assignment const& a, sat::literal_vector& out_todo);
-        bool lemma_invariant_part2(sat::literal_vector const& todo);
        bool wlist_invariant();
        bool assignment_invariant();
        bool verify_sat();