revise axiom instantiation scheme for finite-sets

Instead of asserting theory axioms lazily we create them on the fly and allow propagation eagerly. The approach uses a waterfall model as follows: - terms are created: they are inserted into an index for (set.in x S) axiom creation. - two terms are merged by an equality. Loop over all new opportunities for axiom instantiation New axioms are added to a queue of recently created axioms. - an atomic formula was asserted by the SAT solver. Update the watch list to find new propagations. During propagation recently created axioms are either inserted into a propagation queue, or inserted into a watch list. They are inserted into a propagation queue all or all but one literal is assigned to false. They are inserted into a watch list if at least two literals are unassigned They are dropped if the axiom contains a literal that is assigned to true The propagation queue is processed by by asserting the theory axiom to the core. Also add some elementary statistics. A breaking change is to change the datatype for undo-trail in smt_context to not use a custom data-structure. This can likely cause regressions. For example, the region allocator now comes from the stack_trail instead of being owned within smt_context with a different declaration order. smt_context could crash during destruction or maybe even pop. We take the risk as the change is overdue. Add swap method to ref_vector.
2025-12-21 19:53:43 +00:00 · 2025-10-18 12:08:39 +02:00 · 2025-10-18 12:08:39 +02:00 · 43d40ac142
commit 43d40ac142
parent aa1f1f56b6
7 changed files with 522 additions and 209 deletions
--- a/src/smt/smt_context.cpp
+++ b/src/smt/smt_context.cpp
@ -66,7 +66,7 @@ namespace smt {
        m_progress_callback(nullptr),
        m_next_progress_sample(0),
        m_clause_proof(*this),
-        m_fingerprints(m, m_region),
+        m_fingerprints(m, get_region()),
        m_b_internalized_stack(m),
        m_e_internalized_stack(m),
        m_l_internalized_stack(m),
@ -799,7 +799,7 @@ namespace smt {
                }
                else {
                    // uncommon case: r2 will have two theory_vars attached to it.
-                    r2->add_th_var(v1, t1, m_region);
+                    r2->add_th_var(v1, t1, get_region());
                    push_new_th_diseqs(r2, v1, get_theory(t1));
                    push_new_th_diseqs(r1, v2, get_theory(t2));
                }
@ -850,7 +850,7 @@ namespace smt {
                theory_var v2 = r2->get_th_var(t1);
                TRACE(merge_theory_vars, tout << get_theory(t1)->get_name() << ": " << v2 << " == " << v1 << "\n");
                if (v2 == null_theory_var) {
-                    r2->add_th_var(v1, t1, m_region);
+                    r2->add_th_var(v1, t1, get_region());
                    push_new_th_diseqs(r2, v1, get_theory(t1));
                }
                l1 = l1->get_next();
@ -1939,14 +1939,13 @@ namespace smt {
            m.trace_stream() << "[push] " << m_scope_lvl << "\n";

        m_scope_lvl++;
-        m_region.push_scope();
+        get_region().push_scope();
        m_scopes.push_back(scope());
        scope & s = m_scopes.back();
        // TRACE(context, tout << "push " << m_scope_lvl << "\n";);

        m_relevancy_propagator->push();
        s.m_assigned_literals_lim    = m_assigned_literals.size();
-        s.m_trail_stack_lim          = m_trail_stack.size();
        s.m_aux_clauses_lim          = m_aux_clauses.size();
        s.m_justifications_lim       = m_justifications.size();
        s.m_units_to_reassert_lim    = m_units_to_reassert.size();
@ -1962,12 +1961,6 @@ namespace smt {
        CASSERT("context", check_invariant());
    }

-    /**
-       \brief Execute generic undo-objects.
-    */
-    void context::undo_trail_stack(unsigned old_size) {
-        ::undo_trail_stack(m_trail_stack, old_size);
-    }

    /**
       \brief Remove watch literal idx from the given clause.
@ -2455,7 +2448,7 @@ namespace smt {

            m_fingerprints.pop_scope(num_scopes);
            unassign_vars(s.m_assigned_literals_lim);
-            undo_trail_stack(s.m_trail_stack_lim);
+            m_trail_stack.pop_scope(num_scopes);

            for (theory* th : m_theory_set) 
                th->pop_scope_eh(num_scopes);
@ -2470,7 +2463,6 @@ namespace smt {
            m_th_eq_propagation_queue.reset();
            m_th_diseq_propagation_queue.reset();
            m_atom_propagation_queue.reset();
-            m_region.pop_scope(num_scopes);
            m_scopes.shrink(new_lvl);
            m_conflict_resolution->reset();

@ -3058,7 +3050,7 @@ namespace smt {
        del_clauses(m_lemmas, 0);
        del_justifications(m_justifications, 0);
        reset_tmp_clauses();
-        undo_trail_stack(0);
+        m_trail_stack.reset();
        m_qmanager = nullptr;
        if (m_is_diseq_tmp) {
            m_is_diseq_tmp->del_eh(m, false);
--- a/src/smt/smt_context.h
+++ b/src/smt/smt_context.h
@ -113,7 +113,6 @@ namespace smt {
        progress_callback *         m_progress_callback;
        unsigned                    m_next_progress_sample;
        clause_proof                m_clause_proof;
-        region                      m_region;
        fingerprint_set             m_fingerprints;

        expr_ref_vector             m_b_internalized_stack; // stack of the boolean expressions already internalized.
@ -304,7 +303,7 @@ namespace smt {
        }

        region & get_region() {
-            return m_region;
+            return m_trail_stack.get_region();
        }

        bool relevancy() const {
@ -643,7 +642,6 @@ namespace smt {
        //
        // -----------------------------------
    protected:
-        typedef ptr_vector<trail >   trail_stack;
        trail_stack                           m_trail_stack;
 #ifdef Z3DEBUG
        bool                                  m_trail_enabled { true };
@ -653,11 +651,15 @@ namespace smt {
        template<typename TrailObject>
        void push_trail(const TrailObject & obj) {
            SASSERT(m_trail_enabled);
-            m_trail_stack.push_back(new (m_region) TrailObject(obj));
+            m_trail_stack.push(obj);
        }

        void push_trail_ptr(trail * ptr) {
-            m_trail_stack.push_back(ptr);
+            m_trail_stack.push_ptr(ptr);
+        }
+
+        trail_stack& get_trail_stack() {
+            return m_trail_stack;
        }

    protected:
@ -667,7 +669,6 @@ namespace smt {
        unsigned                    m_search_lvl { 0 }; // It is greater than m_base_lvl when assumptions are used.  Otherwise, it is equals to m_base_lvl
        struct scope {
            unsigned                m_assigned_literals_lim;
-            unsigned                m_trail_stack_lim;
            unsigned                m_aux_clauses_lim;
            unsigned                m_justifications_lim;
            unsigned                m_units_to_reassert_lim;
@ -687,8 +688,6 @@ namespace smt {

        void pop_scope(unsigned num_scopes);

-        void undo_trail_stack(unsigned old_size);
-
        void unassign_vars(unsigned old_lim);

        void remove_watch_literal(clause * cls, unsigned idx);
@ -1021,7 +1020,7 @@ namespace smt {

        template<typename Justification>
        justification * mk_justification(Justification const & j) {
-            justification * js = new (m_region) Justification(j);
+            justification * js = new (get_region()) Justification(j);
            SASSERT(js->in_region());
            if (js->has_del_eh())
                m_justifications.push_back(js);
--- a/src/smt/smt_internalizer.cpp
+++ b/src/smt/smt_internalizer.cpp
@ -608,7 +608,7 @@ namespace smt {
        m_lambdas.insert(lam_node, q);
        m_app2enode.setx(q->get_id(), lam_node, nullptr);
        m_l_internalized_stack.push_back(q);
-        m_trail_stack.push_back(&m_mk_lambda_trail);
+        m_trail_stack.push_ptr(&m_mk_lambda_trail);
        bool_var bv = get_bool_var(fa);
        assign(literal(bv, false), nullptr);
        mark_as_relevant(bv);
@ -959,7 +959,7 @@ namespace smt {
            m_activity[v]      = 0.0;
        m_case_split_queue->mk_var_eh(v);
        m_b_internalized_stack.push_back(n);
-        m_trail_stack.push_back(&m_mk_bool_var_trail);
+        m_trail_stack.push_ptr(&m_mk_bool_var_trail);
        m_stats.m_num_mk_bool_var++;
        SASSERT(check_bool_var_vector_sizes());
        return v;
@ -1010,7 +1010,8 @@ namespace smt {
            CTRACE(cached_generation, generation != m_generation,
                   tout << "cached_generation: #" << n->get_id() << " " << generation << " " << m_generation << "\n";);
        }
-        enode * e           = enode::mk(m, m_region, m_app2enode, n, generation, suppress_args, merge_tf, m_scope_lvl, cgc_enabled, true);
+        enode *e = enode::mk(m, get_region(), m_app2enode, n, generation, suppress_args, merge_tf, m_scope_lvl,
+                             cgc_enabled, true);
        TRACE(mk_enode_detail, tout << "e.get_num_args() = " << e->get_num_args() << "\n";);
        if (m.is_unique_value(n))
            e->mark_as_interpreted();
@ -1018,7 +1019,7 @@ namespace smt {
        TRACE(generation, tout << "mk_enode: " << id << " " << generation << "\n";);
        m_app2enode.setx(id, e, nullptr);
        m_e_internalized_stack.push_back(n);
-        m_trail_stack.push_back(&m_mk_enode_trail);
+        m_trail_stack.push_ptr(&m_mk_enode_trail);
        m_enodes.push_back(e);
        if (e->get_num_args() > 0) {
            if (e->is_true_eq()) {
@ -1864,11 +1865,11 @@ namespace smt {
        if (old_v == null_theory_var) {
            enode * r     = n->get_root();
            theory_var v2 = r->get_th_var(th_id);
-            n->add_th_var(v, th_id, m_region);
+            n->add_th_var(v, th_id, get_region());
            push_trail(add_th_var_trail(n, th_id));
            if (v2 == null_theory_var) {
                if (r != n)
-                    r->add_th_var(v, th_id, m_region);
+                    r->add_th_var(v, th_id, get_region());
                push_new_th_diseqs(r, v, th);
            }
            else if (r != n) {
--- a/src/smt/theory_finite_set.cpp
+++ b/src/smt/theory_finite_set.cpp
@ -20,13 +20,13 @@ Abstract:
 namespace smt {

    /**
-    Constructor.
-    Set up callback that adds axiom instantiations as clauses. 
+    * Constructor.
+    * Set up callback that adds axiom instantiations as clauses. 
    **/
    theory_finite_set::theory_finite_set(context& ctx):
        theory(ctx, ctx.get_manager().mk_family_id("finite_set")),
        u(m),
-        m_axioms(m)
+        m_axioms(m), m_find(*this)
    {
        // Setup the add_clause callback for axioms
        std::function<void(expr_ref_vector const &)> add_clause_fn = 
@ -46,40 +46,146 @@ namespace smt {
        m_set_members.reset();
    }

+    /**
+    * When creating a theory variable, we associate extra data structures with it.
+    * if n := (set.in x S)
+    * then for every T in the equivalence class of S (including S), assert theory axioms for x in T.
+    * 
+    * if n := (setop T U)
+    * then for every (set.in x S) where either S ~ T, S ~ U, assert theory axioms for x in n.
+    * Since n is fresh there are no other (set.in x S) with S ~ n in the state.
+    * 
+    * if n := (set.filter p S)
+    * then for every (set.in x T) where S ~ T, assert theory axiom for x in (set.filter p S)
+    * 
+    * if n := (set.map f S)
+    * then for every (set.in x T) where S ~ T, assert theory axiom for (set.in x S) and map.
+    * In other words, assert 
+    *         (set.in (f x) (set.map f S))
+    */
+    theory_var theory_finite_set::mk_var(enode *n) {
+        theory_var r = theory::mk_var(n);
+        VERIFY(r == static_cast<theory_var>(m_find.mk_var()));
+        SASSERT(r == static_cast<int>(m_var_data.size()));
+        m_var_data.push_back(alloc(var_data));
+        ctx.push_trail(push_back_vector<ptr_vector<var_data>>(m_var_data));
+        ctx.push_trail(new_obj_trail(m_var_data.back()));
+        expr *e = n->get_expr();
+        if (u.is_in(e)) {
+            auto set = n->get_arg(1);
+            auto v = set->get_root()->get_th_var(get_id());
+            SASSERT(v != null_theory_var);
+            m_var_data[v]->m_parent_in.push_back(n);
+            ctx.push_trail(push_back_trail(m_var_data[v]->m_parent_in));
+            add_in_axioms(n, m_var_data[v]);  // add axioms x in S x S ~ T, T := setop, or T is arg of setop.
+            auto f = to_app(e)->get_decl();
+            if (!m_set_in_decls.contains(f)) {
+                m_set_in_decls.push_back(f);
+                ctx.push_trail(push_back_vector(m_set_in_decls));
+            }
+        }
+        else if (u.is_union(e) || u.is_intersect(e) || 
+            u.is_difference(e) || u.is_singleton(e) ||
+            u.is_empty(e) || u.is_range(e)) {            
+            m_var_data[r]->m_setops.push_back(n);
+            ctx.push_trail(push_back_trail(m_var_data[r]->m_setops));
+            for (auto arg : enode::args(n)) {
+                if (!u.is_finite_set(arg->get_expr()))
+                    continue;
+                auto v = arg->get_root()->get_th_var(get_id());
+                SASSERT(v != null_theory_var);
+                // add axioms for x in S, e := setop S T ...
+                for (auto in : m_var_data[v]->m_parent_in)
+                    add_membership_axioms(in->get_arg(0)->get_expr(), e);
+                m_var_data[v]->m_parent_setops.push_back(n);
+                ctx.push_trail(push_back_trail(m_var_data[v]->m_parent_setops));
+            }
+        }
+        else if (u.is_map(e) || u.is_select(e)) {
+            NOT_IMPLEMENTED_YET();
+        }
+        return r;
+    }
+
+    trail_stack& theory_finite_set::get_trail_stack() {
+        return ctx.get_trail_stack();
+    }
+
+    /*
+    * Merge the equivalence classes of two variables.
+     * parent_in := vector of (set.in x S) terms where S is in the equivalence class of r.
+     * parent_setops := vector of (set.op S T) where S or T is in the equivalence class of r.
+     * setops := vector of (set.op S T) where (set.op S T) is in the equivalence class of r.
+     * 
+    */
+    void theory_finite_set::merge_eh(theory_var root, theory_var other, theory_var, theory_var) {
+        // r is the new root
+        TRACE(finite_set, tout << "merging v" << root << " v" << other << "\n"; display_var(tout, root);
+              tout << " <- " << mk_pp(get_enode(other)->get_expr(), m) << "\n";);
+        SASSERT(root == find(root));
+        var_data *d_root= m_var_data[root];
+        var_data *d_other = m_var_data[other];
+        ctx.push_trail(restore_vector(d_root->m_setops));
+        ctx.push_trail(restore_vector(d_root->m_parent_setops));
+        ctx.push_trail(restore_vector(d_root->m_parent_in));
+        add_in_axioms(root, other);
+        add_in_axioms(other, root);
+        d_root->m_setops.append(d_other->m_setops);
+        d_root->m_parent_setops.append(d_other->m_parent_setops);
+        d_root->m_parent_in.append(d_other->m_parent_in);
+        TRACE(finite_set, tout << "after merge\n"; display_var(tout, root););
+    }
+
+    /*
+     * for each (set.in x S) in d1->parent_in,
+     *     add axioms for (set.in x S)
+     */
+    void theory_finite_set::add_in_axioms(theory_var v1, theory_var v2) {
+        auto d1 = m_var_data[v1];
+        auto d2 = m_var_data[v2];
+        for (enode *in : d1->m_parent_in) 
+            add_in_axioms(in, d2);        
+    }
+
+    /*
+     *  let (set.in x S) 
+     *
+     *  for each T := (set.op U V) in d2->parent_setops
+     *         then S ~ U or S ~ V by construction
+     *              add axioms for (set.in x T)
+     *        
+     *  for each T := (set.op U V) in d2->setops
+     *         then S ~ T by construction
+     *              add axioms for (set.in x T)
+    */
+
+    void theory_finite_set::add_in_axioms(enode *in, var_data *d) {
+        SASSERT(u.is_in(in->get_expr()));
+        auto e = in->get_arg(0)->get_expr();
+        auto set1 = in->get_arg(1);
+        for (enode *setop : d->m_parent_setops) {
+            SASSERT(
+                any_of(enode::args(setop), [&](enode *arg) { return in->get_arg(1)->get_root() == arg->get_root(); }));
+            add_membership_axioms(e, setop->get_expr());
+        }
+
+        for (enode *setop : d->m_setops) {
+            SASSERT(in->get_arg(1)->get_root() == setop->get_root());
+            add_membership_axioms(e, setop->get_expr());
+        }
+    }
+
    /**
    * Boolean atomic formulas for finite sets are one of:
    * (set.in x S)
    * (set.subset S T)
    * When an atomic formula is first created it is to be registered with the solver.
    * The internalize_atom method takes care of this.
-    * Atomic formulas are special cases of terms (of non-Boolean type) so the first 
-    * effect is to register the atom as a term.
-    * The second effect is to set up tracking and assert axioms.
-    * Tracking:
-    *    For every occurrence (set.in x_i S_i) we track x_i. 
-    * Axioms that can be added immediately.
+    * Atomic formulas are special cases of terms (of non-Boolean type) so they are registered as terms.
    *    
    */
    bool theory_finite_set::internalize_atom(app * atom, bool gate_ctx) {
-        TRACE(finite_set, tout << "internalize_atom: " << mk_pp(atom, m) << "\n";);
-
-        internalize_term(atom);
-        
-        // Track membership elements (set.in)
-        expr* elem = nullptr, *set = nullptr;
-        if (u.is_in(atom, elem, set)) {
-            auto n = ctx.get_enode(elem);
-            if (!m_elements.contains(n)) {
-                m_elements.insert(n);
-                ctx.push_trail(insert_obj_trail(m_elements, n));
-            }
-        }
-
-        // Assert immediate axioms
-        // if (!ctx.relevancy())
-        add_immediate_axioms(atom);
-        
-        return true;
+        return internalize_term(atom);
    }

    /**
@ -115,6 +221,10 @@ namespace smt {
        if (!is_attached_to_var(e))             
            ctx.attach_th_var(e, this, mk_var(e));

+        // Assert immediate axioms
+        // if (!ctx.relevancy())
+        add_immediate_axioms(term);
+                
        return true;
    }

@ -126,16 +236,16 @@ namespace smt {

    void theory_finite_set::new_eq_eh(theory_var v1, theory_var v2) {
        TRACE(finite_set, tout << "new_eq_eh: v" << v1 << " = v" << v2 << "\n";);
-        //    x = y, y in S
-        // -------------------
-        //  axioms for x in S
-
-        auto n1 = get_enode(v1);
-        auto n2 = get_enode(v2);
-        auto e1 = n1->get_expr();
-        auto e2 = n2->get_expr();
+        m_find.merge(v1, v2); // triggers merge_eh, which triggers incremental generation of theory axioms
    }

+    /**
+    * Every dissequality has to be supported by at distinguishing element.
+    * 
+    * TODO: we can avoid instantiating the extensionality axiom if we know statically that e1, e2
+    * can never be equal (say, they have different cardinalities or they are finite sets by construction
+    * with elements that can differentiate the sets)
+    */
    void theory_finite_set::new_diseq_eh(theory_var v1, theory_var v2) {
        TRACE(finite_set, tout << "new_diseq_eh: v" << v1 << " != v" << v2 << "\n");
        auto n1 = get_enode(v1);
@ -163,7 +273,7 @@ namespace smt {
    final_check_status theory_finite_set::final_check_eh() {
        TRACE(finite_set, tout << "final_check_eh\n";);

-        if (add_membership_axioms())
+        if (activate_unasserted_clause())
            return FC_CONTINUE;

        if (assume_eqs())
@ -186,7 +296,7 @@ namespace smt {
     */
    void theory_finite_set::add_immediate_axioms(app* term) {
        expr *elem = nullptr, *set = nullptr;
-        unsigned sz = m_theory_axioms.size();
+        unsigned sz = m_clauses.axioms.size();
        if (u.is_in(term, elem, set) && u.is_empty(set))
            add_membership_axioms(elem, set);
        else if (u.is_subset(term))
@ -195,48 +305,163 @@ namespace smt {
            m_axioms.in_singleton_axiom(elem, term);
       
        // Assert all new lemmas as clauses
-        for (unsigned i = sz; i < m_theory_axioms.size(); ++i) 
-            assert_clause(m_theory_axioms[i]);
-
-
-        // TODO also add axioms for x in S u T, x in S n T, etc to the stack of m_theory_axioms.
-        // The axioms are then instantiated if they are propagating.
+        for (unsigned i = sz; i < m_clauses.axioms.size(); ++i)
+            m_clauses.squeue.push_back(i);
    }

-    /**
-     *   Set membership is saturated with respect to set operations.
-     *    For every (set.in x S) where S is a union, assert (or propagate) (set.in x S1) or (set.in x S2)
-     */
-    bool theory_finite_set::add_membership_axioms() {
-        expr *elem1 = nullptr, *set1 = nullptr;
+    void theory_finite_set::assign_eh(bool_var v, bool is_true) {
+        TRACE(finite_set, tout << "assign_eh: v" << v << " is_true: " << is_true << "\n";);
+        expr *e = ctx.bool_var2expr(v);
+        // retrieve the watch list for clauses where e appears with opposite polarity
+        unsigned idx = 2 * e->get_id() + (is_true ? 1 : 0);
+        if (idx >= m_clauses.watch.size())
+            return;

-        // walk all parents of elem in congruence table.
-        // if a parent is of the form elem' in S u T, or similar.
-        // create clauses for elem in S u T.
+        // walk the watch list and try to find new watches or propagate
+        unsigned j = 0;
+        for (unsigned i = 0; i < m_clauses.watch[idx].size(); ++i) {
+            TRACE(finite_set, tout << " watch[" << i << "] size: " << m_clauses.watch[i].size() << "\n";);
+            auto clause_idx = m_clauses.watch[idx][i];
+            auto &clause = m_clauses.axioms[clause_idx];
+            if (any_of(clause, [&](expr *lit) { return ctx.find_assignment(lit) == l_true; })) {
+                TRACE(finite_set, tout << "  satisfied\n";);
+                m_clauses.watch[idx][j++] = clause_idx;
+                continue; // clause is already satisfied
+            }
+            auto is_complement_to = [&](bool is_true, expr* lit, expr* arg) {
+                if (is_true)
+                    return m.is_not(lit) && to_app(lit)->get_arg(0) == arg;
+                else
+                    return lit == arg;
+            };
+            auto lit1 = clause.get(0);
+            auto lit2 = clause.get(1);
+            auto position = 0;
+            if (is_complement_to(is_true, lit1, e))
+                position = 0;
+            else {
+                SASSERT(is_complement_to(is_true, lit2, e));
+                position = 1;
+            }
            
-        for (auto elem : m_elements) {
-            if (!ctx.is_relevant(elem))
-                continue;
-            for (auto p : enode::parents(elem)) {
-                if (!u.is_in(p->get_expr(), elem1, set1))
+            bool found_swap = false;
+            for (unsigned k = 2; k < clause.size(); ++k) {
+                expr *lit = clause.get(k);
+                if (ctx.find_assignment(lit) == l_false)
                    continue;
-                if (elem->get_root() != p->get_arg(0)->get_root())
-                    continue;  // elem is then equal to set1 but not elem1. This is a different case.
-                if (!ctx.is_relevant(p))
-                    continue;
-                for (auto sib : *p->get_arg(1))
-                    add_membership_axioms(elem->get_expr(), sib->get_expr());
+                // found a new watch     
+                clause.swap(position, k);          
+                // std::swap(clause[position], clause[k]);
+                bool litneg = m.is_not(lit, lit);
+                auto litid = 2 * lit->get_id() + litneg;              
+                m_clauses.watch.reserve(litid + 1);
+                m_clauses.watch[litid].push_back(clause_idx);
+                TRACE(finite_set, tout << "  new watch for " << mk_pp(lit, m) << "\n";);
+                found_swap = true;
+                break;
+            }
+            if (found_swap) 
+                continue;  // the clause is removed from this watch list
+            // either all literals are false, or the other watch literal is propagating.
+            m_clauses.squeue.push_back(clause_idx);
+            TRACE(finite_set, tout << "  propagate clause\n";);
+            m_clauses.watch[idx][j++] = clause_idx;
+            ++i;
+            for (; i < m_clauses.watch[idx].size(); ++i)
+                m_clauses.watch[idx][j++] = m_clauses.watch[idx][i];
+            break;
+        }
+        m_clauses.watch[idx].shrink(j);
+    }
+
+    bool theory_finite_set::can_propagate() {
+        return m_clauses.can_propagate();
+    }
+
+    void theory_finite_set::propagate() {
+        TRACE(finite_set, tout << "propagate\n";);
+        ctx.push_trail(value_trail(m_clauses.aqhead));
+        ctx.push_trail(value_trail(m_clauses.sqhead));
+        while (can_propagate() && !ctx.inconsistent()) {
+            // activate newly created clauses
+            while (m_clauses.aqhead < m_clauses.axioms.size()) 
+                activate_clause(m_clauses.aqhead++);
+
+            // empty the propagation queue of clauses to assert
+            while (m_clauses.sqhead < m_clauses.squeue.size() && !ctx.inconsistent()) {
+                auto index = m_clauses.squeue[m_clauses.sqhead++];
+                auto const &clause = m_clauses.axioms[index];
+                assert_clause(clause);
            }
        }      
-        if (instantiate_false_lemma())
-            return true;
-        if (instantiate_unit_propagation())
-            return true;
-        if (instantiate_free_lemma())
-            return true;
-        return false;
    }

+    void theory_finite_set::activate_clause(unsigned clause_idx) {
+        TRACE(finite_set, tout << "activate_clause: " << clause_idx << "\n";);
+        auto &clause = m_clauses.axioms[clause_idx];
+        if (any_of(clause, [&](expr *e) { return ctx.find_assignment(e) == l_true; }))
+            return;
+        if (clause.size() <= 1) {
+            m_clauses.squeue.push_back(clause_idx);
+            return;
+        }
+        expr *w1 = nullptr, *w2 = nullptr;
+        for (unsigned i = 0; i < clause.size(); ++i) {
+            expr *lit = clause.get(i);
+            switch (ctx.find_assignment(lit)) {
+            case l_true:
+                UNREACHABLE();
+                 return; 
+            case l_false: 
+                break;
+            case l_undef:
+                if (!w1) {
+                    w1 = lit;
+                    clause.swap(0, i);
+                }                                    
+                else if (!w2) {
+                    w2 = lit;
+                    clause.swap(1, i);
+                }
+                break;
+            }
+        }
+        if (!w2) {
+            m_clauses.squeue.push_back(clause_idx);
+            return;
+        }
+        bool w1neg = m.is_not(w1, w1);
+        bool w2neg = m.is_not(w2, w2);  
+        unsigned w1id = 2 * w1->get_id() + w1neg;
+        unsigned w2id = 2 * w2->get_id() + w2neg;
+        unsigned sz = std::max(w1id, w2id) + 1;
+        m_clauses.watch.reserve(sz);
+        m_clauses.watch[w1id].push_back(clause_idx);
+        m_clauses.watch[w2id].push_back(clause_idx);
+
+        struct unwatch_clause : public trail {
+            theory_finite_set &th;
+            unsigned index;
+            unwatch_clause(theory_finite_set &th, unsigned index) : th(th), index(index) {}
+            void undo() override {
+                auto &clause = th.m_clauses.axioms[index];
+                expr *w1 = clause.get(0);
+                expr *w2 = clause.get(1);
+                bool w1neg = th.m.is_not(w1, w1);
+                bool w2neg = th.m.is_not(w2, w2);
+                unsigned w1id = 2 * w1->get_id() + w1neg;
+                unsigned w2id = 2 * w2->get_id() + w2neg;
+                auto &watch1 = th.m_clauses.watch[w1id];
+                auto &watch2 = th.m_clauses.watch[w2id];
+                watch1.erase(index);
+                watch2.erase(index);
+            }
+        };
+        ctx.push_trail(unwatch_clause(*this, clause_idx));
+    }
+
+
+
    /**
     *  Saturate with respect to equality sharing:
     *  - Sets corresponding to shared variables having the same interpretation should also be congruent
@ -297,10 +522,10 @@ namespace smt {
                table.erase({a, b});
            }
        };
-        if (m_theory_axiom_exprs.contains({a, b}))
+        if (m_clauses.members.contains({a, b}))
            return false;
-        m_theory_axiom_exprs.insert({a, b});
-        ctx.push_trail(insert_obj_pair_table(m_theory_axiom_exprs, a, b));
+        m_clauses.members.insert({a, b});
+        ctx.push_trail(insert_obj_pair_table(m_clauses.members, a, b));
        return true;
    }

@ -343,8 +568,9 @@ namespace smt {

    void theory_finite_set::add_clause(expr_ref_vector const& clause) {
        TRACE(finite_set, tout << "add_clause: " << clause << "\n");
-        ctx.push_trail(push_back_vector(m_theory_axioms));
-        m_theory_axioms.push_back(clause);        
+        ctx.push_trail(push_back_vector(m_clauses.axioms));
+        m_clauses.axioms.push_back(clause);
+        m_stats.m_num_axioms_created++;
    }

    theory * theory_finite_set::mk_fresh(context * new_ctx) {
@ -353,6 +579,15 @@ namespace smt {

    void theory_finite_set::display(std::ostream & out) const {
        out << "theory_finite_set:\n";
+        for (unsigned i = 0; i < m_clauses.axioms.size(); ++i)
+            out << "[" << i << "]: " << m_clauses.axioms[i] << "\n";
+        for (unsigned v = 0; v < get_num_vars(); ++v)
+            display_var(out, v);
+        for (unsigned i = 0; i < m_clauses.watch.size(); ++i) {
+            if (m_clauses.watch[i].empty())
+                continue;
+            out << "watch[" << i << "] := " << m_clauses.watch[i] << "\n";
+        }
    }

    void theory_finite_set::init_model(model_generator & mg) {
@ -369,35 +604,41 @@ namespace smt {
        // and ensure that the model factory has values for them.
        // For now, we rely on the default model construction.
        reset_set_members();
-        for (auto x : m_elements) {
-            if (!ctx.is_relevant(x))
-                continue;
-            x = x->get_root();
-            if (x->is_marked())
-                continue;
-            x->set_mark();  // make sure we only do this once per element
-            // TODO: use marking of x to avoid duplicate work
-            for (auto p : enode::parents(x)) {
-                if (!ctx.is_relevant(p))
+        for (auto f : m_set_in_decls) {
+            for (auto n : ctx.enodes_of(f)) {
+                SASSERT(u.is_in(n->get_expr()));
+                auto x = n->get_arg(0);
+                if (!ctx.is_relevant(x))
                    continue;
-                if (!u.is_in(p->get_expr()))
+                x = x->get_root();
+                if (x->is_marked())
                    continue;
-                if (ctx.get_assignment(p->get_expr()) != l_true)
-                    continue;
-                enode *elem = nullptr, *set = nullptr;
-                set = p->get_arg(1)->get_root();
-                elem = p->get_arg(0)->get_root();
-                if (elem != x)
-                    continue; 
-                if (!m_set_members.contains(set)) 
-                    m_set_members.insert(set, alloc(obj_hashtable<enode>));                
-                m_set_members.find(set)->insert(x);
+                x->set_mark();  // make sure we only do this once per element
+                for (auto p : enode::parents(x)) {
+                    if (!ctx.is_relevant(p))
+                        continue;
+                    if (!u.is_in(p->get_expr()))
+                        continue;
+                    if (ctx.get_assignment(p->get_expr()) != l_true)
+                        continue;
+                    auto set = p->get_arg(1)->get_root();
+                    auto elem = p->get_arg(0)->get_root();
+                    if (elem != x)
+                        continue;
+                    if (!m_set_members.contains(set))
+                        m_set_members.insert(set, alloc(obj_hashtable<enode>));
+                    m_set_members.find(set)->insert(x);
+                }
            }
        }
-        for (auto x : m_elements) {
-            x = x->get_root();
-            if (x->is_marked())
-                x->unset_mark();
+        for (auto f : m_set_in_decls) {
+            for (auto n : ctx.enodes_of(f)) {
+                SASSERT(u.is_in(n->get_expr()));
+                auto x = n->get_arg(0);
+                x = x->get_root();
+                if (x->is_marked())
+                    x->unset_mark();
+            }
        }
    }

@ -417,10 +658,12 @@ namespace smt {
        }

        app *mk_value(model_generator &mg, expr_ref_vector const &values) override {            
-            SASSERT(values.size() == m_elements->size());
+            SASSERT(values.empty() == !m_elements);
            if (values.empty()) 
                return th.u.mk_empty(s);
+
            SASSERT(m_elements);
+            SASSERT(values.size() == m_elements->size());
            return th.mk_union(values.size(), values.data(), s);
        }
    };
@ -433,77 +676,84 @@ namespace smt {
        return alloc(finite_set_value_proc, *this, s, elements);
    }

+
    /**
-    * Lemmas that are currently assinged to false are conflicts. 
-    * They should be asserted as soon as possible.
-    * Only the first conflict needs to be asserted.
-    * 
+    * a theory axiom can be unasserted if it contains two or more literals that have
+    * not been internalized yet.
    */
-    bool theory_finite_set::instantiate_false_lemma() {
-        for (auto const& clause : m_theory_axioms) {
-            bool all_false = all_of(clause, [&](expr *e) { return ctx.find_assignment(e) == l_false; });
-            if (!all_false)
-                continue;
-            assert_clause(clause);
-            return true;
+    bool theory_finite_set::activate_unasserted_clause() {
+        for (auto const &clause : m_clauses.axioms) {
+            if (assert_clause(clause))
+                return true;
        }
        return false;
    }

-    /**
-     * Lemmas that are unit propagating should be asserted as possible and can be asserted in a batch.
-     * It is possible to assert a unit propagating lemma as a clause.
-     * A more efficient approach is to add a Theory propagation with the solver.
-     * A theory propagation gets recorded on the assignment trail and the overhead of undoing it is baked in to backtracking.
-     * A theory axiom is also removed during backtracking.
-    */
-    bool theory_finite_set::instantiate_unit_propagation() {
-        bool propagated = false;
-        for (auto const &clause : m_theory_axioms) {
-            expr *undef = nullptr;
-            bool is_unit_propagating = true;
-            for (auto e : clause) {
-                switch (ctx.find_assignment(e)) {
-                case l_false: continue;
-                case l_true: is_unit_propagating = false; break;
-                case l_undef:
-                    if (undef != nullptr) 
-                        is_unit_propagating = false;                    
-                    undef = e;
-                    break;
-                }
-                if (!is_unit_propagating)
-                    break;
+    bool theory_finite_set::assert_clause(expr_ref_vector const &clause) {
+        expr *unit = nullptr;
+        unsigned undef_count = 0;
+        for (auto e : clause) {
+            switch (ctx.find_assignment(e)) {
+            case l_true:
+                return false; // clause is already satisfied
+            case l_false:
+                break;
+            case l_undef:
+                ++undef_count;
+                unit = e;
+                break;
            }
-            if (!is_unit_propagating || undef == nullptr)
-                continue;      
-            assert_clause(clause);
-            propagated = true;
        }
-        return propagated;
-    }

-    /**
-     * We assume the lemmas in the queue are necessary for completeness.
-     * So they all have to be enforced through case analysis.
-     * Lemmas with more than one unassigned literal are asserted here.
-     * The solver will case split on the unassigned literals to satisfy the lemma.
-    */
-    bool theory_finite_set::instantiate_free_lemma() {
-        for (auto const& clause : m_theory_axioms) {
-            if (any_of(clause, [&](expr *e) { return ctx.find_assignment(e) == l_true; }))
-                continue;
-            assert_clause(clause);
+        if (undef_count == 1) {
+            TRACE(finite_set, tout << " propagate unit: " << mk_pp(unit, m) << "\n" << clause << "\n";);
+            auto lit = mk_literal(unit);
+            literal_vector core;
+            for (auto e : clause) {
+                if (e != unit)
+                    core.push_back(~mk_literal(e));
+            }
+            m_stats.m_num_axioms_propagated++;
+            ctx.assign(lit, ctx.mk_justification(
+                                theory_propagation_justification(get_id(), ctx, core.size(), core.data(), lit)));
            return true;
        }
-        return false;
-    }
-
-    void theory_finite_set::assert_clause(expr_ref_vector const &clause) {
+        bool is_conflict = (undef_count == 0);
+        if (is_conflict)
+            m_stats.m_num_axioms_conflicts++;
+        else
+            m_stats.m_num_axioms_case_splits++;
+        TRACE(finite_set, tout << " assert " << (is_conflict ? "conflict" : "case split") << clause << "\n";);
        literal_vector lclause;
        for (auto e : clause)
            lclause.push_back(mk_literal(e));
        ctx.mk_th_axiom(get_id(), lclause);
+        return true;
+    }
+
+    std::ostream& theory_finite_set::display_var(std::ostream& out, theory_var v) const {
+        out << "v" << v << " := " << enode_pp(get_enode(v), ctx) << "\n";
+        auto d = m_var_data[v];
+        if (!d->m_setops.empty()) {
+            out << "  setops: ";
+            for (auto n : d->m_setops)
+                out << enode_pp(n, ctx) << " ";
+            out << "\n";
+        }
+        if (!d->m_parent_setops.empty()) {
+            out << "  parent_setops: ";
+            for (auto n : d->m_parent_setops)
+                out << enode_pp(n, ctx) << " ";
+            out << "\n";
+        }
+        if (!d->m_parent_in.empty()) {
+            out << "  parent_in: ";
+            for (auto n : d->m_parent_in)
+                out << enode_pp(n, ctx) << " ";
+            out << "\n";
+        }
+
+        return out;
    }

 }  // namespace smt
--- a/src/smt/theory_finite_set.h
+++ b/src/smt/theory_finite_set.h
@ -20,40 +20,40 @@ Abstract:
    The set-based decision procedure relies on saturating with respect
    to rules of the form:

-       x in v1 == v2, v1 ~ set.empty
+       x in v1 a term, v1 ~ set.empty
    -----------------------------------
-       v1 = set.empty => not (x in v1)
+           not (x in set.empty)


-     x in v1 == v2, v1 ~ v3, v3 == (set.union v4 v5)
+     x in v1 a term , v1 ~ v3, v3 := (set.union v4 v5)
     -----------------------------------------------
           x in v3 <=> x in v4 or x in v5

-     x in v1 == v2, v1 ~ v3, v3 == (set.intersect v4 v5)
+     x in v1 a term, v1 ~ v3, v3 := (set.intersect v4 v5)
     ---------------------------------------------------
           x in v3 <=> x in v4 and x in v5

-    x in v1 == v2, v1 ~ v3, v3 == (set.difference v4 v5)
+    x in v1 a term, v1 ~ v3, v3 := (set.difference v4 v5)
    ---------------------------------------------------
         x in v3 <=> x in v4 and not (x in v5)

-    x in v1 == v2, v1 ~ v3, v3 == (set.singleton v4)
+    x in v1 a term, v1 ~ v3, v3 := (set.singleton v4)
    -----------------------------------------------
         x in v3 <=> x == v4

-    x in v1 == v2, v1 ~ v3, v3 == (set.range lo hi)
+    x in v1 a term, v1 ~ v3, v3 := (set.range lo hi)
    -----------------------------------------------
         x in v3 <=> (lo <= x <= hi)

-    x in v1 == v2, v1 ~ v3, v3 == (set.map f v4)
+    x in v1 a term, v1 ~ v3, v3 := (set.map f v4)
    -----------------------------------------------
     x in v3 <=> set.map_inverse(f, x, v4) in v4

-    x in v1 == v2, v1 ~ v3, v3 == (set.map f v4)
+    x in v1 a term, v1 ~ v3, v3 := (set.map f v4)
   -----------------------------------------------
        x in v4 => f(x) in v3

-   x in v1 == v2, v1 ~ v3, v3 == (set.select p v4)
+   x in v1 a tern, v1 ~ v3, v3 := (set.select p v4)
   -----------------------------------------------
        x in v3 <=> p(x) and x in v4

@ -89,20 +89,59 @@ theory_finite_set.cpp.
 #include "ast/finite_set_decl_plugin.h"
 #include "ast/rewriter/finite_set_axioms.h"
 #include "util/obj_pair_hashtable.h"
+#include "util/union_find.h"
 #include "smt/smt_theory.h"
 #include "model/finite_set_factory.h"

 namespace smt {
    class theory_finite_set : public theory {
+        using th_union_find = union_find<theory_finite_set>;
        friend class theory_finite_set_test;
        friend struct finite_set_value_proc;
+        friend class th_union_find;
+
+        struct var_data {
+            ptr_vector<enode> m_setops;
+            ptr_vector<enode> m_parent_in;
+            ptr_vector<enode> m_parent_setops;
+        };
+
+        struct theory_clauses {
+            vector<expr_ref_vector> axioms;            // vector of created theory axioms
+            unsigned                aqhead = 0;        // queue head of created axioms
+            unsigned_vector         squeue;            // propagation queue of axioms to be added to the solver
+            unsigned                sqhead = 0;        // head into propagation queue axioms to be added to solver
+            obj_pair_hashtable<expr, expr> members;    // set of membership axioms that were instantiated
+            vector<unsigned_vector>  watch;            // watch list from expression index to clause occurrence
+
+            bool can_propagate() const {
+                return sqhead < squeue.size() || aqhead < axioms.size();
+            }
+        };
+
+        struct stats {
+            unsigned m_num_axioms_created = 0;
+            unsigned m_num_axioms_conflicts = 0;
+            unsigned m_num_axioms_propagated = 0;
+            unsigned m_num_axioms_case_splits = 0;
+
+            void collect_statistics(::statistics & st) const {
+                st.update("finite-set-axioms-created", m_num_axioms_created);
+                st.update("finite-set-axioms-propagated", m_num_axioms_propagated);
+                st.update("finite-set-axioms-conflicts", m_num_axioms_conflicts);
+                st.update("finite-set-axioms-case-splits", m_num_axioms_case_splits);
+            }
+        };
+
        finite_set_util           u;
        finite_set_axioms         m_axioms;
-        obj_hashtable<enode>      m_elements;             // set of all 'x' where there is an 'x in S' atom
-        vector<expr_ref_vector>   m_theory_axioms;
-        obj_pair_hashtable<expr, expr> m_theory_axiom_exprs;
+        th_union_find             m_find;
+        theory_clauses            m_clauses;
        finite_set_factory *m_factory = nullptr;
        obj_map<enode, obj_hashtable<enode> *> m_set_members;
+        ptr_vector<func_decl> m_set_in_decls;
+        ptr_vector<var_data> m_var_data;
+        stats m_stats;
               
    protected:
        // Override relevant methods from smt::theory
@ -112,23 +151,32 @@ namespace smt {
        void new_diseq_eh(theory_var v1, theory_var v2) override;
        void apply_sort_cnstr(enode *n, sort *s) override; 
        final_check_status final_check_eh() override;
+        bool can_propagate() override;
+        void propagate() override;
+        void assign_eh(bool_var v, bool is_true) override;
        
        theory * mk_fresh(context * new_ctx) override;
        char const * get_name() const override { return "finite_set"; }
        void display(std::ostream & out) const override;
        void init_model(model_generator & mg) override;
        model_value_proc * mk_value(enode * n, model_generator & mg) override;
+        theory_var mk_var(enode *n) override;
+
+        void collect_statistics(::statistics & st) const override {
+            m_stats.collect_statistics(st);
+        }
+
+
+        void add_in_axioms(theory_var v1, theory_var v2);
+        void add_in_axioms(enode *in, var_data *d);

        // Helper methods for axiom instantiation
        void add_membership_axioms(expr* elem, expr* set);
        void add_clause(expr_ref_vector const& clause);
-        void assert_clause(expr_ref_vector const &clause);
-        bool instantiate_false_lemma();
-        bool instantiate_unit_propagation();
-        bool instantiate_free_lemma();
-        lbool truth_value(expr *e);
+        bool assert_clause(expr_ref_vector const &clause);
+        void activate_clause(unsigned index);
+        bool activate_unasserted_clause();
        void add_immediate_axioms(app *atom);
-        bool add_membership_axioms();
        bool assume_eqs();
        bool is_new_axiom(expr *a, expr *b);
        app *mk_union(unsigned num_elems, expr *const *elems, sort* set_sort);
@ -137,6 +185,16 @@ namespace smt {
        void collect_members();
        void reset_set_members();

+        // manage union-find of theory variables
+        theory_var find(theory_var v) const { return m_find.find(v); }        
+        bool is_root(theory_var v) const { return m_find.is_root(v); }
+        trail_stack &get_trail_stack();
+        void merge_eh(theory_var v1, theory_var v2, theory_var, theory_var);
+        void after_merge_eh(theory_var r1, theory_var r2, theory_var v1, theory_var v2) {}
+        void unmerge_eh(theory_var v1, theory_var v2) {}
+
+        std::ostream &display_var(std::ostream &out, theory_var v) const;
+        
    public:
        theory_finite_set(context& ctx);
        ~theory_finite_set() override;
--- a/src/util/ref_vector.h
+++ b/src/util/ref_vector.h
@ -267,6 +267,10 @@ public:
        this->m_nodes.swap(other.m_nodes);
    }

+    void swap(unsigned idx1, unsigned idx2) noexcept {
+        this->super::swap(idx1, idx2);
+    }
+    
    class element_ref {
        T * &       m_ref;
        TManager &  m_manager;
--- a/src/util/trail.h
+++ b/src/util/trail.h
@ -423,4 +423,13 @@ public:
        m_scopes.shrink(new_lvl);
        m_region.pop_scope(num_scopes);
    }
+
+    unsigned size() const {
+        return m_trail_stack.size();
+    }
+
+    void shrink(unsigned new_size) {
+        SASSERT(new_size <= m_trail_stack.size());
+        m_trail_stack.shrink(new_size);
+    }
 };