// z3/src/smt/theory_finite_set.cpp

/*++
Copyright (c) 2025 Microsoft Corporation

Module Name:

    theory_finite_set.cpp

Abstract:

    Theory solver for finite sets.
    Implements axiom schemas for finite set operations.

Author:

    GitHub Copilot Agent 2025

Revision History:

--*/

#include "smt/theory_finite_set.h"
#include "smt/smt_context.h"
#include "smt/smt_model_generator.h"
#include "ast/ast_pp.h"

namespace smt {

    /**
    Constructor.
    Set up callback that adds axiom instantiations as clauses.
    **/
    theory_finite_set::theory_finite_set(context& ctx):
        theory(ctx, ctx.get_manager().mk_family_id("finite_set")),
        u(m),
        m_axioms(m)
    {
        // Every clause produced by the axiom generator is forwarded to
        // add_clause on this solver. The callback is kept as a named
        // std::function object and handed to the axiom generator as such.
        std::function<void(expr_ref_vector const &)> forward_clause =
            [this](expr_ref_vector const& lits) { add_clause(lits); };
        m_axioms.set_add_clause(forward_clause);
    }

    /**
    * Boolean atomic formulas for finite sets are one of:
    * (set.in x S)
    * (set.subset S T)
    * When an atomic formula is first created it is to be registered with the solver.
    * The internalize_atom method takes care of this.
    * Atomic formulas are special cases of terms (of non-Boolean type) so the first
    * effect is to register the atom as a term.
    * The second effect is to set up tracking and assert axioms.
    * Tracking:
    *    For every occurrence (set.in x_i S_i) we track x_i.
    * Axioms:
    *    We can immediately assert some axioms because they are unit literals:
    *    - (set.in x set.empty) is false
    *    - (set.subset S T) <=> (= (set.union S T) T)  (or (= (set.intersect S T) S))
    *    Axioms can be deferred until the atomic formulas become "relevant" for the theory solver.
    *
    */
    bool theory_finite_set::internalize_atom(app * atom, bool gate_ctx) {
        TRACE(finite_set, tout << "internalize_atom: " << mk_pp(atom, m) << "\n";);

        // An atom is first registered exactly like any other term.
        internalize_term(atom);

        // Only membership atoms (set.in x S) need extra bookkeeping.
        expr* member = nullptr, *container = nullptr;
        if (!u.is_in(atom, member, container))
            return true;

        // Remember the element's enode once; the trail entry removes it
        // again when the context backtracks.
        auto member_node = ctx.get_enode(member);
        if (m_elements.contains(member_node))
            return true;
        m_elements.insert(member_node);
        ctx.push_trail(insert_obj_trail(m_elements, member_node));

        // Immediate axioms are currently not asserted here:
        // add_immediate_axioms(atom);

        return true;
    }

    /**
     * When terms are registered with the solver, we need to ensure that:
     * - All subterms have an associated E-node
     * - Boolean terms are registered as boolean variables
     *   Registering a Boolean variable ensures that the solver will be notified about its truth value.
     * - Non-Boolean terms have an associated theory variable
     *   Registering a theory variable ensures that the solver will be notified about equalities and disequalities.
     *   The solver can use the theory variable to track auxiliary information about E-nodes.
    */
    bool theory_finite_set::internalize_term(app * term) {
        TRACE(finite_set, tout << "internalize_term: " << mk_pp(term, m) << "\n";);

        // Children are internalized before the term itself.
        for (expr* child : *term)
            ctx.internalize(child, false);

        bool const is_boolean = m.is_bool(term);

        // Boolean terms get a Boolean variable owned by this theory.
        if (is_boolean && !ctx.b_internalized(term)) {
            bool_var bv = ctx.mk_bool_var(term);
            ctx.set_var_theory(bv, get_id());
        }

        // Reuse the existing enode when there is one, otherwise create it.
        enode* node = ctx.e_internalized(term)
            ? ctx.get_enode(term)
            : ctx.mk_enode(term, false, is_boolean, true);

        // Make sure the enode carries a theory variable of this theory.
        if (!is_attached_to_var(node))
            ctx.attach_th_var(node, this, mk_var(node));

        return true;
    }

    /**
     * Equality callback for two theory variables v1 and v2.
     * This implementation only emits a trace line; it performs no propagation.
     */
    void theory_finite_set::new_eq_eh(theory_var v1, theory_var v2) {
        TRACE(finite_set, tout << "new_eq_eh: v" << v1 << " = v" << v2 << "\n";);
        // When two sets are equal, propagate membership constraints
        // This is handled by congruence closure, so no additional work needed here
    }

    /**
     * Disequality callback for two theory variables v1 and v2.
     * This implementation only emits a trace line; no axioms are instantiated here.
     */
    void theory_finite_set::new_diseq_eh(theory_var v1, theory_var v2) {
        TRACE(finite_set, tout << "new_diseq_eh: v" << v1 << " != v" << v2 << "\n";);
        // Disequalities could trigger extensionality axioms
        // For now, we rely on the final_check to handle this
    }

    /**
    * Final check for the finite set theory.
     * The Final Check method is called when the solver has assigned truth values to all Boolean variables.
     * It is responsible for asserting any remaining axioms and checking for inconsistencies.
     *
     * It ensures saturation with respect to the theory axioms:
     * - Set membership is saturated with respect to set operations.
     *    For every (set.in x S) where S is a union, assert (or propagate) (set.in x S1) or (set.in x S2)
     * - It saturates with respect to extensionality:
     *   Sets corresponding to shared variables having the same interpretation should also be congruent
    */
    final_check_status theory_finite_set::final_check_eh() {
        TRACE(finite_set, tout << "final_check_eh\n";);

        // Saturate membership constraints:
        // for every tracked, relevant element walk its parents and pick the
        // relevant membership atoms (set.in e S) whose first argument is in
        // the element's equivalence class. Axioms are then instantiated for
        // the element against every member of S's equivalence class.
        for (auto member : m_elements) {
            if (!ctx.is_relevant(member))
                continue;
            for (auto parent : enode::parents(member)) {
                expr* in_elem = nullptr, *in_set = nullptr;
                // The root comparison filters out parents where the element
                // occurs in the set position rather than the element position.
                bool const is_membership_of_member =
                    u.is_in(parent->get_expr(), in_elem, in_set) &&
                    member->get_root() == parent->get_arg(0)->get_root() &&
                    ctx.is_relevant(parent);
                if (!is_membership_of_member)
                    continue;
                for (auto set_sibling : *parent->get_arg(1))
                    instantiate_axioms(member->get_expr(), set_sibling->get_expr());
            }
        }

        // Try the lemma passes in priority order; the first one that asserts
        // something ends this round and requests another final check.
        bool const progress =
            instantiate_false_lemma() ||
            instantiate_unit_propagation() ||
            instantiate_free_lemma();

        // TODO: Extensionality axioms for sets
        return progress ? FC_CONTINUE : FC_DONE;
    }

    /**
    * Instantiate axioms for a given element in a set.
    */
    void theory_finite_set::instantiate_axioms(expr* elem, expr* set) {
        TRACE(finite_set, tout << "instantiate_axioms: " << mk_pp(elem, m) << " in " << mk_pp(set, m) << "\n";);

        // Trail entry that removes an (element, set) pair from the table on undo.
        struct undo_lemma_pair : public trail {
            obj_pair_hashtable<expr, expr> &m_table;
            expr *m_first, *m_second;
            undo_lemma_pair(obj_pair_hashtable<expr, expr> &table, expr *first, expr *second) :
                m_table(table), m_first(first), m_second(second) {}
            void undo() override {
                m_table.erase({m_first, m_second});
            }
        };

        // Each (element, set) pair is handled at most once while it is recorded.
        if (m_lemma_exprs.contains({elem, set}))
            return;
        m_lemma_exprs.insert({elem, set});
        ctx.push_trail(undo_lemma_pair(m_lemma_exprs, elem, set));

        // Dispatch on the shape of the set term.
        if (u.is_empty(set))
            m_axioms.in_empty_axiom(elem);
        else if (u.is_singleton(set))
            m_axioms.in_singleton_axiom(elem, set);
        else if (u.is_union(set))
            m_axioms.in_union_axiom(elem, set);
        else if (u.is_intersect(set))
            m_axioms.in_intersect_axiom(elem, set);
        else if (u.is_difference(set))
            m_axioms.in_difference_axiom(elem, set);
        else if (u.is_range(set))
            m_axioms.in_range_axiom(elem, set);
        else if (u.is_map(set)) {
            m_axioms.in_map_axiom(elem, set);
            m_axioms.in_map_image_axiom(elem, set);
        }
        else if (u.is_select(set))
            m_axioms.in_select_axiom(elem, set);

        // Size axiom for singleton sets.
        // TODO, such axioms don't belong here
        if (u.is_singleton(set))
            m_axioms.size_singleton_axiom(set);
    }

    /**
     * Queue a clause in m_lemmas and register a trail entry for the queue,
     * so the addition is tied to the context's trail.
     */
    void theory_finite_set::add_clause(expr_ref_vector const& clause) {
        TRACE(finite_set, tout << "add_clause: " << clause << "\n");
        m_lemmas.push_back(clause);
        ctx.push_trail(push_back_vector(m_lemmas));
    }

    /**
     * Allocate a new finite-set theory solver bound to the given context.
     */
    theory * theory_finite_set::mk_fresh(context * new_ctx) {
        theory * fresh_solver = alloc(theory_finite_set, *new_ctx);
        return fresh_solver;
    }

    /**
     * Write a description of this solver to out.
     * Currently only a header line is printed; no solver state is dumped.
     */
    void theory_finite_set::display(std::ostream & out) const {
        out << "theory_finite_set:\n";
    }

    /**
     * Model-generation initialization hook.
     * This implementation only emits a trace line and does not touch mg.
     */
    void theory_finite_set::init_model(model_generator & mg) {
        TRACE(finite_set, tout << "init_model\n";);
        // Model generation will use default interpretation for sets
        // The model will be constructed based on the membership literals that are true
    }

    /**
     * Model value hook for the enode n.
     * This implementation traces the node's expression and always returns nullptr;
     * no explicit set value is constructed.
     */
    model_value_proc * theory_finite_set::mk_value(enode * n, model_generator & mg) {
        TRACE(finite_set, tout << "mk_value: " << mk_pp(n->get_expr(), m) << "\n";);

        // For now, return nullptr to use default model construction
        // A complete implementation would construct explicit set values
        // based on true membership literals
        return nullptr;
    }

    /**
    * Lemmas that are currently assigned to false are conflicts.
    * They should be asserted as soon as possible.
    * Only the first conflict needs to be asserted.
    *
    */
    bool theory_finite_set::instantiate_false_lemma() {
        for (auto const& clause : m_lemmas) {
            bool all_false = all_of(clause, [&](expr *e) { return ctx.find_assignment(e) == l_false; });
            if (!all_false)
                continue;
            assert_clause(clause);
            return true;
        }
        return false;
    }

    /**
     * Lemmas that are unit propagating should be asserted as soon as possible and can be asserted in a batch.
     * It is possible to assert a unit propagating lemma as a clause.
     * A more efficient approach is to add a Theory propagation with the solver.
     * A theory propagation gets recorded on the assignment trail and the overhead of undoing it is baked in to backtracking.
     * A theory axiom is also removed during backtracking.
    */
    bool theory_finite_set::instantiate_unit_propagation() {
        // Set as soon as at least one unit-propagating lemma is asserted.
        // (The declaration was previously misspelled, leaving the uses below
        // referring to an undeclared identifier.)
        bool propagated = false;
        for (auto const &clause : m_lemmas) {
            // A lemma is unit propagating when it has no true literal and
            // exactly one unassigned literal; every other literal is false.
            expr *undef = nullptr;
            bool is_unit_propagating = true;
            for (auto e : clause) {
                switch (ctx.find_assignment(e)) {
                case l_false:
                    // False literals do not affect the classification.
                    break;
                case l_true:
                    // Lemma is already satisfied.
                    is_unit_propagating = false;
                    break;
                case l_undef:
                    // A second unassigned literal means the lemma is not unit.
                    if (undef != nullptr)
                        is_unit_propagating = false;
                    undef = e;
                    break;
                }
                if (!is_unit_propagating)
                    break;
            }
            // undef == nullptr means every literal is false; such lemmas are
            // not asserted by this pass.
            if (!is_unit_propagating || undef == nullptr)
                continue;
            assert_clause(clause);
            propagated = true;
        }
        return propagated;
    }

    /**
     * We assume the lemmas in the queue are necessary for completeness.
     * So they all have to be enforced through case analysis.
     * Lemmas with more than one unassigned literal are asserted here.
     * The solver will case split on the unassigned literals to satisfy the lemma.
    */
    bool theory_finite_set::instantiate_free_lemma() {
        for (auto const& clause : m_lemmas) {
            if (any_of(clause, [&](expr *e) { return ctx.find_assignment(e) == l_true; }))
                continue;
            assert_clause(clause);
            return true;
        }
        return false;
    }

    /**
     * Turn each expression of the clause into a literal and hand the
     * resulting literal vector to the context as a theory axiom of this theory.
     */
    void theory_finite_set::assert_clause(expr_ref_vector const &clause) {
        literal_vector lits;
        for (expr* atom : clause)
            lits.push_back(mk_literal(atom));
        ctx.mk_th_axiom(get_id(), lits);
    }

}  // namespace smt