Z3 sources

Signed-off-by: Leonardo de Moura <leonardo@microsoft.com>
2025-07-18 02:16:40 +00:00 · 2012-10-02 11:35:25 -07:00 · 2012-10-02 11:35:25 -07:00 · e9eab22e5c
commit e9eab22e5c
parent 3f9edad676
1186 changed files with 381859 additions and 0 deletions
--- a/lib/dl_mk_similarity_compressor.cpp
+++ b/lib/dl_mk_similarity_compressor.cpp
@ -0,0 +1,537 @@
+/*++
+Copyright (c) 2006 Microsoft Corporation
+
+Module Name:
+
+    dl_mk_similarity_compressor.cpp
+
+Abstract:
+
+    <abstract>
+
+Author:
+
+    Krystof Hoder (t-khoder) 2010-10-22.
+
+Revision History:
+
+--*/
+
+#include<utility>
+#include<sstream>
+#include"dl_mk_similarity_compressor.h"
+
+namespace datalog {
+
+    mk_similarity_compressor::mk_similarity_compressor(context & ctx, unsigned threshold_count) :
+            plugin(5000),
+            m_context(ctx),
+            m_manager(ctx.get_manager()),
+            m_threshold_count(threshold_count),
+            m_result_rules(ctx.get_rule_manager()),
+            m_pinned(m_manager) {
+        SASSERT(threshold_count>1);
+    }
+
+    void mk_similarity_compressor::reset() {
+        m_rules.reset();
+        m_result_rules.reset();
+        m_pinned.reset();
+    }
+
+    /**
+       Allows to traverse head and positive tails in a single for loop starting from -1
+     */
+    app * get_by_tail_index(rule * r, int idx) {
+        if(idx==-1) {
+            return r->get_head();
+        }
+        SASSERT(idx<static_cast<int>(r->get_positive_tail_size()));
+        return r->get_tail(idx);
+    }
+
+    template<typename T>
+    int aux_compare(T a, T b) {
+        return (a>b) ? 1 : ( (a==b) ? 0 : -1);
+    }
+
+    int compare_var_args(app* t1, app* t2) {
+        SASSERT(t1->get_num_args()==t2->get_num_args());
+        int res;
+        unsigned n = t1->get_num_args();
+        for(unsigned i=0; i<n; i++) {
+            expr * a1 = t1->get_arg(i);
+            expr * a2 = t2->get_arg(i);
+
+            res = aux_compare(is_var(a1), is_var(a2));
+            if(res!=0) { return res; }
+            if(is_var(a1)) {
+                res = aux_compare(to_var(a1)->get_idx(), to_var(a2)->get_idx());
+                if(res!=0) { return res; }
+            }
+        }
+        return 0;
+    }
+
+    int compare_args(app* t1, app* t2, int & skip_countdown) {
+        SASSERT(t1->get_num_args()==t2->get_num_args());
+        int res;
+        unsigned n = t1->get_num_args();
+        for(unsigned i=0; i<n; i++) {
+            if(is_var(t1->get_arg(i))) {
+                SASSERT(t1->get_arg(i)==t2->get_arg(i));
+                continue;
+            }
+            if((skip_countdown--)==0) {
+                continue;
+            }
+            res = aux_compare(t1->get_arg(i), t2->get_arg(i));
+            if(res!=0) { return res; }
+        }
+        return 0;
+    }
+
+    /**
+       \brief Return 0 if r1 and r2 could be similar. If the rough similarity
+       equaivelance class of r1 is greater than the one of r2, return 1; otherwise return -1.
+
+       Two rules are in the same rough similarity class if they differ only in constant arguments
+       of positive uninterpreted predicates.
+     */
+    int rough_compare(rule * r1, rule * r2) {
+        int res = aux_compare(r1->get_tail_size(), r2->get_tail_size());
+        if(res!=0) { return res; }
+        res = aux_compare(r1->get_uninterpreted_tail_size(), r2->get_uninterpreted_tail_size());
+        if(res!=0) { return res; }
+        res = aux_compare(r1->get_positive_tail_size(), r2->get_positive_tail_size());
+        if(res!=0) { return res; }
+
+        int pos_tail_sz = r1->get_positive_tail_size();
+        for(int i=-1; i<pos_tail_sz; i++) {
+            app * t1 = get_by_tail_index(r1, i);
+            app * t2 = get_by_tail_index(r2, i);
+            res = aux_compare(t1->get_decl(), t2->get_decl());
+            if(res!=0) { return res; }
+            res = compare_var_args(t1, t2);
+            if(res!=0) { return res; }
+        }
+
+        unsigned tail_sz = r1->get_tail_size();
+        for(unsigned i=pos_tail_sz; i<tail_sz; i++) {
+            res = aux_compare(r1->get_tail(i), r2->get_tail(i));
+            if(res!=0) { return res; }
+        }
+        
+        return 0;
+    }
+
+    /**
+       \c r1 and \c r2 must be equal according to the \c rough_compare function for this function
+       to be called.
+     */
+    int total_compare(rule * r1, rule * r2, int skipped_arg_index = INT_MAX) {
+        SASSERT(rough_compare(r1, r2)==0);
+        int pos_tail_sz = r1->get_positive_tail_size();
+        for(int i=-1; i<pos_tail_sz; i++) {
+            int res = compare_args(get_by_tail_index(r1, i), get_by_tail_index(r2, i), skipped_arg_index);
+            if(res!=0) { return res; }
+        }
+        return 0;
+    }
+
+    class const_info {
+        int m_tail_index;
+        unsigned m_arg_index;
+        bool m_has_parent;
+        /** Parent is a constant that appears earlier in the rule and has always the same value
+            as this constant. */
+        unsigned m_parent_index;
+    public:
+
+        const_info(int tail_index, unsigned arg_index) 
+            : m_tail_index(tail_index), m_arg_index(arg_index), m_has_parent(false) {}
+
+        int tail_index() const { return m_tail_index; }
+        unsigned arg_index() const { return m_arg_index; }
+        bool has_parent() const { return m_has_parent; }
+        unsigned parent_index() const { SASSERT(has_parent()); return m_parent_index; }
+
+        void set_parent_index(unsigned idx) { 
+            SASSERT(!m_has_parent);
+            m_has_parent = true;
+            m_parent_index = idx;
+        }
+    };
+
+    typedef svector<const_info> info_vector;
+
+    void collect_const_indexes(app * t, int tail_index, info_vector & res) {
+        unsigned n = t->get_num_args();
+        for(unsigned i=0; i<n; i++) {
+            if(is_var(t->get_arg(i))) {
+                continue;
+            }
+            res.push_back(const_info(tail_index, i));
+        }
+    }
+
+    void collect_const_indexes(rule * r, info_vector & res) {
+        collect_const_indexes(r->get_head(), -1, res);
+        unsigned pos_tail_sz = r->get_positive_tail_size();
+        for(unsigned i=0; i<pos_tail_sz; i++) {
+            collect_const_indexes(r->get_tail(i), i, res);
+        }
+    }
+
+    template<class T>
+    void collect_orphan_consts(rule * r, const info_vector & const_infos, T & tgt) {
+        unsigned const_cnt = const_infos.size();
+        tgt.reset();
+        for(unsigned i=0; i<const_cnt; i++) {
+            const_info inf = const_infos[i];
+            if(inf.has_parent()) {
+                continue;
+            }
+            app * pred = get_by_tail_index(r, inf.tail_index());
+            tgt.push_back(to_app(pred->get_arg(inf.arg_index())));
+            SASSERT(tgt.back()->get_num_args()==0);
+        }
+    }
+    template<class T>
+    void collect_orphan_sorts(rule * r, const info_vector & const_infos, T & tgt) {
+        unsigned const_cnt = const_infos.size();
+        tgt.reset();
+        for(unsigned i=0; i<const_cnt; i++) {
+            const_info inf = const_infos[i];
+            if(inf.has_parent()) {
+                continue;
+            }
+            app * pred = get_by_tail_index(r, inf.tail_index());
+            tgt.push_back(pred->get_decl()->get_domain(inf.arg_index()));
+        }
+    }
+
+    /**
+       \brief From the \c tail_indexes and \c arg_indexes remove elements corresponding to constants
+       that are the same in rules \c *first ... \c *(after_last-1).
+     */
+    void remove_stable_constants(rule_vector::iterator first, rule_vector::iterator after_last, 
+            info_vector & const_infos) {
+        SASSERT(after_last-first>1);
+        unsigned const_cnt = const_infos.size();
+        ptr_vector<app> vals;
+        rule * r = *(first++);
+        collect_orphan_consts(r, const_infos, vals);
+        SASSERT(vals.size()==const_cnt);
+        rule_vector::iterator it = first;
+        for(; it!=after_last; ++it) {
+            for(unsigned i=0; i<const_cnt; i++) {
+                app * pred = get_by_tail_index(*it, const_infos[i].tail_index());
+                app * val = to_app(pred->get_arg(const_infos[i].arg_index()));
+                if(vals[i]!=val) {
+                    vals[i] = 0;
+                }
+            }
+        }
+        unsigned removed_cnt = 0;
+        for(unsigned i=0; i<const_cnt; i++) {
+            if(vals[i]!=0) {
+                removed_cnt++;
+            }
+            else if(removed_cnt!=0) {
+                const_infos[i-removed_cnt] = const_infos[i];
+            }
+        }
+        if(removed_cnt!=0) {
+            const_infos.shrink(const_cnt-removed_cnt);
+        }
+    }
+
+    /**
+       \brief When function returns, \c parents will contain for each constant the index of the
+       first constant that is equal to it in all the rules. If there is no such, it will contain
+       its own index.
+     */
+    void detect_equal_constants(rule_vector::iterator first, rule_vector::iterator after_last, 
+            info_vector & const_infos) {
+        SASSERT(first!=after_last);
+        unsigned const_cnt = const_infos.size();
+        ptr_vector<app> vals;
+        ptr_vector<sort> sorts;
+        rule * r = *(first++);
+        collect_orphan_consts(r, const_infos, vals);
+        collect_orphan_sorts(r, const_infos, sorts);
+        SASSERT(vals.size()==const_cnt);
+        vector<unsigned_vector> possible_parents(const_cnt);
+        for(unsigned i=1; i<const_cnt; i++) {
+            for(unsigned j=0; j<i; j++) {
+                if(vals[i]==vals[j] && sorts[i]==sorts[j]) {
+                    possible_parents[i].push_back(j);
+                }
+            }
+        }
+        rule_vector::iterator it = first;
+        for(; it!=after_last; ++it) {
+            collect_orphan_consts(*it, const_infos, vals);
+            for(unsigned i=1; i<const_cnt; i++) {
+                unsigned_vector & ppars = possible_parents[i];
+                unsigned j=0;
+                while(j<ppars.size()) {
+                    if(vals[i]!=vals[ppars[j]]) {
+                        ppars[j] = ppars.back();
+                        ppars.pop_back();
+                    }
+                    else {
+                        j++;
+                    }
+                }
+            }
+        }
+        for(unsigned i=0; i<const_cnt; i++) {
+            unsigned parent = i;
+            unsigned_vector & ppars = possible_parents[i];
+            unsigned ppars_sz = ppars.size();
+            for(unsigned j=0; j<ppars_sz; j++) {
+                if(ppars[j]<parent) {
+                    parent = ppars[j];
+                }
+            }
+            if(parent!=i) {
+                const_infos[i].set_parent_index(parent);
+            }
+        }
+    }
+
+    unsigned get_constant_count(rule * r) {
+        unsigned res = r->get_head()->get_num_args() - count_variable_arguments(r->get_head());
+        unsigned pos_tail_sz = r->get_positive_tail_size();
+        for(unsigned i=0; i<pos_tail_sz; i++) {
+            res+= r->get_tail(i)->get_num_args() - count_variable_arguments(r->get_tail(i));
+        }
+        return res;
+    }
+
+    bool initial_comparator(rule * r1, rule * r2) {
+        int res = rough_compare(r1, r2);
+        if(res!=0) { return res>0; }
+        return total_compare(r1, r2)>0;
+    }
+
+    class arg_ignoring_comparator {
+        unsigned m_ignored_index;
+    public:
+        arg_ignoring_comparator(unsigned ignored_index) : m_ignored_index(ignored_index) {}
+        bool operator()(rule * r1, rule * r2) const {
+            return total_compare(r1, r2, m_ignored_index)>0;
+        }
+        bool eq(rule * r1, rule * r2) const {
+            return total_compare(r1, r2, m_ignored_index)==0;
+        }
+    };
+
+    void mk_similarity_compressor::merge_class(rule_vector::iterator first, 
+            rule_vector::iterator after_last) {
+        SASSERT(after_last-first>1);
+        info_vector const_infos;
+        rule * r = *first; //an arbitrary representative of the class
+        collect_const_indexes(r, const_infos);
+        remove_stable_constants(first, after_last, const_infos);
+
+        unsigned const_cnt = const_infos.size();
+        SASSERT(const_cnt>0);
+
+        detect_equal_constants(first, after_last, const_infos);
+
+
+        //The aux relation contains column for each constant which does not have an earlier constant
+        //that it is equal to (i.e. only has no parent)
+        ptr_vector<sort> aux_domain;
+        collect_orphan_sorts(r, const_infos, aux_domain);
+
+        func_decl* head_pred = r->get_head()->get_decl();
+        symbol const& name_prefix = head_pred->get_name();
+        std::string name_suffix = "sc_" + to_string(const_cnt);
+        func_decl * aux_pred = m_context.mk_fresh_head_predicate(name_prefix, symbol(name_suffix.c_str()), 
+            aux_domain.size(), aux_domain.c_ptr(), head_pred);
+        m_pinned.push_back(aux_pred);
+
+        relation_fact val_fact(m_manager, const_cnt);
+        rule_vector::iterator it = first;
+        for(; it!=after_last; ++it) {
+            collect_orphan_consts(*it, const_infos, val_fact);
+            m_context.add_fact(aux_pred, val_fact);
+        }
+        m_context.get_rmanager().mark_saturated(aux_pred);
+
+        app * new_head = r->get_head();
+        ptr_vector<app> new_tail;
+        svector<bool> new_negs;
+        unsigned tail_sz = r->get_tail_size();
+        for(unsigned i=0; i<tail_sz; i++) {
+            new_tail.push_back(r->get_tail(i));
+            new_negs.push_back(r->is_neg_tail(i));
+        }
+
+        var_counter var_ctr;
+        var_ctr.count_vars(m_manager, r);
+        unsigned max_var_idx, new_var_idx_base;
+        if(var_ctr.get_max_positive(max_var_idx)) {
+            new_var_idx_base = max_var_idx+1;
+        }
+        else {
+            new_var_idx_base = 0;
+        }
+
+        ptr_vector<var> const_vars; //variables at indexes of their corresponding constants
+        expr_ref_vector aux_vars(m_manager); //variables as arguments for the auxiliary predicate
+
+        unsigned aux_column_index = 0;
+
+        for(unsigned i=0; i<const_cnt; ) {
+            int tail_idx = const_infos[i].tail_index();
+            app * & mod_tail = (tail_idx==-1) ? new_head : new_tail[tail_idx];
+            ptr_vector<expr> mod_args(mod_tail->get_num_args(), mod_tail->get_args());
+
+            for(; i<const_cnt && const_infos[i].tail_index()==tail_idx; i++) { //here the outer loop counter is modified
+                const_info & inf = const_infos[i];
+                var * mod_var;
+                if(!inf.has_parent()) {
+                    mod_var = m_manager.mk_var(new_var_idx_base+aux_column_index, 
+                        aux_domain[aux_column_index]);
+                    aux_column_index++;
+                    aux_vars.push_back(mod_var);
+                }
+                else {
+                    mod_var = const_vars[inf.parent_index()];
+                }
+                const_vars.push_back(mod_var);
+                mod_args[inf.arg_index()] = mod_var;
+            }
+
+            app * upd_tail = m_manager.mk_app(mod_tail->get_decl(), mod_args.c_ptr());
+            m_pinned.push_back(upd_tail);
+            mod_tail = upd_tail;
+        }
+
+        app_ref aux_tail(m_manager.mk_app(aux_pred, aux_vars.c_ptr()), m_manager);
+        new_tail.push_back(aux_tail);
+        new_negs.push_back(false);
+
+        rule * new_rule = m_context.get_rule_manager().mk(new_head, new_tail.size(), new_tail.c_ptr(), 
+            new_negs.c_ptr());
+        m_result_rules.push_back(new_rule);
+
+        //TODO: allow for a rule to have multiple parent objects
+        new_rule->set_accounting_parent_object(m_context, r);
+        m_modified = true;
+    }
+
+    void mk_similarity_compressor::process_class(rule_vector::iterator first, 
+            rule_vector::iterator after_last) {
+        SASSERT(first!=after_last);
+        //remove duplicates
+        {
+            rule_vector::iterator it = first;
+            rule_vector::iterator prev = it;
+            ++it;
+            while(it!=after_last) {
+                if(it!=after_last && total_compare(*prev, *it)==0) {
+                    --after_last;
+                    std::swap(*it, *after_last);
+                    m_modified = true;
+                }
+                else {
+                    prev = it;
+                    ++it;
+                }
+            }
+        }
+        SASSERT(first!=after_last);
+
+        unsigned const_cnt = get_constant_count(*first);
+#if 0
+        for(unsigned ignored_index=0; ignored_index<const_cnt; ignored_index++) {
+            arg_ignoring_comparator comparator(ignored_index);
+            std::sort(first, after_last, comparator);
+
+            rule_vector::iterator it = first;
+            rule_vector::iterator grp_begin = it;
+            unsigned grp_size=0;
+            while(it!=after_last) {
+                rule_vector::iterator prev = it;
+                ++it;
+                grp_size++;
+                if(it==after_last || !comparator.eq(*prev, *it)) {
+                    if(grp_size>m_threshold_count) {
+                        merge_class(grp_begin, it);
+                        //group was processed, so we remove it from the class
+                        if(it==after_last) {
+                            after_last=grp_begin;
+                            it=after_last;
+                        }
+                        else {
+                            while(it!=grp_begin) {
+                                std::swap(*--it, *--after_last);
+                            }
+                        }
+                    }
+                    grp_begin = it;
+                    grp_size = 0;
+                }
+            }
+        }
+#endif
+        //TODO: compress also rules with pairs (or tuples) of equal constants
+
+#if 1
+        if(const_cnt>0) {
+            unsigned rule_cnt = static_cast<unsigned>(after_last-first);
+            if(rule_cnt>m_threshold_count) {
+                merge_class(first, after_last);
+                return;
+            }
+        }
+#endif
+
+        //put rules which weren't merged into result
+        rule_vector::iterator it = first;
+        for(; it!=after_last; ++it) {
+            m_result_rules.push_back(*it);
+        }
+    }
+
+    rule_set * mk_similarity_compressor::operator()(rule_set const & source, model_converter_ref& mc, proof_converter_ref& pc) {
+        // TODO mc, pc
+        m_modified = false;
+        unsigned init_rule_cnt = source.get_num_rules();
+        SASSERT(m_rules.empty());
+        for(unsigned i=0; i<init_rule_cnt; i++) {
+            m_rules.push_back(source.get_rule(i));
+        }
+
+        std::sort(m_rules.begin(), m_rules.end(), initial_comparator);
+
+        rule_vector::iterator it = m_rules.begin();
+        rule_vector::iterator end = m_rules.end();
+        rule_vector::iterator cl_begin = it;
+        while(it!=end) {
+            rule_vector::iterator prev = it;
+            ++it;
+            if(it==end || rough_compare(*prev, *it)!=0) {
+                process_class(cl_begin, it);
+                cl_begin = it;
+            }
+        }
+
+        rule_set * result = static_cast<rule_set *>(0);
+        if(m_modified) {
+            result = alloc(rule_set, m_context);
+            unsigned fin_rule_cnt = m_result_rules.size();
+            for(unsigned i=0; i<fin_rule_cnt; i++) {
+                result->add_rule(m_result_rules.get(i));
+            }
+        }
+        reset();
+        return result;
+    }
+};