3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-04-12 12:08:18 +00:00

Add datalog infrastructure for min aggregation function

This patch adds an instruction to the datalog interpreter and
constructs a new AST node for min aggregation functions.

The compiler is currently still work in progress and depends on
changes made to the handling of simple joins and the preprocessor.

Signed-off-by: Alex Horn <t-alexh@microsoft.com>
This commit is contained in:
Alex Horn 2015-06-10 18:09:56 +01:00
parent 004bf1471f
commit 140fb7942d
11 changed files with 418 additions and 3 deletions

View file

@ -44,7 +44,8 @@ namespace datalog {
m_num_sym("N"),
m_lt_sym("<"),
m_le_sym("<="),
m_rule_sym("R")
m_rule_sym("R"),
m_min_sym("min")
{
}
@ -490,6 +491,66 @@ namespace datalog {
return m_manager->mk_func_decl(m_clone_sym, 1, &s, s, info);
}
/**
In SMT2 syntax, we can write \c ((_ min R N) v_0 v_1 ... v_k)) where 0 <= N <= k,
R is a relation of sort V_0 x V_1 x ... x V_k and each v_i is a zero-arity function
(also known as a "constant" in SMT2 parlance) whose range is of sort V_i.
Example:
(define-sort number_t () (_ BitVec 2))
(declare-rel numbers (number_t number_t))
(declare-rel is_min (number_t number_t))
(declare-var x number_t)
(declare-var y number_t)
(rule (numbers #b00 #b11))
(rule (numbers #b00 #b01))
(rule (=> (and (numbers x y) ((_ min numbers 1) x y)) (is_min x y)))
This says that we want to find the mininum y grouped by x.
*/
func_decl * dl_decl_plugin::mk_min(decl_kind k, unsigned num_parameters, parameter const * parameters) {
if (num_parameters < 2) {
m_manager->raise_exception("invalid min aggregate definition due to missing parameters");
return 0;
}
parameter const & relation_parameter = parameters[0];
if (!relation_parameter.is_ast() || !is_func_decl(relation_parameter.get_ast())) {
m_manager->raise_exception("invalid min aggregate definition, first parameter is not a function declaration");
return 0;
}
func_decl* f = to_func_decl(relation_parameter.get_ast());
if (!m_manager->is_bool(f->get_range())) {
m_manager->raise_exception("invalid min aggregate definition, first paramater must be a predicate");
return 0;
}
parameter const & min_col_parameter = parameters[1];
if (!min_col_parameter.is_int()) {
m_manager->raise_exception("invalid min aggregate definition, second parameter must be an integer");
return 0;
}
if (min_col_parameter.get_int() < 0) {
m_manager->raise_exception("invalid min aggregate definition, second parameter must be non-negative");
return 0;
}
if ((unsigned)min_col_parameter.get_int() >= f->get_arity()) {
m_manager->raise_exception("invalid min aggregate definition, second parameter exceeds the arity of the relation");
return 0;
}
func_decl_info info(m_family_id, k, num_parameters, parameters);
SASSERT(f->get_info() == 0);
return m_manager->mk_func_decl(m_min_sym, f->get_arity(), f->get_domain(), f->get_range(), info);
}
func_decl * dl_decl_plugin::mk_func_decl(
decl_kind k, unsigned num_parameters, parameter const * parameters,
unsigned arity, sort * const * domain, sort * range) {
@ -617,6 +678,9 @@ namespace datalog {
break;
}
case OP_DL_MIN:
return mk_min(k, num_parameters, parameters);
default:
m_manager->raise_exception("operator not recognized");
return 0;
@ -627,7 +691,7 @@ namespace datalog {
}
void dl_decl_plugin::get_op_names(svector<builtin_name> & op_names, symbol const & logic) {
op_names.push_back(builtin_name(m_min_sym.bare_str(), OP_DL_MIN));
}
void dl_decl_plugin::get_sort_names(svector<builtin_name> & sort_names, symbol const & logic) {

View file

@ -50,6 +50,7 @@ namespace datalog {
OP_DL_LT,
OP_DL_REP,
OP_DL_ABS,
OP_DL_MIN,
LAST_RA_OP
};
@ -71,6 +72,7 @@ namespace datalog {
symbol m_lt_sym;
symbol m_le_sym;
symbol m_rule_sym;
symbol m_min_sym;
bool check_bounds(char const* msg, unsigned low, unsigned up, unsigned val) const;
bool check_domain(unsigned low, unsigned up, unsigned val) const;
@ -94,12 +96,69 @@ namespace datalog {
func_decl * mk_compare(decl_kind k, symbol const& sym, sort*const* domain);
func_decl * mk_clone(sort* r);
func_decl * mk_rule(unsigned arity);
func_decl * mk_min(decl_kind k, unsigned num_parameters, parameter const * parameters);
sort * mk_finite_sort(unsigned num_params, parameter const* params);
sort * mk_relation_sort(unsigned num_params, parameter const* params);
sort * mk_rule_sort();
public:
/**
Is \c decl a min aggregation function?
*/
static bool is_aggregate(const func_decl* const decl)
{
return decl->get_decl_kind() == OP_DL_MIN;
}
/**
\pre: is_aggregate(aggregate)
\returns function declaration of predicate which is subject to min aggregation function
*/
static func_decl * min_func_decl(const func_decl* const aggregate)
{
SASSERT(is_aggregate(aggregate));
parameter const & relation_parameter = aggregate->get_parameter(0);
return to_func_decl(relation_parameter.get_ast());
}
/**
\pre: is_aggregate(aggregate)
\returns column identifier (starting at zero) which is minimized by aggregation function
*/
static unsigned min_col(const func_decl* const aggregate)
{
SASSERT(is_aggregate(aggregate));
return (unsigned)aggregate->get_parameter(1).get_int();
}
/**
\pre: is_aggregate(aggregate)
\returns column identifiers for the "group by" in the given min aggregation function
*/
static unsigned_vector group_by_cols(const func_decl* const aggregate)
{
SASSERT(is_aggregate(aggregate));
unsigned _min_col = min_col(aggregate);
if (aggregate->get_arity() == 0U)
return unsigned_vector();
unsigned col_num = 0;
unsigned_vector cols(aggregate->get_arity() - 1U);
for (unsigned i = 0; i < cols.size(); ++i, ++col_num)
{
if (col_num == _min_col)
++col_num;
cols[i] = col_num;
}
return cols;
}
dl_decl_plugin();
virtual ~dl_decl_plugin() {}

View file

@ -346,6 +346,13 @@ namespace datalog {
bool is_neg_tail(unsigned i) const { SASSERT(i < m_tail_size); return GET_TAG(m_tail[i]) == 1; }
/**
A predicate P(Xj) can be annotated by adding an interpreted predicate of the form ((_ min P N) ...)
where N is the column number that should be used for the min aggregation function.
Such an interpreted predicate is an example for which this function returns true.
*/
bool is_min_tail(unsigned i) const { return dl_decl_plugin::is_aggregate(get_tail(i)->get_decl()); }
/**
Check whether predicate p is in the interpreted tail.

View file

@ -179,6 +179,7 @@ namespace datalog {
void compute_deps();
void compute_tc_deps();
bool stratified_negation();
public:
rule_set(context & ctx);
rule_set(const rule_set & rs);

View file

@ -485,4 +485,126 @@ namespace datalog {
brw.mk_or(disjs.size(), disjs.c_ptr(), fml);
}
class table_plugin::min_fn : public table_min_fn{
table_signature m_sig;
const unsigned_vector m_group_by_cols;
const unsigned m_col;
public:
min_fn(const table_signature & t_sig, const unsigned_vector& group_by_cols, const unsigned col)
: m_sig(t_sig),
m_group_by_cols(group_by_cols),
m_col(col) {}
virtual table_base* operator()(table_base const& t) {
//return reference_implementation(t);
return reference_implementation_with_hash(t);
}
private:
/**
Reference implementation with negation:
T1 = join(T, T) by group_cols
T2 = { (t1,t2) in T1 | t1[col] > t2[col] }
T3 = { t1 | (t1,t2) in T2 }
T4 = T \ T3
The point of this reference implementation is to show
that the minimum requires negation (set difference).
This is relevant for fixed point computations.
*/
virtual table_base * reference_implementation(const table_base & t) {
relation_manager & manager = t.get_manager();
table_join_fn * join_fn = manager.mk_join_fn(t, t, m_group_by_cols, m_group_by_cols);
table_base * join_table = (*join_fn)(t, t);
table_base::iterator join_table_it = join_table->begin();
table_base::iterator join_table_end = join_table->end();
table_fact row;
table_element i, j;
for (; join_table_it != join_table_end; ++join_table_it) {
join_table_it->get_fact(row);
i = row[m_col];
j = row[t.num_columns() + m_col];
if (i > j) {
continue;
}
join_table->remove_fact(row);
}
unsigned_vector cols(t.num_columns());
for (unsigned k = 0; k < cols.size(); ++k) {
cols[k] = cols.size() + k;
SASSERT(cols[k] < join_table->num_columns());
}
table_transformer_fn * project_fn = manager.mk_project_fn(*join_table, cols);
table_base * gt_table = (*project_fn)(*join_table);
for (unsigned k = 0; k < cols.size(); ++k) {
cols[k] = k;
SASSERT(cols[k] < t.num_columns());
SASSERT(cols[k] < gt_table->num_columns());
}
table_base * result = t.clone();
table_intersection_filter_fn * diff_fn = manager.mk_filter_by_negation_fn(*result, *gt_table, cols, cols);
(*diff_fn)(*result, *gt_table);
gt_table->deallocate();
return result;
}
typedef map < table_fact, table_element, svector_hash_proc<table_element_hash>,
vector_eq_proc<table_fact> > group_map;
// Thanks to Nikolaj who kindly helped with the second reference implementation!
virtual table_base * reference_implementation_with_hash(const table_base & t) {
group_map group;
table_base::iterator it = t.begin();
table_base::iterator end = t.end();
table_fact row, row2;
table_element current_value, min_value;
for (; it != end; ++it) {
it->get_fact(row);
current_value = row[m_col];
group_by(row, row2);
group_map::entry* entry = group.find_core(row2);
if (!entry) {
group.insert(row2, current_value);
}
else if (entry->get_data().m_value > current_value) {
entry->get_data().m_value = current_value;
}
}
table_base* result = t.get_plugin().mk_empty(m_sig);
table_base::iterator it2 = t.begin();
for (; it2 != end; ++it2) {
it2->get_fact(row);
current_value = row[m_col];
group_by(row, row2);
VERIFY(group.find(row2, min_value));
if (min_value == current_value) {
result->add_fact(row);
}
}
return result;
}
void group_by(table_fact const& in, table_fact& out) {
out.reset();
for (unsigned i = 0; i < m_group_by_cols.size(); ++i) {
out.push_back(in[m_group_by_cols[i]]);
}
}
};
table_min_fn * table_plugin::mk_min_fn(const table_base & t,
unsigned_vector & group_by_cols, const unsigned col) {
return alloc(table_plugin::min_fn, t.get_signature(), group_by_cols, col);
}
}

View file

@ -192,6 +192,29 @@ namespace datalog {
virtual base_object * operator()(const base_object & t1, const base_object & t2) = 0;
};
/**
\brief Aggregate minimum value
Informally, we want to group rows in a table \c t by \c group_by_cols and
return the minimum value in column \c col among each group.
Let \c t be a table with N columns.
Let \c group_by_cols be a set of column identifers for table \c t such that |group_by_cols| < N.
Let \c col be a column identifier for table \c t such that \c col is not in \c group_by_cols.
Let R_col be a set of rows in table \c t such that, for all rows r_i, r_j in R_col
and column identifiers k in \c group_by_cols, r_i[k] = r_j[k].
For each R_col, we want to restrict R_col to those rows whose value in column \c col is minimal.
min_fn(R, group_by_cols, col) =
{ row in R | forall row' in R . row'[group_by_cols] = row[group_by_cols] => row'[col] >= row[col] }
*/
class min_fn : public base_fn {
public:
virtual base_object * operator()(const base_object & t) = 0;
};
class transformer_fn : public base_fn {
public:
virtual base_object * operator()(const base_object & t) = 0;
@ -856,6 +879,7 @@ namespace datalog {
typedef table_infrastructure::base_fn base_table_fn;
typedef table_infrastructure::join_fn table_join_fn;
typedef table_infrastructure::min_fn table_min_fn;
typedef table_infrastructure::transformer_fn table_transformer_fn;
typedef table_infrastructure::union_fn table_union_fn;
typedef table_infrastructure::mutator_fn table_mutator_fn;
@ -1020,6 +1044,7 @@ namespace datalog {
class table_plugin : public table_infrastructure::plugin_object {
friend class relation_manager;
class min_fn;
protected:
table_plugin(symbol const& n, relation_manager & manager) : plugin_object(n, manager) {}
public:
@ -1027,6 +1052,9 @@ namespace datalog {
virtual bool can_handle_signature(const table_signature & s) { return s.functional_columns()==0; }
protected:
virtual table_min_fn * mk_min_fn(const table_base & t,
unsigned_vector & group_by_cols, const unsigned col);
/**
If the returned value is non-zero, the returned object must take ownership of \c mapper.
Otherwise \c mapper must remain unmodified.

View file

@ -25,6 +25,7 @@ Revision History:
#include"rel_context.h"
#include"debug.h"
#include"warning.h"
#include"dl_table_relation.h"
namespace datalog {
@ -883,6 +884,59 @@ namespace datalog {
removed_cols, result);
}
class instr_min : public instruction {
reg_idx m_source_reg;
reg_idx m_target_reg;
unsigned_vector m_group_by_cols;
unsigned m_min_col;
public:
instr_min(reg_idx source_reg, reg_idx target_reg, const unsigned_vector & group_by_cols, unsigned min_col)
: m_source_reg(source_reg),
m_target_reg(target_reg),
m_group_by_cols(group_by_cols),
m_min_col(min_col) {
}
virtual bool perform(execution_context & ctx) {
log_verbose(ctx);
if (!ctx.reg(m_source_reg)) {
ctx.make_empty(m_target_reg);
return true;
}
const relation_base & s = *ctx.reg(m_source_reg);
if (!s.from_table()) {
throw default_exception("relation is not a table %s",
s.get_plugin().get_name().bare_str());
}
++ctx.m_stats.m_min;
const table_relation & tr = static_cast<const table_relation &>(s);
const table_base & source_t = tr.get_table();
relation_manager & r_manager = s.get_manager();
const relation_signature & r_sig = s.get_signature();
table_min_fn * fn = r_manager.mk_min_fn(source_t, m_group_by_cols, m_min_col);
table_base * target_t = (*fn)(source_t);
TRACE("dl",
tout << "% ";
target_t->display(tout);
tout << "\n";);
relation_base * target_r = r_manager.mk_table_relation(r_sig, target_t);
ctx.set_reg(m_target_reg, target_r);
return true;
}
virtual void display_head_impl(execution_context const& ctx, std::ostream & out) const {
out << " MIN AGGR ";
}
virtual void make_annotations(execution_context & ctx) {
}
};
instruction * instruction::mk_min(reg_idx source, reg_idx target, const unsigned_vector & group_by_cols,
const unsigned min_col) {
return alloc(instr_min, source, target, group_by_cols, min_col);
}
class instr_select_equal_and_project : public instruction {
reg_idx m_src;

View file

@ -93,6 +93,7 @@ namespace datalog {
unsigned m_filter_interp_project;
unsigned m_filter_id;
unsigned m_filter_eq;
unsigned m_min;
stats() { reset(); }
void reset() { memset(this, 0, sizeof(*this)); }
};
@ -284,6 +285,8 @@ namespace datalog {
static instruction * mk_join_project(reg_idx rel1, reg_idx rel2, unsigned joined_col_cnt,
const unsigned * cols1, const unsigned * cols2, unsigned removed_col_cnt,
const unsigned * removed_cols, reg_idx result);
static instruction * mk_min(reg_idx source, reg_idx target, const unsigned_vector & group_by_cols,
const unsigned min_col);
static instruction * mk_rename(reg_idx src, unsigned cycle_len, const unsigned * permutation_cycle,
reg_idx tgt);
static instruction * mk_filter_by_negation(reg_idx tgt, reg_idx neg_rel, unsigned col_cnt,

View file

@ -1021,6 +1021,11 @@ namespace datalog {
return res;
}
table_min_fn * relation_manager::mk_min_fn(const table_base & t,
unsigned_vector & group_by_cols, const unsigned col)
{
return t.get_plugin().mk_min_fn(t, group_by_cols, col);
}
class relation_manager::auxiliary_table_transformer_fn {
table_fact m_row;

View file

@ -251,6 +251,9 @@ namespace datalog {
return mk_join_fn(t1, t2, cols1.size(), cols1.c_ptr(), cols2.c_ptr(), allow_product_relation);
}
table_min_fn * mk_min_fn(const table_base & t,
unsigned_vector & group_by_cols, const unsigned col);
/**
\brief Return functor that transforms a table into one that lacks columns listed in
\c removed_cols array.

View file

@ -1,4 +1,4 @@
#ifdef _WINDOWS
#if defined(_WINDOWS) || defined(_CYGWIN)
#include "dl_context.h"
#include "dl_table.h"
#include "dl_register_engine.h"
@ -91,9 +91,78 @@ void test_dl_bitvector_table() {
test_table(mk_bv_table);
}
void test_table_min() {
std::cout << "----- test_table_min -----\n";
datalog::table_signature sig;
sig.push_back(2);
sig.push_back(4);
sig.push_back(8);
smt_params params;
ast_manager ast_m;
datalog::register_engine re;
datalog::context ctx(ast_m, re, params);
datalog::relation_manager & m = ctx.get_rel_context()->get_rmanager();
m.register_plugin(alloc(datalog::bitvector_table_plugin, m));
datalog::table_base* tbl = mk_bv_table(m, sig);
datalog::table_base& table = *tbl;
datalog::table_fact row, row1, row2, row3;
row.push_back(1);
row.push_back(2);
row.push_back(5);
// Group (1,2,*)
row1 = row;
row[2] = 6;
row2 = row;
row[2] = 5;
row3 = row;
table.add_fact(row1);
table.add_fact(row2);
table.add_fact(row3);
// Group (1,3,*)
row[1] = 3;
row1 = row;
row[2] = 7;
row2 = row;
row[2] = 4;
row3 = row;
table.add_fact(row1);
table.add_fact(row2);
table.add_fact(row3);
table.display(std::cout);
unsigned_vector group_by(2);
group_by[0] = 0;
group_by[1] = 1;
datalog::table_min_fn * min_fn = m.mk_min_fn(table, group_by, 2);
datalog::table_base * min_tbl = (*min_fn)(table);
min_tbl->display(std::cout);
row[1] = 2;
row[2] = 5;
SASSERT(min_tbl->contains_fact(row));
row[1] = 3;
row[2] = 4;
SASSERT(min_tbl->contains_fact(row));
dealloc(min_fn);
min_tbl->deallocate();
tbl->deallocate();
}
void tst_dl_table() {
test_dl_bitvector_table();
test_table_min();
}
#else
void tst_dl_table() {