mirror of
https://github.com/Z3Prover/z3
synced 2025-04-10 19:27:06 +00:00
481 lines
18 KiB
C++
481 lines
18 KiB
C++
/*++
|
|
Copyright (c) 2006 Microsoft Corporation
|
|
|
|
Module Name:
|
|
|
|
dl_sparse_table.h
|
|
|
|
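Abstract:

    Sparse table implementation for the Datalog engine: a table plugin, a hashed store of
    fixed-size byte entries, and a table class that packs column values into those entries.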
|
|
|
|
Author:
|
|
|
|
Krystof Hoder (t-khoder) 2010-09-01.
|
|
|
|
Revision History:
|
|
|
|
--*/
|
|
|
|
#ifndef _DL_SPARSE_TABLE_H_
|
|
#define _DL_SPARSE_TABLE_H_
|
|
|
|
#include<cstring>
#include<iostream>
#include<list>
#include<utility>
|
|
|
|
#include "ast.h"
|
|
#include "bit_vector.h"
|
|
#include "buffer.h"
|
|
#include "hashtable.h"
|
|
#include "map.h"
|
|
#include "ref_vector.h"
|
|
#include "vector.h"
|
|
|
|
#include "dl_base.h"
|
|
|
|
|
|
namespace datalog {
|
|
class sparse_table;
|
|
|
|
class sparse_table_plugin : public table_plugin {
|
|
friend class sparse_table;
|
|
protected:
|
|
class join_project_fn;
|
|
class union_fn;
|
|
class transformer_fn;
|
|
class rename_fn;
|
|
class project_fn;
|
|
class negation_filter_fn;
|
|
class select_equal_and_project_fn;
|
|
|
|
typedef ptr_vector<sparse_table> sp_table_vector;
|
|
typedef map<table_signature, sp_table_vector *,
|
|
table_signature::hash, table_signature::eq > table_pool;
|
|
|
|
table_pool m_pool;
|
|
|
|
void recycle(sparse_table * t);
|
|
|
|
void garbage_collect();
|
|
|
|
void reset();
|
|
|
|
static bool join_involves_functional(const table_signature & s1, const table_signature & s2,
|
|
unsigned col_cnt, const unsigned * cols1, const unsigned * cols2);
|
|
|
|
public:
|
|
typedef sparse_table table;
|
|
|
|
sparse_table_plugin(relation_manager & manager);
|
|
~sparse_table_plugin();
|
|
|
|
virtual bool can_handle_signature(const table_signature & s)
|
|
{ return s.size()>0; }
|
|
|
|
virtual table_base * mk_empty(const table_signature & s);
|
|
sparse_table * mk_clone(const sparse_table & t);
|
|
|
|
protected:
|
|
virtual table_join_fn * mk_join_fn(const table_base & t1, const table_base & t2,
|
|
unsigned col_cnt, const unsigned * cols1, const unsigned * cols2);
|
|
virtual table_join_fn * mk_join_project_fn(const table_base & t1, const table_base & t2,
|
|
unsigned col_cnt, const unsigned * cols1, const unsigned * cols2, unsigned removed_col_cnt,
|
|
const unsigned * removed_cols);
|
|
virtual table_union_fn * mk_union_fn(const table_base & tgt, const table_base & src,
|
|
const table_base * delta);
|
|
virtual table_transformer_fn * mk_project_fn(const table_base & t, unsigned col_cnt,
|
|
const unsigned * removed_cols);
|
|
virtual table_transformer_fn * mk_rename_fn(const table_base & t, unsigned permutation_cycle_len,
|
|
const unsigned * permutation_cycle);
|
|
virtual table_transformer_fn * mk_select_equal_and_project_fn(const table_base & t,
|
|
const table_element & value, unsigned col);
|
|
virtual table_intersection_filter_fn * mk_filter_by_negation_fn(const table_base & t,
|
|
const table_base & negated_obj, unsigned joined_col_cnt,
|
|
const unsigned * t_cols, const unsigned * negated_cols);
|
|
};
|
|
|
|
class entry_storage {
|
|
public:
|
|
typedef unsigned store_offset;
|
|
private:
|
|
typedef svector<char> storage;
|
|
|
|
class offset_hash_proc {
|
|
storage & m_storage;
|
|
unsigned m_unique_entry_size;
|
|
public:
|
|
offset_hash_proc(storage & s, unsigned unique_entry_sz)
|
|
: m_storage(s), m_unique_entry_size(unique_entry_sz) {}
|
|
unsigned operator()(store_offset ofs) const {
|
|
return string_hash(m_storage.c_ptr()+ofs, m_unique_entry_size, 0);
|
|
}
|
|
};
|
|
|
|
class offset_eq_proc {
|
|
storage & m_storage;
|
|
unsigned m_unique_entry_size;
|
|
public:
|
|
offset_eq_proc(storage & s, unsigned unique_entry_sz)
|
|
: m_storage(s), m_unique_entry_size(unique_entry_sz) {}
|
|
bool operator()(store_offset o1, store_offset o2) const {
|
|
const char * base = m_storage.c_ptr();
|
|
return memcmp(base+o1, base+o2, m_unique_entry_size)==0;
|
|
}
|
|
};
|
|
|
|
typedef hashtable<store_offset, offset_hash_proc, offset_eq_proc> storage_indexer;
|
|
|
|
static const store_offset NO_RESERVE = UINT_MAX;
|
|
|
|
unsigned m_entry_size;
|
|
unsigned m_unique_part_size;
|
|
unsigned m_data_size;
|
|
/**
|
|
Invariant: Every or all but one blocks of length \c m_entry_size in the \c m_data vector
|
|
are unique sequences of bytes and have their offset stored in the \c m_data_indexer hashtable.
|
|
If the offset of the last block is not stored in the hashtable, it is stored in the \c m_reserve
|
|
variable. Otherwise \c m_reserve==NO_RESERVE.
|
|
|
|
The size of m_data is actually 8 bytes larger than stated in m_data_size, so that we may
|
|
deref an uint64 pointer at the end of the array.
|
|
*/
|
|
storage m_data;
|
|
storage_indexer m_data_indexer;
|
|
store_offset m_reserve;
|
|
public:
|
|
entry_storage(unsigned entry_size, unsigned functional_size = 0, unsigned init_size = 0)
|
|
: m_entry_size(entry_size),
|
|
m_unique_part_size(entry_size-functional_size),
|
|
m_data_indexer(next_power_of_two(std::max(8u,init_size)),
|
|
offset_hash_proc(m_data, m_unique_part_size), offset_eq_proc(m_data, m_unique_part_size)),
|
|
m_reserve(NO_RESERVE) {
|
|
SASSERT(entry_size>0);
|
|
SASSERT(functional_size<=entry_size);
|
|
resize_data(init_size);
|
|
resize_data(0);
|
|
}
|
|
entry_storage(const entry_storage &s)
|
|
: m_entry_size(s.m_entry_size),
|
|
m_unique_part_size(s.m_unique_part_size),
|
|
m_data_size(s.m_data_size),
|
|
m_data(s.m_data),
|
|
m_data_indexer(next_power_of_two(std::max(8u,s.entry_count())),
|
|
offset_hash_proc(m_data, m_unique_part_size), offset_eq_proc(m_data, m_unique_part_size)),
|
|
m_reserve(s.m_reserve) {
|
|
store_offset after_last=after_last_offset();
|
|
for(store_offset i=0; i<after_last; i+=m_entry_size) {
|
|
m_data_indexer.insert(i);
|
|
}
|
|
}
|
|
|
|
entry_storage & operator=(const entry_storage & o) {
|
|
m_data_indexer.reset();
|
|
m_entry_size = o.m_entry_size;
|
|
m_unique_part_size = o.m_unique_part_size;
|
|
m_data_size = o.m_data_size;
|
|
m_data = o.m_data;
|
|
m_reserve = o.m_reserve;
|
|
store_offset after_last=after_last_offset();
|
|
for(store_offset i=0; i<after_last; i+=m_entry_size) {
|
|
m_data_indexer.insert(i);
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
void reset() {
|
|
resize_data(0);
|
|
m_data_indexer.reset();
|
|
m_reserve = NO_RESERVE;
|
|
}
|
|
|
|
unsigned entry_size() const { return m_entry_size; }
|
|
unsigned get_size_estimate_bytes() const;
|
|
char * get(store_offset ofs) { return m_data.begin()+ofs; }
|
|
const char * get(store_offset ofs) const
|
|
{ return const_cast<entry_storage *>(this)->get(ofs); }
|
|
|
|
unsigned entry_count() const { return m_data_indexer.size(); }
|
|
|
|
store_offset after_last_offset() const {
|
|
return (m_reserve==NO_RESERVE) ? m_data_size : m_reserve;
|
|
}
|
|
|
|
char * begin() { return get(0); }
|
|
const char * begin() const { return get(0); }
|
|
const char * after_last() const { return get(after_last_offset()); }
|
|
|
|
|
|
bool has_reserve() const { return m_reserve!=NO_RESERVE; }
|
|
store_offset reserve() const { SASSERT(has_reserve()); return m_reserve; }
|
|
|
|
void ensure_reserve() {
|
|
if(has_reserve()) {
|
|
SASSERT(m_reserve==m_data_size-m_entry_size);
|
|
return;
|
|
}
|
|
m_reserve=m_data_size;
|
|
resize_data(m_data_size+m_entry_size);
|
|
}
|
|
|
|
/**
|
|
\brief Return pointer to the reserve.
|
|
|
|
The reserve must exist when the function is called.
|
|
*/
|
|
char * get_reserve_ptr() {
|
|
SASSERT(has_reserve());
|
|
return &m_data.get(reserve());
|
|
}
|
|
|
|
bool reserve_content_already_present() const {
|
|
SASSERT(has_reserve());
|
|
return m_data_indexer.contains(reserve());
|
|
}
|
|
|
|
bool find_reserve_content(store_offset & result) const {
|
|
SASSERT(has_reserve());
|
|
storage_indexer::entry * indexer_entry = m_data_indexer.find_core(reserve());
|
|
if(!indexer_entry) {
|
|
return false;
|
|
}
|
|
result = indexer_entry->get_data();
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
\brief Write fact \c f into the reserve at the end of the \c m_data storage.
|
|
|
|
If the reserve does not exist, this function creates it.
|
|
*/
|
|
void write_into_reserve(const char * data) {
|
|
ensure_reserve();
|
|
memcpy(get_reserve_ptr(), data, m_entry_size);
|
|
}
|
|
|
|
/**
|
|
\brief If the fact in reserve is not in the table, insert it there and return true;
|
|
otherwise return false.
|
|
|
|
When a fact is inserted into the table, the reserve becomes part of the table and
|
|
is no longer a reserve.
|
|
*/
|
|
bool insert_reserve_content();
|
|
store_offset insert_or_get_reserve_content();
|
|
bool remove_reserve_content();
|
|
/**
|
|
Remove data at the offset \c ofs.
|
|
|
|
Data with offset lower than \c ofs are not be modified by this function, data with
|
|
higher offset may be moved.
|
|
*/
|
|
void remove_offset(store_offset ofs);
|
|
|
|
|
|
//the following two operations allow breaking of the object invariant!
|
|
void resize_data(unsigned sz) {
|
|
m_data_size = sz;
|
|
m_data.resize(sz + sizeof(uint64));
|
|
}
|
|
|
|
bool insert_offset(store_offset ofs) {
|
|
return m_data_indexer.insert_if_not_there(ofs)==ofs;
|
|
}
|
|
};
|
|
|
|
class sparse_table : public table_base {
|
|
friend class sparse_table_plugin;
|
|
friend class sparse_table_plugin::join_project_fn;
|
|
friend class sparse_table_plugin::union_fn;
|
|
friend class sparse_table_plugin::transformer_fn;
|
|
friend class sparse_table_plugin::rename_fn;
|
|
friend class sparse_table_plugin::project_fn;
|
|
friend class sparse_table_plugin::negation_filter_fn;
|
|
friend class sparse_table_plugin::select_equal_and_project_fn;
|
|
|
|
class our_iterator_core;
|
|
class key_indexer;
|
|
class general_key_indexer;
|
|
class full_signature_key_indexer;
|
|
typedef entry_storage::store_offset store_offset;
|
|
|
|
|
|
class column_info {
|
|
unsigned m_big_offset;
|
|
unsigned m_small_offset;
|
|
uint64 m_mask;
|
|
uint64 m_write_mask;
|
|
public:
|
|
unsigned m_offset; //!< in bits
|
|
unsigned m_length; //!< in bits
|
|
|
|
column_info(unsigned offset, unsigned length) \
|
|
: m_big_offset(offset/8),
|
|
m_small_offset(offset%8),
|
|
m_mask( length==64 ? ULLONG_MAX : (static_cast<uint64>(1)<<length)-1 ),
|
|
m_write_mask( ~(m_mask<<m_small_offset) ),
|
|
m_offset(offset),
|
|
m_length(length) {
|
|
SASSERT(length<=64);
|
|
SASSERT(length+m_small_offset<=64);
|
|
}
|
|
table_element get(const char * rec) const {
|
|
const uint64 * ptr = reinterpret_cast<const uint64*>(rec+m_big_offset);
|
|
uint64 res = *ptr;
|
|
res>>=m_small_offset;
|
|
res&=m_mask;
|
|
return res;
|
|
}
|
|
void set(char * rec, table_element val) const {
|
|
SASSERT( (val&~m_mask)==0 ); //the value fits into the column
|
|
uint64 * ptr = reinterpret_cast<uint64*>(rec+m_big_offset);
|
|
*ptr&=m_write_mask;
|
|
*ptr|=val<<m_small_offset;
|
|
}
|
|
unsigned const next_ofs() const { return m_offset+m_length; }
|
|
};
|
|
class column_layout : public svector<column_info> {
|
|
|
|
void make_byte_aligned_end(unsigned col_index);
|
|
public:
|
|
|
|
unsigned m_entry_size;
|
|
/**
|
|
Number of last bytes which correspond to functional columns in the signature.
|
|
*/
|
|
unsigned m_functional_part_size;
|
|
unsigned m_functional_col_cnt;
|
|
|
|
column_layout(const table_signature & sig);
|
|
|
|
table_element get(const char * rec, unsigned col) const {
|
|
return (*this)[col].get(rec);
|
|
}
|
|
void set(char * rec, unsigned col, table_element val) const {
|
|
return (*this)[col].set(rec, val);
|
|
}
|
|
};
|
|
|
|
|
|
typedef svector<unsigned> key_spec; //sequence of columns in a key
|
|
typedef svector<table_element> key_value; //values of key columns
|
|
typedef map<key_spec, key_indexer*, int_vector_hash_proc<key_spec>,
|
|
vector_eq_proc<key_spec> > key_index_map;
|
|
|
|
static const store_offset NO_RESERVE = UINT_MAX;
|
|
|
|
column_layout m_column_layout;
|
|
unsigned m_fact_size;
|
|
entry_storage m_data;
|
|
mutable key_index_map m_key_indexes;
|
|
|
|
|
|
const char * get_at_offset(store_offset i) const {
|
|
return m_data.get(i);
|
|
}
|
|
|
|
table_element get_cell(store_offset ofs, unsigned column) const {
|
|
return m_column_layout.get(m_data.get(ofs), column);
|
|
}
|
|
|
|
void set_cell(store_offset ofs, unsigned column, table_element val) {
|
|
m_column_layout.set(m_data.get(ofs), column, val);
|
|
}
|
|
|
|
void write_into_reserve(const table_element* f);
|
|
|
|
/**
|
|
\brief Return reference to an indexer over columns in \c key_cols.
|
|
|
|
An indexer can retrieve a sequence of offsets that with \c key_cols columns equal to
|
|
the specified key. Indexers are populated lazily -- they remember the position of the
|
|
last fact they contain, and when an indexer is retrieved by the \c get_key_indexer function,
|
|
all the new facts are added into the indexer.
|
|
|
|
When a fact is removed from the table, all indexers are destroyed. This is not an extra
|
|
expense in the current use scenario, because we first perform all fact removals and do the
|
|
joins only after that (joins are the only operations that lead to index construction).
|
|
*/
|
|
key_indexer& get_key_indexer(unsigned key_len, const unsigned * key_cols) const;
|
|
|
|
void reset_indexes();
|
|
|
|
static void copy_columns(const column_layout & src_layout, const column_layout & dest_layout,
|
|
unsigned start_index, unsigned after_last, const char * src, char * dest,
|
|
unsigned & dest_idx, unsigned & pre_projection_idx, const unsigned * & next_removed);
|
|
|
|
/**
|
|
\c array \c removed_cols contains column indexes to be removed in ascending order and
|
|
is terminated by a number greated than the highest column index of a join the the two tables.
|
|
This is to simplify the traversal of the array when building facts.
|
|
*/
|
|
static void concatenate_rows(const column_layout & layout1, const column_layout & layout2,
|
|
const column_layout & layout_res, const char * ptr1, const char * ptr2, char * res,
|
|
const unsigned * removed_cols);
|
|
|
|
/**
|
|
\brief Perform join-project between t1 and t2 iterating through t1 and retrieving relevant
|
|
columns from t2 using indexing.
|
|
|
|
\c array \c removed_cols contains column indexes to be removed in ascending order and
|
|
is terminated by a number greated than the highest column index of a join the the two tables.
|
|
This is to simplify the traversal of the array when building facts.
|
|
|
|
\c tables_swapped value means that the resulting facts should contain facts from t2 first,
|
|
instead of the default behavior that would concatenate the two facts as \c (t1,t2).
|
|
|
|
\remark The function is called \c self_agnostic_join since, unlike the virtual method
|
|
\c join, it is static and therefore allows to easily swap the roles of the two joined
|
|
tables (the indexed and iterated one) in a way that is expected to give better performance.
|
|
*/
|
|
static void self_agnostic_join_project(const sparse_table & t1, const sparse_table & t2,
|
|
unsigned joined_col_cnt, const unsigned * t1_joined_cols, const unsigned * t2_joined_cols,
|
|
const unsigned * removed_cols, bool tables_swapped, sparse_table & result);
|
|
|
|
|
|
/**
|
|
If the fact at \c data (in table's native representation) is not in the table,
|
|
add it and return true. Otherwise return false.
|
|
*/
|
|
bool add_fact(const char * data);
|
|
|
|
bool add_reserve_content();
|
|
|
|
void garbage_collect();
|
|
|
|
sparse_table(sparse_table_plugin & p, const table_signature & sig, unsigned init_capacity=0);
|
|
sparse_table(const sparse_table & t);
|
|
virtual ~sparse_table();
|
|
public:
|
|
|
|
virtual void deallocate() {
|
|
get_plugin().recycle(this);
|
|
}
|
|
|
|
unsigned row_count() const { return m_data.entry_count(); }
|
|
|
|
sparse_table_plugin & get_plugin() const
|
|
{ return static_cast<sparse_table_plugin &>(table_base::get_plugin()); }
|
|
|
|
virtual bool empty() const { return row_count()==0; }
|
|
virtual void add_fact(const table_fact & f);
|
|
virtual bool contains_fact(const table_fact & f) const;
|
|
virtual bool fetch_fact(table_fact & f) const;
|
|
virtual void ensure_fact(const table_fact & f);
|
|
virtual void remove_fact(const table_element* fact);
|
|
virtual void reset();
|
|
|
|
virtual table_base * clone() const;
|
|
|
|
virtual table_base::iterator begin() const;
|
|
virtual table_base::iterator end() const;
|
|
|
|
virtual unsigned get_size_estimate_rows() const { return row_count(); }
|
|
virtual unsigned get_size_estimate_bytes() const;
|
|
virtual bool knows_exact_size() const { return true; }
|
|
};
|
|
|
|
};
|
|
|
|
#endif /* _DL_SPARSE_TABLE_H_ */
|