/*++
Copyright (c) 2006 Microsoft Corporation

Module Name:

    dl_sparse_table.cpp

Abstract:

    <abstract>

Author:

    Krystof Hoder (t-khoder) 2010-09-24.

Revision History:

--*/

#include<algorithm>
#include"dl_context.h"
#include"dl_util.h"
#include"dl_sparse_table.h"

namespace datalog {

    // -----------------------------------
    //
    // entry_storage
    //
    // -----------------------------------

    entry_storage::store_offset entry_storage::insert_or_get_reserve_content() {
        SASSERT(has_reserve());
        store_offset entry_ofs = m_data_indexer.insert_if_not_there(m_reserve);
        if(m_reserve==entry_ofs) {
            //entry inserted, so reserve is no longer a reserve
            m_reserve = NO_RESERVE;
        }
        return entry_ofs;
    }

    bool entry_storage::insert_reserve_content() {
        SASSERT(has_reserve());
        store_offset entry_ofs = m_data_indexer.insert_if_not_there(m_reserve);
        if(m_reserve==entry_ofs) {
            //entry inserted, so reserve is no longer a reserve
            m_reserve = NO_RESERVE;
            return true;
        }
        return false;
    }

    bool entry_storage::remove_reserve_content() {
        SASSERT(has_reserve());
        store_offset entry_ofs;
        if(!find_reserve_content(entry_ofs)) {
            //the fact was not in the table
            return false;
        }
        remove_offset(entry_ofs);
        return true;
    }

    void entry_storage::remove_offset(store_offset ofs) {
        m_data_indexer.remove(ofs);
        store_offset last_ofs = after_last_offset() - m_entry_size;
        if(ofs!=last_ofs) {
            SASSERT(ofs+m_entry_size<=last_ofs);
            //we don't want any holes, so we put the last element at the place
            //of the removed one
            m_data_indexer.remove(last_ofs);
            char * base = &m_data.get(0);
            memcpy(base+ofs, base+last_ofs, m_entry_size);
            m_data_indexer.insert(ofs);
        }
        if(has_reserve()) {
            //we already have a reserve, so we need to shrink a little to keep having just one
            resize_data(m_data_size-m_entry_size);
        }
        m_reserve=last_ofs;
    }

    unsigned entry_storage::get_size_estimate_bytes() const {
        unsigned sz = m_data.capacity();
        sz += m_data_indexer.capacity()*sizeof(storage_indexer::entry);
        return sz;
    }

    // -----------------------------------
    //
    // sparse_table::column_layout
    //
    // -----------------------------------

    unsigned get_domain_length(uint64 dom_size) {
        SASSERT(dom_size>0);

        unsigned length = 0;

        unsigned dom_size_sm;
        if(dom_size>UINT_MAX) {
            dom_size_sm = static_cast<unsigned>(dom_size>>32);
            length += 32;
            if( (dom_size&UINT_MAX)!=0 && dom_size_sm!=UINT_MAX ) {
                //the low 32 bits are not all zero, so round the high part up
                dom_size_sm++;
            }
        }
        else {
            dom_size_sm = static_cast<unsigned>(dom_size);
        }
        if(dom_size_sm==1) {
            length += 1; //unary domains
        }
        else if(dom_size_sm>0x80000000u) {
            length += 32;
        }
        else {
            length += get_num_1bits(next_power_of_two(dom_size_sm)-1); //ceil(log2(dom_size))
        }
        return length;
    }

    sparse_table::column_layout::column_layout(const table_signature & sig)
            : m_functional_col_cnt(sig.functional_columns()) {
        SASSERT(sig.size()>0);
        unsigned ofs = 0;

        unsigned sig_sz = sig.size();
        unsigned first_functional = sig_sz - m_functional_col_cnt;
        for(unsigned i=0; i<sig_sz; i++) {
            uint64 dom_size = sig[i];
            unsigned length = get_domain_length(dom_size);
            SASSERT(length>0);
            SASSERT(length<=64);

            if(size()>0 && (length>54 || i==first_functional)) {
                //large domains must start byte-aligned, as well as functional columns
                make_byte_aligned_end(size()-1);
                ofs = back().next_ofs();
            }

            push_back(column_info(ofs, length));
            ofs += length;
        }
        make_byte_aligned_end(size()-1);
        SASSERT(back().next_ofs()%8==0); //the entries must be aligned to whole bytes

        m_entry_size = back().next_ofs()/8;
        if(m_functional_col_cnt) {
            SASSERT((*this)[first_functional].m_offset%8==0);
            m_functional_part_size = m_entry_size - (*this)[first_functional].m_offset/8;
        }
        else {
            m_functional_part_size = 0;
        }
    }
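    //Illustration (hypothetical signature, not taken from the original source): for
    //domain sizes {2, 256, 2^20} the columns get 1, 8 and 20 bits at bit offsets
    //0, 1 and 9. The final make_byte_aligned_end call then widens the last column by
    //3 bits, so an entry occupies 32 bits = 4 bytes and rows can be addressed by
    //byte offsets.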
    void sparse_table::column_layout::make_byte_aligned_end(unsigned col_index0) {
        unsigned ofs = (*this)[col_index0].next_ofs();
        unsigned ofs_bit_part = ofs%8;
        unsigned rounded_ofs = (ofs_bit_part==0) ? ofs : (ofs+8-ofs_bit_part);

        if(rounded_ofs!=ofs) {
            SASSERT(rounded_ofs>ofs);
            //extend the lengths of the last columns to swallow the padding
            int diff = rounded_ofs-ofs;
            unsigned col_idx = col_index0+1;
            while(diff!=0) {
                //we should always be able to fix the alignment by the time we reach zero
                SASSERT(col_idx>0);
                col_idx--;
                column_info & ci = (*this)[col_idx];
                unsigned new_length = ci.m_length;
                if(ci.m_length<64) {
                    unsigned swallowed = std::min(64-static_cast<int>(ci.m_length), diff);
                    diff -= swallowed;
                    new_length += swallowed;
                }
                unsigned new_ofs = ci.m_offset+diff;
                ci = column_info(new_ofs, new_length);
            }
        }

        SASSERT(rounded_ofs%8==0);
        SASSERT((*this)[col_index0].next_ofs()%8==0);
    }

    // -----------------------------------
    //
    // sparse_table
    //
    // -----------------------------------

    class sparse_table::our_iterator_core : public iterator_core {
        class our_row : public row_interface {
            const our_iterator_core & m_parent;
        public:
            our_row(const sparse_table & t, const our_iterator_core & parent)
                : row_interface(t), m_parent(parent) {}

            virtual table_element operator[](unsigned col) const {
                return m_parent.m_layout.get(m_parent.m_ptr, col);
            }
        };

        const char * m_end;
        const char * m_ptr;
        unsigned m_fact_size;
        our_row m_row_obj;
        const column_layout & m_layout;

    public:
        our_iterator_core(const sparse_table & t, bool finished)
            : m_end(t.m_data.after_last()),
            m_ptr(finished ? m_end : t.m_data.begin()),
            m_fact_size(t.m_fact_size),
            m_row_obj(t, *this),
            m_layout(t.m_column_layout) {}

        virtual bool is_finished() const {
            return m_ptr==m_end;
        }

        virtual row_interface & operator*() {
            SASSERT(!is_finished());
            return m_row_obj;
        }

        virtual void operator++() {
            SASSERT(!is_finished());
            m_ptr += m_fact_size;
        }
    };
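    //A key_indexer maps the values of a fixed tuple of key columns to the offsets of
    //all rows carrying those values. For instance (hypothetical content), with key
    //column {0} and rows (1,7), (1,9), (2,7) stored at offsets 0, 8 and 16, the query
    //for key [1] yields the offsets {0, 8} and the query for key [3] is empty.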
    class sparse_table::key_indexer {
    protected:
        unsigned_vector m_key_cols;
    public:
        typedef const store_offset * offset_iterator;

        /**
           Iterators returned by \c begin() and \c end() are valid only as long as the
           \c query_result object that returned them exists.
        */
        struct query_result {
        private:
            bool m_singleton;
            union {
                store_offset m_single_result;
                struct {
                    offset_iterator begin;
                    offset_iterator end;
                } m_many;
            };
        public:
            /**
               \brief Empty result.
            */
            query_result() : m_singleton(false) {
                m_many.begin = 0;
                m_many.end = 0;
            }
            query_result(offset_iterator begin, offset_iterator end) : m_singleton(false) {
                m_many.begin = begin;
                m_many.end = end;
            }
            query_result(store_offset single_result) : m_singleton(true), m_single_result(single_result) {}

            offset_iterator begin() const { return m_singleton ? &m_single_result : m_many.begin; }
            offset_iterator end() const { return m_singleton ? (&m_single_result+1) : m_many.end; }
            bool empty() const { return begin()==end(); }
        };

        key_indexer(unsigned key_len, const unsigned * key_cols)
            : m_key_cols(key_len, key_cols) {}

        virtual ~key_indexer() {}

        virtual void update(const sparse_table & t) {}

        virtual query_result get_matching_offsets(const key_value & key) const = 0;
    };

    class sparse_table::general_key_indexer : public key_indexer {
        typedef svector<store_offset> offset_vector;
        typedef u_map<offset_vector> index_map;

        index_map m_map;
        mutable entry_storage m_keys;
        store_offset m_first_nonindexed;

        void key_to_reserve(const key_value & key) const {
            m_keys.ensure_reserve();
            m_keys.write_into_reserve(reinterpret_cast<const char *>(key.c_ptr()));
        }

        offset_vector & get_matching_offset_vector(const key_value & key) {
            key_to_reserve(key);
            store_offset ofs = m_keys.insert_or_get_reserve_content();
            index_map::entry * e = m_map.find_core(ofs);
            if(!e) {
                TRACE("dl_table_relation", tout << "inserting\n";);
                e = m_map.insert_if_not_there2(ofs, offset_vector());
            }
            return e->get_data().m_value;
        }
    public:
        general_key_indexer(unsigned key_len, const unsigned * key_cols)
            : key_indexer(key_len, key_cols),
            m_keys(key_len*sizeof(table_element)),
            m_first_nonindexed(0) {}

        virtual void update(const sparse_table & t) {
            if(m_first_nonindexed==t.m_data.after_last_offset()) {
                return;
            }
            SASSERT(m_first_nonindexed<t.m_data.after_last_offset());

            //index the facts that were added into the table since the last update
            unsigned key_len = m_key_cols.size();
            key_value key;
            key.resize(key_len);

            offset_vector * index_entry = 0;
            bool key_modified = true;

            store_offset after_last = t.m_data.after_last_offset();
            for(store_offset ofs = m_first_nonindexed; ofs!=after_last; ofs+=t.m_fact_size) {
                for(unsigned i=0; i<key_len; i++) {
                    table_element val = t.m_column_layout.get(t.get_at_offset(ofs), m_key_cols[i]);
                    if(key[i]!=val) {
                        key[i] = val;
                        key_modified = true;
                    }
                }
                if(key_modified) {
                    index_entry = &get_matching_offset_vector(key);
                    key_modified = false;
                }
                index_entry->insert(ofs);
            }

            m_first_nonindexed = t.m_data.after_last_offset();
        }

        virtual query_result get_matching_offsets(const key_value & key) const {
            key_to_reserve(key);
            store_offset ofs;
            if(!m_keys.find_reserve_content(ofs)) {
                return query_result();
            }
            index_map::entry * e = m_map.find_core(ofs);
            if(!e) {
                return query_result();
            }
            const offset_vector & res = e->get_data().m_value;
            return query_result(res.begin(), res.end());
        }
    };
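    //general_key_indexer works for an arbitrary set of key columns at the price of a
    //second hashtable; full_signature_key_indexer below handles the special case where
    //the key covers exactly the non-functional columns (in some permutation). There the
    //table's own hashtable already serves as the index, so lookups need no extra storage.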
    /**
       When doing lookup using this index, the content of the reserve in sparse_table::m_data
       changes.
    */
    class sparse_table::full_signature_key_indexer : public key_indexer {
        const sparse_table & m_table;

        /**
           Permutation of key columns to make it into table facts. If empty, no permutation
           is necessary.
        */
        unsigned_vector m_permutation;
        mutable table_fact m_key_fact;
    public:

        static bool can_handle(unsigned key_len, const unsigned * key_cols, const sparse_table & t) {
            unsigned non_func_cols = t.get_signature().first_functional();
            if(key_len!=non_func_cols) {
                return false;
            }
            counter ctr;
            ctr.count(key_len, key_cols);
            if(ctr.get_max_counter_value()!=1 || ctr.get_max_positive()!=non_func_cols-1) {
                return false;
            }
            SASSERT(ctr.get_positive_count()==non_func_cols);
            return true;
        }

        full_signature_key_indexer(unsigned key_len, const unsigned * key_cols, const sparse_table & t)
                : key_indexer(key_len, key_cols),
                m_table(t) {
            SASSERT(can_handle(key_len, key_cols, t));

            m_permutation.resize(key_len);
            for(unsigned i=0; i<key_len; i++) {
                m_permutation[i] = key_cols[i];
            }
            m_key_fact.resize(t.get_signature().size());
        }

        virtual query_result get_matching_offsets(const key_value & key) const {
            unsigned key_len = m_key_cols.size();
            for(unsigned i=0; i<key_len; i++) {
                m_key_fact[m_permutation[i]] = key[i];
            }
            //we will change the reserve of m_table, which does not change the visible
            //content of the table
            sparse_table & t = const_cast<sparse_table &>(m_table);
            t.write_into_reserve(m_key_fact.c_ptr());

            store_offset res;
            if(!t.m_data.find_reserve_content(res)) {
                return query_result();
            }
            return query_result(res);
        }
    };

    sparse_table::sparse_table(sparse_table_plugin & p, const table_signature & sig, unsigned init_capacity)
        : table_base(p, sig),
        m_column_layout(sig),
        m_fact_size(m_column_layout.m_entry_size),
        m_data(m_fact_size, m_column_layout.m_functional_part_size, init_capacity) {}

    sparse_table::sparse_table(const sparse_table & t)
        : table_base(t.get_plugin(), t.get_signature()),
        m_column_layout(t.m_column_layout),
        m_fact_size(t.m_fact_size),
        m_data(t.m_data) {}

    table_base * sparse_table::clone() const {
        return get_plugin().mk_clone(*this);
    }

    sparse_table::~sparse_table() {
        reset_indexes();
    }

    void sparse_table::reset() {
        reset_indexes();
        m_data.reset();
    }

    table_base::iterator sparse_table::begin() const {
        return mk_iterator(alloc(our_iterator_core, *this, false));
    }

    table_base::iterator sparse_table::end() const {
        return mk_iterator(alloc(our_iterator_core, *this, true));
    }
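    //Key indexes are built lazily and cached per key-column vector: the first
    //get_key_indexer call for a given key allocates the indexer, and subsequent
    //calls only let it catch up on facts added since the previous call.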
    sparse_table::key_indexer& sparse_table::get_key_indexer(unsigned key_len,
            const unsigned * key_cols) const {
#if Z3DEBUG
        //We allow indexes only on non-functional columns because we want to be able to modify them
        //without having to worry about updating indexes.
        //Maybe we might keep a list of indexes that contain functional columns and on an update reset
        //only those.
        SASSERT(key_len==0 ||
            counter().count(key_len, key_cols).get_max_positive()<get_signature().first_functional());
#endif
        key_spec kspec;
        kspec.append(key_len, key_cols);
        key_index_map::entry * key_map_entry = m_key_indexes.insert_if_not_there2(kspec, 0);
        if(!key_map_entry->get_data().m_value) {
            if(full_signature_key_indexer::can_handle(key_len, key_cols, *this)) {
                key_map_entry->get_data().m_value = alloc(full_signature_key_indexer, key_len, key_cols, *this);
            }
            else {
                key_map_entry->get_data().m_value = alloc(general_key_indexer, key_len, key_cols);
            }
        }
        key_indexer & indexer = *key_map_entry->get_data().m_value;
        indexer.update(*this);
        return indexer;
    }

    void sparse_table::reset_indexes() {
        key_index_map::iterator kmit = m_key_indexes.begin();
        key_index_map::iterator kmend = m_key_indexes.end();
        for(; kmit!=kmend; ++kmit) {
            dealloc((*kmit).m_value);
        }
        m_key_indexes.reset();
    }

    void sparse_table::write_into_reserve(const table_element* f) {
        TRACE("dl_table_relation", tout << "\n";);
        m_data.ensure_reserve();
        char * reserve = m_data.get_reserve_ptr();
        unsigned col_cnt = m_column_layout.size();
        for(unsigned i=0; i<col_cnt; i++) {
            SASSERT(f[i]<get_signature()[i]); //the value must fit into the column domain
            m_column_layout.set(reserve, i, f[i]);
        }
    }

    bool sparse_table::add_fact(const char * data) {
        m_data.ensure_reserve();
        memcpy(m_data.get_reserve_ptr(), data, m_fact_size);
        return add_reserve_content();
    }

    void sparse_table::add_fact(const table_fact & f) {
        write_into_reserve(f.c_ptr());
        add_reserve_content();
    }

    bool sparse_table::add_reserve_content() {
        return m_data.insert_reserve_content();
    }

    bool sparse_table::contains_fact(const table_fact & f) const {
        sparse_table & t = const_cast<sparse_table &>(*this);
        t.write_into_reserve(f.c_ptr());
        unsigned func_col_cnt = get_signature().functional_columns();
        if(func_col_cnt==0) {
            return t.m_data.reserve_content_already_present();
        }
        else {
            store_offset ofs;
            if(!t.m_data.find_reserve_content(ofs)) {
                return false;
            }
            //the hashtable matches only the non-functional part, so the functional
            //columns still need to be compared explicitly
            unsigned sz = get_signature().size();
            for(unsigned i=get_signature().first_functional(); i<sz; i++) {
                if(t.m_column_layout.get(t.get_at_offset(ofs), i)!=f[i]) {
                    return false;
                }
            }
            return true;
        }
    }

    bool sparse_table::fetch_fact(table_fact & f) const {
        const table_signature & sig = get_signature();
        SASSERT(f.size()==sig.size());
        if(sig.functional_columns()==0) {
            return contains_fact(f);
        }
        else {
            sparse_table & t = const_cast<sparse_table &>(*this);
            t.write_into_reserve(f.c_ptr());
            store_offset ofs;
            if(!t.m_data.find_reserve_content(ofs)) {
                return false;
            }
            unsigned sz = sig.size();
            for(unsigned i=sig.first_functional(); i<sz; i++) {
                f[i] = t.m_column_layout.get(t.get_at_offset(ofs), i);
            }
            return true;
        }
    }

    void sparse_table::ensure_fact(const table_fact & f) {
        if(get_signature().functional_columns()==0) {
            add_fact(f);
        }
        else {
            //when functional columns are present, replace any row that agrees with
            //\c f on the non-functional columns, so that the functional values get updated
            remove_fact(f.c_ptr());
            add_fact(f);
        }
    }

    void sparse_table::remove_fact(const table_element* f) {
        write_into_reserve(f);
        if(!m_data.remove_reserve_content()) {
            //the fact was not present in the table
            return;
        }
        reset_indexes();
    }

    void sparse_table::copy_columns(const column_layout & src_layout, const column_layout & dest_layout,
            unsigned start_index, unsigned after_last, const char * src, char * dest,
            unsigned & dest_idx, unsigned & pre_projection_idx, const unsigned * & next_removed) {
        for(unsigned i=start_index; i<after_last; i++, pre_projection_idx++) {
            if(pre_projection_idx==*next_removed) {
                next_removed++;
                continue; //this column is projected out
            }
            SASSERT(*next_removed>pre_projection_idx);
            dest_layout.set(dest, dest_idx++, src_layout.get(src, i));
        }
    }

    void sparse_table::concatenate_rows(const column_layout & layout1, const column_layout & layout2,
            const column_layout & layout_res, const char * ptr1, const char * ptr2, char * res,
            const unsigned * removed_cols) {
        unsigned t1non_func = layout1.size()-layout1.m_functional_col_cnt;
        unsigned t2non_func = layout2.size()-layout2.m_functional_col_cnt;
        unsigned t1cols = layout1.size();
        unsigned t2cols = layout2.size();
        unsigned orig_i = 0;
        unsigned res_i = 0;
        const unsigned * next_removed = removed_cols;
        copy_columns(layout1, layout_res, 0, t1non_func, ptr1, res, res_i, orig_i, next_removed);
        copy_columns(layout2, layout_res, 0, t2non_func, ptr2, res, res_i, orig_i, next_removed);
        copy_columns(layout1, layout_res, t1non_func, t1cols, ptr1, res, res_i, orig_i, next_removed);
        copy_columns(layout2, layout_res, t2non_func, t2cols, ptr2, res, res_i, orig_i, next_removed);
    }

    void sparse_table::garbage_collect() {
        if (memory::above_high_watermark()) {
            get_plugin().garbage_collect();
        }
        if (memory::above_high_watermark()) {
            IF_VERBOSE(1, verbose_stream() << "Ran out of memory while filling table of size: "
                << get_size_estimate_rows() << " rows " << get_size_estimate_bytes() << " bytes\n";);
            throw out_of_memory_error();
        }
    }
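    //Join strategy: with no joined columns the function degenerates into a cartesian
    //product of the two tables; otherwise t1 is scanned sequentially and a key index
    //on t2's joined columns enumerates the matching rows. Because consecutive t1 rows
    //often agree on the key, the previous query result is reused whenever the key
    //turns out to be unchanged.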
    void sparse_table::self_agnostic_join_project(const sparse_table & t1, const sparse_table & t2,
            unsigned joined_col_cnt, const unsigned * t1_joined_cols, const unsigned * t2_joined_cols,
            const unsigned * removed_cols, bool tables_swapped, sparse_table & result) {

        unsigned t1_entry_size = t1.m_fact_size;
        unsigned t2_entry_size = t2.m_fact_size;

        unsigned t1idx = 0;
        unsigned t1end = t1.m_data.after_last_offset();

        TRACE("dl_table_relation",
            tout << "joined_col_cnt: " << joined_col_cnt << "\n";
            tout << "t1_entry_size: " << t1_entry_size << "\n";
            tout << "t2_entry_size: " << t2_entry_size << "\n";
            t1.display(tout);
            t2.display(tout);
            tout << (&t1) << " " << (&t2) << " " << (&result) << "\n";
        );

        if(joined_col_cnt==0) {
            unsigned t2idx = 0;
            unsigned t2end = t2.m_data.after_last_offset();

            for(; t1idx!=t1end; t1idx+=t1_entry_size) {
                for(t2idx = 0; t2idx != t2end; t2idx += t2_entry_size) {
                    result.m_data.ensure_reserve();
                    result.garbage_collect();
                    char * res_reserve = result.m_data.get_reserve_ptr();
                    char const* t1ptr = t1.get_at_offset(t1idx);
                    char const* t2ptr = t2.get_at_offset(t2idx);
                    if(tables_swapped) {
                        concatenate_rows(t2.m_column_layout, t1.m_column_layout, result.m_column_layout,
                            t2ptr, t1ptr, res_reserve, removed_cols);
                    }
                    else {
                        concatenate_rows(t1.m_column_layout, t2.m_column_layout, result.m_column_layout,
                            t1ptr, t2ptr, res_reserve, removed_cols);
                    }
                    result.add_reserve_content();
                }
            }
            return;
        }

        key_value t1_key;
        t1_key.resize(joined_col_cnt);
        key_indexer& t2_indexer = t2.get_key_indexer(joined_col_cnt, t2_joined_cols);

        bool key_modified = true;
        key_indexer::query_result t2_offsets;

        for(; t1idx != t1end; t1idx += t1_entry_size) {
            for(unsigned i = 0; i < joined_col_cnt; i++) {
                table_element val = t1.m_column_layout.get(t1.get_at_offset(t1idx), t1_joined_cols[i]);
                TRACE("dl_table_relation", tout << "val: " << val << " " << t1idx << " " << t1_joined_cols[i] << "\n";);
                if(t1_key[i] != val) {
                    t1_key[i] = val;
                    key_modified = true;
                }
            }
            if(key_modified) {
                t2_offsets = t2_indexer.get_matching_offsets(t1_key);
                key_modified = false;
            }

            if(t2_offsets.empty()) {
                continue;
            }

            key_indexer::offset_iterator t2ofs_it = t2_offsets.begin();
            key_indexer::offset_iterator t2ofs_end = t2_offsets.end();
            for(; t2ofs_it != t2ofs_end; ++t2ofs_it) {
                store_offset t2ofs = *t2ofs_it;
                result.m_data.ensure_reserve();
                result.garbage_collect();
                char * res_reserve = result.m_data.get_reserve_ptr();
                char const * t1ptr = t1.get_at_offset(t1idx);
                char const * t2ptr = t2.get_at_offset(t2ofs);
                if(tables_swapped) {
                    concatenate_rows(t2.m_column_layout, t1.m_column_layout, result.m_column_layout,
                        t2ptr, t1ptr, res_reserve, removed_cols);
                }
                else {
                    concatenate_rows(t1.m_column_layout, t2.m_column_layout, result.m_column_layout,
                        t1ptr, t2ptr, res_reserve, removed_cols);
                }
                result.add_reserve_content();
            }
        }
    }

    // -----------------------------------
    //
    // sparse_table_plugin
    //
    // -----------------------------------

    sparse_table_plugin::sparse_table_plugin(relation_manager & manager)
        : table_plugin(symbol("sparse"), manager) {}

    sparse_table_plugin::~sparse_table_plugin() {
        reset();
    }

    void sparse_table_plugin::reset() {
        table_pool::iterator it = m_pool.begin();
        table_pool::iterator end = m_pool.end();
        for(; it!=end; ++it) {
            sp_table_vector * vect = it->m_value;
            sp_table_vector::iterator vit = vect->begin();
            sp_table_vector::iterator vend = vect->end();
            for(; vit!=vend; ++vit) {
                (*vit)->destroy(); //calling deallocate() would only put the table back into the pool
            }
            dealloc(vect);
        }
        m_pool.reset();
    }

    void sparse_table_plugin::garbage_collect() {
        IF_VERBOSE(2, verbose_stream() << "garbage collecting "<< memory::get_allocation_size() << " bytes down to ";);
        reset();
        IF_VERBOSE(2, verbose_stream() << memory::get_allocation_size() << " bytes\n";);
    }

    void sparse_table_plugin::recycle(sparse_table * t) {
        const table_signature & sig = t->get_signature();
        t->reset();

        table_pool::entry * e = m_pool.insert_if_not_there2(sig, 0);
        sp_table_vector * & vect = e->get_data().m_value;
        if(vect==0) {
            vect = alloc(sp_table_vector);
        }
        IF_VERBOSE(12, verbose_stream() << "Recycle: " << t->get_size_estimate_bytes() << "\n";);
        vect->push_back(t);
    }
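    //Tables are pooled per signature: mk_empty below first tries to reuse an instance
    //recycled with the same signature and only allocates a fresh one when the pool is
    //empty. A caller-side sketch of the life cycle (hypothetical usage):
    //
    //   table_base * t = plugin.mk_empty(sig); //fresh or recycled table
    //   //...fill and query t...
    //   t->deallocate(); //hands the table back to the pool instead of freeing it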
    table_base * sparse_table_plugin::mk_empty(const table_signature & s) {
        SASSERT(can_handle_signature(s));

        sp_table_vector * vect;
        if(!m_pool.find(s, vect) || vect->empty()) {
            return alloc(sparse_table, *this, s);
        }
        sparse_table * res = vect->back();
        vect->pop_back();
        return res;
    }

    sparse_table * sparse_table_plugin::mk_clone(const sparse_table & t) {
        sparse_table * res = static_cast<sparse_table *>(mk_empty(t.get_signature()));
        res->m_data = t.m_data;
        return res;
    }

    bool sparse_table_plugin::join_involves_functional(const table_signature & s1, const table_signature & s2,
            unsigned col_cnt, const unsigned * cols1, const unsigned * cols2) {
        if(col_cnt==0) {
            return false;
        }
        return counter().count(col_cnt, cols1).get_max_positive()>=s1.first_functional()
            || counter().count(col_cnt, cols2).get_max_positive()>=s2.first_functional();
    }

    class sparse_table_plugin::join_project_fn : public convenient_table_join_project_fn {
    public:
        join_project_fn(const table_signature & t1_sig, const table_signature & t2_sig, unsigned col_cnt,
                const unsigned * cols1, const unsigned * cols2, unsigned removed_col_cnt,
                const unsigned * removed_cols)
            : convenient_table_join_project_fn(t1_sig, t2_sig, col_cnt, cols1, cols2,
                removed_col_cnt, removed_cols) {
            m_removed_cols.push_back(UINT_MAX);
        }

        virtual table_base * operator()(const table_base & tb1, const table_base & tb2) {

            const sparse_table & t1 = static_cast<const sparse_table &>(tb1);
            const sparse_table & t2 = static_cast<const sparse_table &>(tb2);

            sparse_table_plugin & plugin = t1.get_plugin();

            sparse_table * res = static_cast<sparse_table *>(plugin.mk_empty(get_result_signature()));

            //If we join with some intersection, want to iterate over the smaller table and
            //do indexing into the bigger one. If we simply do a product, we want the bigger
            //one to be at the outer iteration (then the small one will hopefully fit into
            //the cache)
            if( (t1.row_count() > t2.row_count()) == (!m_cols1.empty()) ) {
                sparse_table::self_agnostic_join_project(t2, t1, m_cols1.size(), m_cols2.c_ptr(),
                    m_cols1.c_ptr(), m_removed_cols.c_ptr(), true, *res);
            }
            else {
                sparse_table::self_agnostic_join_project(t1, t2, m_cols1.size(), m_cols1.c_ptr(),
                    m_cols2.c_ptr(), m_removed_cols.c_ptr(), false, *res);
            }
            TRACE("dl_table_relation", tb1.display(tout); tb2.display(tout); res->display(tout); );

            return res;
        }
    };

    table_join_fn * sparse_table_plugin::mk_join_fn(const table_base & t1, const table_base & t2,
            unsigned col_cnt, const unsigned * cols1, const unsigned * cols2) {
        const table_signature & sig1 = t1.get_signature();
        const table_signature & sig2 = t2.get_signature();
        if(t1.get_kind()!=get_kind() || t2.get_kind()!=get_kind()
            || join_involves_functional(sig1, sig2, col_cnt, cols1, cols2)) {
            //We also don't allow indexes on functional columns (and they are needed for joins)
            return 0;
        }
        return mk_join_project_fn(t1, t2, col_cnt, cols1, cols2, 0, static_cast<const unsigned *>(0));
    }
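    //mk_join_fn above delegates here with an empty list of removed columns, so the
    //same join_project_fn code path serves both plain joins and join-projections.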
    table_join_fn * sparse_table_plugin::mk_join_project_fn(const table_base & t1, const table_base & t2,
            unsigned col_cnt, const unsigned * cols1, const unsigned * cols2, unsigned removed_col_cnt,
            const unsigned * removed_cols) {
        const table_signature & sig1 = t1.get_signature();
        const table_signature & sig2 = t2.get_signature();
        if(t1.get_kind()!=get_kind() || t2.get_kind()!=get_kind()
            || removed_col_cnt==t1.get_signature().size()+t2.get_signature().size()
            || join_involves_functional(sig1, sig2, col_cnt, cols1, cols2)) {
            //We don't allow sparse tables with zero signatures (and project on all columns leads to such)
            //We also don't allow indexes on functional columns.
            return 0;
        }
        return alloc(join_project_fn, t1.get_signature(), t2.get_signature(), col_cnt, cols1, cols2,
            removed_col_cnt, removed_cols);
    }

    class sparse_table_plugin::union_fn : public table_union_fn {
    public:
        virtual void operator()(table_base & tgt0, const table_base & src0, table_base * delta0) {

            sparse_table & tgt = static_cast<sparse_table &>(tgt0);
            const sparse_table & src = static_cast<const sparse_table &>(src0);
            sparse_table * delta = static_cast<sparse_table *>(delta0);

            unsigned fact_size = tgt.m_fact_size;
            const char* ptr = src.m_data.begin();
            const char* after_last = src.m_data.after_last();
            for(; ptr<after_last; ptr+=fact_size) {
                if(tgt.add_fact(ptr)) {
                    if(delta) {
                        delta->add_fact(ptr);
                    }
                }
            }
        }
    };

    table_union_fn * sparse_table_plugin::mk_union_fn(const table_base & tgt, const table_base & src,
            const table_base * delta) {
        if(tgt.get_kind()!=get_kind() || src.get_kind()!=get_kind()
            || (delta && delta->get_kind()!=get_kind())
            || tgt.get_signature()!=src.get_signature()
            || (delta && delta->get_signature()!=tgt.get_signature())) {
            return 0;
        }
        return alloc(union_fn);
    }

    class sparse_table_plugin::project_fn : public convenient_table_project_fn {
        const unsigned m_inp_col_cnt;
        const unsigned m_removed_col_cnt;
        const unsigned m_result_col_cnt;
    public:
        project_fn(const table_signature & orig_sig, unsigned removed_col_cnt,
                const unsigned * removed_cols)
            : convenient_table_project_fn(orig_sig, removed_col_cnt, removed_cols),
            m_inp_col_cnt(orig_sig.size()),
            m_removed_col_cnt(removed_col_cnt),
            m_result_col_cnt(orig_sig.size()-removed_col_cnt) {
            SASSERT(removed_col_cnt>0);
        }

        virtual void transform_row(const char * src, char * tgt,
                const sparse_table::column_layout & src_layout,
                const sparse_table::column_layout & tgt_layout) {
            unsigned r_idx=0;
            unsigned tgt_i=0;
            for(unsigned i=0; i<m_inp_col_cnt; i++) {
                if(r_idx!=m_removed_col_cnt && i==m_removed_cols[r_idx]) {
                    //this column is removed by the projection
                    r_idx++;
                    continue;
                }
                tgt_layout.set(tgt, tgt_i++, src_layout.get(src, i));
            }
            SASSERT(tgt_i==m_result_col_cnt);
            SASSERT(r_idx==m_removed_col_cnt);
        }

        virtual table_base * operator()(const table_base & tb) {
            const sparse_table & t = static_cast<const sparse_table &>(tb);

            unsigned t_fact_size = t.m_fact_size;

            sparse_table_plugin & plugin = t.get_plugin();
            sparse_table * res = static_cast<sparse_table *>(plugin.mk_empty(get_result_signature()));

            const sparse_table::column_layout & src_layout = t.m_column_layout;
            const sparse_table::column_layout & tgt_layout = res->m_column_layout;

            const char* t_ptr = t.m_data.begin();
            const char* t_end = t.m_data.after_last();
            for(; t_ptr!=t_end; t_ptr+=t_fact_size) {
                SASSERT(t_ptr<t_end);
                res->m_data.ensure_reserve();
                char * res_ptr = res->m_data.get_reserve_ptr();
                transform_row(t_ptr, res_ptr, src_layout, tgt_layout);
                res->m_data.insert_reserve_content();
            }
            return res;
        }
    };

    table_transformer_fn * sparse_table_plugin::mk_project_fn(const table_base & t, unsigned col_cnt,
            const unsigned * removed_cols) {
        if(col_cnt==t.get_signature().size()) {
            return 0;
        }
        return alloc(project_fn, t.get_signature(), col_cnt, removed_cols);
    }
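    //select_equal_and_project keeps only the rows whose column \c col carries a fixed
    //value and projects that column away. Rather than scanning the whole table, it
    //queries a single-column key index and copies just the matching rows.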
    class sparse_table_plugin::select_equal_and_project_fn : public convenient_table_transformer_fn {
        const unsigned m_col;
        sparse_table::key_value m_key;
    public:
        select_equal_and_project_fn(const table_signature & orig_sig, table_element val, unsigned col)
            : m_col(col) {
            table_signature::from_project(orig_sig, 1, &col, get_result_signature());
            m_key.push_back(val);
        }

        virtual table_base * operator()(const table_base & tb) {
            const sparse_table & t = static_cast<const sparse_table &>(tb);

            sparse_table_plugin & plugin = t.get_plugin();
            sparse_table * res = static_cast<sparse_table *>(plugin.mk_empty(get_result_signature()));

            const sparse_table::column_layout & t_layout = t.m_column_layout;
            const sparse_table::column_layout & res_layout = res->m_column_layout;
            unsigned t_cols = t_layout.size();

            sparse_table::key_indexer & indexer = t.get_key_indexer(1, &m_col);
            sparse_table::key_indexer::query_result t_offsets = indexer.get_matching_offsets(m_key);
            if(t_offsets.empty()) {
                //no matches
                return res;
            }
            sparse_table::key_indexer::offset_iterator ofs_it=t_offsets.begin();
            sparse_table::key_indexer::offset_iterator ofs_end=t_offsets.end();

            for(; ofs_it!=ofs_end; ++ofs_it) {
                sparse_table::store_offset t_ofs = *ofs_it;
                const char * t_ptr = t.get_at_offset(t_ofs);

                res->m_data.ensure_reserve();
                char * res_reserve = res->m_data.get_reserve_ptr();

                unsigned res_i = 0;
                for(unsigned i=0; i<t_cols; i++) {
                    if(i==m_col) {
                        continue; //this is the column we project out
                    }
                    res_layout.set(res_reserve, res_i++, t_layout.get(t_ptr, i));
                }
                res->add_reserve_content();
            }
            return res;
        }
    };

    table_transformer_fn * sparse_table_plugin::mk_select_equal_and_project_fn(const table_base & t,
            const table_element & value, unsigned col) {
        if(t.get_kind()!=get_kind() || t.get_signature().size()==1
            || col>=t.get_signature().first_functional()) {
            //We don't allow sparse tables with zero signatures (and project on a single
            //column table produces one).
            //We also don't allow indexes on functional columns. And our implementation of
            //select_equal_and_project uses index on \c col.
            return 0;
        }
        return alloc(select_equal_and_project_fn, t.get_signature(), value, col);
    }

    class sparse_table_plugin::rename_fn : public convenient_table_rename_fn {
        const unsigned m_cycle_len;
        const unsigned m_col_cnt;
        unsigned_vector m_out_of_cycle;
    public:
        rename_fn(const table_signature & orig_sig, unsigned permutation_cycle_len,
                const unsigned * permutation_cycle)
            : convenient_table_rename_fn(orig_sig, permutation_cycle_len, permutation_cycle),
            m_cycle_len(permutation_cycle_len),
            m_col_cnt(orig_sig.size()) {
            SASSERT(permutation_cycle_len>=2);

            idx_set cycle_cols;
            for(unsigned i=0; i<m_cycle_len; i++) {
                cycle_cols.insert(permutation_cycle[i]);
            }
            for(unsigned i=0; i<m_col_cnt; i++) {
                if(!cycle_cols.contains(i)) {
                    m_out_of_cycle.push_back(i);
                }
            }
        }

        static void transfer_column(const char * src, unsigned src_col,
                const sparse_table::column_layout & src_layout,
                char * tgt, unsigned tgt_col, const sparse_table::column_layout & tgt_layout) {
            tgt_layout.set(tgt, tgt_col, src_layout.get(src, src_col));
        }

        void transform_row(const char * src, char * tgt,
                const sparse_table::column_layout & src_layout,
                const sparse_table::column_layout & tgt_layout) {
            //shift the values along the permutation cycle...
            for(unsigned i=1; i<m_cycle_len; i++) {
                transfer_column(src, m_cycle[i-1], src_layout, tgt, m_cycle[i], tgt_layout);
            }
            transfer_column(src, m_cycle[m_cycle_len-1], src_layout, tgt, m_cycle[0], tgt_layout);
            //...and copy the columns that are not on the cycle unchanged
            unsigned out_of_cycle_cnt = m_out_of_cycle.size();
            for(unsigned i=0; i<out_of_cycle_cnt; i++) {
                unsigned col = m_out_of_cycle[i];
                transfer_column(src, col, src_layout, tgt, col, tgt_layout);
            }
        }

        virtual table_base * operator()(const table_base & tb) {
            const sparse_table & t = static_cast<const sparse_table &>(tb);

            unsigned t_fact_size = t.m_fact_size;

            sparse_table_plugin & plugin = t.get_plugin();
            sparse_table * res = static_cast<sparse_table *>(plugin.mk_empty(get_result_signature()));

            unsigned res_fact_size = res->m_fact_size;
            unsigned res_data_size = res_fact_size*t.row_count();

            res->m_data.resize_data(res_data_size);

            //here we can separate data creation and insertion into the hashmap, since we know
            //that no row will become a duplicate

            //create the data
            const char* t_ptr = t.m_data.begin();
            char* res_ptr = res->m_data.begin();
            char* res_end = res_ptr+res_data_size;
            for(; res_ptr!=res_end; t_ptr+=t_fact_size, res_ptr+=res_fact_size) {
                transform_row(t_ptr, res_ptr, t.m_column_layout, res->m_column_layout);
            }

            //and insert them into the hash-map
            for(unsigned i=0; i!=res_data_size; i+=res_fact_size) {
                TRUSTME(res->m_data.insert_offset(i));
            }

            return res;
        }
    };

    table_transformer_fn * sparse_table_plugin::mk_rename_fn(const table_base & t,
            unsigned permutation_cycle_len, const unsigned * permutation_cycle) {
        if(t.get_kind()!=get_kind()) {
            return 0;
        }
        return alloc(rename_fn, t.get_signature(), permutation_cycle_len, permutation_cycle);
    }
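    //The negation filter removes from \c tgt every row that agrees with some row of
    //\c neg on the joined columns. Matching offsets are collected first and then
    //removed starting from the largest one, so that entry_storage::remove_offset's
    //move-the-last-row compaction never relocates a row that is still scheduled for
    //removal.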
    class sparse_table_plugin::negation_filter_fn : public convenient_table_negation_filter_fn {
        typedef sparse_table::store_offset store_offset;
        typedef sparse_table::key_value key_value;
        typedef sparse_table::key_indexer key_indexer;

        bool m_joining_neg_non_functional;

        /**
           Used by the \c collect_intersection_offsets function.
           If tgt_is_first is false, contains the same items as \c res.
        */
        idx_set m_intersection_content;

    public:
        negation_filter_fn(const table_base & tgt, const table_base & neg,
                unsigned joined_col_cnt, const unsigned * t_cols, const unsigned * negated_cols)
            : convenient_table_negation_filter_fn(tgt, neg, joined_col_cnt, t_cols, negated_cols) {
            unsigned neg_first_func = neg.get_signature().first_functional();
            counter ctr;
            ctr.count(m_cols2);
            m_joining_neg_non_functional = ctr.get_max_counter_value()==1
                && ctr.get_positive_count()==neg_first_func
                && (neg_first_func==0 || ctr.get_max_positive()==neg_first_func-1);
        }

        /**
           Collect offsets of rows in \c t1 or \c t2 (depends on whether \c tgt_is_first
           is true or false) that have a match in the other table into \c res.
           Offsets in \c res are in ascending order.
        */
        void collect_intersection_offsets(const sparse_table & t1, const sparse_table & t2,
                bool tgt_is_first, svector<store_offset> & res) {
            SASSERT(res.empty());

            if(!tgt_is_first) {
                m_intersection_content.reset();
            }

            unsigned joined_col_cnt = m_cols1.size();
            unsigned t1_entry_size = t1.m_data.entry_size();

            const unsigned * cols1 = tgt_is_first ? m_cols1.c_ptr() : m_cols2.c_ptr();
            const unsigned * cols2 = tgt_is_first ? m_cols2.c_ptr() : m_cols1.c_ptr();

            key_value t1_key;
            t1_key.resize(joined_col_cnt);
            key_indexer & t2_indexer = t2.get_key_indexer(joined_col_cnt, cols2);
            bool key_modified = true;
            key_indexer::query_result t2_offsets;
            store_offset t1_after_last = t1.m_data.after_last_offset();
            for(store_offset t1_ofs=0; t1_ofs!=t1_after_last; t1_ofs+=t1_entry_size) {
                for(unsigned i=0; i<joined_col_cnt; i++) {
                    table_element val = t1.m_column_layout.get(t1.get_at_offset(t1_ofs), cols1[i]);
                    if(t1_key[i]!=val) {
                        t1_key[i] = val;
                        key_modified = true;
                    }
                }
                if(key_modified) {
                    t2_offsets = t2_indexer.get_matching_offsets(t1_key);
                    key_modified = false;
                }

                if(t2_offsets.empty()) {
                    continue;
                }
                if(tgt_is_first) {
                    res.push_back(t1_ofs);
                }
                else {
                    key_indexer::offset_iterator it = t2_offsets.begin();
                    key_indexer::offset_iterator end = t2_offsets.end();
                    for(; it!=end; ++it) {
                        store_offset ofs = *it;
                        if(!m_intersection_content.contains(ofs)) {
                            m_intersection_content.insert(ofs);
                            res.push_back(ofs);
                        }
                    }
                }
            }

            if(!tgt_is_first) {
                //the offsets were collected in the row order of \c t1, so they must be sorted
                std::sort(res.begin(), res.end());
            }
        }

        virtual void operator()(table_base & tgt0, const table_base & neg0) {
            sparse_table & tgt = static_cast<sparse_table &>(tgt0);
            const sparse_table & neg = static_cast<const sparse_table &>(neg0);

            if(m_cols1.size()==0) {
                if(!neg.empty()) {
                    tgt.reset();
                }
                return;
            }

            svector<store_offset> to_remove; //offsets here are in increasing order

            //We don't do just the simple tgt.row_count()>neg.row_count() because the swapped case is
            //more expensive. The constant 4 is, however, just my guess what the ratio might be.
            if(tgt.row_count()/4>neg.row_count()) {
                collect_intersection_offsets(neg, tgt, false, to_remove);
            }
            else {
                collect_intersection_offsets(tgt, neg, true, to_remove);
            }

            if(to_remove.empty()) {
                return;
            }

            //the largest offsets are at the end, so we can remove them one by one
            while(!to_remove.empty()) {
                store_offset removed_ofs = to_remove.back();
                to_remove.pop_back();
                tgt.m_data.remove_offset(removed_ofs);
            }

            tgt.reset_indexes();
        }
    };

    table_intersection_filter_fn * sparse_table_plugin::mk_filter_by_negation_fn(const table_base & t,
            const table_base & negated_obj, unsigned joined_col_cnt,
            const unsigned * t_cols, const unsigned * negated_cols) {
        if(!check_kind(t) || !check_kind(negated_obj)
            || join_involves_functional(t.get_signature(), negated_obj.get_signature(), joined_col_cnt,
                t_cols, negated_cols) ) {
            return 0;
        }
        return alloc(negation_filter_fn, t, negated_obj, joined_col_cnt, t_cols, negated_cols);
    }

    unsigned sparse_table::get_size_estimate_bytes() const {
        unsigned sz = 0;
        sz += m_data.get_size_estimate_bytes();
        sz += m_key_indexes.capacity()*8; // TBD
        return sz;
    }

};