/*++ Copyright (c) 2011 Microsoft Corporation Module Name: iz3hash.h Abstract: Simple implementation of bucket-list hash tables conforming to SGI hash_map and hash_set interfaces. Just enough members are implemented to support iz3 and duality. iz3 and duality need this package because they assume that insert preserves iterators and references to elements, which is not true of the hashtable packages in util. This package lives in namespace hash_space. Specializations of class "hash" should be made in this namespace. Author: Ken McMillan (kenmcmil) Revision History: --*/ #ifndef IZ3_HASH_H #define IZ3_HASH_H #ifdef _WINDOWS #pragma warning(disable:4267) #endif #include #include #include #include "hash.h" #define stl_ext hash_space namespace hash_space { template class hash {}; template <> class hash { public: size_t operator()(const int &s) const { return s; } }; template <> class hash { public: size_t operator()(const std::string &s) const { return string_hash(s.c_str(), s.size(), 0); } }; template <> class hash > { public: size_t operator()(const std::pair &p) const { return p.first + p.second; } }; template class hash > { public: size_t operator()(const std::pair &p) const { return (size_t)p.first + (size_t)p.second; } }; enum { num_primes = 29 }; static const unsigned long primes[num_primes] = { 7ul, 53ul, 97ul, 193ul, 389ul, 769ul, 1543ul, 3079ul, 6151ul, 12289ul, 24593ul, 49157ul, 98317ul, 196613ul, 393241ul, 786433ul, 1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul, 3221225473ul, 4294967291ul }; inline unsigned long next_prime(unsigned long n) { const unsigned long* to = primes + (int)num_primes; for(const unsigned long* p = primes; p < to; p++) if(*p >= n) return *p; return primes[num_primes-1]; } template class hashtable { public: typedef Value &reference; typedef const Value &const_reference; struct Entry { Entry* next; Value val; Entry(const Value &_val) : val(_val) {next = 0;} }; struct iterator { Entry* ent; hashtable* tab; typedef std::forward_iterator_tag iterator_category; typedef Value value_type; typedef std::ptrdiff_t difference_type; typedef size_t size_type; typedef Value& reference; typedef Value* pointer; iterator(Entry* _ent, hashtable* _tab) : ent(_ent), tab(_tab) { } iterator() { } Value &operator*() const { return ent->val; } Value *operator->() const { return &(operator*()); } iterator &operator++() { Entry *old = ent; ent = ent->next; if (!ent) { size_t bucket = tab->get_bucket(old->val); while (!ent && ++bucket < tab->buckets.size()) ent = tab->buckets[bucket]; } return *this; } iterator operator++(int) { iterator tmp = *this; operator++(); return tmp; } bool operator==(const iterator& it) const { return ent == it.ent; } bool operator!=(const iterator& it) const { return ent != it.ent; } }; struct const_iterator { const Entry* ent; const hashtable* tab; typedef std::forward_iterator_tag iterator_category; typedef Value value_type; typedef std::ptrdiff_t difference_type; typedef size_t size_type; typedef const Value& reference; typedef const Value* pointer; const_iterator(const Entry* _ent, const hashtable* _tab) : ent(_ent), tab(_tab) { } const_iterator() { } const Value &operator*() const { return ent->val; } const Value *operator->() const { return &(operator*()); } const_iterator &operator++() { Entry *old = ent; ent = ent->next; if (!ent) { size_t bucket = tab->get_bucket(old->val); while (!ent && ++bucket < tab->buckets.size()) ent = tab->buckets[bucket]; } return *this; } const_iterator operator++(int) { const_iterator tmp = *this; operator++(); return tmp; } bool operator==(const const_iterator& it) const { return ent == it.ent; } bool operator!=(const const_iterator& it) const { return ent != it.ent; } }; private: typedef std::vector Table; Table buckets; size_t entries; HashFun hash_fun ; GetKey get_key; KeyEqFun key_eq_fun; public: hashtable(size_t init_size) : buckets(init_size,(Entry *)0) { entries = 0; } hashtable(const hashtable& other) { dup(other); } hashtable& operator= (const hashtable& other) { if (&other != this) dup(other); return *this; } ~hashtable() { clear(); } size_t size() const { return entries; } bool empty() const { return size() == 0; } void swap(hashtable& other) { buckets.swap(other.buckets); std::swap(entries, other.entries); } iterator begin() { for (size_t i = 0; i < buckets.size(); ++i) if (buckets[i]) return iterator(buckets[i], this); return end(); } iterator end() { return iterator(0, this); } const_iterator begin() const { for (size_t i = 0; i < buckets.size(); ++i) if (buckets[i]) return const_iterator(buckets[i], this); return end(); } const_iterator end() const { return const_iterator(0, this); } size_t get_bucket(const Value& val, size_t n) const { return hash_fun(get_key(val)) % n; } size_t get_key_bucket(const Key& key) const { return hash_fun(key) % buckets.size(); } size_t get_bucket(const Value& val) const { return get_bucket(val,buckets.size()); } Entry *lookup(const Value& val, bool ins = false) { resize(entries + 1); size_t n = get_bucket(val); Entry* from = buckets[n]; for (Entry* ent = from; ent; ent = ent->next) if (key_eq_fun(get_key(ent->val), get_key(val))) return ent; if(!ins) return 0; Entry* tmp = new Entry(val); tmp->next = from; buckets[n] = tmp; ++entries; return tmp; } Entry *lookup_key(const Key& key) const { size_t n = get_key_bucket(key); Entry* from = buckets[n]; for (Entry* ent = from; ent; ent = ent->next) if (key_eq_fun(get_key(ent->val), key)) return ent; return 0; } const_iterator find(const Key& key) const { return const_iterator(lookup_key(key),this); } iterator find(const Key& key) { return iterator(lookup_key(key),this); } std::pair insert(const Value& val){ size_t old_entries = entries; Entry *ent = lookup(val,true); return std::pair(iterator(ent,this),entries > old_entries); } iterator insert(const iterator &it, const Value& val){ Entry *ent = lookup(val,true); return iterator(ent,this); } size_t erase(const Key& key) { Entry** p = &(buckets[get_key_bucket(key)]); size_t count = 0; while(*p){ Entry *q = *p; if (key_eq_fun(get_key(q->val), key)) { ++count; *p = q->next; delete q; } else p = &(q->next); } entries -= count; return count; } void resize(size_t new_size) { const size_t old_n = buckets.size(); if (new_size <= old_n) return; const size_t n = next_prime(new_size); if (n <= old_n) return; Table tmp(n, (Entry*)(0)); for (size_t i = 0; i < old_n; ++i) { Entry* ent = buckets[i]; while (ent) { size_t new_bucket = get_bucket(ent->val, n); buckets[i] = ent->next; ent->next = tmp[new_bucket]; tmp[new_bucket] = ent; ent = buckets[i]; } } buckets.swap(tmp); } void clear() { for (size_t i = 0; i < buckets.size(); ++i) { for (Entry* ent = buckets[i]; ent != 0;) { Entry* next = ent->next; delete ent; ent = next; } buckets[i] = 0; } entries = 0; } void dup(const hashtable& other) { buckets.resize(other.buckets.size()); for (size_t i = 0; i < other.buckets.size(); ++i) { Entry** to = &buckets[i]; for (Entry* from = other.buckets[i]; from; from = from->next) to = &((*to = new Entry(from->val))->next); } entries = other.entries; } }; template class equal { public: bool operator()(const T& x, const T &y) const { return x == y; } }; template class identity { public: const T &operator()(const T &x) const { return x; } }; template class proj1 { public: const T &operator()(const std::pair &x) const { return x.first; } }; template , class EqFun = equal > class hash_set : public hashtable,EqFun> { public: typedef Element value_type; hash_set() : hashtable,EqFun>(7) {} }; template , class EqFun = equal > class hash_map : public hashtable,Key,HashFun,proj1,EqFun> { public: hash_map() : hashtable,Key,HashFun,proj1,EqFun>(7) {} Value &operator[](const Key& key) { std::pair kvp(key,Value()); return hashtable,Key,HashFun,proj1,EqFun>:: lookup(kvp,true)->val.second; } }; } #endif