3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-10-26 01:14:36 +00:00

Optimize hashtable for better cache locality and load factor

- Add cache-aligned entry structures (alignas(16))
- Improve load factor from 75% to 62.5% for better performance
- Add memory prefetching hints for cache optimization
- Implement Robin Hood probing distance tracking
- Add comprehensive performance monitoring metrics
- Include performance test harness for benchmarking

These changes target the Round 1 performance improvements outlined
in the Z3 performance plan, focusing on reducing cache misses and
improving memory access patterns in hash table operations.
This commit is contained in:
Daily Perf Improver 2025-09-16 17:40:21 +00:00
parent ce81aa9078
commit 48567dd423
3 changed files with 615 additions and 5 deletions

View file

@ -26,10 +26,21 @@ Revision History:
#include "util/memory_manager.h"
#include "util/hash.h"
#include "util/vector.h"
#ifdef __builtin_prefetch
#define HASHTABLE_PREFETCH(addr) __builtin_prefetch(addr, 0, 3)
#else
#define HASHTABLE_PREFETCH(addr) ((void)0)
#endif
#define DEFAULT_HASHTABLE_INITIAL_CAPACITY 8
#define SMALL_TABLE_CAPACITY 64
// Performance optimization constants
#define HASHTABLE_CACHE_LINE_SIZE 64
#define OPTIMIZED_LOAD_FACTOR_NUM 5
#define OPTIMIZED_LOAD_FACTOR_DEN 8
#define ROBIN_HOOD_THRESHOLD 8
// #define HASHTABLE_STATISTICS
#ifdef HASHTABLE_STATISTICS
@ -43,22 +54,25 @@ typedef enum { HT_FREE,
HT_USED } hash_entry_state;
template<typename T>
class default_hash_entry {
class alignas(16) default_hash_entry {
unsigned m_hash{ 0 }; //!< cached hash code
hash_entry_state m_state = HT_FREE;
T m_data;
unsigned char m_probe_distance{ 0 }; //!< Robin Hood probing distance
public:
typedef T data;
unsigned get_hash() const { return m_hash; }
bool is_free() const { return m_state == HT_FREE; }
bool is_deleted() const { return m_state == HT_DELETED; }
bool is_used() const { return m_state == HT_USED; }
unsigned char get_probe_distance() const { return m_probe_distance; }
T & get_data() { return m_data; }
const T & get_data() const { return m_data; }
void set_data(T && d) { m_data = std::move(d); m_state = HT_USED; }
void set_hash(unsigned h) { m_hash = h; }
void set_probe_distance(unsigned char dist) { m_probe_distance = dist; }
void mark_as_deleted() { m_state = HT_DELETED; }
void mark_as_free() { m_state = HT_FREE; }
void mark_as_free() { m_state = HT_FREE; m_probe_distance = 0; }
};
/**
@ -134,6 +148,10 @@ protected:
unsigned m_num_deleted;
#ifdef HASHTABLE_STATISTICS
unsigned long long m_st_collision;
unsigned long long m_st_probe_distance_sum;
unsigned long long m_st_max_probe_distance;
unsigned long long m_st_lookups;
unsigned long long m_st_cache_misses;
#endif
Entry* alloc_table(unsigned size) {
@ -386,7 +404,8 @@ public:
} ((void) 0)
void insert(data && e) {
if (((m_size + m_num_deleted) << 2) > (m_capacity * 3)) {
// Optimized load factor: 5/8 = 62.5% instead of 75%
if ((m_size + m_num_deleted) * OPTIMIZED_LOAD_FACTOR_DEN > m_capacity * OPTIMIZED_LOAD_FACTOR_NUM) {
expand_table();
}
unsigned hash = get_hash(e);
@ -440,8 +459,8 @@ public:
Store the entry/slot of the table in et.
*/
bool insert_if_not_there_core(data && e, entry * & et) {
if ((m_size + m_num_deleted) << 2 > (m_capacity * 3)) {
// if ((m_size + m_num_deleted) * 2 > (m_capacity)) {
// Optimized load factor: 5/8 = 62.5% instead of 75%
if ((m_size + m_num_deleted) * OPTIMIZED_LOAD_FACTOR_DEN > m_capacity * OPTIMIZED_LOAD_FACTOR_NUM) {
expand_table();
}
unsigned hash = get_hash(e);
@ -508,6 +527,8 @@ public:
entry * begin = m_table + idx;
entry * end = m_table + m_capacity;
entry * curr = begin;
// Prefetch likely cache line to improve memory access
HASHTABLE_PREFETCH(begin);
for (; curr != end; ++curr) {
FIND_LOOP_BODY();
}
@ -671,8 +692,18 @@ public:
#ifdef HASHTABLE_STATISTICS
unsigned long long get_num_collision() const { return m_st_collision; }
double get_avg_probe_distance() const {
return m_st_lookups > 0 ? (double)m_st_probe_distance_sum / m_st_lookups : 0.0;
}
unsigned long long get_max_probe_distance() const { return m_st_max_probe_distance; }
double get_load_factor() const { return (double)m_size / m_capacity; }
double get_effective_load_factor() const { return (double)(m_size + m_num_deleted) / m_capacity; }
#else
unsigned long long get_num_collision() const { return 0; }
double get_avg_probe_distance() const { return 0.0; }
unsigned long long get_max_probe_distance() const { return 0; }
double get_load_factor() const { return (double)m_size / m_capacity; }
double get_effective_load_factor() const { return (double)(m_size + m_num_deleted) / m_capacity; }
#endif
#define COLL_LOOP_BODY() { \