C++ 进阶：哈希表原理与实现

C++ 进阶：哈希表原理与实现 | 极客日志

/*------------------ Task: define the generic hash-functor struct template ------------------*/
/**
 * Default hash functor: turns a key of type K into a size_t.
 *
 * The key is converted with a plain cast, which suits integer-like keys
 * (int, long, ...). Key types that cannot be cast to size_t need either a
 * specialization of HashFunc or a custom functor supplied to the table.
 */
template<class K>
struct HashFunc {
    /**
     * @param key  key to hash
     * @return     the key converted to size_t (negative values wrap modulo 2^N)
     *
     * Declared const so the functor can also be invoked through a const
     * object or const reference.
     */
    size_t operator()(const K& key) const {
        // Deliberately a C-style cast: it accepts every key type the
        // original accepted, including pointer keys.
        return (size_t)key;
    }
};

/*------------------ Task: define the hash functor's template specialization ------------------*/
/**
 * HashFunc specialization for string keys.
 *
 * Folds every character into an accumulator with a BKDR-style scheme:
 * add the character, then multiply by 131. Arithmetic is done in size_t,
 * so overflow simply wraps.
 */
template<>
struct HashFunc<string> {
    /**
     * @param s  string to hash
     * @return   the accumulated hash; 0 for an empty string
     *
     * Declared const so the functor can also be invoked through a const
     * object or const reference.
     */
    size_t operator()(const string& s) const {
        size_t hash = 0;
        for (const char ch : s) {
            hash += ch;
            hash *= 131; // multiplier chosen by the BKDR scheme to spread values
        }
        return hash;
    }
};

#pragma once
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>
using namespace std;

/*------------------ Task: define the generic hash-functor class template ------------------*/
/**
 * Default hash functor: turns a key of type K into a size_t by a plain cast.
 * Suitable for integer-like keys; other key types need a specialization or
 * a custom functor.
 */
template<class K>
struct HashFunc {
    /**
     * @param key  key to hash
     * @return     the key converted to size_t (negative values wrap modulo 2^N)
     *
     * Declared const so the functor can also be invoked through a const
     * object or const reference.
     */
    size_t operator()(const K& key) const {
        // Deliberately a C-style cast: it accepts every key type the
        // original accepted, including pointer keys.
        return (size_t)key;
    }
};

/*------------------ Task: define the hash functor's template specialization ------------------*/
/**
 * HashFunc specialization for string keys: for each character, add it to the
 * accumulator and multiply by 131 (BKDR-style). size_t overflow wraps.
 */
template<>
struct HashFunc<string> {
    /**
     * @param s  string to hash
     * @return   the accumulated hash; 0 for an empty string
     *
     * Declared const so the functor can also be invoked through a const
     * object or const reference.
     */
    size_t operator()(const string& s) const {
        size_t hash = 0;
        for (const char ch : s) {
            hash += ch;
            hash *= 131;
        }
        return hash;
    }
};

/*------------------ Task: return the next prime >= n ---> used when the hash table grows ------------------*/
/**
 * Looks up n in a fixed, ascending table of 28 primes.
 *
 * @param n  lower bound for the requested prime
 * @return   the smallest table entry that is >= n; if n is larger than every
 *           entry, the largest entry (4294967291) is returned instead
 */
inline unsigned long _stl_next_prime(unsigned long n) {
    // Local names avoid leading/double underscores, which are reserved
    // for the implementation.
    static const int kNumPrimes = 28;
    static const unsigned long kPrimeList[kNumPrimes] = {
        53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593,
        49157, 98317, 196613, 393241, 786433, 1572869, 3145739,
        6291469, 12582917, 25165843, 50331653, 100663319, 201326611,
        402653189, 805306457, 1610612741, 3221225473UL, 4294967291UL
    };
    const unsigned long* const first = kPrimeList;
    const unsigned long* const last = kPrimeList + kNumPrimes;
    // The table is sorted, so a binary search finds the first entry >= n.
    const unsigned long* const pos = std::lower_bound(first, last, n);
    // Past-the-end means n exceeds every entry: clamp to the largest prime.
    return pos == last ? *(last - 1) : *pos;
}

#pragma once
#include "HashTable.h"
namespace open_address {
    // Status of one slot. DELETE marks a slot whose entry was erased
    // (a tombstone): lookups keep probing past it, inserts may reuse it.
    enum State { EXIST, EMPTY, DELETE };

    // One slot of the table: the stored key/value pair plus its status.
    // A freshly constructed slot is EMPTY.
    template<class K, class V>
    struct HashData {
        pair<K, V> _kv;
        State _state = EMPTY;
    };

    /**
     * Hash table using open addressing with linear probing.
     *
     * @tparam K     key type (must be comparable with ==)
     * @tparam V     mapped value type
     * @tparam Hash  functor mapping a K to size_t; defaults to HashFunc<K>
     */
    template<class K, class V, class Hash = HashFunc<K>>
    class HashTable {
    private:
        vector<HashData<K, V>> _tables; // slot array
        size_t _n;                      // number of EXIST slots
    public:
        // Starts with the smallest size from the prime table and no entries.
        HashTable() : _tables(_stl_next_prime(0)), _n(0) {}

        /**
         * Looks up a key.
         *
         * @return pointer to the slot holding the key, or nullptr if absent.
         *
         * Probing starts at hash(key) % size and advances one slot at a time.
         * It stops at the first EMPTY slot. The probe count is also capped at
         * the table size: tombstones are not counted in _n, so after enough
         * insert/erase cycles the table may contain no EMPTY slot at all, and
         * an uncapped search for a missing key would never terminate.
         */
        HashData<K, V>* Find(const K& key) {
            Hash hash;
            const size_t capacity = _tables.size();
            const size_t hash_0 = hash(key) % capacity;
            for (size_t i = 0; i < capacity; ++i) {
                HashData<K, V>& slot = _tables[(hash_0 + i) % capacity];
                if (slot._state == EMPTY) {
                    return nullptr;
                }
                if (slot._state == EXIST && slot._kv.first == key) {
                    return &slot;
                }
            }
            return nullptr; // every slot visited, key not present
        }

        /**
         * Removes a key by turning its slot into a tombstone.
         *
         * @return true if the key was present and removed, false otherwise.
         */
        bool Erase(const K& key) {
            HashData<K, V>* ret = Find(key);
            if (ret) {
                ret->_state = DELETE;
                --_n;
                return true;
            }
            return false;
        }

        /**
         * Inserts a key/value pair.
         *
         * @return false if the key already exists (table unchanged),
         *         true once the pair has been stored.
         */
        bool Insert(const pair<K, V>& kv) {
            if (Find(kv.first)) return false;
            // Grow once the load factor (live entries / slots) reaches 0.7.
            if (_n * 10 / _tables.size() >= 7) {
                HashTable<K, V, Hash> newHt;
                newHt._tables.resize(_stl_next_prime(_tables.size() + 1));
                // Re-insert only live entries; tombstones are dropped here.
                for (auto& htData : _tables) {
                    if (htData._state == EXIST) {
                        newHt.Insert(htData._kv);
                    }
                }
                _tables.swap(newHt._tables);
            }
            Hash hashFunc;
            size_t hash_0 = hashFunc(kv.first) % _tables.size();
            size_t hash_i = hash_0;
            size_t i = 1;
            // Take the first slot that is not EXIST (EMPTY or a tombstone).
            // The load-factor check above keeps at least one such slot.
            while (_tables[hash_i]._state == EXIST) {
                hash_i = (hash_0 + i) % _tables.size();
                ++i;
            }
            _tables[hash_i]._kv = kv;
            _tables[hash_i]._state = EXIST;
            ++_n;
            return true;
        }
    };
}

#pragma once
#include "HashTable.h"
namespace hash_bucket {
    // Node of a bucket's singly linked list: one key/value pair plus the
    // link to the next node in the same bucket.
    template<class K, class V>
    struct HashNode {
        pair<K, V> _kv;
        HashNode<K, V>* _next;
        HashNode(const pair<K, V>& kv) : _kv(kv), _next(nullptr) {}
    };

    /**
     * Hash table using separate chaining (one linked list per bucket).
     *
     * @tparam K     key type (must be comparable with ==)
     * @tparam V     mapped value type
     * @tparam Hash  functor mapping a K to size_t; defaults to HashFunc<K>
     */
    template<class K, class V, class Hash = HashFunc<K>>
    class HashTable {
    private:
        vector<HashNode<K, V>*> _tables; // bucket heads (nullptr = empty bucket)
        size_t _n;                       // number of stored nodes
        typedef HashNode<K, V> Node;
    public:
        // Starts with the smallest size from the prime table and no entries.
        HashTable() : _tables(_stl_next_prime(0)), _n(0) {}

        // The table owns its nodes through raw pointers. A member-wise copy
        // would make two tables delete the same nodes, so copying is disabled.
        HashTable(const HashTable&) = delete;
        HashTable& operator=(const HashTable&) = delete;

        // Frees every node of every bucket.
        ~HashTable() {
            for (size_t i = 0; i < _tables.size(); ++i) {
                Node* current = _tables[i];
                while (current) {
                    Node* next = current->_next;
                    delete current;
                    current = next;
                }
                _tables[i] = nullptr;
            }
        }

        /**
         * Looks up a key.
         *
         * @return pointer to the node holding the key, or nullptr if absent.
         */
        Node* Find(const K& key) {
            Hash hashFunc;
            size_t hash_i = hashFunc(key) % _tables.size();
            Node* current = _tables[hash_i];
            while (current) {
                if (current->_kv.first == key) return current;
                current = current->_next;
            }
            return nullptr;
        }

        /**
         * Removes a key and frees its node.
         *
         * @return true if the key was present and removed, false otherwise.
         */
        bool Erase(const K& key) {
            Hash hashFunc;
            size_t hash_i = hashFunc(key) % _tables.size();
            Node* curr = _tables[hash_i];
            Node* prev = nullptr;
            while (curr) {
                if (curr->_kv.first == key) {
                    if (prev == nullptr) {
                        _tables[hash_i] = curr->_next; // removing the bucket head
                    } else {
                        prev->_next = curr->_next;
                    }
                    delete curr;
                    --_n;
                    return true;
                }
                prev = curr;
                curr = curr->_next;
            }
            return false;
        }

        /**
         * Inserts a key/value pair at the head of its bucket.
         *
         * @return false if the key already exists (table unchanged),
         *         true once the pair has been stored.
         */
        bool Insert(const pair<K, V>& kv) {
            if (Find(kv.first)) return false;
            Hash hashFunc;
            // Grow when the load factor reaches 1 (one node per bucket on average).
            if (_n == _tables.size()) {
                // Take the next size from the prime table, matching the
                // open-addressing table, instead of doubling to a composite size.
                const size_t newSize = _stl_next_prime(_tables.size() + 1);
                // The prime table is finite; skip the rehash once it is exhausted.
                if (newSize > _tables.size()) {
                    vector<Node*> newVector(newSize);
                    for (size_t i = 0; i < _tables.size(); i++) {
                        Node* current = _tables[i];
                        while (current) {
                            Node* next = current->_next;
                            // Relink the existing node; nothing is reallocated.
                            size_t hash_i = hashFunc(current->_kv.first) % newVector.size();
                            current->_next = newVector[hash_i];
                            newVector[hash_i] = current;
                            current = next;
                        }
                        _tables[i] = nullptr;
                    }
                    _tables.swap(newVector);
                }
            }
            Node* newNode = new Node(kv);
            size_t hash_i = hashFunc(kv.first) % _tables.size();
            newNode->_next = _tables[hash_i];
            _tables[hash_i] = newNode;
            ++_n;
            return true;
        }
    };
}

#include "HashTable.h"
#include "open_address.h"
#include "hash_bucket.h"
#include <string>
#include <iostream>
using namespace std;

// Writes one result line to stdout: a "[PASS] " or "[FAIL] " tag chosen by
// `result`, followed by the test name and a flushing newline.
void printTestResult(const string& testName, bool result) {
    const char* tag = "[FAIL] ";
    if (result) {
        tag = "[PASS] ";
    }
    cout << tag << testName << endl;
}

void test_open_address() {
    cout << "\n===== 测试开放寻址法哈希表 =====" << endl;
    open_address::HashTable<int, string> ht;
    cout << "创建哈希表成功" << endl;

    bool insert1 = ht.Insert({1, "A"});
    printTestResult("插入键 1 值 A", insert1);
    bool insert2 = ht.Insert({1, "B"});
    printTestResult("插入重复键 1 值 B（期望失败）", !insert2);
    bool insert3 = ht.Insert({2, "C"});
    printTestResult("插入键 2 值 C", insert3);

    auto node1 = ht.Find(1);
    printTestResult("查找键 1", node1 != nullptr && node1->_kv.second == "A");
    auto node2 = ht.Find(2);
    printTestResult("查找键 2", node2 != nullptr && node2->_kv.second == "C");
    auto node3 = ht.Find(3);
    printTestResult("查找不存在的键 3", node3 == nullptr);

    bool erase1 = ht.Erase(1);
    printTestResult("删除键 1", erase1);
    bool erase2 = ht.Erase(1);
    printTestResult("重复删除键 1（期望失败）", !erase2);

    cout << "\n--- 扩容测试 ---" << endl;
    for (int i = 3; i < 100; ++i) {
        ht.Insert({i, to_string(i)});
    }
    auto node99 = ht.Find(99);
    printTestResult("查找扩容后的键 99", node99 != nullptr && node99->_kv.second == "99");
}

void test_hash_bucket() {
    cout << "\n===== 测试链地址法哈希表 =====" << endl;
    hash_bucket::HashTable<string, int> ht;
    cout << "创建哈希表成功" << endl;

    bool insert1 = ht.Insert({"apple", 5});
    printTestResult("插入键 apple 值 5", insert1);
    bool insert2 = ht.Insert({"apple", 10});
    printTestResult("插入重复键 apple 值 10（期望失败）", !insert2);
    bool insert3 = ht.Insert({"banana", 8});
    printTestResult("插入键 banana 值 8", insert3);

    auto node1 = ht.Find("apple");
    printTestResult("查找键 apple", node1 != nullptr && node1->_kv.second == 5);
    auto node2 = ht.Find("banana");
    printTestResult("查找键 banana", node2 != nullptr && node2->_kv.second == 8);
    auto node3 = ht.Find("orange");
    printTestResult("查找不存在的 orange", node3 == nullptr);

    bool erase1 = ht.Erase("apple");
    printTestResult("删除键 apple", erase1);
    bool erase2 = ht.Erase("apple");
    printTestResult("重复删除键 apple（期望失败）", !erase2);

    cout << "\n--- 扩容测试 ---" << endl;
    for (int i = 0; i < 100; ++i) {
        string key = "key_" + to_string(i);
        ht.Insert({key, i});
    }
    auto node = ht.Find("key_99");
    printTestResult("查找扩容后的键 key_99", node != nullptr && node->_kv.second == 99);
}

// Calendar date used as a custom key type in the hash-table tests.
struct Date {
    int _year;
    int _month;
    int _day;

    // Every component defaults to 1. The constructor is intentionally not
    // explicit so a Date can be brace-initialized in place, e.g. {2025, 6, 29}.
    Date(int year = 1, int month = 1, int day = 1)
        : _year(year), _month(month), _day(day) {}

    // Two dates are equal when year, month and day all match.
    bool operator==(const Date& d) const {
        if (_year != d._year) return false;
        if (_month != d._month) return false;
        return _day == d._day;
    }
};

// Hash functor for Date keys. Folds year, month and day into one value
// using the same add-then-multiply-by-131 step as the string hash.
struct DateHashFunc {
    /**
     * @param d  date to hash
     * @return   combined hash of the three fields (size_t arithmetic wraps)
     *
     * Declared const so the functor can also be invoked through a const
     * object or const reference.
     */
    size_t operator()(const Date& d) const {
        const int fields[] = {d._year, d._month, d._day};
        size_t hash = 0;
        for (const int field : fields) {
            hash += field;
            hash *= 131;
        }
        return hash;
    }
};

void test01() {
    hash_bucket::HashTable<string, string> ht1;
    const char* a1[] = {"abcd", "sort", "insert"};
    for (auto& it : a1) {
        ht1.Insert({it, it});
    }

    hash_bucket::HashTable<int, int> ht2;
    const int a2[] = {-19, -30, 5, 36, 13, 20, 21, 12};
    for (auto& it : a2) {
        ht2.Insert({it, it});
    }

    hash_bucket::HashTable<Date, int, DateHashFunc> ht3;
    ht3.Insert({{2025, 6, 29}, 1});
    ht3.Insert({{2025, 6, 30}, 1});
}

// Entry point: runs the three test routines in order, then exits with 0.
int main() {
    void (*const suites[])() = {test_open_address, test_hash_bucket, test01};
    for (const auto run : suites) {
        run();
    }
    return 0;
}

C++ 进阶：哈希表原理与实现

C++ 进阶：哈希表原理与实现

概念介绍

什么是哈希？

核心术语

一、哈希函数

1. 哈希函数的核心特点是什么？

2. 哈希函数的设计目标是什么？

3. 常见的哈希函数有哪些？

直接定址法

除法散列法

乘法散列法

全域散列法

二、负载因子

1. 什么是负载因子？

2. 负载因子对哈希表的性能有什么影响？

3. 负载因子超过阈值时会发生什么？

三、哈希冲突

四、冲突处理

方法一：开放定址法

线性探测

二次探测

双重散列

方法二：链地址法

基本操作

怎么解决键 key 不能取模的问题？

代码实现

头文件

哈希表：HashTable.h

开放定址法：open_address.h

链地址法：hash_bucket.h

测试文件：Test.cpp

运行结果

更多推荐文章

相关免费在线工具

C++ 进阶：哈希表原理与实现

C++ 进阶：哈希表原理与实现

概念介绍

什么是哈希？

核心术语

一、哈希函数

1. 哈希函数的核心特点是什么？

2. 哈希函数的设计目标是什么？

3. 常见的哈希函数有哪些？

直接定址法

除法散列法

乘法散列法

全域散列法

二、负载因子

1. 什么是负载因子？

2. 负载因子对哈希表的性能有什么影响？

3. 负载因子超过阈值时会发生什么？

三、哈希冲突

四、冲突处理

方法一：开放定址法

线性探测

二次探测

双重散列

方法二：链地址法

基本操作

怎么解决键 key 不能取模的问题？

代码实现

头文件

哈希表：HashTable.h

开放定址法：open_address.h

链地址法：hash_bucket.h

测试文件：Test.cpp

运行结果

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具