unordered_map, unordered_set simulation implementation
- Hash table source code
- Control of hash table template parameters
- functor increase
- Forward iterator implementation
-
- *Operator overloading
- ->Operator overloading
- + + operator overloading
- != and == operator overloading
- begin() and end() implementation
- unordered_set implementation
- unordered_map implementation
- map/set versus unordered_map/unordered_set
- Code after hash table adjustment
Hash table source code
template<class K> struct HashFunc {<!-- --> size_t operator()(const K & amp; key) {<!-- --> //All types are forced to size_t type return (size_t)key; } }; //template specialization template<> struct HashFunc<string> {<!-- --> size_t operator()(const string & amp; key) {<!-- --> size_t val = 0; for (auto ch : key) {<!-- --> val *= 131; val + = ch; } return val; } }; namespace HashBucket {<!-- --> template<class K, class V> struct HashNode {<!-- --> pair<K, V> _kv; HashNode<K, V>* _next; \t\t//Constructor HashNode(const pair<K, V> & amp; kv) :_kv(kv) ,_next(nullptr) {<!-- -->} }; template<class K, class V, class Hash = HashFunc<K>> classHashTable {<!-- --> typedef HashNode<K, V> Node; public: ~HashTable() {<!-- --> for (size_t i = 0; i < _tables.size(); i + + ) {<!-- --> Node* cur = _tables[i]; while (cur) {<!-- --> Node* next = cur->_next; delete cur; cur = next; } _tables[i] = nullptr; } } inline size_t __stl_next_prime(size_t n) {<!-- --> static const size_t __stl_num_primes = 28; static const size_t __stl_prime_list[__stl_num_primes] = {<!-- --> 53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317, 196613, 393241, 786433, 1572869, 3145739, 6291469, 12582917, 25165843, 50331653, 100663319, 201326611, 402653189, 805306457, 1610612741, 3221225473, 4294967291 }; for (size_t i = 0; i < __stl_num_primes; + + i) {<!-- --> if (__stl_prime_list[i] > n) {<!-- --> return __stl_prime_list[i]; } } return -1; } bool Insert(const pair<K, V> & amp; kv) {<!-- --> //If the key-value pair exists, return false if (Find(kv.first)) {<!-- --> return false; } Hash hash; //If the load factor is 1, expand the capacity if (_size == _tables.size()) {<!-- --> //Create a new hash table vector<Node*> newTables; size_t newSizes = _size == 0 ? 10 : 2 * _tables.size(); //Initialize each element to empty newTables.resize(__stl_next_prime(_tables.size()), nullptr); //Insert the old table node into the new table for (size_t i = 0; i < _tables.size(); i + + ) {<!-- --> Node* cur = _tables[i]; while (cur) {<!-- --> //Record the next node of cur Node* next = cur->_next; //Calculate the corresponding hash bucket number size_t hashi = hash(cur->_kv.first) % newTables.size(); //Move the old table node to the new table cur->_next = newTables[hashi]; newTables[hashi] = cur; cur = next; } _tables[i] = nullptr; } _tables.swap(newTables); } //Calculate hash bucket number size_t hashi = hash(kv.first) % _tables.size(); //Insert node Node* newnode = new Node(kv); newnode->_next = _tables[hashi]; _tables[hashi] = newnode; //Number of elements + + _size + + ; return true; } //Find Node* Find(const K & amp; key) {<!-- --> //If the hash table is empty, return empty if (_tables.size() == 0) {<!-- --> return nullptr; } Hash hash; //Calculate hash address size_t hashi = hash(key) % _tables.size(); Node* cur = _tables[hashi]; //Traverse the hash bucket while (cur) {<!-- --> if ((cur->_kv.first) == key) {<!-- --> return cur; } cur = cur->_next; } return nullptr; } \t\t//delete bool Erase(const K & amp; key) {<!-- --> //Hash table size is 0, deletion failed if (_tables.size() == 0) {<!-- --> return false; } Hash hash; //Calculate hash address size_t hashi = hash(key) % _tables.size(); Node* prev = nullptr; Node* cur = _tables[hashi]; //Traverse the hash bucket to find whether the deleted node exists while (cur) {<!-- --> if (hash(hash(cur->_kv.first)) == key) {<!-- --> if (prev) {<!-- --> prev->_next = cur->_next; } else {<!-- --> _tables[hashi] = cur->_next; } //Delete the node delete cur; _size--; return true; } prev = cur; cur = cur->_next; } //Delete node does not exist, return false return false; } size_t Size() {<!-- --> return _size; } size_t TableSize() {<!-- --> return _tables.size(); } size_t BucketNum() {<!-- --> size_t num = 0; for (size_t i = 0; i < _tables.size(); i + + ) {<!-- --> if (_tables[i]) {<!-- --> num + + ; } } return num; } private: vector<Node*> _tables; size_t _size = 0; }; }
Control of hash table template parameters
unordered_set belongs to the K model, and unordered_map belongs to the KV model, but at the bottom level we use a hash table to implement it, so we need to set the second parameter of the hash table to T.
template<class K, class T> struct HashNode {<!-- --> T_data; HashNode<T>* _next; \t\t//Constructor HashNode(const T & data) :_data(data) ,_next(nullptr) {<!-- -->} }; template<class K, class T, class Hash = HashFunc<K>> classHashTable {<!-- --> typedef HashNode<T> Node; public: //...... private: vector<Node*> _tables; size_t _size = 0; };
The T template parameter may be just the key value Key, or it may be a key-value pair composed of Key and Value. If it is an unordered_set container, then the template parameters it passes into the underlying red-black tree are Key and Key:
template<class K, class Hash = HashFunc<K>> class unorder_set {<!-- --> public: //... private: HashBucket::HashTable<K, K, Hash> _ht; };
If it is an unordered_map container, then the template parameters it passes into the underlying red-black tree are Key and Value:
template<class K, class V, class Hash = HashFunc<K>> class unorder_map {<!-- --> public: //... private: HashBucket::HashTable<K, pair<K, V>, Hash> _ht; };
Functions added
For the unordered_set container, we need to perform key-value comparison, that is, compare T directly. But for the unordered_map container, what we need to compare is the key in the key-value pair
Therefore, we need to provide a functor in the upper layer unordered_set and unordered_map, and perform comparison operations separately according to the incoming T type:
map functor:
template<class K, class V, class Hash = HashFunc<K>> class unorder_map {<!-- --> struct MapKeyOfT {<!-- --> const K & amp; operator()(const pair<K, V> & amp; kv) {<!-- --> return kv.first; } }; public: //... private: HashBucket::HashTable<K, pair<K, V>, Hash, MapKeyOfT> _ht; };
set functor:
template<class K, class Hash = HashFunc<K>> class unorder_set {<!-- --> struct SetKeyOfT {<!-- --> const K & amp; operator()(const K & amp; key) {<!-- --> return key; } }; public: //... private: HashBucket::HashTable<K, K, Hash, SetKeyOfT> _ht; };
Forward iterator implementation
There are only forward iterators in the hash table. The forward iterator of the hash table actually encapsulates the entire hash table:
//Predeclaration template<class K, class T, class Hash, class KeyOfT> class HashTable; template<class K, class T, class Hash, class KeyOfT> struct __HashIterator {<!-- --> typedef HashNode<T> Node; typedef HashTable <K, T, Hash, KeyOfT> HT; typedef __HashIterator <K, T, Hash, KeyOfT> Self; \t Node* _node; HT* _pht; }
*Operator overloading
The dereference operation is to return the data of a node in a singly linked list:
T & operator*() {<!-- --> return _node->_data; }
->Operator overloading
->The operation is to return the address of the data:
T* operator->() {<!-- --> return &_node->_data; }
+ + operator overloading
+ + in the hash table is actually searching for the node next to the node in the current hash bucket. If the search has been completed in one hash bucket, search in the next hash bucket until it is found;
code show as below:
Self & amp; operator + + () {<!-- --> //Find the next node of this node if (_node->_next) {<!-- --> //If the next node is not empty, it points to the next node. _node = _node->_next; } else {<!-- --> Hash hash; KeyOfT kot; //If it is empty, calculate the hash address of the location of the hash bucket. size_t i = hash(kot(_node->_data)) % _pht->_tables.size(); //Address + + calculates the location of the next bucket i + + ; //Continue to loop and search for (; i < _pht->_tables.size(); i + + ) {<!-- --> if (_pht->_tables[i]) {<!-- --> _node = _pht->_tables[i]; break; } } //Find the entire hash table and point to nullptr if (i == _pht->_tables.size()) {<!-- --> _node = nullptr; } } return *this; }
!= and == operator overloading
!= and == are used to determine whether they are the same node:
//!= bool operator!=(const Self & amp; s) {<!-- --> return _node != s._node; } //== bool operator==(const Self & s) {<!-- --> return _node == s._node; }
Implementation of begin() and end()
- The begin function returns the forward iterator of the first non-nullptr position in the hash table.
- The end function returns the forward iterator of the position next to the last position in the hash table. Here, a null pointer is directly used to construct a forward iterator.
class HashTable {<!-- --> typedef HashNode<T> Node; template<class K, class T, class Hash, class KeyOfT> friend struct __HashIterator; public: typedef __HashIterator <K, T, Hash, KeyOfT> iterator; iterator begin() {<!-- --> //Traverse the entire array from front to back for (size_t i = 0; i < _tables.size(); i + + ) {<!-- --> //Find a non-empty position and return the position iterator if (_tables[i]) {<!-- --> return iterator(_tables[i], this); } } //Finally return end(); return end(); } iterator end() {<!-- --> //Return an iterator to an empty position return iterator(nullptr, this); } }
unordered_set implementation
template<class K, class Hash = HashFunc<K>> class unordered_set {<!-- --> struct SetKeyOfT {<!-- --> const K & amp; operator()(const K & amp; key) {<!-- --> return key; } }; public: typedef typename HashBucket::HashTable <K, K, Hash, SetKeyOfT>::iterator iterator; iterator begin() {<!-- --> return _ht.begin(); } iterator end() {<!-- --> return _ht.end(); } pair<iterator, bool> insert(const K & amp; key) {<!-- --> return _ht.Insert(key); } private: HashBucket::HashTable<K, K, Hash, SetKeyOfT> _ht; };
unordered_map implementation
template<class K, class V, class Hash = HashFunc<K>> class unordered_map {<!-- --> struct MapKeyOfT {<!-- --> const K & amp; operator()(const pair<K, V> & amp; kv) {<!-- --> return kv.first; } }; public: typedef typename HashBucket::HashTable <K, pair<K, V>, Hash, MapKeyOfT>::iterator iterator; iterator begin() {<!-- --> return _ht.begin(); } iterator end() {<!-- --> return _ht.end(); } pair<iterator, bool> insert(const K & amp; key) {<!-- --> _ht.Insert(key); } V & amp; operator[](const K & amp; key) {<!-- --> pair<iterator, bool> ret = _ht.Insert(make_pair(key, V())); return ret.first->second; } private: HashBucket::HashTable<K, pair<K, V>, Hash, MapKeyOfT> _ht; };
Comparison between map/set and unordered_map/unordered_set
The bottom layer of map/set is implemented using red-black trees, and the bottom layer of unordered_map/unordered_set is implemented using hash tables. The underlying implementations of the two are different. For a small amount of data, there is no difference in their addition, deletion, and modification, but for a large amount of data The unordered series of data is superior, especially for searches, the unordered series can basically maintain high efficiency;
Hash table adjusted code
#pragma once templatestruct HashFunc { size_t operator()(const K & amp; key) { //All types are forced to size_t type return (size_t)key; } }; //template specialization template<> struct HashFunc { size_t operator()(const string & amp; key) { size_t val = 0; for (auto ch : key) { val *= 131; val + = ch; } return val; } }; namespace HashBucket { template struct HashNode { T_data; HashNode * _next; \t\t//Constructor HashNode(const T & data) :_data(data) ,_next(nullptr) {} }; //predeclaration template class HashTable; template struct __HashIterator { typedef HashNode Node; typedef HashTable HT; typedef __HashIterator Self; \t\t Node* _node; HT* _pht; \t\t//Constructor __HashIterator(Node* node, HT* pht) :_node(node) ,_pht(pht) {} T & operator*() {<!-- --> return _node->_data; } T* operator->() {<!-- --> return &_node->_data; } Self & operator + + () { //Find the next node of this node if (_node->_next) { //If the next node is not empty, it points to the next node. _node = _node->_next; } else { Hash hash; KeyOfT kot; //If it is empty, calculate the hash address of the location of the hash bucket. size_t i = hash(kot(_node->_data)) % _pht->_tables.size(); //Address + + calculates the location of the next bucket i + + ; //Continue to loop and search for (; i < _pht->_tables.size(); i + + ) { if (_pht->_tables[i]) { _node = _pht->_tables[i]; break; } } //Find the entire hash table and point to nullptr if (i == _pht->_tables.size()) { _node = nullptr; } } return *this; } bool operator!=(const Self & amp; s) { return _node != s._node; } bool operator==(const Self & s) { return _node == s._node; } }; template classHashTable { typedef HashNode Node; template friend struct __HashIterator; public: typedef __HashIterator iterator; iterator begin() { for (size_t i = 0; i < _tables.size(); i + + ) { if (_tables[i]) { return iterator(_tables[i], this); } } return end(); } iterator end() { return iterator(nullptr, this); } ~HashTable() { for (size_t i = 0; i < _tables.size(); i + + ) { Node* cur = _tables[i]; while (cur) { Node* next = cur->_next; delete cur; cur = next; } _tables[i] = nullptr; } } inline size_t __stl_next_prime(size_t n) { static const size_t __stl_num_primes = 28; static const size_t __stl_prime_list[__stl_num_primes] = { 53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317, 196613, 393241, 786433, 1572869, 3145739, 6291469, 12582917, 25165843, 50331653, 100663319, 201326611, 402653189, 805306457, 1610612741, 3221225473, 4294967291 }; for (size_t i = 0; i < __stl_num_primes; + + i) { if (__stl_prime_list[i] > n) { return __stl_prime_list[i]; } } return -1; } pair Insert(const T & amp; data) { Hash hash; KeyOfT kot; //If the key-value pair exists, return false iterator ret = Find((kot(data))); if (ret != end()) { return make_pair(ret, false); } //If the load factor is 1, expand the capacity if (_size == _tables.size()) { //Create a new hash table vector newTables; size_t newSizes = _size == 0 ? 10 : 2 * _tables.size(); //Initialize each element to empty newTables.resize(__stl_next_prime(_tables.size()), nullptr); //Insert the old table node into the new table for (size_t i = 0; i < _tables.size(); i + + ) { Node* cur = _tables[i]; while (cur) { //Record the next node of cur Node* next = cur->_next; //Calculate the corresponding hash bucket number size_t hashi = hash(kot(cur->_data)) % newTables.size(); //Move the old table node to the new table cur->_next = newTables[hashi]; newTables[hashi] = cur; cur = next; } _tables[i] = nullptr; } _tables.swap(newTables); } //Calculate hash bucket number size_t hashi = hash(kot(data)) % _tables.size(); //Insert node Node* newnode = new Node(data); newnode->_next = _tables[hashi]; _tables[hashi] = newnode; //Number of elements + + _size + + ; return make_pair(iterator(newnode, this), true); } //Find iterator Find(const K & amp; key) { //If the hash table is empty, return empty if (_tables.size() == 0) { return end(); } Hash hash; KeyOfT kot; //Calculate hash address size_t hashi = hash(key) % _tables.size(); Node* cur = _tables[hashi]; //Traverse the hash bucket while (cur) { if (kot(cur->_data) == key) { return iterator(cur, this); } cur = cur->_next; } return end(); } \t\t//delete bool Erase(const K & amp; key) { //Hash table size is 0, deletion failed if (_tables.size() == 0) { return false; } Hash hash; //Calculate hash address size_t hashi = hash(key) % _tables.size(); Node* prev = nullptr; Node* cur = _tables[hashi]; //Traverse the hash bucket to find whether the deleted node exists while (cur) { if (hash(kot(cur->_data)) == key) { if (prev) { prev->_next = cur->_next; } else { _tables[hashi] = cur->_next; } //Delete the node delete cur; _size--; return true; } prev = cur; cur = cur->_next; } //Delete node does not exist, return false return false; } private: vector _tables; size_t _size = 0; }; }