Simulation implementation of unordered_map and unordered_set in C++

unordered_map, unordered_set simulation implementation

  • Hash table source code
  • Control of hash table template parameters
  • functor increase
  • Forward iterator implementation
    • *Operator overloading
    • ->Operator overloading
    • + + operator overloading
    • != and == operator overloading
    • begin() and end() implementation
  • unordered_set implementation
  • unordered_map implementation
  • map/set versus unordered_map/unordered_set
  • Code after hash table adjustment

Hash table source code

template<class K>
struct HashFunc
{<!-- -->
size_t operator()(const K & amp; key)
{<!-- -->
//All types are forced to size_t type
return (size_t)key;
}
};

//template specialization
template<>
struct HashFunc<string>
{<!-- -->
size_t operator()(const string & amp; key)
{<!-- -->
size_t val = 0;
for (auto ch : key)
{<!-- -->
val *= 131;
val + = ch;
}

return val;
}
};
namespace HashBucket
{<!-- -->
template<class K, class V>
struct HashNode
{<!-- -->
pair<K, V> _kv;
HashNode<K, V>* _next;

\t\t//Constructor
HashNode(const pair<K, V> & amp; kv)
:_kv(kv)
,_next(nullptr)
{<!-- -->}
};

template<class K, class V, class Hash = HashFunc<K>>
classHashTable
{<!-- -->
typedef HashNode<K, V> Node;
public:

~HashTable()
{<!-- -->
for (size_t i = 0; i < _tables.size(); i + + )
{<!-- -->
Node* cur = _tables[i];
while (cur)
{<!-- -->
Node* next = cur->_next;
delete cur;
cur = next;
}
_tables[i] = nullptr;
}
}

inline size_t __stl_next_prime(size_t n)
{<!-- -->
static const size_t __stl_num_primes = 28;
static const size_t __stl_prime_list[__stl_num_primes] =
{<!-- -->
53, 97, 193, 389, 769,
1543, 3079, 6151, 12289, 24593,
49157, 98317, 196613, 393241, 786433,
1572869, 3145739, 6291469, 12582917, 25165843,
50331653, 100663319, 201326611, 402653189, 805306457,
1610612741, 3221225473, 4294967291
};

for (size_t i = 0; i < __stl_num_primes; + + i)
{<!-- -->
if (__stl_prime_list[i] > n)
{<!-- -->
return __stl_prime_list[i];
}
}

return -1;
}
bool Insert(const pair<K, V> & amp; kv)
{<!-- -->
//If the key-value pair exists, return false
if (Find(kv.first))
{<!-- -->
return false;
}

Hash hash;
//If the load factor is 1, expand the capacity
if (_size == _tables.size())
{<!-- -->
//Create a new hash table
vector<Node*> newTables;
size_t newSizes = _size == 0 ? 10 : 2 * _tables.size();

//Initialize each element to empty
newTables.resize(__stl_next_prime(_tables.size()), nullptr);

//Insert the old table node into the new table
for (size_t i = 0; i < _tables.size(); i + + )
{<!-- -->
Node* cur = _tables[i];

while (cur)
{<!-- -->
//Record the next node of cur
Node* next = cur->_next;
//Calculate the corresponding hash bucket number
size_t hashi = hash(cur->_kv.first) % newTables.size();

//Move the old table node to the new table
cur->_next = newTables[hashi];
newTables[hashi] = cur;
cur = next;
}
_tables[i] = nullptr;
}
_tables.swap(newTables);
}
//Calculate hash bucket number
size_t hashi = hash(kv.first) % _tables.size();

//Insert node
Node* newnode = new Node(kv);
newnode->_next = _tables[hashi];
_tables[hashi] = newnode;

//Number of elements + +
_size + + ;
return true;
}
//Find
Node* Find(const K & amp; key)
{<!-- -->
//If the hash table is empty, return empty
if (_tables.size() == 0)
{<!-- -->
return nullptr;
}
Hash hash;
//Calculate hash address
size_t hashi = hash(key) % _tables.size();
Node* cur = _tables[hashi];
//Traverse the hash bucket
while (cur)
{<!-- -->
if ((cur->_kv.first) == key)
{<!-- -->
return cur;
}
cur = cur->_next;
}
return nullptr;
}
\t\t//delete
bool Erase(const K & amp; key)
{<!-- -->
//Hash table size is 0, deletion failed
if (_tables.size() == 0)
{<!-- -->
return false;
}
Hash hash;
//Calculate hash address
size_t hashi = hash(key) % _tables.size();

Node* prev = nullptr;
Node* cur = _tables[hashi];
//Traverse the hash bucket to find whether the deleted node exists
while (cur)
{<!-- -->
if (hash(hash(cur->_kv.first)) == key)
{<!-- -->
if (prev)
{<!-- -->
prev->_next = cur->_next;
}
else
{<!-- -->
_tables[hashi] = cur->_next;
}
//Delete the node
delete cur;
_size--;

return true;
}

prev = cur;
cur = cur->_next;
}
//Delete node does not exist, return false
return false;
}

size_t Size()
{<!-- -->
return _size;
}

size_t TableSize()
{<!-- -->
return _tables.size();
}

size_t BucketNum()
{<!-- -->
size_t num = 0;
for (size_t i = 0; i < _tables.size(); i + + )
{<!-- -->
if (_tables[i])
{<!-- -->
num + + ;
}
}
return num;
}
private:
vector<Node*> _tables;
size_t _size = 0;
};
}

Control of hash table template parameters

unordered_set belongs to the K model, and unordered_map belongs to the KV model, but at the bottom level we use a hash table to implement it, so we need to set the second parameter of the hash table to T.

template<class K, class T>
struct HashNode
{<!-- -->
T_data;
HashNode<T>* _next;

\t\t//Constructor
HashNode(const T & data)
:_data(data)
,_next(nullptr)
{<!-- -->}
};
template<class K, class T, class Hash = HashFunc<K>>
classHashTable
{<!-- -->
typedef HashNode<T> Node;
public:
//......
private:
vector<Node*> _tables;
size_t _size = 0;
};

The T template parameter may be just the key value Key, or it may be a key-value pair composed of Key and Value. If it is an unordered_set container, then the template parameters it passes into the underlying red-black tree are Key and Key:

template<class K, class Hash = HashFunc<K>>
class unorder_set
{<!-- -->
public:
//...
private:
HashBucket::HashTable<K, K, Hash> _ht;
};

If it is an unordered_map container, then the template parameters it passes into the underlying red-black tree are Key and Value:

template<class K, class V, class Hash = HashFunc<K>>
class unorder_map
{<!-- -->
public:
//...
private:
HashBucket::HashTable<K, pair<K, V>, Hash> _ht;
};

Functions added

For the unordered_set container, we need to perform key-value comparison, that is, compare T directly. But for the unordered_map container, what we need to compare is the key in the key-value pair . We need to extract the key first and then compare.

Therefore, we need to provide a functor in the upper layer unordered_set and unordered_map, and perform comparison operations separately according to the incoming T type:

map functor:

template<class K, class V, class Hash = HashFunc<K>>
class unorder_map
{<!-- -->
struct MapKeyOfT
{<!-- -->
const K & amp; operator()(const pair<K, V> & amp; kv)
{<!-- -->
return kv.first;
}
};
public:
//...
private:
HashBucket::HashTable<K, pair<K, V>, Hash, MapKeyOfT> _ht;
};

set functor:

template<class K, class Hash = HashFunc<K>>
class unorder_set
{<!-- -->
struct SetKeyOfT
{<!-- -->
const K & amp; operator()(const K & amp; key)
{<!-- -->
return key;
}
};
public:
//...
private:
HashBucket::HashTable<K, K, Hash, SetKeyOfT> _ht;
};

Forward iterator implementation

There are only forward iterators in the hash table. The forward iterator of the hash table actually encapsulates the entire hash table:

//Predeclaration
template<class K, class T, class Hash, class KeyOfT>
class HashTable;

template<class K, class T, class Hash, class KeyOfT>
struct __HashIterator
{<!-- -->
typedef HashNode<T> Node;
typedef HashTable <K, T, Hash, KeyOfT> HT;
typedef __HashIterator <K, T, Hash, KeyOfT> Self;

\t
Node* _node;
HT* _pht;
}

*Operator overloading

The dereference operation is to return the data of a node in a singly linked list:

T & operator*()
{<!-- -->
return _node->_data;
}

->Operator overloading

->The operation is to return the address of the data:

T* operator->()
{<!-- -->
return &_node->_data;
}

+ + operator overloading

+ + in the hash table is actually searching for the node next to the node in the current hash bucket. If the search has been completed in one hash bucket, search in the next hash bucket until it is found;

code show as below:

Self & amp; operator + + ()
{<!-- -->
//Find the next node of this node
if (_node->_next)
{<!-- -->
//If the next node is not empty, it points to the next node.
_node = _node->_next;
}
else
{<!-- -->
Hash hash;
KeyOfT kot;

//If it is empty, calculate the hash address of the location of the hash bucket.
size_t i = hash(kot(_node->_data)) % _pht->_tables.size();

//Address + + calculates the location of the next bucket
i + + ;

//Continue to loop and search
for (; i < _pht->_tables.size(); i + + )
{<!-- -->
if (_pht->_tables[i])
{<!-- -->
_node = _pht->_tables[i];
break;
}
}
//Find the entire hash table and point to nullptr
if (i == _pht->_tables.size())
{<!-- -->
_node = nullptr;
}
}

return *this;
}

!= and == operator overloading

!= and == are used to determine whether they are the same node:

//!=
bool operator!=(const Self & amp; s)
{<!-- -->
return _node != s._node;
}
//==
bool operator==(const Self & s)
{<!-- -->
return _node == s._node;
}

Implementation of begin() and end()

  1. The begin function returns the forward iterator of the first non-nullptr position in the hash table.
  2. The end function returns the forward iterator of the position next to the last position in the hash table. Here, a null pointer is directly used to construct a forward iterator.
class HashTable
{<!-- -->
typedef HashNode<T> Node;

template<class K, class T, class Hash, class KeyOfT>
friend struct __HashIterator;
public:
typedef __HashIterator <K, T, Hash, KeyOfT> iterator;

iterator begin()
{<!-- -->
//Traverse the entire array from front to back
for (size_t i = 0; i < _tables.size(); i + + )
{<!-- -->
//Find a non-empty position and return the position iterator
if (_tables[i])
{<!-- -->
return iterator(_tables[i], this);
}
}
//Finally return end();
return end();
}

iterator end()
{<!-- -->
//Return an iterator to an empty position
return iterator(nullptr, this);
}
}

unordered_set implementation

template<class K, class Hash = HashFunc<K>>
class unordered_set
{<!-- -->
struct SetKeyOfT
{<!-- -->
const K & amp; operator()(const K & amp; key)
{<!-- -->
return key;
}
};
public:
typedef typename HashBucket::HashTable <K, K, Hash, SetKeyOfT>::iterator iterator;

iterator begin()
{<!-- -->
return _ht.begin();
}

iterator end()
{<!-- -->
return _ht.end();
}

pair<iterator, bool> insert(const K & amp; key)
{<!-- -->
return _ht.Insert(key);
}
private:
HashBucket::HashTable<K, K, Hash, SetKeyOfT> _ht;
};

unordered_map implementation

template<class K, class V, class Hash = HashFunc<K>>
class unordered_map
{<!-- -->
struct MapKeyOfT
{<!-- -->
const K & amp; operator()(const pair<K, V> & amp; kv)
{<!-- -->
return kv.first;
}
};
public:
typedef typename HashBucket::HashTable <K, pair<K, V>, Hash, MapKeyOfT>::iterator iterator;

iterator begin()
{<!-- -->
return _ht.begin();
}

iterator end()
{<!-- -->
return _ht.end();
}

pair<iterator, bool> insert(const K & amp; key)
{<!-- -->
_ht.Insert(key);
}

V & amp; operator[](const K & amp; key)
{<!-- -->
pair<iterator, bool> ret = _ht.Insert(make_pair(key, V()));
return ret.first->second;
}
private:
HashBucket::HashTable<K, pair<K, V>, Hash, MapKeyOfT> _ht;
};

Comparison between map/set and unordered_map/unordered_set

The bottom layer of map/set is implemented using red-black trees, and the bottom layer of unordered_map/unordered_set is implemented using hash tables. The underlying implementations of the two are different. For a small amount of data, there is no difference in their addition, deletion, and modification, but for a large amount of data The unordered series of data is superior, especially for searches, the unordered series can basically maintain high efficiency;

Hash table adjusted code

#pragma once

template
struct HashFunc
{
size_t operator()(const K & amp; key)
{
//All types are forced to size_t type
return (size_t)key;
}
};

//template specialization
template<>
struct HashFunc
{
size_t operator()(const string & amp; key)
{
size_t val = 0;
for (auto ch : key)
{
val *= 131;
val + = ch;
}

return val;
}
};

namespace HashBucket
{
template
struct HashNode
{
T_data;
HashNode* _next;

\t\t//Constructor
HashNode(const T & data)
:_data(data)
,_next(nullptr)
{}
};

//predeclaration
template
class HashTable;

template
struct __HashIterator
{
typedef HashNode Node;
typedef HashTable  HT;
typedef __HashIterator  Self;

\t\t
Node* _node;
HT* _pht;

\t\t//Constructor
__HashIterator(Node* node, HT* pht)
:_node(node)
,_pht(pht)
{}

T & operator*()
{<!-- -->
return _node->_data;
}

T* operator->()
{<!-- -->
return &_node->_data;
}

Self & operator + + ()
{
//Find the next node of this node
if (_node->_next)
{
//If the next node is not empty, it points to the next node.
_node = _node->_next;
}
else
{
Hash hash;
KeyOfT kot;

//If it is empty, calculate the hash address of the location of the hash bucket.
size_t i = hash(kot(_node->_data)) % _pht->_tables.size();

//Address + + calculates the location of the next bucket
i + + ;

//Continue to loop and search
for (; i < _pht->_tables.size(); i + + )
{
if (_pht->_tables[i])
{
_node = _pht->_tables[i];
break;
}
}
//Find the entire hash table and point to nullptr
if (i == _pht->_tables.size())
{
_node = nullptr;
}
}

return *this;
}

bool operator!=(const Self & amp; s)
{
return _node != s._node;
}

bool operator==(const Self & s)
{
return _node == s._node;
}
};

template
classHashTable
{
typedef HashNode Node;

template
friend struct __HashIterator;
public:
typedef __HashIterator  iterator;

iterator begin()
{
for (size_t i = 0; i < _tables.size(); i + + )
{
if (_tables[i])
{
return iterator(_tables[i], this);
}
}

return end();
}

iterator end()
{
return iterator(nullptr, this);
}

~HashTable()
{
for (size_t i = 0; i < _tables.size(); i + + )
{
Node* cur = _tables[i];
while (cur)
{
Node* next = cur->_next;
delete cur;
cur = next;
}
_tables[i] = nullptr;
}
}

inline size_t __stl_next_prime(size_t n)
{
static const size_t __stl_num_primes = 28;
static const size_t __stl_prime_list[__stl_num_primes] =
{
53, 97, 193, 389, 769,
1543, 3079, 6151, 12289, 24593,
49157, 98317, 196613, 393241, 786433,
1572869, 3145739, 6291469, 12582917, 25165843,
50331653, 100663319, 201326611, 402653189, 805306457,
1610612741, 3221225473, 4294967291
};

for (size_t i = 0; i < __stl_num_primes; + + i)
{
if (__stl_prime_list[i] > n)
{
return __stl_prime_list[i];
}
}

return -1;
}
pair Insert(const T & amp; data)
{
Hash hash;
KeyOfT kot;
//If the key-value pair exists, return false
iterator ret = Find((kot(data)));
if (ret != end())
{
return make_pair(ret, false);
}
//If the load factor is 1, expand the capacity
if (_size == _tables.size())
{
//Create a new hash table
vector newTables;
size_t newSizes = _size == 0 ? 10 : 2 * _tables.size();

//Initialize each element to empty
newTables.resize(__stl_next_prime(_tables.size()), nullptr);

//Insert the old table node into the new table
for (size_t i = 0; i < _tables.size(); i + + )
{
Node* cur = _tables[i];

while (cur)
{
//Record the next node of cur
Node* next = cur->_next;
//Calculate the corresponding hash bucket number
size_t hashi = hash(kot(cur->_data)) % newTables.size();

//Move the old table node to the new table
cur->_next = newTables[hashi];
newTables[hashi] = cur;
cur = next;
}
_tables[i] = nullptr;
}
_tables.swap(newTables);
}
//Calculate hash bucket number
size_t hashi = hash(kot(data)) % _tables.size();

//Insert node
Node* newnode = new Node(data);
newnode->_next = _tables[hashi];
_tables[hashi] = newnode;

//Number of elements + +
_size + + ;
return make_pair(iterator(newnode, this), true);
}
//Find
iterator Find(const K & amp; key)
{
//If the hash table is empty, return empty
if (_tables.size() == 0)
{
return end();
}
Hash hash;
KeyOfT kot;
//Calculate hash address
size_t hashi = hash(key) % _tables.size();
Node* cur = _tables[hashi];
//Traverse the hash bucket
while (cur)
{
if (kot(cur->_data) == key)
{
return iterator(cur, this);
}
cur = cur->_next;
}
return end();
}
\t\t//delete
bool Erase(const K & amp; key)
{
//Hash table size is 0, deletion failed
if (_tables.size() == 0)
{
return false;
}
Hash hash;
//Calculate hash address
size_t hashi = hash(key) % _tables.size();

Node* prev = nullptr;
Node* cur = _tables[hashi];
//Traverse the hash bucket to find whether the deleted node exists
while (cur)
{
if (hash(kot(cur->_data)) == key)
{
if (prev)
{
prev->_next = cur->_next;
}
else
{
_tables[hashi] = cur->_next;
}
//Delete the node
delete cur;
_size--;

return true;
}

prev = cur;
cur = cur->_next;
}
//Delete node does not exist, return false
return false;
}
private:
vector _tables;
size_t _size = 0;
};
}