Data structure – hash (hash table)

tips:

Jump table: haven’t studied the code yet

1. Dictionary order – ordered linked list

#include<iostream>
using namespace std;


class SortedChainNode {
    int data;
    int* link;
};
template<class E, class K>
class SortedChain {
public:
    SortedChain() { first = 0; }
    ~SortedChain();
    bool IsEmpty() const { return first == 0; }
    int Length() const;
    bool Search(const K & amp; k, E & amp; e) const;
    SortedChain<E, K> & amp; Delete(const K & amp; k, E & amp; e);
    SortedChain<E, K> & Insert(const E & e);
    SortedChain<E, K> & DistinctInsert(const E & e);
    void Output(ostream & amp; out) const;
private:
    SortedChainNode<E, K>* first;
};

//Search (the linked list is arranged in lexicographic order)
template<class E, class K>
bool SortedChain<E, K>::Search(const K & amp; k, E & amp; e) const
{
    SortedChainNode<E, K>* p = first;
    while (p & amp; & amp; p->data < k)
        p = p->link;

    // Determine whether it matches
    if (p & amp; & amp; p->data == k) // If the linked list is not empty yet and the data matches
    {
        e = p->data; return true;
    }
    return false; // The linked list is empty, or the current data is greater than k
}

//delete
template<class E, class K>
SortedChain<E, K> & amp; SortedChain<E, K>::Delete(const K & amp; k, E & amp; e)
{
    SortedChainNode<E, K>* p = first, * tp = 0;
    //double pointer
    // search for match with k
    while (p & amp; & amp; p->data < k) {
        tp = p;
        p = p->link;
    }
    if (p & amp; & amp; p->data == k) //The node to be deleted is found
    {
        e = p->data;
        if (tp) tp->link = p->link; //tp is not 0, that is, p is an ordinary node
        else first = p->link; //p is the first node
        delete p;
        return *this;
    }
    throw BadInput(); //There are no nodes to delete
    return *this;
}

//insert
template<class E, class K>
SortedChain<E, K> & SortedChain<E, K>::Insert(const E & e)
{
    SortedChainNode<E, K>* p = first, * tp = 0;
    while (p & amp; & amp; p->data < e) {//Same as double pointer
        tp = p;
        p = p->link;
    }
    SortedChainNode<E, K>* q = new SortedChainNode<E, K>;
    q->data = e;
    q->link = p;
    if (tp) tp->link = q; //not the insertion head pointer
    else first = q;
    return *this;
}

//Do not allow the insertion of repeated keywords
template<class E, class K>
SortedChain<E, K> & SortedChain<E, K>
::DistinctInsert(const E & amp; e)
{//Insert e only if no element with same key

    SortedChainNode<E, K>* p = first, * tp = 0;

    while (p & amp; & amp; p->data < e)
    {
        tp = p;
        p = p->link;
    }
    // check if duplicate
    if (p & amp; & amp; p->data == e) throw BadInput();
    // not duplicate, set up node for e
    SortedChainNode<E, K>* q = new SortedChainNode<E, K>;
    q->data = e;

    // insert node just after tp
    q->link = p;
    if (tp) tp->link = q;
    else first = q;

    return *this;
}

2. Hash – ordered list

Hash function->address=Hash(key): From known keywords->immediately calculate the unique address

Hash table (Hash table): a table or structure constructed from this

Scope of application of hash table:

?
key
The value range of

?
Pending
key
Not worth much

?
Storage space is limited

?
Especially suitable for needs
quick search
The problem

Filling factor = number of key elements/hash table length

Key question one: Constructing a Hash function

?
OK
Hash
function

1) Deterministic: the same key is always mapped to the same address

2) Fast calculation: Complexity–O(1)

3) Surjection: Cover the entire hash space as fully as possible

4) Uniform: The probability that the key code is mapped to each position in the hash table is as close as possible, which can effectively avoid aggregation.

1. Direct addressing method: Hash(key)= a * key + b; perform linear operation

2. Digital analysis method:

There are n d digits, each of which may have r different symbols. These r different symbols may not necessarily appear at the same frequency in each bit. They may be evenly distributed in some bits and unevenly in other bits.

Then according to the characteristics of the known keyword set, those bits that are evenly distributed (less conflicts) should be selected for hash mapping.

3. Square method: take the middle digits after squaring the keyword as the hash address

4. Folding method: Divide the keyword into several parts with the same number of digits (the number of digits in the last part can be different), and then take the superposition sum of these parts (with the carry rounded off) as the hash address

5. Divide with remainder method (the most commonly used method): The remainder obtained after the key is divided by a number p that is not larger than the hash table length m is the hash address: H(key)=key % p (p< =m)

6. (Pseudo) random number method

7.Polynomial method

Key issue two: handling conflicts

1. Linear open addressing method

?
Linear detection method:
Use a hash function to calculate the initial hash address
H
0
, once a conflict occurs, search for the “next” free position in the table sequentially.
H
i

?
Square detection method:
d=i
2
—
detection
h(k)
,
h(k) + 1
,
h(k) + 4
,
…

Double hashing (re-hashing): Requires two hashing functions – the first hashing function calculates the preferred address of the keyword, and in the event of a collision, the second hashing function is used The function calculates the increment to the next address; or directly calculates the next address

Linear detection method:

Search: Check sequentially starting from h(k) until a bucket satisfies:
The keyword is the same as the target keyword, and the search is successful

Empty bucket or return to
h(k)
, searchfailed
Delete: Lazy deletion – only mark for deletion

The “role” played by a bucket marked for deletion varies depending on the specific type of operation.

1) When searching for an entry, it is regarded as a “non-empty bucket that must not match”, and the search chain continues here.

2) When inserting an entry, it is regarded as a “must-match free bucket” and can be used to store new entries.

Disadvantages:

1. Aggregation problem:
Past conflicts will lead to subsequent conflicts.
h(k
1
)=
i
,
h(k
2
)=j
,
k
1
may occupy
k
2
of
hash
table location, which may cause severe local aggregation and sharp performance degradation, even if
hash
The table is still empty

2. When the table is full, performance will almost certainly be poor.

Advantages:

1.Simple

2. As long as the table is not full, you can always find a space and the insertion is successful.

3. No additional (pointers, linked lists or overflow areas, etc.) space search chain is required. It is localized and can make full use of the system cache and effectively reduce I/O.

(Code with some tricks, deleted part to be added):

#include<iostream>
using namespace std;

//hashtable class
template<class E, class K>
class HashTable {
public:
    HashTable(int divisor = 11);//divisor divisor
    ~HashTable() { delete[] ht; delete[] empty; }
    bool Search(const K & amp; k, E & amp; e) const;
    HashTable<E, K> & Insert(const E & e);
    void Output();//output the hash table
private:
    int hSearch(const K & amp; k) const;
    int m; //The number of buckets (i.e. divisor)
    E* ht; // hash table array
    bool* empty; // Whether it is empty
};

//Constructor
template<class E, class K>
HashTable<E, K>::HashTable(int divisor)
{//Constructor.
    m = divisor;

    // allocate hash table arrays
    ht = new E[m];
    empty = new bool[m];

    // set all buckets to empty
    for (int i = 0; i < m; i + + )
        empty[i] = true;
}

//Auxiliary function hsearch
template<class E, class K>
int HashTable<E, K>::hSearch(const K & amp; k) const//k is the element to be searched
{// Search an open addressed table.
 // Return location of k if present.
 // Otherwise return insert point if there is space.
    int i = k % m; // The bucket k should have been put into
    int j = i; // start at home bucket
    do
    {
        if (empty[j] || ht[j] == k) return j;//j can be inserted or repeated in j
        j = (j + 1) % m; // Not + 1 directly, so that the corresponding bucket can be found in a loop
    }
    while (j != i);
    // After one traversal, we returned to the bucket where we should have put it, that is, we could not find the location to put k.
    return j; // table full
}

//Insert operation
template<class E, class K>
HashTable<E, K> & HashTable<E, K>::Insert(const E & e)//Insert e in ht
{// Hash table insert.
    K k = e; // extract key
    int b = hSearch(k);
    // check if insert is to be done
    if (empty[b]) {
        empty[b] = false;
        ht[b] = e;
        return *this;
    }

    // no insert, check if duplicate or full
    if (ht[b] == k) throw BadInput(); // The same number is inserted repeatedly
    throw NoMem(); // Column is full
    return *this; // Visual C++ needs this line
}

//Search function
template<class E, class K>
bool HashTable<E, K>::Search(const K & amp; k, E & amp; e) const
{// Put element that matches k in e.
 // Return false if no match.
    int b = hSearch(k);
    if (empty[b] || ht[b] != k) return false;
    e = ht[b];
    return true;
}

2. Linked list method (zipper method [chain address method])

Different from 1, when encountering a conflict, instead of looking backward for a blank bucket, the current bucket is linked into a linked list

#include<iostream>
using namespace std;

//chain hash table
template<class E, class K>
class ChainHashTable {
public:
    ChainHashTable(int divisor = 11)
    {
        m = divisor; ht = new SortedChain<E, K>[m];
    }
    ~ChainHashTable() { delete[] ht; }
    bool Search(const K & amp; k, E & amp; e) const
    {
        return ht[k % m].Search(k, e);
    }
    ChainHashTable<E, K> & Insert(const E & e)
    {
        ht[e % m].DistinctInsert(e); return *this;
    }
    ChainHashTable<E, K> & amp; Delete(const K & amp; k, E & amp; e)
    {
        ht[k % m].Delete(k, e); return *this;
    }
    void Output() const; // output the table
private:
    int m; // divisor
    SortedChain<E, K>* ht; // array of chains
};

3. Public overflow area method

Create a separate continuous space, and conflicting entries will be stored in this area in order.

3. Jump list

Reduced array search complexity: o(logn)

Insertion: Note that if you insert some data between n~m, the upper layer will have (m-n)/2 more nodes, and each upper layer will have

#include<iostream>
using namespace std;

template <typename T>
class Quadlist
{ //Quadruple table
private:
int _size; //scale
QuadlistNodePosi(T) header, trailer; //Sentinel
protected:
void init(); int clear(); //Initialize and clear all nodes
public:
QuadlistNodePosi(T) first() const
{ return header->succ; } //First node
QuadlistNodePosi(T) last() const
{ return trailer->pred; } //End node
T remove(QuadlistNodePosi(T) p); //Delete p
QuadlistNodePosi(T) insertAfterAbove //Insert data item e, making it the successor of p and the upper neighbor of b
(T const & amp; e, QuadlistNodePosi(T) p, QuadlistNodePosi(T) b = NULL);
};
\t
template <typename K, typename V>
class Skiplist : public Dictionary<K, V>, public List<Quadlist<Entry<K, V>*>>
{ //Multiple inheritance
protected:
bool skipSearch(ListNode<Quadlist<Entry<K, V>>*>* & amp;qlist,
QuadlistNode<Entry<K, V>>* & amp;p, K & amp; k);
public:
int size() //The total number of entries, that is, the size of the underlying Quadlist
{return empty() ? 0 : last()->data->size();}
int level() { return List::size(); } //Level height, that is, the total number of Quadlists
bool put(K, V); //Insert (Skiplist allows repeated entries, so it must succeed)
V* get(K k); //Read (directly implemented based on skipSearch())
bool remove(K k); //Delete
};
//Find
template <typename K, typename V> bool Skiplist<K, V>::skipSearch
(ListNode<Quadlist<Entry<K, V>>*>* & amp; qlist, //From the specified layer qlist
QuadlistNode<Entry<K, V>>* & amp;p, //Start from the first node p
K & amp; k) { //Search to the right and down for the target key k
while (true) { //At each level
while (p->succ & amp; & amp; (p->entry.key <= k)) //Search from front to back
p = p->succ; //Until a larger key appears or overflows to the trailer
p = p->pred; //Go back one step to determine whether it is a hit.
if (p->pred & amp; & amp; (k == p->entry.key)) return true; //If hit, return successfully
qlist = qlist->succ; // Otherwise, go to the next level
if (!qlist->succ) return false; //If the bottom layer has been penetrated, it means failure
p = (p->pred) ? // Otherwise go to
p->below : qlist->data->first(); //The next node of the current tower
}
}
//insert
template <typename K, typename V>
bool Skiplist<K, V>::put(K k, V v) {
Entry<K, V> e = Entry<K, V>(k, v); //New entries that will be randomly inserted into multiple copies
if (empty()) insertAsFirst(new Quadlist<Entry<K, V>>); //First Entry
ListNode<Quadlist<Entry<K, V>>*>* qlist = first(); //From the top list
QuadlistNode<Entry<K, V>>* p = qlist->data->first(); //Start from the first node
if (skipSearch(qlist, p, k)) //Find the appropriate insertion position
while (p->below) p = p->below; //If there is already a similar entry, you need to force it to the bottom of the tower.
qlist = last(); // Below, immediately to the right of p, a new tower will grow layer by layer from bottom to top.
QuadlistNode<Entry<K, V>>* b //The new node b is
= qlist->data->insertAfterAbove(e, p); //The base of the new tower
while (rand() % 2) { //After tossing a coin, if it is determined that the new tower needs to be taller, then
while (qlist->data->valid(p) & amp; & amp; !p->above) //Find the height not lower than this
p = p->pred; //most recent precursor
if (!qlist->data->valid(p)) { //If the predecessor is header
if (qlist == first()) //And it is currently the top level, which means it must
insertAsFirst(new Quadlist<Entry<K, V>>); //Create a new layer first, then
p = qlist->pred->data->first()->pred; //Transfer p to the header of the previous layer
}
else //otherwise, you can go ahead
p = p->above; //Raise p to this height
qlist = qlist->pred; //Go up one level and add new nodes to this layer
b = qlist->data->insertAfterAbove(e, p, b); //Insert after p and above b
}
return true; //Skiplist allows duplicate elements, so the insertion must be successful
} //Note: Thanks to the setting of sentinels, which aspects have been simplified?

oj–Finding the maximum interval of integers-performance

Problem: Always forget to consider the related situation of bucket 0

#include<iostream>
using namespace std;

int seed,n;
int rand()
{
return(((seed = seed * 214013L + 2531011L) >> 16) & amp; 0x7fff);
}
int rand32()
{
return ((rand() << 16) + (rand() << 1) + rand() % 2);
}

int maxgap(int a[])
{
int amax = a[0], amin = a[0];//Maximum and minimum value of array
for (int i = 0; i < n; i + + )
{
if (a[i] > amax)amax = a[i];
if (a[i] < amin)amin = a[i];
}
if (amax == amin)return 0;

bool* bucket = new bool[n];
for (int i = 0; i < n; i + + )bucket[i] = 0;
int* imax = new int[n];
int* imin = new int[n];//n numbers are divided into n-1 segments
double gap = double(amax - amin) / n - 1;
//For uniformly distributed gaps, the maximum interval must be larger than this!
for (int i = 0; i < n; i + + )
{
//Similar to finding the appropriate hash function part
\t\t//Most important part!
int index = int((a[i] - amin) / gap);
//a[i] is out of order: this way it can be determined that a[i] with a similar distance to the minimum point is placed in the bucket with the same number.
if (bucket[index] == 0)//Forgot! ! !
{
imax[index] = a[i];
imin[index] = a[i];
bucket[index] = 1;
}
else
{
//Find the maximum and minimum values in a bucket
imax[index] == max(a[i], imax[index]);
imin[index] == min(a[i], imin[index]);
}
}
int lastmax;
int maxgap = 0;
//Find the first non-empty bucket
for (int i = 0; i < n; i + + )
{
if (bucket[i] != 0)
{
lastmax = imax[i];
break;
}
}

for (int i = 1; i <= n; i + + )
{
if (bucket[i] != 0)//Forgot! ! !
{
maxgap = max(maxgap, imin[i] - lastmax);
lastmax = imax[i];
}
}
return maxgap;
}
int main()
{
cin >> n >> seed;
int* np = new int[n];
for (int i = 0; i < n; i + + )
np[i] = rand32();//Get n numbers immediately
cout << maxgap(np);
return 0;
}

The knowledge points of the article match the official knowledge files, and you can further learn related knowledge. Algorithm skill tree Home page Overview 57308 people are learning the system