Construction, encoding and decoding of Huffman trees

1. Question requirements:

According to the principle of Huffman coding, write a program to find the Huffman code based on the user’s input node weights and decode the given code.

(1) Initialization: Enter a string from the keyboard (or read a file), count the characters that appear and the frequency of each character, use the frequency of the characters as the weight of the node, and build a Huffman tree. Perform Huffman coding on each character, and finally print out the characters and the Huffman code corresponding to each character.

(2) Coding: Use the built Huffman tree to perform Huffman coding on the “input string”, and finally print the Huffman code corresponding to the input string (written to a file).

(3) Decoding: Use the built Huffman tree to decode a given string of codes and print out the resulting string. (optional)

Test data: Encode the string {casbcatbsatbat}; decode the message “1101000”. Character set D={? }, the frequency of occurrence is w={? }

2. Question analysis

According to the meaning of the question, we can divide the code into the following parts:

(1) Based on the input string, find the character set and corresponding frequency (weight value)

(2) To construct a Huffman tree, you need to use functions that find the smallest and second smallest weights.

(3) Encoding

(4) Decoding

(5) Release memory

3. Code implementation

The code may be a little ugly, and you can also give us your feedback.

I won’t write any more explanations here. My code has a lot of complicated and somewhat useless comments to make it easier for me to read in the future.

(1) Preparatory work for building a Huffman tree: find the character set and corresponding frequency (weight value) based on the input string

#include<iostream>
#include<string.h>
#include<stdlib.h>
using namespace std;
#defineMAXSIZE 1000

//Definition of Huffman tree
typedef struct HTNode
{
int weight;//weight value
int lchild, rchild, parent;//The left child, right child, and parents of this node
}HTNode, * HuffmanTree;//Dynamically allocate array to store Huffman tree

//Preparatory work for constructing Huffman tree
//Input a string to find the character set and frequency, n stores the number of leaf nodes
void findLenAndFre(char* s,int* fre,int* n)
{
char* temp=new char[MAXSIZE];//Input characters
cout<<"Please enter a string:";
cin>>temp;
cout<<endl;
int len=strlen(temp);
\t
for(int i=0;i<len;i + + )//Outer loop control input characters
{
bool found=false;//Identifies whether the character is stored
for(int j=0;j<count;j + + )//The inner loop controls whether the characters are recorded
{
if(temp[i]==s[j])
{
fre[j] + + ;
found=true;//Indicates that the character has been stored
break;
}
}
if(!found) //If the character is not stored
{
s[count]=temp[i];//Assign the current character to the character set
fre[count] =1;//Frequency initialization
count + + ;//Number of character set elements + +
}
}
*n=count;
\t
//Output character set and frequency
cout << "Character set" << endl << "D={";
for (int i = 0; i < count; i + + )
{
cout << s[i];
if (i != count-1)
cout << ",";
}
cout << "}" << endl;
cout << endl;
cout << "frequency" << endl << "W={";
for (int i = 0; i < count; i + + )
{
cout << fre[i];
if (i != count-1)
cout << ",";
}
cout << "}" << endl << endl;
}

(2) Find the sequence number of the smallest and second smallest weight value

//Find the corners corresponding to the smallest and second smallest weights from 1 to i
void Select(HuffmanTree HT, int pos, int* s1, int* s2)
{
int min1, min2;//Minimum and second smallest weight value
min1 = min2 = INT_MAX;
*s1=*s2=-1;
for (int i = 1; i <= pos; i + + )
{
if (HT[i].parent == 0)//Only compare weights when there are no parents
{
if (HT[i].weight <= min1)//If the weight is less than the minimum weight
{
min2 = min1;//The next smallest weight is the current minimum weight
*s2 = *s1;//Update the second smallest weight value sequence number
min1 = HT[i].weight;//Minimum weight update
*s1 = i;// i is the sequence number of the minimum weight
}
else if (HT[i].weight < min2)//If the weight is less than the second smallest weight
{
min2 = HT[i].weight;
*s2 = i;//Then i is the sequence number of the second smallest weight value
}
}
}
}

(3) Construct Huffman tree from string

a. Nodes of Huffman tree

b. Initialization of Huffman tree

c. Assignment of Huffman tree weights

void CreatHuffmanTree(HuffmanTree & HT, int n,int* fre,char* s)//Construct Huffman tree
{
//n--leaf node || fre--weight || s--character set
if (n <= 1) return;
int m = 2 * n - 1,i;//Number of nodes
HT = new HTNode[m + 1];//Dynamic allocation array
for (i = 1; i <= m; i + + ) //Initialize the left and right children of all nodes, the parents are 0, and the data
{
HT[i].parent = 0;
HT[i].lchild = 0;
HT[i].rchild = 0;
if(i<=n)
HT[i].weight=fre[i-1];
else
HT[i].weight = 0;
}

//----------Initialization ends, construction begins---------
int s1, s2;//Record the smallest and second smallest weight value sequence number
for (i = n + 1; i <= m; i + + )//Construct new nodes from bottom to top
{
Select(HT, i - 1, & amp;s1, & amp;s2);//Find the sequence number of the smallest and second smallest weight value
HT[s1].parent = i;
HT[s2].parent = i;//Update the parents of s1 s2
HT[i].lchild = s1;
HT[i].rchild = s2;//Update the left and right child numbers of i
HT[i].weight = HT[s1].weight + HT[s2].weight;//Update the weight of i
}

}

(4) Encoding

typedef char** HuffmanCode;//Dynamically allocate array to store Huffman code

void HuffmanCoding(HuffmanTree HT, HuffmanCode HC, int n, char* s)//encoding
{
//Start from the leaf node and trace back to the root node, left 0 and right 1, the code is stored in HC
HC = new char* [n];//Storage encoding of leaf nodes
char* cd = new char[n + 1];//Temporarily store the code, one more to store \0
cd[n] = '\0';//The last element of the array is the end mark. When storing the encoding, it is stored from n forward.
\t
//Start from the first leaf node and trace back the Huffman tree to find its parents.
    //If it is the left child of both parents, cd=0; if it is the right child, cd=1
//Then find the parents of the parents, the same process as above, until the parents of the node are 0
for (int i = 1; i <= n; i + + )
{
int start = n;//Start points to the end. With the process of finding the root in reverse, start moves forward.
int p = HT[i].parent;//Storage parents
int c = i;//Storage the current node
while (p != 0) //If the parents are not 0, keep looping
{
--start;

//Is it wrong because I wrote ==i? ?

if (HT[p].lchild == c)//If it is the left child of both parents
{
cd[start] = '0';
}
else if (HT[p].rchild == c)//If it is the right child of both parents
{
cd[start] = '1';
}
c = p;//update sequence number, trace back up
p = HT[p].parent;//Update parents
}//This completes the encoding of the i-th leaf node and stores it in HC
HC[i-1] = new char[n-start + 1];//Allocate space for the i-th character encoding
strcpy(HC[i-1], & amp;cd[start]);//Copy the encoding to HC
}//for
delete[] cd;//Release temporary space
//Output encoding results
for (int i = 1; i <= n; i + + )
{
//cout << HT[i].data << ":" << HC[i-1] << endl;
cout << s[i-1] << ":" << HC[i-1] << endl;
}
cout << endl;
}

(5) Decoding

void HuffmanTranslate(HuffmanTree HT,int n,char* s)//Decoding
{
string result;
cout << "Please enter the characters that need to be decoded, with # as the end mark" << endl;

//Input character by character
    //If you enter 1, go to the right child of the current root;
    //Input 0, remove the left child until both left and right children are 0
char c;
int cur = 2 * n - 1;//Start at the root node
int l = HT[cur].lchild;
int r = HT[cur].rchild;//Record the serial numbers of the left and right children
\t
while(cin>>c)//Input characters
{
if(c=='#')//End mark
break;
else
{
if(c=='0')
cur=HT[cur].lchild;
else if(c=='1')
cur=HT[cur].rchild;
}//This has moved to the target node
if(HT[cur].lchild==0 & amp; & amp;HT[cur].rchild==0)//If the target node is a leaf node
{
result + =s[cur-1];//The characters corresponding to this node are entered into result
cur=2*n-1;//The current node returns to the root node and searches for the target node again
}
}
cout << endl;

//Output decoding results
cout << "Decoding result"<<endl;
cout<<result<<endl;
}

(6) Release memory

void destroyHuffmanTree(HuffmanTree & HT)//Destroy the Huffman tree
{
    delete[]HT;
}

void destroyHuffmanCode(HuffmanCode & HC, int n)//Destroy Huffman code
{
    for (int i = 0; i < n; i + + )
    {
        delete[] HC[i];
    }
    delete[]HC;
}

(7) Main function

int main()
{
HuffmanTreeHT;
HuffmanCode HC;
char* s=(char*)malloc(sizeof(MAXSIZE));
int* fre=(int*)malloc(sizeof(MAXSIZE));
int n;
findLenAndFre(s,fre, & amp;n);//Find the character set and corresponding weight based on the input characters

CreatHuffmanTree(HT, n,fre,s);//Create Huffman tree

cout << "Encoding result" << endl;
HuffmanCoding(HT, HC, n,s);//encoding
cout << endl;

HuffmanTranslate(HT, n,s);//Decoding
cout << endl;
\t
destroyHuffmanTree(HT);
    destroyHuffmanCode(HC, n);
\t
free(s);
free(fre);

return 0;
}

This is the end of my code