1. Question requirements:
According to the principle of Huffman coding, write a program to find the Huffman code based on the user’s input node weights and decode the given code.
(1) Initialization: Enter a string from the keyboard (or read a file), count the characters that appear and the frequency of each character, use the frequency of the characters as the weight of the node, and build a Huffman tree. Perform Huffman coding on each character, and finally print out the characters and the Huffman code corresponding to each character.
(2) Coding: Use the built Huffman tree to perform Huffman coding on the “input string”, and finally print the Huffman code corresponding to the input string (written to a file).
(3) Decoding: Use the built Huffman tree to decode a given string of codes and print out the resulting string. (optional)
Test data: Encode the string {casbcatbsatbat}; decode the message “1101000”. Character set D={? }, the frequency of occurrence is w={? }
2. Question analysis
According to the meaning of the question, we can divide the code into the following parts:
(1) Based on the input string, find the character set and corresponding frequency (weight value)
(2) To construct a Huffman tree, you need to use functions that find the smallest and second smallest weights.
(3) Encoding
(4) Decoding
(5) Release memory
3. Code implementation
The code may be a little ugly, and you can also give us your feedback.
I won’t write any more explanations here. My code has a lot of complicated and somewhat useless comments to make it easier for me to read in the future.
(1) Preparatory work for building a Huffman tree: find the character set and corresponding frequency (weight value) based on the input string
#include<iostream> #include<string.h> #include<stdlib.h> using namespace std; #defineMAXSIZE 1000 //Definition of Huffman tree typedef struct HTNode { int weight;//weight value int lchild, rchild, parent;//The left child, right child, and parents of this node }HTNode, * HuffmanTree;//Dynamically allocate array to store Huffman tree //Preparatory work for constructing Huffman tree //Input a string to find the character set and frequency, n stores the number of leaf nodes void findLenAndFre(char* s,int* fre,int* n) { char* temp=new char[MAXSIZE];//Input characters cout<<"Please enter a string:"; cin>>temp; cout<<endl; int len=strlen(temp); \t for(int i=0;i<len;i + + )//Outer loop control input characters { bool found=false;//Identifies whether the character is stored for(int j=0;j<count;j + + )//The inner loop controls whether the characters are recorded { if(temp[i]==s[j]) { fre[j] + + ; found=true;//Indicates that the character has been stored break; } } if(!found) //If the character is not stored { s[count]=temp[i];//Assign the current character to the character set fre[count] =1;//Frequency initialization count + + ;//Number of character set elements + + } } *n=count; \t //Output character set and frequency cout << "Character set" << endl << "D={"; for (int i = 0; i < count; i + + ) { cout << s[i]; if (i != count-1) cout << ","; } cout << "}" << endl; cout << endl; cout << "frequency" << endl << "W={"; for (int i = 0; i < count; i + + ) { cout << fre[i]; if (i != count-1) cout << ","; } cout << "}" << endl << endl; }
(2) Find the sequence number of the smallest and second smallest weight value
//Find the corners corresponding to the smallest and second smallest weights from 1 to i void Select(HuffmanTree HT, int pos, int* s1, int* s2) { int min1, min2;//Minimum and second smallest weight value min1 = min2 = INT_MAX; *s1=*s2=-1; for (int i = 1; i <= pos; i + + ) { if (HT[i].parent == 0)//Only compare weights when there are no parents { if (HT[i].weight <= min1)//If the weight is less than the minimum weight { min2 = min1;//The next smallest weight is the current minimum weight *s2 = *s1;//Update the second smallest weight value sequence number min1 = HT[i].weight;//Minimum weight update *s1 = i;// i is the sequence number of the minimum weight } else if (HT[i].weight < min2)//If the weight is less than the second smallest weight { min2 = HT[i].weight; *s2 = i;//Then i is the sequence number of the second smallest weight value } } } }
(3) Construct Huffman tree from string
a. Nodes of Huffman tree
b. Initialization of Huffman tree
c. Assignment of Huffman tree weights
void CreatHuffmanTree(HuffmanTree & HT, int n,int* fre,char* s)//Construct Huffman tree { //n--leaf node || fre--weight || s--character set if (n <= 1) return; int m = 2 * n - 1,i;//Number of nodes HT = new HTNode[m + 1];//Dynamic allocation array for (i = 1; i <= m; i + + ) //Initialize the left and right children of all nodes, the parents are 0, and the data { HT[i].parent = 0; HT[i].lchild = 0; HT[i].rchild = 0; if(i<=n) HT[i].weight=fre[i-1]; else HT[i].weight = 0; } //----------Initialization ends, construction begins--------- int s1, s2;//Record the smallest and second smallest weight value sequence number for (i = n + 1; i <= m; i + + )//Construct new nodes from bottom to top { Select(HT, i - 1, & amp;s1, & amp;s2);//Find the sequence number of the smallest and second smallest weight value HT[s1].parent = i; HT[s2].parent = i;//Update the parents of s1 s2 HT[i].lchild = s1; HT[i].rchild = s2;//Update the left and right child numbers of i HT[i].weight = HT[s1].weight + HT[s2].weight;//Update the weight of i } }
(4) Encoding
typedef char** HuffmanCode;//Dynamically allocate array to store Huffman code void HuffmanCoding(HuffmanTree HT, HuffmanCode HC, int n, char* s)//encoding { //Start from the leaf node and trace back to the root node, left 0 and right 1, the code is stored in HC HC = new char* [n];//Storage encoding of leaf nodes char* cd = new char[n + 1];//Temporarily store the code, one more to store \0 cd[n] = '\0';//The last element of the array is the end mark. When storing the encoding, it is stored from n forward. \t //Start from the first leaf node and trace back the Huffman tree to find its parents. //If it is the left child of both parents, cd=0; if it is the right child, cd=1 //Then find the parents of the parents, the same process as above, until the parents of the node are 0 for (int i = 1; i <= n; i + + ) { int start = n;//Start points to the end. With the process of finding the root in reverse, start moves forward. int p = HT[i].parent;//Storage parents int c = i;//Storage the current node while (p != 0) //If the parents are not 0, keep looping { --start; //Is it wrong because I wrote ==i? ? if (HT[p].lchild == c)//If it is the left child of both parents { cd[start] = '0'; } else if (HT[p].rchild == c)//If it is the right child of both parents { cd[start] = '1'; } c = p;//update sequence number, trace back up p = HT[p].parent;//Update parents }//This completes the encoding of the i-th leaf node and stores it in HC HC[i-1] = new char[n-start + 1];//Allocate space for the i-th character encoding strcpy(HC[i-1], & amp;cd[start]);//Copy the encoding to HC }//for delete[] cd;//Release temporary space //Output encoding results for (int i = 1; i <= n; i + + ) { //cout << HT[i].data << ":" << HC[i-1] << endl; cout << s[i-1] << ":" << HC[i-1] << endl; } cout << endl; }
(5) Decoding
void HuffmanTranslate(HuffmanTree HT,int n,char* s)//Decoding { string result; cout << "Please enter the characters that need to be decoded, with # as the end mark" << endl; //Input character by character //If you enter 1, go to the right child of the current root; //Input 0, remove the left child until both left and right children are 0 char c; int cur = 2 * n - 1;//Start at the root node int l = HT[cur].lchild; int r = HT[cur].rchild;//Record the serial numbers of the left and right children \t while(cin>>c)//Input characters { if(c=='#')//End mark break; else { if(c=='0') cur=HT[cur].lchild; else if(c=='1') cur=HT[cur].rchild; }//This has moved to the target node if(HT[cur].lchild==0 & amp; & amp;HT[cur].rchild==0)//If the target node is a leaf node { result + =s[cur-1];//The characters corresponding to this node are entered into result cur=2*n-1;//The current node returns to the root node and searches for the target node again } } cout << endl; //Output decoding results cout << "Decoding result"<<endl; cout<<result<<endl; }
(6) Release memory
void destroyHuffmanTree(HuffmanTree & HT)//Destroy the Huffman tree { delete[]HT; } void destroyHuffmanCode(HuffmanCode & HC, int n)//Destroy Huffman code { for (int i = 0; i < n; i + + ) { delete[] HC[i]; } delete[]HC; }
(7) Main function
int main() { HuffmanTreeHT; HuffmanCode HC; char* s=(char*)malloc(sizeof(MAXSIZE)); int* fre=(int*)malloc(sizeof(MAXSIZE)); int n; findLenAndFre(s,fre, & amp;n);//Find the character set and corresponding weight based on the input characters CreatHuffmanTree(HT, n,fre,s);//Create Huffman tree cout << "Encoding result" << endl; HuffmanCoding(HT, HC, n,s);//encoding cout << endl; HuffmanTranslate(HT, n,s);//Decoding cout << endl; \t destroyHuffmanTree(HT); destroyHuffmanCode(HC, n); \t free(s); free(fre); return 0; }
This is the end of my code