Huffman tree
Basic introduction to Huffman trees
Illustration of Huffman tree construction steps
Create Huffman tree code implementation
""" Create Huffman tree """ classEleNode: """ Node class """ def __init__(self, value: int): self.value = value self.left = None # Point to the left child node self.right = None # Point to the right child node def __str__(self): return f"Node [value={self.value}]" def pre_order(self): """Preorder traversal of a binary tree""" if self is None: return # Output the parent node first print(self, end=' => ') # If the left subtree is not empty, recurse the left subtree if self.left is not None: self.left.pre_order() # If the right subtree is not empty, recurse the right subtree if self.right is not None: self.right.pre_order() classHuffmanTree: arr: list = None def __init__(self, arr): self.arr = arr def create_huffman_tree(self) -> EleNode: # Sort the arr list in ascending order self.arr.sort() # Traverse the array and construct each array element into a Node node # Add Node nodes to a new list node_list = [] for i in self.arr: node_list.append(EleNode(i)) # Loop through the following steps until there is only one binary tree left in the list. At this time, the binary tree is the Huffman tree. while len(node_list) > 1: # Take out the smallest two-class binary tree in the weight sequence (i.e., the first two elements in the list) to build a new binary tree left = node_list.pop(0) right = node_list.pop(0) #The weight of the root node of the new binary tree = the sum of the weights of the two nodes parent = EleNode(left.value + right.value) # Let the left and right nodes of the new binary tree point to the two smallest nodes respectively. parent.left = left parent.right = right #Insert the new binary tree into the specified position in the sequence based on the size of the root node i = 0 n = len(node_list) while i < n: if node_list[i].value >= parent.value: node_list.insert(i, parent) # Found the location where the root node is stored break i+=1 else: # The end of the loop indicates that the new binary tree has the largest weight, and there is no one larger than it in the node_list list. # So add it to the end of the list node_list.append(parent) # At this time, there is only one binary tree in the list, which is the Huffman tree required. # Return the root node of the Huffman tree return node_list[0] huffman = HuffmanTree([13, 7, 8, 3, 29, 6, 1]) node = huffman.create_huffman_tree() node.pre_order()
Huffman coding
Basic introduction
Analysis of Huffman coding principles
Example of Huffman coding
Idea analysis
Code
""" Huffman coding """ class CodeNode: def __init__(self, data: int, weight: int): self.data = data # Store the ASCII code value corresponding to the character self.weight = weight # stores the number of times a character appears in the string self.left = None self.right = None def __str__(self): return f"Node[data={chr(self.data) if self.data else None}, weight={self.weight}]" def pre_order(self): """Preorder traversal of a binary tree""" print(self) if self.left: self.left.pre_order() if self.right: self.right.pre_order() classHuffmanCode: # Huffman coding table # Store the encoding corresponding to each character # key is the corresponding character, val is the encoding corresponding to the character huffman_code_tab = {} # Record the length of the last segment of the Huffman binary encoding string # That is to say, the Huffman binary string is divided into eight bits. When divided into the last one, the length does not need to be 8. #So use a variable to store the length of the last binary string, which will be used during decoding. last_char_len = 0 def create_huffman_tree(self, s: str) -> CodeNode: """ Construct Huffman coded binary tree :param s: string to encode :return: """ # Traverse the string, count the number of occurrences of each character, and put the results into the dictionary kw = {} for ch in s: ascii_code = ord(ch) if kw.get(ascii_code): # If the character has appeared before, directly add 1 to its number kw[ascii_code] + = 1 else: # If it has not appeared before, the number of occurrences is 1 kw[ascii_code] = 1 # Sort the dictionary according to the number of times characters appear kw = sorted(kw.items(), key=lambda kv: (kv[1], kv[0])) # Traverse the dictionary and construct each element into a Node node # Add Node nodes to a new list node_list = [] for k, v in kw: # print(chr(k),'=', v, end=', ') node_list.append(CodeNode(k, v)) # Loop through the following steps until there is only one binary tree left in the list. At this time, the binary tree is the Huffman tree. while len(node_list) > 1: # Take out the smallest two-class binary tree in the weight sequence (i.e., the first two elements in the list) to build a new binary tree left = node_list.pop(0) right = node_list.pop(0) #The weight of the root node of the new binary tree = the sum of the weights of the two nodes parent = CodeNode(None, left.weight + right.weight) # Let the left and right nodes of the new binary tree point to the two smallest nodes respectively. parent.left = left parent.right = right #Insert the new binary tree into the specified position in the sequence based on the size of the root node n = len(node_list) i = 0 while i < n: if node_list[i].weight >= parent.weight: node_list.insert(i, parent) # Found the location where the root node is stored break i + = 1 else: # The end of the loop indicates that the new binary tree has the largest weight, and there is no one larger than it in the node_list list. # So add it to the end of the list node_list.append(parent) # At this time, there is only one binary tree in the list, which is the Huffman tree required. # Return the root node of the Huffman tree return node_list[0] def get_huffman_code_tab(self, ele_node: CodeNode, code: str, code_str: str): """ Traverse the created Huffman tree and obtain the codes of all leaf nodes (leaf nodes are the characters to be obtained) Here it is specified that the left node is 0 and the right node is 1 :param ele_node: The root node of the tree to be traversed, initially the root node :param code: indicates whether the selected path is a left node or a right node :param code_str: the code corresponding to each character :return: """ code_str + = code # splicing coding if ele_node.data is None: # represents a non-leaf node, because the data of the leaf node is set to be empty when creating the Huffman tree. # code_str + = code ifele_node.left: self.get_huffman_code_tab(ele_node.left, '0', code_str) ifele_node.right: self.get_huffman_code_tab(ele_node.right, '1', code_str) else: # is a leaf node self.huffman_code_tab[chr(ele_node.data)] = code_str def huffman_zip(self, s: str) -> list: """ Use the Huffman encoding table to convert each character in the string into the corresponding encoding That is, compress a string according to Huffman coding to obtain a compressed result. :param s: string to convert :return: Returns the encoded list """ res = '' # Traverse the string, convert each character into the corresponding encoding, and concatenate all the encodings # "i like like like java do you like a java" => The following form # 1100111111101110001011111110111000101111111011100010100001011000101011 # 001100001011001000011001110111111101110001011010100001011000101 for i in s: res + = self.huffman_code_tab[i] # Split the resulting encoded string into eight bits, convert each eight bits into an int, and store the int in a list code_list = [] i = 0 n = len(res) while i < n: num = int(res[i:i + 8], 2) # Convert binary string to integer code_list.append(num) i + = 8 if i < n <= i + 8: # It has been divided into the last part, record the length of this part self.last_char_len = n - i return code_list def huffman_decode(self, code_list) -> str: """ Decompress Huffman encoding and obtain a readable string :param code_list: Huffman code list to be decompressed :return: decoded string """ # Convert the Huffman encoding list into the corresponding binary string # [415, 476, 95, 476, 95, 476, 80, 177, 345, 389, 400, 206, 254, 226, 212, 44, 5] => # 1100111111101110001011111110111000101111111011100010100001011000101011 # 001100001011001000011001110111111101110001011010100001011000101 code_str = '' # Store the corresponding binary string count = 0 n = len(code_list) for i in code_list: t = "{:08b}".format(i) # Convert integer to binary string code_str + = t count + = 1 if count == n - 1: break code_str + = "{:0{k}b}".format(code_list[count], k=self.last_char_len) # Swap the key values of the Huffman coding table # For example, the original is 'a': '001' => becomes '001': 'a' code_tab = {} for k, v in self.huffman_code_tab.items(): code_tab[v] = k # Traverse binary string j = 0 i=1 n = len(code_str) res_code = '' #Decoded string while i <= n: t = code_str[j:i] ch = code_tab.get(t) if ch: res_code + = ch j=i i+=1 return res_code s = "i like like like java do you like a java" # s = "I love python hahha nihao" huffman = HuffmanCode() root_node = huffman.create_huffman_tree(s) huffman.get_huffman_code_tab(root_node, '', '') huffman_code_list = huffman.huffman_zip(s) decode_str = huffman.huffman_decode(huffman_code_list) print(decode_str)