哈夫曼编码(Huffman Coding),又称霍夫曼编码,是可变字长编码(VLC)的一种。Huffman 于 1952 年提出了这种编码方法:它完全依据字符出现的概率来构造异字头(即任何一个码字都不是其他码字的前缀)且平均长度最短的码字,因此有时也称为最佳编码,一般就称为 Huffman 编码。
1 #include "stdafx.h" 2 #include <iostream> 3 #include<queue> 4 #include<vector> 5 #include <string> 6 7 using namespace std; 8 9 typedef struct 10 { 11 //内容 12 unsigned char character; 13 //频次 14 unsigned int frequency; 15 16 } CharNode; 17 18 struct MinHeapNode 19 { 20 char data; 21 unsigned int freq; 22 MinHeapNode *left, *right; 23 MinHeapNode(char data, unsigned int freq) 24 { 25 left = right = NULL; 26 this->data = data; 27 this->freq = freq; 28 } 29 }; 30 typedef struct MinHeapNode MinHeapNode; 31 32 struct compare 33 { 34 bool operator()(MinHeapNode *a, MinHeapNode *b) 35 { 36 //"a > b" 表示 数值小 的 优先级高, 排最上面 37 //反之 "<" 表示大的优先级高 38 return (a->freq > b->freq); 39 } 40 }; 41 42 void get_huffuman_code(MinHeapNode *topNode, string code) 43 { 44 if (NULL == topNode) 45 { 46 return; 47 } 48 //表示已到达最末端,含有左右节点的元素中(由while循环中生成)data必为-1 49 if (topNode->data != -1) 50 { 51 cout << topNode->data << " : " << code<<endl; 52 } 53 get_huffuman_code(topNode->left, code + "0"); 54 get_huffuman_code(topNode->right, code + "1"); 55 } 56 57 int main() 58 { 59 FILE * inputFile = NULL; 60 fopen_s(&inputFile,"input.txt", "rb"); 61 if (!inputFile) 62 { 63 cout<< "Error: open file failed !" 
<< endl; 64 return -1; 65 } 66 67 //初始化ASCII码数组 68 CharNode nodeArr[256] = { {0,0} }; 69 while (!feof(inputFile)) 70 { 71 char buf = getc(inputFile); 72 cout << buf; 73 nodeArr[buf].character = buf; 74 nodeArr[buf].frequency++; 75 } 76 cout << endl; 77 //定义一个小顶堆 78 priority_queue<MinHeapNode*, vector<MinHeapNode*>, compare > minHeap; 79 for (size_t i = 0; i < 256; i++) 80 { 81 //将数据堆中,与升序排序效果类似,即堆首为小值,堆尾为大值 82 if (nodeArr[i].frequency > 0) 83 { 84 minHeap.push(new MinHeapNode(nodeArr[i].character, nodeArr[i].frequency)); 85 cout << "Node " << i << ": [" << nodeArr[i].character << ", " << nodeArr[i].frequency << "]" << endl; 86 } 87 } 88 89 MinHeapNode *leftNode = NULL, *rightNode = NULL, *topNode = NULL; 90 //从堆首中抛出两个较小节点,生成一个新节点放回堆中并进行重新排序, 91 //故,每次操作完成后 堆中会减少一个元素, 且堆顶必为权值最小的2个元素 92 //直至堆中只剩一个元素时,哈夫曼树生成完毕 93 while (minHeap.size() > 1) 94 { 95 //较小放左侧 96 leftNode = minHeap.top(); 97 minHeap.pop(); 98 //较大的放右侧 99 rightNode = minHeap.top(); 100 minHeap.pop(); 101 102 topNode = new MinHeapNode(-1, leftNode->freq + rightNode->freq); 103 topNode->left = leftNode; 104 topNode->right = rightNode; 105 minHeap.push(topNode); 106 } 107 get_huffuman_code(topNode, ""); 108 getchar(); 109 return 0; 110 }
对于解码过程:从根节点出发,依次读取编码流中的 bit,遇到“0”走向 leftNode,遇到“1”走向 rightNode,直至到达叶子节点(data != -1);输出该节点保存的字符后回到根节点,继续解码后续的 bit。