该代码采用二叉树结合双向链表实现了限制长度的最优Huffman编码,本文代码中的权重全部采用整数值表示。http://pan.baidu.com/s/1mgHn8lq
算法原理详见:A fast algorithm for optimal length-limited Huffman codes.pdf
示例:符号ABCDE的权重分别为10,6,2,1,1
不限制长度的最优Huffman编码为A:0,B:10,C:110,D:1110,E:1111,平均码长为1.8bits/symbol;
限制长度3的最优Huffman编码为 A:0,B:100,C:101,D:110,E:111, 平均码长为2.0bits/symbol;
限制长度最优Huffman编码的实现代码如下:
算法原理详见:A fast algorithm for optimal length-limited Huffman codes.pdf
示例:符号ABCDE的权重分别为10,6,2,1,1
不限制长度的最优Huffman编码为A:0,B:10,C:110,D:1110,E:1111,平均码长为1.8bits/symbol;
限制长度3的最优Huffman编码为 A:0,B:100,C:101,D:110,E:111, 平均码长为2.0bits/symbol;
限制长度最优Huffman编码的实现代码如下:
//Reference: A fast algorithm for optimal length-limited Huffman codes.pdf, http://pan.baidu.com/s/1o6E19Bs
//author: by Pan Yumin. 2014-06-18
//with the method of BinaryTree and linked-list
#include <stdio.h>
#include <stdlib.h>   //malloc, free, exit (standard replacement for <malloc.h>)
#include <string.h>   //memset (standard replacement for <memory.h>)
#include <time.h>

#define MaxSymbols 256  //the Maximum Number of Symbols
#define MaxHuffLen 16   //the Limited Length

typedef unsigned char boolean;
#ifndef FALSE           //in case these macros already exist
#define FALSE 0         //values of boolean
#endif
#ifndef TRUE
#define TRUE 1
#endif

//One item of the package-merge working list.
//A leaf (left == right == NULL) stands for one symbol at one code level;
//an inner node is a package whose two children are the items it was built from.
typedef struct __Node{
    int width;              //item width; leaves use 1<<(MaxHuffLen-depth-1), packages double it
    int weight;             //symbol weight, or the sum of both children for a package
    int index;              //position of the symbol in the sorted weight array (leaves only)
    int depth;              //level row of this leaf inside the Flag matrix (leaves only)
    struct __Node *prev;    //double linked list
    struct __Node *next;    //double linked list
    struct __Node *left;    //left child
    struct __Node *right;   //right child
}Node;

//One row of the resulting code table.
typedef struct __HuffTable{
    unsigned int index;     //original (pre-sort) symbol index
    unsigned int len;       //code length in bits
    unsigned int code;      //code word, stored in the low `len` bits
}HuffTable;

//Test memory leak
/*int g_malloc = 0,g_free = 0;
void* my_malloc(int size){
    g_malloc++;
    return malloc(size);
}
void my_free(void *ptr){
    if(ptr){
        g_free++;
        free(ptr);
        ptr = NULL;
    }
}
#define malloc my_malloc
#define free my_free*/

//Allocate `size` zero-filled bytes; terminates the program when memory is exhausted,
//because the void helpers below have no way to report an allocation failure.
static void *AllocZeroed(size_t size)
{
    void *ptr = malloc(size);
    if (ptr == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(EXIT_FAILURE);
    }
    memset(ptr, 0, size);
    return ptr;
}

//Allocate one list/tree node with every field cleared.
static Node *AllocNode(void)
{
    return (Node *)AllocZeroed(sizeof(Node));
}

//Get the smallest term in the diadic expansion of X
//(the value of the lowest set bit). Returns 0 for X == 0, where no such term
//exists and the bit scan below would never terminate.
int GetSmallestTerm(int X)
{
    int N = 0;

    if (X == 0) {
        return 0;
    }
    while ((X & 0x01) == 0) {
        X >>= 1;
        N++;
    }
    return 1 << N;
}

//Free the tree rooted at `head` and record the fate of every leaf in Flag:
//Flag[depth*Symbols+index] becomes 0 when isDelete is TRUE (item discarded)
//and 1 when it is FALSE (item selected). A NULL `head` is ignored.
void deleteNode(Node *head, unsigned char *Flag, int Symbols, boolean isDelete)
{
    if (head == NULL) {
        return;
    }
    if (head->left == NULL && head->right == NULL) {
        Flag[head->depth * Symbols + head->index] = isDelete ? 0 : 1;
    }
    if (head->left) {
        deleteNode(head->left, Flag, Symbols, isDelete);
    }
    if (head->right) {
        deleteNode(head->right, Flag, Symbols, isDelete);
    }
    free(head);
}

//One package-merge step on the items of width `minWidth` at the end of the list.
//  head     : list head, carries no data
//  tail     : in/out, last data node of the list (equals head when the list is empty)
//  minWidth : width of the items to package; they sit at the tail of the list
//  Flag     : selection matrix updated when an unpaired item is discarded
//  Symbols  : row length of Flag
//Items are paired from the tail into packages of width 2*minWidth, an unpaired
//item is discarded, and the packages are merged into the run of 2*minWidth
//items so that weights keep decreasing towards the tail.
void Package_Merge(Node *head, Node **tail, int minWidth, unsigned char *Flag, int Symbols)
{
    Node *tmp = NULL, *node_1 = NULL, *node_2 = NULL;
    Node *node_P_head = NULL, *node_P_tail = NULL; //node_P_tail not store data,node_P_head store data
    Node *node_head = head;                        //the head of 2*minWidth

    //package
    node_P_tail = AllocNode();                     //sentinel terminating the package list
    node_2 = node_P_tail;
    node_1 = (*tail)->prev;
    for (; node_1 != NULL && node_1 != head; node_1 = (*tail)->prev) {
        if (node_1->width != minWidth) {
            break;
        }
        tmp = AllocNode();
        tmp->right = node_1->next;  //insert from right to left,so the weight from small to large
        tmp->left = node_1;
        tmp->width = 2 * minWidth;
        tmp->weight = node_1->weight + node_1->next->weight;
        tmp->next = node_2;
        tmp->prev = NULL;
        node_2->prev = tmp;
        node_2 = tmp;
        *tail = node_1->prev;       //drop the packaged pair from the major list
        (*tail)->next = NULL;
    }
    node_P_head = node_2;

    if (*tail != head && (*tail)->width == minWidth) {
        //if the number of minwidth is odd,delete the max weight item of minwidth
        *tail = (*tail)->prev;
        deleteNode((*tail)->next, Flag, Symbols, TRUE);
        (*tail)->next = NULL;
    }

    //find the range of 2*minWidth
    node_1 = *tail;
    while (node_1 != head && node_1->width == 2 * minWidth) {
        node_1 = node_1->prev;
    }
    node_head = node_1;             //the head of 2*minWidth, node_head not store 2*minWidth

    //merge
    node_1 = node_head->next;
    node_2 = node_P_head;
    while (node_1 != NULL && node_2 != node_P_tail) {
        if (node_1->weight >= node_2->weight) {
            node_1 = node_1->next;
        } else {
            //insert to the major list, in front of node_1
            node_1->prev->next = node_2;
            node_2->prev = node_1->prev;
            node_1->prev = node_2;
            node_2 = node_2->next;
            node_2->prev->next = node_1;
            node_2->prev = NULL;
        }
    }
    if (node_1 == NULL) {
        //insert list 2 to the major list
        (*tail)->next = node_2;
        node_2->prev = *tail;
        *tail = node_P_tail->prev;
        (*tail)->next = NULL;
    }
    free(node_P_tail);
}

//Select items of total width X from the list, taking the cheapest item whenever
//its width equals the smallest term of X and packaging otherwise.
//  head/tail : list head (no data) and last data node
//  X         : total width still to be selected
//  Flag      : selection matrix, set to 1 for every leaf of a selected item
//  Symbols   : row length of Flag
//Returns 0 on success, -1 when the list runs empty while X > 0, and -2 when the
//smallest available width is larger than the smallest term of X.
int LengthLimitedHuffmanCode(Node *head, Node *tail, int X, unsigned char *Flag, int Symbols)
{
    int minwidth, r;

    while (X > 0) {
        minwidth = GetSmallestTerm(X);
        if (head->next == NULL) {   //I empty
            return -1;
        }
        r = tail->width;            //Just for Huffman Code,else r = GetMinWidth(head);
        if (r > minwidth) {
            return -2;
        } else if (r == minwidth) {
            tail = tail->prev;
            deleteNode(tail->next, Flag, Symbols, FALSE);
            tail->next = NULL;
            X = X - minwidth;
        } else {
            Package_Merge(head, &tail, r, Flag, Symbols);
        }
    }
    return 0;
}

//Print the code word as `len` binary digits, most significant bit first.
void PrintHuffCode(HuffTable Huffcode)
{
    int i;

    for (i = (int)Huffcode.len - 1; i >= 0; i--) {
        printf("%u", (Huffcode.code >> i) & 0x01u);
    }
}

//Fill HuffCode[0..Symbols-1] from the selection matrix.
//  Flag      : L rows of Symbols entries; the length of symbol i is the column sum
//  L         : number of rows in Flag, must lie in [0, MaxHuffLen]
//  SortIndex : SortIndex[i] is stored as the table index of entry i
//Column sums are added to the `len` already present in HuffCode (the caller
//passes a zeroed table). Nothing is written when L is out of range, because
//the per-length tables below only cover lengths up to MaxHuffLen.
void GenerateHuffmanCode(HuffTable *HuffCode, unsigned char *Flag, int L, int Symbols, int *SortIndex)
{
    int i = 0, j = 0;
    unsigned int codes[MaxHuffLen + 2] = {0};
    unsigned int rank[MaxHuffLen + 1] = {0};   //rank: the number of symbols in every length

    if (L < 0 || L > MaxHuffLen) {
        return;
    }

    //find the first code
    for (i = 0; i < Symbols; i++) {
        for (j = 0; j < L; j++) {
            HuffCode[i].len += Flag[j * Symbols + i];
        }
        if (HuffCode[i].len != 0) {
            rank[HuffCode[i].len]++;
        }
        HuffCode[i].index = SortIndex[i];
    }
    for (i = 0; i <= L; i++) {
        codes[i + 1] = (codes[i] + rank[i]) << 1;
        rank[i] = 0;                            //reused below as the running counter per length
    }

    //code
    for (i = 0; i < Symbols; i++) {
        HuffCode[i].code = codes[HuffCode[i].len] + rank[HuffCode[i].len]++;
    }
}

//Weighted mean code length: sum(len[i]*weight[i]) / WeightSum.
//Returns 0 when WeightSum is 0 instead of dividing by zero.
float BitsPerSymbol(HuffTable *HuffCode, int *weight, int Symbols, int WeightSum)
{
    float bitspersymbol = 0.0f;
    int i;

    if (WeightSum == 0) {
        return 0.0f;
    }
    for (i = 0; i < Symbols; i++) {
        bitspersymbol += (float)HuffCode[i].len * weight[i];
    }
    return bitspersymbol / WeightSum;
}

//Sort Freq in place into descending order and apply the same swaps to SortIndex.
void FreqSort(int *Freq, int *SortIndex, int Symbols)
{
    int i, j, tmp;

    for (i = 0; i < Symbols; i++) {
        for (j = i + 1; j < Symbols; j++) {
            if (Freq[i] < Freq[j]) {
                tmp = Freq[i];
                Freq[i] = Freq[j];
                Freq[j] = tmp;
                tmp = SortIndex[i];
                SortIndex[i] = SortIndex[j];
                SortIndex[j] = tmp;
            }
        }
    }
}

//Build and print a Huffman code limited to MaxHuffLen bits for `Symbols` weights.
//  Freq    : symbol weights; sorted in place into descending order
//  Symbols : number of weights, 1..MaxSymbols
//Returns 0 on success, -1 for invalid arguments, or the negative result of
//LengthLimitedHuffmanCode when no code could be built (nothing is printed then).
//A single symbol selects no item and therefore gets a zero-length code.
int GenLenLimitedOptHuffCode(int *Freq, int Symbols)
{
    int i, j;
    unsigned char *Flag = NULL;     //record the state of the node
    Node *node = NULL, *head = NULL, *tail = NULL, *tmp = NULL; //head not store data,just a head,tail store data
    int Ret = 0;
    HuffTable HuffCode[MaxSymbols];
    float bitspersymbols = 0.0f;
    int WeightSum = 0;
    int SortIndex[MaxSymbols];
    size_t flagBytes;

    if (Symbols > (1 << MaxHuffLen)) {
        printf("Symbols > (1<<MaxHuffLen)\n");
        return -1;
    }
    //HuffCode and SortIndex hold MaxSymbols entries, so larger counts would overflow them
    if (Freq == NULL || Symbols < 1 || Symbols > MaxSymbols) {
        printf("Symbols must be between 1 and MaxSymbols\n");
        return -1;
    }

    for (i = 0; i < MaxSymbols; i++) {
        SortIndex[i] = i;
    }
    FreqSort(Freq, SortIndex, Symbols);     //sort
    for (i = 0; i < Symbols; i++) {
        WeightSum += Freq[i];
    }

    head = AllocNode();
    flagBytes = (size_t)MaxHuffLen * (size_t)Symbols * sizeof(unsigned char);
    Flag = (unsigned char *)AllocZeroed(flagBytes);
    memset(Flag, 1, flagBytes);
    memset(HuffCode, 0, sizeof(HuffCode));

    //one leaf per (level, symbol); widths shrink and, within a level, weights
    //shrink towards the tail
    node = head;
    for (i = 0; i < MaxHuffLen; i++) {
        for (j = 0; j < Symbols; j++) {
            tmp = AllocNode();
            tmp->prev = node;
            tmp->width = 1 << (MaxHuffLen - i - 1);
            tmp->weight = Freq[j];
            tmp->index = j;
            tmp->depth = i;
            node->next = tmp;
            node = tmp;
        }
    }
    tail = node;    //tail

    Ret = LengthLimitedHuffmanCode(head, tail, (Symbols - 1) << MaxHuffLen, Flag, Symbols);

    //items still linked were never selected: mark them as such and release them
    node = head->next;
    while (node != NULL) {
        tmp = node->next;
        deleteNode(node, Flag, Symbols, TRUE);
        node = tmp;
    }
    head->next = NULL;

    if (Ret == 0) {
        GenerateHuffmanCode(HuffCode, Flag, MaxHuffLen, Symbols, SortIndex);

        //print HuffCode (line breaks restored; the pasted source showed spaces)
        for (i = 0; i < Symbols; i++) {
            printf("%03u weight:%04d Code:", HuffCode[i].index, Freq[i]);
            PrintHuffCode(HuffCode[i]);
            printf(" CodeLen:%02u", HuffCode[i].len);
            printf("\n");
        }
        bitspersymbols = BitsPerSymbol(HuffCode, Freq, Symbols, WeightSum);
        printf("average code length:%f bits/symbol.\n", bitspersymbols);
    }

    free(head);
    head = NULL;
    free(Flag);
    Flag = NULL;

    return Ret;
}

int main(void)
{
    //int Freq[MaxSymbols] = {1,25,3,4,9,6,4,6,26,15,234,4578}; //weight is not zero.
    int Freq[MaxSymbols] = {10,6,2,1,1}; //weight is not zero.
    GenLenLimitedOptHuffCode(Freq, 5);

    return 0;
}
执行上述程序,输出结果如下所示: