• 基于二叉树和双向链表实现限制长度的最优Huffman编码


    该代码采用二叉树结合双向链表实现了限制长度的最优Huffman编码,本文代码中的权重全部采用整数值表示。http://pan.baidu.com/s/1mgHn8lq
    算法原理详见:A fast algorithm for optimal length-limited Huffman codes.pdf
    示例:符号ABCDE的权重分别为10,6,2,1,1
       不限制长度的最优Huffman编码为A:0,B:10,C:110,D:1110,E:1111,平均码长为1.8bits/symbol;
       限制长度3的最优Huffman编码为  A:0,B:100,C:101,D:110,E:111,  平均码长为2.0bits/symbol;
    限制长度最优Huffman编码实现代码如下:
    //Reference:A fast algorithm for optimal length-limited Huffman codes.pdf,http://pan.baidu.com/s/1o6E19Bs
    //author:by Pan Yumin.2014-06-18
    //with the method of BinaryTree and linked-list
    #include <stdio.h>
    #include <memory.h>
    #include <malloc.h>
    
    //Capacity of the fixed-size HuffCode and SortIndex tables in GenLenLimitedOptHuffCode
    #define  MaxSymbols 256	//the maximum number of symbols
    //Number of node levels built per symbol; a code length is the count of selected levels, so it is also the length limit in bits
    #define  MaxHuffLen	16	//the length limit
    
    //Small boolean type; used for the isDelete argument of deleteNode
    typedef unsigned char boolean;
    #ifndef FALSE			//in case these macros already exist
    #define FALSE	0		//values of boolean
    #endif
    
    #ifndef TRUE
    #define TRUE	1
    #endif
    
    //One item of the package-merge work list.
    //A leaf (left == NULL and right == NULL) stands for one symbol at one level;
    //a package created in Package_Merge has both children set.
    typedef struct __Node{
    	int width;		//1<<(MaxHuffLen-depth-1) for a leaf, 2*minWidth for a package
    	int weight;		//symbol frequency for a leaf, sum of both children's weights for a package
    	int index;		//position of the symbol in the sorted frequency table (assigned for leaves only)
    	int depth;		//level 0..MaxHuffLen-1 the leaf was created for (assigned for leaves only)
    
    	struct __Node *prev;	//previous item in the doubly linked work list
    	struct __Node *next;	//next item in the doubly linked work list
    	struct __Node *left;	//left child of a package, NULL for a leaf
    	struct __Node *right;	//right child of a package, NULL for a leaf
    }Node;
    
    //Resulting code of one symbol, filled in by GenerateHuffmanCode.
    typedef struct __HuffTable{
    	unsigned int index;	//SortIndex entry of the symbol, i.e. its position before the frequencies were sorted
    	unsigned int len;	//code length in bits (number of Flag entries set for the symbol)
    	unsigned int code;	//code value; PrintHuffCode prints its low len bits, most significant first
    }HuffTable;
    
    //Test memory leak
    /*int g_malloc = 0,g_free = 0;
    
    void* my_malloc(int size){
    	g_malloc++;
    	return malloc(size);
    }
    void my_free(void *ptr){
    	if(ptr){
    		g_free++;
    		free(ptr);
    		ptr = NULL;
    	}
    }
    #define malloc my_malloc
    #define free my_free*/
    
    //Return the smallest term in the dyadic (binary) expansion of X, i.e. the
    //value of its lowest set bit (12 -> 4, 7 -> 1).
    //Returns 0 when X is 0: there is no set bit, and a bit-scanning loop would
    //never terminate for that input.
    int GetSmallestTerm(int X)
    {
    	unsigned int bits = (unsigned int)X;
    	//bits & (0 - bits) isolates the lowest set bit; unsigned arithmetic keeps
    	//the negation well defined for every input
    	return (int)(bits & (0u - bits));
    }
    //Free the subtree rooted at head and record the fate of its leaves.
    //Every node without children writes one entry of Flag at
    //[depth*Symbols+index]: 0 when isDelete is non-zero, 1 otherwise.
    //All nodes of the subtree, head included, are released with free().
    void deleteNode(Node *head,unsigned char *Flag,int Symbols,boolean isDelete)
    {
    	Node *leftChild = head->left;
    	Node *rightChild = head->right;
    
    	if(leftChild == NULL && rightChild == NULL){
    		Flag[head->depth*Symbols+head->index] = isDelete ? 0 : 1;
    	}
    	if(leftChild != NULL){
    		deleteNode(leftChild,Flag,Symbols,isDelete);
    	}
    	if(rightChild != NULL){
    		deleteNode(rightChild,Flag,Symbols,isDelete);
    	}
    	free(head);
    }
    
    //Run one package-merge step on the work list.
    //head:     list head; it carries no item
    //tail:     in/out, pointer to the last list node; updated to the new last node
    //minWidth: width of the items at the tail that are packaged
    //Flag:     selection table, only passed on to deleteNode
    //Symbols:  row stride of Flag, only passed on to deleteNode
    //Items of width minWidth are taken pairwise from the tail and combined into
    //packages of width 2*minWidth, a leftover unpaired item is discarded, and the
    //packages are merged by weight into the existing run of 2*minWidth items.
    void Package_Merge(Node *head,Node **tail,int minWidth,unsigned char * Flag,int Symbols)
    {
    	Node *tmp = NULL,*node_1 = NULL,*node_2 = NULL;
    	Node *node_P_head = NULL,*node_P_tail = NULL;		//package list: node_P_tail is an empty sentinel,node_P_head is the first package
    	Node *node_head = head;								//later set to the node just before the run of 2*minWidth items
    	//package: pair up the minWidth items, starting from the tail of the list
    	node_P_tail = (Node *)malloc(sizeof(Node));
    	memset(node_P_tail,0,sizeof(Node));
    
    	node_2 = node_P_tail;	node_1 = (*tail)->prev;
    	for(;node_1 != NULL && node_1 != head; node_1=(*tail)->prev){
    		if(node_1->width == minWidth){
    			tmp = (Node*)malloc(sizeof(Node));		//index and depth stay unset; deleteNode reads them only on nodes without children
    			tmp->right = node_1->next;				//each new package is put in front of the earlier ones; presumably this keeps the package list in descending weight order when the main list is sorted that way
    			tmp->left = node_1;
    			tmp->width = 2*minWidth;
    			tmp->weight = node_1->weight+node_1->next->weight;
    			tmp->next = node_2;
    			tmp->prev = NULL;
    
    			node_2->prev = tmp;
    			node_2 = tmp;
    			*tail = node_1->prev;	(*tail)->next = NULL;		//detach the two packaged items from the main list
    		}else{
    			break;
    		}
    	}
    	node_P_head = node_2;
    
    	if(*tail != head && (*tail)->width == minWidth){	//one unpaired minWidth item is left: discard it (deleteNode with TRUE clears its Flag entries)
    		*tail = (*tail)->prev;
    		deleteNode((*tail)->next,Flag,Symbols,TRUE);
    		(*tail)->next = NULL;
    	}
    
    	//find the range of 2*minWidth: walk back from the tail over the items of that width
    	node_1 = *tail;
    	for(;node_1 != head && node_1->width == 2*minWidth;node_1 = node_1->prev){
    	}
    	node_head = node_1;		//node just before the 2*minWidth run; it is not part of the run itself
    
    	//merge: node_1 walks the 2*minWidth run, node_2 walks the package list
    	node_1 = node_head->next;	node_2 = node_P_head;
    	for(;node_1 != NULL && node_2 != node_P_tail;){
    		if(node_1->weight >= node_2->weight){
    			node_1 = node_1->next;
    		}else{		//the package is heavier: link it into the main list in front of node_1
    			node_1->prev->next = node_2;
    			node_2->prev = node_1->prev;
    			node_1->prev = node_2;
    
    			node_2 = node_2->next;			//step to the next package before relinking the inserted one
    			node_2->prev->next = node_1;
    
    			node_2->prev = NULL;
    		}
    	}
    	if(node_1 == NULL){		//main list exhausted: append what is left of the package list and move the tail
    		(*tail)->next = node_2;
    		node_2->prev = *tail;
    		*tail = node_P_tail->prev;		//last real package, or the old tail again when no package was left
    		(*tail)->next = NULL;
    		free(node_P_tail);	node_P_tail = NULL;
    	}else{					//every package was inserted: only the sentinel remains to be released
    		free(node_P_tail);	node_P_tail = NULL;
    	}
    }
    
    //Select items from the tail of the work list until their widths add up to X.
    //head:    list head; it carries no item
    //tail:    last node of the list
    //X:       total width still to be selected
    //Flag:    selection table updated through deleteNode / Package_Merge
    //Symbols: row stride of Flag
    //Returns 0 when X has been used up, -1 when the list becomes empty first,
    //-2 when the tail item is wider than the smallest term of X.
    int LengthLimitedHuffmanCode(Node *head,Node *tail,int X,unsigned char * Flag,int Symbols)
    {
    	while(X>0){
    		int wanted = GetSmallestTerm(X);
    		int smallest;
    
    		if(head->next == NULL)		//no items left
    			return -1;
    		smallest = tail->width;		//the tail width is taken as the minimum width of the list
    		if(smallest > wanted)
    			return -2;
    		if(smallest < wanted){
    			Package_Merge(head,&tail,smallest,Flag,Symbols);
    			continue;
    		}
    		//widths match: take the tail item (its leaves get Flag 1) and drop it from the list
    		tail = tail->prev;
    		deleteNode(tail->next,Flag,Symbols,FALSE);
    		tail->next = NULL;
    		X -= wanted;
    	}
    
    	return 0;
    }
    //Print the low Huffcode.len bits of Huffcode.code to stdout, most
    //significant bit first, one '0'/'1' digit per bit. Prints nothing for len 0.
    void PrintHuffCode(HuffTable Huffcode)
    {
    	int bit = (int)Huffcode.len;
    	while(bit-- > 0){
    		printf("%d",(Huffcode.code>>bit) & 0x01);
    	}
    }
    //Turn the selection table into canonical Huffman codes.
    //HuffCode:  out, Symbols entries; index, len and code are overwritten
    //Flag:      L rows of Symbols entries, each 0 or 1; the code length of
    //           symbol i is the number of rows j with Flag[j*Symbols+i] set
    //L:         number of rows in Flag; must be in 0..MaxHuffLen, otherwise the
    //           function returns without touching HuffCode
    //Symbols:   number of symbols
    //SortIndex: copied into HuffCode[i].index
    //Codes of one length are handed out as consecutive values in table order.
    void GenerateHuffmanCode(HuffTable *HuffCode,unsigned char *Flag,int L,int Symbols,int *SortIndex)
    {
    	int i=0,j=0;
    	unsigned int codes[MaxHuffLen+2]={0},rank[MaxHuffLen+1] = {0};	//rank: the number of symbols in every length
    
    	if(L < 0 || L > MaxHuffLen)		//codes[] and rank[] are sized for at most MaxHuffLen levels
    		return;
    
    	//code length of every symbol and the histogram of lengths
    	for(i=0;i<Symbols;i++){
    		unsigned int length = 0;	//counted from zero so a stale len in HuffCode cannot index past rank[]
    		for(j=0;j<L;j++){
    			length += Flag[j*Symbols+i];
    		}
    		HuffCode[i].len = length;
    		if(length != 0)
    			rank[length]++;
    		HuffCode[i].index = SortIndex[i];
    	}
    
    	//first code of every length; rank[] is cleared for reuse as a per-length counter
    	for(i=0;i<=L;i++){
    		codes[i+1] = (codes[i]+rank[i])<<1;
    		rank[i] = 0;
    	}
    
    	//code = first code of its length + number of symbols of that length seen so far
    	for(i=0;i<Symbols;i++){
    		HuffCode[i].code = codes[HuffCode[i].len] + rank[HuffCode[i].len]++;
    	}
    }
    //Average code length: the sum of len*weight over all symbols, divided by
    //WeightSum. The sum is accumulated in single-precision float.
    float BitsPerSymbol(HuffTable *HuffCode,int *weight,int Symbols,int WeightSum)
    {
    	float weightedBits = 0.0;
    	int sym = 0;
    	while(sym < Symbols){
    		weightedBits += (float)HuffCode[sym].len*weight[sym];
    		sym++;
    	}
    	return weightedBits/WeightSum;
    }
    
    //Sort Freq[0..Symbols-1] in place into descending order and apply the same
    //exchanges to SortIndex, so SortIndex[k] still names the entry whose
    //frequency ended up at position k. Plain exchange sort; equal frequencies
    //may change their relative order.
    void FreqSort(int *Freq,int *SortIndex,int Symbols)
    {
    	int slot,probe;
    	for(slot=0;slot<Symbols;slot++){
    		for(probe=slot+1;probe<Symbols;probe++){
    			int held;
    			if(Freq[probe] <= Freq[slot])
    				continue;		//already in order, nothing to exchange
    
    			held = Freq[slot];
    			Freq[slot] = Freq[probe];
    			Freq[probe] = held;
    
    			held = SortIndex[slot];
    			SortIndex[slot] = SortIndex[probe];
    			SortIndex[probe] = held;
    		}
    	}
    }
    
    //Free a work-list node together with the whole tree below it (a package
    //owns the two nodes it was built from).
    static void FreeNodeTree(Node *node)
    {
    	if(node == NULL)
    		return;
    	FreeNodeTree(node->left);
    	FreeNodeTree(node->right);
    	free(node);
    }
    
    //Build and print a Huffman code whose lengths are limited to MaxHuffLen bits.
    //Freq:    symbol weights; sorted in place into descending order by FreqSort
    //Symbols: number of entries of Freq to encode, 1..MaxSymbols
    //Returns 0 on success, -1 for an unsupported symbol count or an empty work
    //list, -2 when LengthLimitedHuffmanCode cannot continue, -3 when a memory
    //allocation fails. The code table is printed only on success.
    //NOTE: with a single symbol nothing is selected or discarded, so every Flag
    //entry keeps its initial 1 and the symbol is reported with MaxHuffLen bits.
    int GenLenLimitedOptHuffCode(int *Freq,int Symbols)
    {
    	int i,j;
    	unsigned char *Flag = NULL;	//Flag[depth*Symbols+index]: 1 while the leaf counts towards the code length
    	Node *node = NULL,*head = NULL,*tail = NULL,*tmp = NULL;	//head not store data,just a head,tail store data
    	int Ret = 0;
    	HuffTable HuffCode[MaxSymbols];
    	float bitspersymbols = 0.0;
    	int WeightSum = 0;
    	int SortIndex[MaxSymbols];
    
    	//HuffCode and SortIndex hold MaxSymbols entries, so a larger count would overrun them
    	if(Freq == NULL || Symbols <= 0 || Symbols > MaxSymbols){
    		printf("Symbols must be in the range 1..MaxSymbols\n");
    		return -1;
    	}
    	//more symbols than distinct codes of MaxHuffLen bits cannot be encoded
    	if(Symbols > (1<<MaxHuffLen)){
    		printf("Symbols > (1<<MaxHuffLen)\n");
    		return -1;
    	}
    
    	for(i=0;i<MaxSymbols;i++){
    		SortIndex[i] = i;
    	}
    	FreqSort(Freq,SortIndex,Symbols);		//sort
    
    	for(i=0;i<Symbols;i++){
    		WeightSum += Freq[i];
    	}
    
    	head = (Node*)malloc(sizeof(Node));
    	if(head == NULL){
    		printf("out of memory\n");
    		return -3;
    	}
    	memset(head,0,sizeof(Node));
    
    	Flag = (unsigned char*)malloc(MaxHuffLen*Symbols*sizeof(unsigned char));
    	if(Flag == NULL){
    		printf("out of memory\n");
    		Ret = -3;
    		goto cleanup;
    	}
    	memset(Flag,1,MaxHuffLen*Symbols*sizeof(unsigned char));
    
    	memset(HuffCode,0,sizeof(HuffCode));
    	node = head;
    
    	//one leaf per (level,symbol); the width halves with every level
    	for(i=0;i<MaxHuffLen;i++){
    		for(j=0;j<Symbols;j++){
    			tmp = (Node*)malloc(sizeof(Node));
    			if(tmp == NULL){
    				printf("out of memory\n");
    				Ret = -3;
    				goto cleanup;
    			}
    			tmp->prev = node;							tmp->next = NULL;
    			tmp->left = NULL;							tmp->right = NULL;
    			tmp->width = 1<<(MaxHuffLen-i-1);
    			tmp->weight = Freq[j];
    			tmp->index = j;								tmp->depth = i;
    			node->next = tmp;
    			node = tmp;
    		}
    	}
    	tail = node;	//tail
    	Ret = LengthLimitedHuffmanCode(head,tail,(Symbols-1)<<MaxHuffLen,Flag,Symbols);
    	if(Ret != 0)	//Flag is only partly updated, so a table built from it would be meaningless
    		goto cleanup;
    
    	GenerateHuffmanCode(HuffCode,Flag,MaxHuffLen,Symbols,SortIndex);
    
    	//print HuffCode
    	for(i=0;i<Symbols;i++){
    		printf("%03u weight:%04d Code:",HuffCode[i].index,Freq[i]);
    		PrintHuffCode(HuffCode[i]);
    		printf("\tCodeLen:%02u",HuffCode[i].len);
    		printf("\n");
    	}
    	bitspersymbols = BitsPerSymbol(HuffCode,Freq,Symbols,WeightSum);
    	printf("average code length:%f bits/symbol.\n",bitspersymbols);
    
    cleanup:
    	//release whatever is still linked behind head, then head and Flag themselves
    	node = head->next;
    	while(node != NULL){
    		tmp = node->next;
    		FreeNodeTree(node);
    		node = tmp;
    	}
    	free(head);	head = NULL;
    	free(Flag);	Flag = NULL;
    
    	return Ret;
    }
    #include <time.h>
    //Demo entry point: encodes the article's five-symbol example (weights
    //10,6,2,1,1). Exit status is 0 on success and 1 when code generation fails.
    int main(void)
    {
    	//int Freq[MaxSymbols] = {1,25,3,4,9,6,4,6,26,15,234,4578};	//alternative sample with 12 symbols; adjust the count below when using it
    	int Freq[MaxSymbols] = {10,6,2,1,1};	//original author's note: weight is not zero
    	return GenLenLimitedOptHuffCode(Freq,5) == 0 ? 0 : 1;
    }

    执行上述程序输出结果如下所示:




  • 相关阅读:
    USACO Milk2 区间合并
    Codeforces 490B Queue【模拟】
    HDU 3974 Assign the task 简单搜索
    HDU 5119 Happy Matt Friends(2014北京区域赛现场赛H题 裸背包DP)
    Cin、Cout 加快效率方法
    POJ 1159 回文LCS滚动数组优化
    POJ 2479 不相交最大子段和
    POJ 1458 最长公共子序列 LCS
    在阿里最深刻的,还是职场之道给我的震撼
    精细化
  • 原文地址:https://www.cnblogs.com/yutingliuyl/p/6944421.html
Copyright © 2020-2023  润新知