1 //<HuffmanGKM.h> 2 #include "stdafx.h" 3 #include <string> 4 5 using std::string; 6 7 const int CHNUM=256; //字符数 8 const int PLUS=128; //字符下标偏移量 9 struct WeightGkm //字符频度结构,包含频度和字符值 10 { 11 unsigned long w; 12 char c; 13 }; 14 typedef struct HTNode //huffman树结构 15 { 16 int count; 17 WeightGkm w; 18 string code; 19 HTNode * lchild; 20 HTNode * rchild; 21 }HTNode,*HTree; 22 23 class HuffmanGKM 24 { 25 private: 26 HTree T; //构造Huffman树; 27 string huffCode[CHNUM]; //256个字符的Huffman编码; 28 29 unsigned long weight[CHNUM]; //256个可打印字符的频度(或叫权重) 30 unsigned long long file_size; //原始文件字符总数,即文件长度 31 32 string original_file; //原始文件路径 33 string compress_file; //压缩文件存储路径 34 string decompress_file; //解压缩文件存储路径 35 36 void QuickSortHT( HTree ht[], int left, int right); //快速排序 37 int Partition( HTree ht[], int left, int right); //快速排序中的“分半” 38 void SelectInsert( HTree ht[], HTree t, int left ,int right); //按序插入 39 public: 40 41 HuffmanGKM( string originalFile , string compressFile, string decompressFile); 42 43 int ReadFile(); //读取原文件,并记录每个字符频度 44 int BuildHuffTree(); //根据频度建立字符的Huffman树 45 int CreateHuffCode(); //根据huffman树得到huffman编码 46 int CompressFile(); //用新编码转换原文件 47 int DecompressFile(); //根据huffman树解压缩huffman编码的压缩文件 48 49 ~HuffmanGKM(); //析构函数,释放堆空间 50 };
1 //<HuffmanGKM.cpp> 2 #include "stdafx.h" 3 #include <iostream> 4 #include <fstream> 5 #include "HuffmanGKM.h" 6 #include <string> 7 #include <bitset> 8 9 using std::bitset; 10 using std::string; 11 using std::ifstream; 12 using std::ofstream; 13 using std::cout; 14 using std::endl; 15 using std::ios; 16 17 HuffmanGKM::HuffmanGKM( string originalFile , string compressFile, string decompressFile) 18 { 19 for(int i=0;i<CHNUM;i++) 20 weight[i]=0; 21 22 file_size=0; 23 original_file=originalFile; 24 compress_file=compressFile; 25 decompress_file=decompressFile; 26 27 } 28 29 int HuffmanGKM::ReadFile() 30 { 31 ifstream read; 32 read.open (original_file); 33 if(read.fail()) 34 { 35 cout<<"The original file open failed when read file!!"; 36 return 0; 37 } 38 39 char next; 40 read.get(next); 41 while(!read.eof())//统计频度。 42 { 43 weight[next+PLUS]++; 44 read.get(next); 45 file_size++; 46 } 47 48 read.close (); 49 return 0; 50 } 51 52 void HuffmanGKM::QuickSortHT ( HTree htt[], int left, int right ) 53 { 54 int pivot; 55 if( left < right ) // 肯定为真的条件 56 { 57 pivot = Partition ( htt, left, right ); 58 QuickSortHT( htt, left, pivot-1 ); 59 QuickSortHT( htt, pivot+1, right ); 60 } 61 62 } 63 //快速排序的patition算法 64 int HuffmanGKM::Partition ( HTree htt[ ], int left, int right ) //这是左大右小的排序 65 { 66 HTree HTPivot = htt[left]; //这叫“虚左以待” 67 68 while( left < right ) 69 { 70 while( right > left && htt[ right]->w.w >= HTPivot->w.w ) 71 right--; 72 htt[ left ] = htt[ right ]; 73 74 while( left < right && htt[ left ]->w.w <= HTPivot->w.w ) 75 left++; 76 htt[ right ] = htt[ left ]; 77 } 78 79 htt[ left ] = HTPivot; 80 81 return left; //最后left=right,所以返回哪个都一样 82 83 } 84 85 void HuffmanGKM:: SelectInsert( HTree htt[], HTree p, int left ,int right)//left是第一个要比较的元素 86 { 87 for( ;left<=right;left++) 88 { 89 if( p->w.w > htt[left]->w.w ) 90 htt[left-1]=htt[left];//左移小元素。 91 else 92 break; 93 } 94 htt[left-1]=p; 95 } 96 97 int HuffmanGKM::BuildHuffTree() 98 { 99 int left=0,right=CHNUM-1; 100 HTree ht[CHNUM]; //树结点的排序数组 101 102 for( int i=0; i<CHNUM;i++) //初始化huffman树结点 103 { 104 ht[i]= new HTNode ; 105 ht[i]->w.w=weight[i]; //字符频度 106 ht[i]->count=1; //树中结点个数,仅做测试用。 107 ht[i]->w.c=i-PLUS; //字符值 108 ht[i]->lchild =0; 109 ht[i]->rchild=0; 110 } 111 112 QuickSortHT( ht ,left , right ); //先把各结点字符按频度升序排序。 113 114 HTree parent; 115 while(left<right) //建树的过程很简单。 116 { 117 ht[left]->code ="1"; 118 ht[left+1]->code ="0"; 119 parent=new HTNode; 120 parent->lchild =ht[left]; 121 parent->rchild =ht[left+1]; 122 123 parent->w.c=0; 124 parent->w.w=parent->lchild ->w.w+parent->rchild ->w.w ; 125 parent->count=parent->lchild ->count + parent->rchild->count + 1; 126 SelectInsert( ht,parent,left+2,right); 127 left++; 128 } 129 T=parent; //T为建好的huffman树。 130 return 0; 131 } 132 133 int HuffmanGKM::CreateHuffCode () 134 { 135 //非递归后序遍历二叉树,访问叶子结点 136 HTree stack[CHNUM]; 137 int sign[CHNUM]={0}; 138 HTree p=T; 139 int top=0; 140 141 while( p||top ) 142 { 143 if(p) 144 { 145 stack[top]=p; 146 sign[top]=1; 147 top++; 148 p=p->lchild ; 149 } 150 else // p为空指针,循环出栈 151 while( top!=0 ) //后序遍历中,当访问完一个结点时,则以该结点为根的树都访问完,所以下一步应该继续出栈, 152 { 153 top--; 154 p = stack[top]; 155 156 if( sign[top] == 2 ) //表示p的左右子树都已走过 157 { 158 if( p->lchild ==0 && p->rchild ==0 ) 159 for(int i=1;i<=top;i++) 160 huffCode[p->w.c+PLUS]+=stack[i]->code; 161 } 162 else //表示仅走过T的左子树 ,右子树必定是第一次遇到, 163 { 164 stack[top]=p; 165 sign[top]=2; 166 top++; 167 p=p->rchild; 168 break; 169 }//else if 170 } //while ( !IsEmpty ) 171 172 if(top==0) 173 break; 174 }//while 175 return 0; 176 } 177 178 int HuffmanGKM::CompressFile() 179 { 180 ifstream read; 181 read.open (original_file); 182 if(read.fail ()) 183 { 184 cout<<"The original file open failed when compress!!!"; 185 return 1; 186 } 187 ofstream write; 188 write.open(compress_file,ios::binary ); 189 if(write.fail ()) 190 { 191 cout<<"The compress files open failed when compress!!!" ; 192 return 1; 193 } 194 char next; 195 unsigned char buff=0; 196 int count=0; 197 198 read.get(next); 199 while(!read.eof()) 200 { 201 for(string::size_type i=0;i<huffCode[next+PLUS].size();i++) 202 { 203 if( huffCode[next+PLUS][i]=='0') 204 buff=(buff<<1); 205 else 206 if(huffCode[next+PLUS][i]=='1') 207 buff=(buff<<1)|1; 208 count++; 209 if(count==8) 210 { 211 write<<buff; 212 count=0; 213 } 214 } 215 read.get(next); 216 } 217 if(count!=0) 218 for(;count!=8;count++) 219 buff=(buff<<1); 220 write<<buff; 221 read.close(); 222 write.close(); 223 224 return 0; 225 } 226 227 int HuffmanGKM::DecompressFile () 228 { 229 ifstream read; 230 read.open (compress_file,ios::binary ); 231 if(read.fail()) 232 { 233 cout<<"The compress file pen failed when decompress!!"<<endl; 234 return 0; 235 } 236 ofstream write; 237 write.open (decompress_file); 238 if(write.fail()) 239 { 240 cout<<"The decompress file open failed when decompress!!"<<endl; 241 return 0; 242 } 243 HTree p=T; 244 245 char next; 246 read.get(next); 247 unsigned long long countSize=0; 248 while(1) 249 { 250 bitset<8>b(next); 251 read.get(next); 252 for(int i=b.size()-1;i>=0;i--) 253 { 254 if(b.test(i)) 255 p=p->lchild ; 256 else 257 p=p->rchild ; 258 259 if(p->lchild ==0 && p->rchild ==0) 260 { 261 write<<p->w.c; 262 p=T; 263 countSize++; 264 } 265 if(countSize>=file_size) 266 break; 267 } 268 if(countSize>=file_size) 269 break; 270 } 271 272 read.close (); 273 write.close(); 274 return 0; 275 } 276 277 HuffmanGKM::~HuffmanGKM() 278 { 279 280 HTree stack[CHNUM]; 281 int sign[CHNUM]={0}; 282 283 HTree p=T; 284 int top=0; 285 while( p||top ) 286 { 287 if(p) 288 { 289 stack[top]=p; 290 sign[top]=1; 291 top++; 292 p=p->lchild ; 293 } 294 else // p为空指针,循环出栈 295 while( top!=0 ) //后序遍历中,当访问完一个结点时,则以该结点为根的树都访问完,所以下一步应该继续出栈, 296 { 297 top--; 298 p = stack[top]; 299 300 if( sign[top] == 2 ) //表示p的左右子树都已走过 ,后序遍历,释放所有结点 301 delete(p); 302 303 else //表示仅走过T的左子树 ,右子树必定是第一次遇到, 304 { 305 stack[top]=p; 306 sign[top]=2; 307 top++; 308 p=p->rchild; 309 break; 310 }//else if 311 } //while ( !IsEmpty ) 312 313 if(top==0) 314 break; 315 }//while 316 }
1 //Huffman.cpp : main函数文件。 2 #include "stdafx.h" 3 #include "HuffmanGKM.h" 4 #include <iostream> 5 #include <string> 6 7 using std::string; 8 using std::cout; 9 using std::endl; 10 int _tmain(int argc, _TCHAR* argv[]) 11 { 12 13 string originalFile="../TestGkm/奥巴马.txt"; 14 string compressFile="../TestGkm/奥巴马compress.txt"; 15 string decompressFile="../TestGkm/奥巴马decompress.txt"; 16 17 HuffmanGKM huff( originalFile, compressFile,decompressFile); 18 19 huff.ReadFile (); 20 huff.BuildHuffTree (); 21 huff.CreateHuffCode (); 22 huff.CompressFile(); 23 huff.DecompressFile (); 24 25 cout<<"COMPLETE!"<<endl; 26 27 getchar(); 28 29 return 0; 30 }