#pragma once #include "stdafx.h" #include "BinTree.h" #include "BitsOper.h" namespace Huffman { using namespace std; std::map<std::string, int> mp; using line = std::vector<string>; using vctLine = std::vector<line>; using char_int = std::pair<char, int>; std::map<char, string> ms; std::map<char, string> codeMap; template<typename T> struct HuffTree { enum ChildState { LeftChild = 0, RightChild = 1, Root = 2 //特点 父节点为空 }; T data{ T() }; // 0 或者1 HuffTree* Left{ nullptr }; HuffTree* Right{ nullptr }; HuffTree* Pairent{ nullptr }; ChildState localState; void InsertLeft(HuffTree* ht) { Left = ht; } void InsertRight(HuffTree* ht) { Right = ht; } void Free() { //if (Left != nullptr) FreeHuffChild(Left); //if (Right != nullptr) FreeHuffChild(Right); FreeHuffTree(Left); FreeHuffTree(Right); } void FreeHuffTree(HuffTree* tree) { if (tree->Left) { FreeHuffTree(tree->Left); //delete tree->Left; tree->Left = nullptr; } if (tree->Right) { FreeHuffTree(tree->Right); //delete tree->Right; tree->Right = nullptr; } delete tree; } std::vector<T> ToList() { std::vector<T> vct; PushNodeToVct(vct, this); return vct; } //Breadth-First Search int ndepth = 0; string ss = ""; void PushNodeToVct(std::vector<T>& vct, HuffTree* hu) { vct.push_back(hu->data); cout << "(" << hu->data.first << "," << hu->data.second << ")"; //cout << (int)hu->data.second; ss += std::to_string(hu->data.second); if (hu->Left != nullptr) { ss += "->"; cout << "->"; PushNodeToVct(vct, hu->Left); ndepth++; } else { ndepth--; //ss.substr(0, ss.size() - 3); cout << endl; //return; } if (hu->Right != nullptr) { //cout << hu->data.first << endl; ss += "->"; cout << "->"; PushNodeToVct(vct, hu->Right); ndepth++; } else { //ss.substr(0, ss.size()); cout << " "; //ss += " "; //cout << ss; } ndepth++; } }; using HInfoT = HuffTree<char_int>; HInfoT* huffTree = nullptr; //构造结果 int BinaryStringToInt(string s) { uint64_t val = 0; for (int i = 0; i < s.size(); i++) { int asd = s[i] == '1' ? 1 : 0; val += (asd << s.size() - 1 - i); } return val; } std::string ToBinaryString(uint8_t byte) { string s = ""; //位 for (int i = 7; i >= 0; i--) { uint8_t b = 1 << i; uint8_t bb = byte & b; if (bb == 0) { s += "0"; } else { s += "1"; } } return s; } std::string DecodeHuffman(std::vector<unsigned char> buff) { string bits = ""; for (int i = 0; i < buff.size(); i++) { bits += ToBinaryString(buff[i]); } std::string rs = ""; HInfoT* pCur = huffTree; for (int i = 0; i < bits.length(); i++) { if (bits[i] == '0') { if (pCur->Left != nullptr) { pCur = pCur->Left; } else { rs.push_back(pCur->data.first); pCur = huffTree->Left; } } if (bits[i] == '1') { if (pCur->Right != nullptr) { pCur = pCur->Right; } else { rs.push_back(pCur->data.first); pCur = huffTree->Right; } } } return rs; } void GenerateHuffCode(line& sr, HInfoT* hit, string& str) { if (hit->Left != nullptr) { str += "1"; ms[hit->Left->data.first] = str; GenerateHuffCode(sr, hit->Left, str); } else { string c = str; c.erase(c.end() - 1); sr.push_back(c); return; } if (hit->Right != nullptr) { str += "0"; ms[hit->Right->data.first] = str; GenerateHuffCode(sr, hit->Right, str); } //左子节点一定会有 right node 不一定有 else { //由于先遍历左子节点 string c = str; c.erase(c.end() - 1); sr.push_back(c); } //FindTree(sr, tree); } void GetCode(HInfoT* node, string& s) { if (node->localState == HInfoT::ChildState::LeftChild) { s += "0"; if (node->Pairent != NULL) { GetCode(node->Pairent, s); } } if (node->localState == HInfoT::ChildState::RightChild) { s += "1"; if (node->Pairent != NULL) { GetCode(node->Pairent, s); } } if (node->localState == HInfoT::ChildState::Root) { //s += "r"; } } vector<unsigned char> EncodeHuffman(std::string str) { using HInfoT = HuffTree<char_int>; std::map<char, int> mp; for (int i = 0; i < str.length(); i++) { if (mp.find(str[i]) == mp.end()) { mp.insert(std::pair<char, int>(str[i], 1)); } else { mp[str[i]] ++; } } std::deque<char_int> dqLst; for (auto it : mp) { dqLst.push_back(it); cout << it.first << endl; } std::function<bool(char_int&, char_int&)> asc = [](char_int& lhs, char_int& rhs) { return lhs.second < rhs.second; }; std::sort(dqLst.begin(), dqLst.end(), asc); //当前概率 + 当前所指向的元素 HInfoT* root = nullptr;// 左节点为 0 右节点1 HInfoT* hinf = nullptr; std::deque<HInfoT*> dqNodes; std::function<bool(HInfoT*, HInfoT*)> ascNode = [](HInfoT* l, HInfoT* r) { return l->data.second < r->data.second; }; for (int i = 0; i < dqLst.size(); i++) { HInfoT* h = new HInfoT; //char h->data = dqLst[i]; h->data.first = dqLst[i].first; //概率 h->data.second = dqLst[i].second; dqNodes.push_back(h); cout << "(" << h->data.first << "," << h->data.second << ")" << endl; } int pos; char ch; while (dqNodes.size() > 2) { hinf = new HInfoT(); root = hinf; hinf->data.second = dqNodes[0]->data.second + dqNodes[1]->data.second; hinf->data.first = dqNodes[0]->data.first; int nLeft = dqNodes[0]->data.second < dqNodes[1]->data.second ? 0 : 1; int nRight = nLeft == 0 ? 1 : 0; hinf->Left = dqNodes[nLeft]; dqNodes[nLeft]->Pairent = hinf; dqNodes[nLeft]->localState = HInfoT::LeftChild; dqNodes[nRight]->Pairent = hinf; dqNodes[nRight]->localState = HInfoT::RightChild; ch = hinf->Left->data.first; hinf->Right = dqNodes[nRight]; //dqNodes.erase(dqNodes.begin()); //dqNodes.erase(dqNodes.begin() + 1); dqNodes.pop_front(); dqNodes.pop_front(); dqNodes.push_back(hinf); std::sort(dqNodes.begin(), dqNodes.end(), ascNode); } int nLeft = dqNodes[0]->data.second < dqNodes[1]->data.second ? 0 : 1; int nRight = nLeft == 0 ? 1 : 0; root = new HInfoT(); root->data.second = hinf->data.second + hinf->data.second; root->data.first = hinf->data.first; root->Left = dqNodes[nLeft]; root->Right = dqNodes[nRight]; root->localState = HInfoT::Root; dqNodes[nLeft]->Pairent = root; dqNodes[nLeft]->localState = HInfoT::ChildState::LeftChild; dqNodes[nRight]->Pairent = root; dqNodes[nRight]->localState = HInfoT::ChildState::RightChild; std::vector<HInfoT*> vn; std::function<void(std::vector<HInfoT*>*)> bfsFunc = [root](std::vector<HInfoT*>* vhint) { deque<HInfoT*> tmpStack; HInfoT* r = root; tmpStack.push_back(root); vhint->push_back(root); while (tmpStack.size() > 0) { r = tmpStack.front(); tmpStack.pop_front(); if (r->Left != nullptr) { tmpStack.push_back(r->Left); vhint->push_back(r->Left); } if (r->Right != nullptr) { tmpStack.push_back(r->Right); vhint->push_back(r->Right); } } }; bfsFunc(&vn); string codeNodeStr; for (auto v : vn) { codeNodeStr.clear(); //只记录底层节点的码,也就是真实的码 if (v->Left == nullptr && v->Right == nullptr) { GetCode(v, codeNodeStr); std::reverse(codeNodeStr.begin(), codeNodeStr.end()); codeMap[v->data.first] = codeNodeStr; } } cout << endl; for (auto vn : codeMap) { cout << "[" << vn.first << ":" << vn.second << "]"; } cout << "codeMap" << endl; //获取编码表 auto vct = root->ToList(); for (auto v : vct) { std::cout << v.first << ":" << v.second << endl; } //生成Huffman表 //小的为0 大的为1 const int leftVal = 0; const int rightVal = 1; line vl; vl.push_back(string()); string s = ""; //GenerateHuffCode(vl, root, s); //for (int i = 0; i < vl.size(); i++) //{ // //cout << vl[i] << endl; // string stt = ""; // for (int j = 0; j < vl[i].size(); j++) // { // if (j % 3 == 0) // { // stt += ","; // } // stt += vl[i][j]; // } // cout << stt << endl; //} //cout << "---------------------" << endl;/* << pr.second */ //std::string encoderStr; //char buff[2048]; std::string sres; deque<char> sdq; //用来存储字符串 deque<string> dq; //编码替换 需要换成每8个Bit为1字节 deque<string> stringRes; //最终结果 8 bit 为一个string vector<unsigned char> ucRes; //字节表 //std::copy(sdq.begin(), sdq.end(), str.begin()); for (int i = 0; i < str.size(); i++) { sdq.push_back(str[i]); } unsigned char uch; while (sdq.size() > 0) { //存储010字符串 dq.push_back(codeMap[sdq.front()]); sdq.pop_front(); //buff = ms[str[i]]; } int i = 0; string sChar; //到8位就下一个 s = ""; for (int i = 0; i < dq.size(); i++) { s += dq[i]; } int offset = 0; for (int i = 0; i < s.size(); i++) { //这里可以优化 if (i % 8 == 0 && i != 0) { string tmp; tmp.resize(8); std::copy(s.begin() + i - 8, s.begin() + i, tmp.begin()); stringRes.push_back(tmp); offset += 8; } } if (offset < s.size()) { string tmp; tmp.resize(s.size() - offset); std::copy(s.begin() + offset, s.end(), tmp.begin()); while (tmp.size() < 8) { tmp.push_back('0'); } stringRes.push_back(tmp); } for (int i = 0; i < stringRes.size(); i++) { ucRes.push_back(BinaryStringToInt(stringRes[i])); //cout << ucRes[i] << endl; } int m = ucRes.size(); cout << "uc res" << m << endl; huffTree = root; return ucRes; } }
构造过程: 1.先统计各个字符的出现概率
2.找出最小的两个作为一个节点,该节点的概率为两个子节点的概率之和,继续排序,
3.重复上一步骤,知道剩下的节点为1
4.约定左子节点和右子节点 为0 或者1
其中还有一个类似的编码类似,算术编码,算术编码的过程是要把概率用整数表示。构造完成之后然后用对应的节点替换,节点表示(从根节点到节点)
解码过程,使用遍历,遇到0然后指向左子节点(按照约定),遇到1(指向右子节点),然后指向下一节点。如果为空,则把该子节点所表示的字符打印。
然后从根节点开始。
中间huffman树是可以按位操作的,每个节点表示0或者1 占据大小为1bit,所以在替换环节,用bit oper 或者bit vector来处理。读取的时候也一样