UVA 240 Variable Radix Huffman Encoding

题目大意

　　哈夫曼编码是一种最优编码方法。根据已知源字母表中字符出现的频率，将源字母表中字符编码为目标字母表中字符，最优的意思是编码信息的平均长度最小。在该问题中，你需要将 N 个大写字母（源字母 $S_1 \dots S_N$，频率 $f_1 \dots f_N$）转换成 R 进制数字（目标字母 $T_1 \dots T_R$）。

　　当 R = 2 时，编码过程分几个步骤，每个步骤中，有两个最低频率的源字符 S₁、S₂，合并成一个新的“组合字母”，频率为 S₁、S₂ 的频率之和。如果最低频率和次低频率相等，则字母表中最早出现的字母被选中。经过一系列的步骤后，最后只剩两个字母合并，每次合并的字母分配一个目标字符，较低频率的分配 0，另一个分配 1。（如果一个合并中，每个字母有相同的频率，最早出现的分配 0，出于比较的目的，组合字母的值为合并中最早出现的字母的值。）源符号的最终编码由每次形成的目标字符组成。

　　目标字符以相反顺序连接，最终编码序列中第一个字符为分配给组合字母的最后一个目标字符。

　　当 R > 2 时，每一个步骤分配 R 个符号。由于每个步骤将 R 个字母或组合字母合并为一个组合字母，并且最后一次合并必须恰好合并 R 个字母或组合字母，源字母表必须包含 k * (R - 1) + R 个字母，其中 k 为非负整数。由于 N 不一定恰好满足这一形式，因此必须补充适当数量的零频率虚拟字母。这些虚拟字母不包含在输出中。在进行比较时，虚拟字母晚于字母表中的任何字母。

　　霍夫曼编码的基本过程与 R = 2 情况相同。在每次合并中，将具有最低频率的 R 个字母合并，形成新的组合字母，其频率等于组中包括的字母频率的总和。被合并的字母被分配目标字母符号 0 到 R - 1。

分析

　　先构建哈夫曼树，再生成编码。

　　在处理 R > 2 的情况时，可以按照题目所讲的那样补虚拟字母，也可以先处理掉多余的字母，我采取的是后一种方案。

　　处理完多余字母之后就是中规中矩的哈夫曼问题了。

　　案例见原题。

代码如下

  1 #include <bits/stdc++.h>
  2 using namespace std;
  3  
  4 #define INIT() ios::sync_with_stdio(false);cin.tie(0);cout.tie(0);
  5 #define Rep(i,n) for (int i = 0; i < (n); ++i)
  6 #define For(i,s,t) for (int i = (s); i <= (t); ++i)
  7 #define rFor(i,t,s) for (int i = (t); i >= (s); --i)
  8 #define ForLL(i, s, t) for (LL i = LL(s); i <= LL(t); ++i)
  9 #define rForLL(i, t, s) for (LL i = LL(t); i >= LL(s); --i)
 10 #define foreach(i,c) for (__typeof(c.begin()) i = c.begin(); i != c.end(); ++i)
 11 #define rforeach(i,c) for (__typeof(c.rbegin()) i = c.rbegin(); i != c.rend(); ++i)
 12  
 13 #define pr(x) cout << #x << " = " << x << "  "
 14 #define prln(x) cout << #x << " = " << x << endl
 15  
 16 #define LOWBIT(x) ((x)&(-x))
 17  
 18 #define ALL(x) x.begin(),x.end()
 19 #define INS(x) inserter(x,x.begin())
 20  
 21 #define ms0(a) memset(a,0,sizeof(a))
 22 #define msI(a) memset(a,inf,sizeof(a))
 23 #define msM(a) memset(a,-1,sizeof(a))
 24 
 25 #define MP make_pair
 26 #define PB push_back
 27 #define ft first
 28 #define sd second
 29  
 30 template<typename T1, typename T2>
 31 istream &operator>>(istream &in, pair<T1, T2> &p) {
 32     in >> p.first >> p.second;
 33     return in;
 34 }
 35  
 36 template<typename T>
 37 istream &operator>>(istream &in, vector<T> &v) {
 38     for (auto &x: v)
 39         in >> x;
 40     return in;
 41 }
 42  
 43 template<typename T1, typename T2>
 44 ostream &operator<<(ostream &out, const std::pair<T1, T2> &p) {
 45     out << "[" << p.first << ", " << p.second << "]" << "
";
 46     return out;
 47 }
 48 
 49 inline int gc(){
 50     static const int BUF = 1e7;
 51     static char buf[BUF], *bg = buf + BUF, *ed = bg;
 52     
 53     if(bg == ed) fread(bg = buf, 1, BUF, stdin);
 54     return *bg++;
 55 } 
 56 
 57 inline int ri(){
 58     int x = 0, f = 1, c = gc();
 59     for(; c<48||c>57; f = c=='-'?-1:f, c=gc());
 60     for(; c>47&&c<58; x = x*10 + c - 48, c=gc());
 61     return x*f;
 62 }
 63  
 64 typedef long long LL;
 65 typedef unsigned long long uLL;
 66 typedef pair< double, double > PDD;
 67 typedef pair< int, int > PII;
 68 typedef pair< int, PII > PIPII;
 69 typedef pair< string, int > PSI;
 70 typedef pair< int, PSI > PIPSI;
 71 typedef set< int > SI;
 72 typedef vector< int > VI;
 73 typedef vector< VI > VVI;
 74 typedef vector< PII > VPII;
 75 typedef map< int, int > MII;
 76 typedef map< int, PII > MIPII;
 77 typedef map< string, int > MSI;
 78 typedef multimap< int, int > MMII;
 79 //typedef unordered_map< int, int > uMII;
 80 typedef pair< LL, LL > PLL;
 81 typedef vector< LL > VL;
 82 typedef vector< VL > VVL;
 83 typedef priority_queue< int > PQIMax;
 84 typedef priority_queue< int, VI, greater< int > > PQIMin;
 85 const double EPS = 1e-10;
 86 const LL inf = 0x7fffffff;
 87 const LL infLL = 0x7fffffffffffffffLL;
 88 const LL mod = 1e9 + 7;
 89 const int maxN = 1e4 + 7;
 90 const LL ONE = 1;
 91 const LL evenBits = 0xaaaaaaaaaaaaaaaa;
 92 const LL oddBits = 0x5555555555555555;
 93 
 94 template<class T>
 95 inline string toString(T x) {
 96     ostringstream sout;
 97     sout << x;
 98     return sout.str();
 99 }
100 
101 // R: 基數
102 // N: 字母數量
103 // T: 案例標號 
104 int R, N, T; 
105 int freq[27], freqSum;
106 string codes[27];
107 
108 // expSum: 期望和 
109 LL expSum;
110 double avglen;
111 
112 struct Node{
113     LL timestamp = infLL;
114     int letter = 0;
115     int weight = 0;
116     vector< Node > nexts;
117     
118     bool operator< (const Node &x) const {
119         if(weight == x.weight) return timestamp > x.timestamp;
120         return weight > x.weight;
121     }
122 };
123 Node root;
124 
125 void buildHuffmanTree() {
126     priority_queue< Node > minH;
127     
128     Rep(i, N) {
129         Node t;
130         t.letter = i;
131         t.weight = freq[i];
132         t.timestamp = i;
133         minH.push(t);
134     }
135     
136 
137     // 題目中說要補問號占位符，我這裏就不補了，直接把需要補充的節點合成一個代表節點 
138     int r = (N - 1) % (R - 1);
139     if(r) r += 1;
140     
141     Node tmpR;
142     Rep(i, r) {
143         Node tmp = minH.top(); minH.pop();
144         
145         tmpR.nexts.PB(tmp);
146         tmpR.weight += tmp.weight;
147         // 代表節點的時間戳由集合中最早的節點決定 
148         tmpR.timestamp = min(tmpR.timestamp, tmp.timestamp);
149     }
150     if(tmpR.weight) minH.push(tmpR);
151 
152     // 正式建树 
153     while(minH.size() >= R) {
154         Node t;
155         Rep(i, R) {
156             Node tmp = minH.top(); minH.pop();
157             
158             t.nexts.PB(tmp);
159             t.weight += tmp.weight;
160             // 代表節點的時間戳由集合中最早的節點決定 
161             t.timestamp = min(t.timestamp, tmp.timestamp);
162         }
163         minH.push(t);
164     } 
165     
166     //assert(minH.size() == 1);
167     root = minH.top();
168 }
169 
170 void dfs(Node &rt, string ret, int deep) {
171     if(!rt.nexts.size()) {
172         codes[rt.letter] = ret;
173         expSum += freq[rt.letter] * deep;
174         return;
175     }
176     
177     Rep(i, rt.nexts.size()) {
178         //assert((i + R - rt.nexts.size()) / 10 == 0);
179         dfs(rt.nexts[i], ret + toString(i + R - rt.nexts.size()), deep + 1);
180     }
181 }
182 
183 void generateCode() {
184     expSum = 0;
185     
186     dfs(root, "", 0);
187     
188     avglen = 1.0 * expSum / freqSum;
189 }
190 
191 int main(){
192     //freopen("MyOutput.txt","w",stdout);
193     //freopen("input.txt","r",stdin);
194     //INIT();
195     while(cin >> R) {
196         if(!R) break;
197         cin >> N;
198         freqSum = 0;
199         Rep(i, N) {
200             cin >> freq[i];
201             freqSum += freq[i];
202         }
203         
204         buildHuffmanTree();
205         generateCode();
206         
207         printf("Set %d; average length %.2f
", ++T, avglen);
208         Rep(i, N) printf("    %c: %s
", i + 'A', codes[i].c_str());
209         printf("
");
210     }
211     return 0;
212 }

View Code

相关阅读:
大数据之路week07--day05 （一个基于Hadoop的数据仓库建模工具之一 HIve）
大数据之路week07--day04 （Linux 中查看文件内容的关键字处）
大数据之路week07--day04 (YARN，Hadoop的优化，combline，join思想，)
hdu 1575 Tr A（矩阵快速幂，简单）
hdu 1757 A Simple Math Problem （矩阵快速幂，简单）
zoj 2974 Just Pour the Water （矩阵快速幂，简单）
LightOj 1065
LightOj 1096
poj 1006 生理周期（中国剩余定理）
POJ 2251 Dungeon Master（广搜，三维，简单）
原文地址：https://www.cnblogs.com/zaq19970105/p/10998764.html