0. 说明
MapFile 介绍 && 测试
1. 介绍
对 MapFile 的介绍如下:
- MapFile 是带有索引的 SequenceFile:它实际上是一个目录,其中包含 data(数据)和 index(索引)两个 SequenceFile
- MapFile 是按 key 排序的 SequenceFile:写入时必须按 key 升序 append,否则会抛出 IOException
2. 测试
package hadoop.sequencefile; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.MapFile; import org.apache.hadoop.io.Text; import org.junit.Test; /** * 测试 MapFile 操作 * Windows 下查看压缩后的 MapFile : * hdfs dfs -text file:///E:/test/mapfile/index * hdfs dfs -text file:///E:/test/mapfile/data */ public class TestMapFile { /** * 测试序列文件写操作 */ @Test public void testWriteSeq() throws Exception { Configuration conf = new Configuration(); // 设置文件系统为本地模式 conf.set("fs.defaultFS", "file:///"); FileSystem fs = FileSystem.get(conf); // 数据存放的文件夹路径 String path = "E:/test/mapfile"; MapFile.Writer writer = new MapFile.Writer(conf, fs, path, IntWritable.class, Text.class); for (int i = 1; i <= 100; i++) { IntWritable key = new IntWritable(i); Text value = new Text("helloworld" + i); writer.append(key, value); } writer.close(); } /** * SequenceFile 转换为 MapFile * 新建文件夹 E:/test/mapfile2 * 将 SequenceFile 放入其中并重命名为 data */ @Test public void SeqConvert() throws Exception { System.setProperty("HADOOP_USER_NAME", "centos"); Configuration conf = new Configuration(); conf.set("fs.defaultFS", "file:///"); FileSystem fs = FileSystem.get(conf); Path p = new Path("file:///E:/test/mapfile2"); long cnt = MapFile.fix(fs, p, IntWritable.class, Text.class, false, conf); System.out.println(cnt); } }