                          Hadoop Ecosystem: Creating HFiles for HBase

                                                  Author: Yin Zhengjie (尹正杰)

    Copyright notice: This is an original work. Reproduction is prohibited; violators will be held legally responsible.

      Without further ado, let's go straight to the code; everything worth saying is in the code comments.

    1. Environment Preparation

    Create the target table (with column families f1 and f2) in the HBase shell. The "yinzhengjie" namespace must already exist:

    list
    create 'yinzhengjie:WordCount3','f1','f2'
    list
    desc 'yinzhengjie:WordCount3'
    scan 'yinzhengjie:WordCount3'
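
    The driver in section 3 reads a local input file, D:\BigData\yinzhengjieData\word.txt. The original post does not show its contents; a minimal space-separated input might look like this (hypothetical data):

    hadoop hbase hfile
    hadoop spark hbase
    spark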

    2. Writing the HFile Creation Code

    1>. Writing the Mapper

/*
@author :yinzhengjie
Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E7%94%9F%E6%80%81%E5%9C%88/
EMAIL:y1053419035@qq.com
*/
package cn.org.yinzhengjie.hbase.hfile;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class HFileOutputMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        //Read one line of input and split it into words
        String line = value.toString();
        String[] arr = line.split(" ");
        //Emit <word, 1> for every word on the line
        for (String word : arr){
            context.write(new Text(word), new IntWritable(1));
        }
    }
}

    2>. Writing the Reducer

/*
@author :yinzhengjie
Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E7%94%9F%E6%80%81%E5%9C%88/
EMAIL:y1053419035@qq.com
*/
package cn.org.yinzhengjie.hbase.hfile;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class HFileOutputReducer extends Reducer<Text, IntWritable, ImmutableBytesWritable, Cell> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        //Sum up the counts for this word
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        if (key.toString().length() > 0) {
            //The row key is the word itself
            ImmutableBytesWritable outKey = new ImmutableBytesWritable(Bytes.toBytes(key.toString()));
            //Create the cell; KeyValue.Type.Put marks it as ordinary put data
            Cell cell = CellUtil.createCell(Bytes.toBytes(key.toString()),
                    Bytes.toBytes("f1"), Bytes.toBytes("count"), System.currentTimeMillis(),
                    KeyValue.Type.Put, Bytes.toBytes(sum + ""), null);
            context.write(outKey, cell);
        }
    }
}
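
    Note that CellUtil.createCell is deprecated in the HBase 2.x client API. Since KeyValue implements Cell and its basic constructor produces a Put-type cell, the two cell-creation lines above can also be written as the following minimal alternative sketch:

//KeyValue implements Cell; this constructor defaults to KeyValue.Type.Put
Cell cell = new KeyValue(Bytes.toBytes(key.toString()), Bytes.toBytes("f1"),
        Bytes.toBytes("count"), System.currentTimeMillis(), Bytes.toBytes(sum + ""));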

    3>. Writing the Driver

/*
@author :yinzhengjie
Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E7%94%9F%E6%80%81%E5%9C%88/
EMAIL:y1053419035@qq.com
*/
package cn.org.yinzhengjie.hbase.hfile;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class App {

    public static void main(String[] args) throws Exception {

        System.setProperty("HADOOP_USER_NAME", "yinzhengjie");
        Configuration conf = HBaseConfiguration.create();
        conf.set("fs.defaultFS", "file:///");
        Connection conn = ConnectionFactory.createConnection(conf);
        Job job = Job.getInstance(conf);
        job.setJobName("HFile WordCount");
        job.setJarByClass(App.class);
        job.setMapperClass(HFileOutputMapper.class);
        job.setReducerClass(HFileOutputReducer.class);
        //Set the output format
        job.setOutputFormatClass(HFileOutputFormat2.class);
        //Set the input and output paths (forward slashes keep the Windows paths valid inside Java string literals)
        FileInputFormat.addInputPath(job, new Path("file:///D:/BigData/yinzhengjieData/word.txt"));
        FileOutputFormat.setOutputPath(job, new Path("file:///D:/BigData/yinzhengjieData/hfile"));
        //Set the job's output key-value types
        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(Cell.class);
        //Set the map-side output key-value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        /**
         * Associate the job with the table "yinzhengjie:WordCount3". The table must already
         * exist in HBase; it serves as the template (region boundaries, in particular) from
         * which the HFiles are generated.
         */
        HFileOutputFormat2.configureIncrementalLoad(job, new HTableDescriptor(TableName.valueOf("yinzhengjie:WordCount3")),
                conn.getRegionLocator(TableName.valueOf("yinzhengjie:WordCount3")));
        job.waitForCompletion(true);
        conn.close();
    }
}

    4>. Viewing the Test Results
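
    The original post showed the generated HFile directory and the table contents as screenshots, which are not reproduced here. Note that the job only produces the HFiles; they still have to be bulk-loaded into the table before a scan will show them. Below is a minimal sketch of that step (the class name BulkLoadApp is made up for illustration; it assumes an HBase 1.x client, where org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles provides doBulkLoad(Path, Admin, Table, RegionLocator)):

/*
Bulk-load sketch: pushes the HFiles produced by the job above into the
"yinzhengjie:WordCount3" table.
*/
package cn.org.yinzhengjie.hbase.hfile;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

public class BulkLoadApp {

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Admin admin = conn.getAdmin();
             Table table = conn.getTable(TableName.valueOf("yinzhengjie:WordCount3"));
             RegionLocator locator = conn.getRegionLocator(TableName.valueOf("yinzhengjie:WordCount3"))) {
            LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
            //Point the loader at the output directory written by the driver above
            loader.doBulkLoad(new Path("file:///D:/BigData/yinzhengjieData/hfile"), admin, table, locator);
        }
    }
}

    After the load succeeds, running scan 'yinzhengjie:WordCount3' in the HBase shell should show one row per word, each with an f1:count cell holding that word's count.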
