• Importing HDFS Data into HBase with MapReduce (Part 2)


    package com.bank.service;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.conf.Configured;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
    import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
    import org.apache.hadoop.hbase.mapreduce.TableReducer;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.util.GenericOptionsParser;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;

    /**
     * Bulk-imports data into HBase with MapReduce via TableOutputFormat:
     * the format hands every Put emitted by the reducer to an internal
     * table.put() call, and before the job finishes it calls flushCommits()
     * to persist any data still sitting in the client write buffer.
     * (An alternative wiring with TableMapReduceUtil.initTableReducerJob is
     * sketched after this listing.)
     *
     * @author mengyao
     *
     */
    public class CnyBatch extends Configured implements Tool {

        /** Identity mapper: forwards each input line keyed by its byte offset. */
        static class CnyBatchMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
            @Override
            protected void map(LongWritable key, Text value, Context context)
                    throws java.io.IOException, InterruptedException {
                context.write(key, value);
            }
        }

        /** Builds one Put per input line; rowkey = first four fields joined by '_'. */
        static class CnyBatchReduce extends TableReducer<LongWritable, Text, NullWritable> {
            private final static String familyName = "info";
            private final static String[] qualifiers = {"gzh", "currency", "version", "valuta", "qfTime", "flag", "machineID"};
            @Override
            protected void reduce(LongWritable key, java.lang.Iterable<Text> values,
                    Context context) throws java.io.IOException, InterruptedException {
                // Each key is a unique byte offset, so there is one line per key, but
                // iterate over the Iterable rather than calling toString() on it.
                for (Text line : values) {
                    final String[] fields = line.toString().split(" ");
                    if (fields.length == qualifiers.length) {
                        final String row = fields[0] + "_" + fields[1] + "_" + fields[2] + "_" + fields[3];
                        long timestamp = System.currentTimeMillis();
                        Put put = new Put(Bytes.toBytes(row));
                        for (int i = 0; i < fields.length; i++) {
                            // put.add(...) is the pre-1.0 API; HBase 1.x renamed it to addColumn(...)
                            put.add(Bytes.toBytes(familyName), Bytes.toBytes(qualifiers[i]), timestamp, Bytes.toBytes(fields[i]));
                        }
                        context.write(NullWritable.get(), put);
                    } else {
                        System.err.println("ERROR: value length must equal qualifier length");
                    }
                }
            }
        }

        @Override
        public int run(String[] args) throws Exception {
            Job job = Job.getInstance(getConf(), CnyBatch.class.getSimpleName());
            TableMapReduceUtil.addDependencyJars(job);
            job.setJarByClass(CnyBatch.class);

            FileInputFormat.setInputPaths(job, args[0]);
            job.setMapperClass(CnyBatchMapper.class);
            job.setMapOutputKeyClass(LongWritable.class);
            job.setMapOutputValueClass(Text.class);

            job.setReducerClass(CnyBatchReduce.class);
            // TableOutputFormat picks up the target table from the
            // TableOutputFormat.OUTPUT_TABLE property set in main().
            job.setOutputFormatClass(TableOutputFormat.class);

            return job.waitForCompletion(true) ? 0 : 1;
        }

        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            conf.set("hbase.zookeeper.quorum", "h5:2181,h6:2181,h7:2181");
            conf.set("hbase.zookeeper.property.clientPort", "2181");
            conf.set("dfs.socket.timeout", "100000");
            // Pass conf to GenericOptionsParser so -D and other generic options take effect.
            String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
            if (otherArgs.length != 2) {
                System.err.println("Usage: CnyBatch <dataInputDir> <tableName>");
                System.exit(2);
            }
            conf.set(TableOutputFormat.OUTPUT_TABLE, otherArgs[1]);
            int status = ToolRunner.run(conf, new CnyBatch(), otherArgs);
            System.exit(status);
        }
    }
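
The Javadoc above describes how TableOutputFormat feeds each emitted Put to table.put(). HBase also ships a helper, TableMapReduceUtil.initTableReducerJob, which sets the reducer class, the output format, the target table, and the dependency jars in a single call, so run() does not have to wire TableOutputFormat by hand. A minimal sketch of run() rewritten that way, assuming the table name still arrives as the second program argument:

    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(getConf(), CnyBatch.class.getSimpleName());
        job.setJarByClass(CnyBatch.class);

        FileInputFormat.setInputPaths(job, args[0]);
        job.setMapperClass(CnyBatchMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);

        // One call wires the reducer, TableOutputFormat, the output table
        // (args[1]) and the HBase dependency jars onto the job.
        TableMapReduceUtil.initTableReducerJob(args[1], CnyBatchReduce.class, job);

        return job.waitForCompletion(true) ? 0 : 1;
    }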

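TableOutputFormat writes into an existing table, so the target table with column family "info" must be created before the job runs. Once the job completes, reading a row back is a quick sanity check. Below is a minimal sketch using the classic HTable/Get client API of that HBase generation; the class name CnyBatchVerify is made up for illustration, and args[1] should be a rowkey actually produced by the job (the first four fields of one input line joined by '_'):

    package com.bank.service;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Get;
    import org.apache.hadoop.hbase.client.HTable;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.util.Bytes;

    public class CnyBatchVerify {
        public static void main(String[] args) throws Exception {
            // Same ZooKeeper quorum as the import job.
            Configuration conf = HBaseConfiguration.create();
            conf.set("hbase.zookeeper.quorum", "h5:2181,h6:2181,h7:2181");
            // args[0] = table name, args[1] = rowkey to look up
            HTable table = new HTable(conf, args[0]);
            try {
                Result result = table.get(new Get(Bytes.toBytes(args[1])));
                // Read the "gzh" column of family "info" written by the reducer.
                byte[] val = result.getValue(Bytes.toBytes("info"), Bytes.toBytes("gzh"));
                System.out.println(val == null ? "row not found" : Bytes.toString(val));
            } finally {
                table.close();
            }
        }
    }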
  • Original post: https://www.cnblogs.com/mengyao/p/4231301.html