• hbase的wordcount


    package com.neworigin.HBaseMR;
    
    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.CellUtil;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Mutation;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
    import org.apache.hadoop.hbase.mapreduce.TableMapper;
    import org.apache.hadoop.hbase.mapreduce.TableReducer;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    
    public class HbaseMRTest {
        static Configuration conf=null;
        static{
    //        配置configuration的三种方法
    //        ①直接将hbase-site.xml拿来放到src下面
            conf=HBaseConfiguration.create();
    //        ②设置服务器和端口        
    //        conf.set("hbase.zookeeper.quorum", "s100:2181,s101:2181,s102:2181");
    //        ③扥开设置服务器和端口
            conf.set("hbase.zookeeper.quorum", "s100,s101,s102");
            conf.set("hbase.zookeeper.property.clientPort", "2181");
        }
        
    ////    表信息
    //    public static final String tablename="wordtest";//表一
    //    public static final String colf="content";//表列族
    //    public static final String col="info";////    
    //    public static final String tablename2="stat";//表二
    //    
    //public static void initTB(){
    //    
    //}    
    public static class HBmapper extends TableMapper<Text,IntWritable>/*输出类型*/{
        private static IntWritable one=new IntWritable(1);
        private static Text word =new Text();
    //    输入类型,key:row key  value:一行数据的结果集 result
        protected void map(ImmutableBytesWritable key, Result value,
                Mapper<ImmutableBytesWritable, Result, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            for(Cell cell:value.rawCells())
            {
                word.set(CellUtil.cloneValue(cell));//读取值
                context.write(word, one);//输出:单词----1
            }
            
        }
    }
    /**
     * Reducer that sums the counts of each word and writes the total to HBase.
     *
     * The word is used as the row key; the total is stored as a decimal string
     * in column {@code content:info}.
     */
    public static class HBreducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {
        // Column coordinates are constant, so encode them once.
        private static final byte[] FAMILY = Bytes.toBytes("content");
        private static final byte[] QUALIFIER = Bytes.toBytes("info");

        /**
         * Adds up all counts for {@code key} and emits a {@link Put} for it.
         *
         * @param key     the word
         * @param values  the partial counts for the word
         * @param context job context the Put is written to
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // A Put cannot be created for a zero-length row key; skip such a key
            // instead of failing the whole job.
            if (key.getLength() == 0) {
                return;
            }

            // Accumulate in a long so very frequent words cannot overflow.
            long sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }

            byte[] row = Bytes.toBytes(key.toString()); // row key is the word
            Put put = new Put(row);
            // addColumn replaces the deprecated Put.add(family, qualifier, value).
            put.addColumn(FAMILY, QUALIFIER, Bytes.toBytes(String.valueOf(sum)));
            // Writing to HBase needs the row key and the Put.
            context.write(new ImmutableBytesWritable(row), put);
        }
    }
    /**
     * Configures and runs the word-count job: reads table {@code wordtest}
     * with {@link HBmapper} and writes the totals to table {@code stat} with
     * {@link HBreducer}.
     *
     * Prints {@code finished} when the job succeeds; exits with status 1 when
     * the job fails.
     *
     * @param args command-line arguments (not used)
     * @throws IOException            if the job cannot be set up or submitted
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if interrupted while waiting for the job
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Job.getInstance replaces the deprecated Job constructor.
        Job job = Job.getInstance(conf, "HBaseMR");
        job.setJarByClass(HbaseMRTest.class);

        Scan scan = new Scan();
        // Settings recommended for MapReduce scans: fetch rows in larger batches
        // and do not fill the region server block cache with a one-off full scan.
        scan.setCaching(500);
        scan.setCacheBlocks(false);

        TableMapReduceUtil.initTableMapperJob("wordtest", scan, HBmapper.class, Text.class, IntWritable.class, job);
        TableMapReduceUtil.initTableReducerJob("stat", HBreducer.class, job);

        // Report failure instead of always printing "finished".
        if (!job.waitForCompletion(true)) {
            System.err.println("HBaseMR job failed");
            System.exit(1);
        }
        System.out.println("finished");
    }
    }
  • 相关阅读:
    一文看懂Fluentd语法
    mongo 使用聚合合并字段
    加速开发流程的 Dockerfile 最佳实践
    nodejs之RSA加密/签名
    nodejs之https双向认证
    自签证书生成
    白话理解https
    一文看懂k8s Deployment yaml
    基于xtermjs实现的web terminal
    intelliJ 中文设置
  • 原文地址:https://www.cnblogs.com/chengdonghui/p/7978285.html
Copyright © 2020-2023  润新知