• HBase -> MapReduce -> HBase


    HBase 对 MapReduce API 进行了扩展,方便 MapReduce 任务读写 HTable 数据。


    package taglib.customer;
    import java.io.IOException;  
    
    import org.apache.hadoop.conf.Configuration;  
    import org.apache.hadoop.hbase.HBaseConfiguration;  
    import org.apache.hadoop.hbase.client.Put;  
    import org.apache.hadoop.hbase.client.Result;  
    import org.apache.hadoop.hbase.client.Scan;  
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;  
    import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;  
    import org.apache.hadoop.hbase.mapreduce.TableMapper;  
    import org.apache.hadoop.hbase.mapreduce.TableReducer;  
    import org.apache.hadoop.hbase.util.Bytes;  
    import org.apache.hadoop.io.IntWritable;  
    import org.apache.hadoop.io.Text;  
    import org.apache.hadoop.mapreduce.Job;
    public class MrHbase {
    
    	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    		// TODO Auto-generated method stub
    		Configuration conf = HBaseConfiguration.create();  
    		conf.set("hbase.zookeeper.quorum", "192.168.58.101");
            Job job = new Job(conf,"ExampleSummary");  
            job.setJarByClass(MrHbase.class);     // class that contains mapper and reducer  
    
            Scan scan = new Scan();  
            scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs  
            scan.setCacheBlocks(false);  // don't set to true for MR jobs  
            // set other scan attrs  
            //scan.addColumn(family, qualifier);  
            TableMapReduceUtil.initTableMapperJob(  
                    "blog",        // input table  
                    scan,               // Scan instance to control CF and attribute selection  
                    MyMapper.class,     // mapper class  
                    Text.class,         // mapper output key  
                    IntWritable.class,  // mapper output value  
                    job);  
            TableMapReduceUtil.initTableReducerJob(  
                    "blog2",        // output table  
                    MyTableReducer.class,    // reducer class  
                    job);  
            job.setNumReduceTasks(1);   // at least one, adjust as required  
    
            boolean b = job.waitForCompletion(true);  
            if (!b) {  
                throw new IOException("error with job!");  
            }
    	}
    	public static class MyMapper extends TableMapper<Text, IntWritable>  {  
    		  
            private final IntWritable ONE = new IntWritable(1);  
            private Text text = new Text();  
      
            public void map(ImmutableBytesWritable row, Result value, Context context) throws IOException, InterruptedException {  
                String ip = Bytes.toString(row.get());  
                String url = new String(value.getValue(Bytes.toBytes("article"), Bytes.toBytes("title")));  
                text.set(ip+"&"+url);  
                context.write(text, ONE);  
            }  
        }
    	public static class MyTableReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable>  {  
            public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {  
                int sum = 0;  
                for (IntWritable val : values) {  
                    sum += val.get();  
                }  
      
                Put put = new Put(key.getBytes());  
                put.add(Bytes.toBytes("article"), Bytes.toBytes("title"), Bytes.toBytes(String.valueOf(sum)));  
      
                context.write(null, put);  
            }  
        }  
    
    }
    


    版权声明:本文为博主原创文章,未经博主允许不得转载。

  • 相关阅读:
    架构设计
    git 常用命令
    C# 加载C++的dll
    windows 服务部署管理
    wpf 模板绑定控件属性
    golang开启module模式 go mod
    使用docker安装redis
    使用docker安装elasticsearch
    使用docker安装etcd
    使用docker安装mysql5.7
  • 原文地址:https://www.cnblogs.com/jamesf/p/4751473.html
Copyright © 2020-2023  润新知