

    6. HBase and MapReduce Integration

    6.1 The Official HBase MapReduce Jobs

    1. List the HBase jars that a MapReduce job needs on its classpath: bin/hbase mapredcp;
    2. Exporting the environment variables
      1. Temporary (effective only in the current shell), run on the command line:
        • export HBASE_HOME=/opt/module/hbase-1.3.4
        • export HADOOP_HOME=/opt/module/hadoop-2.8.5
        • export HADOOP_CLASSPATH=$(${HBASE_HOME}/bin/hbase mapredcp)
      2. Permanent, configure in /etc/profile:
        • export HBASE_HOME=/opt/module/hbase-1.3.4
        • export HADOOP_HOME=/opt/module/hadoop-2.8.5
        • and configure in hadoop-env.sh: export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/opt/module/hbase-1.3.4/lib/*
    3. Run the official MapReduce jobs
    // ===== Case 1: count the rows of the student table (run from the `/opt/module/hbase-1.3.4/` directory)
    /opt/module/hadoop-2.8.5/bin/yarn jar ./lib/hbase-server-1.3.4.jar rowcounter student
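    
    As a quick cross-check of the rowcounter result, the rows can also be counted from a plain HBase client. A minimal sketch, where the RowCountCheck class name and the FirstKeyOnlyFilter optimization are illustrative additions and hbase-site.xml is assumed to be on the classpath:
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.client.Connection;
    import org.apache.hadoop.hbase.client.ConnectionFactory;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.client.ResultScanner;
    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.client.Table;
    import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
    
    public class RowCountCheck {
    	public static void main(String[] args) throws Exception {
    		Configuration conf = HBaseConfiguration.create();
    		try (Connection conn = ConnectionFactory.createConnection(conf);
    				Table table = conn.getTable(TableName.valueOf("student"))) {
    			// Fetch only the first cell of each row to keep the scan cheap
    			Scan scan = new Scan();
    			scan.setFilter(new FirstKeyOnlyFilter());
    			long count = 0;
    			try (ResultScanner scanner = table.getScanner(scan)) {
    				for (Result result : scanner) {
    					count++;
    				}
    			}
    			System.out.println("rows in student: " + count);
    		}
    	}
    }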
    
    
    // ===== Case 2: import local data into HBase with MapReduce
    // 1. Create a local fruit.tsv file (fields separated by tabs, the importtsv default)
    1001    Apple   Red
    1002    Pear    Yellow
    1003    Pineapple   Yellow
    
    // 2. Create the HBase table
    create 'fruit','info'
    
    // 3. Create the input_fruit directory in HDFS and upload fruit.tsv
    /opt/module/hadoop-2.8.5/bin/hdfs dfs -mkdir /input_fruit
    /opt/module/hadoop-2.8.5/bin/hdfs dfs -put fruit.tsv /input_fruit/
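    
    The same upload can also be done programmatically with the Hadoop FileSystem API; a minimal sketch, where the UploadFruit class name is illustrative and the cluster's core-site.xml/hdfs-site.xml are assumed to be on the classpath so that fs.defaultFS points at HDFS:
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    
    public class UploadFruit {
    	public static void main(String[] args) throws Exception {
    		// Picks up fs.defaultFS from the Hadoop config files on the classpath
    		Configuration conf = new Configuration();
    		try (FileSystem fs = FileSystem.get(conf)) {
    			fs.mkdirs(new Path("/input_fruit"));
    			fs.copyFromLocalFile(new Path("fruit.tsv"), new Path("/input_fruit/fruit.tsv"));
    		}
    	}
    }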
    
    // 4. Run the MapReduce job to import fruit.tsv into the HBase fruit table;
    //    HBASE_ROW_KEY maps the first column to the row key, the remaining columns to info:name and info:color
    /opt/module/hadoop-2.8.5/bin/yarn jar ./lib/hbase-server-1.3.4.jar importtsv -Dimporttsv.columns=HBASE_ROW_KEY,info:name,info:color fruit hdfs://<namenode-address>/input_fruit
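    
    To spot-check the import, one row can be fetched back with a Get; a minimal sketch (class name illustrative, same classpath assumptions as above):
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.client.Connection;
    import org.apache.hadoop.hbase.client.ConnectionFactory;
    import org.apache.hadoop.hbase.client.Get;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.client.Table;
    import org.apache.hadoop.hbase.util.Bytes;
    
    public class FruitGetCheck {
    	public static void main(String[] args) throws Exception {
    		Configuration conf = HBaseConfiguration.create();
    		try (Connection conn = ConnectionFactory.createConnection(conf);
    				Table table = conn.getTable(TableName.valueOf("fruit"))) {
    			// Row key 1001 should come back as Apple / Red
    			Result result = table.get(new Get(Bytes.toBytes("1001")));
    			System.out.println(Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name"))));
    			System.out.println(Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("color"))));
    		}
    	}
    }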
    

    6.2 Custom HBase-MapReduce

    • Requirement: use MapReduce to migrate part of the data in the fruit table into the fruit_mr table
    // 1. Create the FruitMapper class to read data from the fruit table
    import java.io.IOException;
    
    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.CellUtil;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapreduce.TableMapper;
    import org.apache.hadoop.hbase.util.Bytes;
    
    public class FruitMapper extends TableMapper<ImmutableBytesWritable, Put> {
    
    	@Override
    	protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
    		// Create a Put keyed by the current row key
    		Put put = new Put(key.get());
    		
    		Cell[] cells = value.rawCells();
    		
    		// Keep only the cells of the info:name column
    		for (Cell cell : cells) {
    			if ("name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
    				put.add(cell);
    			}
    		}
    		
    		// Skip rows without an info:name cell, since an empty Put cannot be written
    		if (!put.isEmpty()) {
    			context.write(key, put);
    		}
    	}
    }
    
    // 2. Create the FruitReducer class to write the rows into the target table
    import java.io.IOException;
    
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapreduce.TableReducer;
    import org.apache.hadoop.io.NullWritable;
    
    public class FruitReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    
    	@Override
    	protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
    		// Forward every Put unchanged
    		for (Put value : values) {
    			context.write(NullWritable.get(), value);
    		}
    	}
    }
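    
    The reducer is deliberately a pass-through: TableReducer fixes the job's output value type to Mutation, so every Put emitted by the mapper is handed unchanged to TableOutputFormat, which writes it into the table named in initTableReducerJob.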
    
    // 3. Create the FruitDriver class to wire up the mapper and reducer
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.conf.Configured;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.client.Scan;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;
    
    public class FruitDriver extends Configured implements Tool {
    
    	@Override
    	public int run(String[] args) throws Exception {
    		// Get the Job object; getConf() returns the configuration injected by ToolRunner
    		Job job = Job.getInstance(getConf());
    		
    		// Set the driver class so YARN can locate the jar
    		job.setJarByClass(FruitDriver.class);
    		
    		// Set the Mapper: scan the fruit table and emit (row key, Put) pairs
    		TableMapReduceUtil.initTableMapperJob("fruit", new Scan(), FruitMapper.class, ImmutableBytesWritable.class, Put.class, job);
    		
    		// Set the Reducer: write the Puts into the fruit_mr table
    		TableMapReduceUtil.initTableReducerJob("fruit_mr", FruitReducer.class, job);
    		
    		// Submit the job and wait for it to finish
    		boolean result = job.waitForCompletion(true);
    		
    		return result ? 0 : 1;
    	}
    
    	public static void main(String[] args) throws Exception {
    		Configuration configuration = HBaseConfiguration.create();
    		int status = ToolRunner.run(configuration, new FruitDriver(), args);
    		System.exit(status);
    	}
    }
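    
    Because the driver runs through ToolRunner, Hadoop's generic options are parsed before run() is called, so configuration overrides such as -Dhbase.client.scanner.caching=500 can be placed on the command line ahead of the program arguments.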
    
    // 4. Package the classes as fruit.jar
    // 5. Create the fruit_mr table in HBase
    create 'fruit_mr','info'
    
    // 6. Run from /opt/module/hbase-1.3.4 (the last argument is the driver's fully qualified class name):
    /opt/module/hadoop-2.8.5/bin/yarn jar ./fruit.jar com.noodles.mr1.FruitDriver
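    
    Once the job finishes, scan 'fruit_mr' in the HBase shell should show the same row keys as fruit but only the info:name column, since FruitMapper filters out all other cells.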
    

    6.3 Custom HBase-MapReduce 2

    • Requirement: write data from HDFS into an HBase table
    // 1. Create the Mapper class to read the file on HDFS
    import java.io.IOException;
    
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    public class HDFSMapper extends Mapper<LongWritable, Text, NullWritable, Put> {
    
    	@Override
    	protected void map(LongWritable key, Text value, Context context)
    			throws IOException, InterruptedException {
    		// Read one line of the TSV file
    		String line = value.toString();
    		
    		// Split on the tab separator
    		String[] split = line.split("\t");
    		
    		// Build the Put: column 0 is the row key, columns 1 and 2 go to info:name and info:color
    		Put put = new Put(Bytes.toBytes(split[0]));
    		put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(split[1]));
    		put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("color"), Bytes.toBytes(split[2]));
    		
    		// Emit the Put; the row key travels inside it, so the output key can be NullWritable
    		context.write(NullWritable.get(), put);
    	}
    }
    
    // 2. Create the Reducer class to write the Puts into HBase
    import java.io.IOException;
    
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.mapreduce.TableReducer;
    import org.apache.hadoop.io.NullWritable;
    
    public class HDFSReducer extends TableReducer<NullWritable, Put, NullWritable> {
    
    	@Override
    	protected void reduce(NullWritable key, Iterable<Put> values, Context context)
    			throws IOException, InterruptedException {
    		// Forward every Put; TableOutputFormat performs the actual writes
    		for (Put value : values) {
    			context.write(NullWritable.get(), value);
    		}
    	}
    }
    
    // 3. Create the Driver
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.conf.Configured;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;
    
    public class HDFSDriver extends Configured implements Tool {
    
    	@Override
    	public int run(String[] args) throws Exception {
    		// Get the Job object; getConf() returns the configuration injected by ToolRunner
    		Job job = Job.getInstance(getConf());
    		
    		// Set the driver class so YARN can locate the jar
    		job.setJarByClass(HDFSDriver.class);
    		
    		// Set the Mapper and its output types
    		job.setMapperClass(HDFSMapper.class);
    		job.setMapOutputKeyClass(NullWritable.class);
    		job.setMapOutputValueClass(Put.class);
    		
    		// Set the Reducer: write the Puts into the fruit2 table
    		TableMapReduceUtil.initTableReducerJob("fruit2", HDFSReducer.class, job);
    		
    		// Set the input path (passed as the first program argument)
    		FileInputFormat.setInputPaths(job, args[0]);
    		
    		// Submit the job and wait for it to finish
    		boolean result = job.waitForCompletion(true);
    		
    		return result ? 0 : 1;
    	}
    
    	public static void main(String[] args) throws Exception {
    		Configuration configuration = HBaseConfiguration.create();
    		int status = ToolRunner.run(configuration, new HDFSDriver(), args);
    		System.exit(status);
    	}
    }
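    
    Note that run() only sees the program arguments left over after ToolRunner has consumed any generic options, so args[0] is the HDFS input path given on the command line.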
    
    // 4. Package the classes as fruit.jar
    // 5. Create the fruit2 table in HBase
    create 'fruit2','info'
    
    // 6. Run from /opt/module/hbase-1.3.4 (arguments: the driver's fully qualified class name, then the input file path):
    /opt/module/hadoop-2.8.5/bin/yarn jar ./fruit.jar com.noodles.mr2.HDFSDriver /input_fruit/fruit.tsv
    