• 本地数据导入到hbase表中。


    1.首先我们要先创建好表

    #在li这个命名空间下创建一张名为baidu的表,并且指定列簇info
    create "li:baidu","info"
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
    
    import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    
    import java.io.IOException;
    
    /**
     * Map-only MapReduce job that loads a tab-separated text file into an HBase table.
     *
     * <p>Each input line that splits into exactly six tab-separated fields becomes one
     * {@link Put} in the {@code info} column family of the target table; every other
     * line is skipped.
     */
    public class ReadFromFileIntoHbase {
        /** Target table; matches the table created by {@code create "li:baidu","info"}. */
        private static final String TARGET_TABLE = "li:baidu";
        /** Input file used when no path is passed on the command line. */
        private static final String DEFAULT_INPUT_PATH = "D:\\sogou.500w.utf8";
        /** Number of tab-separated fields a line must have to be imported. */
        private static final int EXPECTED_FIELD_COUNT = 6;

        /**
         * Turns one line of the input file into one HBase {@link Put}.
         *
         * <p>The row key is {@code words[1] + "-" + words[0]}; fields 2..5 are stored as the
         * columns {@code serch}, {@code rank}, {@code click} and {@code url}.
         */
        static class ReadFromFileMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put>{
            private static final byte[] FAMILY = Bytes.toBytes("info");
            // NOTE(review): "serch" looks like a typo for "search"; kept as-is because
            // renaming the qualifier would change the schema of the stored data.
            private static final byte[] SEARCH = Bytes.toBytes("serch");
            private static final byte[] RANK = Bytes.toBytes("rank");
            private static final byte[] CLICK = Bytes.toBytes("click");
            private static final byte[] URL = Bytes.toBytes("url");

            // Reused across map() calls to avoid allocating a new key object per record.
            private final ImmutableBytesWritable ibw = new ImmutableBytesWritable();

            /**
             * Emits one {@link Put} for a well-formed line and silently skips any other line.
             *
             * @param key     position of the line in the input (unused)
             * @param value   one line of the input file
             * @param context job context the row is written to
             * @throws IOException          if writing the output record fails
             * @throws InterruptedException if the task is interrupted while writing
             */
            @Override
            protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
                // Fields in the input file are separated by a tab character.
                String[] words = value.toString().split("\t");
                // Validate BEFORE indexing: a blank or truncated line has fewer fields and
                // would otherwise throw ArrayIndexOutOfBoundsException and fail the task.
                if (words.length != EXPECTED_FIELD_COUNT) {
                    return;
                }

                // Row key: second field, then first field (per the original note: user id + time).
                byte[] rowKey = Bytes.toBytes(words[1] + "-" + words[0]);
                ibw.set(rowKey);

                Put put = new Put(rowKey);
                put.addColumn(FAMILY, SEARCH, Bytes.toBytes(words[2]));
                put.addColumn(FAMILY, RANK, Bytes.toBytes(words[3]));
                put.addColumn(FAMILY, CLICK, Bytes.toBytes(words[4]));
                put.addColumn(FAMILY, URL, Bytes.toBytes(words[5]));
                context.write(ibw, put);
            }

            /**
             * Kept for backward compatibility with launchers that start the job through the
             * mapper class; delegates to {@link ReadFromFileIntoHbase#main(String[])}.
             *
             * @param args see {@link ReadFromFileIntoHbase#main(String[])}
             * @throws IOException            if the job fails or cannot be submitted
             * @throws ClassNotFoundException if a job class cannot be loaded
             * @throws InterruptedException   if interrupted while waiting for the job
             */
            public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
                ReadFromFileIntoHbase.main(args);
            }
        }

        /**
         * Configures and runs the import job, blocking until it finishes.
         *
         * @param args optional; {@code args[0]} is the input path, defaulting to
         *             {@code D:\sogou.500w.utf8} when absent
         * @throws IOException            if the job fails or cannot be submitted
         * @throws ClassNotFoundException if a job class cannot be loaded
         * @throws InterruptedException   if interrupted while waiting for the job
         */
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;

            Configuration config = HBaseConfiguration.create();
            config.set("hbase.zookeeper.quorum","server3:2181");
            config.set("zookeeper.znode.parent","/hbase-unsecure");
            Job job = Job.getInstance(config, "ExampleRead");
            job.setJarByClass(ReadFromFileIntoHbase.class);

            job.setMapperClass(ReadFromFileMapper.class);
            job.setOutputKeyClass(ImmutableBytesWritable.class);
            job.setOutputValueClass(Put.class);

            // The results go into an HBase table, so let TableMapReduceUtil wire up the
            // table output; no reducer class is supplied.
            TableMapReduceUtil.initTableReducerJob(TARGET_TABLE, null, job);
            // Zero reduce tasks: this is a map-only job.
            job.setNumReduceTasks(0);

            FileInputFormat.addInputPath(job, new Path(inputPath));

            boolean succeeded = job.waitForCompletion(true);
            if (!succeeded) {
                throw new IOException("error with job!");
            }
        }
    }

    2.验证一下

    #查看前10条数据
    scan "li:baidu",{LIMIT=>10}
  • 相关阅读:
    type为number的input标签输入小数的方法
    魔兽争霸3作弊码
    pom.xml 添加 JUnit依赖
    缺失 import org.apache.commons.fileupload.FileUploadException; 包
    使用WebMvcConfigurerAdapter 做登录,失效的一个小小原因
    Missing session attribute 'user' of type List 解决办法
    @RequestMapping 和 @GetMapping @PostMapping 区别
    thymeleaf模板对没有结束符的HTML5标签解析出错的解决办法
    Thymeleaf 模板 引用头部 尾部 最简单教程
    IDEA 如何弹出Select classes to import
  • 原文地址:https://www.cnblogs.com/shiji7/p/11929621.html
Copyright © 2020-2023  润新知