• HBase过滤器的使用


    一、常用过滤器:

      1、数据准备:  

    Rowkey:001    Family:Quilfifier address    value: 昆明市西山区
    Rowkey:001    Family:Quilfifier age    value: 23
    Rowkey:001    Family:Quilfifier name    value: 小明
    Rowkey:001    Family:Quilfifier personType    value: 布控人员,涉恐人员,线索人员
    Rowkey:001    Family:Quilfifier zjhm    value: 620302199822332832
    Rowkey:002    Family:Quilfifier address    value: 昆明市西山区福海路
    Rowkey:002    Family:Quilfifier age    value: 33
    Rowkey:002    Family:Quilfifier name    value: 小李
    Rowkey:002    Family:Quilfifier personType    value: 重点人员,涉恐人员,线索人员
    Rowkey:002    Family:Quilfifier zjhm    value: 620302199822332442
    Rowkey:003    Family:Quilfifier address    value: 昆明市西山区福海路
    Rowkey:003    Family:Quilfifier age    value: 34
    Rowkey:003    Family:Quilfifier name    value: 小王
    Rowkey:003    Family:Quilfifier personType    value: 重点人员,涉恐人员,在控人员
    Rowkey:003    Family:Quilfifier zjhm    value: 620302192398432442
    Rowkey:004    Family:Quilfifier address    value: 昆明市滇池路
    Rowkey:004    Family:Quilfifier age    value: 45
    Rowkey:004    Family:Quilfifier name    value: 小花
    Rowkey:004    Family:Quilfifier personType    value: 涉恐人员,线索人员
    Rowkey:004    Family:Quilfifier zjhm    value: 643020304050403436
    Rowkey:005    Family:Quilfifier address    value: 云南省西双版纳
    Rowkey:005    Family:Quilfifier age    value: 60
    Rowkey:005    Family:Quilfifier name    value: 小马
    Rowkey:005    Family:Quilfifier personType    value: ,涉案人员,涉恐人员,线索人员
    Rowkey:005    Family:Quilfifier zjhm    value: 643020302938413436
    Rowkey:006    Family:Quilfifier address    value: 北京市朝阳区
    Rowkey:006    Family:Quilfifier age    value: 66
    Rowkey:006    Family:Quilfifier name    value: 大壮
    Rowkey:006    Family:Quilfifier personType    value: 良民
    Rowkey:006    Family:Quilfifier zjhm    value: 673747322344384456

      2、过滤器的使用:

      

    package HBase;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.CellUtil;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.client.*;
    import org.apache.hadoop.hbase.filter.*;
    import org.apache.hadoop.hbase.util.Bytes;
    
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;
    
    public class Operator {
        public static Admin admin = null;
        public static Connection conn = null;
    
        public Connection getConn() throws IOException {
    
            Configuration hbaseConf = HBaseConfiguration.create();
         hbaseConf.set("hbase.zookeeper.quorum","master:2181,slave1:2181,slave2:2181"); hbaseConf.set(
    "hbase.zookeeper.quorum", "master:2181"); Connection HbaseConn = ConnectionFactory.createConnection(hbaseConf); return HbaseConn; } public Operator() { try { conn = Hbase.getConnection(); admin = conn.getAdmin(); } catch (Exception e) { e.getMessage(); } } public static void main(String[] args) throws Exception { Operator operator = new Operator(); operator.filter("person"); // operator.pageFilter("person"); } /** * SingleColumnValueFilter和SingleColumnValueExcludeFilter * 用来查找并返回指定条件的列的数据 * a,如果查找时没有该列,两种filter都会把该行所有数据返回 * b,如果查找时有该列,但是不符合条件,则该行所有列都不返回 * c,如果找到该列,并且符合条件,前者返回所有列,后者返回除该列以外的所有 */ public void filter(String tableName) throws Exception { Table table = conn.getTable(TableName.valueOf(tableName)); Scan scan = new Scan(); //SingleColumnValueFilter:二进制比较器,完整匹配字节数组,返回匹配到的整行 Filter filter = new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("personType"), CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("良民"))); //SingleColumnValueFilter:二进制比较器,只比较前缀是否相同,返回的是匹配到的整行,并非每一列 Filter filter0 = new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("personType"), CompareFilter.CompareOp.EQUAL, new BinaryPrefixComparator(Bytes.toBytes("重点"))); //SingleColumnValueFilter:匹配正则表达式,返回匹配到的整行 Filter filter1 = new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("personType"), CompareFilter.CompareOp.EQUAL, new RegexStringComparator(".*重点人员.*")); //SingleColumnValueFilter:匹配是否包含子串,大小写不敏感,返回匹配到的整行 Filter filter2 = new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("personType"), CompareFilter.CompareOp.EQUAL, new SubstringComparator("线索人员")); //查询出匹配的行,但是过滤掉所匹配的列 Filter filter3 = new SingleColumnValueExcludeFilter(Bytes.toBytes("info"), Bytes.toBytes("personType"), CompareFilter.CompareOp.EQUAL, new SubstringComparator("线索人员")); //RandomRowFilter:按照一定的几率来返回随机的结果 Filter filter4 = new RandomRowFilter((float) 0.5); //RowFilter:删选出指定开头行健的所有匹配的行 Filter filter5 = new PrefixFilter(Bytes.toBytes("00")); 
//ValueFilter:按照value全数据库搜索,返回的是所匹配值的某一列,并非某一行 Filter filter6 = new ValueFilter(CompareFilter.CompareOp.NOT_EQUAL, new BinaryComparator(Bytes.toBytes("23"))); //按family(列族)查找,取回所有符合条件的“family” Filter filter7 = new FamilyFilter(CompareFilter.CompareOp.LESS_OR_EQUAL, new BinaryComparator(Bytes.toBytes("info"))); //KeyOnlyFilter:返回所有的行,但是值全是空 Filter filter8 = new KeyOnlyFilter(); //ColumnsPrefixFilter:按照列明的前缀来筛选单元格,返回所有行的指定某列 Filter filter9 = new ColumnPrefixFilter(Bytes.toBytes("ag")); //FirsterKeyOnlyFilter:返回的结果集中只包含第一列的而数据,在找到每一行的第一列后就会停止扫描 Filter filter10 = new FirstKeyOnlyFilter(); //InclusiveStopFilter:返回截止到指定行的所有数据,包含最后一行(005)。使用startRow以及stopRow的时候是左闭右开 Filter filter11 = new InclusiveStopFilter(Bytes.toBytes("005")); //cloumnCountGetFilter:返回每行最多返回多少列,在一行列数超过一定数量的时候,结束整个表的扫描 Filter filter12 = new ColumnCountGetFilter(6); //SkipFilter:附加过滤器,如果发现一行中的某一列不符合条件,则整行就会被过滤 Filter filter13 = new SkipFilter(filter6); //WhileMatchFilter:过滤数据,直到不符合条件,停止扫扫描,返回的是符合条件的每一列数据 Filter filter14 = new WhileMatchFilter(filter6); //QualifierFilter:列名过滤,返回指定的每一列数据 Filter filter15 = new QualifierFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("age"))); //MultipleColumnPrefixFilter:与ColumnsPrefixFilter不同的是可以指定多个列明的前缀 byte[][] prefixs = new byte[][]{Bytes.toBytes("ag"), Bytes.toBytes("na")}; Filter filter16 = new MultipleColumnPrefixFilter(prefixs); //ColumnRangeFilter:可以进行高效的列名内部扫描,因为列名是已经按照字典顺序排好的,返回[minColumn,maxColumn]之间的数据 boolean minColumnlnclusive = true; boolean maxColumnlnclusive = true; Filter filter17 = new ColumnRangeFilter(Bytes.toBytes("name"), minColumnlnclusive, Bytes.toBytes("zjhm"), maxColumnlnclusive); //DependentColumnFilter:尝试找到该列所在的每一行,并返回改行具有相同时间戳的全部键值对,返回的是具体的某一列,并非某一行 Filter filter18 = new DependentColumnFilter(Bytes.toBytes("info"), Bytes.toBytes("age")); //RandomRowFilter:随机选择一行的过滤器,chance是一个浮点数 float chance = 0.6f; Filter filter19 = new RandomRowFilter(chance); //ColumnPaginationFilter:按列分页过滤器,针对列数量很多的情况使用 int limit = 3; int 
columnOffset = 0; Filter filter20 = new ColumnPaginationFilter(limit, columnOffset); //综合过滤器使用 List<Filter> filters = new ArrayList<>(); filters.add(filter1); filters.add(filter2); FilterList fl = new FilterList(FilterList.Operator.MUST_PASS_ALL, filters); ((SingleColumnValueFilter) filter1).setFilterIfMissing(false); scan.setFilter(filter20); ResultScanner scanner = table.getScanner(scan); for (Result r : scanner) { for (Cell cell : r.rawCells()) { System.out.println( "Rowkey:" + Bytes.toString(r.getRow()) + " " + "Family:Quilfifier " + Bytes.toString(CellUtil.cloneQualifier(cell)) + " " + "value: " + Bytes.toString(CellUtil.cloneValue(cell)) ); } } scanner.close(); } /** * 分页过滤器 * PageFilter:用于按行分页 */ public void pageFilter(String tableName) throws IOException { Table table = conn.getTable(TableName.valueOf(tableName)); long pageSize = 2; int totalRowsCount = 0; PageFilter pageFilter = new PageFilter(pageSize); byte[] lastRow = null; while (true) { Scan scan = new Scan(); scan.setFilter(pageFilter); if (lastRow != null) { byte[] posfix = Bytes.toBytes("002"); byte[] startRow = Bytes.add(lastRow, posfix); scan.setStartRow(startRow); System.out.println("start row :" + Bytes.toString(startRow)); } ResultScanner scanner = table.getScanner(scan); int localRowsCount = 0; for (Result result : scanner) { System.out.println(localRowsCount++ + ":" + result); totalRowsCount++; lastRow = result.getRow(); } scanner.close(); if (localRowsCount == 0) break; } System.out.println("total rows is :" + totalRowsCount); } }

      3、自定义过滤器

        --后面再补

      

  • 相关阅读:
    WPF编译时提示"xxx不包含适合于入口点的静态 Main方法xxx"
    被解放的姜戈04 各取所需
    SublimeText3 snippet 编写总结
    安装 Apache 出现 <OS 10013> 以一种访问权限不允许的方式做了一个访问套接字的尝试
    被解放的姜戈03 所谓伊人
    被解放的姜戈02 庄园疑云
    被解放的姜戈01 初试天涯
    EXP-00000: Message 0 not found; No message file for product=RDBMS, facility=EXP问题的解决方案
    Python网络02 Python服务器进化
    Python网络01 原始Python服务器
  • 原文地址:https://www.cnblogs.com/Gxiaobai/p/10280083.html
Copyright © 2020-2023  润新知