Hbase之过滤器的使用

一、过滤器概念

基础API中的查询操作在面对大量数据的时候是非常无力的，为此HBase提供了更高级的查询方法：Filter（过滤器）。过滤器可以根据列簇、列、版本等更多的条件来对数据进行过滤；基于HBase本身提供的三维有序（主键有序、列有序、版本有序），这些Filter可以高效地完成查询过滤的任务。带有Filter条件的RPC查询请求会把Filter分发到各个RegionServer上执行，它是一个服务器端的过滤器，这样可以减少网络传输的压力。

二、数据准备

三、Hbase过滤器的分类

比较过滤器

1、行键过滤器——RowFilter，过滤掉rowkey小于等于104的行（只保留rowkey大于104的行）

  // Keep only rows whose key sorts after "104". Bytes.toBytes encodes as UTF-8,
  // whereas String.getBytes() depends on the platform default charset.
  Filter rowFilter = new RowFilter(CompareFilter.CompareOp.GREATER, new BinaryComparator(Bytes.toBytes("104")));
  scan.setFilter(rowFilter);

package com.laotou;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.util.Bytes;
/**
 * Scans the {@code filtertest} table with a {@link RowFilter} and prints every returned cell.
 *
 * <p>The filter compares row keys with {@code CompareOp.GREATER} against "104" using a
 * {@link BinaryComparator}, so only rows whose key sorts after "104" are printed.
 *
 * @Author:
 * @Date: 2019/5/17
 */
public class Test {
    private static final String ZK_CONNECT_KEY = "hbase.zookeeper.quorum";
    private static final String ZK_CONNECT_VALUE = "192.168.200.100,192.168.200.101,192.168.200.102";
    private static final String ZK_CONNECT_CLIENT = "hbase.zookeeper.property.clientPort";
    private static final String ZK_CONNECT_CLIENT_PORT = "2181";
    private static Configuration conf = new Configuration();
    private static Connection connection = null;

    /**
     * Connects to the cluster through the configured ZooKeeper quorum, runs the scan and
     * always closes the connection afterwards.
     *
     * @param args unused
     * @throws Exception if the connection cannot be created or the scan fails
     */
    public static void main(String[] args) throws Exception {
        conf.set(ZK_CONNECT_CLIENT, ZK_CONNECT_CLIENT_PORT);
        conf.set(ZK_CONNECT_KEY, ZK_CONNECT_VALUE);
        connection = ConnectionFactory.createConnection(conf);
        try {
            scanData();
        } finally {
            // The original never closed the connection.
            connection.close();
        }
    }

    /**
     * Runs the filtered scan and prints each cell as {@code row,family,qualifier,value},
     * followed by an empty line after every row.
     *
     * @throws Exception if the table cannot be opened or the scan fails
     */
    private static void scanData() throws Exception {
        Scan scan = new Scan();
        // Bytes.toBytes encodes as UTF-8 instead of the platform default charset.
        Filter rowFilter = new RowFilter(CompareFilter.CompareOp.GREATER,
                new BinaryComparator(Bytes.toBytes("104")));
        scan.setFilter(rowFilter);
        // Optional tuning left disabled as in the original: scan.setCaching(50) would
        // fetch up to 50 rows per RPC and reduce the number of round trips.

        // try-with-resources closes the scanner and then the table even if the scan throws.
        try (Table table = connection.getTable(TableName.valueOf("filtertest"));
             ResultScanner scanner = table.getScanner(scan)) {
            // scanner.next() returns null once the scan is exhausted.
            for (Result result = scanner.next(); result != null; result = scanner.next()) {
                for (Cell cell : result.rawCells()) {
                    String row = Bytes.toString(CellUtil.cloneRow(cell));
                    String family = Bytes.toString(CellUtil.cloneFamily(cell));
                    String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    System.out.println(row + "," + family + "," + qualifier + "," + value);
                }
                System.out.println();
            }
        }
    }
}

运行结果部分截图

2、列簇过滤器 FamilyFilter (将列簇为info的单元格全部取出来)

// Keep only the cells that belong to column family "info". Bytes.toBytes encodes as
// UTF-8, whereas String.getBytes() depends on the platform default charset.
Filter familyFilter = new FamilyFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("info")));
scan.setFilter(familyFilter);

package com.laotou;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
/**
 * Scans the {@code filtertest} table with a {@link FamilyFilter} and prints every returned cell.
 *
 * <p>The filter compares column-family names with {@code CompareOp.EQUAL} against "info",
 * so only cells of that family are printed.
 *
 * @Author:
 * @Date: 2019/5/17
 */
public class Test {
    private static final String ZK_CONNECT_KEY = "hbase.zookeeper.quorum";
    private static final String ZK_CONNECT_VALUE = "192.168.200.100,192.168.200.101,192.168.200.102";
    private static final String ZK_CONNECT_CLIENT = "hbase.zookeeper.property.clientPort";
    private static final String ZK_CONNECT_CLIENT_PORT = "2181";
    private static Configuration conf = new Configuration();
    private static Connection connection = null;

    /**
     * Connects to the cluster through the configured ZooKeeper quorum, runs the scan and
     * always closes the connection afterwards.
     *
     * @param args unused
     * @throws Exception if the connection cannot be created or the scan fails
     */
    public static void main(String[] args) throws Exception {
        conf.set(ZK_CONNECT_CLIENT, ZK_CONNECT_CLIENT_PORT);
        conf.set(ZK_CONNECT_KEY, ZK_CONNECT_VALUE);
        connection = ConnectionFactory.createConnection(conf);
        try {
            scanData();
        } finally {
            // The original never closed the connection.
            connection.close();
        }
    }

    /**
     * Runs the filtered scan and prints each cell as {@code row,family,qualifier,value},
     * followed by an empty line after every row.
     *
     * @throws Exception if the table cannot be opened or the scan fails
     */
    private static void scanData() throws Exception {
        Scan scan = new Scan();
        // Bytes.toBytes encodes as UTF-8 instead of the platform default charset.
        Filter familyFilter = new FamilyFilter(CompareFilter.CompareOp.EQUAL,
                new BinaryComparator(Bytes.toBytes("info")));
        scan.setFilter(familyFilter);
        // Fetch up to 50 rows per RPC to reduce the number of round trips.
        scan.setCaching(50);

        // try-with-resources closes the scanner and then the table even if the scan throws.
        try (Table table = connection.getTable(TableName.valueOf("filtertest"));
             ResultScanner scanner = table.getScanner(scan)) {
            // scanner.next() returns null once the scan is exhausted.
            for (Result result = scanner.next(); result != null; result = scanner.next()) {
                for (Cell cell : result.rawCells()) {
                    String row = Bytes.toString(CellUtil.cloneRow(cell));
                    String family = Bytes.toString(CellUtil.cloneFamily(cell));
                    String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    System.out.println(row + "," + family + "," + qualifier + "," + value);
                }
                System.out.println();
            }
        }
    }
}

3、值过滤器 ValueFilter（下面的示例代码使用的是 ValueFilter：取出值中包含 boy 的单元格；如果要按列名过滤，请改用列过滤器 QualifierFilter）

// Keep only the cells whose value contains the substring "boy".
SubstringComparator containsBoy = new SubstringComparator("boy");
scan.setFilter(new ValueFilter(CompareFilter.CompareOp.EQUAL, containsBoy));

 /**
  * Scans {@code filtertest} with a {@link ValueFilter} that keeps cells whose value contains
  * "boy", printing each cell as {@code row,family,qualifier,value} and an empty line after
  * every row.
  *
  * @throws Exception if the table cannot be opened or the scan fails
  */
 private static void scanData() throws Exception {
        Scan scan = new Scan();
        Filter valueFilter = new ValueFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("boy"));
        scan.setFilter(valueFilter);
        // Fetch up to 50 rows per RPC to reduce the number of round trips.
        scan.setCaching(50);

        // try-with-resources closes the scanner and then the table even if the scan throws.
        try (Table table = connection.getTable(TableName.valueOf("filtertest"));
             ResultScanner scanner = table.getScanner(scan)) {
            // scanner.next() returns null once the scan is exhausted.
            for (Result result = scanner.next(); result != null; result = scanner.next()) {
                for (Cell cell : result.rawCells()) {
                    String row = Bytes.toString(CellUtil.cloneRow(cell));
                    String family = Bytes.toString(CellUtil.cloneFamily(cell));
                    String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    System.out.println(row + "," + family + "," + qualifier + "," + value);
                }
                System.out.println();
            }
        }
    }

4、时间戳过滤器 TimestampsFilter

// Keep only the cells written with exactly this timestamp (milliseconds).
List<Long> list = new ArrayList<>();
// A long literal avoids the needless String -> Long -> long round trip.
list.add(1558072555745L);
TimestampsFilter timestampsFilter = new TimestampsFilter(list);
scan.setFilter(timestampsFilter);

/**
 * Scans {@code filtertest} with a {@link TimestampsFilter} for timestamp 1558072555745 and
 * prints each cell as {@code row,family,qualifier,value,timestamp}, with an empty line after
 * every row.
 *
 * @throws Exception if the table cannot be opened or the scan fails
 */
private static void scanData() throws Exception {
        Scan scan = new Scan();
        List<Long> timestamps = new ArrayList<>();
        // A long literal avoids the needless String -> Long -> long round trip.
        timestamps.add(1558072555745L);
        TimestampsFilter timestampsFilter = new TimestampsFilter(timestamps);
        scan.setFilter(timestampsFilter);
        // Fetch up to 50 rows per RPC to reduce the number of round trips.
        scan.setCaching(50);

        // try-with-resources closes the scanner and then the table even if the scan throws.
        try (Table table = connection.getTable(TableName.valueOf("filtertest"));
             ResultScanner scanner = table.getScanner(scan)) {
            // scanner.next() returns null once the scan is exhausted.
            for (Result result = scanner.next(); result != null; result = scanner.next()) {
                for (Cell cell : result.rawCells()) {
                    String row = Bytes.toString(CellUtil.cloneRow(cell));
                    String family = Bytes.toString(CellUtil.cloneFamily(cell));
                    String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    System.out.println(row + "," + family + "," + qualifier + "," + value
                            + "," + cell.getTimestamp());
                }
                System.out.println();
            }
        }
    }

专用过滤器

1、单列值过滤器 SingleColumnValueFilter ----会返回满足条件的整行

SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(
        "info".getBytes(), //列簇
        "name".getBytes(), //列
        CompareFilter.CompareOp.EQUAL,
        new SubstringComparator("lisi"));

 /**
  * Scans {@code filtertest} with a {@link SingleColumnValueFilter} on info:name containing
  * "lisi" and prints each cell as {@code row,family,qualifier,value,timestamp}, with an
  * empty line after every row.
  *
  * @throws Exception if the table cannot be opened or the scan fails
  */
 private static void scanData() throws Exception {
        Scan scan = new Scan();
        // Bytes.toBytes encodes as UTF-8 instead of the platform default charset.
        SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(
                Bytes.toBytes("info"), // column family
                Bytes.toBytes("name"), // column qualifier
                CompareFilter.CompareOp.EQUAL,
                new SubstringComparator("lisi"));
        // Without this, rows that do not contain the tested column are returned as well.
        singleColumnValueFilter.setFilterIfMissing(true);
        scan.setFilter(singleColumnValueFilter);
        // Fetch up to 50 rows per RPC to reduce the number of round trips.
        scan.setCaching(50);

        // try-with-resources closes the scanner and then the table even if the scan throws.
        try (Table table = connection.getTable(TableName.valueOf("filtertest"));
             ResultScanner scanner = table.getScanner(scan)) {
            // scanner.next() returns null once the scan is exhausted.
            for (Result result = scanner.next(); result != null; result = scanner.next()) {
                for (Cell cell : result.rawCells()) {
                    String row = Bytes.toString(CellUtil.cloneRow(cell));
                    String family = Bytes.toString(CellUtil.cloneFamily(cell));
                    String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    System.out.println(row + "," + family + "," + qualifier + "," + value
                            + "," + cell.getTimestamp());
                }
                System.out.println();
            }
        }
    }

2、单列值排除器 SingleColumnValueExcludeFilter

// Same row selection as SingleColumnValueFilter on info:name containing "lisi".
// CompareOp is qualified as CompareFilter.CompareOp so it resolves with the file's imports;
// Bytes.toBytes encodes as UTF-8 instead of the platform default charset.
SingleColumnValueExcludeFilter singleColumnValueExcludeFilter = new SingleColumnValueExcludeFilter(
                Bytes.toBytes("info"),
                Bytes.toBytes("name"),
                CompareFilter.CompareOp.EQUAL,
                new SubstringComparator("lisi"));
// Without this, rows that do not contain the info:name column are returned as well.
singleColumnValueExcludeFilter.setFilterIfMissing(true);

scan.setFilter(singleColumnValueExcludeFilter);

  /**
   * Scans {@code filtertest} with a {@link SingleColumnValueExcludeFilter} on info:name
   * containing "lisi" and prints each returned cell as
   * {@code row,family,qualifier,value,timestamp}, with an empty line after every row.
   *
   * @throws Exception if the table cannot be opened or the scan fails
   */
  private static void scanData() throws Exception {
        Scan scan = new Scan();
        // Bytes.toBytes encodes as UTF-8 instead of the platform default charset.
        SingleColumnValueExcludeFilter singleColumnValueExcludeFilter = new SingleColumnValueExcludeFilter(
                Bytes.toBytes("info"),
                Bytes.toBytes("name"),
                CompareFilter.CompareOp.EQUAL,
                new SubstringComparator("lisi"));
        // Without this, rows that do not contain the tested column are returned as well.
        singleColumnValueExcludeFilter.setFilterIfMissing(true);
        scan.setFilter(singleColumnValueExcludeFilter);
        // Fetch up to 50 rows per RPC to reduce the number of round trips.
        scan.setCaching(50);

        // try-with-resources closes the scanner and then the table even if the scan throws.
        try (Table table = connection.getTable(TableName.valueOf("filtertest"));
             ResultScanner scanner = table.getScanner(scan)) {
            // scanner.next() returns null once the scan is exhausted.
            for (Result result = scanner.next(); result != null; result = scanner.next()) {
                for (Cell cell : result.rawCells()) {
                    String row = Bytes.toString(CellUtil.cloneRow(cell));
                    String family = Bytes.toString(CellUtil.cloneFamily(cell));
                    String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    System.out.println(row + "," + family + "," + qualifier + "," + value
                            + "," + cell.getTimestamp());
                }
                System.out.println();
            }
        }
    }

与上面的单列值过滤器相比，返回结果中不再包含用于比较的 info:name 这一列（即不会打印值为 lisi 的这个字段及其值）

3、前缀过滤器 PrefixFilter----针对行键,将rowkey以12开头的打印出来

// Keep only rows whose key starts with "12". Bytes.toBytes encodes as UTF-8,
// whereas String.getBytes() depends on the platform default charset.
PrefixFilter prefixFilter = new PrefixFilter(Bytes.toBytes("12"));
scan.setFilter(prefixFilter);

package com.laotou;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.util.ArrayList;
import java.util.List;

/**
 * @Author: 
 * @Date: 2019/5/17
 */
public class Test {
    private static final String ZK_CONNECT_KEY = "hbase.zookeeper.quorum";
    private static final String ZK_CONNECT_VALUE = "192.168.200.100,192.168.200.101,192.168.200.102";
    private static final String ZK_CONNECT_CLIENT = "hbase.zookeeper.property.clientPort";
    private static final String ZK_CONNECT_CLIENT_PORT = "2181";
    private static Configuration conf = new Configuration();
    private static Connection connection = null;
    public static void main(String[] args) throws Exception {
        conf.set(ZK_CONNECT_CLIENT,ZK_CONNECT_CLIENT_PORT);
        conf.set(ZK_CONNECT_KEY,ZK_CONNECT_VALUE);
        connection = ConnectionFactory.createConnection(conf);
        scanData();
    }

    private static void scanData() throws Exception {
        //拿到表
        Table table = connection.getTable(TableName.valueOf("filtertest"));
        Scan scan=new Scan();
        PrefixFilter prefixFilter = new PrefixFilter("12".getBytes());
        scan.setFilter(prefixFilter);
//        //调一次返回50的cell，可以减少请求次数
        scan.setCaching(50);
        ResultScanner scanner = table.getScanner(scan);
        //是通过迭代器的方式，每调用 一次next，将光标向下移动一个，所以需要动态修改next对象的值
        Result next = scanner.next();
        while (next!=null){
            //将一个Result中的对象转为一个cell数组
            Cell[] cells = next.rawCells();
            for(Cell cell:cells){
                System.out.println(Bytes.toString(CellUtil.cloneRow(cell))+
                        ","+Bytes.toString(CellUtil.cloneFamily(cell))+
                        ","+ Bytes.toString(CellUtil.cloneQualifier(cell))+
                        ","+Bytes.toString(CellUtil.cloneValue(cell))+
                        ","+cell.getTimestamp());
            }
            System.out.println();
            //每循环一次，修改next的值一次
            next=scanner.next();
        }
        scanner.close();
        table.close();
    }

4、列前缀过滤器 ColumnPrefixFilter

// Keep only columns whose qualifier starts with "name". Bytes.toBytes encodes as UTF-8,
// whereas String.getBytes() depends on the platform default charset.
ColumnPrefixFilter columnPrefixFilter = new ColumnPrefixFilter(Bytes.toBytes("name"));

scan.setFilter(columnPrefixFilter);

  /**
   * Scans {@code filtertest} with a {@link ColumnPrefixFilter} for qualifiers starting with
   * "name" and prints each cell as {@code row,family,qualifier,value,timestamp}, with an
   * empty line after every row.
   *
   * @throws Exception if the table cannot be opened or the scan fails
   */
  private static void scanData() throws Exception {
        Scan scan = new Scan();
        // Bytes.toBytes encodes as UTF-8 instead of the platform default charset.
        ColumnPrefixFilter columnPrefixFilter = new ColumnPrefixFilter(Bytes.toBytes("name"));
        scan.setFilter(columnPrefixFilter);
        // Fetch up to 50 rows per RPC to reduce the number of round trips.
        scan.setCaching(50);

        // try-with-resources closes the scanner and then the table even if the scan throws.
        try (Table table = connection.getTable(TableName.valueOf("filtertest"));
             ResultScanner scanner = table.getScanner(scan)) {
            // scanner.next() returns null once the scan is exhausted.
            for (Result result = scanner.next(); result != null; result = scanner.next()) {
                for (Cell cell : result.rawCells()) {
                    String row = Bytes.toString(CellUtil.cloneRow(cell));
                    String family = Bytes.toString(CellUtil.cloneFamily(cell));
                    String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    System.out.println(row + "," + family + "," + qualifier + "," + value
                            + "," + cell.getTimestamp());
                }
                System.out.println();
            }
        }
    }

5、分页过滤器 PageFilter

每一页打印两条数据

Filter filter = new PageFilter(2);

/**
 * Scans {@code filtertest} with a {@link PageFilter} of size 2 and prints each cell as
 * {@code row,family,qualifier,value,timestamp}, with an empty line after every row.
 *
 * @throws Exception if the table cannot be opened or the scan fails
 */
private static void scanData() throws Exception {
        Scan scan = new Scan();
        Filter filter = new PageFilter(2);
        scan.setFilter(filter);
        // Fetch up to 50 rows per RPC to reduce the number of round trips.
        scan.setCaching(50);

        // try-with-resources closes the scanner and then the table even if the scan throws.
        try (Table table = connection.getTable(TableName.valueOf("filtertest"));
             ResultScanner scanner = table.getScanner(scan)) {
            // scanner.next() returns null once the scan is exhausted.
            for (Result result = scanner.next(); result != null; result = scanner.next()) {
                for (Cell cell : result.rawCells()) {
                    String row = Bytes.toString(CellUtil.cloneRow(cell));
                    String family = Bytes.toString(CellUtil.cloneFamily(cell));
                    String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    System.out.println(row + "," + family + "," + qualifier + "," + value
                            + "," + cell.getTimestamp());
                }
                System.out.println();
            }
        }
    }

6、行键范围过滤 [startkey, endkey)，结果为左闭右开

 scan.setStartRow(Bytes.toBytes("115"));
 scan.setStopRow(Bytes.toBytes("117"));

/**
 * Scans the row-key range ["115", "117") of {@code filtertest} and prints each cell as
 * {@code row,family,qualifier,value,timestamp}, with an empty line after every row.
 *
 * @throws Exception if the table cannot be opened or the scan fails
 */
private static void scanData() throws Exception {
        Scan scan = new Scan();
        // Start row is inclusive, stop row is exclusive.
        scan.setStartRow(Bytes.toBytes("115"));
        scan.setStopRow(Bytes.toBytes("117"));
        // Fetch up to 50 rows per RPC to reduce the number of round trips.
        scan.setCaching(50);

        // try-with-resources closes the scanner and then the table even if the scan throws.
        try (Table table = connection.getTable(TableName.valueOf("filtertest"));
             ResultScanner scanner = table.getScanner(scan)) {
            // scanner.next() returns null once the scan is exhausted.
            for (Result result = scanner.next(); result != null; result = scanner.next()) {
                for (Cell cell : result.rawCells()) {
                    String row = Bytes.toString(CellUtil.cloneRow(cell));
                    String family = Bytes.toString(CellUtil.cloneFamily(cell));
                    String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
                    String value = Bytes.toString(CellUtil.cloneValue(cell));
                    System.out.println(row + "," + family + "," + qualifier + "," + value
                            + "," + cell.getTimestamp());
                }
                System.out.println();
            }
        }
    }

相关阅读:
原创【cocos2d-x】CCMenuItemToggle 在lua中的使用
 SQL Server之LEFT JOIN、RIGHT JOIN、INNER JOIN的区别
 VS的IISExpress配置通过IP访问程序
 SQLServer执行大脚本文件时，提示“无法执行脚本没有足够的内存继续执行程序 (mscorlib)”
jqGrid中multiselect: true 操作checkbox
display：table的几个用法(元素平分宽度，垂直居中)
ASP.NET中前后台方法的相互调用
 AspxGridView使用手记
 大量文本框非空判断，如何提高灵活性？
Mysql安装、配置、优化
原文地址：https://www.cnblogs.com/yfb918/p/10641718.html