1、首先生成自定义过滤器,生成jar包,然后拷贝到服务器hbase目录的lib下。
1.1 自定义过滤器CustomFilter
import com.google.protobuf.InvalidProtocolBufferException; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.filter.FilterBase; import org.apache.hadoop.hbase.util.ByteStringer; import java.io.IOException; /** * @Author: xu.dm * @Date: 2019/4/14 12:16 * @Description: 自定义过滤器,选择列值匹配的行数据 */ public class CustomFilter extends FilterBase { private byte[] value = null; private boolean filterRow = true; public CustomFilter() { super(); } public CustomFilter(byte[] value) { this.value = value; } @Override public void reset() throws IOException { this.filterRow = true; } @Override public boolean filterRow() throws IOException { return this.filterRow; } //匹配的数据不过滤 @Override public ReturnCode filterCell(Cell c) throws IOException { if(CellUtil.matchingValue(c,value)) filterRow = false; return ReturnCode.INCLUDE; } /** * protobuf生成MyFilterProtos */ @Override public byte[] toByteArray() throws IOException { MyFilterProtos.CustomFilter.Builder builder = MyFilterProtos.CustomFilter.newBuilder(); if(value!=null) builder.setValue(ByteStringer.wrap(value)); return builder.build().toByteArray(); } public static Filter parseFrom(final byte[] pbBytes) throws DeserializationException { MyFilterProtos.CustomFilter proto; try { proto = MyFilterProtos.CustomFilter.parseFrom(pbBytes); } catch (InvalidProtocolBufferException e) { throw new DeserializationException(e); } return new CustomFilter(proto.getValue().toByteArray()); } }
1.2 MyFilterProtos是通过protobuf生成的,这里需要注意hbase使用的是protobuf2.5.0版本,不要使用高于2.5.0版本的protobuf,不然hbase会报找不到类的错误。
proto文件
// Schema for the serialized form of the custom filter; compiled into MyFilterProtos.
syntax = "proto2";

// Left empty; presumably so the generated class lands in the default Java package - confirm.
option java_package = "";
option java_outer_classname = "MyFilterProtos";
option java_generic_services = true;
option java_generate_equals_and_hash = true;
option optimize_for = SPEED;

// Wire representation of a CustomFilter instance.
message CustomFilter {
  // Cell value the filter compares against.
  required bytes value = 1;
}
1.3 编译并打包成jar包,发布到hbase的lib目录,hbase需要重启
[root@bigdata-senior01 lib]# ls $HBASE_HOME/lib/Custom*.*
/opt/hbase-2.0.4/lib/CustomFilter.jar
2、使用自定义过滤器
2.1 程序中引入刚才发布的jar包
2.2 使用自定义过滤器过滤数据
//使用自定义过滤器,只显示匹配列值的行 private static void customFilterData() throws IOException{ Table table = helper.getConnection().getTable(TableName.valueOf("testtable")); List<Filter> filters = new ArrayList<Filter>(); Filter filter1 = new CustomFilter(Bytes.toBytes("user30")); filters.add(filter1); Filter filter2 = new CustomFilter(Bytes.toBytes("user20")); filters.add(filter2); Filter filter3 = new CustomFilter(Bytes.toBytes("user90")); filters.add(filter3); FilterList filterList = new FilterList( FilterList.Operator.MUST_PASS_ONE, filters); Scan scan = new Scan(); scan.setFilter(filterList); ResultScanner scanner = table.getScanner(scan); for(Result result:scanner){ helper.dumpResult(result); } scanner.close(); table.close(); } }
输出结果:
Cell: rowKey20/ex:addr/1555078771906/Put/vlen=8/seqid=0, Value: street20
Cell: rowKey20/info:username/1555078771906/Put/vlen=6/seqid=0, Value: user20
Cell: rowKey20/memo:detail/1555078771906/Put/vlen=8/seqid=0, Value: remark20
Cell: rowKey30/ex:addr/1555078771906/Put/vlen=8/seqid=0, Value: street30
Cell: rowKey30/info:username/1555078771906/Put/vlen=6/seqid=0, Value: user30
Cell: rowKey30/memo:detail/1555078771906/Put/vlen=8/seqid=0, Value: remark30
Cell: rowKey90/ex:addr/1555078771906/Put/vlen=8/seqid=0, Value: street90
Cell: rowKey90/info:username/1555078771906/Put/vlen=6/seqid=0, Value: user90
Cell: rowKey90/memo:detail/1555078771906/Put/vlen=8/seqid=0, Value: remark90