一下代码实用 0.99.0 以后的版本。
package hadoop; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.ConnectionFactory; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp; import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.filter.FilterList; import org.apache.hadoop.hbase.filter.SingleColumnValueFilter; import org.apache.hadoop.hbase.util.Bytes; public class HbaseTestNew { public static Configuration configuration; static { configuration = HBaseConfiguration.create(); configuration.set("hbase.zookeeper.property.clientPort", "2181"); configuration.set("hbase.zookeeper.quorum", "192.168.1.231"); configuration.set("hbase.master", "192.168.1.231:16000"); } public static void main(String[] args) throws IOException { createTable("test1"); // insertData("test1"); // QueryAll("test1"); // QueryByCondition1("test1"); // QueryByCondition2("test1"); // QueryByCondition3("test1"); // deleteRow("test1","row1"); // deleteByCondition("test1","row1"); } public static void createTable(String tableName) throws IOException { Connection connection = ConnectionFactory.createConnection(configuration); Admin admin = connection.getAdmin(); HTableDescriptor tableDescriptor = new HTableDescriptor(TableName.valueOf(tableName)); tableDescriptor.addFamily(new HColumnDescriptor("column1")); tableDescriptor.addFamily(new HColumnDescriptor("column2")); tableDescriptor.addFamily(new HColumnDescriptor("column3")); admin.createTable(tableDescriptor); admin.close(); connection.close(); } public static void insertData(String tableName) throws IOException { Connection connection = ConnectionFactory.createConnection(configuration); Table table = connection.getTable(TableName.valueOf(tableName)); Put put = new Put("112233bbbcccc".getBytes()); put.addColumn("column1".getBytes(), null, "aaa".getBytes()); put.addColumn("column2".getBytes(), null, "bbb".getBytes()); put.addColumn("column3".getBytes(), null, "ccc".getBytes()); table.put(put); table.close(); connection.close(); } public static void dropTable(String tableName) throws IOException { Connection connection = ConnectionFactory.createConnection(configuration); Admin admin = connection.getAdmin(); admin.disableTable(TableName.valueOf(tableName)); admin.deleteTable(TableName.valueOf(tableName)); admin.close(); connection.close(); } public static void deleteRow(String tablename, String rowkey) throws IOException { Connection connection = ConnectionFactory.createConnection(configuration); Table table = connection.getTable(TableName.valueOf(tablename)); Delete d1 = new Delete(rowkey.getBytes()); table.delete(d1); table.close(); connection.close(); } /** * * @param tableName * @throws IOException */ public static void QueryAll(String tableName) throws IOException { Connection connection = ConnectionFactory.createConnection(configuration); Table table = connection.getTable(TableName.valueOf(tableName)); ResultScanner rs = table.getScanner(new Scan()); for (Result r : rs) { System.out.println("获得到rowkey:" + new String(r.getRow())); for (Cell cell : r.rawCells()) { System.out.println("列:" + new String(CellUtil.cloneFamily(cell)) + "====值:" + new String(CellUtil.cloneValue(cell))); } } rs.close(); table.close(); connection.close(); } public static void QueryByCondition1(String tableName) throws IOException { Connection connection = ConnectionFactory.createConnection(configuration); Table table = connection.getTable(TableName.valueOf(tableName)); Get scan = new Get("row1".getBytes()); Result r = table.get(scan); System.out.println("获得到rowkey:" + new String(r.getRow())); for (Cell cell : r.rawCells()) { System.out.println( "列:" + new String(CellUtil.cloneFamily(cell)) + "====值:" + new String(CellUtil.cloneValue(cell))); } table.close(); connection.close(); } public static void QueryByCondition2(String tableName) throws IOException { Connection connection = ConnectionFactory.createConnection(configuration); Table table = connection.getTable(TableName.valueOf(tableName)); Filter filter = new SingleColumnValueFilter(Bytes.toBytes("column1"), null, CompareOp.EQUAL, Bytes.toBytes("aaa")); Scan s = new Scan(); s.setFilter(filter); ResultScanner rs = table.getScanner(s); for (Result r : rs) { System.out.println("获得到rowkey:" + new String(r.getRow())); for (Cell cell : r.rawCells()) { System.out.println("列:" + new String(CellUtil.cloneFamily(cell)) + "====值:" + new String(CellUtil.cloneValue(cell))); } } rs.close(); table.close(); connection.close(); } public static void QueryByCondition3(String tableName) throws IOException { Connection connection = ConnectionFactory.createConnection(configuration); Table table = connection.getTable(TableName.valueOf(tableName)); List<Filter> filters = new ArrayList<Filter>(); Filter filter1 = new SingleColumnValueFilter(Bytes.toBytes("column1"), null, CompareOp.EQUAL, Bytes.toBytes("aaa")); filters.add(filter1); Filter filter2 = new SingleColumnValueFilter(Bytes.toBytes("column2"), null, CompareOp.EQUAL, Bytes.toBytes("bbb")); filters.add(filter2); Filter filter3 = new SingleColumnValueFilter(Bytes.toBytes("column3"), null, CompareOp.EQUAL, Bytes.toBytes("ccc")); filters.add(filter3); FilterList filterList1 = new FilterList(filters); Scan scan = new Scan(); scan.setFilter(filterList1); ResultScanner rs = table.getScanner(scan); for (Result r : rs) { System.out.println("获得到rowkey:" + new String(r.getRow())); for (Cell cell : r.rawCells()) { System.out.println("列:" + new String(CellUtil.cloneFamily(cell)) + "====值:" + new String(CellUtil.cloneValue(cell))); } } rs.close(); table.close(); connection.close(); } }
旧版本的 例子:
package hadoop; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.MasterNotRunningException; import org.apache.hadoop.hbase.ZooKeeperConnectionException; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.HTablePool; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.filter.FilterList; import org.apache.hadoop.hbase.filter.SingleColumnValueFilter; import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp; import org.apache.hadoop.hbase.util.Bytes; public class HbaseTest { public static Configuration configuration; static { configuration = HBaseConfiguration.create(); configuration.set("hbase.zookeeper.property.clientPort", "2181"); configuration.set("hbase.zookeeper.quorum", "192.168.1.231"); configuration.set("hbase.master", "192.168.1.231:16000"); } public static void main(String[] args) { createTable("test1"); // insertData("test1"); // QueryAll("test1"); // QueryByCondition1("test1"); // QueryByCondition2("test1"); //QueryByCondition3("test1"); //deleteRow("test1","row1"); //deleteByCondition("test1","row1"); } public static void createTable(String tableName) { System.out.println("start create table ......"); try { HBaseAdmin hBaseAdmin = new HBaseAdmin(configuration); if (hBaseAdmin.tableExists(tableName)) { hBaseAdmin.disableTable(tableName); hBaseAdmin.deleteTable(tableName); System.out.println(tableName + " is exist,detele...."); } HTableDescriptor tableDescriptor = new HTableDescriptor(tableName); tableDescriptor.addFamily(new HColumnDescriptor("column1")); tableDescriptor.addFamily(new HColumnDescriptor("column2")); tableDescriptor.addFamily(new HColumnDescriptor("column3")); hBaseAdmin.createTable(tableDescriptor); } catch (MasterNotRunningException e) { e.printStackTrace(); } catch (ZooKeeperConnectionException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } System.out.println("end create table ......"); } public static void insertData(String tableName) { System.out.println("start insert data ......"); HTablePool pool = new HTablePool(configuration, 1000); HTable table = (HTable) pool.getTable(tableName); Put put = new Put("112233bbbcccc".getBytes()); put.add("column1".getBytes(), null, "aaa".getBytes()); put.add("column2".getBytes(), null, "bbb".getBytes()); put.add("column3".getBytes(), null, "ccc".getBytes()); try { table.put(put); } catch (IOException e) { e.printStackTrace(); } System.out.println("end insert data ......"); } public static void dropTable(String tableName) { try { HBaseAdmin admin = new HBaseAdmin(configuration); admin.disableTable(tableName); admin.deleteTable(tableName); } catch (MasterNotRunningException e) { e.printStackTrace(); } catch (ZooKeeperConnectionException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } public static void deleteRow(String tablename, String rowkey) { try { HTable table = new HTable(configuration, tablename); List list = new ArrayList(); Delete d1 = new Delete(rowkey.getBytes()); list.add(d1); table.delete(list); System.out.println("删除行成功!"); } catch (IOException e) { e.printStackTrace(); } } public static void QueryAll(String tableName) { HTablePool pool = new HTablePool(configuration, 1000); HTable table = (HTable) pool.getTable(tableName); try { ResultScanner rs = table.getScanner(new Scan()); for (Result r : rs) { System.out.println("获得到rowkey:" + new String(r.getRow())); for (KeyValue keyValue : r.raw()) { System.out.println("列:" + new String(keyValue.getFamily()) + "====值:" + new String(keyValue.getValue())); } } } catch (IOException e) { e.printStackTrace(); } } public static void QueryByCondition1(String tableName) { HTablePool pool = new HTablePool(configuration, 1000); HTable table = (HTable) pool.getTable(tableName); try { Get scan = new Get("row1".getBytes()); Result r = table.get(scan); System.out.println("获得到rowkey:" + new String(r.getRow())); for (KeyValue keyValue : r.raw()) { System.out.println("列:" + new String(keyValue.getFamily()) + "====值:" + new String(keyValue.getValue())); } } catch (IOException e) { e.printStackTrace(); } } public static void QueryByCondition2(String tableName) { try { HTablePool pool = new HTablePool(configuration, 1000); HTable table = (HTable) pool.getTable(tableName); Filter filter = new SingleColumnValueFilter(Bytes .toBytes("column1"), null, CompareOp.EQUAL, Bytes .toBytes("aaa")); Scan s = new Scan(); s.setFilter(filter); ResultScanner rs = table.getScanner(s); for (Result r : rs) { System.out.println("获得到rowkey:" + new String(r.getRow())); for (KeyValue keyValue : r.raw()) { System.out.println("列:" + new String(keyValue.getFamily()) + "====值:" + new String(keyValue.getValue())); } } } catch (Exception e) { e.printStackTrace(); } } public static void QueryByCondition3(String tableName) { try { HTablePool pool = new HTablePool(configuration, 1000); HTable table = (HTable) pool.getTable(tableName); List<Filter> filters = new ArrayList<Filter>(); Filter filter1 = new SingleColumnValueFilter(Bytes .toBytes("column1"), null, CompareOp.EQUAL, Bytes .toBytes("aaa")); filters.add(filter1); Filter filter2 = new SingleColumnValueFilter(Bytes .toBytes("column2"), null, CompareOp.EQUAL, Bytes .toBytes("bbb")); filters.add(filter2); Filter filter3 = new SingleColumnValueFilter(Bytes .toBytes("column3"), null, CompareOp.EQUAL, Bytes .toBytes("ccc")); filters.add(filter3); FilterList filterList1 = new FilterList(filters); Scan scan = new Scan(); scan.setFilter(filterList1); ResultScanner rs = table.getScanner(scan); for (Result r : rs) { System.out.println("获得到rowkey:" + new String(r.getRow())); for (KeyValue keyValue : r.raw()) { System.out.println("列:" + new String(keyValue.getFamily()) + "====值:" + new String(keyValue.getValue())); } } rs.close(); } catch (Exception e) { e.printStackTrace(); } } }
关于HBase的过滤器
HBase为筛选数据提供了一组过滤器,通过这个过滤器可以在HBase中数据的多个维度(行、列、数据版本)上进行对数据的筛选操作,也就是说过滤器最终能够筛选的数据能够细化到具体的一个存储单元格上(由行键、列名、时间戳定位)。通常来说,通过行键、值来筛选数据的应用场景较多。需要说明的是,过滤器会极大地影响查询效率。所以,在数据量较大的数据表中,应尽量避免使用过滤器。
下面介绍一些常用的HBase内置过滤器的用法:
1、RowFilter:筛选出匹配的所有的行。使用BinaryComparator可以筛选出具有某个行键的行,或者通过改变比较运算符(下面的例子中是CompareFilter.CompareOp.EQUAL)来筛选出符合某一条件的多条数据,如下示例就是筛选出行键为row1的一行数据。
// 筛选出匹配的所有的行 Filter rf = new RowFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("row1")));
2、PrefixFilter:筛选出具有特定前缀的行键的数据。这个过滤器所实现的功能其实也可以由RowFilter结合RegexComparator来实现,不过这里提供了一种简便的使用方法,如下示例就是筛选出行键以row为前缀的所有的行。
// 筛选匹配行键的前缀成功的行 Filter pf = new PrefixFilter(Bytes.toBytes("row"));
3、KeyOnlyFilter:这个过滤器唯一的功能就是只返回每行的行键,值全部为空,这对于只关注于行键的应用场景来说非常合适,这样忽略掉其值就可以减少传递到客户端的数据量,能起到一定的优化作用。
// 返回所有的行键,但值全是空 Filter kof = new KeyOnlyFilter();
4、RandomRowFilter:按照一定的几率(<=0会过滤掉所有的行,>=1会包含所有的行)来返回随机的结果集,对于同样的数据集,多次使用同一个RandomRowFilter会返回不同的结果集,对于需要随机抽取一部分数据的应用场景,可以使用此过滤器。
// 随机选出一部分的行 Filter rrf = new RandomRowFilter((float) 0.8);
5、InclusiveStopFilter:扫描的时候,我们可以设置一个开始行键和一个终止行键,默认情况下,这个行键的返回是前闭后开区间,即包含起始行,但不包含终止行。如果我们想要同时包含起始行和终止行,那么可以使用此过滤器。
// 包含了扫描的上限在结果之内 Filter isf = new InclusiveStopFilter(Bytes.toBytes("row1"));
6、FirstKeyOnlyFilter:如果想要返回的结果集中只包含第一列的数据,那么这个过滤器能够满足要求。它在找到每行的第一列之后会停止扫描,从而使扫描的性能也得到了一定的提升。
// 筛选出每行的第一个单元格 Filter fkof = new FirstKeyOnlyFilter();
7、ColumnPrefixFilter:它按照列名的前缀来筛选单元格,如果我们想要对返回的列的前缀加以限制的话,可以使用这个过滤器。
// 筛选出前缀匹配的列 Filter cpf = new ColumnPrefixFilter(Bytes.toBytes("qual1"));
8、ValueFilter:按照具体的值来筛选单元格的过滤器,这会把一行中值不能满足的单元格过滤掉,如下面的构造器,对于每一行的一个列,如果其对应的值不包含ROW2_QUAL1,那么这个列就不会返回给客户端。
// 筛选某个(值的条件满足的)特定的单元格 Filter vf = new ValueFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("ROW2_QUAL1"));
9、ColumnCountGetFilter:这个过滤器在遇到一行的列数超过我们所设置的限制值的时候,结束扫描操作。
// 如果突然发现一行中的列数超过设定的最大值时,整个扫描操作会停止 Filter ccf = new ColumnCountGetFilter(2);
10、SingleColumnValueFilter:用一列的值决定这一行的数据是否被过滤,可对它的对象调用setFilterIfMissing方法,默认的参数是false。其作用是,对于咱们要使用作为条件的列,如果参数为true,这样的行将会被过滤掉,如果参数为false,这样的行会包含在结果集中。
// 将满足条件的列所在的行过滤掉 SingleColumnValueFilter scvf = new SingleColumnValueFilter( • Bytes.toBytes("colfam1"), • Bytes.toBytes("qual2"), • CompareFilter.CompareOp.NOT_EQUAL, • new SubstringComparator("BOGUS")); scvf.setFilterIfMissing(true);
11、SingleColumnValueExcludeFilter:这个过滤器与第10种过滤器唯一的区别就是,作为筛选条件的列,其行不会包含在返回的结果中。
12、SkipFilter:这是一种附加过滤器,其与ValueFilter结合使用,如果发现一行中的某一列不符合条件,那么整行就会被过滤掉。
// 发现某一行中的一列需要过滤时,整个行就会被过滤掉 Filter skf = new SkipFilter(vf);
13、WhileMatchFilter:使用这个过滤器,当遇到不符合设定条件的数据的时候,整个扫描结束。
// 当遇到不符合过滤器rf设置的条件时,整个扫描结束 Filter wmf = new WhileMatchFilter(rf);
14. FilterList:可以用于综合使用多个过滤器。其有两种关系: Operator.MUST_PASS_ONE表示关系AND,Operator.MUST_PASS_ALL表示关系OR,并且FilterList可以嵌套使用,使得我们能够表达更多的需求。
// 综合使用多个过滤器,AND和OR两种关系 List<Filter> filters = new ArrayList<Filter>(); filters.add(rf); filters.add(vf); FilterList fl = new FilterList(FilterList.Operator.MUST_PASS_ALL,filters);