• HBase: using scanner caching and batching to speed up reads


    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.client.*;
    import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
    
    import java.io.IOException;
    
    /**
     * Created by similarface on 16/8/23.
     */
    public class ScanDataUseCache {
        private static Table table = null;

        public static Table getTable() {
            if (table == null) {
                try {
                    Configuration configuration = HBaseConfiguration.create();
                    Connection connection = ConnectionFactory.createConnection(configuration);
                    // open a connection to the table and cache it so later calls reuse it
                    table = connection.getTable(TableName.valueOf("testtable"));
                } catch (IOException e) {
                    System.out.println(e);
                }
            }
            return table;
        }
        private static void scan(int caching, int batch, boolean small) {
            int count = 0;
            // setCaching: number of rows fetched per RPC (the old default was 1); a larger
            //   cache means fewer RPCs, but a very large value makes each transfer slow.
            // setBatch: number of columns returned per Result; very wide rows are split so
            //   that only a few columns of a row are sent to the client at a time.
            // setSmall: whether to run this as a small scan.
            // setScanMetricsEnabled: collect scan metrics (e.g. the RPC count) for this scan.
            Scan scan = new Scan().setCaching(caching).setBatch(batch)
                    .setSmall(small).setScanMetricsEnabled(true);
            ResultScanner scanner = null;
            try {
                scanner = getTable().getScanner(scan);
            } catch (IOException e) {
                System.out.println(e);
            }
            if (scanner != null) {
                for (Result result : scanner) {
                    count++;
                }
                scanner.close();
                ScanMetrics metrics = scan.getScanMetrics();
                System.out.println("Caching: " + caching + ", Batch: " + batch +
                        ", Small: " + small + ", Results: " + count +
                        ", RPCs: " + metrics.countOfRPCcalls);
            } else {
                System.out.println("Error");
            }
        }
    
        public static void main(String[] args) throws IOException {
            // Caching: 1, Batch: 1, Small: false, Results: 9, RPCs: 12
            scan(1, 1, false);
    
            //Caching: 1, Batch: 0, Small: false, Results: 4, RPCs: 7
            scan(1, 0, false);
    
            // Caching: 1, Batch: 0, Small: true, Results: 4, RPCs: 0
            scan(1, 0, true);
    
            //Caching: 200, Batch: 1, Small: false, Results: 9, RPCs: 3
            scan(200, 1, false);
    
            //Caching: 200, Batch: 0, Small: false, Results: 4, RPCs: 3
            scan(200, 0, false);
    
            //Caching: 200, Batch: 0, Small: true, Results: 4, RPCs: 0
            scan(200, 0, true);
    
            // Caching: 2000, Batch: 100, Small: false, Results: 4, RPCs: 3
            scan(2000, 100, false);
    
            // Caching: 2, Batch: 100, Small: false, Results: 4, RPCs: 5
            scan(2, 100, false);
    
            // Caching: 2, Batch: 10, Small: false, Results: 4, RPCs: 5
            scan(2, 10, false);
    
            // Caching: 5, Batch: 100, Small: false (see the sample run below)
            scan(5, 100, false);
    
            // Caching: 5, Batch: 20, Small: false (see the sample run below)
            scan(5, 20, false);
    
            // Caching: 10, Batch: 10, Small: false, Results: 4, RPCs: 3
            scan(10, 10, false);
        }
    }
    
    /**
     Caching: 1, Batch: 0, Small: false, Results: 5, RPCs: 8
     Caching: 1, Batch: 0, Small: true, Results: 5, RPCs: 0
     Caching: 200, Batch: 1, Small: false, Results: 1009, RPCs: 8
     Caching: 200, Batch: 0, Small: false, Results: 5, RPCs: 3
     Caching: 200, Batch: 0, Small: true, Results: 5, RPCs: 0
     Caching: 2000, Batch: 100, Small: false, Results: 14, RPCs: 3
     Caching: 2, Batch: 100, Small: false, Results: 14, RPCs: 10
     Caching: 2, Batch: 10, Small: false, Results: 104, RPCs: 55
     Caching: 5, Batch: 100, Small: false, Results: 14, RPCs: 5
     Caching: 5, Batch: 20, Small: false, Results: 54, RPCs: 13
     Caching: 10, Batch: 10, Small: false, Results: 104, RPCs: 13
     **/
    

     

    This is a table with 9 rows of data, each row containing a number of columns. With a
    scanner configured with a caching of 6 and a batch of 3, three RPCs are needed to
    transfer the data: every 3 columns are packed into one Result instance, and 6 Results
    fill the client-side cache, together making up one RPC. The sketch below works through
    this arithmetic.
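
    As a rough check of that arithmetic, here is a minimal sketch (not part of the original
    post); the 9-row, 6-columns-per-row table it assumes is purely illustrative. Each row
    yields ceil(columns / batch) Result instances, and the client pulls them from the server
    in groups of "caching" per data RPC (opening and closing the scanner adds a few more RPCs
    on top).

    public class RpcEstimate {
        static long estimateRpcs(long rows, long colsPerRow, int caching, int batch) {
            long resultsPerRow = (colsPerRow + batch - 1) / batch; // ceil(columns / batch)
            long totalResults = rows * resultsPerRow;
            return (totalResults + caching - 1) / caching;         // ceil(results / caching)
        }

        public static void main(String[] args) {
            // 9 rows x 6 columns (assumed), caching = 6, batch = 3:
            // 2 Results per row, 18 Results in total, 3 data RPCs.
            System.out.println(estimateRpcs(9, 6, 6, 3)); // prints 3
        }
    }

    The second example below uses a per-column-family offset and limit (intra-row
    pagination) together with a maximum result size to control which columns of each row
    are returned and how much data a single fetch may carry.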

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.client.*;
    import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
    
    import java.io.IOException;
    
    /**
     * Created by similarface on 16/8/24.
     */
    public class ScanWithOffsetAndLimit {
        private static Table table = null;
    
        public static Table getTable() {
            if (table == null) {
                try {
                    Configuration configuration = HBaseConfiguration.create();
                    Connection connection = ConnectionFactory.createConnection(configuration);
                    // open a connection to the table and cache it so later calls reuse it
                    table = connection.getTable(TableName.valueOf("testtable"));
                } catch (IOException e) {
                    System.out.println(e);
                }
            }
            return table;
        }
    
        /**
         * Scan the table and print the scan metrics for this configuration.
         * @param num sequence number of this run
         * @param caching scanner caching (rows fetched per RPC)
         * @param batch batch size (columns per Result)
         * @param offset row offset per column family
         * @param maxResults maximum number of cells returned per column family
         * @param maxResultSize maximum result size in bytes per fetch
         * @param dump whether to print every Result
         * @throws IOException if the scan fails
         */
        private static void scan(int num, int caching, int batch, int offset, int maxResults,
                                 int maxResultSize, boolean dump) throws IOException {
            int count = 0;
            Scan scan = new Scan().setCaching(caching).setBatch(batch)
                    .setRowOffsetPerColumnFamily(offset)
                    .setMaxResultsPerColumnFamily(maxResults)
                    .setMaxResultSize(maxResultSize)
                    .setScanMetricsEnabled(true);
            ResultScanner scanner = getTable().getScanner(scan);
            System.out.println("Scan #" + num + " running...");
            for (Result result : scanner) {
                count++;
                if (dump)
                    System.out.println("Result [" + count + "]:" + result);
            }
            scanner.close();
            ScanMetrics metrics = scan.getScanMetrics();
            System.out.println("Caching: " + caching + ", Batch: " + batch +
                    ", Offset: " + offset + ", maxResults: " + maxResults +
                    ", maxSize: " + maxResultSize + ", Results: " + count +
                    ", RPCs: " + metrics.countOfRPCcalls);
        }
    
        public static void main(String[] args) throws IOException {
            // offset 0, at most 2 cells per column family: returns columns 1 and 2 of each row
            scan(1, 11, 0, 0, 2, -1, true);
            // offset 4, at most 2 cells per column family: returns columns 5 and 6 of each row
            scan(2, 11, 0, 4, 2, -1, true);
            // same offset/limit as scan #1, but with caching 5 and without dumping the results
            scan(3, 5, 0, 0, 2, -1, false);
            // batch 2 with at most 5 cells per column family: wider rows are split into several Results
            scan(4, 11, 2, 0, 5, -1, true);
            // no per-family limits; a maxResultSize of 1 byte forces a fetch for almost every Result
            scan(5, 11, -1, -1, -1, 1, false);
            // no per-family limits; a maxResultSize of 10000 bytes lets each fetch carry many Results
            scan(6, 11, -1, -1, -1, 10000, false);
        }
    }
    
    /**
     Caching: 11, Batch: 0, Offset: 0, maxResults: 2, maxSize: -1, Results: 5005, RPCs: 458
     Caching: 11, Batch: 0, Offset: 4, maxResults: 2, maxSize: -1, Results: 1, RPCs: 3
     Caching: 5, Batch: 0, Offset: 0, maxResults: 2, maxSize: -1, Results: 5005, RPCs: 1004
     Caching: 11, Batch: 2, Offset: 0, maxResults: 5, maxSize: -1, Results: 5009, RPCs: 458
     Caching: 11, Batch: -1, Offset: -1, maxResults: -1, maxSize: 1, Results: 5005, RPCs: 11012
     Caching: 11, Batch: -1, Offset: -1, maxResults: -1, maxSize: 10000, Results: 5005, RPCs: 469
    **/
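
    The same per-column-family offset and limit can also be used to page through the columns
    of one wide row. The following is a minimal sketch, not part of the original post: the
    table name testtable, the row key row-1, the assumption of a single column family, and
    the page size of 2 are all illustrative.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.client.*;
    import org.apache.hadoop.hbase.util.Bytes;

    public class ColumnPagingSketch {
        public static void main(String[] args) throws Exception {
            Configuration conf = HBaseConfiguration.create();
            try (Connection connection = ConnectionFactory.createConnection(conf);
                 Table table = connection.getTable(TableName.valueOf("testtable"))) {
                int pageSize = 2; // columns per page (assumed)
                int offset = 0;
                while (true) {
                    // Fetch the next "page" of columns of the (assumed) wide row "row-1".
                    Get get = new Get(Bytes.toBytes("row-1"))
                            .setRowOffsetPerColumnFamily(offset)
                            .setMaxResultsPerColumnFamily(pageSize);
                    Result result = table.get(get);
                    if (result.isEmpty()) break; // no columns left at this offset
                    System.out.println("Offset " + offset + ": " + result);
                    // With a single column family, a short page means it was the last one.
                    if (result.size() < pageSize) break;
                    offset += pageSize;
                }
            }
        }
    }

    Compared with pulling a whole wide row in a single Result, this bounds the amount of
    data per request at the cost of one extra round trip per page.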
    
  • Original article: https://www.cnblogs.com/similarface/p/5800509.html