• Hbase的写入负载均衡


    在写Hbase的时候,会担心数据分布在各个region上不均匀(与预分区无关);

    此时首先想到的方式是对 rowkey 做 hash、加盐等(这当然可行,但读取时该如何处理?以后另写一篇文章讨论)

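    本文采用新方式:在 table 级别统计每个 regionServer 上的 region 个数,把超出平均值的 region 通过 admin.move 迁移到 region 最少的 regionServer 上。所需的 Maven 仓库与依赖如下: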

        <repositories>
            <repository>
                <id>cloudera</id>
                <url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
            </repository>
        </repositories>
    
    <dependency>
                <groupId>org.apache.hbase</groupId>
                <artifactId>hbase-client</artifactId>
                <version>1.2.1</version>
                <exclusions>
                    <exclusion>
                        <groupId>javax.servlet</groupId>
                        <artifactId>*</artifactId>
                    </exclusion>
                </exclusions>
            </dependency>
            <dependency>
                <groupId>org.apache.hbase</groupId>
                <artifactId>hbase-server</artifactId>
                <version>1.2.1</version>
                <exclusions>
                    <exclusion>
                        <groupId>javax.servlet</groupId>
                        <artifactId>*</artifactId>
                    </exclusion>
                </exclusions>
            </dependency>

    代码:

    HbaseBalancer:
    package hbase_balance;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.*;
    import org.apache.hadoop.hbase.client.*;
    import org.apache.hadoop.hbase.util.Bytes;
    
    import java.io.IOException;
    import java.util.*;
    
    /**
     * Balances the regions of a single table across the live region servers.
     *
     * <p>Strategy: at table level, every region a server holds beyond the per-server average is
     * re-assigned to the server that currently holds the fewest regions of that table. This is
     * used instead of {@code admin.balancer()}.
     *
     * <p>Result: at table level, the region count of every region server ends up around the
     * average.
     */
    public class HbaseBalancer {

        /** Name of the table whose regions are balanced. */
        public static final String tableStr = "data1";
        /** ZooKeeper quorum used to reach the HBase cluster. */
        public static final String ZK_QUORUM = "hadoop01:2181,hadoop02:2181,hadoop03:2181";

        /** Not referenced inside this class; kept because it is part of the public interface. */
        public static final Integer BUCKETS_PER_NODE = 50;

        /**
         * Connects to the cluster, computes the average number of regions of {@link #tableStr}
         * per live region server, and moves the surplus regions to the least loaded servers.
         *
         * @param args ignored
         * @throws IOException if talking to the cluster fails
         */
        public static void main(String[] args) throws IOException {

            Configuration config = HBaseConfiguration.create();
            config.set(HConstants.ZOOKEEPER_QUORUM, ZK_QUORUM);

            // All cluster handles are Closeable; try-with-resources releases them on every exit
            // path, including exceptions thrown by admin.move.
            try (Connection conn = ConnectionFactory.createConnection(config);
                 Admin admin = conn.getAdmin();
                 RegionLocator locator = conn.getRegionLocator(TableName.valueOf(tableStr))) {

                ClusterStatus clusterStatus = admin.getClusterStatus();
                Collection<ServerName> hServers = clusterStatus.getServers();
                if (hServers.isEmpty()) {
                    // Guard: the average below would otherwise divide by zero.
                    System.out.println("no live region servers, nothing to balance");
                    return;
                }

                System.out.println("region servers :");
                // hostname -> bookkeeping entry, pre-populated for every live region server.
                // NOTE(review): keyed by hostname, so several region servers on one host would
                // share a single entry while still being counted separately in the average.
                Map<String,RegionServer> allRegionServers = new HashMap<String,RegionServer>(15);
                for (ServerName server : hServers) {
                    RegionServer rs = new RegionServer();
                    rs.setServerName(server);
                    allRegionServers.put(server.getHostname(), rs);
                    System.out.println(server.getStartcode() + " " + server.getHostname()
                            + " " + server.getHostAndPort());
                }

                // Current location of every region of the table.
                List<HRegionLocation> hRegionLocations = locator.getAllRegionLocations();

                int avgCnt = hRegionLocations.size() / hServers.size();
                System.out.println("avgCnt :" + avgCnt);
                System.out.println("hRegionLocations.size() :"+hRegionLocations.size());
                System.out.println("hServers.size() :" + hServers.size());

                // Regions found on a server that already holds avgCnt regions; they are
                // redistributed in the second pass.
                List<HRegionLocation> toAssign = new ArrayList<HRegionLocation>();

                System.out.println("=============== get Region Location end =============== ");
                // First pass: leave as many regions as possible where they already are.
                for (HRegionLocation hRegionLocation : hRegionLocations) {
                    String hostname = hRegionLocation.getHostname();
                    System.out.println("hostname :" + hostname);
                    RegionServer rs = allRegionServers.get(hostname);
                    if (rs == null) {
                        // The region sits on a host that was not in the cluster status snapshot;
                        // track it instead of failing with a NullPointerException.
                        rs = new RegionServer();
                        rs.setServerName(hRegionLocation.getServerName());
                        allRegionServers.put(hostname, rs);
                    }
                    System.out.println("rs.getRegions().size() :"+rs.getRegions().size());
                    if (rs.getRegions().size() == 0) {
                        rs.setServerName(hRegionLocation.getServerName());
                        System.out.println("hRegionLocation.getServerName()  :"+hRegionLocation.getServerName());
                    }
                    if (rs.getRegions().size() < avgCnt) {
                        rs.addRegion(hRegionLocation.getRegionInfo().getRegionNameAsString());
                    } else {
                        toAssign.add(hRegionLocation);
                    }
                    System.out.println(" one of the" + hRegionLocation.toString());
                }
                System.out.println("=============== get Region Location end =============== ");

                System.out.println(" region reassign");
                for (HRegionLocation assignRegion : toAssign) {
                    System.out.println("all need to reassign region " + assignRegion.toString());
                }
                System.out.println("=============== region reassign began ===============");

                // Second pass: hand each surplus region to the least loaded server.
                for (HRegionLocation region : toAssign) {
                    HRegionInfo regionInfo = region.getRegionInfo();

                    String destServerKey = getMinRegionServer(allRegionServers);
                    RegionServer rs = allRegionServers.get(destServerKey);
                    if (rs.getRegions().size() > avgCnt) {
                        // Even the least loaded server is above the average, so every server
                        // is; stop here so that no server grows beyond (avgCnt + 1).
                        break;
                    }
                    System.out.println(" get region toAssign" + region);
                    ServerName destServer = rs.getServerName();
                    String destServerName = destServer.getServerName();
                    if (destServer.equals(region.getServerName())) {
                        // The least loaded server is the one already hosting the region:
                        // only record it, a move request would be pointless.
                        System.out.println(" already on " + destServerName + ", no move needed");
                    } else {
                        admin.move(regionInfo.getEncodedNameAsBytes(), Bytes.toBytes(destServerName));
                        System.out.println(" reassign to " + destServerName);
                    }
                    rs.addRegion(regionInfo.getRegionNameAsString());
                }

                System.out.println("=============== region reassign end ===============");
            }
        }

        /**
         * Finds the region server that currently tracks the fewest regions.
         *
         * @param allRegionServers hostname to region server bookkeeping entry
         * @return hostname of the entry with the smallest region list (the first one encountered
         *         on ties), or an empty string when the map is empty
         */
        public static String getMinRegionServer(Map<String,RegionServer> allRegionServers ){
            String key = "";
            int cnt = Integer.MAX_VALUE;
            for (Map.Entry<String,RegionServer> entry : allRegionServers.entrySet()) {
                int size = entry.getValue().getRegions().size();
                if (size < cnt) {
                    cnt = size;
                    key = entry.getKey();
                }
            }
            return key;
        }
    }
    RegionServer:
    package hbase_balance;
    
    import org.apache.hadoop.hbase.ServerName;
    
    import java.util.ArrayList;
    import java.util.List;
    
    /**
     * Bookkeeping entry for one region server: its {@link ServerName} together with the names
     * of the regions counted against it.
     *
     * Created by angel;
     */
    public class RegionServer {

        /** Names of the regions tracked for this server; starts out empty. */
        private List<String> regions = new ArrayList<String>();

        /** Identity of the server; stays {@code null} until {@link #setServerName} is called. */
        private ServerName serverName;

        /** Creates an entry with no server name and an empty region list. */
        public RegionServer() {
        }

        /**
         * Appends a region to the tracked list.
         *
         * @param region region name to record
         */
        public void addRegion(String region) {
            regions.add(region);
        }

        /**
         * @return the tracked region list itself (not a copy)
         */
        public List<String> getRegions() {
            return this.regions;
        }

        /**
         * Replaces the tracked region list.
         *
         * @param regions list to track from now on
         */
        public void setRegions(List<String> regions) {
            this.regions = regions;
        }

        /**
         * @return the server identity last set, or {@code null} if none was set
         */
        public ServerName getServerName() {
            return this.serverName;
        }

        /**
         * @param serverName server identity to store
         */
        public void setServerName(ServerName serverName) {
            this.serverName = serverName;
        }
    }
  • 相关阅读:
    后台管理UI
    14.6.3.2 Configuring Multiple Buffer Pool Instances 配置多个Buffer Poll 实例:
    14.6.3.1 The InnoDB Buffer Pool
    innodb_buffer_pool_instances and innodb_buffer_pool_size的关系
    猪肉都被绑上了“家族标签”,大数据已波及到农牧业!
    14.6.2 Configuring InnoDB for Read-Only Operation
    jquery EasyUI datagrid重新加载传参问题
    分布式系统事务一致性解决方案
    面试题整理
    dump iot表
  • 原文地址:https://www.cnblogs.com/niutao/p/10547980.html
Copyright © 2020-2023  润新知