思路:从HBase Master的Web页面抓取各表的DDL,从hbase:meta表中各region的ENDKEY获取预分区key,然后组装成建表语句
1. 获取表定义table.ddl
1_get_alltable_ddl.sh $MASTER_HOST
#!/bin/bash
# Scrape the table definitions shown on the HBase master web UI
# (tablesDetailed.jsp) and store them, one table per line, in ./table.ddl.
#
# Usage: 1_get_alltable_ddl.sh <hbase-master-host-or-ip>
# Files written in the current directory:
#   table.jsp.tmp - raw HTML of the page
#   table.ddl     - text of every <td> cell that does not start with a link

# Hostname or IP of the HBase master
MASTER_HOST=$1
if [[ -z "${MASTER_HOST}" ]]; then
  echo "Usage: $0 <hbase-master-host-or-ip>" >&2
  exit 1
fi

# -f makes curl fail on HTTP errors, so an error page is never parsed as DDL;
# -sS hides the progress meter but still reports errors.
if ! curl -fsS "http://${MASTER_HOST}:16010/tablesDetailed.jsp" > table.jsp.tmp; then
  echo "failed to download tablesDetailed.jsp from ${MASTER_HOST}:16010" >&2
  exit 1
fi

# Keep the <td> cells that are not links (the link cells are skipped),
# then strip the surrounding <td>...</td> tags.
grep "<td>" table.jsp.tmp \
  | grep -vE "<td><a" \
  | awk -F'<td>' '{print $2}' \
  | awk -F'</td>' '{print $1}' > table.ddl
执行结果示例:生成的table.ddl文件内容如下
'test:groupmsg', {CONFIGURATION => {'COMPRESSION_COMPACT' => 'SNAPPY', 'hbase.regionserver.region.split.policy' => 'org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy', 'hbase.hregion.max.filesize' => '10737418240'}}, {NAME => 'f', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'SNAPPY', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
'test:trace_log', {TABLE_ATTRIBUTES => {DURABILITY => 'SKIP_WAL', CONFIGURATION => {'COMPRESSION_COMPACT' => 'SNAPPY', 'hbase.regionserver.region.split.policy' => 'org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy', 'hbase.hregion.max.filesize' => '53687091200'}}, {NAME => 'f', BLOOMFILTER => 'ROWCOL', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => '2592000 SECONDS (30 DAYS)', COMPRESSION => 'SNAPPY', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
'test:usermsg', {CONFIGURATION => {'COMPRESSION_COMPACT' => 'SNAPPY', 'hbase.regionserver.region.split.policy' => 'org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy', 'hbase.hregion.max.filesize' => '10737418240'}}, {NAME => 'f', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'SNAPPY', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
2. 获取表元数据table meta
2_get_table_meta.sh ${table_name}
#!/bin/bash
# Dump the hbase:meta rows of one table into ./<table_name>.meta.
#
# Usage: 2_get_table_meta.sh <table_name>
TABLE_NAME=$1
if [[ -z "${TABLE_NAME}" ]]; then
  # Without a name the filter prefix would be just "," and the output ".meta".
  echo "Usage: $0 <table_name>" >&2
  exit 1
fi
TABLE_META_PATH="${TABLE_NAME}.meta"

# The scan is restricted to row keys starting with "<table_name>,".
if ! hbase shell <<< "scan 'hbase:meta',{FILTER=>\"PrefixFilter('${TABLE_NAME},')\"}" > "${TABLE_META_PATH}"; then
  echo "hbase shell failed while scanning hbase:meta for ${TABLE_NAME}" >&2
  exit 1
fi
执行结果示例:生成的${TABLE_NAME}.meta (test:groupmsg.meta)文件内容如下
HBase Shell; enter 'help<RETURN>' for list of supported commands.
Type "exit<RETURN>" to leave the HBase Shell
Version 1.2.6.1, rUnknown, Mon Nov 11 08:58:58 UTC 2019
scan 'hbase:meta',{FILTER=>"PrefixFilter('test:groupmsg,')"}
ROW COLUMN+CELL
test:groupmsg,,1562322491739.1a2bb43902a942a57c24ad5d9b64d3fc. column=info:regioninfo, timestamp=1624508489258, value={ENCODED => 1a2bb43902a942a57c24ad5d9b64d3fc, NAME => 'test:groupmsg,,1562322491739.1a2bb43902a942a57c24ad5d9b64d3fc.', STARTKEY => '', ENDKEY => '40000000'}
test:groupmsg,,1562322491739.1a2bb43902a942a57c24ad5d9b64d3fc. column=info:seqnumDuringOpen, timestamp=1624508489258, value=\x00\x00\x00\x00\x00B\x13\x16
test:groupmsg,,1562322491739.1a2bb43902a942a57c24ad5d9b64d3fc. column=info:server, timestamp=1624508489258, value=hbase-rs103.xx.example.com:16020
test:groupmsg,,1562322491739.1a2bb43902a942a57c24ad5d9b64d3fc. column=info:serverstartcode, timestamp=1624508489258, value=1624507850013
test:groupmsg,40000000,1562322491739.764d259a82bfca7b45d8737fa83c5436. column=info:regioninfo, timestamp=1625218758316, value={ENCODED => 764d259a82bfca7b45d8737fa83c5436, NAME => 'test:groupmsg,40000000,1562322491739.764d259a82bfca7b45d8737fa83c5436.', STARTKEY => '40000000', ENDKEY => '80000000'}
test:groupmsg,40000000,1562322491739.764d259a82bfca7b45d8737fa83c5436. column=info:seqnumDuringOpen, timestamp=1625218758316, value=\x00\x00\x00\x00\x02q\x0E\xF4
test:groupmsg,40000000,1562322491739.764d259a82bfca7b45d8737fa83c5436. column=info:server, timestamp=1625218758316, value=hbase-rs102.xx.example.com:16020
test:groupmsg,40000000,1562322491739.764d259a82bfca7b45d8737fa83c5436. column=info:serverstartcode, timestamp=1625218758316, value=1625218597990
test:groupmsg,80000000,1562322491739.c365430d306ad7eac771395aa4573ea0. column=info:regioninfo, timestamp=1624416270816, value={ENCODED => c365430d306ad7eac771395aa4573ea0, NAME => 'test:groupmsg,80000000,1562322491739.c365430d306ad7eac771395aa4573ea0.', STARTKEY => '80000000', ENDKEY => 'c0000000'}
test:groupmsg,80000000,1562322491739.c365430d306ad7eac771395aa4573ea0. column=info:seqnumDuringOpen, timestamp=1624416270816, value=\x00\x00\x00\x00\x02\x05\x02\x19
test:groupmsg,80000000,1562322491739.c365430d306ad7eac771395aa4573ea0. column=info:server, timestamp=1624416270816, value=hbase-rs195.xx.example.com:16020
test:groupmsg,80000000,1562322491739.c365430d306ad7eac771395aa4573ea0. column=info:serverstartcode, timestamp=1624416270816, value=1624416077140
test:groupmsg,c0000000,1562322491739.8a5ca10df6e2e8572cc4f8c1a7256722. column=info:regioninfo, timestamp=1621828288501, value={ENCODED => 8a5ca10df6e2e8572cc4f8c1a7256722, NAME => 'test:groupmsg,c0000000,1562322491739.8a5ca10df6e2e8572cc4f8c1a7256722.', STARTKEY => 'c0000000', ENDKEY => ''}
test:groupmsg,c0000000,1562322491739.8a5ca10df6e2e8572cc4f8c1a7256722. column=info:seqnumDuringOpen, timestamp=1621828288501, value=\x00\x00\x00\x00\x01\x96\x93\xEA
test:groupmsg,c0000000,1562322491739.8a5ca10df6e2e8572cc4f8c1a7256722. column=info:server, timestamp=1621828288501, value=hbase-rs179.xx.example.com:16020
test:groupmsg,c0000000,1562322491739.8a5ca10df6e2e8572cc4f8c1a7256722. column=info:serverstartcode, timestamp=1621828288501, value=1621828098776
4 row(s) in 0.5810 seconds
-- 该表有 4 个region
3. 生成建表语句
生成建表语句,将建表语句输出到文件$TABLE_NAME.ddl
如果考虑将regions数量减半,可只取奇数行或偶数行的split key;也可以多次取奇数行,使regions数量一再减半。下面的脚本只减半一次(只取奇数行)
3_generate_table_ddl.sh ${table_name}
#!/bin/bash
# Settings read by the DDL generation below.

# Table to process, taken from the first command-line argument.
TABLE_NAME="$1"

# File holding one definition line per table.
TABLE_DDL_MAP='table.ddl'

# Saved hbase:meta scan output for the table.
TABLE_META_PATH="${TABLE_NAME}.meta"

# Receives the generated statement; starts out empty.
TABLE_DDL_WITH_PRESPLITS=
#######################################
# Build the `create` statement of one table, including its pre-split keys,
# and write it to ./<table>.ddl.
#
# Table-level attributes (the leading CONFIGURATION / TABLE_ATTRIBUTES group
# of the definition line) are intentionally left out; only the column
# families and the split keys are emitted.
#
# Globals:
#   TABLE_DDL_MAP            (read)  file with one "'<table>', {...}" line
#                                    per table
#   TABLE_META_PATH          (read)  saved `scan 'hbase:meta'` output of the
#                                    table
#   TABLE_DDL_WITH_PRESPLITS (write) the generated statement
# Arguments:
#   $1 - table name, e.g. test:groupmsg
#   $2 - optional sampling step over the region end keys (default 2):
#        1 keeps every key, 2 keeps the 1st, 3rd, 5th ... key (roughly half
#        the regions), 4 keeps the 1st, 5th ... key, and so on
# Outputs:
#   A debug line plus the statement on stdout; the statement in ./<table>.ddl
# Returns:
#   0 on success, 1 on invalid arguments or missing input
#######################################
general_table_ddl() {
  local TABLE_NAME=$1
  local step=${2:-2}
  local family_marker='{NAME => '
  local group_sep=', {'
  local ddl_line families table_ddl pre_splits

  if [[ -z "${TABLE_NAME}" ]]; then
    echo "general_table_ddl: table name is required" >&2
    return 1
  fi
  if ! [[ "${step}" =~ ^[1-9][0-9]*$ ]]; then
    echo "general_table_ddl: step must be a positive integer, got '${step}'" >&2
    return 1
  fi
  if [[ ! -r "${TABLE_DDL_MAP}" ]]; then
    echo "general_table_ddl: cannot read ${TABLE_DDL_MAP}" >&2
    return 1
  fi
  if [[ ! -r "${TABLE_META_PATH}" ]]; then
    echo "general_table_ddl: cannot read ${TABLE_META_PATH}" >&2
    return 1
  fi

  # Exact, case-sensitive match on the leading quoted table name, so a table
  # whose name differs only in case (or matches as a regex) is never picked.
  ddl_line=$(awk -v prefix="'${TABLE_NAME}'," '
    { sub(/^[ \t]+/, ""); sub(/[ \t\r]+$/, "") }
    index($0, prefix) == 1 { print; exit }
  ' "${TABLE_DDL_MAP}")
  if [[ -z "${ddl_line}" ]]; then
    echo "general_table_ddl: no definition of '${TABLE_NAME}' in ${TABLE_DDL_MAP}" >&2
    return 1
  fi

  if [[ "${ddl_line}" == *"${family_marker}"* ]]; then
    # Keep everything from the first column family onwards, so tables with
    # several column families keep all of them.
    families="${family_marker}${ddl_line#*"${family_marker}"}"
  else
    # No recognisable column family group: fall back to the last ", {" group.
    families="{${ddl_line##*"${group_sep}"}"
  fi
  table_ddl="create '${TABLE_NAME}', ${families}"

  # Pre-splits: take the quoted ENDKEY of every region line, sample every
  # step-th key and drop the empty end key of the last region.
  # NOTE(review): keys are copied verbatim inside single quotes; keys that the
  # shell prints with \x escapes presumably need different quoting - verify.
  pre_splits=$(
    sed -n "s/.*ENDKEY => \('.*'\)}[[:space:]]*\$/\1/p" "${TABLE_META_PATH}" \
      | awk -v step="${step}" -v empty="''" '
          (NR - 1) % step == 0 && $0 != empty { out = out sep $0; sep = ", " }
          END { print out }
        '
  )

  if [[ -n "${pre_splits}" ]]; then
    TABLE_DDL_WITH_PRESPLITS="${table_ddl}, {SPLITS => [${pre_splits}]}"
  else
    # Single-region table (or no usable key): create it without SPLITS.
    TABLE_DDL_WITH_PRESPLITS="${table_ddl}"
  fi

  printf '%s DEBUG TABLE DDL ===>\n%s\n' "$(date)" "${TABLE_DDL_WITH_PRESPLITS}"
  printf '%s\n' "${TABLE_DDL_WITH_PRESPLITS}" > "${TABLE_NAME}.ddl"
  return 0
}
# Generate the create-table statement for the requested table.
# Quoted so the table name always reaches the function as a single argument.
general_table_ddl "${TABLE_NAME}"
执行结果示例:生成的${TABLE_NAME}.ddl (test:groupmsg.ddl)文件内容如下(此示例为按全部split key分区、未减半时的结果)
create 'test:groupmsg', {NAME => 'f', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'SNAPPY', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}, {SPLITS => [ '40000000', '80000000', 'c0000000']}
如果要修改表的一些属性,可将修改表语句一同写入文件,则test:groupmsg.ddl文件如下:
create 'test:groupmsg', {NAME => 'f', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'SNAPPY', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}, {SPLITS => [ '40000000', '80000000', 'c0000000']}
alter_async 'test:groupmsg', {CONFIGURATION => {'COMPRESSION_COMPACT' => 'SNAPPY','hbase.regionserver.region.split.policy' => 'org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy','hbase.hregion.max.filesize' => '10737418240'}}
4. 建表
通过让hbase shell从文件中读取命令的方式执行建表语句
# Confirm beforehand that the namespace exists; otherwise an error is reported
hbase shell ./test:groupmsg.ddl