-- 1. Doris DDL: create the dimension table (daily history snapshot).
use dim;
CREATE TABLE IF NOT EXISTS `dim_tab_name` (
`inc_day` date NULL COMMENT "日期(分区)"
,`dept_code` varchar(100) NULL COMMENT "网点代码"
,`dept_name` varchar(100) NULL COMMENT "网点名称"
-- placeholder: the remaining business columns go here
,.............
,`valid_dt` varchar(100) NULL COMMENT "生效日期"
,`invalid_tm` varchar(100) NULL COMMENT "失效日期"
) ENGINE=OLAP
-- Duplicate-key model: (inc_day, dept_code) is only the sort/prefix key, not a unique key.
DUPLICATE KEY(`inc_day`, `dept_code`)
COMMENT "...的维表(历史快照)"
-- Pre-create one RANGE partition per day for the historical back-fill window 2021-08-01 .. 2021-10-11.
PARTITION BY RANGE(`inc_day`)(
START ("20210801") END ("20211011") EVERY (INTERVAL 1 day)
)
DISTRIBUTED BY HASH(`dept_code`) BUCKETS 32
PROPERTIES(
"storage_medium" = "SSD",
-- Dynamic partitioning auto-creates future daily partitions (end=1 => keep 1 day ahead).
-- NOTE(review): no "dynamic_partition.start" is set, so old partitions are never
-- auto-dropped and the table grows unbounded — confirm that is intended.
"dynamic_partition.enable" = "true",
"dynamic_partition.time_unit" = "DAY",
"dynamic_partition.end" = "1",
"dynamic_partition.prefix" = "p",
"dynamic_partition.buckets" = "32"
);
-- 二、Broker Load 批量导数
-- Inspect the status/error log of the broker-load job by its label.
-- Fix: the statement was missing its terminating ';', which breaks the script
-- when the following LOAD statement is submitted in the same batch.
use dim;SHOW LOAD WHERE LABEL = 'doris_dim_tab_name';
-- Run the broker-load bulk import from HDFS (parquet files) into the Doris table.
load label dim.doris_dim_tab_name
(
-- NOTE: a mismatched Tez output-directory layout previously caused this import to fail.
-- NOTE(review): the "[08|09|10]" glob looks shell-style — confirm Doris broker load
-- accepts this wildcard form in DATA INFILE paths.
data infile("hdfs://ip:port/hive/warehouse/dim/dim_tab_name/inc_day=2021[08|09|10]*/*")
INTO TABLE `dim_tab_name`
FORMAT AS "parquet"
-- The column order here must match the column order of the source Hive table.
(
dept_code,dept_name,.......,valid_dt,invalid_tm
)
COLUMNS FROM PATH AS (inc_day) -- read the inc_day partition value from the HDFS path (inc_day=...)
set (
-- Recommended explicit field mapping (identity mapping here; "..." is a placeholder
-- for the remaining columns).
inc_day=inc_day
,dept_code=dept_code
,dept_name=dept_name
..................
,valid_dt=valid_dt
,invalid_tm=invalid_tm
)
)
-- Use SHOW BROKER to look up the broker name(s) registered in the current cluster.
WITH BROKER hdfs_broker
PROPERTIES
(
-- Fail the load if more than 1% of source rows are filtered out as bad rows.
"max_filter_ratio" = "0.01"
);
-- 三、创建 ETL 调度任务
---- Target side, preparation statement run before each load: clear yesterday's
---- partition so the daily re-load is idempotent. "$[time(yyyyMMdd,-1d)]" is the
---- scheduler's date macro (yesterday), prefixed with the dynamic-partition prefix "p".
truncate table dim.dim_tab_name partition(p$[time(yyyyMMdd,-1d)])
----目标源,Stream Load参数:
-- "strict_mode":"false","max_filter_ratio":"0.10","exec_mem_limit":"12884901888","timeout":"10000"