• hive入ES5.6.8


    1、--建立索引

    number_of_shards:主分片数 number_of_replicas:副本数 index.refresh_interval:索引刷新间隔（-1 表示关闭自动刷新，适合批量导入）
    # Create the target index zhuanlidata9 with the settings used for this
    # load: 64 primary shards, no replicas, and automatic refresh disabled
    # (index.refresh_interval = -1).
    # The JSON body is sent with an explicit Content-Type header; without it
    # curl's -d defaults to application/x-www-form-urlencoded, which
    # Elasticsearch 5.x only accepts with a deprecation warning and later
    # versions reject.
    curl -X PUT 'http://192.168.10.69:9200/zhuanlidata9' \
      -H 'Content-Type: application/json' \
      -d '{"settings":{"number_of_shards":64,"number_of_replicas":0,"index.refresh_interval": -1}}'

    2、--创建mapping

    # Register the mapping for type zhuanliquanwen on index zhuanlidata9.
    # Identifier-like and date-like fields are stored as keyword; the long
    # free-text fields (title, abstract, description, claims, assignee
    # address) are analyzed with ik_max_word, which requires the IK analysis
    # plugin to be installed on the cluster; the remaining text fields use
    # the default analyzer.
    # The request carries an explicit Content-Type header and URL scheme so
    # it matches the index-creation call and is not subject to content-type
    # sniffing.
    # NOTE(review): "text" is mapped as keyword; if it can hold very long
    # values, confirm they stay below the keyword term-length limit or add
    # ignore_above.
    curl -X PUT 'http://192.168.10.69:9200/zhuanlidata9/_mapping/zhuanliquanwen' \
      -H 'Content-Type: application/json' \
      -d '
    {
      "properties": {
        "uuid": {"type": "keyword"},
        "filename": {"type": "keyword"},
        "lang": {"type": "keyword"},
        "country": {"type": "keyword"},
        "doc_number": {"type": "keyword"},
        "kind": {"type": "keyword"},
        "date": {"type": "keyword"},
        "gazette_num": {"type": "keyword"},
        "gazette_date": {"type": "keyword"},
        "appl_type": {"type": "keyword"},
        "appl_country": {"type": "keyword"},
        "appl_doc_number": {"type": "keyword"},
        "appl_date": {"type": "keyword"},
        "text": {"type": "keyword"},
        "invention_title": {"type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_max_word"},
        "assignees": {"type": "text"},
        "assignees_address": {"type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_max_word"},
        "abstracts": {"type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_max_word"},
        "applicants": {"type": "text"},
        "applicants_address": {"type": "text"},
        "inventors": {"type": "text"},
        "agents": {"type": "text"},
        "agency": {"type": "text"},
        "descriptions": {"type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_max_word"},
        "claims": {"type": "text", "analyzer": "ik_max_word", "search_analyzer": "ik_max_word"},
        "cn_related_publication": {"type": "text"},
        "cn_publication_referen": {"type": "text"},
        "cn_related_document": {"type": "text"},
        "priority_claims": {"type": "text"},
        "reference": {"type": "text"},
        "searcher": {"type": "text"}
      }
    }'

    3、--创建hive映射ES表

    -- On the 11.31 machine, type "hive" to start the Hive CLI, then run the
    -- statements below inside it.
    hive
    -- Register the elasticsearch-hadoop connector jar for this session.
    add jar /data/2/zly/elasticsearch-hadoop-5.6.8/dist/elasticsearch-hadoop-5.6.8.jar;
    -- Create the Hive table that is backed by the Elasticsearch index.
    -- Column names are written in lower case to match the field names in the
    -- ES mapping. `date` is back-quoted because DATE is a reserved keyword in
    -- Hive 1.2 and later and is rejected as a bare column name there.
    CREATE EXTERNAL TABLE test.zhuanlidata9 (
    uuid string,
    filename string,
    lang string,
    country string,
    doc_number string,
    kind string,
    `date` string,
    gazette_num string,
    gazette_date string,
    appl_type string,
    appl_country string,
    appl_doc_number string,
    appl_date string,
    text string,
    invention_title string,
    assignees string,
    assignees_address string,
    abstracts string,
    applicants string,
    applicants_address string,
    inventors string,
    agents string,
    agency string,
    descriptions string,
    claims string,
    cn_related_publication string,
    cn_publication_referen string,
    cn_related_document string,
    priority_claims string,
    reference string,
    searcher string
    )
    STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
    TBLPROPERTIES(
    -- Target index/type, the ES nodes to contact, and their HTTP port.
    'es.resource' = 'zhuanlidata9/zhuanliquanwen',
    'es.nodes' = '192.168.10.69,192.168.10.70,192.168.10.71',
    'es.port' = '9200',
    -- Use the uuid column as the document id and write with upsert.
    'es.mapping.id' = 'uuid',
    'es.write.operation' = 'upsert'
    );
    -- Leave the Hive CLI.
    exit;

    4、--将数据load进hive映射es表 /* 在11.31上修改 /data/2/zly/test_hive_es.sh 中的循环次数以及表名 */

    --{1..18}:循环次数；mapreduce.job.running.map.limit:单个作业同时运行的 map 任务数上限
    #!/bin/bash
    # Load report_statistics.zhuanli_zlqw into the ES-backed Hive table
    # test.zhuanlidata9, running the same full INSERT once per round.
    # Every row gets a freshly generated UUID (dashes removed) as its uuid.
    #
    # Optional environment overrides:
    #   ROUNDS     number of INSERT rounds to run (default: 18)
    #   MAP_LIMIT  value for mapreduce.job.running.map.limit, the cap on
    #              simultaneously running map tasks per job (default: 50)
    #
    # Exit status: zero only if every round succeeded.

    rounds="${ROUNDS:-18}"
    map_limit="${MAP_LIMIT:-50}"

    # The HiveQL is held in a shell double-quoted string, so every SQL string
    # literal inside it uses single quotes: a double quote here would end the
    # shell string and be stripped before Hive ever saw the statement.
    # The date alias is back-quoted (escaped for the shell) because DATE is a
    # reserved keyword in Hive 1.2 and later.
    # NOTE(review): the date column is filled from appl_date, exactly like
    # the appl_date column below; confirm this is intended and not a
    # copy-paste slip for the source table's own date column.
    load_sql="
    add jar /data/2/zly/elasticsearch-hadoop-5.6.8/dist/elasticsearch-hadoop-5.6.8.jar;
    set mapreduce.job.running.map.limit=${map_limit};
    insert into test.zhuanlidata9
    select
    regexp_replace(reflect('java.util.UUID', 'randomUUID'), '-', '') uuid,
    filename,
    lang,
    country,
    doc_number,
    kind,
    case when appl_date like '2%' then appl_date else '' end \`date\` ,
    gazette_num,
    gazette_date,
    appl_type,
    appl_country,
    appl_doc_number,
    case when appl_date like '2%' then appl_date else '' end appl_date ,
    text,
    invention_title,
    assignees,
    assignees_address,
    abstracts,
    applicants,
    applicants_address,
    inventors,
    agents,
    agency,
    descriptions,
    claims,
    cn_related_publication,
    cn_publication_referen,
    cn_related_document,
    priority_claims,
    reference,
    searcher
    from report_statistics.zhuanli_zlqw;
    "

    # Run the rounds. A failed round is reported on stderr and counted, and
    # the remaining rounds are still attempted.
    failed_rounds=0
    for ((round = 1; round <= rounds; round++)); do
      if ! hive -e "${load_sql}"; then
        printf 'hive load round %d/%d failed\n' "${round}" "${rounds}" >&2
        failed_rounds=$((failed_rounds + 1))
      fi
    done

    # Last command of the script: its status becomes the script's exit
    # status, so callers see a failure if any round failed.
    [[ "${failed_rounds}" -eq 0 ]]



  • 相关阅读:
    Maven介绍
    自考:计算机网络原理 2018版 李全龙 课后习题答案
    jmeter分布式部署遇到的坑
    mysql循环 insert插入多条数据
    认识Nacos.
    mysql中where子句中使用别名查询出现问题
    python之bytes和string相互转换
    什么叫线圈?什么寄存器?什么叫保持寄存器?
    Modbus-RTU详解(转载)
    python进制之间的转换函数
  • 原文地址:https://www.cnblogs.com/oneby/p/9187776.html
Copyright © 2020-2023  润新知