• Hue 工具使用



    Hue 是一个 Web 界面的 Hadoop 数据分析工具,由 Cloudera 公司开源

    官方网址

    Github 地址 -> 安装方法

    文档地址

    一.Build

    1.ubuntu安装所需环境(以Github为准)

    # JDK
    # maven
    # 其他环境
    $ sudo apt-get install git ant gcc g++ libffi-dev libkrb5-dev libmysqlclient-dev libsasl2-dev libsasl2-modules-gssapi-mit libsqlite3-dev libssl-dev libxml2-dev libxslt-dev make maven libldap2-dev python-dev python-setuptools libgmp3-dev
    

    2.build

    $ make apps
    

    二.配置

    1.基础配置(位于官方文档3.1节)

    secret_key=jFE93j;2[290-eiw.KEiwN2s3['d;/.q[eIW^y#e=+Iei*@Mn<qW5o
    
    http_host=cen-ubuntu
    http_port=8888
    
    time_zone=Asia/Shanghai
    

    2.WebHDFS 配置

    # hdfs-site.xml(默认为true)
    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>
    
    # core-site.xml 配置代理
    <property>
        <name>hadoop.proxyuser.hue.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.hue.groups</name>
        <value>*</value>
    </property>
    
    # hue.ini 配置 3 处,若配置 HA 需要配置 logical_name 
    [hadoop]
    
      # Configuration for HDFS NameNode
      # ------------------------------------------------------------------------
      [[hdfs_clusters]]
        # HA support by using HttpFs
    
        [[[default]]]
          # Enter the filesystem uri
          fs_defaultfs=hdfs://cen-ubuntu:8020
    
          # NameNode logical name.
          ## logical_name=
    
          # Use WebHdfs/HttpFs as the communication mechanism.
          # Domain should be the NameNode or HttpFs host.
          # Default port is 14000 for HttpFs.
          webhdfs_url=http://cen-ubuntu:50070/webhdfs/v1
    
          # Change this if your HDFS cluster is Kerberos-secured
          ## security_enabled=false
    
          # In secure mode (HTTPS), if SSL certificates from YARN Rest APIs
          # have to be verified against certificate authority
          ## ssl_cert_ca_verify=True
    
          # Directory of the Hadoop configuration
          hadoop_conf_dir=/opt/cdh5.3.6/hadoop-2.6.0-cdh5.12.0/etc/hadoop
    

    3.YARN 配置

    # hue.ini
    [[yarn_clusters]]
    
      [[[default]]]
        # Enter the host on which you are running the ResourceManager
        resourcemanager_host=cen-ubuntu
    
        # The port where the ResourceManager IPC listens on
        resourcemanager_port=8032
    
        # Whether to submit jobs to this cluster
        submit_to=True
    
        # Resource Manager logical name (required for HA)
        ## logical_name=
    
        # Change this if your YARN cluster is Kerberos-secured
        ## security_enabled=false
    
        # URL of the ResourceManager API
        resourcemanager_api_url=http://cen-ubuntu:8088
    
        # URL of the ProxyServer API
        proxy_api_url=http://cen-ubuntu:8088
    
        # URL of the HistoryServer API
        history_server_api_url=http://cen-ubuntu:19888
    
        # URL of the Spark History Server
        ## spark_history_server_url=http://localhost:18088
    
        # In secure mode (HTTPS), if SSL certificates from YARN Rest APIs
        # have to be verified against certificate authority
        ## ssl_cert_ca_verify=True
    

    4.临时文件目录

    [filebrowser]
      # Location on local filesystem where the uploaded archives are temporary stored.
      archive_upload_tempdir=/tmp
    

    5.Hive 配置(需要启动 HiveServer2 服务和 Metastore 服务)

    # hive-site.xml
    <!-- 配置server2 的地址和端口 -->
    <property>
      <name>hive.server2.thrift.port</name>
      <value>10000</value>
      <description>Port number of HiveServer2 Thrift interface when hive.server2.transport.mode is 'binary'.</description>
    </property>
    
    <property>
      <name>hive.server2.thrift.bind.host</name>
      <value>cen-ubuntu</value>
      <description>Bind host on which to run the HiveServer2 Thrift service.</description>
    </property>
    
    # 启动hiveserver2
    $ bin/hiveserver2 
    
    # hive-site.xml
    <!-- 配置远程 remote metastore 的uri 见hive官方文档-->
    <property>
      <name>hive.metastore.uris</name>
      <value>thrift://cen-ubuntu:9083</value>
    </property>
      
    # 启动 metastore server
    hive --service metastore
    
    # hue.ini
    [beeswax]
    
      # Host where HiveServer2 is running.
      # If Kerberos security is enabled, use fully-qualified domain name (FQDN).
      hive_server_host=cen-ubuntu
    
      # Port where HiveServer2 Thrift server runs on.
      hive_server_port=10000
    
      # Hive configuration directory, where hive-site.xml is located
      hive_conf_dir=/opt/cdh5.3.6/hive-1.1.0-cdh5.12.0/conf
    
      # Timeout in seconds for thrift calls to Hive service
      server_conn_timeout=120
    

    6.database 连接管理关系型数据库(SQLite3 是 Hue 自带的数据库)(注意:需要删除[[[xxx]]]前注释)

    ###########################################################################
    # Settings for the RDBMS application
    ###########################################################################
    
    [librdbms]
      # The RDBMS app can have any number of databases configured in the databases
      # section. A database is known by its section name
      # (IE sqlite, mysql, psql, and oracle in the list below).
    
      [[databases]]
        # sqlite configuration.
        ## [[[sqlite]]]
          # Name to show in the UI.
          nice_name=SQLite
    
          # For SQLite, name defines the path to the database.
          name=/opt/cdh5.3.6/hue-3.9.0-cdh5.12.0/desktop/desktop.db
    
          # Database backend to use.
          engine=sqlite
    
          # Database options to send to the server when connecting.
          # https://docs.djangoproject.com/en/1.4/ref/databases/
          ## options={}
    
        # mysql, oracle, or postgresql configuration.
        [[[mysql]]]
          # Name to show in the UI.
          nice_name="My SQL DB"
    
          # For MySQL and PostgreSQL, name is the name of the database.
          # For Oracle, Name is instance of the Oracle server. For express edition
          # this is 'xe' by default.
          name=mysqldb
    
          # Database backend to use. This can be:
          # 1. mysql
          # 2. postgresql
          # 3. oracle
          engine=mysql
    
          # IP or hostname of the database to connect to.
          host=cen-ubuntu
    
          # Port the database server is listening to. Defaults are:
          # 1. MySQL: 3306
          # 2. PostgreSQL: 5432
          # 3. Oracle Express Edition: 1521
          port=3306
    
          # Username to authenticate with when connecting to the database.
          user=root
    
          # Password matching the username to authenticate with when
          # connecting to the database.
          password=ubuntu
    
          # Database options to send to the server when connecting.
          # https://docs.djangoproject.com/en/1.4/ref/databases/
          ## options={}
    

    7.Oozie 配置

    [liboozie]
      # The URL where the Oozie service runs on. This is required in order for
      # users to submit jobs. Empty value disables the config check.
      oozie_url=http://cen-ubuntu:11000/oozie
    
      # Requires FQDN in oozie_url if enabled
      ## security_enabled=false
    
      # Location on HDFS where the workflows/coordinator are deployed when submitted.
      remote_deployement_dir=/user/cen/examples/apps
    
      [oozie]
        # Location on local FS where the examples are stored.
        local_data_dir=/opt/cdh5.3.6/oozie-4.1.0-cdh5.12.0/examples
    
        # Location on local FS where the data for the examples is stored.
        sample_data_dir=/opt/cdh5.3.6/oozie-4.1.0-cdh5.12.0/examples/input-data
    
        # Location on HDFS where the oozie examples and workflows are stored.
        # Parameters are $TIME and $USER, e.g. /user/$USER/hue/workspaces/workflow-$TIME
        remote_data_dir=/user/cen/examples/apps/
    

    三.运行

    # 0.0.0.0意味着所有ip都能访问,本来是在hue.ini中配置的,但是配置不生效,因此手动设置
    $ build/env/bin/hue runserver 0.0.0.0:8000
  • 相关阅读:
    阿里双11,如何实现每秒几十万的高并发写入
    记住:永远不要在 MySQL 中使用 UTF-8
    史上最烂的项目:苦撑 12 年,600 多万行代码
    除了不要 SELECT * ,使用数据库还应知道的11个技巧!
    厉害了,为了干掉 HTTP ,Spring团队又开源 nohttp 项目!
    请停止学习框架
    基于 MySQL 主从模式搭建上万并发的系统架构
    JS获取节点
    JS函数
    JS
  • 原文地址:https://www.cnblogs.com/cenzhongman/p/7261170.html
Copyright © 2020-2023  润新知