• 大数据集群运维(64)Airflow安装教程


    1.安装环境

    CentOS-6.5
    Python-2.7.12
    setuptools-29.0.1
    pip-9.0.1

    2.编译Python

    sudo yum install -y gcc
    sudo yum install -y gcc-c++
    sudo yum install -y wget
    sudo yum install -y mysql
    sudo yum install -y mysql-devel
    sudo yum install -y python-devel
    sudo yum install -y zlib-devel
    sudo yum install -y openssl-devel
    sudo yum install -y sqlite-devel
    wget https://www.python.org/ftp/python/2.7.12/Python-2.7.12.tgz
    
    sudo mkdir /usr/local/python27
    sudo tar zxfv Python-2.7.12.tgz -C /usr/local/
    cd /usr/local/Python-2.7.12/
    ./configure --prefix=/usr/local/python27
    make
    make install
    sudo mv /usr/bin/python /usr/bin/python2.6
    sudo ln -sf /usr/local/python27/bin/python /usr/bin/python2.7
    
    #yum依赖系统自带的Python 2.6,需要把/usr/bin/yum第一行的解释器改为python2.6
    vim /usr/bin/yum
    #!/usr/bin/python2.6
    
    vim /etc/profile
    export PYTHON_HOME=/usr/local/python27
    export PATH=$PYTHON_HOME/bin:$PATH
    
    wget https://pypi.python.org/packages/59/88/2f3990916931a5de6fa9706d6d75eb32ee8b78627bb2abaab7ed9e6d0622/setuptools-29.0.1.tar.gz#md5=28ecfd0f2574b489b9a18343879a7324
    tar zxfv setuptools-29.0.1.tar.gz
    cd setuptools-29.0.1
    python setup.py install
    
    wget https://pypi.python.org/packages/11/b6/abcb525026a4be042b486df43905d6893fb04f05aac21c32c638e939e447/pip-9.0.1.tar.gz#md5=35f01da33009719497f01a4ba69d63c9
    tar zxfv pip-9.0.1.tar.gz
    cd pip-9.0.1
    python setup.py install
    pip install --upgrade pip
    
    wget https://pypi.python.org/packages/a5/e9/51b544da85a36a68debe7a7091f068d802fc515a3a202652828c73453cad/MySQL-python-1.2.5.zip#md5=654f75b302db6ed8dc5a898c625e030c
    unzip MySQL-python-1.2.5.zip
    cd MySQL-python-1.2.5
    python setup.py install
    
    #第三方包 /usr/local/python27/lib/python2.7/site-packages

    3.安装

        airflow可以通过pip方便地安装到系统中。

    # airflow needs a home, ~/airflow is the default,
    # but you can lay foundation somewhere else if you prefer
    # (optional)
    export AIRFLOW_HOME=/usr/local/airflow
    
    # install from pypi using pip
    pip install airflow
    pip install airflow[hive]
    
    # initialize the database
    airflow initdb
    
    # start the web server, default port is 8080
    airflow webserver -p 8080

    4.设置mysql为元数据库

    #首先要安装mysql客户端
    sudo yum install -y mysql
    sudo yum install -y mysql-devel
    
    CREATE USER airflow;
    CREATE DATABASE airflow;
    CREATE DATABASE celery_result_airflow;
    
    GRANT all privileges on airflow.* TO 'airflow'@'%' IDENTIFIED BY 'airflow';
    GRANT all privileges on celery_result_airflow.* TO 'airflow'@'%' IDENTIFIED BY 'airflow';
    
    #安装mysql模块
    wget https://pypi.python.org/packages/a5/e9/51b544da85a36a68debe7a7091f068d802fc515a3a202652828c73453cad/MySQL-python-1.2.5.zip#md5=654f75b302db6ed8dc5a898c625e030c
    unzip MySQL-python-1.2.5.zip
    cd MySQL-python-1.2.5
    python setup.py install
    
    #在airflow的配置文件中配置mysql为元数据的存储库
    sudo vi $AIRFLOW_HOME/airflow.cfg
    
    #更改数据库链接:
    sql_alchemy_conn = mysql://airflow:airflow@localhost:3306/airflow
    
    #对应字段解释如下:
    dialect+driver://username:password@host:port/database
    
    #初始化元数据库
    airflow initdb
    
    #重置元数据库
    airflow resetdb

    5.安装登录模块

    #安装password模块
    pip install airflow[password]
    
    #在airflow的配置文件中修改需要认证
    sudo vi $AIRFLOW_HOME/airflow.cfg
    [webserver]
    authenticate = True
    filter_by_owner = True
    auth_backend = airflow.contrib.auth.backends.password_auth

    在python交互环境中运行以下代码,将用户名和密码写入元数据库中

    import airflow
    from airflow import models, settings
    from airflow.contrib.auth.backends.password_auth import PasswordUser
    
    user = PasswordUser(models.User())
    user.username = 'quzhengpeng'
    user.email = 'quzhengpeng@163.com'
    user.password = 'quzhengpeng'
    session = settings.Session()
    session.add(user)
    session.commit()
    session.close()
    exit()

    6.启动守护进程

        启动后台守护进程之后,Airflow才能实时监控任务的调度情况。将任务脚本放到${AIRFLOW_HOME}/dags目录下,在Web UI中就能看到任务的执行情况。

    airflow scheduler

    7.启动web服务

    #启动web进程
    airflow webserver -p 8080
    
    #关闭CentOS6的防火墙
    sudo service iptables stop
    
    #关闭CentOS6的SELinux
    setenforce 0
    
    #关闭CentOS7的防火墙
    systemctl stop firewalld.service
    
    #禁止firewall开机启动
    systemctl disable firewalld.service

    Celery+MySQL

    #Celery文档 http://docs.jinkan.org/docs/celery/index.html
    #Celery4.0.0在airflow中有一些问题,所以安装Celery3
    pip install -U Celery==3.1.24
    pip install airflow[celery]
    

    修改配置文件

    vi airflow.cfg
    
    [core]
    executor = CeleryExecutor
    
    [celery]
    broker_url = sqla+mysql://airflow:airflow@localhost:3306/airflow
    
    celery_result_backend = db+mysql://airflow:airflow@localhost:3306/airflow

    启动airflow

    airflow webserver -p 8080
    
    airflow scheduler
    
    #以非root用户运行
    airflow worker
    
    #启动Celery WebUI 查看celery任务
    airflow flower 
    http://localhost:5555/

    Celery+RabbitMQ

    wget http://www.rabbitmq.com/releases/rabbitmq-server/v3.6.5/rabbitmq-server-3.6.5-1.noarch.rpm
    
    #安装RabbitMQ的依赖包
    yum install erlang
    
    yum install socat
    
    #如果已经配置了rabbitmq的yum源,也可以直接执行: sudo yum install -y rabbitmq-server
    rpm -ivh rabbitmq-server-3.6.5-1.noarch.rpm

    启动RabbitMQ服务

    #启动rabbitmq服务
    sudo service rabbitmq-server start 
    #或者
    sudo rabbitmq-server
    
    #添加 -detached 参数来让它在后台运行(注意:只有一个连字符)
    sudo rabbitmq-server -detached
    
    #设置开机启动rabbitmq服务
    chkconfig rabbitmq-server on
    
    #永远不要用 kill 停止 RabbitMQ 服务器,而是应该用 rabbitmqctl 命令
    sudo rabbitmqctl stop

    设置RabbitMQ

    #创建一个RabbitMQ用户
    rabbitmqctl add_user airflow airflow
    
    #创建一个RabbitMQ虚拟主机
    rabbitmqctl add_vhost vairflow
    
    #将这个用户赋予admin的角色
    rabbitmqctl set_user_tags airflow admin
    
    #允许这个用户访问这个虚拟主机
    rabbitmqctl set_permissions -p vairflow airflow ".*" ".*" ".*"
    
    #启用RabbitMQ的管理插件(可选,airflow本身用不到)
    rabbitmq-plugins enable rabbitmq_management 

    修改airflow配置文件支持Celery

    vi $AIRFLOW_HOME/airflow.cfg
    
    #更改Executor为CeleryExecutor
    executor = CeleryExecutor
    
    #更改broker_url
    broker_url = amqp://airflow:airflow@localhost:5672/vairflow
    #格式说明: transport://userid:password@hostname:port/virtual_host
    
    #更改celery_result_backend
    celery_result_backend = amqp://airflow:airflow@localhost:5672/vairflow
    #格式说明: transport://userid:password@hostname:port/virtual_host

    安装airflow的celery和rabbitmq模块

    pip install airflow[celery]
    pip install airflow[rabbitmq]

    airflow使用DAG(Directed Acyclic Graph,有向无环图)来管理作业流。

    #创建DAG
    from datetime import datetime, timedelta
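    from airflow.models import DAG
    #start_date使用的起始时间:七天前的零点
    seven_days_ago = datetime.combine(datetime.today() - timedelta(7), datetime.min.time())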
    args = {
        'owner': 'airflow',
        'start_date': seven_days_ago,
        'email': ['airflow@airflow.com'],
        'email_on_failure': True,
        'email_on_retry': True,
        'retries': 3,
        'retry_delay': timedelta(seconds=60),
        'depends_on_past': True
    }
    
    dag = DAG(
        dag_id='dag',
        default_args=args,
        schedule_interval='0 0 * * *',
        dagrun_timeout=timedelta(minutes=60)
    )

    创建任务将任务添加到DAG中

    from airflow.operators.bash_operator import BashOperator
    from airflow.operators.dummy_operator import DummyOperator
    
    demo = DummyOperator(
        task_id='demo',
        dag=dag
    )
    
    last_execute = BashOperator(
        task_id='last_execute',
        bash_command='echo 1',
        dag=dag
    )

    配置任务的依赖关系

    demo.set_downstream(last_execute)
     
  • 相关阅读:
    spring AOP概述和简单应用
    log4j输出指定功能的log配置方式区别
    java项目配置常见问题
    android 浮动按钮的伸缩效果
    Android之探究viewGroup自定义子属性参数的获取流程
    javaWeb之maven多数据库环境的配置信息
    mybatis generator配置生成代码的问题
    java之初识服务器跨域获取数据
    java之Maven配置和springMvc的简单应用
    UnicodeDecodeError: ‘ascii’ codec can’t decode byte 0xe5
  • 原文地址:https://www.cnblogs.com/huanghanyu/p/13935309.html
Copyright © 2020-2023  润新知