• cortex 水平扩展试用


    cortex 支持多实例运行,可以灵活适应大规模的部署。以下 demo 运行了三个 cortex 实例,没有配置副本数(副本数主要用于保证 ha)。
    同时对这三个 cortex 实例使用 haproxy 作为 push 以及查询接口的 lb,并基于 prometheus 对 haproxy 进行监控(基于 haproxy 2.0 内置的 prometheus
    支持)。系统依赖的组件基于 docker-compose 运行,详细配置参考 github

    环境准备

    • docker-compose文件
     
    # docker-compose stack: three cortex instances behind an haproxy LB,
    # consul as the shared ring kvstore, prometheus remote-writing through
    # the LB, grafana + node-exporter for dashboards and host metrics.
    version: "3"
    services:
        haproxy:
          # haproxy 2.0+ ships a built-in prometheus exporter (see haproxy.cfg,
          # frontend stats on :8404)
          image: haproxy:2.0.5-alpine
          volumes:
          - "./haproxy.cfg:/usr/local/etc/haproxy/haproxy.cfg"
          ports:
          - "8404:8404"  # stats page + prometheus /metrics
          - "9009:9009"  # LB entry point for cortex push & query
        consul:
          # NOTE(review): unpinned tag — pin a version for reproducible runs
          image: consul
          ports:
          - "8500:8500"
        cortex1:
          image: cortexproject/cortex:master-7d13c2f0
          # -ring.store/-consul.hostname override the inmemory kvstore from
          # single-process-config.yaml so all three instances share one ring
          command: -config.file=/etc/single-process-config.yaml -ring.store=consul -consul.hostname=consul:8500
          depends_on:
          - consul
          ports:
          - "9001:9009"
          volumes:
          - "./single-process-config.yaml:/etc/single-process-config.yaml"
        cortex2:
          image: cortexproject/cortex:master-7d13c2f0
          command: -config.file=/etc/single-process-config.yaml -ring.store=consul -consul.hostname=consul:8500
          depends_on:
          - consul
          ports:
          - "9002:9009"
          volumes:
          - "./single-process-config.yaml:/etc/single-process-config.yaml"
        cortex3:
          image: cortexproject/cortex:master-7d13c2f0
          command: -config.file=/etc/single-process-config.yaml -ring.store=consul -consul.hostname=consul:8500
          depends_on:
          - consul
          ports:
          - "9003:9009"
          volumes:
          - "./single-process-config.yaml:/etc/single-process-config.yaml"
        # fixed typo: service was named "granfan"
        grafana:
          image: grafana/grafana
          ports:
          - "3000:3000"
        node-exporter:
          image: basi/node-exporter
          ports:
          - "9100:9100"
        prometheus:
          image: prom/prometheus
          ports:
          - "9090:9090"
          volumes:
          - "./prometheus.yml:/etc/prometheus/prometheus.yml"
    • cortex 配置
      single-process-config.yaml 文件
     
    # Configuration for running Cortex in single-process mode.
    # This should not be used in production. It is only for getting started
    # and development.
    # Disable the requirement that every request to Cortex has a
    # X-Scope-OrgID header. `fake` will be substituted in instead.
    auth_enabled: false
    server:
      # Each container listens on 9009; docker-compose maps them to
      # 9001/9002/9003 on the host and haproxy load-balances across them.
      http_listen_port: 9009
      # Configure the server to allow messages up to 100MB.
      grpc_server_max_recv_msg_size: 104857600
      grpc_server_max_send_msg_size: 104857600
      grpc_server_max_concurrent_streams: 1000
    distributor:
      shard_by_all_labels: true
      pool:
        # Actively health-check ingester connections in the client pool.
        health_check_ingesters: true
    ingester_client:
      grpc_client_config:
        # Configure the client to allow messages up to 100MB.
        max_recv_msg_size: 104857600
        max_send_msg_size: 104857600
        use_gzip_compression: true
    ingester:
      #chunk_idle_period: 15m
      lifecycler:
        # The address to advertise for this ingester. Will be autodiscovered by
        # looking up address on eth0 or en0; can be specified if this fails.
        # address: 127.0.0.1
        # We want to start immediately and flush on shutdown.
        join_after: 0
        claim_on_rollout: false
        final_sleep: 0s
        num_tokens: 512
        # Default to an in-memory ring store. NOTE: in this demo the compose
        # file overrides it with `-ring.store=consul -consul.hostname=consul:8500`
        # on the command line, so all three instances share one Consul-backed ring.
        ring:
          kvstore:
            store: inmemory
          # Each series is written to a single ingester (no replication / no ha).
          replication_factor: 1
    # Use local storage - BoltDB for the index, and the filesystem
    # for the chunks.
    schema:
      configs:
      # Schema active from this date; index entries are bucketed in
      # 168h (7-day) periods under the `index_` prefix.
      - from: 2019-03-25
        store: boltdb
        object_store: filesystem
        schema: v10
        index:
          prefix: index_
          period: 168h
    storage:
      # Local paths inside each container — data is not shared between the
      # three instances; use an external object store for real deployments.
      boltdb:
        directory: /tmp/cortex/index
      filesystem:
        directory: /tmp/cortex/chunks
    • prometheus 配置
      prometheus.yml 文件,主要是远端 write(remote_write)地址的配置,同时配置了几个监控 metrics 的抓取任务
     
    # my global config
    global:
      scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
      evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
      # scrape_timeout is set to the global default (10s).
    # Remote-write through the haproxy LB (fe_main :9009), which balances the
    # push requests across the three cortex instances.
    remote_write:
    - url: http://haproxy:9009/api/prom/push
    # Alertmanager configuration (no targets configured in this demo).
    alerting:
      alertmanagers:
      - static_configs:
        - targets:
          # - alertmanager:9093
    # Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
    rule_files:
      # - "first_rules.yml"
      # - "second_rules.yml"
    # A scrape configuration containing exactly one endpoint to scrape:
    # Here it's Prometheus itself.
    scrape_configs:
      # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
      - job_name: 'prometheus'
        # metrics_path defaults to '/metrics'
        # scheme defaults to 'http'.
        static_configs:
        - targets: ['localhost:9090']
      - job_name: 'node-exporter'
        # metrics_path defaults to '/metrics'
        # scheme defaults to 'http'.
        static_configs:
        - targets: ['node-exporter:9100']
      - job_name: 'haproxy-exporter'
        # Scrapes haproxy's built-in exporter: haproxy.cfg serves
        # prometheus-exporter on the stats frontend (:8404) at /metrics.
        # metrics_path defaults to '/metrics'
        # scheme defaults to 'http'.
        static_configs:
        - targets: ['haproxy:8404']
    • haproxy 配置
      主要是对后端 cortex 服务的 9009 端口做 lb
     
    global
        # master-worker required for `program` section
        # enable here or start with -Ws
        master-worker
        mworker-max-reloads 3
        # enable core dumps
        set-dumpable
        user root
        stats socket /run/haproxy.sock mode 600 level admin
        group root
        log stdout local0
    defaults
        mode http
        log global
        timeout client 5s
        timeout server 5s
        timeout connect 5s
        # re-dispatch to another server if the chosen one is unreachable
        option redispatch
        option httplog
    resolvers dns
        # use the container's /etc/resolv.conf for DNS resolution
        parse-resolv-conf
        resolve_retries 3
        timeout resolve 1s
        timeout retry 1s
        hold other 30s
        hold refused 30s
        hold nx 30s
        hold timeout 30s
        hold valid 10s
        hold obsolete 30s
    # NOTE(review): this userlist is defined but not referenced by any
    # auth/acl rule below — either wire it up or remove it. Also avoid
    # committing password hashes to VCS.
    userlist api 
      user admin password $5$aVnIFECJ$2QYP64eTTXZ1grSjwwdoQxK/AP8kcOflEO1Q5fc.5aA
    frontend stats
        bind *:8404
        # Enable Prometheus Exporter
        http-request use-service prometheus-exporter if { path /metrics }
        stats enable
        stats uri /stats
        stats refresh 10s
    # Main entry point: LB for cortex push and query traffic on :9009.
    frontend fe_main
        bind :9009 
        log-format "%ci:%cp [%tr] %ft %b/%s %TR/%Tw/%Tc/%Tr/%Ta %ST %B %CC %CS %tsc %ac/%fc/%bc/%sc/%rc %sq/%bq %hr %hs %{+Q}r cpu_calls:%[cpu_calls] cpu_ns_tot:%[cpu_ns_tot] cpu_ns_avg:%[cpu_ns_avg] lat_ns_tot:%[lat_ns_tot] lat_ns_avg:%[lat_ns_avg]"
        default_backend be_main
    backend be_main
        # Enable Power of Two Random Choices Algorithm
        balance random(2)
        # Enable Layer 7 retries
        retry-on all-retryable-errors
        retries 3 
        server cortex1 cortex1:9009 check inter 2s
        server cortex2 cortex2:9009 check inter 2s
        server cortex3 cortex3:9009 check inter 2s
    # NOTE(review): be_503 is never routed to by any rule above; it only
    # takes effect if something points traffic at it. Verify intent.
    backend be_503
        errorfile 503 /usr/local/etc/haproxy/errors/503.http

    启动&&效果

    • 启动
    docker-compose up -d
    • prometheus 效果

     



    • haproxy 监控


    • 配置grafana

    主要是prometheus datasource 以及dashboard 的添加
    datasource ,注意datasource 地址为cortex 通过haproxy lb 的地址

    dashboard, 可以参考项目,直接导入node exporter以及haproxy 的metrics dashboard

    • grafana 效果

    • cortex ring 效果

    独立


    haproxy

    说明

    以上是一个简单的配置。实际上我们可以在 cortex 启动的时候指定 -distributor.replication-factor=3 来保证 cortex 的 ha。同时上面只是比较简单的
    实践,实际中我们还需要其他后端存储作为数据的存储

    参考资料

    https://github.com/cortexproject/cortex/blob/master/docs/getting_started.md
    https://github.com/rongfengliang/cortex-docker-compose-running

  • 相关阅读:
    set desktop for aliyun ubuntu
    【深度学习】paddlepaddle——基于卷积神经网络的手写字识别案例
    【深度学习】paddlepaddle基础语法
    【深度学习】TensorFlow——理解张量
    【深度学习】TensorFlow——图详解
    【深度学习】TensorFlow——变量op
    【深度学习】TensorFlow——实现线性回归案例
    【深度学习】TensorFlow——理解会话
    【深度学习】TensorFlow——初识tensorflow
    【机器学习】量化策略
  • 原文地址:https://www.cnblogs.com/rongfengliang/p/11607479.html
Copyright © 2020-2023  润新知