• prometheus告警规则模板:MySQL,nginx,node


    rules_up.yml

    # Rule file with a single group named "up"; its rules are the list that
    # follows under `rules:`.
    groups:
    - name: up
      rules:
      # Fires when the target with job="mysql" on instance "db1" has reported
      # up == 0 (scrape failing) for 10s. The comparison used to be `!= 0`,
      # which alerted while the target was healthy and stayed silent during an
      # outage. The annotations previously named nginx-vts-web (copy-paste).
      - alert: mysql
        expr: up{instance="db1",job="mysql"} == 0
        for: 10s
        labels:
          instance: '{{$labels.instance}}'
          priority: "3"
          type: 'mysql'
        annotations:
          description: '{{ $labels.instance }} mysql is down please handle'
          summary: 'jobname: {{$labels.instance}} mysql is down please handle'
    
    
    
      # Fires when any target with job="node" has reported up == 0 (scrape
      # failing) for 10s. The comparison used to be `!= 0`, which alerted on
      # healthy targets and never on a real outage. The annotations previously
      # named nginx-vts-web (copy-paste).
      - alert: node
        expr: up{job="node"} == 0
        for: 10s
        labels:
          instance: '{{$labels.instance}}'
          priority: "3"
          type: 'node'
        annotations:
          description: '{{ $labels.instance }} node is down please handle'
          summary: 'jobname: {{$labels.instance}} node is down please handle'
    
    
      # Fires when the target with job="nginx" on instance "web" has reported
      # up == 0 (scrape failing) for 10s. The comparison used to be `!= 0`,
      # which alerted while the target was healthy and stayed silent during an
      # outage.
      - alert: nginx
        expr: up{instance="web",job="nginx"} == 0
        for: 10s
        labels:
          instance: '{{$labels.instance}}'
          priority: "3"
          type: 'nginx'
        annotations:
          description: '{{ $labels.instance }} nginx-vts-web  is down please handle'
          summary: 'jobname: {{$labels.instance}} nginx-vts-web is down please handle'
    
    

    rules_mysql.yml

    # Rule file with a single group named "mysql.rules"; its recording and
    # alerting rules are the list that follows under `rules:`.
    groups:
    - name: mysql.rules
      rules:
      # Seconds behind master minus the configured SQL delay.
      - record: mysql_slave_lag_seconds
        expr: >-
          mysql_slave_status_seconds_behind_master -
          mysql_slave_status_sql_delay
      # Heartbeat "now" timestamp minus the stored heartbeat timestamp.
      - record: mysql_heartbeat_lag_seconds
        expr: >-
          mysql_heartbeat_now_timestamp_seconds -
          mysql_heartbeat_stored_timestamp_seconds
      # 5m rate of commit and rollback commands, summed with the `command`
      # label aggregated away.
      - record: job:mysql_transactions:rate5m
        expr: >-
          sum(rate(mysql_global_status_commands_total{command=~"(commit|rollback)"}[5m]))
          WITHOUT (command)
      # Warns when mysql_global_status_wsrep_ready has been anything other than
      # 1 for five minutes.
      - alert: MySQLGaleraNotReady
        expr: 'mysql_global_status_wsrep_ready != 1'
        for: 5m
        labels:
          severity: 'warning'
        annotations:
          summary: 'Galera cluster node not ready'
          description: "{{$labels.job}} on {{$labels.instance}} is not ready."
      # Warns when wsrep_local_state has not been 4 for five minutes while the
      # wsrep_desync variable is 0. $value is the local state, because `and`
      # keeps the left-hand sample.
      - alert: MySQLGaleraOutOfSync
        expr: >-
          (mysql_global_status_wsrep_local_state != 4
          and mysql_global_variables_wsrep_desync == 0)
        for: 5m
        labels:
          severity: 'warning'
        annotations:
          summary: 'Galera cluster node out of sync'
          description: >-
            {{$labels.job}} on {{$labels.instance}} is not in sync ({{$value}}
            != 4).
      # Warns when wsrep_local_state has been 2 for five minutes and the local
      # receive queue exceeds 100.
      - alert: MySQLGaleraDonorFallingBehind
        expr: >-
          (mysql_global_status_wsrep_local_state == 2
          and mysql_global_status_wsrep_local_recv_queue > 100)
        for: 5m
        labels:
          severity: 'warning'
        annotations:
          summary: 'xtradb cluster donor node falling behind'
          description: >-
            {{$labels.job}} on {{$labels.instance}} is a donor (hotbackup)
            and is falling behind (queue size {{$value}}).
      # Critical when either the slave IO-running or SQL-running metric has
      # been 0 for two minutes.
      - alert: MySQLReplicationNotRunning
        expr: >-
          mysql_slave_status_slave_io_running == 0
          or mysql_slave_status_slave_sql_running == 0
        for: 2m
        labels:
          severity: 'critical'
        annotations:
          summary: 'Slave replication is not running'
          description: 'Slave replication (IO or SQL) has been down for more than 2 minutes.'
      # Critical when recorded slave lag is above 30s and, on the same
      # instance, a linear projection from the last 5m of samples is still
      # above 0 at 60 * 2 seconds ahead. Must hold for one minute.
      - alert: MySQLReplicationLag
        expr: >-
          (mysql_slave_lag_seconds > 30)
          and ON(instance)
          (predict_linear(mysql_slave_lag_seconds[5m], 60 * 2) > 0)
        for: 1m
        labels:
          severity: 'critical'
        annotations:
          summary: 'MySQL slave replication is lagging'
          description: 'The mysql slave replication has fallen behind and is not recovering'
      # Heartbeat-based variant of the replication-lag alert: critical when
      # recorded heartbeat lag is above 30s and, on the same instance, a linear
      # projection from the last 5m of samples is still above 0 at 60 * 2
      # seconds ahead. Must hold for one minute.
      - alert: MySQLReplicationLag
        expr: >-
          (mysql_heartbeat_lag_seconds > 30)
          and ON(instance)
          (predict_linear(mysql_heartbeat_lag_seconds[5m], 60 * 2) > 0)
        for: 1m
        labels:
          severity: 'critical'
        annotations:
          summary: 'MySQL slave replication is lagging'
          description: 'The mysql slave replication has fallen behind and is not recovering'
      # Warns as soon as the 15m rate of innodb_log_waits exceeds 10 per
      # second. There is no `for:` clause, so no hold period applies.
      - alert: MySQLInnoDBLogWaits
        expr: 'rate(mysql_global_status_innodb_log_waits[15m]) > 10'
        labels:
          severity: 'warning'
        annotations:
          summary: 'MySQL innodb log writes stalling'
          description: >-
            The innodb logs are waiting for disk at a rate of {{$value}} /
            second
    
    

    rules_nginx.yml

    # Rule file with a single group named "aws_ec2_nginx-vts-web"; its alerting
    # rules are the list that follows under `rules:`.
    groups:
    - name: aws_ec2_nginx-vts-web
      rules:
      # Fires when a target with job="nginx-vts-web" has reported up == 0 for
      # one minute.
      - alert: nginx-vts-web-status
        expr: 'up{job="nginx-vts-web"} == 0'
        for: 1m
        labels:
          type: "nginx"
          priority: '3'
          instance: "{{$labels.instance}}"
        annotations:
          summary: "jobname: {{$labels.instance}} nginx-vts-web is down please handle"
          description: "{{ $labels.instance }} nginx-vts-web  is down please handle"
      # Fires when the 5xx request counter, summed per (application,
      # direction), has grown by more than 10 compared with its value one
      # minute earlier, sustained for one minute.
      - alert: 'nginx application 5xx gt 10 per min'
        expr: >-
          sum(nginx_vts_filter_requests_total{direction="5xx",job="nginx-vts-web"} -
          nginx_vts_filter_requests_total{direction="5xx",job="nginx-vts-web"} offset 1m)
          by (application,direction) > 10
        for: 1m
        labels:
          priority: '4'
          type: "nginx"
        annotations:
          summary: "  {{$labels.application}} {{$labels.direction}}  >10/m"
          description: " {{$labels.application}} {{$labels.direction}} >10/m"
      # Fires when the 5xx request counter, summed per (application,
      # direction, filter_name), has grown by more than 1 compared with its
      # value one minute earlier, sustained for one minute.
      # The annotations previously said ">10/m" (copied from the coarser rule)
      # although the threshold here is 1, and they rendered direction and
      # filter_name with no separator between them.
      - alert: nginx application 5xx gt 1 per min
        expr: sum(nginx_vts_filter_requests_total{direction="5xx",job="nginx-vts-web"} - nginx_vts_filter_requests_total{direction="5xx",job="nginx-vts-web"} offset 1m) by (application,direction,filter_name) > 1
        for: 1m
        labels:
          type: 'nginx-detail'
          priority: "4"
        annotations:
          description: ' {{$labels.application}} {{$labels.direction}} {{$labels.filter_name}} >1/m'
          summary: '  {{$labels.application}} {{$labels.direction}} {{$labels.filter_name}} >1/m'
     # - alert: nginx application 2xx gt 10 per min
     #   expr: sum(nginx_vts_filter_requests_total{direction="2xx",job="nginx-vts-web"} - nginx_vts_filter_requests_total{direction="2xx",job="nginx-vts-web"} offset 1m) by (application,direction) > 10
     #   for: 1m
     #   labels:
     #     test: 'yes' 
     #     priority: "4"
     #   annotations:
     #     description: ' {{$labels.application}} {{$labels.direction}} >10/m'
     #     summary: '  {{$labels.application}} {{$labels.direction}}  >10/m'
    
      # Fires when nginx_vts_filter_request_seconds, multiplied by 1000, stays
      # above 300 for one minute on any filter except /v1/users/kyc/upload.
      - alert: 'nginx interface time  gt 300ms'
        expr: 'nginx_vts_filter_request_seconds{job="nginx-vts-web",filter_name!="/v1/users/kyc/upload"}*1000 > 300'
        for: 1m
        labels:
          priority: '4'
          type: "nginx"
        annotations:
          summary: " {{$labels.application}} {{$labels.filter}} {{$labels.filter_name}} >300ms"
          description: " {{$labels.application}} {{$labels.filter}} {{$labels.filter_name}}  >300ms"
    
      # Fires when nginx_vts_filter_request_seconds, multiplied by 1000, stays
      # above 2000 for one minute on the /v1/users/kyc/upload filter.
      # The annotations previously said ">300ms" (copied from the general
      # latency rule) although this rule's threshold is 2000 ms.
      - alert: nginx interface time  gt 2s
        expr: nginx_vts_filter_request_seconds{job="nginx-vts-web",filter_name="/v1/users/kyc/upload"}*1000 > 2000
        for: 1m
        labels:
          type: 'nginx'
          priority: "4"
        annotations:
          description: ' {{$labels.application}} {{$labels.filter}} {{$labels.filter_name}}  >2s'
          summary: ' {{$labels.application}} {{$labels.filter}} {{$labels.filter_name}} >2s'
     
    
    

    rules_node.yml

    # Rule file whose first group is named "example-node-exporter-rules"; its
    # rules are the list that follows under `rules:`.
    groups:
    - name: example-node-exporter-rules
      rules:
      # Counts the idle-mode node_cpu_seconds_total series with the `cpu` and
      # `mode` labels aggregated away (one series per CPU), and fires when
      # that count is above 2 for 1s.
      # The annotations previously referenced application/filter/filter_name
      # and ">300ms", copied from an nginx latency rule; they now describe the
      # CPU count this expression evaluates.
      - alert: instance:node_cpus:count
        expr: count(node_cpu_seconds_total{mode="idle"}) without (cpu,mode) > 2
        for: 1s
        annotations:
          description: '{{$labels.instance}} has {{$value}} CPUs (more than 2)'
          summary: '{{$labels.instance}} has more than 2 CPUs'
    # Second group in this file, named "node_up"; its rules are the list that
    # follows under `rules:`.
    - name: node_up
      rules:
      # Fires when the target with job="node" on instance "node" has reported
      # up != 1 for 1s, i.e. the scrape is failing.
      # Fixes: `$lables` was an undefined template variable, so the annotation
      # templates failed; the text said "is up" although the condition matches
      # a target that is down. The alert name is kept as-is so existing
      # Alertmanager routing keeps matching.
      - alert: instance:node_cpus:count
        expr: up{instance="node",job="node"} != 1
        for: 1s
        annotations:
          description: ' {{$labels.application}} {{$labels.instance}}  is down'
          summary: ' {{$labels.application}} {{$labels.instance}}  is down'
    
    
    
  • 相关阅读:
    webrtc公开课
    webrtc第二篇 聊天室
    webrtc第一篇
    lua53
    setValuesForKeysWithDictionary forUndefinedKey
    xcode 插件
    2016年1月7日 隐藏NavigationBar时的一个坑
    jdbc
    brew gradle
    Openwrt TF Card Auto Mount&Check (4)
  • 原文地址:https://www.cnblogs.com/sanduzxcvbnm/p/15591790.html
Copyright © 2020-2023  润新知