• k8s全方位监控-prometheus-alertmanager部署-配置第一条告警邮件


    1、alertmanager告警插件部署

    [root@VM_0_48_centos prometheus]# cat  alertmanager-pvc.yaml
    apiVersion: v1
    kind: PersistentVolumeClaim
    metadata:
      name: alertmanager
      namespace: kube-system
      labels:
        kubernetes.io/cluster-service: "true"
        addonmanager.kubernetes.io/mode: EnsureExists
    spec:
      storageClassName: managed-nfs-storage           #必须动态PV,需要改为自己的名字
      accessModes:
        - ReadWriteOnce
      resources:
        requests:


    [root@VM_0_48_centos prometheus]# cat alertmanager-deployment.yaml
    apiVersion: apps/v1
    kind: Deployment
    metadata:
    name: alertmanager
    namespace: kube-system
    labels:
    k8s-app: alertmanager
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
    version: v0.14.0
    spec:
    replicas: 1
    selector:
    matchLabels:
    k8s-app: alertmanager
    version: v0.14.0
    template:
    metadata:
    labels:
    k8s-app: alertmanager
    version: v0.14.0
    annotations:
    scheduler.alpha.kubernetes.io/critical-pod: ''
    spec:
    priorityClassName: system-cluster-critical
    containers:
    - name: prometheus-alertmanager
    image: "prom/alertmanager:v0.14.0"
    imagePullPolicy: "IfNotPresent"
    args:
    - --config.file=/etc/config/alertmanager.yml
    - --storage.path=/data
    - --web.external-url=/
    ports:
    - containerPort: 9093
    readinessProbe:
    httpGet:
    path: /#/status
    port: 9093
    initialDelaySeconds: 30
    timeoutSeconds: 30
    volumeMounts:
    - name: config-volume
    mountPath: /etc/config
    - name: storage-volume
    mountPath: "/data"
    subPath: ""
    resources:
    limits:
    cpu: 10m
    memory: 50Mi
    requests:
    cpu: 10m
    memory: 50Mi
    - name: prometheus-alertmanager-configmap-reload
    image: "jimmidyson/configmap-reload:v0.1"
    imagePullPolicy: "IfNotPresent"
    args:
    - --volume-dir=/etc/config
    - --webhook-url=http://localhost:9093/-/reload
    volumeMounts:
    - name: config-volume
    mountPath: /etc/config
    readOnly: true
    resources:
    limits:
    cpu: 10m
    memory: 10Mi
    requests:
    cpu: 10m
    memory: 10Mi
    volumes:
    - name: config-volume
    configMap:
    name: alertmanager-config
    - name: storage-volume
    persistentVolumeClaim:
    claimName: alertmanager

    [root@VM_0_48_centos prometheus]# cat alertmanager-service.yaml
    apiVersion: v1
    kind: Service
    metadata:
    name: alertmanager
    namespace: kube-system
    labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/name: "Alertmanager"
    spec:
    ports:
    - name: http
    port: 80
    protocol: TCP
    targetPort: 9093
    selector:
    k8s-app: alertmanager
    type: "ClusterIP"

    2、prometheus 查看服务是否正常

    查看alertmanager服务是否正常
    [root@VM_0_48_centos prometheus]# kubectl  get svc  -n  kube-system
    NAME                   TYPE        CLUSTER-IP   EXTERNAL-IP   PORT(S)                  AGE
    alertmanager           ClusterIP   10.0.0.176   <none>        80/TCP                   46h
    

     

    3、 prometheus 配置告警规则和调用altermanager

    [root@VM_0_48_centos prometheus]# cat  prometheus-configmap.yaml
    # Prometheus configuration format https://prometheus.io/docs/prometheus/latest/configuration/configuration/
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: prometheus-config
      namespace: kube-system
      labels:
        kubernetes.io/cluster-service: "true"
        addonmanager.kubernetes.io/mode: EnsureExists
    data:
      prometheus.yml: |
        scrape_configs:
        - job_name: prometheus
          static_configs:
          - targets:
            - localhost:9090
    
        - job_name: kubernetes-apiservers
          kubernetes_sd_configs:
          - role: endpoints
          relabel_configs:
          - action: keep
            regex: default;kubernetes;https
            source_labels:
            - __meta_kubernetes_namespace
            - __meta_kubernetes_service_name
            - __meta_kubernetes_endpoint_port_name
          scheme: https
          tls_config:
            ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
            insecure_skip_verify: true
          bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    
        - job_name: kubernetes-nodes-kubelet
          kubernetes_sd_configs:
          - role: node
          relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          scheme: https
          tls_config:
            ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
            insecure_skip_verify: true
          bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    
        - job_name: kubernetes-nodes-cadvisor
          kubernetes_sd_configs:
          - role: node
          relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __metrics_path__
            replacement: /metrics/cadvisor
          scheme: https
          tls_config:
            ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
            insecure_skip_verify: true
          bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    
        - job_name: kubernetes-service-endpoints
          kubernetes_sd_configs:
          - role: endpoints
          relabel_configs:
          - action: keep
            regex: true
            source_labels:
            - __meta_kubernetes_service_annotation_prometheus_io_scrape
          - action: replace
            regex: (https?)
            source_labels:
            - __meta_kubernetes_service_annotation_prometheus_io_scheme
            target_label: __scheme__
          - action: replace
            regex: (.+)
            source_labels:
            - __meta_kubernetes_service_annotation_prometheus_io_path
            target_label: __metrics_path__
          - action: replace
            regex: ([^:]+)(?::d+)?;(d+)
            replacement: $1:$2
            source_labels:
            - __address__
            - __meta_kubernetes_service_annotation_prometheus_io_port
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - action: replace
            source_labels:
            - __meta_kubernetes_namespace
            target_label: kubernetes_namespace
          - action: replace
            source_labels:
            - __meta_kubernetes_service_name
            target_label: kubernetes_name
    
        - job_name: kubernetes-services
          kubernetes_sd_configs:
          - role: service
          metrics_path: /probe
          params:
            module:
            - http_2xx
          relabel_configs:
          - action: keep
            regex: true
            source_labels:
            - __meta_kubernetes_service_annotation_prometheus_io_probe
          - source_labels:
            - __address__
            target_label: __param_target
          - replacement: blackbox
            target_label: __address__
          - source_labels:
            - __param_target
            target_label: instance
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels:
            - __meta_kubernetes_namespace
            target_label: kubernetes_namespace
          - source_labels:
            - __meta_kubernetes_service_name
            target_label: kubernetes_name
    
        - job_name: kubernetes-pods
          kubernetes_sd_configs:
          - role: pod
          relabel_configs:
          - action: keep
            regex: true
            source_labels:
            - __meta_kubernetes_pod_annotation_prometheus_io_scrape
          - action: replace
            regex: (.+)
            source_labels:
            - __meta_kubernetes_pod_annotation_prometheus_io_path
            target_label: __metrics_path__
          - action: replace
            regex: ([^:]+)(?::d+)?;(d+)
            replacement: $1:$2
            source_labels:
            - __address__
            - __meta_kubernetes_pod_annotation_prometheus_io_port
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - action: replace
            source_labels:
            - __meta_kubernetes_namespace
            target_label: kubernetes_namespace
          - action: replace
            source_labels:
            - __meta_kubernetes_pod_name
            target_label: kubernetes_pod_name
        alerting:
          alertmanagers:
          - static_configs:
              - targets: ["alertmanager:80"]   ####需要修改alertmanger服务名字,集群内部通过服务名调用

     4、配置告警规则-创建告警规则的configmap

    [root@VM_0_48_centos prometheus]# cat prometheus-rules.yaml
    # Prometheus configuration format https://prometheus.io/docs/prometheus/latest/configuration/configuration/
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: prometheus-rules-config
      namespace: kube-system
      labels:
        kubernetes.io/cluster-service: "true"
        addonmanager.kubernetes.io/mode: EnsureExists
    data:
      rules.yml: |
        groups:
        - name: node
          rules:
          - alert: memory_use
            expr: 100-(node_memory_MemFree_bytes+node_memory_Cached_bytes+node_memory_Buffers_bytes)/node_memory_MemTotal_bytes*100 >= 90
            for: 10s
            annotations:
              summary: "机器 {{ $labels.instance }} 内存使用的率超过90%"
      
            expr: 100-(node_memory_MemFree_bytes+node_memory_Cached_bytes+node_memory_Buffers_bytes)/node_memory_MemTotal_bytes*100 >= 80
            for: 10s
            annotations:
              summary: "机器 {{ $labels.instance }} 内存使用的率超过80%"

    5、配置告警规则-将告警规则挂载到promethues中

    [root@VM_0_48_centos prometheus]# cat prometheus-statefulset.yaml 
    apiVersion: apps/v1
    kind: StatefulSet
    metadata:
      name: prometheus
      namespace: kube-system
      labels:
        k8s-app: prometheus
        kubernetes.io/cluster-service: "true"
        addonmanager.kubernetes.io/mode: Reconcile
        version: v2.2.1
    spec:
      serviceName: "prometheus"
      replicas: 1
      podManagementPolicy: "Parallel"
      updateStrategy:
       type: "RollingUpdate"
      selector:
        matchLabels:
          k8s-app: prometheus
      template:
        metadata:
          labels:
            k8s-app: prometheus
          annotations:
            scheduler.alpha.kubernetes.io/critical-pod: ''
        spec:
          priorityClassName: system-cluster-critical
          serviceAccountName: prometheus
          initContainers:
          - name: "init-chown-data"
            image: "busybox:latest"
            imagePullPolicy: "IfNotPresent"
            command: ["chown", "-R", "65534:65534", "/data"]
            volumeMounts:
            - name: prometheus-data
              mountPath: /data
              subPath: ""
          containers:
            - name: prometheus-server-configmap-reload
              image: "jimmidyson/configmap-reload:v0.1"
              imagePullPolicy: "IfNotPresent"
              args:
                - --volume-dir=/etc/config
                - --webhook-url=http://localhost:9090/-/reload
              volumeMounts:
                - name: config-volume
                  mountPath: /etc/config
                  readOnly: true
              resources:
                limits:
                  cpu: 10m
                  memory: 10Mi
                requests:
                  cpu: 10m
                  memory: 10Mi
    
            - name: prometheus-server
              image: "prom/prometheus:v2.2.1"
              imagePullPolicy: "IfNotPresent"
              args:
                - --config.file=/etc/config/prometheus.yml
                - --storage.tsdb.path=/data
                - --web.console.libraries=/etc/prometheus/console_libraries
                - --web.console.templates=/etc/prometheus/consoles
                - --web.enable-lifecycle
              ports:
                - containerPort: 9090
              readinessProbe:
                httpGet:
                  path: /-/ready
                  port: 9090
                initialDelaySeconds: 30
                timeoutSeconds: 30
              livenessProbe:
                httpGet:
                  path: /-/healthy
                  port: 9090
                initialDelaySeconds: 30
                timeoutSeconds: 30
              # based on 10 running nodes with 30 pods each
              resources:
                limits:
                  cpu: 200m
                  memory: 1000Mi
                requests:
                  cpu: 200m
                  memory: 1000Mi
    
              volumeMounts:
                - name: config-volume
                  mountPath: /etc/config
                - name: prometheus-data
                  mountPath: /data
                  subPath: ""
                - name: rules-volume
                  mountPath: /etc/config/alert_rules
          terminationGracePeriodSeconds: 300
          volumes:
            - name: config-volume
              configMap:
                name: prometheus-config
            - name: rules-volume    
              configMap:
                name: prometheus-rules-config   ####挂载规则到服务目录下
            - name: prometheus-data
              persistentVolumeClaim:
                claimName: prometheus-claim
    

     6、配置告警规则 配置 prometheus启用告警规则

    [root@VM_0_48_centos prometheus]# cat prometheus-configmap.yaml
    # Prometheus configuration format https://prometheus.io/docs/prometheus/latest/configuration/configuration/
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: prometheus-config
      namespace: kube-system
      labels:
        kubernetes.io/cluster-service: "true"
        addonmanager.kubernetes.io/mode: EnsureExists
    data:
      prometheus.yml: |
        scrape_configs:
        - job_name: prometheus
          static_configs:
          - targets:
            - localhost:9090
        - job_name: node
          static_configs:
            - targets: 
              - 172.19.0.48:9100
              - 172.19.0.14:9100
              - 172.19.0.9:9100
              - 172.19.0.2:9100
    
        - job_name: kubernetes-apiservers
          kubernetes_sd_configs:
          - role: endpoints
          relabel_configs:
          - action: keep
            regex: default;kubernetes;https
            source_labels:
            - __meta_kubernetes_namespace
            - __meta_kubernetes_service_name
            - __meta_kubernetes_endpoint_port_name
          scheme: https
          tls_config:
            ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
            insecure_skip_verify: true
          bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    
        - job_name: kubernetes-nodes-kubelet
          kubernetes_sd_configs:
          - role: node
          relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          scheme: https
          tls_config:
            ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
            insecure_skip_verify: true
          bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    
        - job_name: kubernetes-nodes-cadvisor
          kubernetes_sd_configs:
          - role: node
          relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __metrics_path__
            replacement: /metrics/cadvisor
          scheme: https
          tls_config:
            ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
            insecure_skip_verify: true
          bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    
        - job_name: kubernetes-service-endpoints
          kubernetes_sd_configs:
          - role: endpoints
          relabel_configs:
          - action: keep
            regex: true
            source_labels:
            - __meta_kubernetes_service_annotation_prometheus_io_scrape
          - action: replace
            regex: (https?)
            source_labels:
            - __meta_kubernetes_service_annotation_prometheus_io_scheme
            target_label: __scheme__
          - action: replace
            regex: (.+)
            source_labels:
            - __meta_kubernetes_service_annotation_prometheus_io_path
            target_label: __metrics_path__
          - action: replace
            regex: ([^:]+)(?::d+)?;(d+)
            replacement: $1:$2
            source_labels:
            - __address__
            - __meta_kubernetes_service_annotation_prometheus_io_port
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - action: replace
            source_labels:
            - __meta_kubernetes_namespace
            target_label: kubernetes_namespace
          - action: replace
            source_labels:
            - __meta_kubernetes_service_name
            target_label: kubernetes_name
    
        - job_name: kubernetes-services
          kubernetes_sd_configs:
          - role: service
          metrics_path: /probe
          params:
            module:
            - http_2xx
          relabel_configs:
          - action: keep
            regex: true
            source_labels:
            - __meta_kubernetes_service_annotation_prometheus_io_probe
          - source_labels:
            - __address__
            target_label: __param_target
          - replacement: blackbox
            target_label: __address__
          - source_labels:
            - __param_target
            target_label: instance
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels:
            - __meta_kubernetes_namespace
            target_label: kubernetes_namespace
          - source_labels:
            - __meta_kubernetes_service_name
            target_label: kubernetes_name
    
        - job_name: kubernetes-pods
          kubernetes_sd_configs:
          - role: pod
          relabel_configs:
          - action: keep
            regex: true
            source_labels:
            - __meta_kubernetes_pod_annotation_prometheus_io_scrape
          - action: replace
            regex: (.+)
            source_labels:
            - __meta_kubernetes_pod_annotation_prometheus_io_path
            target_label: __metrics_path__
          - action: replace
            regex: ([^:]+)(?::d+)?;(d+)
            replacement: $1:$2
            source_labels:
            - __address__
            - __meta_kubernetes_pod_annotation_prometheus_io_port
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - action: replace
            source_labels:
            - __meta_kubernetes_namespace
            target_label: kubernetes_namespace
          - action: replace
            source_labels:
            - __meta_kubernetes_pod_name
            target_label: kubernetes_pod_name
        alerting:
          alertmanagers:
          - static_configs:
              - targets: ["alertmanager:80"] 
        rule_files:
        - "/etc/config/alert_rules/*.yml"   ####启用挂载的规则文件

    5、配置告警媒介

    [root@VM_0_48_centos prometheus]# cat   alertmanager-configmap.yaml
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: alertmanager-config
      namespace: kube-system
      labels:
        kubernetes.io/cluster-service: "true"
        addonmanager.kubernetes.io/mode: EnsureExists
    data:
      alertmanager.yml: |
        global:
          resolve_timeout: 5m
          smtp_smarthost: 'smtp.163.com:25'
          smtp_from: 'xjq18125012766@163.com'
          smtp_auth_username: '********@163.com'
          smtp_auth_password: '*******'
          smtp_require_tls: false  
    
        route:
          group_by: ['alertname']
          group_wait: 10s
          group_interval: 30s
          repeat_interval: 10s    
          receiver: 'mail'
    
        receivers:
          - name: 'mail'
            email_configs:
            - to: '2654071080@qq.com'

    6、告警结果展示  

  • 相关阅读:
    POJ 2991 Crane(线段树)
    HDU 1754 I Hate It(线段树)
    HDU 1754 I Hate It(线段树)
    HDU 1166 敌兵布阵 (线段树模版题)
    HDU 1166 敌兵布阵 (线段树模版题)
    Tree Recovery
    Tree Recovery
    情人节的电灯泡(二维树状数组)
    情人节的电灯泡(二维树状数组)
    【LeetCode】Validate Binary Search Tree 二叉查找树的推断
  • 原文地址:https://www.cnblogs.com/xiajq/p/11347217.html
Copyright © 2020-2023  润新知