• Prometheus监控神器-Kubernetes篇(三)


    在Kubernetes中手动方式部署Prometheus联邦。

    monitor-prom

    当我们有多个Kubernetes集群的时候,就会产生指标汇总的需求。如上图所示,我们假定在外部部署一个Prometheus的Federate,然后去采集当前k8s中kube-system与default两个namespace的指标。

    环境

    我的本地环境使用 sealos 一键部署,主要是为了便于测试。

    OS Kubernetes HostName IP Service
    Ubuntu 18.04 1.17.7 sealos-k8s-m1 192.168.1.151 node-exporter prometheus-federate-0
    Ubuntu 18.04 1.17.7 sealos-k8s-m2 192.168.1.152 node-exporter grafana alertmanager-0
    Ubuntu 18.04 1.17.7 sealos-k8s-m3 192.168.1.150 node-exporter alertmanager-1
    Ubuntu 18.04 1.17.7 sealos-k8s-node1 192.168.1.153 node-exporter prometheus-0 kube-state-metrics
    Ubuntu 18.04 1.17.7 sealos-k8s-node2 192.168.1.154 node-exporter prometheus-1
    Ubuntu 18.04 1.17.7 sealos-k8s-node3 192.168.1.155 node-exporter prometheus-2

    部署 Prometheus联邦集群

    创建prometheus-federate数据目录

    # Run on m1 (sealos-k8s-m1); the local PersistentVolume for the federate
    # instance points at this directory on that node.
    # -p makes the step repeatable: missing parents are created and an
    # already existing directory is not an error.
    mkdir -p /data/prometheus-federate/
    # Give ownership of the data directory (recursively) to UID/GID 65534.
    chown -R 65534:65534 /data/prometheus-federate/
    

    创建Prometheus联邦 StorageClass 配置文件

    cd /data/manual-deploy/prometheus/
    cat prometheus-federate-storageclass.yaml
    ---
    # StorageClass used by the manually created local volume that stores the
    # Prometheus federate data.
    kind: StorageClass
    apiVersion: storage.k8s.io/v1
    metadata:
      name: prometheus-federate-lpv
    # Delay binding of a claim until a pod that uses it is being scheduled.
    volumeBindingMode: WaitForFirstConsumer
    # No dynamic provisioner: volumes of this class are created by hand.
    provisioner: kubernetes.io/no-provisioner
    

    创建Prometheus联邦pv配置文件

    ---
    # Local PersistentVolume that holds the Prometheus federate data.
    kind: PersistentVolume
    apiVersion: v1
    metadata:
      name: prometheus-federate-lpv-0
    spec:
      storageClassName: prometheus-federate-lpv
      persistentVolumeReclaimPolicy: Retain
      volumeMode: Filesystem
      accessModes:
        - ReadWriteOnce
      capacity:
        storage: 10Gi
      # Directory on the node that backs this volume.
      local:
        path: /data/prometheus-federate
      # The volume is only usable from the node named sealos-k8s-m1.
      nodeAffinity:
        required:
          nodeSelectorTerms:
            - matchExpressions:
                - key: kubernetes.io/hostname
                  operator: In
                  values:
                    - sealos-k8s-m1
    

    创建Prometheus联邦configmap配置文件

    cat prometheus-federate-configmap.yaml
    ---
    # ConfigMap for the Prometheus federate instance. It carries two files:
    # the alerting rules and the main Prometheus configuration that references
    # those rules and defines the /federate scrape job.
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: prometheus-federate-config
      namespace: kube-system
    data:
      # Alerting rules; referenced by rule_files in prometheus.yml below.
      # The NodeMemoryUsage description now states the same 80% threshold as
      # its expression and closes its parenthesis.
      alertmanager_rules.yaml: |
        groups:
        - name: example
          rules:
          - alert: InstanceDown
            expr: up == 0
            for: 1m
            labels:
              severity: page
            annotations:
              summary: "Instance {{ $labels.instance }} down"
              description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute."
          - alert: NodeMemoryUsage
            expr: (node_memory_MemTotal_bytes -(node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes )) / node_memory_MemTotal_bytes * 100 > 80
            for: 1m
            labels:
              team: ops
            annotations:
              summary: "cluster:{{ $labels.cluster }} {{ $labels.instance }}: High Memory usage detected"
              description: "{{ $labels.instance }}: Memory usage is above 80% (current value is: {{ $value }})"
      # Main configuration: alerts go to the two Alertmanager replicas, and the
      # single scrape job pulls the matching series from the /federate endpoint
      # of the three listed Prometheus replicas. honor_labels keeps the labels
      # of the federated series instead of overwriting them.
      prometheus.yml: |
        global:
          scrape_interval:     30s
          evaluation_interval: 30s
        alerting:
          alertmanagers:
          - static_configs:
            - targets:
              - alertmanager-0.alertmanager-operated:9093
              - alertmanager-1.alertmanager-operated:9093
        rule_files:
          - "/etc/prometheus/alertmanager_rules.yaml"
        scrape_configs:
          - job_name: 'federate'
            scrape_interval: 30s
            honor_labels: true
            metrics_path: '/federate'
            params:
              'match[]':
                - '{job=~"kubernetes.*"}'
                - '{job="prometheus"}'
            static_configs:
              - targets:
                - 'prometheus-0.prometheus:9090'
                - 'prometheus-1.prometheus:9090'
                - 'prometheus-2.prometheus:9090'
    
    

    创建Prometheus联邦的StatefulSet文件

    cat prometheus-federate-statefulset.yaml
    ---
    # StatefulSet for the single-replica Prometheus federate instance.
    # Each pod runs two containers:
    #   * prometheus-federate-configmap-reload watches the mounted ConfigMap
    #     directory and calls Prometheus' reload endpoint through its webhook URL;
    #   * prometheus serves on port 9091 and stores its TSDB on the claimed
    #     local volume.
    apiVersion: apps/v1
    kind: StatefulSet
    metadata:
      name: prometheus-federate
      namespace: kube-system
      labels:
        k8s-app: prometheus-federate
        kubernetes.io/cluster-service: "true"
    spec:
      serviceName: prometheus-federate
      podManagementPolicy: Parallel
      replicas: 1
      selector:
        matchLabels:
          k8s-app: prometheus-federate
      template:
        metadata:
          labels:
            k8s-app: prometheus-federate
        spec:
          # Never place two federate pods on the same node.
          affinity:
            podAntiAffinity:
              requiredDuringSchedulingIgnoredDuringExecution:
                - labelSelector:
                    matchExpressions:
                      - key: k8s-app
                        operator: In
                        values:
                          - prometheus-federate
                  topologyKey: kubernetes.io/hostname
          # Criticality is expressed through the priority class only; the
          # deprecated scheduler.alpha.kubernetes.io/critical-pod annotation
          # has been dropped.
          priorityClassName: system-cluster-critical
          # Host networking makes the web port reachable on the node address;
          # ClusterFirstWithHostNet keeps cluster DNS available so the
          # Service names used in prometheus.yml still resolve.
          hostNetwork: true
          dnsPolicy: ClusterFirstWithHostNet
          serviceAccountName: prometheus
          containers:
            - name: prometheus-federate-configmap-reload
              image: jimmidyson/configmap-reload:v0.4.0
              imagePullPolicy: IfNotPresent
              args:
                - --volume-dir=/etc/config
                # Must match the Prometheus --web.listen-address port below.
                - --webhook-url=http://localhost:9091/-/reload
              volumeMounts:
                - name: config-volume
                  mountPath: /etc/config
                  readOnly: true
              resources:
                limits:
                  cpu: 10m
                  memory: 10Mi
                requests:
                  cpu: 10m
                  memory: 10Mi
              # Reading a ConfigMap and issuing an HTTP request needs neither
              # root nor privileged mode.
              securityContext:
                runAsUser: 65534
                runAsNonRoot: true
                allowPrivilegeEscalation: false
            - name: prometheus
              image: prom/prometheus:v2.20.0
              imagePullPolicy: IfNotPresent
              command:
                - /bin/prometheus
              args:
                - --web.listen-address=0.0.0.0:9091
                - --config.file=/etc/prometheus/prometheus.yml
                - --storage.tsdb.path=/prometheus
                # --storage.tsdb.retention is deprecated; retention.time is
                # its replacement.
                - --storage.tsdb.retention.time=24h
                # /etc/prometheus is covered by the ConfigMap mount, so the
                # console assets are referenced at their real location in
                # the image.
                - --web.console.libraries=/usr/share/prometheus/console_libraries
                - --web.console.templates=/usr/share/prometheus/consoles
                # Enables the /-/reload endpoint used by the sidecar.
                - --web.enable-lifecycle
              ports:
                - containerPort: 9091
                  protocol: TCP
              volumeMounts:
                - name: prometheus-federate-data
                  mountPath: /prometheus
                - name: config-volume
                  mountPath: /etc/prometheus
              readinessProbe:
                httpGet:
                  path: /-/ready
                  port: 9091
                initialDelaySeconds: 30
                timeoutSeconds: 30
              livenessProbe:
                httpGet:
                  path: /-/healthy
                  port: 9091
                initialDelaySeconds: 30
                timeoutSeconds: 30
              resources:
                requests:
                  cpu: 100m
                  memory: 100Mi
                limits:
                  cpu: 1000m
                  memory: 2500Mi
              # Run as UID 65534, the owner given to /data/prometheus-federate
              # when the directory was prepared. If an earlier root-run pod
              # already wrote data there, chown the directory again first.
              securityContext:
                runAsUser: 65534
                runAsNonRoot: true
                allowPrivilegeEscalation: false
          volumes:
            - name: config-volume
              configMap:
                name: prometheus-federate-config
      # One claim per replica, satisfied by the hand-made local volume of the
      # prometheus-federate-lpv class.
      volumeClaimTemplates:
        - metadata:
            name: prometheus-federate-data
          spec:
            accessModes:
              - ReadWriteOnce
            storageClassName: prometheus-federate-lpv
            resources:
              requests:
                storage: 5Gi
    

    创建Prometheus联邦的svc文件

    cat prometheus-federate-service-statefulset.yaml
    ---
    # Headless governing Service for the prometheus-federate StatefulSet.
    # Its name matches the StatefulSet's serviceName, its selector matches the
    # federate pod label, and its port matches the federate listen port 9091.
    # NOTE(review): the scrape targets prometheus-N.prometheus:9090 rely on a
    # separate headless Service named "prometheus"; it is presumably created
    # together with the per-cluster Prometheus StatefulSet - confirm it exists.
    apiVersion: v1
    kind: Service
    metadata:
      name: prometheus-federate
      namespace: kube-system
    spec:
      clusterIP: None
      selector:
        k8s-app: prometheus-federate
      ports:
        - name: web
          port: 9091
          targetPort: 9091
    

    部署

    cd /data/manual-deploy/prometheus/
    # Manifests expected in this directory:
    #   prometheus-federate-configmap.yaml
    #   prometheus-federate-pv.yaml
    #   prometheus-federate-service-statefulset.yaml
    #   prometheus-federate-statefulset.yaml
    #   prometheus-federate-storageclass.yaml
    # Apply storage first, then the configuration, then the governing Service,
    # and the StatefulSet last so everything it references already exists.
    kubectl apply -f prometheus-federate-storageclass.yaml
    kubectl apply -f prometheus-federate-pv.yaml
    kubectl apply -f prometheus-federate-configmap.yaml
    kubectl apply -f prometheus-federate-service-statefulset.yaml
    kubectl apply -f prometheus-federate-statefulset.yaml
    

    验证

    # List the federate PersistentVolumeClaim; the sample output shows it Bound
    # to the volume prometheus-federate-lpv-0.
    kubectl -n kube-system get pvc |grep federate
    prometheus-federate-data-prometheus-federate-0   Bound    prometheus-federate-lpv-0   10Gi RWO   prometheus-federate-lpv 4h
    # List the federate pod; the sample output shows both containers ready (2/2).
    kubectl -n kube-system get pod |grep federate
    prometheus-federate-0                      2/2     Running   0          2d4h
    
    

    至此,联邦的配置就完成了。可以使用浏览器访问192.168.1.151:9091查看相应的targets信息以及配置的rules规则,再触发一下警报,看看Alertmanager集群的警报功能是否已经正常。

    k8stech-federate

  • 相关阅读:
    无题
    一周损兵又折将
    防火墙安装配置(准备)
    徐州项目顺利验收
    悲惨的圣诞节
    [原]防火墙安装配置(日志)
    [转]有关加薪申请资料
    解决FireFox下XML对象转化成字符串
    ASP.NET Application and Page Life Cycle(asp.net生命周期)
    C# 利用反射动态创建对象[转]
  • 原文地址:https://www.cnblogs.com/cloudnative/p/13644263.html
Copyright © 2020-2023  润新知