• kubernetes监控--Prometheus


    本文基于kubernetes 1.5.2版本编写

    kube-state-metrics

    # Create the monitoring namespace and the ServiceAccount for kube-state-metrics.
    kubectl create namespace monitoring
    kubectl create serviceaccount kube-state-metrics --namespace monitoring
    
    # Write the kube-state-metrics Deployment manifest to disk, then create it.
    cat << EOF > kube-state-metrics.yaml
    kind: Deployment
    apiVersion: extensions/v1beta1
    metadata:
      namespace: monitoring
      name: kube-state-metrics
    spec:
      replicas: 1
      template:
        metadata:
          labels:
            app: kube-state-metrics
        spec:
          # Runs under the kube-state-metrics ServiceAccount in this namespace.
          serviceAccountName: kube-state-metrics
          containers:
            - name: kube-state-metrics
              image: quay.io/coreos/kube-state-metrics
              ports:
                - containerPort: 8080
    EOF
    kubectl create -f kube-state-metrics.yaml
    
    # Service in front of kube-state-metrics (port 8080), annotated for scraping.
    cat << EOF > kube-state-metrics-svc.yaml
    kind: Service
    apiVersion: v1
    metadata:
      namespace: monitoring
      name: kube-state-metrics
      labels:
        app: kube-state-metrics
      annotations:
        prometheus.io/scrape: 'true'
    spec:
      selector:
        app: kube-state-metrics
      ports:
        - name: kube-state-metrics
          protocol: TCP
          port: 8080
    EOF
    kubectl create -f kube-state-metrics-svc.yaml
    

    prom-node-exporter

    # Write the node-exporter DaemonSet manifest to disk, then create it.
    cat << EOF > prom-node-exporter.yaml
    kind: DaemonSet
    apiVersion: extensions/v1beta1
    metadata:
      namespace: monitoring
      name: prometheus-node-exporter
      labels:
        component: node-exporter
        app: prometheus
    spec:
      template:
        metadata:
          name: prometheus-node-exporter
          labels:
            component: node-exporter
            app: prometheus
        spec:
          # The exporter pod uses the host's network and PID namespaces.
          hostPID: true
          hostNetwork: true
          containers:
            - name: prometheus-node-exporter
              image: docker.io/prom/node-exporter:v0.14.0
              ports:
                # The port name must be an IANA_SVC_NAME (at most 15 characters, ..)
                - name: prom-node-exp
                  hostPort: 9100
                  containerPort: 9100
    EOF
    kubectl create -f prom-node-exporter.yaml
    
    
    # Service selecting the node-exporter pods on port 9100, annotated for scraping.
    cat << EOF > prom-node-exporter-svc.yaml
    kind: Service
    apiVersion: v1
    metadata:
      namespace: monitoring
      name: prometheus-node-exporter
      labels:
        component: node-exporter
        app: prometheus
      annotations:
        prometheus.io/scrape: 'true'
    spec:
      # clusterIP: None
      type: ClusterIP
      selector:
        component: node-exporter
        app: prometheus
      ports:
        - name: prometheus-node-exporter
          protocol: TCP
          port: 9100
    EOF
    kubectl create -f prom-node-exporter-svc.yaml
    

    node-directory-size-metrics

    cat node-directory-size-metrics.yaml
    # DaemonSet with two containers sharing an in-memory emptyDir volume:
    # read-du writes directory sizes to /tmp/metrics and caddy serves that
    # file over HTTP on port 9102.
    apiVersion: extensions/v1beta1
    kind: DaemonSet
    metadata:
      name: node-directory-size-metrics
      namespace: monitoring
      annotations:
        description: |
          This `DaemonSet` provides metrics in Prometheus format about disk usage on the nodes.
          The container `read-du` reads in sizes of all directories below /mnt and writes that to `/tmp/metrics`. It only reports directories larger then `100M` for now.
          The other container `caddy` just hands out the contents of that file on request via `http` on `/metrics` at port `9102` which are the defaults for Prometheus.
          These are scheduled on every node in the Kubernetes cluster.
          To choose directories from the node to check, just mount them on the `read-du` container below `/mnt`.
    spec:
      template:
        metadata:
          labels:
            app: node-directory-size-metrics
          annotations:
            # Read by the kubernetes-pods scrape job to select this pod and its port.
            prometheus.io/scrape: 'true'
            prometheus.io/port: '9102'
            description: |
              This `Pod` provides metrics in Prometheus format about disk usage on the node.
              The container `read-du` reads in sizes of all directories below /mnt and writes that to `/tmp/metrics`. It only reports directories larger then `100M` for now.
              The other container `caddy` just hands out the contents of that file on request on `/metrics` at port `9102` which are the defaults for Prometheus.
              This `Pod` is scheduled on every node in the Kubernetes cluster.
              To choose directories from the node to check just mount them on `read-du` below `/mnt`.
        spec:
          containers:
          - name: read-du
            image: giantswarm/tiny-tools
            imagePullPolicy: Always
            # FIXME threshold via env var
            # The fish script below loops forever: every 300 seconds it runs du
            # over /mnt, appends one sample per directory to a temp file and then
            # moves that file to /tmp/metrics in a single step.
            # The inner quotes around $path must stay backslash-escaped so the
            # label value is emitted quoted, and the trailing backslash continues
            # the echo onto the redirection line.
            command:
            - fish
            - --command
            - |
              touch /tmp/metrics-temp
              while true
                for directory in (du --bytes --separate-dirs --threshold=100M /mnt)
                  echo $directory | read size path
                  echo "node_directory_size_bytes{path=\"$path\"} $size" \
                    >> /tmp/metrics-temp
                end
                mv /tmp/metrics-temp /tmp/metrics
                sleep 300
              end
            volumeMounts:
            # Host /var is mounted read-only below /mnt so du can measure it.
            - name: host-fs-var
              mountPath: /mnt/var
              readOnly: true
            - name: metrics
              mountPath: /tmp
          - name: caddy
            image: dockermuenster/caddy:latest
            command:
            - "caddy"
            - "-port=9102"
            - "-root=/var/www"
            ports:
            - containerPort: 9102
            volumeMounts:
            # Same volume that read-du mounts at /tmp, so /var/www/metrics is the
            # file written by the script above.
            - name: metrics
              mountPath: /var/www
          volumes:
          - name: host-fs-var
            hostPath:
              path: /var
          - name: metrics
            emptyDir:
              medium: Memory
    kubectl create -f node-directory-size-metrics.yaml
    

    prometheus

    cat prometheus-configmap.yaml
    # ConfigMap holding the Prometheus server configuration (key prometheus.yaml).
    # Four scrape jobs are defined: nodes, annotated service endpoints,
    # probed services and annotated pods.
    apiVersion: v1
    kind: ConfigMap
    metadata:
      creationTimestamp: null
      name: prometheus-core
      namespace: monitoring
    data:
      prometheus.yaml: |
        global:
          scrape_interval: 30s
          scrape_timeout: 30s
          evaluation_interval: 30s
        rule_files:
          - "/etc/prometheus-rules/*.rules"
        scrape_configs:

          # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L37
          - job_name: 'kubernetes-nodes'
            tls_config:
              ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
            bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
            kubernetes_sd_configs:
              - role: node
            relabel_configs:
              # Rewrite discovered addresses ending in :10250 to port 10255.
              - source_labels: [__address__]
                regex: '(.*):10250'
                replacement: '${1}:10255'
                target_label: __address__

          # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L79
          - job_name: 'kubernetes-endpoints'
            kubernetes_sd_configs:
              - role: endpoints
            relabel_configs:
              # Only keep endpoints whose service carries prometheus.io/scrape: 'true'.
              - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
                action: keep
                regex: true
              - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
                action: replace
                target_label: __scheme__
                regex: (https?)
              - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
                action: replace
                target_label: __metrics_path__
                regex: (.+)
              # Replace the port in __address__ with the prometheus.io/port annotation.
              # \d needs its backslash: a bare "d" would only match the letter d.
              - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
                action: replace
                target_label: __address__
                regex: '(.+)(?::\d+);(\d+)'
                replacement: $1:$2
              - action: labelmap
                regex: __meta_kubernetes_service_label_(.+)
              - source_labels: [__meta_kubernetes_namespace]
                action: replace
                target_label: kubernetes_namespace
              - source_labels: [__meta_kubernetes_service_name]
                action: replace
                target_label: kubernetes_name

          # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L119
          - job_name: 'kubernetes-services'
            metrics_path: /probe
            params:
              module: [http_2xx]
            kubernetes_sd_configs:
              - role: service
            relabel_configs:
              - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
                action: keep
                regex: true
              # The service address becomes the probe target parameter and the
              # scrape itself is sent to the host named "blackbox".
              - source_labels: [__address__]
                target_label: __param_target
              - target_label: __address__
                replacement: blackbox
              - source_labels: [__param_target]
                target_label: instance
              - action: labelmap
                regex: __meta_kubernetes_service_label_(.+)
              - source_labels: [__meta_kubernetes_namespace]
                target_label: kubernetes_namespace
              - source_labels: [__meta_kubernetes_service_name]
                target_label: kubernetes_name

          # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L156
          - job_name: 'kubernetes-pods'
            kubernetes_sd_configs:
              - role: pod
            relabel_configs:
              # Only keep pods annotated with prometheus.io/scrape: 'true'.
              - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
                action: keep
                regex: true
              - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
                action: replace
                target_label: __metrics_path__
                regex: (.+)
              # Replace the port in __address__ with the prometheus.io/port annotation.
              - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
                action: replace
                regex: '(.+):(?:\d+);(\d+)'
                replacement: ${1}:${2}
                target_label: __address__
              - action: labelmap
                regex: __meta_kubernetes_pod_label_(.+)
              - source_labels: [__meta_kubernetes_namespace]
                action: replace
                target_label: kubernetes_namespace
              - source_labels: [__meta_kubernetes_pod_name]
                action: replace
                target_label: kubernetes_pod_name
              # Only keep container ports in the range 9000-9999.
              - source_labels: [__meta_kubernetes_pod_container_port_number]
                action: keep
                regex: '9\d{3}'
    kubectl create -f prometheus-configmap.yaml
    
    
    cat prometheus-rules.yaml
    # ConfigMap with the alerting rule files; each key under data becomes one
    # *.rules file.
    kind: ConfigMap
    apiVersion: v1
    metadata:
      namespace: monitoring
      name: prometheus-rules
      creationTimestamp: null
    data:
      cpu-usage.rules: |
        ALERT NodeCPUUsage
          IF (100 - (avg by (instance) (irate(node_cpu{name="node-exporter",mode="idle"}[5m])) * 100)) > 75
          FOR 2m
          LABELS {
            severity="page"
          }
          ANNOTATIONS {
            SUMMARY = "{{$labels.instance}}: High CPU usage detected",
            DESCRIPTION = "{{$labels.instance}}: CPU usage is above 75% (current value is: {{ $value }})"
          }
      instance-availability.rules: |
        ALERT InstanceDown
          IF up == 0
          FOR 1m
          LABELS { severity = "page" }
          ANNOTATIONS {
            summary = "Instance {{ $labels.instance }} down",
            description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute.",
          }
      low-disk-space.rules: |
        ALERT NodeLowRootDisk
          IF ((node_filesystem_size{mountpoint="/root-disk"} - node_filesystem_free{mountpoint="/root-disk"} ) / node_filesystem_size{mountpoint="/root-disk"} * 100) > 75
          FOR 2m
          LABELS {
            severity="page"
          }
          ANNOTATIONS {
            SUMMARY = "{{$labels.instance}}: Low root disk space",
            DESCRIPTION = "{{$labels.instance}}: Root disk usage is above 75% (current value is: {{ $value }})"
          }

        ALERT NodeLowDataDisk
          IF ((node_filesystem_size{mountpoint="/data-disk"} - node_filesystem_free{mountpoint="/data-disk"} ) / node_filesystem_size{mountpoint="/data-disk"} * 100) > 75
          FOR 2m
          LABELS {
            severity="page"
          }
          ANNOTATIONS {
            SUMMARY = "{{$labels.instance}}: Low data disk space",
            DESCRIPTION = "{{$labels.instance}}: Data disk usage is above 75% (current value is: {{ $value }})"
          }
      mem-usage.rules: |
        ALERT NodeSwapUsage
          IF (((node_memory_SwapTotal-node_memory_SwapFree)/node_memory_SwapTotal)*100) > 75
          FOR 2m
          LABELS {
            severity="page"
          }
          ANNOTATIONS {
            SUMMARY = "{{$labels.instance}}: Swap usage detected",
            DESCRIPTION = "{{$labels.instance}}: Swap usage usage is above 75% (current value is: {{ $value }})"
          }

        ALERT NodeMemoryUsage
          IF (((node_memory_MemTotal-node_memory_MemFree-node_memory_Cached)/(node_memory_MemTotal)*100)) > 75
          FOR 2m
          LABELS {
            severity="page"
          }
          ANNOTATIONS {
            SUMMARY = "{{$labels.instance}}: High memory usage detected",
            DESCRIPTION = "{{$labels.instance}}: Memory usage is above 75% (current value is: {{ $value }})"
          }
    kubectl create -f prometheus-rules.yaml
    
    # ServiceAccount referenced by the prometheus-core Deployment.
    kubectl create serviceaccount prometheus-k8s --namespace monitoring
    
    
    # Write the Prometheus server Deployment manifest to disk, then create it.
    cat << EOF > prometheus-core-deploy.yaml
    kind: Deployment
    apiVersion: extensions/v1beta1
    metadata:
      namespace: monitoring
      name: prometheus-core
      labels:
        component: core
        app: prometheus
    spec:
      replicas: 1
      template:
        metadata:
          name: prometheus-main
          labels:
            component: core
            app: prometheus
        spec:
          serviceAccountName: prometheus-k8s
          volumes:
            # Backed by the prometheus-core and prometheus-rules ConfigMaps.
            - name: config-volume
              configMap:
                name: prometheus-core
            - name: rules-volume
              configMap:
                name: prometheus-rules
          containers:
            - name: prometheus
              image: prom/prometheus:v1.7.1
              args:
                - '-storage.local.retention=12h'
                - '-storage.local.memory-chunks=500000'
                - '-config.file=/etc/prometheus/prometheus.yaml'
                - '-alertmanager.url=http://alertmanager:9093/'
              ports:
                - name: webui
                  containerPort: 9090
              resources:
                # Requests and limits are set to the same values.
                limits:
                  cpu: 500m
                  memory: 500M
                requests:
                  cpu: 500m
                  memory: 500M
              volumeMounts:
                # Mount points match the -config.file argument and the
                # rule_files glob in the server configuration.
                - name: config-volume
                  mountPath: /etc/prometheus
                - name: rules-volume
                  mountPath: /etc/prometheus-rules
    EOF
    kubectl create -f prometheus-core-deploy.yaml
    
    
    # NodePort Service exposing the Prometheus web UI (port 9090).
    cat << EOF > prometheus-core-service.yaml
    kind: Service
    apiVersion: v1
    metadata:
      namespace: monitoring
      name: prometheus
      labels:
        component: core
        app: prometheus
      annotations:
        prometheus.io/scrape: 'true'
    spec:
      type: NodePort
      selector:
        component: core
        app: prometheus
      ports:
        - name: webui
          protocol: TCP
          port: 9090
    EOF
    kubectl create -f prometheus-core-service.yaml
    

    grafana

    # Write the Grafana Deployment manifest to disk, then create it.
    cat << EOF > grafana-core-deployment.yaml
    apiVersion: extensions/v1beta1
    kind: Deployment
    metadata:
      name: grafana-core
      namespace: monitoring
      labels:
        app: grafana
        component: core
    spec:
      replicas: 1
      template:
        metadata:
          labels:
            app: grafana
            component: core
        spec:
          containers:
            - image: docker.io/grafana/grafana:latest
              name: grafana-core
              resources:
                # keep request = limit to keep this container in guaranteed class
                limits:
                  cpu: 100m
                  memory: 100Mi
                requests:
                  cpu: 100m
                  memory: 100Mi
              ports:
                - name: grafana
                  containerPort: 3000
              env:
                # The following env variables are required to make Grafana accessible via
                # the kubernetes api-server proxy. On production clusters, we recommend
                # removing these env variables, setup auth for grafana, and expose the grafana
                # service using a LoadBalancer or a public IP.
                # NOTE: with these settings anonymous users get the Admin role.
                - name: GF_AUTH_BASIC_ENABLED
                  value: "false"
                - name: GF_AUTH_ANONYMOUS_ENABLED
                  value: "true"
                - name: GF_AUTH_ANONYMOUS_ORG_ROLE
                  value: Admin
                # - name: GF_SERVER_ROOT_URL
                #   value: /api/v1/proxy/namespaces/monitoring/services/grafana/
              volumeMounts:
                - name: grafana-persistent-storage
                  mountPath: /var
          volumes:
            - name: grafana-persistent-storage
              # emptyDir is a volume source of its own; it must not be nested
              # under hostPath, which only accepts a path.
              emptyDir: {}
              # To keep the data on the node instead, replace emptyDir with:
              # hostPath:
              #   path: /grafanaData
    EOF
    kubectl create -f grafana-core-deployment.yaml
    
    
    # NodePort Service for Grafana: service port 3000, node port 31000.
    cat << EOF > grafana-core-service.yaml
    kind: Service
    apiVersion: v1
    metadata:
      namespace: monitoring
      name: grafana
      labels:
        component: core
        app: grafana
      # annotations:
      #   prometheus.io/scrape: 'true'
    spec:
      type: NodePort
      selector:
        component: core
        app: grafana
      ports:
        - nodePort: 31000
          port: 3000
    EOF
    kubectl create -f grafana-core-service.yaml
    

    grafana模板

    {
      "annotations": {
        "list": []
      },
      "editable": true,
      "gnetId": null,
      "graphTooltip": 0,
      "hideControls": false,
      "id": 21,
      "links": [],
      "refresh": false,
      "rows": [
        {
          "collapse": false,
          "height": 282,
          "panels": [
            {
              "aliasColors": {},
              "bars": false,
              "dashLength": 10,
              "dashes": false,
              "datasource": null,
              "fill": 0,
              "height": "",
              "hideTimeOverride": false,
              "id": 1,
              "legend": {
                "alignAsTable": true,
                "avg": false,
                "current": true,
                "hideEmpty": false,
                "hideZero": false,
                "max": true,
                "min": false,
                "rightSide": true,
                "show": true,
                "sideWidth": null,
                "total": false,
                "values": true
              },
              "lines": true,
              "linewidth": 2,
              "links": [],
              "minSpan": null,
              "nullPointMode": "null",
              "percentage": false,
              "pointradius": 5,
              "points": false,
              "renderer": "flot",
              "repeat": null,
              "seriesOverrides": [],
              "spaceLength": 10,
              "span": 6,
              "stack": false,
              "steppedLine": false,
              "targets": [
                {
                  "expr": "sum(container_memory_usage_bytes{pod_name=\"$pod\", namespace=\"$namespace\"}) by (namespace,pod_name)",
                  "format": "time_series",
                  "interval": "30s",
                  "intervalFactor": 1,
                  "legendFormat": "total",
                  "refId": "B",
                  "step": 30
                },
                {
                  "expr": "sum(container_memory_rss{namespace=\"$namespace\",pod_name=\"$pod\"}) by (namespace,pod_name)",
                  "format": "time_series",
                  "hide": false,
                  "interval": "30s",
                  "intervalFactor": 1,
                  "legendFormat": "rss",
                  "metric": "",
                  "refId": "A",
                  "step": 30
                },
                {
                  "expr": "sum(container_memory_cache{namespace=\"$namespace\",pod_name=\"$pod\"}) by (namespace,pod_name)",
                  "format": "time_series",
                  "interval": "30s",
                  "intervalFactor": 1,
                  "legendFormat": "cache",
                  "refId": "D",
                  "step": 30
                },
                {
                  "expr": "sum(container_memory_swap{namespace=\"$namespace\",pod_name=\"$pod\"}) by (namespace,pod_name)",
                  "format": "time_series",
                  "interval": "30s",
                  "intervalFactor": 1,
                  "legendFormat": "swap",
                  "refId": "C",
                  "step": 30
                },
                {
                  "expr": "sum(container_memory_failures_total{namespace=\"$namespace\",pod_name=\"$pod\"}) by (namespace,pod_name)",
                  "format": "time_series",
                  "hide": true,
                  "intervalFactor": 2,
                  "legendFormat": "failures_total",
                  "refId": "E",
                  "step": 20
                },
                {
                  "expr": "sum(container_memory_failcnt{namespace=\"$namespace\",pod_name=\"$pod\"}) by (namespace,pod_name)",
                  "format": "time_series",
                  "hide": true,
                  "intervalFactor": 2,
                  "legendFormat": "failcnt",
                  "refId": "F",
                  "step": 20
                },
                {
                  "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\",pod_name=\"$pod\"}) by (namespace,pod_name)",
                  "format": "time_series",
                  "hide": true,
                  "intervalFactor": 2,
                  "legendFormat": "working_set",
                  "refId": "G",
                  "step": 20
                }
              ],
              "thresholds": [],
              "timeFrom": null,
              "timeShift": null,
              "title": "内存使用量",
              "tooltip": {
                "shared": false,
                "sort": 0,
                "value_type": "individual"
              },
              "transparent": false,
              "type": "graph",
              "xaxis": {
                "buckets": null,
                "mode": "time",
                "name": null,
                "show": true,
                "values": [
                  "total"
                ]
              },
              "yaxes": [
                {
                  "format": "bytes",
                  "label": null,
                  "logBase": 1,
                  "max": null,
                  "min": null,
                  "show": true
                },
                {
                  "format": "bytes",
                  "label": null,
                  "logBase": 1,
                  "max": null,
                  "min": null,
                  "show": true
                }
              ]
            },
            {
              "aliasColors": {},
              "bars": false,
              "dashLength": 10,
              "dashes": false,
              "datasource": null,
              "decimals": null,
              "description": "获取CPU资源使用情况,判断方式现在时刻和一分钟前的数据进行对比。",
              "fill": 0,
              "id": 2,
              "legend": {
                "alignAsTable": true,
                "avg": false,
                "current": true,
                "hideEmpty": false,
                "hideZero": false,
                "max": true,
                "min": false,
                "rightSide": true,
                "show": true,
                "sideWidth": null,
                "total": false,
                "values": true
              },
              "lines": true,
              "linewidth": 2,
              "links": [],
              "minSpan": null,
              "nullPointMode": "null",
              "percentage": false,
              "pointradius": 5,
              "points": false,
              "renderer": "flot",
              "repeat": null,
              "seriesOverrides": [],
              "spaceLength": 10,
              "span": 6,
              "stack": true,
              "steppedLine": false,
              "targets": [
                {
                  "expr": "sum(rate(container_cpu_system_seconds_total{namespace=\"$namespace\",pod_name=\"$pod\"}[1m])) by (namespace,pod_name)",
                  "format": "time_series",
                  "hide": false,
                  "interval": "30s",
                  "intervalFactor": 1,
                  "legendFormat": "system",
                  "metric": "",
                  "refId": "A",
                  "step": 30
                },
                {
                  "expr": "sum(rate(container_cpu_user_seconds_total{namespace=\"$namespace\",pod_name=\"$pod\"}[1m])) by (namespace,pod_name)",
                  "format": "time_series",
                  "interval": "30s",
                  "intervalFactor": 1,
                  "legendFormat": "user",
                  "refId": "C",
                  "step": 30
                },
                {
                  "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\",pod_name=\"$pod\"}[1m])) by (namespace,pod_name)",
                  "format": "time_series",
                  "interval": "30s",
                  "intervalFactor": 1,
                  "legendFormat": "total",
                  "refId": "B",
                  "step": 30
                }
              ],
              "thresholds": [],
              "timeFrom": null,
              "timeShift": null,
              "title": "CPU使用量(核)",
              "tooltip": {
                "shared": true,
                "sort": 0,
                "value_type": "individual"
              },
              "transparent": false,
              "type": "graph",
              "xaxis": {
                "buckets": null,
                "mode": "time",
                "name": null,
                "show": true,
                "values": [
                  "total"
                ]
              },
              "yaxes": [
                {
                  "format": "short",
                  "label": "",
                  "logBase": 1,
                  "max": null,
                  "min": null,
                  "show": true
                },
                {
                  "format": "short",
                  "label": null,
                  "logBase": 1,
                  "max": null,
                  "min": null,
                  "show": true
                }
              ]
            }
          ],
          "repeat": null,
          "repeatIteration": null,
          "repeatRowId": null,
          "showTitle": false,
          "title": "所有pod",
          "titleSize": "h6"
        },
        {
          "collapse": false,
          "height": 199,
          "panels": [
            {
              "aliasColors": {},
              "bars": false,
              "dashLength": 10,
              "dashes": false,
              "datasource": null,
              "fill": 1,
              "id": 6,
              "legend": {
                "alignAsTable": true,
                "avg": false,
                "current": false,
                "max": false,
                "min": false,
                "rightSide": true,
                "show": true,
                "total": false,
                "values": false
              },
              "lines": true,
              "linewidth": 1,
              "links": [],
              "nullPointMode": "null",
              "percentage": false,
              "pointradius": 5,
              "points": false,
              "renderer": "flot",
              "seriesOverrides": [],
              "spaceLength": 10,
              "span": 6,
              "stack": false,
              "steppedLine": false,
              "targets": [
                {
                  "expr": "sum(rate(container_network_receive_packets_total{namespace=\"$namespace\",pod_name=\"$pod\"}[1m])) by (namespace,pod_name)",
                  "format": "time_series",
                  "interval": "1m",
                  "intervalFactor": 2,
                  "legendFormat": "in",
                  "refId": "A",
                  "step": 120
                },
                {
                  "expr": "sum(rate(container_network_transmit_packets_total{namespace=\"$namespace\",pod_name=\"$pod\"}[1m])) by (namespace,pod_name)",
                  "format": "time_series",
                  "interval": "1m",
                  "intervalFactor": 2,
                  "legendFormat": "out",
                  "refId": "B",
                  "step": 120
                },
                {
                  "expr": "sum(rate(container_network_receive_errors_total{namespace=\"$namespace\",pod_name=\"$pod\"}[1m])) by (namespace,pod_name) + sum(rate(container_network_transmit_errors_total{namespace=\"$namespace\",pod_name=\"$pod\"}[1m])) by (namespace,pod_name) + sum(rate(container_network_receive_packets_dropped_total{namespace=\"$namespace\",pod_name=\"$pod\"}[1m])) by (namespace,pod_name) + sum(rate(container_network_transmit_packets_dropped_total{namespace=\"$namespace\",pod_name=\"$pod\"}[1m])) by (namespace,pod_name)",
                  "format": "time_series",
                  "interval": "1m",
                  "intervalFactor": 1,
                  "legendFormat": "error",
                  "refId": "C",
                  "step": 60
                }
              ],
              "thresholds": [],
              "timeFrom": null,
              "timeShift": null,
              "title": "数据包",
              "tooltip": {
                "shared": true,
                "sort": 0,
                "value_type": "individual"
              },
              "type": "graph",
              "xaxis": {
                "buckets": null,
                "mode": "time",
                "name": null,
                "show": true,
                "values": []
              },
              "yaxes": [
                {
                  "format": "pps",
                  "label": null,
                  "logBase": 1,
                  "max": null,
                  "min": null,
                  "show": true
                },
                {
                  "format": "pps",
                  "label": null,
                  "logBase": 1,
                  "max": null,
                  "min": null,
                  "show": true
                }
              ]
            },
            {
              "aliasColors": {},
              "bars": false,
              "dashLength": 10,
              "dashes": false,
              "datasource": null,
              "fill": 0,
              "id": 5,
              "legend": {
                "alignAsTable": true,
                "avg": false,
                "current": false,
                "max": false,
                "min": false,
                "rightSide": true,
                "show": true,
                "total": false,
                "values": false
              },
              "lines": true,
              "linewidth": 2,
              "links": [],
              "nullPointMode": "null",
              "percentage": false,
              "pointradius": 5,
              "points": false,
              "renderer": "flot",
              "seriesOverrides": [],
              "spaceLength": 10,
              "span": 6,
              "stack": false,
              "steppedLine": false,
              "targets": [
                {
                  "expr": "sum(rate(container_network_receive_bytes_total{pod_name=\"$pod\", namespace=\"$namespace\"}[1m])*1) by (namespace,container_name)",
                  "format": "time_series",
                  "interval": "1m",
                  "intervalFactor": 1,
                  "legendFormat": "in",
                  "metric": "",
                  "refId": "A",
                  "step": 60
                },
                {
                  "expr": "sum(rate(container_network_transmit_bytes_total{pod_name=\"$pod\", namespace=\"$namespace\"}[1m])*1) by (namespace,container_name)",
                  "format": "time_series",
                  "interval": "1m",
                  "intervalFactor": 1,
                  "legendFormat": "out",
                  "refId": "B",
                  "step": 60
                }
              ],
              "thresholds": [],
              "timeFrom": null,
              "timeShift": null,
              "title": "网络流量",
              "tooltip": {
                "shared": true,
                "sort": 0,
                "value_type": "individual"
              },
              "type": "graph",
              "xaxis": {
                "buckets": null,
                "mode": "time",
                "name": null,
                "show": true,
                "values": []
              },
              "yaxes": [
                {
                  "format": "bps",
                  "label": null,
                  "logBase": 1,
                  "max": null,
                  "min": null,
                  "show": true
                },
                {
                  "format": "bps",
                  "label": null,
                  "logBase": 1,
                  "max": null,
                  "min": null,
                  "show": true
                }
              ]
            }
          ],
          "repeat": null,
          "repeatIteration": null,
          "repeatRowId": null,
          "showTitle": false,
          "title": "Dashboard Row",
          "titleSize": "h6"
        },
        {
          "collapse": false,
          "height": 163,
          "panels": [
            {
              "aliasColors": {},
              "bars": false,
              "dashLength": 10,
              "dashes": false,
              "datasource": null,
              "fill": 0,
              "hideTimeOverride": false,
              "id": 3,
              "legend": {
                "alignAsTable": true,
                "avg": false,
                "current": false,
                "max": false,
                "min": false,
                "rightSide": true,
                "show": true,
                "sortDesc": true,
                "total": false,
                "values": false
              },
              "lines": true,
              "linewidth": 2,
              "links": [],
              "nullPointMode": "null",
              "percentage": false,
              "pointradius": 5,
              "points": false,
              "renderer": "flot",
              "repeat": null,
              "seriesOverrides": [],
              "spaceLength": 10,
              "span": 6,
              "stack": false,
              "steppedLine": false,
              "targets": [
                {
                  "expr": "sum(container_fs_limit_bytes{namespace=\"$namespace\",pod_name=\"$pod\"}) by (namespace,pod_name)",
                  "format": "time_series",
                  "intervalFactor": 2,
                  "legendFormat": "total",
                  "refId": "A",
                  "step": 4
                },
                {
                  "expr": "sum(container_fs_usage_bytes{namespace=\"$namespace\",pod_name=\"$pod\"}) by (namespace,pod_name)",
                  "format": "time_series",
                  "intervalFactor": 2,
                  "legendFormat": "usage",
                  "refId": "B",
                  "step": 4
                }
              ],
              "thresholds": [],
              "timeFrom": null,
              "timeShift": null,
              "title": "硬盘使用量",
              "tooltip": {
                "shared": false,
                "sort": 0,
                "value_type": "individual"
              },
              "type": "graph",
              "xaxis": {
                "buckets": null,
                "mode": "time",
                "name": null,
                "show": true,
                "values": []
              },
              "yaxes": [
                {
                  "format": "decbytes",
                  "label": null,
                  "logBase": 1,
                  "max": null,
                  "min": null,
                  "show": true
                },
                {
                  "format": "short",
                  "label": null,
                  "logBase": 1,
                  "max": null,
                  "min": null,
                  "show": true
                }
              ]
            },
            {
              "aliasColors": {},
              "bars": false,
              "dashLength": 10,
              "dashes": false,
              "datasource": null,
              "fill": 0,
              "id": 4,
              "legend": {
                "alignAsTable": true,
                "avg": false,
                "current": false,
                "max": false,
                "min": false,
                "rightSide": true,
                "show": true,
                "total": false,
                "values": false
              },
              "lines": true,
              "linewidth": 2,
              "links": [],
              "nullPointMode": "null",
              "percentage": false,
              "pointradius": 5,
              "points": false,
              "renderer": "flot",
              "seriesOverrides": [],
              "spaceLength": 10,
              "span": 6,
              "stack": false,
              "steppedLine": false,
              "targets": [
                {
                  "expr": "sum(container_fs_reads_total{namespace=\"$namespace\",pod_name=\"$pod\"}) by (namespace,pod_name)",
                  "format": "time_series",
                  "interval": "30s",
                  "intervalFactor": 1,
                  "legendFormat": "read",
                  "refId": "A",
                  "step": 30
                },
                {
                  "expr": "sum(container_fs_writes_total{namespace=\"$namespace\",pod_name=\"$pod\"}) by (namespace,pod_name)",
                  "format": "time_series",
                  "interval": "30s",
                  "intervalFactor": 1,
                  "legendFormat": "write",
                  "refId": "B",
                  "step": 30
                },
                {
                  "expr": "sum(container_fs_io_current{namespace=\"$namespace\",pod_name=\"$pod\"}) by (namespace,pod_name)",
                  "format": "time_series",
                  "intervalFactor": 2,
                  "legendFormat": "current",
                  "refId": "C",
                  "step": 4
                }
              ],
              "thresholds": [],
              "timeFrom": null,
              "timeShift": null,
              "title": "IO",
              "tooltip": {
                "shared": true,
                "sort": 0,
                "value_type": "individual"
              },
              "type": "graph",
              "xaxis": {
                "buckets": null,
                "mode": "time",
                "name": null,
                "show": true,
                "values": []
              },
              "yaxes": [
                {
                  "format": "short",
                  "label": null,
                  "logBase": 1,
                  "max": null,
                  "min": null,
                  "show": true
                },
                {
                  "format": "short",
                  "label": null,
                  "logBase": 1,
                  "max": null,
                  "min": null,
                  "show": true
                }
              ]
            }
          ],
          "repeat": null,
          "repeatIteration": null,
          "repeatRowId": null,
          "showTitle": false,
          "title": "Dashboard Row",
          "titleSize": "h6"
        }
      ],
      "schemaVersion": 14,
      "style": "dark",
      "tags": [],
      "templating": {
        "list": [
          {
            "allValue": null,
            "current": {
              "selected": true,
              "text": "monitoring",
              "value": "monitoring"
            },
            "datasource": "prometheus",
            "hide": 0,
            "includeAll": false,
            "label": "namespace",
            "multi": false,
            "name": "namespace",
            "options": [],
            "query": "label_values(kube_pod_info{namespace=~\".+\"},namespace)",
            "refresh": 1,
            "regex": "",
            "sort": 0,
            "tagValuesQuery": "",
            "tags": [],
            "tagsQuery": "",
            "type": "query",
            "useTags": false
          },
          {
            "allValue": null,
            "current": {
              "selected": true,
              "text": "kube-state-metrics-2802505745",
              "value": "kube-state-metrics-2802505745"
            },
            "datasource": "prometheus",
            "hide": 0,
            "includeAll": false,
            "label": "创建者",
            "multi": false,
            "name": "created_by_name",
            "options": [],
            "query": "label_values(kube_pod_info{created_by_name=~\".+\",namespace=\"$namespace\"} ,created_by_name)",
            "refresh": 1,
            "regex": "",
            "sort": 0,
            "tagValuesQuery": "",
            "tags": [],
            "tagsQuery": "",
            "type": "query",
            "useTags": false
          },
          {
            "allValue": null,
            "current": {
              "selected": true,
              "text": "kube-state-metrics-2802505745-k24zk",
              "value": "kube-state-metrics-2802505745-k24zk"
            },
            "datasource": "prometheus",
            "hide": 0,
            "includeAll": false,
            "label": "pod",
            "multi": false,
            "name": "pod",
            "options": [],
            "query": "label_values(kube_pod_info{created_by_name=\"$created_by_name\",namespace=\"$namespace\",pod=~\".+\"} ,pod)",
            "refresh": 1,
            "regex": "",
            "sort": 0,
            "tagValuesQuery": "",
            "tags": [],
            "tagsQuery": "",
            "type": "query",
            "useTags": false
          }
        ]
      },
      "time": {
        "from": "now-30m",
        "to": "now"
      },
      "timepicker": {
        "refresh_intervals": [
          "5s",
          "10s",
          "30s",
          "1m",
          "5m",
          "15m",
          "30m",
          "1h",
          "2h",
          "1d"
        ],
        "time_options": [
          "5m",
          "15m",
          "1h",
          "6h",
          "12h",
          "24h",
          "2d",
          "7d",
          "30d"
        ]
      },
      "timezone": "",
      "title": "基于资源对象对pod进行监控",
      "version": 5
    }
  • 相关阅读:
    linq to sql的性能和reader相比只是差一点点吗
    Win11删除右键菜单open in windows Terminal
    jdk1.8
    mvcc read view
    javascript 跨域双向通信方案,通过postMessage和window.name实现
    [原创]如何加载动态库、获取方法委托、卸载动态库
    awseks创建与使用
    aiops 调研
    consul调研
    机器学习调研
  • 原文地址:https://www.cnblogs.com/lykops/p/8263120.html
Copyright © 2020-2023  润新知