本文基于kubernetes 1.5.2版本编写
kube-state-metrics
# Create the monitoring namespace and the service account the pod runs as.
kubectl create ns monitoring
kubectl create sa -n monitoring kube-state-metrics
# Write the kube-state-metrics Deployment manifest, then create it.
cat << EOF > kube-state-metrics.yaml
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: kube-state-metrics
  namespace: monitoring
spec:
  replicas: 1
  template:
    metadata:
      labels:
        # Must match the selector of the kube-state-metrics Service.
        app: kube-state-metrics
    spec:
      serviceAccountName: kube-state-metrics
      containers:
        - name: kube-state-metrics
          image: quay.io/coreos/kube-state-metrics
          ports:
            - containerPort: 8080
EOF
kubectl create -f kube-state-metrics.yaml
# Write the Service that exposes kube-state-metrics on port 8080, then create it.
cat << EOF > kube-state-metrics-svc.yaml
apiVersion: v1
kind: Service
metadata:
  annotations:
    # Matched by the keep rule of the kubernetes-endpoints scrape job.
    prometheus.io/scrape: 'true'
  name: kube-state-metrics
  namespace: monitoring
  labels:
    app: kube-state-metrics
spec:
  ports:
    - name: kube-state-metrics
      port: 8080
      protocol: TCP
  selector:
    app: kube-state-metrics
EOF
kubectl create -f kube-state-metrics-svc.yaml
prom-node-exporter
# Write the node-exporter DaemonSet manifest, then create it.
cat << EOF > prom-node-exporter.yaml
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: prometheus-node-exporter
  namespace: monitoring
  labels:
    app: prometheus
    component: node-exporter
spec:
  template:
    metadata:
      name: prometheus-node-exporter
      labels:
        app: prometheus
        component: node-exporter
    spec:
      containers:
        - image: docker.io/prom/node-exporter:v0.14.0
          name: prometheus-node-exporter
          ports:
            # The port name must be an IANA_SVC_NAME (at most 15 characters).
            - name: prom-node-exp
              containerPort: 9100
              hostPort: 9100
      # Pod-level settings: share the network and PID namespaces of the node.
      hostNetwork: true
      hostPID: true
EOF
kubectl create -f prom-node-exporter.yaml
# Write the Service in front of the node-exporter pods, then create it.
cat << EOF > prom-node-exporter-svc.yaml
apiVersion: v1
kind: Service
metadata:
  annotations:
    # Matched by the keep rule of the kubernetes-endpoints scrape job.
    prometheus.io/scrape: 'true'
  name: prometheus-node-exporter
  namespace: monitoring
  labels:
    app: prometheus
    component: node-exporter
spec:
  # clusterIP: None
  ports:
    - name: prometheus-node-exporter
      port: 9100
      protocol: TCP
  selector:
    app: prometheus
    component: node-exporter
  type: ClusterIP
EOF
kubectl create -f prom-node-exporter-svc.yaml
node-directory-size-metrics
# Show the node-directory-size-metrics DaemonSet manifest, then create it.
cat node-directory-size-metrics.yaml
apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: node-directory-size-metrics
  namespace: monitoring
  annotations:
    description: |
      This `DaemonSet` provides metrics in Prometheus format about disk usage on the nodes.
      The container `read-du` reads in sizes of all directories below /mnt and writes that to `/tmp/metrics`. It only reports directories larger then `100M` for now.
      The other container `caddy` just hands out the contents of that file on request via `http` on `/metrics` at port `9102` which are the defaults for Prometheus.
      These are scheduled on every node in the Kubernetes cluster.
      To choose directories from the node to check, just mount them on the `read-du` container below `/mnt`.
spec:
  template:
    metadata:
      labels:
        app: node-directory-size-metrics
      annotations:
        # Pod annotations read by the kubernetes-pods scrape job.
        prometheus.io/scrape: 'true'
        prometheus.io/port: '9102'
        description: |
          This `Pod` provides metrics in Prometheus format about disk usage on the node.
          The container `read-du` reads in sizes of all directories below /mnt and writes that to `/tmp/metrics`. It only reports directories larger then `100M` for now.
          The other container `caddy` just hands out the contents of that file on request on `/metrics` at port `9102` which are the defaults for Prometheus.
          This `Pod` is scheduled on every node in the Kubernetes cluster.
          To choose directories from the node to check just mount them on `read-du` below `/mnt`.
    spec:
      containers:
        - name: read-du
          image: giantswarm/tiny-tools
          imagePullPolicy: Always
          # FIXME threshold via env var
          # The fish loop below rebuilds /tmp/metrics every 300 seconds. The
          # label value must be wrapped in escaped double quotes and the
          # redirection must stay attached to the echo via the trailing
          # backslash, otherwise no valid metric line is written.
          command:
            - fish
            - --command
            - |
              touch /tmp/metrics-temp
              while true
                for directory in (du --bytes --separate-dirs --threshold=100M /mnt)
                  echo $directory | read size path
                  echo "node_directory_size_bytes{path=\"$path\"} $size" \
                    >> /tmp/metrics-temp
                end
                mv /tmp/metrics-temp /tmp/metrics
                sleep 300
              end
          volumeMounts:
            - name: host-fs-var
              mountPath: /mnt/var
              readOnly: true
            - name: metrics
              mountPath: /tmp
        - name: caddy
          image: dockermuenster/caddy:latest
          command:
            - "caddy"
            - "-port=9102"
            - "-root=/var/www"
          ports:
            - containerPort: 9102
          volumeMounts:
            # Same volume that read-du mounts at /tmp, so caddy serves /metrics.
            - name: metrics
              mountPath: /var/www
      volumes:
        - name: host-fs-var
          hostPath:
            path: /var
        - name: metrics
          emptyDir:
            medium: Memory
kubectl create -f node-directory-size-metrics.yaml
prometheus
# Show the ConfigMap that carries the Prometheus server configuration, then
# create it. The Deployment mounts it at /etc/prometheus.
cat prometheus-configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  creationTimestamp: null
  name: prometheus-core
  namespace: monitoring
data:
  prometheus.yaml: |
    global:
      scrape_interval: 30s
      scrape_timeout: 30s
      evaluation_interval: 30s
    rule_files:
      - "/etc/prometheus-rules/*.rules"
    scrape_configs:
      # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L37
      - job_name: 'kubernetes-nodes'
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          # Rewrite the discovered port 10250 to 10255.
          - source_labels: [__address__]
            regex: '(.*):10250'
            replacement: '${1}:10255'
            target_label: __address__
      # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L79
      - job_name: 'kubernetes-endpoints'
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
            action: replace
            target_label: __scheme__
            regex: (https?)
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          # \d+ matches the numeric port; without the backslash the pattern
          # would look for literal "d" characters and never match.
          - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
            action: replace
            target_label: __address__
            regex: (.+)(?::\d+);(\d+)
            replacement: $1:$2
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            action: replace
            target_label: kubernetes_name
      # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L119
      - job_name: 'kubernetes-services'
        metrics_path: /probe
        params:
          module: [http_2xx]
        kubernetes_sd_configs:
          - role: service
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
            action: keep
            regex: true
          - source_labels: [__address__]
            target_label: __param_target
          - target_label: __address__
            replacement: blackbox
          - source_labels: [__param_target]
            target_label: instance
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            target_label: kubernetes_name
      # https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L156
      - job_name: 'kubernetes-pods'
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
            action: replace
            regex: (.+):(?:\d+);(\d+)
            replacement: ${1}:${2}
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: kubernetes_pod_name
          # Keep only container ports in the 9000-9999 range.
          - source_labels: [__meta_kubernetes_pod_container_port_number]
            action: keep
            regex: 9\d{3}
kubectl create -f prometheus-configmap.yaml
# Show the ConfigMap that carries the alerting rule files, then create it.
# Each key under data becomes one *.rules file in /etc/prometheus-rules.
cat prometheus-rules.yaml
apiVersion: v1
data:
  cpu-usage.rules: |
    ALERT NodeCPUUsage
      IF (100 - (avg by (instance) (irate(node_cpu{name="node-exporter",mode="idle"}[5m])) * 100)) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: High CPU usage detected",
        DESCRIPTION = "{{$labels.instance}}: CPU usage is above 75% (current value is: {{ $value }})"
      }
  instance-availability.rules: |
    ALERT InstanceDown
      IF up == 0
      FOR 1m
      LABELS { severity = "page" }
      ANNOTATIONS {
        summary = "Instance {{ $labels.instance }} down",
        description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute.",
      }
  low-disk-space.rules: |
    ALERT NodeLowRootDisk
      IF ((node_filesystem_size{mountpoint="/root-disk"} - node_filesystem_free{mountpoint="/root-disk"} ) / node_filesystem_size{mountpoint="/root-disk"} * 100) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: Low root disk space",
        DESCRIPTION = "{{$labels.instance}}: Root disk usage is above 75% (current value is: {{ $value }})"
      }
    ALERT NodeLowDataDisk
      IF ((node_filesystem_size{mountpoint="/data-disk"} - node_filesystem_free{mountpoint="/data-disk"} ) / node_filesystem_size{mountpoint="/data-disk"} * 100) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: Low data disk space",
        DESCRIPTION = "{{$labels.instance}}: Data disk usage is above 75% (current value is: {{ $value }})"
      }
  mem-usage.rules: |
    ALERT NodeSwapUsage
      IF (((node_memory_SwapTotal-node_memory_SwapFree)/node_memory_SwapTotal)*100) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: Swap usage detected",
        DESCRIPTION = "{{$labels.instance}}: Swap usage usage is above 75% (current value is: {{ $value }})"
      }
    ALERT NodeMemoryUsage
      IF (((node_memory_MemTotal-node_memory_MemFree-node_memory_Cached)/(node_memory_MemTotal)*100)) > 75
      FOR 2m
      LABELS {
        severity="page"
      }
      ANNOTATIONS {
        SUMMARY = "{{$labels.instance}}: High memory usage detected",
        DESCRIPTION = "{{$labels.instance}}: Memory usage is above 75% (current value is: {{ $value }})"
      }
kind: ConfigMap
metadata:
  creationTimestamp: null
  name: prometheus-rules
  namespace: monitoring
kubectl create -f prometheus-rules.yaml
# Service account the Prometheus pod runs as.
kubectl create sa prometheus-k8s -n monitoring
# Write the Prometheus server Deployment manifest, then create it.
cat << EOF > prometheus-core-deploy.yaml
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: prometheus-core
  namespace: monitoring
  labels:
    app: prometheus
    component: core
spec:
  replicas: 1
  template:
    metadata:
      name: prometheus-main
      labels:
        app: prometheus
        component: core
    spec:
      serviceAccountName: prometheus-k8s
      containers:
        - name: prometheus
          image: prom/prometheus:v1.7.1
          args:
            - '-storage.local.retention=12h'
            - '-storage.local.memory-chunks=500000'
            - '-config.file=/etc/prometheus/prometheus.yaml'
            - '-alertmanager.url=http://alertmanager:9093/'
          ports:
            - name: webui
              containerPort: 9090
          resources:
            requests:
              cpu: 500m
              memory: 500M
            limits:
              cpu: 500m
              memory: 500M
          volumeMounts:
            # prometheus.yaml from the prometheus-core ConfigMap.
            - name: config-volume
              mountPath: /etc/prometheus
            # *.rules files from the prometheus-rules ConfigMap.
            - name: rules-volume
              mountPath: /etc/prometheus-rules
      volumes:
        - name: config-volume
          configMap:
            name: prometheus-core
        - name: rules-volume
          configMap:
            name: prometheus-rules
EOF
kubectl create -f prometheus-core-deploy.yaml
# Write the NodePort Service that exposes the Prometheus web UI, then create it.
cat << EOF > prometheus-core-service.yaml
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: monitoring
  labels:
    app: prometheus
    component: core
  annotations:
    prometheus.io/scrape: 'true'
spec:
  type: NodePort
  ports:
    - port: 9090
      protocol: TCP
      name: webui
  selector:
    app: prometheus
    component: core
EOF
kubectl create -f prometheus-core-service.yaml
grafana
# Write the Grafana Deployment manifest, then create it.
cat << EOF > grafana-core-deployment.yaml
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  name: grafana-core
  namespace: monitoring
  labels:
    app: grafana
    component: core
spec:
  replicas: 1
  template:
    metadata:
      labels:
        app: grafana
        component: core
    spec:
      containers:
        - image: docker.io/grafana/grafana:latest
          name: grafana-core
          resources:
            # keep request = limit to keep this container in guaranteed class
            limits:
              cpu: 100m
              memory: 100Mi
            requests:
              cpu: 100m
              memory: 100Mi
          ports:
            - name: grafana
              containerPort: 3000
          env:
            # This variable is required to setup templates in Grafana.
            # The following env variables are required to make Grafana accessible via
            # the kubernetes api-server proxy. On production clusters, we recommend
            # removing these env variables, setup auth for grafana, and expose the grafana
            # service using a LoadBalancer or a public IP.
            - name: GF_AUTH_BASIC_ENABLED
              value: "false"
            - name: GF_AUTH_ANONYMOUS_ENABLED
              value: "true"
            - name: GF_AUTH_ANONYMOUS_ORG_ROLE
              value: Admin
            # - name: GF_SERVER_ROOT_URL
            #   value: /api/v1/proxy/namespaces/monitoring/services/grafana/
          volumeMounts:
            - name: grafana-persistent-storage
              mountPath: /var
      volumes:
        # A volume takes exactly one source. emptyDir keeps the data only for
        # the lifetime of the pod; to keep it on the node instead, replace
        # the emptyDir line with:
        #   hostPath:
        #     path: /grafanaData
        - name: grafana-persistent-storage
          emptyDir: {}
EOF
kubectl create -f grafana-core-deployment.yaml
# Write the NodePort Service that exposes Grafana on node port 31000, then
# create it.
cat << EOF > grafana-core-service.yaml
apiVersion: v1
kind: Service
metadata:
  name: grafana
  namespace: monitoring
  labels:
    app: grafana
    component: core
  # annotations:
  #   prometheus.io/scrape: 'true'
spec:
  type: NodePort
  ports:
    - port: 3000
      nodePort: 31000
  selector:
    app: grafana
    component: core
EOF
kubectl create -f grafana-core-service.yaml
grafana模板
{
"annotations": {
"list": []
},
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": 21,
"links": [],
"refresh": false,
"rows": [
{
"collapse": false,
"height": 282,
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"fill": 0,
"height": "",
"hideTimeOverride": false,
"id": 1,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"hideEmpty": false,
"hideZero": false,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"minSpan": null,
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(container_memory_usage_bytes{pod_name=\"$pod\", namespace=\"$namespace\"}) by (namespace,pod_name)",
"format": "time_series",
"interval": "30s",
"intervalFactor": 1,
"legendFormat": "total",
"refId": "B",
"step": 30
},
{
"expr": "sum(container_memory_rss{namespace=\"$namespace\",pod_name=\"$pod\"}) by (namespace,pod_name)",
"format": "time_series",
"hide": false,
"interval": "30s",
"intervalFactor": 1,
"legendFormat": "rss",
"metric": "",
"refId": "A",
"step": 30
},
{
"expr": "sum(container_memory_cache{namespace=\"$namespace\",pod_name=\"$pod\"}) by (namespace,pod_name)",
"format": "time_series",
"interval": "30s",
"intervalFactor": 1,
"legendFormat": "cache",
"refId": "D",
"step": 30
},
{
"expr": "sum(container_memory_swap{namespace=\"$namespace\",pod_name=\"$pod\"}) by (namespace,pod_name)",
"format": "time_series",
"interval": "30s",
"intervalFactor": 1,
"legendFormat": "swap",
"refId": "C",
"step": 30
},
{
"expr": "sum(container_memory_failures_total{namespace=\"$namespace\",pod_name=\"$pod\"}) by (namespace,pod_name)",
"format": "time_series",
"hide": true,
"intervalFactor": 2,
"legendFormat": "failures_total",
"refId": "E",
"step": 20
},
{
"expr": "sum(container_memory_failcnt{namespace=\"$namespace\",pod_name=\"$pod\"}) by (namespace,pod_name)",
"format": "time_series",
"hide": true,
"intervalFactor": 2,
"legendFormat": "failcnt",
"refId": "F",
"step": 20
},
{
"expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\",pod_name=\"$pod\"}) by (namespace,pod_name)",
"format": "time_series",
"hide": true,
"intervalFactor": 2,
"legendFormat": "working_set",
"refId": "G",
"step": 20
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "内存使用量",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"transparent": false,
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"total"
]
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"decimals": null,
"description": "获取CPU资源使用情况,判断方式现在时刻和一分钟前的数据进行对比。",
"fill": 0,
"id": 2,
"legend": {
"alignAsTable": true,
"avg": false,
"current": true,
"hideEmpty": false,
"hideZero": false,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"minSpan": null,
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [],
"spaceLength": 10,
"span": 6,
"stack": true,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(container_cpu_system_seconds_total{namespace=\"$namespace\",pod_name=\"$pod\"}[1m])) by (namespace,pod_name)",
"format": "time_series",
"hide": false,
"interval": "30s",
"intervalFactor": 1,
"legendFormat": "system",
"metric": "",
"refId": "A",
"step": 30
},
{
"expr": "sum(rate(container_cpu_user_seconds_total{namespace=\"$namespace\",pod_name=\"$pod\"}[1m])) by (namespace,pod_name)",
"format": "time_series",
"interval": "30s",
"intervalFactor": 1,
"legendFormat": "user",
"refId": "C",
"step": 30
},
{
"expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\",pod_name=\"$pod\"}[1m])) by (namespace,pod_name)",
"format": "time_series",
"interval": "30s",
"intervalFactor": 1,
"legendFormat": "total",
"refId": "B",
"step": 30
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "CPU使用量(核)",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"transparent": false,
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
"total"
]
},
"yaxes": [
{
"format": "short",
"label": "",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "所有pod",
"titleSize": "h6"
},
{
"collapse": false,
"height": 199,
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"fill": 1,
"id": 6,
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(container_network_receive_packets_total{namespace=\"$namespace\",pod_name=\"$pod\"}[1m])) by (namespace,pod_name)",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "in",
"refId": "A",
"step": 120
},
{
"expr": "sum(rate(container_network_transmit_packets_total{namespace=\"$namespace\",pod_name=\"$pod\"}[1m])) by (namespace,pod_name)",
"format": "time_series",
"interval": "1m",
"intervalFactor": 2,
"legendFormat": "out",
"refId": "B",
"step": 120
},
{
"expr": "sum(rate(container_network_receive_errors_total{namespace=\"$namespace\",pod_name=\"$pod\"}[1m])) by (namespace,pod_name) + sum(rate(container_network_transmit_errors_total{namespace=\"$namespace\",pod_name=\"$pod\"}[1m])) by (namespace,pod_name) + sum(rate(container_network_receive_packets_dropped_total{namespace=\"$namespace\",pod_name=\"$pod\"}[1m])) by (namespace,pod_name) + sum(rate(container_network_transmit_packets_dropped_total{namespace=\"$namespace\",pod_name=\"$pod\"}[1m])) by (namespace,pod_name)",
"format": "time_series",
"interval": "1m",
"intervalFactor": 1,
"legendFormat": "error",
"refId": "C",
"step": 60
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "数据包",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "pps",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "pps",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"fill": 0,
"id": 5,
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(rate(container_network_receive_bytes_total{pod_name=\"$pod\", namespace=\"$namespace\"}[1m])*1) by (namespace,container_name)",
"format": "time_series",
"interval": "1m",
"intervalFactor": 1,
"legendFormat": "in",
"metric": "",
"refId": "A",
"step": 60
},
{
"expr": "sum(rate(container_network_transmit_bytes_total{pod_name=\"$pod\", namespace=\"$namespace\"}[1m])*1) by (namespace,container_name)",
"format": "time_series",
"interval": "1m",
"intervalFactor": 1,
"legendFormat": "out",
"refId": "B",
"step": 60
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "网络流量",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bps",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "bps",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6"
},
{
"collapse": false,
"height": 163,
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"fill": 0,
"hideTimeOverride": false,
"id": 3,
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"sortDesc": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(container_fs_limit_bytes{namespace=\"$namespace\",pod_name=\"$pod\"}) by (namespace,pod_name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "total",
"refId": "A",
"step": 4
},
{
"expr": "sum(container_fs_usage_bytes{namespace=\"$namespace\",pod_name=\"$pod\"}) by (namespace,pod_name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "usage",
"refId": "B",
"step": 4
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "硬盘使用量",
"tooltip": {
"shared": false,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "decbytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": null,
"fill": 0,
"id": 4,
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum(container_fs_reads_total{namespace=\"$namespace\",pod_name=\"$pod\"}) by (namespace,pod_name)",
"format": "time_series",
"interval": "30s",
"intervalFactor": 1,
"legendFormat": "read",
"refId": "A",
"step": 30
},
{
"expr": "sum(container_fs_writes_total{namespace=\"$namespace\",pod_name=\"$pod\"}) by (namespace,pod_name)",
"format": "time_series",
"interval": "30s",
"intervalFactor": 1,
"legendFormat": "write",
"refId": "B",
"step": 30
},
{
"expr": "sum(container_fs_io_current{namespace=\"$namespace\",pod_name=\"$pod\"}) by (namespace,pod_name)",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "current",
"refId": "C",
"step": 4
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
"title": "IO",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6"
}
],
"schemaVersion": 14,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"allValue": null,
"current": {
"selected": true,
"text": "monitoring",
"value": "monitoring"
},
"datasource": "prometheus",
"hide": 0,
"includeAll": false,
"label": "namespace",
"multi": false,
"name": "namespace",
"options": [],
"query": "label_values(kube_pod_info{namespace=~\".+\"},namespace)",
"refresh": 1,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"selected": true,
"text": "kube-state-metrics-2802505745",
"value": "kube-state-metrics-2802505745"
},
"datasource": "prometheus",
"hide": 0,
"includeAll": false,
"label": "创建者",
"multi": false,
"name": "created_by_name",
"options": [],
"query": "label_values(kube_pod_info{created_by_name=~\".+\",namespace=\"$namespace\"} ,created_by_name)",
"refresh": 1,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"selected": true,
"text": "kube-state-metrics-2802505745-k24zk",
"value": "kube-state-metrics-2802505745-k24zk"
},
"datasource": "prometheus",
"hide": 0,
"includeAll": false,
"label": "pod",
"multi": false,
"name": "pod",
"options": [],
"query": "label_values(kube_pod_info{created_by_name=\"$created_by_name\",namespace=\"$namespace\",pod=~\".+\"} ,pod)",
"refresh": 1,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-30m",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "",
"title": "基于资源对象对pod进行监控",
"version": 5
}