prometheus
1. 部署
从GitHub下载软件
prometheus-2.14.0-rc.0.linux-amd64.tar.gz
解压
mkdir /opt/src
tar -xvf prometheus-2.14.0-rc.0.linux-amd64.tar.gz -C /opt/
cd /opt/
ln -s prometheus-2.14.0-rc.0.linux-amd64/ prometheus
添加用户
groupadd prometheus
useradd -g prometheus -s /sbin/nologin prometheus
创建数据目录+权限
chown -R prometheus:prometheus /opt/prometheus
mkdir -p /data/prometheus
chown -R prometheus:prometheus /data/prometheus
配置启动文件
vi /usr/lib/systemd/system/prometheus.service
[Unit]
Description=Prometheus
Documentation=https://prometheus.io/
After=network.target
[Service]
# Type设置为notify时,服务会不断重启
Type=simple
User=prometheus
ExecStart=/opt/prometheus/prometheus --config.file=/opt/prometheus/prometheus.yml --storage.tsdb.path=/data/prometheus
Restart=on-failure
[Install]
WantedBy=multi-user.target
chown prometheus:prometheus /usr/lib/systemd/system/prometheus.service
安装node_exporter
tar -xvf node_exporter-0.18.1.linux-amd64.tar.gz -C /opt/
cd /opt/
ln -s node_exporter-0.18.1.linux-amd64/ node_exporter
groupadd prometheus
useradd -g prometheus -s /sbin/nologin prometheus
chown -R prometheus:prometheus /opt/node_exporter
vi /usr/lib/systemd/system/node_exporter.service
[Unit]
Description=node_exporter
Documentation=https://prometheus.io/
After=network.target
[Service]
Type=simple
User=prometheus
ExecStart=/opt/node_exporter/node_exporter --collector.systemd --collector.systemd.unit-whitelist="(docker|sshd).service" --collector.textfile.directory /var/lib/node_exporter/textfile_collector/
Restart=on-failure
[Install]
WantedBy=multi-user.target
systemctl enable node_exporter
systemctl start node_exporter
mkdir -p /var/lib/node_exporter/textfile_collector
cat /var/lib/node_exporter/textfile_collector/metadata.prom
metadata{role="docker_server",datacenter="NJ"} 1
prometheus配置文件添加
  - job_name: 'node'
    static_configs:
      - targets: ['10.4.7.11:9100']
热重启prometheus
kill -HUP <pid>
node节点运行cadvisor
docker run --volume=/:/rootfs:ro --volume=/var/run:/var/run:rw --volume=/sys:/sys:ro --volume=/var/lib/docker/:/var/lib/docker:ro --volume=/dev/disk/:/dev/disk:ro --publish=8080:8080 --detach=true --name=cadvisor google/cadvisor:latest
再次添加prometheus配置
  - job_name: 'docker'
    static_configs:
      - targets: ['10.4.7.11:8080']
添加rules规则文件,在prometheus.yml同级目录创建
[root@localhost prometheus]# mkdir rules
[root@localhost prometheus]# cd rules/
[root@localhost rules]# touch node_rules.yml
修改prometheus.yml文件
rule_files:
  - "rules/node_rules.yml"
node_rules.yml文件添加规则
groups:
  - name: node_rules
    rules:
      - record: instance:node_cpu:avg_rate5m
        expr: 100 - avg (irate(node_cpu_seconds_total{job="node",mode="idle"}[5m])) by (instance) * 100
      - record: instance:node_cpus:count
        expr: count by (instance)(node_cpu_seconds_total{mode="idle"})
      - record: instance:node_cpu_saturation_load1
        expr: node_load1 > on (instance) 2 * count by (instance)(node_cpu_seconds_total{mode="idle"})
      - record: instance:node_memory_usage:percentage
        expr: (node_memory_MemTotal_bytes - (node_memory_MemFree_bytes + node_memory_Cached_bytes + node_memory_Buffers_bytes)) / node_memory_MemTotal_bytes * 100
      - record: instance:node_memory_swap_io_bytes:sum_rate
        expr: 1024 * sum by (instance) (
          (rate(node_vmstat_pgpgin[1m])
          + rate(node_vmstat_pgpgout[1m]))
          )
      - record: instance:root:node_filesystem_usage:percentage
        expr: (node_filesystem_size_bytes{mountpoint="/"} - node_filesystem_free_bytes{mountpoint="/"}) / node_filesystem_size_bytes{mountpoint="/"} * 100
添加完后可通过promtool检查规则文件
./promtool check rules rules/node_rules.yml
检查通过后需要重载prometheus配置文件
kill -HUP 2018
在prom页面中即可搜索到键值
instance:node_cpu:avg_rate5m
下载安装grafana
wget https://dl.grafana.com/oss/release/grafana-6.7.1-1.x86_64.rpm
yum -y install grafana-6.7.1-1.x86_64.rpm
systemctl start grafana-server.service