部署
官网地址: https://prometheus.io/
rsyslog配置
# Route each monitoring component's syslog output (matched on its program name)
# to a dedicated file under /opt/logs.
# The heredoc delimiter is quoted ('EOF') so the shell writes $programname
# literally; with an unquoted delimiter the shell would expand it to an empty
# string and every rule in the generated file would be broken.
[root@ops ~]# cat > /etc/rsyslog.d/syslog_server.conf << 'EOF'
if $programname == 'alertmanager' then /opt/logs/alertmanager.log
if $programname == 'prometheus' then /opt/logs/prometheus.log
if $programname == 'node_exporter' then /opt/logs/node_exporter.log
if $programname == 'process_exporter' then /opt/logs/process_exporter.log
if $programname == 'mysql_exporter' then /opt/logs/mysql_exporter.log
if $programname == 'redis_exporter' then /opt/logs/redis_exporter.log
if $programname == 'blackbox_exporter' then /opt/logs/blackbox_exporter.log
if $programname == 'mysqld_exporter' then /opt/logs/mysqld_exporter.log
if $programname == 'process-exporter' then /opt/logs/process-exporter.log
if $programname == 'pushgateway' then /opt/logs/pushgateway.log
if $programname == 'm3coordinator' then /opt/logs/m3coordinator.log
if $programname == 'm3dbnode' then /opt/logs/m3dbnode.log
EOF
logrotate
# Rotate every *.log file under /opt/logs once a day and keep 15 rotations.
# Rotated files get a date suffix and are compressed one cycle later
# (delaycompress). copytruncate is used so the writing process does not have
# to reopen its log file after rotation.
# The heredoc delimiter is quoted so the content is written verbatim.
[root@ops ~]# cat > /etc/logrotate.d/logrotate.conf << 'EOF'
/opt/logs/*.log {
    daily
    missingok
    notifempty
    dateext
    compress
    delaycompress
    copytruncate
    rotate 15
}
EOF
重启rsyslog
# Create the log directory before restarting rsyslog: the rules write to
# /opt/logs/*.log, and rsyslog does not create missing parent directories
# unless createDirs is enabled. -p keeps the step safe to re-run.
[root@ops ~]# mkdir -p /opt/logs
[root@ops ~]# systemctl restart rsyslog.service
[root@ops ~]# systemctl status rsyslog.service
Prometheus部署
下载软件
# Download the Prometheus 2.29.1 release tarball, unpack it into /opt, and
# create a version-independent symlink /opt/prometheus pointing at it.
wget https://github.com/prometheus/prometheus/releases/download/v2.29.1/prometheus-2.29.1.linux-amd64.tar.gz
tar xf prometheus-2.29.1.linux-amd64.tar.gz -C /opt/
cd /opt/
ln -s prometheus-2.29.1.linux-amd64 prometheus
准备启动文件
# Install the systemd unit for Prometheus.
# - stdout/stderr go to syslog under the identifier "prometheus", which the
#   rsyslog rule for $programname == 'prometheus' matches.
# - --web.enable-lifecycle enables the HTTP /-/reload endpoint.
# - RestartSec (not "RestartSecs", which systemd ignores as an unknown key)
#   sets the delay before a restart after a failure.
# The heredoc delimiter is quoted so the unit text is written verbatim.
cat > /usr/lib/systemd/system/prometheus.service << 'EOF'
[Unit]
Description="prometheus"
Documentation=https://prometheus.io/
After=network.target

[Service]
Type=simple
ExecStart=/opt/prometheus/prometheus --config.file=/opt/prometheus/prometheus.yml --storage.tsdb.path=/opt/prometheus/data --web.enable-lifecycle
Restart=on-failure
RestartSec=5s
SuccessExitStatus=0
LimitNOFILE=65536
StandardOutput=syslog
StandardError=syslog
SyslogIdentifier=prometheus

[Install]
WantedBy=multi-user.target
EOF
启动服务
# Make systemd re-read unit files so a newly written or edited
# prometheus.service is picked up, then start the service and check its state.
systemctl daemon-reload
systemctl start prometheus.service
systemctl status prometheus.service
浏览器访问
http://192.168.168.106:9090/
热重启
# Requires Prometheus to be started with --web.enable-lifecycle on its command line.
curl -X POST http://localhost:9090/-/reload
主配置文件
[root@ops prometheus]# cat prometheus.yml
# Global settings
global:
  scrape_interval: 15s       # how often targets are scraped
  evaluation_interval: 15s   # how often alerting and pre-aggregation (recording) rules are evaluated
  scrape_timeout: 10s        # per-scrape timeout
  query_log_file: /opt/logs/prometheus_query_log   # query log, including per-stage timing statistics
  # Global label set: the labels below are added to data coming from this
  # instance when it is sent to external systems.
  external_labels:
    account: 'huawei-main'

# Alertmanager settings
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

# Alerting and pre-aggregation (recording) rule files
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# Scrape settings
scrape_configs:
  - job_name: "prometheus"
    honor_timestamps: true
    scrape_interval: 15s
    scrape_timeout: 10s
    metrics_path: /metrics
    scheme: http
    file_sd_configs:
      - files:
          - targets/prometheus-*.yml
        refresh_interval: 1m

  - job_name: "nodes"
    honor_timestamps: true
    scrape_interval: 15s
    scrape_timeout: 10s
    metrics_path: /metrics
    scheme: http
    file_sd_configs:
      - files:
          - targets/nodes-*.yml
        refresh_interval: 1m

  - job_name: "mysqld"
    honor_timestamps: true
    scrape_interval: 15s
    scrape_timeout: 10s
    metrics_path: /metrics
    scheme: http
    file_sd_configs:
      - files:
          - targets/mysql-*.yml
        refresh_interval: 1m

  - job_name: "process"
    honor_timestamps: true
    scrape_interval: 15s
    scrape_timeout: 10s
    metrics_path: /metrics
    scheme: http
    file_sd_configs:
      - files:
          - targets/process-*.yml
        refresh_interval: 1m

# Remote read
remote_read:
  # prometheus
  #- url: http://prometheus/v1/read
  #  read_recent: true

  # m3db
  #- url: "http://m3coordinator-read:7201/api/v1/prom/remote/read"
  #  read_recent: true

# Remote write
remote_write:
  #- url: "http://m3coordinator-write:7201/api/v1/prom/remote/write"
  #  queue_config:
  #    capacity: 10000
  #    max_samples_per_send: 60000
  #  write_relabel_configs:
  #    - source_labels: [__name__]
  #      separator: ;
  #      # drop series whose metric name starts with one of these prefixes
  #      regex: '(kubelet_|apiserver_|container_fs_).*'
  #      replacement: $1
  #      action: drop