1. 创建一个namespace[kube-ops.yaml]
---
# kube-ops.yaml
# Dedicated namespace for the Prometheus monitoring components defined in
# the manifests that follow.
kind: Namespace
apiVersion: v1
metadata:
  name: kube-ops
2. 创建 Prometheus 配置文件(ConfigMap)[kube-config.yaml]
---
# kube-config.yaml
# Prometheus scrape configuration. The single key `prometheus.yml` holds the
# complete Prometheus config file as a literal block scalar.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: kube-ops
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      evaluation_interval: 15s

    scrape_configs:
      # API servers: keep only the `kubernetes` service endpoints in the
      # `default` namespace whose port is named `https`.
      - job_name: 'kubernetes-apiservers'
        kubernetes_sd_configs:
          - role: endpoints
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
            action: keep
            regex: default;kubernetes;https

      # Nodes: rewrite the discovered <host>:10250 address to <host>:9100
      # and copy every node label onto the target.
      - job_name: 'kubernetes-nodes'
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - source_labels: [__address__]
            regex: '(.*):10250'
            replacement: '${1}:9100'
            target_label: __address__
            action: replace
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)

      # Kubelet: rewrite the discovered <host>:10250 address to <host>:10255.
      # NOTE(review): 10255 is presumably the kubelet read-only port, which
      # many clusters disable - confirm it is enabled on the target nodes.
      - job_name: 'kubernetes-kubelet'
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - source_labels: [__address__]
            regex: '(.*):10250'
            replacement: '${1}:10255'
            target_label: __address__
            action: replace
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)

      # cAdvisor: scrape each node through the API server proxy path
      # /api/v1/nodes/<node>/proxy/metrics/cadvisor.
      - job_name: 'kubernetes-cadvisor'
        kubernetes_sd_configs:
          - role: node
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
          - target_label: __address__
            replacement: kubernetes.default.svc:443
          - source_labels: [__meta_kubernetes_node_name]
            regex: (.+)
            target_label: __metrics_path__
            replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor

      # Service endpoints: opt-in via the prometheus.io/scrape annotation;
      # prometheus.io/scheme, prometheus.io/path and prometheus.io/port
      # override the scrape scheme, metrics path and port.
      - job_name: 'kubernetes-service-endpoints'
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
            action: keep
            regex: 'true'
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
            action: replace
            target_label: __scheme__
            regex: (https?)
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          # `\d` needs its backslash: a bare `d` only matches the literal
          # letter, so the port annotation would never be applied.
          - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
            action: replace
            target_label: __address__
            regex: '([^:]+)(?::\d+)?;(\d+)'
            replacement: $1:$2
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            action: replace
            target_label: kubernetes_name

      # Services: probed through a blackbox exporter (module http_2xx) when
      # annotated with prometheus.io/probe. The service address becomes the
      # `target` query parameter and the `instance` label.
      # NOTE(review): blackbox-exporter.example.com:9115 looks like a
      # placeholder - replace it with the real exporter address.
      - job_name: 'kubernetes-services'
        kubernetes_sd_configs:
          - role: service
        metrics_path: /probe
        params:
          module: [http_2xx]
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
            action: keep
            regex: 'true'
          - source_labels: [__address__]
            target_label: __param_target
          - target_label: __address__
            replacement: blackbox-exporter.example.com:9115
          - source_labels: [__param_target]
            target_label: instance
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            target_label: kubernetes_name

      # Ingresses: probed through the same blackbox exporter. The probe
      # target is assembled as <scheme>://<address><path>. metrics_path and
      # params mirror the 'kubernetes-services' job; without them the
      # exporter's own /metrics page would be scraped instead of a probe.
      - job_name: 'kubernetes-ingresses'
        kubernetes_sd_configs:
          - role: ingress
        metrics_path: /probe
        params:
          module: [http_2xx]
        relabel_configs:
          - source_labels: [__meta_kubernetes_ingress_annotation_prometheus_io_probe]
            action: keep
            regex: 'true'
          - source_labels: [__meta_kubernetes_ingress_scheme, __address__, __meta_kubernetes_ingress_path]
            regex: (.+);(.+);(.+)
            replacement: ${1}://${2}${3}
            target_label: __param_target
          - target_label: __address__
            replacement: blackbox-exporter.example.com:9115
          - source_labels: [__param_target]
            target_label: instance
          - action: labelmap
            regex: __meta_kubernetes_ingress_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_ingress_name]
            target_label: kubernetes_name

      # Pods: opt-in via the prometheus.io/scrape annotation;
      # prometheus.io/path and prometheus.io/port override path and port.
      - job_name: 'kubernetes-pods'
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
            action: keep
            regex: 'true'
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          # Same port rewrite as the service-endpoints job; `\d` must keep
          # its backslash to match digits.
          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
            action: replace
            regex: '([^:]+)(?::\d+)?;(\d+)'
            replacement: $1:$2
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: kubernetes_pod_name
3. Prometheus 权限设置(RBAC)[prom-rbac.yaml]
---
# prom-rbac.yaml
# Identity and cluster-wide read permissions for Prometheus.
#
# ServiceAccount that the Prometheus pods run as.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: kube-ops
---
# Read-only access to the objects that Kubernetes service discovery lists
# and watches, plus the metrics endpoints that are scraped.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  - apiGroups:
      - ""
    resources:
      - nodes
      - services
      - endpoints
      - pods
      - nodes/proxy
    verbs:
      - get
      - list
      - watch
  # Needed by `role: ingress` service discovery; both API groups are listed
  # so the rule works on older (extensions) and newer (networking.k8s.io)
  # clusters.
  - apiGroups:
      - extensions
      - networking.k8s.io
    resources:
      - ingresses
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - ""
    resources:
      - configmaps
      - nodes/metrics
    verbs:
      - get
  - nonResourceURLs:
      - /metrics
    verbs:
      - get
---
# Binds the ClusterRole to the ServiceAccount. Uses the stable `v1` RBAC API,
# matching the ClusterRole above; `v1beta1` is no longer served by
# Kubernetes 1.22 and later.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: kube-ops
4. 部署pod [prom-deploy.yaml]
---
# prom-deploy.yaml
# Prometheus server Deployment. Uses the stable `apps/v1` API; `apps/v1beta2`
# is no longer served by Kubernetes 1.16 and later.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    name: prometheus-deployment
  name: prometheus
  namespace: kube-ops
spec:
  # Each replica gets its own emptyDir (see `volumes`), so the replicas
  # scrape and store data independently of each other.
  replicas: 2
  selector:
    matchLabels:
      app: prometheus
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      containers:
        # NOTE(review): `latest` is a moving tag - consider pinning a
        # specific Prometheus version.
        - image: prom/prometheus:latest
          name: prometheus
          command:
            - "/bin/prometheus"
          args:
            - "--config.file=/etc/prometheus/prometheus.yml"
            - "--storage.tsdb.path=/prometheus"
            # `--storage.tsdb.retention` is the deprecated spelling of this
            # flag; `.time` is the supported form (Prometheus 2.7+).
            - "--storage.tsdb.retention.time=24h"
          ports:
            - containerPort: 9090
              protocol: TCP
          volumeMounts:
            - mountPath: "/prometheus"
              name: data
            - mountPath: "/etc/prometheus"
              name: config-volume
          resources:
            requests:
              cpu: 100m
              memory: 100Mi
            limits:
              cpu: 500m
              memory: 2500Mi
      serviceAccountName: prometheus
      volumes:
        # TSDB storage; emptyDir contents do not survive pod deletion.
        - name: data
          emptyDir: {}
        # Provides /etc/prometheus/prometheus.yml from the ConfigMap.
        - name: config-volume
          configMap:
            name: prometheus-config
5. 部署 Prometheus Service [prom-svc.yaml]
---
# prom-svc.yaml
# NodePort Service that publishes port 9090 of the pods labelled
# `app: prometheus` on node port 30003.
apiVersion: v1
kind: Service
metadata:
  name: prometheus
  namespace: kube-ops
  labels:
    app: prometheus
spec:
  type: NodePort
  selector:
    app: prometheus
  ports:
    - nodePort: 30003
      port: 9090
      targetPort: 9090
6. 部署 node-exporter 收集节点信息 [node-ex.yaml]
---
# node-ex.yaml
# Runs one node-exporter pod per node. Uses the stable `apps/v1` API;
# `extensions/v1beta1` DaemonSets are no longer served by Kubernetes 1.16
# and later.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: kube-ops
  labels:
    k8s-app: node-exporter
spec:
  # `apps/v1` requires an explicit selector that matches the pod template
  # labels.
  selector:
    matchLabels:
      k8s-app: node-exporter
  template:
    metadata:
      labels:
        k8s-app: node-exporter
    spec:
      # The pod shares the host's PID, IPC and network namespaces, so port
      # 9100 is opened directly on the node's address.
      hostPID: true
      hostIPC: true
      hostNetwork: true
      dnsPolicy: ClusterFirstWithHostNet
      containers:
        # NOTE(review): `latest` is a moving tag - consider pinning a
        # specific node-exporter version.
        - image: prom/node-exporter:latest
          name: node-exporter
          ports:
            - containerPort: 9100
              protocol: TCP
              name: http
7. 部署 kube-state-metrics [各个文件在下面分别列出]
---
# [kube-state-metrics-cluster-role-binding.yaml]
# Grants the cluster-wide `kube-state-metrics` ClusterRole to the
# kube-state-metrics ServiceAccount in kube-system.
#
# kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: kube-state-metrics
subjects:
  - kind: ServiceAccount
    name: kube-state-metrics
    namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kube-state-metrics
---
# [kube-state-metrics-role-binding.yaml]
# Grants the namespaced `kube-state-metrics-resizer` Role to the same
# ServiceAccount.
#
# kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: kube-state-metrics
  namespace: kube-system
subjects:
  - kind: ServiceAccount
    name: kube-state-metrics
    namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: kube-state-metrics-resizer
---
# [kube-state-metrics-service.yaml]
# Exposes the two named container ports. The annotation value is quoted so
# that it stays a string.
kind: Service
apiVersion: v1
metadata:
  name: kube-state-metrics
  namespace: kube-system
  labels:
    k8s-app: kube-state-metrics
  annotations:
    prometheus.io/scrape: 'true'
spec:
  selector:
    k8s-app: kube-state-metrics
  ports:
    - name: http-metrics
      protocol: TCP
      port: 8080
      targetPort: http-metrics
    - name: telemetry
      protocol: TCP
      port: 8081
      targetPort: telemetry
---
# [kube-state-metrics-cluster-role.yaml]
# list/watch access to the resource kinds enumerated below.
#
# kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: kube-state-metrics
rules:
  - apiGroups:
      - ""
    resources:
      - configmaps
      - secrets
      - nodes
      - pods
      - services
      - resourcequotas
      - replicationcontrollers
      - limitranges
      - persistentvolumeclaims
      - persistentvolumes
      - namespaces
      - endpoints
    verbs:
      - list
      - watch
  - apiGroups:
      - extensions
    resources:
      - daemonsets
      - deployments
      - replicasets
      - ingresses
    verbs:
      - list
      - watch
  - apiGroups:
      - apps
    resources:
      - daemonsets
      - deployments
      - replicasets
      - statefulsets
    verbs:
      - list
      - watch
  - apiGroups:
      - batch
    resources:
      - cronjobs
      - jobs
    verbs:
      - list
      - watch
  - apiGroups:
      - autoscaling
    resources:
      - horizontalpodautoscalers
    verbs:
      - list
      - watch
  - apiGroups:
      - policy
    resources:
      - poddisruptionbudgets
    verbs:
      - list
      - watch
  - apiGroups:
      - certificates.k8s.io
    resources:
      - certificatesigningrequests
    verbs:
      - list
      - watch
---
# [kube-state-metrics-role.yaml]
# Namespaced permissions: read pods, and get/update the Deployment named
# `kube-state-metrics` in both the apps and extensions API groups.
#
# kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: kube-state-metrics-resizer
  namespace: kube-system
rules:
  - apiGroups:
      - ""
    resources:
      - pods
    verbs:
      - get
  - apiGroups:
      - apps
    resources:
      - deployments
    resourceNames:
      - kube-state-metrics
    verbs:
      - get
      - update
  - apiGroups:
      - extensions
    resources:
      - deployments
    resourceNames:
      - kube-state-metrics
    verbs:
      - get
      - update
---
# [kube-state-metrics-deployment.yaml]
# Single-replica kube-state-metrics Deployment; readiness is checked with
# an HTTP GET on /healthz, port 8080.
#
# Kubernetes version 1.8.x should use apps/v1beta2
# Kubernetes versions before 1.8.0 should use apps/v1beta1 or extensions/v1beta1
kind: Deployment
apiVersion: apps/v1
metadata:
  name: kube-state-metrics
  namespace: kube-system
spec:
  replicas: 1
  selector:
    matchLabels:
      k8s-app: kube-state-metrics
  template:
    metadata:
      labels:
        k8s-app: kube-state-metrics
    spec:
      serviceAccountName: kube-state-metrics
      containers:
        - name: kube-state-metrics
          image: quay.io/coreos/kube-state-metrics:v1.6.0
          ports:
            - containerPort: 8080
              name: http-metrics
            - containerPort: 8081
              name: telemetry
          readinessProbe:
            httpGet:
              path: /healthz
              port: 8080
            initialDelaySeconds: 5
            timeoutSeconds: 5
---
# [kube-state-metrics-service-account.yaml]
# ServiceAccount referenced by the Deployment and by both bindings.
kind: ServiceAccount
apiVersion: v1
metadata:
  name: kube-state-metrics
  namespace: kube-system