• k8s pod重启策略和健康检查实现应用自动修复


    1. 重启策略

    • Always:当容器终止退出后,总是重启容器,默认策略。

    • OnFailure:当容器异常退出(退出状态码非0)时,才重启容器。

    • Never:当容器终止退出,从不重启容器。

    2. 健康检查有以下三种类型:

    • livenessProbe(存活检查):如果检查失败,将杀死容器,根据Pod的restartPolicy来操作。

    • readinessProbe(就绪检查):如果检查失败,Kubernetes会把Pod从service endpoints中剔除。

    • startupProbe(启动检查):用于判断容器内的应用是否已经启动完成;在它成功之前,存活检查和就绪检查不会生效;如果检查失败,将杀死容器,根据Pod的restartPolicy来操作。

    3. 支持以下三种检查方法:

    • httpGet:发送HTTP请求,返回的状态码大于等于200且小于400为成功。
    • exec:执行Shell命令返回状态码是0为成功。
    • tcpSocket:向指定端口发起TCP Socket连接,连接建立成功即为成功。

    image

    4. 案例

    4.1 重启策略+健康检查

    [root@k8s-master yaml]# vim restart-pod.yaml 
    [root@k8s-master yaml]# cat restart-pod.yaml 
    apiVersion: v1
    kind: Pod
    metadata:
      name: probe-demo
      namespace: default
      labels:
        test: liveness
    spec:
      containers:
      - name: web
        image: nginx
        ports:
        - containerPort: 80
        readinessProbe:          # 就绪检查,如果失败,会把Pod从service endpoints中剔除
          tcpSocket:
            port: 80
          initialDelaySeconds: 30 #启动容器后多少秒健康检查
          periodSeconds: 10 #以后间隔多少秒检查一次
        livenessProbe:   # 存活检查,如果失败,将杀死容器,并根据restartPolicy重启
          tcpSocket:
            port: 80
        startupProbe:    # 启动的时候,检查端口
          tcpSocket:
            port: 80
    
    

    4.2 查看pod情况

    [root@k8s-master yaml]# kubectl apply -f restart-pod.yaml 
    pod/probe-demo unchanged
    

    4.3 创建service,以便观察探针的检查效果

    [root@k8s-master yaml]# kubectl get pods
    NAME         READY   STATUS    RESTARTS   AGE
    probe-demo   1/1     Running   0          20m
    
    [root@k8s-master yaml]# kubectl expose pod probe-demo --port=80 --target-port=80
    service/probe-demo exposed
    [root@k8s-master yaml]# kubectl get service
    NAME         TYPE        CLUSTER-IP       EXTERNAL-IP   PORT(S)   AGE
    kubernetes   ClusterIP   10.96.0.1        <none>        443/TCP   3d23h
    probe-demo   ClusterIP   10.104.161.168   <none>        80/TCP    12s
    [root@k8s-master yaml]# kubectl get pods -o wide           # 详细查看
    NAME         READY   STATUS    RESTARTS   AGE   IP             NODE        NOMINATED NODE   READINESS GATES
    probe-demo   1/1     Running   0          27m   10.244.36.90   k8s-node1   <none>           <none>
    

    5. pod案例2

    5.1 验证pod监控服务修复

    • 编写重启策略和健康检查(应用自动修复)

      [root@k8s-master pod]# kubectl create deployment check-pod --image=nginx --dry-run=client -o yaml > check-pod.yaml
      [root@k8s-master pod]# vim check-pod.yaml 
      [root@k8s-master pod]# cat check-pod.yaml 
      apiVersion: apps/v1
      kind: Deployment
      metadata:
        labels:
          app: check-pod
        name: check-pod
      spec:
        replicas: 3
        selector:
          matchLabels:
            app: check-pod
        template:
          metadata:
            labels:
              app: check-pod
          spec:
            containers:
            - image: nginx
              name: nginx
              livenessProbe:      # 存活检查,如果失败,将杀死容器,并根据restartPolicy重启
                httpGet:
                  path: /index.html
                  port: 80
                initialDelaySeconds: 30 #启动容器后多少秒健康检查
                periodSeconds: 10 #以后间隔多少秒检查一次
              readinessProbe:   # 就绪检查,如果失败,会把Pod从service endpoints中剔除
                httpGet:
                  path: /index.html
                  port: 80
                initialDelaySeconds: 30 #启动容器后多少秒健康检查
                periodSeconds: 10 #以后间隔多少秒检查一次
      
    • 启动服务

      [root@k8s-master pod]# kubectl apply -f check-pod.yaml 
      deployment.apps/check-pod created
      [root@k8s-master pod]# kubectl get pods
      NAME                                     READY   STATUS              RESTARTS   AGE
      check-pod-95d5577d8-9ggfg                0/1     ContainerCreating   0          5s
      check-pod-95d5577d8-mkkwm                0/1     ContainerCreating   0          5s
      check-pod-95d5577d8-zhz5h                0/1     ContainerCreating   0          5s
      nfs-client-provisioner-ff6b5d864-ht8wk   1/1     Running             0          9d
      
    • 查看其中一个pod 的日志

      [root@k8s-master pod]# kubectl logs -f check-pod-95d5577d8-9ggfg 
      /docker-entrypoint.sh: /docker-entrypoint.d/ is not empty, will attempt to perform configuration
      /docker-entrypoint.sh: Looking for shell scripts in /docker-entrypoint.d/
      /docker-entrypoint.sh: Launching /docker-entrypoint.d/10-listen-on-ipv6-by-default.sh
      10-listen-on-ipv6-by-default.sh: info: Getting the checksum of /etc/nginx/conf.d/default.conf
      10-listen-on-ipv6-by-default.sh: info: Enabled listen on IPv6 in /etc/nginx/conf.d/default.conf
      /docker-entrypoint.sh: Launching /docker-entrypoint.d/20-envsubst-on-templates.sh
      /docker-entrypoint.sh: Launching /docker-entrypoint.d/30-tune-worker-processes.sh
      /docker-entrypoint.sh: Configuration complete; ready for start up
      2021/08/17 12:36:47 [notice] 1#1: using the "epoll" event method
      2021/08/17 12:36:47 [notice] 1#1: nginx/1.21.1
      2021/08/17 12:36:47 [notice] 1#1: built by gcc 8.3.0 (Debian 8.3.0-6) 
      2021/08/17 12:36:47 [notice] 1#1: OS: Linux 3.10.0-1160.11.1.el7.x86_64
      2021/08/17 12:36:47 [notice] 1#1: getrlimit(RLIMIT_NOFILE): 1048576:1048576
      2021/08/17 12:36:47 [notice] 1#1: start worker processes
      2021/08/17 12:36:47 [notice] 1#1: start worker process 31
      2021/08/17 12:36:47 [notice] 1#1: start worker process 32
      172.17.0.16 - - [17/Aug/2021:12:37:20 +0000] "GET /index.html HTTP/1.1" 200 612 "-" "kube-probe/1.19" "-"
      172.17.0.16 - - [17/Aug/2021:12:37:26 +0000] "GET /index.html HTTP/1.1" 200 612 "-" "kube-probe/1.19" "-"
      172.17.0.16 - - [17/Aug/2021:12:37:30 +0000] "GET /index.html HTTP/1.1" 200 612 "-" "kube-probe/1.19" "-"
      172.17.0.16 - - [17/Aug/2021:12:37:36 +0000] "GET /index.html HTTP/1.1" 200 612 "-" "kube-probe/1.19" "-"
      172.17.0.16 - - [17/Aug/2021:12:37:40 +0000] "GET /index.html HTTP/1.1" 200 612 "-" "kube-probe/1.19" "-"
      172.17.0.16 - - [17/Aug/2021:12:37:46 +0000] "GET /index.html HTTP/1.1" 200 612 "-" "kube-probe/1.19" "-"
      172.17.0.16 - - [17/Aug/2021:12:37:50 +0000] "GET /index.html HTTP/1.1" 200 612 "-" "kube-probe/1.19" "-"
      172.17.0.16 - - [17/Aug/2021:12:37:56 +0000] "GET /index.html HTTP/1.1" 200 612 "-" "kube-probe/1.19" "-"
      172.17.0.16 - - [17/Aug/2021:12:38:00 +0000] "GET /index.html HTTP/1.1" 200 612 "-" "kube-probe/1.19" "-"
      172.17.0.16 - - [17/Aug/2021:12:38:06 +0000] "GET /index.html HTTP/1.1" 200 612 "-" "kube-probe/1.19" "-"
      172.17.0.16 - - [17/Aug/2021:12:38:10 +0000] "GET /index.html HTTP/1.1" 200 612 "-" "kube-probe/1.19" "-"
      172.17.0.16 - - [17/Aug/2021:12:38:16 +0000] "GET /index.html HTTP/1.1" 200 612 "-" "kube-probe/1.19" "-"
      
    • 我们来持续监控pod的运行状态

      [root@k8s-master pod]# kubectl get pods -w
      NAME                                     READY   STATUS    RESTARTS   AGE
      check-pod-95d5577d8-9ggfg                1/1     Running   0          30m
      check-pod-95d5577d8-mkkwm                1/1     Running   0          30m
      check-pod-95d5577d8-zhz5h                1/1     Running   0          30m
      nfs-client-provisioner-ff6b5d864-ht8wk   1/1     Running   0          9d
      
    • 我们再开一个终端,删除其中一个pod里的页面

      [root@k8s-master ~]# kubectl get pods
      NAME                                     READY   STATUS    RESTARTS   AGE
      check-pod-95d5577d8-9ggfg                1/1     Running   0          30m
      check-pod-95d5577d8-mkkwm                1/1     Running   0          30m
      check-pod-95d5577d8-zhz5h                1/1     Running   0          30m
      nfs-client-provisioner-ff6b5d864-ht8wk   1/1     Running   0          9d
      [root@k8s-master ~]# kubectl exec -it check-pod-95d5577d8-9ggfg -- /bin/bash         
      root@check-pod-95d5577d8-9ggfg:/# cd /usr/share/nginx/html/
      root@check-pod-95d5577d8-9ggfg:/usr/share/nginx/html# ls -la 
      total 16
      drwxr-xr-x 2 root root 4096 Aug 17 11:46 .
      drwxr-xr-x 3 root root 4096 Aug 17 11:46 ..
      -rw-r--r-- 1 root root  494 Jul  6 14:59 50x.html
      -rw-r--r-- 1 root root  612 Jul  6 14:59 index.html
      root@check-pod-95d5577d8-9ggfg:/usr/share/nginx/html# rm -f index.html 
      
    • 再次查看运行状态

      [root@k8s-master pod]# kubectl get pods -w
      NAME                                     READY   STATUS    RESTARTS   AGE
      check-pod-95d5577d8-9ggfg                1/1     Running   0          30m
      check-pod-95d5577d8-mkkwm                1/1     Running   0          30m
      check-pod-95d5577d8-zhz5h                1/1     Running   0          30m
      nfs-client-provisioner-ff6b5d864-ht8wk   1/1     Running   0          9d
      check-pod-95d5577d8-9ggfg                0/1     Running   0          32m
      check-pod-95d5577d8-9ggfg                0/1     Running   1          32m
      check-pod-95d5577d8-9ggfg                1/1     Running   1          33m
      

    5.2 验证endpoint服务

    • 再为该deployment生成一个service的yaml文件

      [root@k8s-master pod]# kubectl expose deployment  check-pod --port=80 --target-port=80 --type=NodePort --dry-run=client -o yaml > check-pod-server.yaml
      [root@k8s-master pod]# vim check-pod-server.yaml 
      [root@k8s-master pod]# cat check-pod-server.yaml 
      apiVersion: v1
      kind: Service
      metadata:
        creationTimestamp: null
        labels:
          app: check-pod
        name: check-pod
      spec:
        ports:
        - port: 80
          protocol: TCP
          targetPort: 80
        selector:
          app: check-pod
        type: NodePort
      status:
        loadBalancer: {}
      
    • 创建service服务

      [root@k8s-master pod]# kubectl apply -f check-pod-server.yaml 
      service/check-pod created
      
    • 查看endpoints服务

      [root@k8s-master pod]# kubectl get endpoints -w
      NAME         ENDPOINTS                                            AGE
      check-pod    10.244.58.220:80,10.244.58.221:80,10.244.85.211:80   18h
      kubernetes   172.17.0.2:6443                                      14d
      
    • 进入容器,删除健康检查所探测的页面文件(index.html)

      [root@k8s-master ~]# kubectl get pods
      NAME                        READY   STATUS    RESTARTS   AGE
      check-pod-95d5577d8-9ggfg   1/1     Running   1          18h
      check-pod-95d5577d8-mkkwm   1/1     Running   0          18h
      check-pod-95d5577d8-zhz5h   1/1     Running   0          18h
      [root@k8s-master ~]# kubectl exec -it check-pod-95d5577d8-9ggfg -- bash
      root@check-pod-95d5577d8-9ggfg:/# cd /usr/share/nginx/html/
      root@check-pod-95d5577d8-9ggfg:/usr/share/nginx/html# ls 
      50x.html  index.html
      root@check-pod-95d5577d8-9ggfg:/usr/share/nginx/html# rm -f index.html 
      
    • 再次观察endpoints服务

      [root@k8s-master pod]# kubectl get endpoints -w
      NAME         ENDPOINTS                                            AGE
      check-pod    10.244.58.220:80,10.244.58.221:80,10.244.85.211:80   18h
      kubernetes   172.17.0.2:6443                                      14d
      # 在这里我们可以看到:删除文件后健康检查失败,该pod的地址(10.244.58.220)被从endpoints中剔除;容器重启恢复后又被重新加入
      check-pod    10.244.58.221:80,10.244.85.211:80                    18h
      check-pod    10.244.58.220:80,10.244.58.221:80,10.244.85.211:80   18h
      
  • 相关阅读:
    Java Object part1
    StringBuffer StringBuilder append
    Java equal
    java Classloader
    Java NIO
    Oracle中Blob和Clob
    Java8 Lambda 表达式
    HashMap分析 + 哈希表
    android自定义控件之滚动广告条
    android自定义控件之模仿优酷菜单
  • 原文地址:https://www.cnblogs.com/scajy/p/15476017.html
Copyright © 2020-2023  润新知