• kubernetes集群中的pause容器


    昨天晚上搭建好了k8s多主集群,启动了一个nginx的pod,然而每启动一个pod就伴随着一个pause容器。之前在编写kubelet的systemd unit文件时也见到过这个pause镜像:

    1
    2
    3
    4
    5
    6
    [root@node01 ~]# docker ps
    CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
    43884d79fe6f nginx "nginx -g 'daemon of…" 11 hours ago Up 11 hours k8s_nginx-pod_nginx-con-594b8d6b48-46gf7_default_25b0048f-a24f-11e9-8149-00163e0134cf_0
    eff67394c9c8 nginx "nginx -g 'daemon of…" 11 hours ago Up 11 hours k8s_nginx-pod_nginx-con-594b8d6b48-vt589_default_25aefc99-a24f-11e9-8149-00163e0134cf_1
    261226f6b92a registry.cn-hangzhou.aliyuncs.com/google_containers/pause-amd64:3.1 "/pause" 11 hours ago Up 11 hours k8s_POD_nginx-con-594b8d6b48-vt589_default_25aefc99-a24f-11e9-8149-00163e0134cf_1
    fc94013b93dd registry.cn-hangzhou.aliyuncs.com/google_containers/pause-amd64:3.1 "/pause" 11 hours ago Up 11 hours k8s_POD_nginx-con-594b8d6b48-46gf7_default_25b0048f-a24f-11e9-8149-00163e0134cf_1

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    ### 6 创建和分发kubelet systemd unit文件
    ```bash
    cat >/etc/systemd/system/kubelet.service <<EOF
    [Unit]
    Description=Kubernetes Kubelet
    Documentation=https://github.com/GoogleCloudPlatform/kubernetes
    After=docker.service
    Requires=docker.service
    [Service]
    WorkingDirectory=/var/lib/kubelet
    ExecStart=/usr/local/bin/kubelet
    --bootstrap-kubeconfig=/etc/kubernetes/cert/kubelet-bootstrap.kubeconfig
    --cert-dir=/etc/kubernetes/cert
    --kubeconfig=/etc/kubernetes/cert/kubelet.kubeconfig
    --config=/etc/kubernetes/cert/kubelet.config.json
    --hostname-override=172.24.150.89
    --pod-infra-container-image=registry.cn-hangzhou.aliyuncs.com/google_containers/pause-amd64:3.1
    --allow-privileged=true
    --alsologtostderr=true
    --logtostderr=false
    --log-dir=/var/log/kubernetes
    --v=2
    Restart=on-failure
    RestartSec=5
    [Install]
    WantedBy=multi-user.target
    EOF

    首先找一下源码看一下:
    pause的源码用C语言编写,主要有四个文件:
    orphan.c文件

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    #include <stdio.h>
    #include <unistd.h>
    int main() {
    pid_t pid;
    pid = fork();
    if (pid == 0) {
    while (getppid() > 1)
    ;
    printf("Child exiting: pid=%d ppid=%d\n", getpid(), getppid());
    return 0;
    } else if (pid > 0) {
    printf("Parent exiting: pid=%d ppid=%d\n", getpid(), getppid());
    return 0;
    }
    perror("Could not create child");
    return 1;
    }

    pause.c文件

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    #include <signal.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/types.h>
    #include <sys/wait.h>
    #include <unistd.h>
    #define STRINGIFY(x) #x
    #define VERSION_STRING(x) STRINGIFY(x)
    #ifndef VERSION
    #define VERSION HEAD
    #endif
    static void sigdown(int signo) {
    psignal(signo, "Shutting down, got signal");
    exit(0);
    }
    static void sigreap(int signo) {
    while (waitpid(-1, NULL, WNOHANG) > 0)
    ;
    }
    int main(int argc, char **argv) {
    int i;
    for (i = 1; i < argc; ++i) {
    if (!strcasecmp(argv[i], "-v")) {
    printf("pause.c %s\n", VERSION_STRING(VERSION));
    return 0;
    }
    }
    if (getpid() != 1)
    /* Not an error because pause sees use outside of infra containers. */
    fprintf(stderr, "Warning: pause should be the first process\n");
    if (sigaction(SIGINT, &(struct sigaction){.sa_handler = sigdown}, NULL) < 0)
    return 1;
    if (sigaction(SIGTERM, &(struct sigaction){.sa_handler = sigdown}, NULL) < 0)
    return 2;
    if (sigaction(SIGCHLD, &(struct sigaction){.sa_handler = sigreap,
    .sa_flags = SA_NOCLDSTOP},
    NULL) < 0)
    return 3;
    for (;;)
    pause();
    fprintf(stderr, "Error: infinite loop terminated\n");
    return 42;
    }

    至于里面写的啥,我也看不懂,post出来让你们看看哈!
    Makefile文件用于制作pause镜像,制作镜像的模板是Dockerfile,首先看这个Dockerfile(去除注释)文件:

    1
    2
    3
    4
    FROM scratch
    ARG ARCH
    ADD bin/pause-${ARCH} /pause
    ENTRYPOINT ["/pause"]

    FROM scratch: 基础镜像是一个空镜像(an explicitly empty image)
    ARG ARCH: 声明构建参数ARCH,其值在执行docker build --build-arg ARCH=...时提供
    ADD bin/pause-${ARCH} /pause: 把外部文件bin/pause-${ARCH}添加到镜像内部的/pause
    ENTRYPOINT ["/pause"]: 容器启动时运行/pause命令
    中间两步非常重要,至于如何实现的不懂哦?

    pause的Makefile

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    .PHONY: all push container clean orphan all-push push-manifest
    include ../../hack/make-rules/Makefile.manifest
    REGISTRY ?= staging-k8s.gcr.io
    IMAGE = $(REGISTRY)/pause
    IMAGE_WITH_ARCH = $(IMAGE)-$(ARCH)
    TAG = 3.1
    REV = $(shell git describe --contains --always --match='v*')
    # Architectures supported: amd64, arm, arm64, ppc64le and s390x
    ARCH ?= amd64
    ALL_ARCH = amd64 arm arm64 ppc64le s390x
    CFLAGS = -Os -Wall -Werror -static -DVERSION=v$(TAG)-$(REV)
    KUBE_CROSS_IMAGE ?= k8s.gcr.io/kube-cross
    KUBE_CROSS_VERSION ?= $(shell cat ../build-image/cross/VERSION)
    BIN = pause
    SRCS = pause.c
    ifeq ($(ARCH),amd64)
    TRIPLE ?= x86_64-linux-gnu
    endif
    ifeq ($(ARCH),arm)
    TRIPLE ?= arm-linux-gnueabihf
    endif
    ifeq ($(ARCH),arm64)
    TRIPLE ?= aarch64-linux-gnu
    endif
    ifeq ($(ARCH),ppc64le)
    TRIPLE ?= powerpc64le-linux-gnu
    endif
    ifeq ($(ARCH),s390x)
    TRIPLE ?= s390x-linux-gnu
    endif
    # If you want to build AND push all containers, see the 'all-push' rule.
    all: all-container
    all-push: all-push-images push-manifest
    push-manifest: manifest-tool
    manifest-tool push from-args --platforms $(call join_platforms,$(ALL_ARCH)) --template $(IMAGE)-ARCH:$(TAG) --target $(IMAGE):$(TAG)
    sub-container-%:
    $(MAKE) ARCH=$* container
    sub-push-%:
    $(MAKE) ARCH=$* push
    all-container: $(addprefix sub-container-,$(ALL_ARCH))
    all-push-images: $(addprefix sub-push-,$(ALL_ARCH))
    build: bin/$(BIN)-$(ARCH)
    # 先启动一个容器
    ```bash
    bin/$(BIN)-$(ARCH): $(SRCS)
    mkdir -p bin
    docker run --rm -u $$(id -u):$$(id -g) -v $$(pwd):/build
    $(KUBE_CROSS_IMAGE):$(KUBE_CROSS_VERSION)
    /bin/bash -c "
    cd /build &&
    $(TRIPLE)-gcc $(CFLAGS) -o $@ $^ &&
    $(TRIPLE)-strip $@"

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    container: .container-$(ARCH)
    .container-$(ARCH): bin/$(BIN)-$(ARCH)
    docker build --pull -t $(IMAGE_WITH_ARCH):$(TAG) --build-arg ARCH=$(ARCH) .
    touch $@
    push: .push-$(ARCH)
    .push-$(ARCH): .container-$(ARCH)
    docker push $(IMAGE_WITH_ARCH):$(TAG)
    touch $@
    # Useful for testing, not automatically included in container image
    orphan: bin/orphan-$(ARCH)
    bin/orphan-$(ARCH): orphan.c
    mkdir -p bin
    docker run -u $$(id -u):$$(id -g) -v $$(pwd):/build
    $(KUBE_CROSS_IMAGE):$(KUBE_CROSS_VERSION)
    /bin/bash -c "
    cd /build &&
    $(TRIPLE)-gcc $(CFLAGS) -o $@ $^ &&
    $(TRIPLE)-strip $@"
    clean:
    rm -rf .container-* .push-* bin/

    ARCH值:

    1
    2
    3
    4
    # Architectures supported: amd64, arm, arm64, ppc64le and s390x
    ARCH ?= amd64
    ALL_ARCH = amd64 arm arm64 ppc64le s390x

    可以看出支持很多架构类型,默认为amd64

    制作pause镜像的过程如下:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    REGISTRY ?= staging-k8s.gcr.io
    IMAGE = $(REGISTRY)/pause
    IMAGE_WITH_ARCH = $(IMAGE)-$(ARCH)
    TAG = 3.1
    ARCH ?= amd64
    BIN = pause
    SRCS = pause.c
    REV = $(shell git describe --contains --always --match='v*')
    CFLAGS = -Os -Wall -Werror -static -DVERSION=v$(TAG)-$(REV)
    KUBE_CROSS_IMAGE ?= k8s.gcr.io/kube-cross
    KUBE_CROSS_VERSION ?= $(shell cat ../build-image/cross/VERSION)
    ifeq ($(ARCH),amd64)
    TRIPLE ?= x86_64-linux-gnu
    build: bin/$(BIN)-$(ARCH)
    # 启动一个容器
    bin/$(BIN)-$(ARCH): $(SRCS)
    mkdir -p bin
    docker run --rm -u $$(id -u):$$(id -g) -v $$(pwd):/build
    $(KUBE_CROSS_IMAGE):$(KUBE_CROSS_VERSION)
    /bin/bash -c "
    cd /build &&
    $(TRIPLE)-gcc $(CFLAGS) -o $@ $^ &&
    $(TRIPLE)-strip $@"
    # build一个pause镜像
    container: .container-$(ARCH)
    .container-$(ARCH): bin/$(BIN)-$(ARCH)
    docker build --pull -t $(IMAGE_WITH_ARCH):$(TAG) --build-arg ARCH=$(ARCH) .
    touch $@

    $(KUBE_CROSS_IMAGE):$(KUBE_CROSS_VERSION)=k8s.gcr.io/kube-cross:v1.12.6-1

    启动一个容器

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    bin/pause-amd64:pause.c
    mkdir -p bin
    docker run --rm -u $$(id -u):$$(id -g) -v $$(pwd):/build
    k8s.gcr.io/kube-cross:v1.12.6-1
    /bin/bash -c "
    cd /build &&
    x86_64-linux-gnu-gcc -Os -Wall -Werror -static -DVERSION=v3.1-$(REV) -o $@ $^ &&
    x86_64-linux-gnu-strip $@"
    # build一个pause镜像
    container: .container-amd64
    .container-amd64: bin/pause-amd64
    docker build --pull -t staging-k8s.gcr.io/pause-amd64:3.1 --build-arg ARCH=amd64 .

    至于详细的构建过程,我就不写了, 我看不懂呀!

    pause容器的工作
    可知kubernetes的pod抽象基于Linux的namespace和cgroups,为容器提供了良好的隔离环境。在同一个pod中,不同容器犹如在localhost中。

    在Unix系统中,PID为1的进程为init进程,即所有进程的父进程。它很特殊,维护一张进程表,不断地检查进程状态。例如,一旦某个子进程由于父进程的错误而变成了“孤儿进程”,其便会被init进程进行收养并最终回收资源,从而结束进程。

    或者,某子进程已经停止运行,但进程表中仍然保留着它的条目,因为其父进程尚未通过wait系统调用对它进行回收,该进程便成为“僵尸进程”。通常这种僵尸进程存在的时间很短;不过,如果父进程在子进程结束后一直不调用wait,僵尸进程便会存在较长时间。

    同时,对于init进程没有注册处理函数的信号,同一pid namespace内的其他进程无法将其发送给init进程,这相当于init进程拥有“信号屏蔽”功能,从而防止init进程被误杀。

    容器中使用pid namespace来对pid进行隔离,从而每个容器中均有其独立的init进程。例如,在宿主机上可以通过发送SIGKILL或者SIGSTOP信号来强制终止或暂停容器的运行(docker kill默认发送SIGKILL;docker stop则先发送SIGTERM,超时后再发送SIGKILL),即作用于容器内的init进程。一旦init进程被销毁,同一pid namespace下的进程也随之被销毁,容器进程占用的相应资源也会被回收。

    kubernetes中的pause容器便被设计成为每个业务容器提供以下功能:
    在pod中担任Linux命名空间共享的基础;
    启用pid命名空间,开启init进程。

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    [root@harbor1 ~]# docker run -idt --name pause-ceshi cntsp/pause:3.1
    ce2a4ffccf1d2b190b9f8cc45f6e4912f278aa7a55280ce8a69433e5345e5816
    [root@harbor1 ~]# docker run -idt --name busybox11 --net=container:pause11 --pid=container:pause11 --ipc=container:pause11 busybox
    docker: Error response from daemon: No such container: pause11.
    See 'docker run --help'.
    [root@harbor1 ~]# docker run -idt --name busybox11 --net=container:pause-ceshi --pid=container:pause-ceshi --ipc=container:pause-ceshi busybox
    Unable to find image 'busybox:latest' locally
    latest: Pulling from library/busybox
    8e674ad76dce: Pull complete
    Digest: sha256:c94cf1b87ccb80f2e6414ef913c748b105060debda482058d2b8d0fce39f11b9
    Status: Downloaded newer image for busybox:latest
    6bec8c69198eaa444580ccaa0a50afbc16a51eea489fa7e81729d535cb691ea6
    [root@harbor1 ~]# docker ps -a
    CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
    6bec8c69198e busybox "sh" About a minute ago Up About a minute busybox11
    ce2a4ffccf1d cntsp/pause:3.1 "/pause" 2 minutes ago Up 2 minutes pause-ceshi
    [root@harbor1 ~]# docker exec -it busybox11 /bin/bash
    OCI runtime exec failed: exec failed: container_linux.go:348: starting container process caused "exec: "/bin/bash": stat /bin/bash: no such file or directory": unknown
    [root@harbor1 ~]# docker exec -it busybox11 /bin/sh
    / # ps aux
    PID USER TIME COMMAND
    1 root 0:00 /pause
    6 root 0:00 sh
    16 root 0:00 /bin/sh
    21 root 0:00 ps aux
    / #
    这里补充一点:Docker网络模式:
    其中有一种是Container:
    > * 复用其它容器的网络
    Container模式:
    > * 重用另一个容器的Network Namespace,新创建的容器不会创建自己的网卡,也不会配置自己的IP
    > * 和一个指定的容器共享IP、端口范围
    > * 两个容器除了网络方面,其它的如文件系统、进程列表等还是隔离的,两个容器的进程可以通过lo网卡设备通信。
    > * Pod里的Pause容器和Pod里的其它容器就是此种网络

  • 相关阅读:
    第十八章 大浏览量系统的静态化架构设计(待续)
    第十七章 Velocity优化实践(待续)
    第十六章 Velocity工作原理解析(待续)
    第十五章 深入分析iBatis框架之系统架构与映射原理(待续)
    第十四章 Spring MVC的工作机制与设计模式(待续)
    第十三章 Spring框架的设计理念与设计模式分析(待续)
    第十二章 Jetty的工作原理解析(待续)
    第十一章 Tomcat的系统架构与设计模式(待续)
    Luogu P1137 旅行计划 【拓扑排序+Dp】By cellur925
    [USACO4.1]麦香牛块Beef McNuggets By cellur925
  • 原文地址:https://www.cnblogs.com/lijianming180/p/12147607.html
Copyright © 2020-2023  润新知