1. 简介
测试环境Kubernetes 1.14.2版本高可用搭建文档,搭建方式为kubeadm
2. 服务器版本和架构信息
系统版本:CentOS Linux release 7.6.1810 (Core)
内核:4.4.184-1.el7.elrepo.x86_64 注意:有可能后面安装的内核版本高于此版本
Kubernetes: v1.14.2
Docker-ce: 18.06
网络组件:calico
硬件配置:16核64G
Keepalived保证apiserever服务器的IP高可用
Haproxy实现apiserver的负载均衡
3. 服务器角色规划
一定注意对应自己的服务器IP和主机名
master01/02节点上面部署了kubelet、keepalived、haproxy、controllmanager、apiserver、scheduler、docker、kube-proxy、calico组件
master03节点上面部署了kubelet、controllmanager、apiserver、scheduler、docker、kube-proxy、calico组件
node01/node02节点上面部署了kubelet、kube-proxy、docker、calico组件
其中除了kubelet和docker组件,其他组件都是以静态pod模式存在
节点名称 | 角色 | IP | 安装软件 |
---|---|---|---|
负载VIP | VIP | 172.16.4.231 | |
master-01 | master | 172.16.4.129 | kubeadm、kubelet、kubectl、docker、haproxy、keepalived |
master-02 | master | 172.16.4.130 | kubeadm、kubelet、kubectl、docker、haproxy、keepalived |
master-03 | master | 172.16.4.133 | kubeadm、kubelet、kubectl、docker |
node-01 | node | 172.16.4.128 | kubeadm、kubelet、kubectl、docker |
node-03 | node | 172.16.4.132 | kubeadm、kubelet、kubectl、docker |
service网段 | 10.209.0.0/16 |
4. 服务器初始化
4.1 关闭Selinux/firewalld/iptables(所有机器执行)
setenforce 0 \
&& sed -i 's/^SELINUX=.*$/SELINUX=disabled/' /etc/selinux/config \
&& getenforce
systemctl stop firewalld \
&& systemctl daemon-reload \
&& systemctl disable firewalld \
&& systemctl daemon-reload \
&& systemctl status firewalld
yum install -y iptables-services \
&& systemctl stop iptables \
&& systemctl disable iptables \
&& systemctl status iptables
4.2 为每台服务器添加host解析记录(所有机器执行)
cat >>/etc/hosts<<EOF
172.16.4.129 master01
172.16.4.130 master02
172.16.4.133 master03
172.16.4.128 node01
172.16.4.132 node03
EOF
4.3 更换阿里源(所有机器执行)
yum install wget -y
cp -r /etc/yum.repos.d /etc/yum.repos.d.bak
rm -f /etc/yum.repos.d/*.repo
wget -O /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo \
&& wget -O /etc/yum.repos.d/epel.repo http://mirrors.aliyun.com/repo/epel-7.repo
yum clean all && yum makecache
4.4 设置limits.conf(所有机器执行)
cat >> /etc/security/limits.conf <<EOF
# End of file
* soft nproc 10240000
* hard nproc 10240000
* soft nofile 10240000
* hard nofile 10240000
EOF
4.5 设置sysctl.conf(所有机器执行)
[ ! -e "/etc/sysctl.conf_bk" ] && /bin/mv /etc/sysctl.conf{,_bk} \
&& cat > /etc/sysctl.conf << EOF
fs.file-max=1000000
fs.nr_open=20480000
net.ipv4.tcp_max_tw_buckets = 180000
net.ipv4.tcp_sack = 1
net.ipv4.tcp_window_scaling = 1
net.ipv4.tcp_rmem = 4096 87380 4194304
net.ipv4.tcp_wmem = 4096 16384 4194304
net.ipv4.tcp_max_syn_backlog = 16384
net.core.netdev_max_backlog = 32768
net.core.somaxconn = 32768
net.core.wmem_default = 8388608
net.core.rmem_default = 8388608
net.core.rmem_max = 16777216
net.core.wmem_max = 16777216
net.ipv4.tcp_timestamps = 0
net.ipv4.tcp_fin_timeout = 20
net.ipv4.tcp_synack_retries = 2
net.ipv4.tcp_syn_retries = 2
net.ipv4.tcp_syncookies = 1
#net.ipv4.tcp_tw_len = 1
net.ipv4.tcp_tw_reuse = 1
net.ipv4.tcp_mem = 94500000 915000000 927000000
net.ipv4.tcp_max_orphans = 3276800
net.ipv4.ip_local_port_range = 1024 65000
#net.nf_conntrack_max = 6553500
#net.netfilter.nf_conntrack_max = 6553500
#net.netfilter.nf_conntrack_tcp_timeout_close_wait = 60
#net.netfilter.nf_conntrack_tcp_timeout_fin_wait = 120
#net.netfilter.nf_conntrack_tcp_timeout_time_wait = 120
#net.netfilter.nf_conntrack_tcp_timeout_established = 3600
EOF
sysctl -p
4.6 配置时间同步(所有机器执行)
ntpdate -u pool.ntp.org
crontab -e #加入定时任务
*/15 * * * * /usr/sbin/ntpdate -u pool.ntp.org >/dev/null 2>&1
4.7 配置k8s.conf(所有机器执行)
cat <<EOF > /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_nonlocal_bind = 1
net.ipv4.ip_forward = 1
vm.swappiness=0
EOF
#执行命令使其修改生效
modprobe br_netfilter \
&& sysctl -p /etc/sysctl.d/k8s.conf
4.8 关闭交换分区(所有机器执行)
swapoff -a
yes | cp /etc/fstab /etc/fstab_bak
cat /etc/fstab_bak |grep -v swap > /etc/fstab
4.9 升级系统内核(所有机器执行)
yum update -y
rpm -Uvh http://www.elrepo.org/elrepo-release-7.0-2.el7.elrepo.noarch.rpm ;yum --enablerepo=elrepo-kernel install kernel-lt-devel kernel-lt -y
查看内核修改结果
grub2-editenv list
#注意,这里执行下面的命令会出现多个内核版本
[root@master01 ~]# cat /boot/grub2/grub.cfg |grep "menuentry "
menuentry 'CentOS Linux (4.4.184-1.el7.elrepo.x86_64) 7 (Core)' --class centos --class gnu-linux --class gnu --class os --unrestricted $menuentry_id_option 'gnulinux-3.10.0-862.el7.x86_64-advanced-021a955b-781d-425a-8250-f39857437658'
设置默认内核版本,改版本必须已经存在,请注意执行命令cat /boot/grub2/grub.cfg |grep "menuentry "后生成的内容,切勿随意复制
grub2-set-default 'CentOS Linux (4.4.184-1.el7.elrepo.x86_64) 7 (Core)'
查看内核修改结果
grub2-editenv list
# 检查默认内核版本高于4.1,否则请调整默认启动参数
# 查看内核修改结果
grub2-editenv list
#重启以更换内核使其生效
reboot
4.10 加载ipvs模块(所有机器执行)
cat > /etc/sysconfig/modules/ipvs.modules <<EOF
#!/bin/bash
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack_ipv4
EOF
chmod 755 /etc/sysconfig/modules/ipvs.modules && bash /etc/sysconfig/modules/ipvs.modules && lsmod | grep -e ip_vs -e nf_conntrack_ipv4
4.11 添加k8s yum源(所有机器执行)
cat << EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
4.12 安装服务器必备软件
yum -y install wget vim iftop iotop net-tools nmon telnet lsof iptraf nmap httpd-tools lrzsz mlocate ntp ntpdate strace libpcap nethogs iptraf iftop nmon bridge-utils bind-utils telnet nc nfs-utils rpcbind nfs-utils dnsmasq python python-devel tcpdump mlocate tree
5. 安装keepalived和haproxy
5.1 在master01和master02上安装keepalived和haproxy
master01的priority
为250,master02的priority
为200,其他配置一样。
master01(172.16.4.129)
vim /etc/keepalived/keepalived.conf
注意interface这个配置,配置成你服务器的网卡,切勿随意粘贴
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
}
vrrp_script check_haproxy {
script "killall -0 haproxy"
interval 3
weight -2
fall 10
rise 2
}
vrrp_instance VI_1 {
state MASTER
interface ens160
virtual_router_id 51
priority 250
advert_int 1
authentication {
auth_type PASS
auth_pass 35f18af7190d51c9f7f78f37300a0cbd
}
virtual_ipaddress {
172.16.4.231
}
track_script {
check_haproxy
}
}
master02(172.16.4.130)
vim /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
router_id LVS_DEVEL
}
vrrp_script check_haproxy {
script "killall -0 haproxy"
interval 3
weight -2
fall 10
rise 2
}
vrrp_instance VI_1 {
state BACKUP
interface ens160
virtual_router_id 51
priority 200
advert_int 1
authentication {
auth_type PASS
auth_pass 35f18af7190d51c9f7f78f37300a0cbd
}
virtual_ipaddress {
172.16.4.231
}
track_script {
check_haproxy
}
}
5.2 haproxy配置
master01和master02的haproxy配置是一样的。此处我们监听的是172.16.4.231的8443端口,因为haproxy是和k8s apiserver是部署在同一台服务器上,都用6443会冲突。
172.16.4.129
vim /etc/haproxy/haproxy.cfg
#---------------------------------------------------------------------
# Global settings
#---------------------------------------------------------------------
global
# to have these messages end up in /var/log/haproxy.log you will
# need to:
#
# 1) configure syslog to accept network log events. This is done
# by adding the '-r' option to the SYSLOGD_OPTIONS in
# /etc/sysconfig/syslog
#
# 2) configure local2 events to go to the /var/log/haproxy.log
# file. A line like the following can be added to
# /etc/sysconfig/syslog
#
# local2.* /var/log/haproxy.log
#
#log 127.0.0.1 local2
log 127.0.0.1 local0 info
chroot /var/lib/haproxy
pidfile /var/run/haproxy.pid
maxconn 4000
user haproxy
group haproxy
daemon
# turn on stats unix socket
stats socket /var/lib/haproxy/stats
#---------------------------------------------------------------------
# common defaults that all the 'listen' and 'backend' sections will
# use if not designated in their block
#---------------------------------------------------------------------
defaults
mode http
log global
option httplog
option dontlognull
option http-server-close
option forwardfor except 127.0.0.0/8
option redispatch
retries 3
timeout http-request 10s
timeout queue 1m
timeout connect 10s
timeout client 1m
timeout server 1m
timeout http-keep-alive 10s
timeout check 10s
maxconn 3000
#---------------------------------------------------------------------
# kubernetes apiserver frontend which proxys to the backends
#---------------------------------------------------------------------
frontend kubernetes-apiserver
mode tcp
bind *:8443
option tcplog
default_backend kubernetes-apiserver
#---------------------------------------------------------------------
# round robin balancing between the various backends
#---------------------------------------------------------------------
backend kubernetes-apiserver
mode tcp
balance roundrobin
server master01 172.16.4.129:6443 check
server master02 172.16.4.130:6443 check
server master03 172.16.4.133:6443 check
#---------------------------------------------------------------------
# collection haproxy statistics message
#---------------------------------------------------------------------
listen stats
bind *:1080
stats auth admin:awesomePassword
stats refresh 5s
stats realm HAProxy\ Statistics
stats uri /admin?stats
172.16.4.130
vim /etc/haproxy/haproxy.cfg
#---------------------------------------------------------------------
# Global settings
#---------------------------------------------------------------------
global
# to have these messages end up in /var/log/haproxy.log you will
# need to:
#
# 1) configure syslog to accept network log events. This is done
# by adding the '-r' option to the SYSLOGD_OPTIONS in
# /etc/sysconfig/syslog
#
# 2) configure local2 events to go to the /var/log/haproxy.log
# file. A line like the following can be added to
# /etc/sysconfig/syslog
#
# local2.* /var/log/haproxy.log
#
#log 127.0.0.1 local2
log 127.0.0.1 local0 info
```
chroot /var/lib/haproxy
pidfile /var/run/haproxy.pid
maxconn 4000
user haproxy
group haproxy
daemon
# turn on stats unix socket
stats socket /var/lib/haproxy/stats
```
#---------------------------------------------------------------------
# common defaults that all the 'listen' and 'backend' sections will
# use if not designated in their block
#---------------------------------------------------------------------
defaults
mode http
log global
option httplog
option dontlognull
option http-server-close
option forwardfor except 127.0.0.0/8
option redispatch
retries 3
timeout http-request 10s
timeout queue 1m
timeout connect 10s
timeout client 1m
timeout server 1m
timeout http-keep-alive 10s
timeout check 10s
maxconn 3000
#---------------------------------------------------------------------
# kubernetes apiserver frontend which proxys to the backends
#---------------------------------------------------------------------
frontend kubernetes-apiserver
mode tcp
bind *:8443
option tcplog
default_backend kubernetes-apiserver
#---------------------------------------------------------------------
# round robin balancing between the various backends
#---------------------------------------------------------------------
backend kubernetes-apiserver
mode tcp
balance roundrobin
server master01 172.16.4.129:6443 check
server master02 172.16.4.130:6443 check
server master03 172.16.4.133:6443 check
#---------------------------------------------------------------------
# collection haproxy statistics message
#---------------------------------------------------------------------
listen stats
bind *:1080
stats auth admin:awesomePassword
stats refresh 5s
stats realm HAProxy\ Statistics
stats uri /admin?stats
5.3 设置服务启动顺序及依赖关系(master01和master02操作)
vim /usr/lib/systemd/system/keepalived.service
[Unit]
Description=LVS and VRRP High Availability Monitor
After=syslog.target network-online.target haproxy.service
Requires=haproxy.service
5.4 启动服务
systemctl enable keepalived && systemctl start keepalived \
&& systemctl enable haproxy && systemctl start haproxy && systemctl status keepalived && systemctl status haproxy
6.安装docker
6.1 安装必要的一些系统工具(所有服务器安装)
yum install -y yum-utils device-mapper-persistent-data lvm2
6.2 添加软件源信息(所有服务器配置)
yum-config-manager --add-repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
yum list docker-ce --showduplicates | sort -r
yum -y install docker-ce-18.06.3.ce-3.el7
usermod -aG docker bumblebee
6.3 配置daemon.json文件(所有服务器配置)
mkdir -p /etc/docker/ \
&& cat > /etc/docker/daemon.json << EOF
{
"registry-mirrors":[
"https://c6ai9izk.mirror.aliyuncs.com"
],
"max-concurrent-downloads":3,
"data-root":"/data/docker",
"log-driver":"json-file",
"log-opts":{
"max-size":"100m",
"max-file":"1"
},
"max-concurrent-uploads":5,
"storage-driver":"overlay2",
"storage-opts": [
"overlay2.override_kernel_check=true"
]
}
EOF
6.4 启动检查docker服务
systemctl enable docker \
&& systemctl restart docker \
&& systemctl status docker
7 使用kubeadm部署kubernetes
7.1 配置kubernetes.repo(每台机器都需要配置)
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
7.2 安装必备软件(所有机器安装)
yum install -y kubelet-1.14.2 kubeadm-1.14.2 kubectl-1.14.2 ipvsadm ipset
#设置kubelet开机自启动,注意:这一步不能直接执行 systemctl start kubelet,会报错,成功初始化完后kubelet会自动起来
systemctl enable kubelet
7.3 修改初始化配置
使用kubeadm config print init-defaults > kubeadm-init.yaml 打印出默认配置,然后在根据自己的环境修改配置
注意需要修改advertiseAddress、controlPlaneEndpoint、imageRepository、serviceSubnet
其中advertiseAddress为master01的ip,controlPlaneEndpoint为VIP+8443端口,imageRepository修改为阿里的源,serviceSubnet找网络组要一段没人使用的IP段
[root@master01 ~]# cat kubeadm-init.yaml
apiVersion: kubeadm.k8s.io/v1beta1
bootstrapTokens:
- groups:
- system:bootstrappers:kubeadm:default-node-token
token: abcdef.0123456789abcdef
ttl: 24h0m0s
usages:
- signing
- authentication
kind: InitConfiguration
localAPIEndpoint:
advertiseAddress: 172.16.4.129
bindPort: 6443
nodeRegistration:
criSocket: /var/run/dockershim.sock
name: master01
taints:
- effect: NoSchedule
key: node-role.kubernetes.io/master
---
apiServer:
timeoutForControlPlane: 4m0s
apiVersion: kubeadm.k8s.io/v1beta1
certificatesDir: /etc/kubernetes/pki
clusterName: kubernetes
controlPlaneEndpoint: "172.16.4.231:8443"
controllerManager: {}
dns:
type: CoreDNS
etcd:
local:
dataDir: /var/lib/etcd
imageRepository: registry.cn-hangzhou.aliyuncs.com/google_containers
kind: ClusterConfiguration
kubernetesVersion: v1.14.2
networking:
dnsDomain: cluster.local
podSubnet: ""
serviceSubnet: "10.209.0.0/16"
scheduler: {}
7.4 预下载镜像
[root@master01 ~]# kubeadm config images pull --config kubeadm-init.yaml
[config/images] Pulled registry.cn-hangzhou.aliyuncs.com/google_containers/kube-apiserver:v1.14.2
[config/images] Pulled registry.cn-hangzhou.aliyuncs.com/google_containers/kube-controller-manager:v1.14.2
[config/images] Pulled registry.cn-hangzhou.aliyuncs.com/google_containers/kube-scheduler:v1.14.2
[config/images] Pulled registry.cn-hangzhou.aliyuncs.com/google_containers/kube-proxy:v1.14.2
[config/images] Pulled registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.1
[config/images] Pulled registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.3.10
[config/images] Pulled registry.cn-hangzhou.aliyuncs.com/google_containers/coredns:1.3.1
7.5 初始化
[root@master01 ~]# kubeadm init --config kubeadm-init.yaml
[init] Using Kubernetes version: v1.14.2
[preflight] Running pre-flight checks
[WARNING IsDockerSystemdCheck]: detected "cgroupfs" as the Docker cgroup driver. The recommended driver is "systemd". Please follow the guide at https://kubernetes.io/docs/setup/cri/
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Activating the kubelet service
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "etcd/ca" certificate and key
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [master01 localhost] and IPs [172.16.4.129 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [master01 localhost] and IPs [172.16.4.129 127.0.0.1 ::1]
[certs] Generating "ca" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [master01 kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.209.0.1 172.16.4.129 172.16.4.231]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "front-proxy-ca" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Generating "sa" key and public key
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "admin.conf" kubeconfig file
[endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "kubelet.conf" kubeconfig file
[endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests". This can take up to 4m0s
[apiclient] All control plane components are healthy after 17.506253 seconds
[upload-config] storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[kubelet] Creating a ConfigMap "kubelet-config-1.14" in namespace kube-system with the configuration for the kubelets in the cluster
[upload-certs] Skipping phase. Please see --experimental-upload-certs
[mark-control-plane] Marking the node master01 as control-plane by adding the label "node-role.kubernetes.io/master=''"
[mark-control-plane] Marking the node master01 as control-plane by adding the taints [node-role.kubernetes.io/master:NoSchedule]
[bootstrap-token] Using token: abcdef.0123456789abcdef
[bootstrap-token] Configuring bootstrap tokens, cluster-info ConfigMap, RBAC Roles
[bootstrap-token] configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
[bootstrap-token] configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
[bootstrap-token] configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
[bootstrap-token] creating the "cluster-info" ConfigMap in the "kube-public" namespace
[addons] Applied essential addon: CoreDNS
[endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[addons] Applied essential addon: kube-proxy
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
You can now join any number of control-plane nodes by copying certificate authorities
and service account keys on each node and then running the following as root:
kubeadm join 172.16.4.231:8443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:0ca0a5fd28409faecba5d2f21aeb010a945a5dae42023fe361424d621708edc1 \
--experimental-control-plane
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 172.16.4.231:8443 --token abcdef.0123456789abcdef \
--discovery-token-ca-cert-hash sha256:0ca0a5fd28409faecba5d2f21aeb010a945a5dae42023fe361424d621708edc1
kubeadm init主要执行了以下操作:
- [init]:指定版本进行初始化操作
- [preflight] :初始化前的检查和下载所需要的Docker镜像文件
- [kubelet-start] :生成kubelet的配置文件”/var/lib/kubelet/config.yaml”,没有这个文件kubelet无法启动,所以初始化之前的kubelet实际上启动失败。
- [certificates]:生成Kubernetes使用的证书,存放在/etc/kubernetes/pki目录中。
- [kubeconfig] :生成 KubeConfig 文件,存放在/etc/kubernetes目录中,组件之间通信需要使用对应文件。
- [control-plane]:使用/etc/kubernetes/manifest目录下的YAML文件,安装 Master 组件。
- [etcd]:使用/etc/kubernetes/manifest/etcd.yaml安装Etcd服务。
- [wait-control-plane]:等待control-plan部署的Master组件启动。
- [apiclient]:检查Master组件服务状态。
- [uploadconfig]:更新配置
- [kubelet]:使用configMap配置kubelet。
- [patchnode]:更新CNI信息到Node上,通过注释的方式记录。
- [mark-control-plane]:为当前节点打标签,打了角色Master,和不可调度标签,这样默认就不会使用Master节点来运行Pod。
- [bootstrap-token]:生成token记录下来,后边使用kubeadm join往集群中添加节点时会用到
- [addons]:安装附加组件CoreDNS和kube-proxy
7.6 为kubectl准备Kubeconfig文件
kubectl默认会在执行的用户家目录下面的.kube目录下寻找config文件。这里是将在初始化时[kubeconfig]步骤生成的admin.conf拷贝到.kube/config。
[root@master01 ~]# mkdir -p $HOME/.kube
[root@master01 ~]# sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@master01 ~]# sudo chown $(id -u):$(id -g) $HOME/.kube/config
在该配置文件中,记录了API Server的访问地址,所以后面直接执行kubectl命令就可以正常连接到API Server中。
7.7 查看组件状态
[root@master01 ~]# kubectl get cs
NAME STATUS MESSAGE ERROR
controller-manager Healthy ok
scheduler Healthy ok
etcd-0 Healthy {"health":"true"}
[root@master01 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master01 NotReady master 4m20s v1.14.2
目前只有一个节点,角色是Master,状态是NotReady,状态是NotReady状态是因为还没有安装网络插件
7.8 其他master部署(在master01机器上执行)
在master01将证书文件拷贝至master02、master03节点
#拷贝正式至master02节点
USER=root
CONTROL_PLANE_IPS="master02"
for host in ${CONTROL_PLANE_IPS}; do
ssh "${USER}"@$host "mkdir -p /etc/kubernetes/pki/etcd"
scp /etc/kubernetes/pki/ca.* "${USER}"@$host:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/sa.* "${USER}"@$host:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/front-proxy-ca.* "${USER}"@$host:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/etcd/ca.* "${USER}"@$host:/etc/kubernetes/pki/etcd/
scp /etc/kubernetes/admin.conf "${USER}"@$host:/etc/kubernetes/
done
#拷贝正式至master03节点
USER=root
CONTROL_PLANE_IPS="master03"
for host in ${CONTROL_PLANE_IPS}; do
ssh "${USER}"@$host "mkdir -p /etc/kubernetes/pki/etcd"
scp /etc/kubernetes/pki/ca.* "${USER}"@$host:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/sa.* "${USER}"@$host:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/front-proxy-ca.* "${USER}"@$host:/etc/kubernetes/pki/
scp /etc/kubernetes/pki/etcd/ca.* "${USER}"@$host:/etc/kubernetes/pki/etcd/
scp /etc/kubernetes/admin.conf "${USER}"@$host:/etc/kubernetes/
done
在master02上执行,注意注意--experimental-control-plane
参数
[root@master02 ~]# kubeadm join 172.16.4.231:8443 --token abcdef.0123456789abcdef \
> --discovery-token-ca-cert-hash sha256:0ca0a5fd28409faecba5d2f21aeb010a945a5dae42023fe361424d621708edc1 \
> --experimental-control-plane
[preflight] Running pre-flight checks
[WARNING IsDockerSystemdCheck]: detected "cgroupfs" as the Docker cgroup driver. The recommended driver is "systemd". Please follow the guide at https://kubernetes.io/docs/setup/cri/
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'
[preflight] Running pre-flight checks before initializing the new control plane instance
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [master02 kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.209.0.1 172.16.4.130 172.16.4.231]
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [master02 localhost] and IPs [172.16.4.130 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [master02 localhost] and IPs [172.16.4.130 127.0.0.1 ::1]
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Valid certificates and keys now exist in "/etc/kubernetes/pki"
[certs] Using the existing "sa" key
[kubeconfig] Generating kubeconfig files
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Using existing kubeconfig file: "/etc/kubernetes/admin.conf"
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[check-etcd] Checking that the etcd cluster is healthy
[kubelet-start] Downloading configuration for the kubelet from the "kubelet-config-1.14" ConfigMap in the kube-system namespace
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Activating the kubelet service
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...
[etcd] Announced new etcd member joining to the existing etcd cluster
[etcd] Wrote Static Pod manifest for a local etcd member to "/etc/kubernetes/manifests/etcd.yaml"
[etcd] Waiting for the new etcd member to join the cluster. This can take up to 40s
[upload-config] storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[mark-control-plane] Marking the node master02 as control-plane by adding the label "node-role.kubernetes.io/master=''"
[mark-control-plane] Marking the node master02 as control-plane by adding the taints [node-role.kubernetes.io/master:NoSchedule]
This node has joined the cluster and a new control plane instance was created:
* Certificate signing request was sent to apiserver and approval was received.
* The Kubelet was informed of the new secure connection details.
* Control plane (master) label and taint were applied to the new node.
* The Kubernetes control plane instances scaled up.
* A new etcd member was added to the local/stacked etcd cluster.
To start administering your cluster from this node, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Run 'kubectl get nodes' to see this node join the cluster.
注意**:token有效期是有限的,如果旧的token过期,可以使用
kubeadm token create --print-join-command
重新创建一条token。
mkdir -p $HOME/.kube \
&& cp -i /etc/kubernetes/admin.conf $HOME/.kube/config \
&& chown $(id -u):$(id -g) $HOME/.kube/config
在master03上执行,注意注意--experimental-control-plane
参数
[root@master03 ~]# kubeadm join 172.16.4.231:8443 --token abcdef.0123456789abcdef \
> --discovery-token-ca-cert-hash sha256:0ca0a5fd28409faecba5d2f21aeb010a945a5dae42023fe361424d621708edc1 \
> --experimental-control-plane
[preflight] Running pre-flight checks
[WARNING IsDockerSystemdCheck]: detected "cgroupfs" as the Docker cgroup driver. The recommended driver is "systemd". Please follow the guide at https://kubernetes.io/docs/setup/cri/
[preflight] Reading configuration from the cluster...
[preflight] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'
[preflight] Running pre-flight checks before initializing the new control plane instance
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "front-proxy-client" certificate and key
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [master03 localhost] and IPs [172.16.4.133 127.0.0.1 ::1]
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [master03 localhost] and IPs [172.16.4.133 127.0.0.1 ::1]
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [master03 kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.209.0.1 172.16.4.133 172.16.4.231]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Valid certificates and keys now exist in "/etc/kubernetes/pki"
[certs] Using the existing "sa" key
[kubeconfig] Generating kubeconfig files
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[endpoint] WARNING: port specified in controlPlaneEndpoint overrides bindPort in the controlplane address
[kubeconfig] Using existing kubeconfig file: "/etc/kubernetes/admin.conf"
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[check-etcd] Checking that the etcd cluster is healthy
[kubelet-start] Downloading configuration for the kubelet from the "kubelet-config-1.14" ConfigMap in the kube-system namespace
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Activating the kubelet service
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap...
[etcd] Announced new etcd member joining to the existing etcd cluster
[etcd] Wrote Static Pod manifest for a local etcd member to "/etc/kubernetes/manifests/etcd.yaml"
[etcd] Waiting for the new etcd member to join the cluster. This can take up to 40s
[upload-config] storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[mark-control-plane] Marking the node master03 as control-plane by adding the label "node-role.kubernetes.io/master=''"
[mark-control-plane] Marking the node master03 as control-plane by adding the taints [node-role.kubernetes.io/master:NoSchedule]
This node has joined the cluster and a new control plane instance was created:
* Certificate signing request was sent to apiserver and approval was received.
* The Kubelet was informed of the new secure connection details.
* Control plane (master) label and taint were applied to the new node.
* The Kubernetes control plane instances scaled up.
* A new etcd member was added to the local/stacked etcd cluster.
To start administering your cluster from this node, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Run 'kubectl get nodes' to see this node join the cluster.
[root@master03 ~]# mkdir -p $HOME/.kube \
> && cp -i /etc/kubernetes/admin.conf $HOME/.kube/config \
> && chown $(id -u):$(id -g) $HOME/.kube/config
[root@master03 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master01 NotReady master 15m v1.14.2
master02 NotReady master 3m40s v1.14.2
master03 NotReady master 2m1s v1.14.2
8. node节点部署
在node01、node02执行,注意没有--experimental-control-plane
参数
注意**:token有效期是有限的,如果旧的token过期,可以在master节点上使用
kubeadm token create --print-join-command
重新创建一条token。
在node01和node02上执行下面这条命令
kubeadm join 172.16.4.231:8443 --token lwsk91.y2ywpq0y74wt03tb --discovery-token-ca-cert-hash sha256:0ca0a5fd28409faecba5d2f21aeb010a945a5dae42023fe361424d621708edc1
9. 部署网络插件calico
9.1 下载calico.yaml文件
wget -c https://docs.projectcalico.org/v3.6/getting-started/kubernetes/installation/hosted/kubernetes-datastore/calico-networking/1.7/calico.yaml
9.2 修改calico.yaml(根据实际情况配置)
修改CALICO_IPV4POOL_CIDR这个下面的vaule值,默认是192.168.0.0/16
# The default IPv4 pool to create on startup if none exists. Pod IPs will be
# chosen from this range. Changing this value after installation will have
# no effect. This should fall within `--cluster-cidr`.
- name: CALICO_IPV4POOL_CIDR
value: "10.209.0.0/16"
9.3 执行kubectl apply -f calico.yaml
[root@master01 ~]# kubectl apply -f calico.yaml
configmap/calico-config created
customresourcedefinition.apiextensions.k8s.io/felixconfigurations.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/ipamblocks.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/blockaffinities.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/ipamhandles.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/ipamconfigs.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/bgppeers.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/bgpconfigurations.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/ippools.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/hostendpoints.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/clusterinformations.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/globalnetworkpolicies.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/globalnetworksets.crd.projectcalico.org created
customresourcedefinition.apiextensions.k8s.io/networkpolicies.crd.projectcalico.org created
clusterrole.rbac.authorization.k8s.io/calico-kube-controllers created
clusterrolebinding.rbac.authorization.k8s.io/calico-kube-controllers created
clusterrole.rbac.authorization.k8s.io/calico-node created
clusterrolebinding.rbac.authorization.k8s.io/calico-node created
daemonset.extensions/calico-node created
serviceaccount/calico-node created
deployment.extensions/calico-kube-controllers created
serviceaccount/calico-kube-controllers created
9.4 查看节点状态
一开始没安装网络组件,是显示notReady的,装完cailco后就变成Ready,说明集群已就绪了,可以进行下一步验证集群是否搭建成功
[root@master01 ~]# kubectl get nodes
NAME STATUS ROLES AGE VERSION
master01 Ready master 23h v1.14.2
master02 Ready master 22h v1.14.2
master03 Ready master 22h v1.14.2
node01 NotReady <none> 19m v1.14.2
node03 NotReady <none> 5s v1.14.2
10. kube-proxy开启ipvs[单个master节点执行]
10.1 修改ConfigMap的kube-system/kube-proxy中的config.conf,mode: "ipvs"
:
kubectl edit cm kube-proxy -n kube-system
10.2 之后重启各个节点上的kube-proxy pod:
[root@master01 ~]# kubectl get pod -n kube-system | grep kube-proxy | awk '{system("kubectl delete pod "$1" -n kube-system")}'
pod "kube-proxy-8fpjb" deleted
pod "kube-proxy-dqqxh" deleted
pod "kube-proxy-mxvz2" deleted
pod "kube-proxy-np9x9" deleted
pod "kube-proxy-rtzcn" deleted
10.3 查看kube-proxy pod状态
[root@master01 ~]# kubectl get pod -n kube-system | grep kube-proxy
kube-proxy-4fhpg 1/1 Running 0 81s
kube-proxy-9f2x6 1/1 Running 0 109s
kube-proxy-cxl5m 1/1 Running 0 89s
kube-proxy-lvp9q 1/1 Running 0 78s
kube-proxy-v4mg8 1/1 Running 0 99s
10.4 查看是否开启了ipvs
日志中打印出了
Using ipvs Proxier
,说明ipvs模式已经开启
[root@master01 ~]# kubectl logs kube-proxy-4fhpg -n kube-system
I0705 07:53:05.254157 1 server_others.go:176] Using ipvs Proxier.
W0705 07:53:05.255130 1 proxier.go:380] clusterCIDR not specified, unable to distinguish between internal and external traffic
W0705 07:53:05.255181 1 proxier.go:386] IPVS scheduler not specified, use rr by default
I0705 07:53:05.255599 1 server.go:562] Version: v1.14.2
I0705 07:53:05.280930 1 conntrack.go:52] Setting nf_conntrack_max to 131072
I0705 07:53:05.281426 1 config.go:102] Starting endpoints config controller
I0705 07:53:05.281473 1 controller_utils.go:1027] Waiting for caches to sync for endpoints config controller
I0705 07:53:05.281523 1 config.go:202] Starting service config controller
I0705 07:53:05.281548 1 controller_utils.go:1027] Waiting for caches to sync for service config controller
I0705 07:53:05.381724 1 controller_utils.go:1034] Caches are synced for endpoints config controller
I0705 07:53:05.381772 1 controller_utils.go:1034] Caches are synced for service config controller
11. 查看ipvs状态
[root@master01 ~]# ipvsadm -L -n
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP 10.209.0.1:443 rr
-> 172.16.4.129:6443 Masq 1 0 0
-> 172.16.4.130:6443 Masq 1 0 0
-> 172.16.4.133:6443 Masq 1 0 0
TCP 10.209.0.10:53 rr
-> 10.209.59.193:53 Masq 1 0 0
-> 10.209.59.194:53 Masq 1 0 0
TCP 10.209.0.10:9153 rr
-> 10.209.59.193:9153 Masq 1 0 0
-> 10.209.59.194:9153 Masq 1 0 0
UDP 10.209.0.10:53 rr
-> 10.209.59.193:53 Masq 1 0 0
-> 10.209.59.194:53 Masq 1 0 0
12. 测试一个运行一个容器
[root@master01 ~]# kubectl run nginx --image=nginx:1.14 --replicas=2
kubectl run --generator=deployment/apps.v1 is DEPRECATED and will be removed in a future version. Use kubectl run --generator=run-pod/v1 or kubectl create instead.
deployment.apps/nginx created
12.1 查看nginx pod
[root@master01 ~]# kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-84b67f57c4-d9k8m 1/1 Running 0 59s 10.209.196.129 node01 <none> <none>
nginx-84b67f57c4-zcrxn 1/1 Running 0 59s 10.209.186.193 node03 <none> <none>
12.2 通过curl命令测试nginx
[root@master01 ~]# curl 10.209.196.129
<!DOCTYPE html>
<html>
<head>
<title>Welcome to nginx!</title>
<style>
body {
35em;
margin: 0 auto;
font-family: Tahoma, Verdana, Arial, sans-serif;
}
</style>
</head>
<body>
<h1>Welcome to nginx!</h1>
<p>If you see this page, the nginx web server is successfully installed and
working. Further configuration is required.</p>
<p>For online documentation and support please refer to
<a href="http://nginx.org/">nginx.org</a>.<br/>
Commercial support is available at
<a href="http://nginx.com/">nginx.com</a>.</p>
<p><em>Thank you for using nginx.</em></p>
</body>
</html>
[root@master01 ~]# curl 10.209.186.193
<!DOCTYPE html>
<html>
<head>
<title>Welcome to nginx!</title>
<style>
body {
35em;
margin: 0 auto;
font-family: Tahoma, Verdana, Arial, sans-serif;
}
</style>
</head>
<body>
<h1>Welcome to nginx!</h1>
<p>If you see this page, the nginx web server is successfully installed and
working. Further configuration is required.</p>
<p>For online documentation and support please refer to
<a href="http://nginx.org/">nginx.org</a>.<br/>
Commercial support is available at
<a href="http://nginx.com/">nginx.com</a>.</p>
<p><em>Thank you for using nginx.</em></p>
</body>
</html>
能显示出Welcome to nginx,说明pod运行正常,间接也说明集群可以正常使用
13. 测试dns
进入后执行
nslookup kubernetes.default
[root@master01 ~]# kubectl run curl --image=radial/busyboxplus:curl -it
kubectl run --generator=deployment/apps.v1 is DEPRECATED and will be removed in a future version. Use kubectl run --generator=run-pod/v1 or kubectl create instead.
If you don't see a command prompt, try pressing enter.
[ root@curl-66bdcf564-njcqk:/ ]$ nslookup kubernetes.default
Server: 10.209.0.10
Address 1: 10.209.0.10 kube-dns.kube-system.svc.cluster.local
Name: kubernetes.default
Address 1: 10.209.0.1 kubernetes.default.svc.cluster.local #能显示类似这样的输出,说明dns是okay的
至此kubernetes集群部署完成。
14.典型报错
14.1 镜像无法拉取报错
ct: connection timed out
Warning FailedCreatePodSandBox 3m44s (x17 over 28m) kubelet, node01 Failed create pod sandbox: rpc error: code = Unknown desc = failed pulling image "k8s.gcr.io/pause:3.1": Error response from daemon: Get https://k8s.gcr.io/v2/: dial tcp 74.125.204.82:443: connect: connection timed out
解决办法
先去其他渠道找到对应的镜像,然后docker tag下
docker pull registry.cn-hangzhou.aliyuncs.com/openthings/k8s-gcr-io-pause:3.1
docker tag registry.cn-hangzhou.aliyuncs.com/openthings/k8s-gcr-io-pause:3.1 k8s.gcr.io/pause:3.1