• kube-proxy


    cmd\kube-proxy\proxy.go
    func main() {
    	// Build a new command to run the proxy; comparing this with the scheduler walkthrough earlier, k8s bootstraps all of its components the same way
    	command := app.NewProxyCommand() --->cmd\kube-proxy\app\server.go
    	code := cli.Run(command)
    	os.Exit(code)
    }
    
    cmd\kube-proxy\app\server.go
    NewProxyCommand
    	cmd := &cobra.Command{
    		Run: func(cmd *cobra.Command, args []string) {
    		...
    			if err := opts.Run(); err != nil {
    				klog.ErrorS(err, "Error running ProxyServer")
    				os.Exit(1)
    			}
    	}
    
    
    Run
    	...
    	proxyServer, err := NewProxyServer(o) --->cmd\kube-proxy\app\server_others.go
    	...
    	return o.runLoop()
    
    
    runLoop
    	go func() {
    		err := o.proxyServer.Run()
    		o.errCh <- err
    	}()
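
    The elided part of runLoop blocks on o.errCh. As a rough illustration of the pattern (hypothetical types, not the actual kube-proxy structs): run the server in a goroutine and wait on an error channel.

    	package main
    	
    	import (
    		"errors"
    		"fmt"
    	)
    	
    	type server struct{}
    	
    	// Run blocks until the server exits, like ProxyServer.Run above.
    	func (s *server) Run() error { return errors.New("server stopped") }
    	
    	type options struct {
    		proxyServer *server
    		errCh       chan error
    	}
    	
    	// runLoop starts the server in a goroutine and blocks until it reports an error.
    	func (o *options) runLoop() error {
    		go func() {
    			o.errCh <- o.proxyServer.Run()
    		}()
    		return <-o.errCh
    	}
    	
    	func main() {
    		o := &options{proxyServer: &server{}, errCh: make(chan error)}
    		fmt.Println(o.runLoop())
    	}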
    
    cmd\kube-proxy\app\server_others.go
    // Since k8s is generally run on Linux, the proxy usually operates in ipvs or iptables mode
    func NewProxyServer(o *Options) (*ProxyServer, error) {
    	return newProxyServer(o.config, o.CleanupAndExit, o.master)
    }
    
    
    **newProxyServer**
    	...
    	// Even when ipvs mode is explicitly requested, it still has to pass validation
    	canUseIPVS, err := ipvs.CanUseIPVSProxier(kernelHandler, ipsetInterface, config.IPVS.Scheduler) --->pkg\proxy\ipvs\proxier.go
    	...
    	if proxyMode == proxyModeIPTables {
    		...
    		proxier, err = iptables.NewDualStackProxier() --->pkg\proxy\iptables\proxier.go
    	} else {
    		proxier, err = ipvs.NewDualStackProxier() --->pkg\proxy\ipvs\proxier.go
    	}
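
    The mode above comes from the kube-proxy configuration file; selecting ipvs looks like this (standard KubeProxyConfiguration fields):

        apiVersion: kubeproxy.config.k8s.io/v1alpha1
        kind: KubeProxyConfiguration
        mode: "ipvs"        # empty or "iptables" selects iptables mode
        ipvs:
          scheduler: "rr"   # the scheduler that CanUseIPVSProxier validates below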
    
    pkg\proxy\ipvs\proxier.go
    // This function verifies the modules that ipvs mode requires; if the check fails kube-proxy degrades to iptables mode
    func CanUseIPVSProxier(handle KernelHandler, ipsetver IPSetVersioner, scheduler string) (bool, error)
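
    As a simplified sketch of what that validation covers (an assumption-level illustration; the real function derives the module list dynamically and also checks the ipset version):

    	package main
    	
    	import "fmt"
    	
    	// requiredIPVSModules approximates the kernel modules ipvs mode depends on;
    	// the scheduler-specific module (e.g. ip_vs_rr for "rr") is derived from config.
    	var requiredIPVSModules = []string{"ip_vs", "ip_vs_rr", "ip_vs_wrr", "ip_vs_sh", "nf_conntrack"}
    	
    	// canUseIPVS reports whether every required module is available; "loaded" stands
    	// in for what KernelHandler.GetModules provides in the real code.
    	func canUseIPVS(loaded map[string]bool, scheduler string) (bool, error) {
    		mods := append([]string{}, requiredIPVSModules...)
    		if scheduler != "" {
    			mods = append(mods, "ip_vs_"+scheduler)
    		}
    		for _, m := range mods {
    			if !loaded[m] {
    				return false, fmt.Errorf("ipvs proxier unusable: kernel module %q missing", m)
    			}
    		}
    		return true, nil
    	}
    	
    	func main() {
    		ok, err := canUseIPVS(map[string]bool{"ip_vs": true}, "rr")
    		fmt.Println(ok, err) // false, ip_vs_rr missing: the caller falls back to iptables
    	}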
    
    pkg\proxy\iptables\proxier.go
    // kube-proxy already supports and enables IPv6
    NewDualStackProxier
    	ipv4Proxier, err := NewProxier(ipt[0], sysctl,
    		exec, syncPeriod, minSyncPeriod, masqueradeAll, masqueradeBit, localDetectors[0], hostname,
    		nodeIP[0], recorder, healthzServer, ipFamilyMap[v1.IPv4Protocol])
    	ipv6Proxier, err := NewProxier(ipt[1], sysctl,
    		exec, syncPeriod, minSyncPeriod, masqueradeAll, masqueradeBit, localDetectors[1], hostname,
    		nodeIP[1], recorder, healthzServer, ipFamilyMap[v1.IPv6Protocol])
    	return metaproxier.NewMetaProxier(ipv4Proxier, ipv6Proxier), nil --->pkg\proxy\metaproxier\meta_proxier.go
    
    
    var _ proxy.Provider = &Proxier{}
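
    This blank-identifier assignment is Go's compile-time way of asserting that *Proxier implements proxy.Provider. In isolation the idiom looks like this:

    	package main
    	
    	type Provider interface {
    		Sync()
    	}
    	
    	type Proxier struct{}
    	
    	func (p *Proxier) Sync() {}
    	
    	// Fails to compile if *Proxier ever stops implementing Provider;
    	// the assignment costs nothing at runtime.
    	var _ Provider = &Proxier{}
    	
    	func main() {}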
    
    
    NewProxier
    	go ipt.Monitor(kubeProxyCanaryChain, []utiliptables.Table{utiliptables.TableMangle, utiliptables.TableNAT, utiliptables.TableFilter},
    		proxier.syncProxyRules, syncPeriod, wait.NeverStop)
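
    Monitor creates a canary chain in each of the listed tables and periodically checks that it still exists; if something external (for example a firewalld reload) flushes the tables, the canary disappears and the reload function, here syncProxyRules, gets invoked. A stripped-down sketch of such a watch loop (illustrative, not the utiliptables implementation):

    	package main
    	
    	import "time"
    	
    	// monitorCanary polls for the canary chain and calls reload once it disappears;
    	// chainExists stands in for the real iptables existence check.
    	func monitorCanary(chainExists func() bool, reload func(), interval time.Duration, stopCh <-chan struct{}) {
    		for {
    			select {
    			case <-stopCh:
    				return
    			case <-time.After(interval):
    				if !chainExists() {
    					// Something external flushed the tables; rebuild all rules.
    					reload()
    					return
    				}
    			}
    		}
    	}
    	
    	func main() {
    		flushed := make(chan struct{})
    		go func() { time.Sleep(30 * time.Millisecond); close(flushed) }()
    		chainExists := func() bool {
    			select {
    			case <-flushed:
    				return false
    			default:
    				return true
    			}
    		}
    		monitorCanary(chainExists, func() { println("canary gone, resyncing") }, 10*time.Millisecond, make(chan struct{}))
    	}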
    
    
    // Comparing the interface methods in pkg\proxy\metaproxier\meta_proxier.go shows that the concrete behavior all hangs off syncProxyRules, a method that runs to 800+ lines...
    // This is all of the iptables-triggered code; the only other place that configures iptables rules is iptablesInit()
    // This is where all of the iptables-save/restore calls happen.
    // The only other iptables rules are those that are setup in iptablesInit()
    // This assumes proxier.mu is NOT held
    // The method illustrates several problems:
    // 1. It is fairly long and is invoked frequently, so it is not cheap
    // 2. On every change it re-reads all existing rules, merges in the new state, and writes everything back
    // 3. iptables is a linear list of rules, and one logical change adds N rules; as the cluster grows the chains get long and matching degrades linearly
    // 4. The flow feels awkward: all rules are written first, then the stale ones are deleted
    **syncProxyRules**
    	// Take the lock right at the top
    	proxier.mu.Lock()
    	defer proxier.mu.Unlock()
    	...
    	// Fetch the pending service and endpoint changes
    	serviceUpdateResult := proxier.serviceMap.Update(proxier.serviceChanges) --->pkg\proxy\service.go
    	endpointUpdateResult := proxier.endpointsMap.Update(proxier.endpointsChanges) --->pkg\proxy\endpoints.go
    	...
    	// Track the ClusterIP/ExternalIP/LoadBalancerIP and NodePort used by each stale service so their conntrack entries can be cleaned up
    	for _, svcPortName := range endpointUpdateResult.StaleServiceNames {
    		if svcInfo, ok := proxier.serviceMap[svcPortName]; ok && svcInfo != nil && conntrack.IsClearConntrackNeeded(svcInfo.Protocol()) {
    			conntrackCleanupServiceIPs.Insert(svcInfo.ClusterIP().String())
    			for _, extIP := range svcInfo.ExternalIPStrings() {
    				conntrackCleanupServiceIPs.Insert(extIP)
    			}
    			for _, lbIP := range svcInfo.LoadBalancerIPStrings() {
    				conntrackCleanupServiceIPs.Insert(lbIP)
    			}
    			nodePort := svcInfo.NodePort()
    			if svcInfo.Protocol() == v1.ProtocolUDP && nodePort != 0 {
    				conntrackCleanupServiceNodePorts.Insert(nodePort)
    			}
    		}
    	}
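    	// This matters mostly for UDP: stale conntrack entries would keep steering packets to
    	// endpoints that no longer exist. kube-proxy execs the conntrack binary for this cleanup;
    	// the effect is roughly equivalent to (illustrative invocation, 10.96.0.10 being a made-up ClusterIP):
    	//   conntrack -D --orig-dst 10.96.0.10 -p udp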
    	...
    	// If this sync fails, a retry is scheduled via the sync runner (RetryAfter with syncPeriod)
    	success := false
    	defer func() {
    		if !success {
    			klog.InfoS("Sync failed", "retryingTime", proxier.syncPeriod)
    			proxier.syncRunner.RetryAfter(proxier.syncPeriod)
    		}
    	}()
    	...
    	// Create and link the KUBE-* jump chains. It is not clear why the fixed arguments are re-checked and re-assembled on every sync; they look like they could be precomputed as constants
    	for _, jump := range iptablesJumpChains {
    		...
    		args := append(jump.extraArgs,
    			"-m", "comment", "--comment", jump.comment,
    			"-j", string(jump.dstChain),
    		)
    		...
    	}
    	...
    	// Fetch the existing filter and NAT chains and build them into maps
    	existingFilterChains := make(map[utiliptables.Chain][]byte)
    	proxier.existingFilterChainsData.Reset()
    	err := proxier.iptables.SaveInto(utiliptables.TableFilter, proxier.existingFilterChainsData)
    		existingFilterChains = utiliptables.GetChainLines(utiliptables.TableFilter, proxier.existingFilterChainsData.Bytes())
    	existingNATChains := make(map[utiliptables.Chain][]byte)
    	proxier.iptablesData.Reset()
    	err = proxier.iptables.SaveInto(utiliptables.TableNAT, proxier.iptablesData)
    		existingNATChains = utiliptables.GetChainLines(utiliptables.TableNAT, proxier.iptablesData.Bytes())
    	...
    	// Write the preserved filter and NAT chains back into the corresponding buffers
    	proxier.filterChains.Write("*filter")
    	proxier.natChains.Write("*nat")
    	for _, chainName := range []utiliptables.Chain{kubeServicesChain, kubeExternalServicesChain, kubeForwardChain, kubeNodePortsChain} {
    		if chain, ok := existingFilterChains[chainName]; ok {
    			proxier.filterChains.WriteBytes(chain)
    		} else {
    			proxier.filterChains.Write(utiliptables.MakeChainLine(chainName))
    		}
    	}
    	for _, chainName := range []utiliptables.Chain{kubeServicesChain, kubeNodePortsChain, kubePostroutingChain, KubeMarkMasqChain} {
    		if chain, ok := existingNATChains[chainName]; ok {
    			proxier.natChains.WriteBytes(chain)
    		} else {
    			proxier.natChains.Write(utiliptables.MakeChainLine(chainName))
    		}
    	}
    	...
    	// Build the postrouting rules into the buffer
    	proxier.natRules.Write(
    		"-A", string(kubePostroutingChain),
    		"-m", "mark", "!", "--mark", fmt.Sprintf("%s/%s", proxier.masqueradeMark, proxier.masqueradeMark),
    		"-j", "RETURN",
    	)
    	proxier.natRules.Write(
    		"-A", string(kubePostroutingChain),
    		"-j", "MARK", "--xor-mark", proxier.masqueradeMark,
    	)
    	masqRule := []string{
    		"-A", string(kubePostroutingChain),
    		"-m", "comment", "--comment", `"kubernetes service traffic requiring SNAT"`,
    		"-j", "MASQUERADE",
    	}
    	if proxier.iptables.HasRandomFully() {
    		masqRule = append(masqRule, "--random-fully")
    	}
    	proxier.natRules.Write(masqRule)
    	...
    	// Build the masquerade-mark rule into the buffer
    	proxier.natRules.Write(
    		"-A", string(KubeMarkMasqChain),
    		"-j", "MARK", "--or-mark", proxier.masqueradeMark,
    	)
    	...
    	...
    	**// The core logic of this method: build the matching rules for every service, 550 lines in total**
    	for svcName, svc := range proxier.serviceMap {
    		// Gather some information about the service and its endpoints
    		svcInfo, ok := svc.(*serviceInfo)
    		allEndpoints := proxier.endpointsMap[svcName]
    		...
    		var hasEndpoints, hasLocalReadyEndpoints, hasLocalServingTerminatingEndpoints bool
    		for _, ep := range allEndpoints {
    			if ep.IsReady() {
    				hasEndpoints = true
    				if ep.GetIsLocal() {
    					hasLocalReadyEndpoints = true
    				}
    			} else if svc.NodeLocalExternal() && utilfeature.DefaultFeatureGate.Enabled(features.ProxyTerminatingEndpoints) {
    				// Only consulted when the service is configured to allow not-fully-ready backends
    				if ep.IsServing() && ep.IsTerminating() {
    					hasEndpoints = true
    					if ep.GetIsLocal() {
    						hasLocalServingTerminatingEndpoints = true
    					}
    				}
    			}
    		}
    		useTerminatingEndpoints := !hasLocalReadyEndpoints && hasLocalServingTerminatingEndpoints
    		for _, ep := range allEndpoints {
    			...
    			// Add the rules that forward traffic to the endpoints
    			if svc.NodeLocalExternal() && epInfo.IsLocal {
    				if useTerminatingEndpoints {
    					if epInfo.Serving && epInfo.Terminating {
    						localEndpointChains = append(localEndpointChains, endpointChain)
    						endpointInUse = true
    					}
    				} else if epInfo.Ready {
    					localEndpointChains = append(localEndpointChains, endpointChain)
    					endpointInUse = true
    				}
    			}
    		}
    		...
    		// Record the existing service chains
    		// Record the existing lb chains
    		// Build the clusterIP rules
    		args = append(args[:0],
    				"-m", "comment", "--comment", fmt.Sprintf(`"%s cluster IP"`, svcNameString),
    				"-m", protocol, "-p", protocol,
    				"-d", utilproxy.ToCIDR(svcInfo.ClusterIP()),
    				"--dport", strconv.Itoa(svcInfo.Port()),
    			)
    		// Build the externalIPs rules
    		args = append(args[:0],
    					"-m", "comment", "--comment", fmt.Sprintf(`"%s external IP"`, svcNameString),
    					"-m", protocol, "-p", protocol,
    					"-d", utilproxy.ToCIDR(netutils.ParseIPSloppy(externalIP)),
    					"--dport", strconv.Itoa(svcInfo.Port()),
    				)
    		// Build the load-balancer rules
    		args = append(args[:0],
    						"-A", string(kubeServicesChain),
    						"-m", "comment", "--comment", fmt.Sprintf(`"%s loadbalancer IP"`, svcNameString),
    						"-m", protocol, "-p", protocol,
    						"-d", utilproxy.ToCIDR(netutils.ParseIPSloppy(ingress)),
    						"--dport", strconv.Itoa(svcInfo.Port()),
    					)
    		// Build the NodePort forwarding rules
    		args = append(args[:0],
    					"-m", "comment", "--comment", svcNameString,
    					"-m", protocol, "-p", protocol,
    					"--dport", strconv.Itoa(svcInfo.NodePort()),
    				)
    	}
    	...
    	...
    	// Clean up rules that are no longer used
    	for chain := range existingNATChains {
    		if !activeNATChains[chain] {
    			chainString := string(chain)
    			if !strings.HasPrefix(chainString, "KUBE-SVC-") && !strings.HasPrefix(chainString, "KUBE-SEP-") && !strings.HasPrefix(chainString, "KUBE-FW-") && !strings.HasPrefix(chainString, "KUBE-XLB-") {
    				// Not one of our chains, so don't touch it
    				continue
    			}
    			// Per iptables-restore conventions, the chain line itself must also be written before the rule that deletes it
    			proxier.natChains.WriteBytes(existingNATChains[chain])
    			proxier.natRules.Write("-X", chainString)
    		}
    	}
    	...
    	// Flush the new rules for all tables to the kernel
    	err = proxier.iptables.RestoreAll(proxier.iptablesData.Bytes(), utiliptables.NoFlushTables, utiliptables.RestoreCounters)
    	// This sync succeeded
    	success = true
    
    
    type iptablesJumpChain struct {
    	table     utiliptables.Table
    	dstChain  utiliptables.Chain
    	srcChain  utiliptables.Chain
    	comment   string
    	extraArgs []string
    }
    
    var iptablesJumpChains = []iptablesJumpChain{
    	{utiliptables.TableFilter, kubeExternalServicesChain, utiliptables.ChainInput, "kubernetes externally-visible service portals", []string{"-m", "conntrack", "--ctstate", "NEW"}},
    	{utiliptables.TableFilter, kubeExternalServicesChain, utiliptables.ChainForward, "kubernetes externally-visible service portals", []string{"-m", "conntrack", "--ctstate", "NEW"}},
    	{utiliptables.TableFilter, kubeNodePortsChain, utiliptables.ChainInput, "kubernetes health check service ports", nil},
    	{utiliptables.TableFilter, kubeServicesChain, utiliptables.ChainForward, "kubernetes service portals", []string{"-m", "conntrack", "--ctstate", "NEW"}},
    	{utiliptables.TableFilter, kubeServicesChain, utiliptables.ChainOutput, "kubernetes service portals", []string{"-m", "conntrack", "--ctstate", "NEW"}},
    	{utiliptables.TableFilter, kubeForwardChain, utiliptables.ChainForward, "kubernetes forwarding rules", nil},
    	{utiliptables.TableNAT, kubeServicesChain, utiliptables.ChainOutput, "kubernetes service portals", nil},
    	{utiliptables.TableNAT, kubeServicesChain, utiliptables.ChainPrerouting, "kubernetes service portals", nil},
    	{utiliptables.TableNAT, kubePostroutingChain, utiliptables.ChainPostrouting, "kubernetes postrouting rules", nil},
    }
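
    To make the output concrete, this is roughly the shape of the NAT payload that syncProxyRules feeds to iptables-restore for one ClusterIP service with two endpoints (a hand-written illustration with made-up chain hashes, IPs and service name, not a capture from a real node, and with the masquerade plumbing omitted):

    	*nat
    	:KUBE-SERVICES - [0:0]
    	:KUBE-SVC-XPGD46QRK7WJZT7O - [0:0]
    	:KUBE-SEP-AAAAAAAAAAAAAAAA - [0:0]
    	:KUBE-SEP-BBBBBBBBBBBBBBBB - [0:0]
    	-A KUBE-SERVICES -m comment --comment "default/example cluster IP" -m tcp -p tcp -d 10.96.0.10/32 --dport 80 -j KUBE-SVC-XPGD46QRK7WJZT7O
    	-A KUBE-SVC-XPGD46QRK7WJZT7O -m statistic --mode random --probability 0.5 -j KUBE-SEP-AAAAAAAAAAAAAAAA
    	-A KUBE-SVC-XPGD46QRK7WJZT7O -j KUBE-SEP-BBBBBBBBBBBBBBBB
    	-A KUBE-SEP-AAAAAAAAAAAAAAAA -p tcp -m tcp -j DNAT --to-destination 10.244.1.2:80
    	-A KUBE-SEP-BBBBBBBBBBBBBBBB -p tcp -m tcp -j DNAT --to-destination 10.244.2.3:80
    	COMMIT

    Each service costs one KUBE-SVC chain plus one KUBE-SEP chain per endpoint, which is exactly why the rule count, and the linear scan of KUBE-SERVICES on the first packet of every connection, grows with cluster size as criticized above.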
    
    pkg\proxy\ipvs\proxier.go
    **syncProxyRules**
    	// Same preamble as the iptables mode
    	proxier.mu.Lock()
    	defer proxier.mu.Unlock()
    	...
    	// Fetch the pending service and endpoint changes
    	serviceUpdateResult := proxier.serviceMap.Update(proxier.serviceChanges) --->pkg\proxy\service.go
    	endpointUpdateResult := proxier.endpointsMap.Update(proxier.endpointsChanges) --->pkg\proxy\endpoints.go
    	...
    	// Track the ClusterIP/ExternalIP/LoadBalancerIP and NodePort used by each stale service so their conntrack entries can be cleaned up
    	for _, svcPortName := range endpointUpdateResult.StaleServiceNames {
    		if svcInfo, ok := proxier.serviceMap[svcPortName]; ok && svcInfo != nil && conntrack.IsClearConntrackNeeded(svcInfo.Protocol()) {
    			conntrackCleanupServiceIPs.Insert(svcInfo.ClusterIP().String())
    			for _, extIP := range svcInfo.ExternalIPStrings() {
    				conntrackCleanupServiceIPs.Insert(extIP)
    			}
    			for _, lbIP := range svcInfo.LoadBalancerIPStrings() {
    				conntrackCleanupServiceIPs.Insert(lbIP)
    			}
    			nodePort := svcInfo.NodePort()
    			if svcInfo.Protocol() == v1.ProtocolUDP && nodePort != 0 {
    				conntrackCleanupServiceNodePorts.Insert(nodePort)
    			}
    		}
    	}
    	// ipvs mode still uses some iptables rules: ipvs was designed for load balancing and has no hooks for filtering, so that part is still handled by iptables
    	proxier.filterChains.Write("*filter")
    	proxier.natChains.Write("*nat")
    	proxier.createAndLinkKubeChain()
    	
    	**// Add the corresponding ipvs setup for every service; this block runs to 457 lines**
    	for svcName, svc := range proxier.serviceMap {
    		// Record the existing entries
    		for _, e := range proxier.endpointsMap[svcName] {
    			...
    			proxier.ipsetList[kubeLoopBackIPSet].activeEntries.Insert(entry.String())
    		}
    		// clusterIP entries
    		proxier.ipsetList[kubeClusterIPSet].activeEntries.Insert(entry.String())
    		// SessionAffinity is compared here: requests from the same client can be pinned to the same backend; the default timeout is 10800 seconds
    		if svcInfo.SessionAffinityType() == v1.ServiceAffinityClientIP {
    			serv.Flags |= utilipvs.FlagPersistent
    			serv.Timeout = uint32(svcInfo.StickyMaxAgeSeconds())
    		}
    		// externalIPs entries
    		proxier.ipsetList[kubeExternalIPSet].activeEntries.Insert(entry.String())
    		// load-balancer entries
    		proxier.ipsetList[kubeLoadBalancerSet].activeEntries.Insert(entry.String())
    		// NodePort entries
    		nodePortSet.activeEntries.Insert(entry.String())
    	}
    	
    	// Sync the ipset entries that back the ipvs rules
    	for _, set := range proxier.ipsetList {
    		set.syncIPSetEntries()
    	}
    	
    	// What follows is more iptables housekeeping
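
    For contrast, the end state of ipvs mode lives in the kernel as virtual servers rather than rule chains. On a node, ipvsadm -Ln would show something like the following (illustrative addresses; the "persistent 10800" flag is what FlagPersistent above turns into when SessionAffinity is ClientIP):

    	IP Virtual Server version 1.2.1 (size=4096)
    	Prot LocalAddress:Port Scheduler Flags
    	  -> RemoteAddress:Port           Forward Weight ActiveConn InActConn
    	TCP  10.96.0.10:80 rr persistent 10800
    	  -> 10.244.1.2:80                Masq    1      0          0
    	  -> 10.244.2.3:80                Masq    1      0          0

    Real-server lookup here is a kernel hash table, so matching cost stays flat as the number of services grows, which is the main scalability argument for ipvs mode.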
    

    // Everywhere in this file the ipv4 operation runs first and then the ipv6 one in sequence, i.e. kube-proxy is production-ready for IPv6

    pkg\proxy\metaproxier\meta_proxier.go
    func NewMetaProxier(ipv4Proxier, ipv6Proxier proxy.Provider) proxy.Provider {
    	return proxy.Provider(&metaProxier{
    		ipv4Proxier: ipv4Proxier,
    		ipv6Proxier: ipv6Proxier,
    	}) --->pkg\proxy\xxx\proxier.go
    }
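
    The metaProxier itself is thin: every proxy.Provider method simply calls the ipv4 proxier and then the ipv6 proxier. A minimal sketch of that delegation shape (the Provider interface is cut down to one method here; the real one is larger):

    	package main
    	
    	import "fmt"
    	
    	// Provider is a cut-down stand-in for k8s.io/kubernetes/pkg/proxy.Provider.
    	type Provider interface {
    		Sync()
    	}
    	
    	type printProxier struct{ family string }
    	
    	func (p *printProxier) Sync() { fmt.Println("syncing", p.family) }
    	
    	// metaProxier delegates every call to the ipv4 proxier first, then the ipv6 one.
    	type metaProxier struct {
    		ipv4Proxier Provider
    		ipv6Proxier Provider
    	}
    	
    	func (m *metaProxier) Sync() {
    		m.ipv4Proxier.Sync()
    		m.ipv6Proxier.Sync()
    	}
    	
    	func main() {
    		var p Provider = &metaProxier{
    			ipv4Proxier: &printProxier{family: "ipv4"},
    			ipv6Proxier: &printProxier{family: "ipv6"},
    		}
    		p.Sync() // syncing ipv4, then syncing ipv6
    	}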
    