cmd\kube-controller-manager\app\core.go
// startPodGCController constructs the pod garbage collector from the shared
// controller context and starts its Run method in a new goroutine.
//
// The client, the pod and node informers, the threshold and the controller
// itself are all built synchronously in the caller's goroutine; only Run
// executes in the background. The function always returns (nil, true, nil).
func startPodGCController(ctx context.Context, controllerContext ControllerContext) (controller.Interface, bool, error) {
	kubeClient := controllerContext.ClientBuilder.ClientOrDie("pod-garbage-collector")
	podInformer := controllerContext.InformerFactory.Core().V1().Pods()
	nodeInformer := controllerContext.InformerFactory.Core().V1().Nodes()
	terminatedPodThreshold := int(controllerContext.ComponentConfig.PodGCController.TerminatedPodGCThreshold)

	podGC := podgc.NewPodGC(ctx, kubeClient, podInformer, nodeInformer, terminatedPodThreshold)
	go podGC.Run(ctx)

	return nil, true, nil
}
pkg\controller\podgc\gc_controller.go
NewPodGC
// Excerpt of the constructor; the other PodGCController fields are elided
// ("...").
gcc := &PodGCController{
...
// deletePod is the deletion hook invoked by the gc* routines: it logs the pod
// reference and then issues a Delete for the pod through kubeClient, passing
// metav1.NewDeleteOptions(0) — presumably a zero grace period, matching the
// "force deleting" log message; confirm against the apimachinery docs.
deletePod: func(namespace, name string) error {
klog.InfoS("PodGC is force deleting Pod", "pod", klog.KRef(namespace, name))
return kubeClient.CoreV1().Pods(namespace).Delete(ctx, name, *metav1.NewDeleteOptions(0)) --->
},
}
Run
// Excerpt: in a new goroutine, hands ctx, gcc.gc and gcCheckPeriod to
// wait.UntilWithContext — presumably this re-invokes gc every gcCheckPeriod
// until ctx is cancelled; confirm against the wait package docs.
go wait.UntilWithContext(ctx, gcc.gc, gcCheckPeriod)
// gc performs a single garbage-collection pass.
//
// It lists every pod and node through the controller's listers and then runs
// three cleanups in order: gcTerminated (only when terminatedPodThreshold is
// positive), gcOrphaned and gcUnscheduledTerminating. If either listing fails,
// the error is logged and the pass is abandoned, so no cleanup ever runs
// against a missing or partial pod/node list.
func (gcc *PodGCController) gc(ctx context.Context) {
	// Fetch all pods and nodes in the cluster.
	pods, err := gcc.podLister.List(labels.Everything())
	if err != nil {
		klog.ErrorS(err, "Error while listing all pods")
		return
	}
	nodes, err := gcc.nodeLister.List(labels.Everything())
	if err != nil {
		// Bail out: gcOrphaned compares each pod's node name against this
		// list, so it must not be handed an incomplete one.
		klog.ErrorS(err, "Error while listing all nodes")
		return
	}

	// Note carried over from the original annotations: how long a pod stays in
	// the Terminating state can be extended with terminationGracePeriodSeconds
	// in its YAML (default 30 seconds), and combined with preStop for graceful
	// shutdown, capturing GC data and other custom actions. The check below is
	// on terminatedPodThreshold, which gates only the gcTerminated cleanup.
	if gcc.terminatedPodThreshold > 0 {
		gcc.gcTerminated(pods)
	}
	gcc.gcOrphaned(ctx, pods, nodes)
	gcc.gcUnscheduledTerminating(pods)
}
**gcTerminated**
// Body excerpt of gcTerminated (function header not shown).
// Collect every pod that isPodTerminated reports as terminated.
terminatedPods := []*v1.Pod{}
for _, pod := range pods {
if isPodTerminated(pod) {
terminatedPods = append(terminatedPods, pod)
}
}
terminatedPodCount := len(terminatedPods)
// gcc.terminatedPodThreshold is the number of terminated pods that may be left
// in place; pods are deleted only when the count exceeds it, and only the
// excess is removed. The original note adds that a threshold of 0 turns this
// cleanup off (gc calls gcTerminated only when the threshold is > 0).
deleteCount := terminatedPodCount - gcc.terminatedPodThreshold
if deleteCount <= 0 {
return
}
// Clean up the pods that were created earliest first; per the original note,
// pods with the same creation time are ordered by name (byCreationTimestamp is
// defined elsewhere).
sort.Sort(byCreationTimestamp(terminatedPods))
// Each deletion runs in its own goroutine; the WaitGroup blocks until all of
// them have finished. Note that the local variable is named wait, which
// shadows any package of that name inside this function.
var wait sync.WaitGroup
for i := 0; i < deleteCount; i++ {
wait.Add(1)
go func(namespace string, name string) {
defer wait.Done()
if err := gcc.deletePod(namespace, name); err != nil {
// ignore not founds
// NOTE(review): no not-found filtering is visible in this excerpt; every
// error reaches HandleError. Because the call is deferred, it runs when
// this goroutine's function returns, just before the deferred wait.Done().
defer utilruntime.HandleError(err)
}
}(terminatedPods[i].Namespace, terminatedPods[i].Name)
}
wait.Wait()
// Clean up pods that are bound to nodes which no longer exist.
gcOrphaned
// Body excerpt of gcOrphaned (function header not shown).
// Build the set of names of the nodes that currently exist.
existingNodeNames := sets.NewString()
for _, node := range nodes {
existingNodeNames.Insert(node.Name)
}
// Walk all pods and, for each one bound to a node name that is not in the
// existing set, enqueue that node name with a delay of quarantineTime (40
// seconds by default, per the original note).
for _, pod := range pods {
if pod.Spec.NodeName != "" && !existingNodeNames.Has(pod.Spec.NodeName) {
gcc.nodeQueue.AddAfter(pod.Spec.NodeName, quarantineTime)
}
}
// Determine which nodes are still unknown once the wait is over.
// NOTE(review): quit is not used anywhere in this excerpt; its handling
// appears to have been elided.
deletedNodesNames, quit := gcc.discoverDeletedNodes(ctx, existingNodeNames)
// Walk all pods again and delete those bound to a node that was found to be
// deleted. This loops over every pod a second time; the original note
// suggests this could probably be optimized.
for _, pod := range pods {
if !deletedNodesNames.Has(pod.Spec.NodeName) {
continue
}
// NOTE(review): the error branch is empty in this excerpt, so a failed
// deletion is silently dropped here.
if err := gcc.deletePod(pod.Namespace, pod.Name); err != nil {}
}
discoverDeletedNodes
// Excerpt: asks checkIfNodeExists whether the node called nodeName exists.
// Where nodeName comes from and how exists/err are used is not shown here.
exists, err := gcc.checkIfNodeExists(ctx, nodeName)
checkIfNodeExists
// Excerpt: fetches the node by name with a Get through gcc.kubeClient (not
// through the nodeLister that gc uses). Only the error is kept; how fetchErr
// is turned into the result is not shown here.
_, fetchErr := gcc.kubeClient.CoreV1().Nodes().Get(ctx, name, metav1.GetOptions{})
// Clean up pods that were terminated before they were ever bound to a node.
gcUnscheduledTerminating
// Body excerpt (function header and the loop's closing brace are not shown).
for _, pod := range pods {
// Skip pods that are not being deleted (no DeletionTimestamp) and pods that
// already have a node assigned.
if pod.DeletionTimestamp == nil || len(pod.Spec.NodeName) > 0 {
continue
}
// NOTE(review): the error branch is empty in this excerpt, so a failed
// deletion is silently dropped here.
if err := gcc.deletePod(pod.Namespace, pod.Name); err != nil {}