• operator-sdk 实战开发

    系统:CentOS Linux release 7.5.1804 (Core)
    docker:v1.19 # 因为 operator-sdk 使用了多阶段构建功能,所以 docker 必须大于等于 v1.17 版本
    k8s:k3s v1.20.6+k3s1 单节点


    定义一个 crd ,spec 包含以下信息:

    Replicas	# 副本数
    Image		# 镜像
    Resources	# 资源限制
    Envs		# 环境变量
    Ports		# 服务端口

    根据以上信息,controller 自动创建或者更新一个 deployment + service

    注意:本示例仅展示大致的开发流程,因此没有对 crd 资源的 status 状态做任何更新,实际开发中需要根据具体情况自行更新 status 状态

    创建 APP

    $ mkdir -p $GOPATH/src/github.com/leffss/app
    $ cd $GOPATH/src/github.com/leffss/app
    $ operator-sdk init --domain=example.com --repo=github.com/leffss/app

    创建 API

    $ operator-sdk create api --group app --version v1 --kind App --resource=true --controller=true

    修改 CRD 类型定义代码 api/v1/app_types.go

    package v1
    import (
    	appsv1 "k8s.io/api/apps/v1"
    	corev1 "k8s.io/api/core/v1"
    	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    修改定义后需要使用 make generate 生成新的 zz_generated.deepcopy.go 文件
    // NOTE: json tags are required.  Any new fields you add must have json tags for the fields to be serialized.
    // AppSpec defines the desired state of App
    type AppSpec struct {
    	// INSERT ADDITIONAL SPEC FIELDS - desired state of cluster
    	// Important: Run "make" to regenerate code after modifying this file
    	Replicas *int32               `json:"replicas"`		// 副本数
    	Image    string               `json:"image"`		// 镜像
    	Resources corev1.ResourceRequirements  `json:"resources,omitempty"`	// 资源限制
    	Envs     []corev1.EnvVar      `json:"envs,omitempty"`	// 环境变量
    	Ports    []corev1.ServicePort `json:"ports,omitempty"`	// 服务端口
    // AppStatus defines the observed state of App
    type AppStatus struct {
    	// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
    	// Important: Run "make" to regenerate code after modifying this file
    	//Conditions []AppCondition
    	//Phase string
    	appsv1.DeploymentStatus `json:",inline"`	// 直接引用 DeploymentStatus
    //type AppCondition struct {
    //	Type string
    //	Message string
    //	Reason string
    //	Ready bool
    //	LastUpdateTime metav1.Time
    //	LastTransitionTime metav1.Time
    // App is the Schema for the apps API
    type App struct {
    	metav1.TypeMeta   `json:",inline"`
    	metav1.ObjectMeta `json:"metadata,omitempty"`
    	Spec   AppSpec   `json:"spec,omitempty"`
    	Status AppStatus `json:"status,omitempty"`
    // AppList contains a list of App
    type AppList struct {
    	metav1.TypeMeta `json:",inline"`
    	metav1.ListMeta `json:"metadata,omitempty"`
    	Items           []App `json:"items"`
    func init() {
    	SchemeBuilder.Register(&App{}, &AppList{})

    新增 resource/deployment/deployment.go

    package deployment
    import (
    	appv1 "github.com/leffss/app/api/v1"
    	appsv1 "k8s.io/api/apps/v1"
    	corev1 "k8s.io/api/core/v1"
    	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    func New(app *appv1.App) *appsv1.Deployment {
    	labels := map[string]string{"app.example.com/v1": app.Name}
    	selector := &metav1.LabelSelector{MatchLabels: labels}
    	return &appsv1.Deployment{
    		TypeMeta:   metav1.TypeMeta{
    			APIVersion: "apps/v1",
    			Kind: "Deployment",
    		ObjectMeta: metav1.ObjectMeta{
    			Name: app.Name,
    			Namespace: app.Namespace,
    			OwnerReferences: []metav1.OwnerReference{
    				*metav1.NewControllerRef(app, schema.GroupVersionKind{
    					Group: appv1.GroupVersion.Group,
    					Version: appv1.GroupVersion.Version,
    					Kind: "App",
    		Spec:       appsv1.DeploymentSpec{
    			Replicas: app.Spec.Replicas,
    			Selector: selector,
    			Template: corev1.PodTemplateSpec{
    				ObjectMeta: metav1.ObjectMeta{
    					Labels: labels,
    				Spec: corev1.PodSpec{
    					Containers: newContainers(app),
    func newContainers(app *appv1.App) []corev1.Container  {
    	var containerPorts []corev1.ContainerPort
    	for _, servicePort := range app.Spec.Ports {
    		var cport corev1.ContainerPort
    		cport.ContainerPort = servicePort.TargetPort.IntVal
    		containerPorts = append(containerPorts, cport)
    	return []corev1.Container{
    			Name:            app.Name,
    			Image:           app.Spec.Image,
    			Ports:           containerPorts,
    			Env:             app.Spec.Envs,
    			Resources:       app.Spec.Resources,
    			ImagePullPolicy: corev1.PullIfNotPresent,

    新增 resource/service/service.go

    package service
    import (
    	appv1 "github.com/leffss/app/api/v1"
    	corev1 "k8s.io/api/core/v1"
    	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    func New(app *appv1.App) *corev1.Service {
    	return &corev1.Service{
    		TypeMeta: metav1.TypeMeta{
    			Kind:       "Service",
    			APIVersion: "v1",
    		ObjectMeta: metav1.ObjectMeta{
    			Name:                       app.Name,
    			Namespace: app.Namespace,
    			OwnerReferences: []metav1.OwnerReference{
    				*metav1.NewControllerRef(app, schema.GroupVersionKind{
    					Group: appv1.GroupVersion.Group,
    					Version: appv1.GroupVersion.Version,
    					Kind: "App",
    		Spec: corev1.ServiceSpec{
    			Ports:                    app.Spec.Ports,
    			Selector: map[string]string{
    				"app.example.com/v1": app.Name,

    修改 controller 代码 controllers/app_controller.go

    package controllers
    import (
    	ctrl "sigs.k8s.io/controller-runtime"
    	appv1 "github.com/leffss/app/api/v1"
    	corev1 "k8s.io/api/core/v1"
    	appsv1 "k8s.io/api/apps/v1"
    // AppReconciler reconciles a App object
    type AppReconciler struct {
    	Log    logr.Logger
    	Scheme *runtime.Scheme
    // Reconcile is part of the main kubernetes reconciliation loop which aims to
    // move the current state of the cluster closer to the desired state.
    // TODO(user): Modify the Reconcile function to compare the state specified by
    // the App object against the actual cluster state, and then
    // perform operations to make the cluster state reflect the state specified by
    // the user.
    // For more details, check Reconcile and its Result here:
    // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.7.2/pkg/reconcile
    func (r *AppReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
    	_ = r.Log.WithValues("app", req.NamespacedName)
    	// your logic here
    	// 获取 crd 资源
    	instance := &appv1.App{}
    	if err := r.Client.Get(ctx, req.NamespacedName, instance); err != nil {
    		if errors.IsNotFound(err) {
    			return ctrl.Result{}, nil
    		return ctrl.Result{}, err
    	// crd 资源已经标记为删除
    	if instance.DeletionTimestamp != nil {
    		return ctrl.Result{}, nil
    	oldDeploy := &appsv1.Deployment{}
    	if err := r.Client.Get(ctx, req.NamespacedName, oldDeploy); err != nil {
    		// deployment 不存在,创建
    		if errors.IsNotFound(err) {
    			// 创建deployment
    			if err := r.Client.Create(ctx, deployment.New(instance)); err != nil {
    				return ctrl.Result{}, err
    			// 创建service
    			if err := r.Client.Create(ctx, service.New(instance)); err != nil {
    				return ctrl.Result{}, err
    			// 更新 crd 资源的 Annotations
    			data, _ := json.Marshal(instance.Spec)
    			if instance.Annotations != nil {
    				instance.Annotations["spec"] = string(data)
    			} else {
    				instance.Annotations = map[string]string{"spec": string(data)}
    			if err := r.Client.Update(ctx, instance); err != nil {
    				return ctrl.Result{}, err
    		} else {
    			return  ctrl.Result{}, err
    	} else {
    		// deployment 存在,更新
    		oldSpec := appv1.AppSpec{}
    		if err := json.Unmarshal([]byte(instance.Annotations["spec"]), &oldSpec); err != nil {
    			return ctrl.Result{}, err
    		if !reflect.DeepEqual(instance.Spec, oldSpec) {
    			// 更新deployment
    			newDeploy := deployment.New(instance)
    			oldDeploy.Spec = newDeploy.Spec
    			if err := r.Client.Update(ctx, oldDeploy); err != nil {
    				return ctrl.Result{}, err
    			// 更新service
    			newService := service.New(instance)
    			oldService := &corev1.Service{}
    			if err := r.Client.Get(ctx, req.NamespacedName, oldService); err != nil {
    				return ctrl.Result{}, err
    			clusterIP := oldService.Spec.ClusterIP	// 更新 service 必须设置老的 clusterIP
    			oldService.Spec = newService.Spec
    			oldService.Spec.ClusterIP = clusterIP
    			if err := r.Client.Update(ctx, oldService); err != nil {
    				return ctrl.Result{}, err
    			// 更新 crd 资源的 Annotations
    			data, _ := json.Marshal(instance.Spec)
    			if instance.Annotations != nil {
    				instance.Annotations["spec"] = string(data)
    			} else {
    				instance.Annotations = map[string]string{"spec": string(data)}
    			if err := r.Client.Update(ctx, instance); err != nil {
    				return ctrl.Result{}, err
    	return ctrl.Result{}, nil
    // SetupWithManager sets up the controller with the Manager.
    func (r *AppReconciler) SetupWithManager(mgr ctrl.Manager) error {
    	return ctrl.NewControllerManagedBy(mgr).

    修改 CRD 资源定义 config/samples/app_v1_app.yaml

    apiVersion: app.example.com/v1
    kind: App
      name: app-sample
      namespace: default
      # Add fields here
      replicas: 2
      image: nginx:1.16.1
      - targetPort: 80
        port: 8080
      - name: DEMO
        value: app
      - name: GOPATH
        value: gopath
          cpu: 500m
          memory: 500Mi
          cpu: 100m
          memory: 100Mi

    修改 Dockerfile

    # Build the manager binary
    FROM golang:1.15 as builder
    WORKDIR /workspace
    # Copy the Go Modules manifests
    COPY go.mod go.mod
    COPY go.sum go.sum
    # cache deps before building and copying source so that we don't need to re-download as much
    # and so that source changes don't invalidate our downloaded layer
    ENV GOPROXY https://goproxy.cn,direct
    RUN go mod download
    # Copy the go source
    COPY main.go main.go
    COPY api/ api/
    COPY controllers/ controllers/
    COPY resource/ resource/
    # Build
    RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 GO111MODULE=on go build -a -o manager main.go
    # Use distroless as minimal base image to package the manager binary
    # Refer to https://github.com/GoogleContainerTools/distroless for more details
    #FROM gcr.io/distroless/static:nonroot
    FROM kubeimages/distroless-static:latest
    COPY --from=builder /workspace/manager .
    USER 65532:65532
    ENTRYPOINT ["/manager"]
    • 添加了 goproxy 环境变量
    • 新增 COPY 自定义的文件夹 resource
    • gcr.io/distroless/static:nonroot 变更为 kubeimages/distroless-static:latest


    第一种:本地运行 controller


    app 项目根目录运行:

    $ make generate && make manifests && make install && make run
    • 本机需确保安装了 kubectl 工具,并且 kubeconfig 文件 ~/.kube/config 存在(需具有集群管理员权限)
    • 测试完毕后使用 ctrl + c 停止程序,然后 make uninstall 删除 crd 定义



    $ make generate && make manifests && make install


    $ make docker-build IMG=leffss/app:v1
    $ docker images |grep app
    leffss/app                v1                  1eaa4b6a4781        About a minute ago   46.5MB


    # 因为 k3s 安装的 k8s v1.20 默认使用的是 containerd,所以要导入镜像
    $ docker save leffss/app:v1 > app.tar
    $ ctr image import app.tar
    $ docker pull kubesphere/kube-rbac-proxy:v0.8.0
    $ docker tag kubesphere/kube-rbac-proxy:v0.8.0 gcr.io/kubebuilder/kube-rbac-proxy:v0.8.0
    $ docker save gcr.io/kubebuilder/kube-rbac-proxy:v0.8.0 > kube-rbac-proxy.tar
    $ ctr image import kube-rbac-proxy.tar


    $ make deploy IMG=leffss/app:v1


    $ kubectl get service -A |grep app
    $ kubectl -n app-system get pod
    $ kubectl -n app-system get deployment

    6、CRD 确认

    $ kubectl get crd
    NAME                              CREATED AT
    addons.k3s.cattle.io              2021-05-02T01:03:34Z
    helmcharts.helm.cattle.io         2021-05-02T01:03:34Z
    helmchartconfigs.helm.cattle.io   2021-05-02T01:03:34Z
    apps.app.example.com              2021-05-04T12:10:43Z


    $ kubectl apply -f config/samples/app_v1_app.yaml 
    app.app.example.com/app-sample created


    $ kubectl -n app-system logs deployment/app-controller-manager -c manager
    2021-05-05T19:56:32.798+0800    INFO    controller-runtime.metrics      metrics server is starting to listen      {"addr": ":8080"}
    2021-05-05T19:56:32.798+0800    INFO    setup   starting manager
    2021-05-05T19:56:32.799+0800    INFO    controller-runtime.manager      starting metrics server {"path": "/metrics"}
    2021-05-05T19:56:32.800+0800    INFO    controller-runtime.manager.controller.app       Starting EventSource      {"reconciler group": "app.example.com", "reconciler kind": "App", "source": "kind source: /, Kind="}
    2021-05-05T19:56:32.901+0800    INFO    controller-runtime.manager.controller.app       Starting Controller       {"reconciler group": "app.example.com", "reconciler kind": "App"}
    2021-05-05T19:56:32.901+0800    INFO    controller-runtime.manager.controller.app       Starting workers {"reconciler group": "app.example.com", "reconciler kind": "App", "worker count": 1}
    • 如果是第一种部署方式则直接查看控制台,第二种则查看集群中 app-controller-manager 的 pod 中的 manager 容器控制台日志

    如果采用第二种方式(直接将 controller 部署到 k8s 集群),可能会出现 RBAC 权限错误,解决方法是修改部署时的权限配置。这里使用最简单的方法:直接将 controller 使用的 ServiceAccount 绑定到 cluster-admin 集群管理员角色即可

    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRoleBinding
      name: cluster-admin-rolebinding
      apiGroup: rbac.authorization.k8s.io
      kind: ClusterRole
      name: cluster-admin
    - kind: ServiceAccount
      name: app-controller-manager
      namespace: app-system

    删除 CRD 资源

    $ kubectl delete -f config/samples/app_v1_app.yaml
    app.app.example.com/app-sample deleted

    删除 CRD 定义

    $ make uninstall

    删除 controller

    $ make undeploy
    • 只适用第二种部署方式


    实际开发中只需要使用 operator-sdk(或者 kubebuilder)创建 CRD 以及相应的 controller,然后根据需求自行定义 crd 属性,并编写对应的 controller 逻辑代码,最终就可以实现一个完整的 operator。

    补充:make deploy 含义

    部署时使用 make deploy 实际是执行的什么命令呢?通过查看项目根目录 Makefile 可知实际运行命令为:

    deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.
         cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
         $(KUSTOMIZE) build config/default | kubectl apply -f -
    • 实际上就是使用 kustomize 工具生成部署的 yaml 文件,然后使用 kubectl 工具应用

    其他的 make install,make run 等命令也都在 Makefile 中有相关定义。

    所以在实际部署过程中,如果想手动部署的话,可以直接参考 Makefile,首先生成部署 yaml 文件,然后手动应用到 k8s 集群即可。

