1.在管理节点上,进入刚创建的放置配置文件的目录,用 ceph-deploy 执行如下步骤
mkdir /opt/cluster-ceph cd /opt/cluster-ceph ceph-deploy new master1 master2 master3
2.添加epel源
yum install -y yum-utils && yum-config-manager --add-repo https://dl.fedoraproject.org/pub/epel/7/x86_64/ && yum install --nogpgcheck -y epel-release && rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-EPEL-7 && rm -f /etc/yum.repos.d/dl.fedoraproject.org*
直接进行ceph的安装,会报如下的错误:
--> Finished Dependency Resolution Error: Package: 2:ceph-common-10.2.11-0.el7.x86_64 (ceph) Requires: libbabeltrace-ctf.so.1()(64bit) Error: Package: 2:ceph-osd-10.2.11-0.el7.x86_64 (ceph) Requires: libleveldb.so.1()(64bit) Error: Package: 2:ceph-mon-10.2.11-0.el7.x86_64 (ceph) Requires: libleveldb.so.1()(64bit) Error: Package: 2:librbd1-10.2.11-0.el7.x86_64 (ceph) Requires: liblttng-ust.so.0()(64bit) Error: Package: 2:ceph-base-10.2.11-0.el7.x86_64 (ceph) Requires: liblttng-ust.so.0()(64bit) Error: Package: 2:librgw2-10.2.11-0.el7.x86_64 (ceph) Requires: libfcgi.so.0()(64bit) Error: Package: 2:ceph-common-10.2.11-0.el7.x86_64 (ceph) Requires: libbabeltrace.so.1()(64bit) Error: Package: 2:librados2-10.2.11-0.el7.x86_64 (ceph) Requires: liblttng-ust.so.0()(64bit)
2. 安装 Ceph
[root@localhost ~]# yum install --downloadonly --downloaddir=/tmp/ceph ceph 在每台主机上安装ceph [root@localhost ~]# yum localinstall -C -y --disablerepo=* /tmp/ceph/*.rpm
配置初始 monitor(s)、并收集所有密钥
# 请务必在 ceph-cluster 目录下
[root@admin ceph-cluster]# ceph-deploy mon create-initial
初始化 ceph.osd 节点
创建存储空间 [root@osd1 ~]# mkdir -p /data/ceph-osd [root@osd1 ~]# chown ceph.ceph /data/ceph-osd/ -R [root@osd2 ~]# mkdir -p /data/ceph-osd [root@osd2 ~]# chown ceph.ceph /data/ceph-osd/ -R
[root@osd3 ~]# mkdir -p /data/ceph-osd
[root@osd3 ~]# chown ceph.ceph /data/ceph-osd/ -R
[root@osd4 ~]# mkdir -p /data/ceph-osd
[root@osd4 ~]# chown ceph.ceph /data/ceph-osd/ -R
创建OSD: [root@admin ceph-cluster]# ceph-deploy osd prepare node1:/data/ceph-osd node2:/data/ceph-osd node3:/data/ceph-osd node4:/data/ceph-osd 激活 OSD [root@admin ceph-cluster]# ceph-deploy osd activate node1:/data/ceph-osd node2:/data/ceph-osd node3:/data/ceph-osd node4:/data/ceph-osd
用 ceph-deploy 把配置文件和 admin 密钥拷贝到管理节点和 Ceph 节点,这样你每次执行 Ceph 命令行时就无需指定 monitor 地址和 ceph.client.admin.keyring 了 [root@admin ceph-cluster]# ceph-deploy admin master1 master2 master3 node1 node2 node3 node4 确保你对 ceph.client.admin.keyring 有正确的操作权限。 chmod +r /etc/ceph/ceph.client.admin.keyring (所有机器) 如果配置文件更改,需要同步配置文件到所有节点 [root@admin ceph-cluster]# ceph-deploy --overwrite-conf admin master1 master2 master3 node1 node2 node3 node4
官网的zookeeper yaml,去掉了亲和性
apiVersion: v1 kind: Service metadata: namespace: testsubject name: zk-hs labels: app: zk spec: ports: - port: 2888 name: server - port: 3888 name: leader-election clusterIP: None selector: app: zk --- apiVersion: v1 kind: Service metadata: namespace: testsubject name: zk-cs labels: app: zk spec: ports: - port: 2181 name: client selector: app: zk --- apiVersion: policy/v1beta1 kind: PodDisruptionBudget metadata: namespace: testsubject name: zk-pdb spec: selector: matchLabels: app: zk maxUnavailable: 1 --- apiVersion: apps/v1beta2 kind: StatefulSet metadata: namespace: testsubject name: zk spec: selector: matchLabels: app: zk serviceName: zk-hs replicas: 3 updateStrategy: type: RollingUpdate podManagementPolicy: Parallel template: metadata: labels: app: zk spec: containers: - name: kubernetes-zookeeper imagePullPolicy: Always image: "192.168.200.10/senyint/kubernetes-zookeeper:1.0-3.4.10" resources: requests: memory: "4Gi" cpu: "1" ports: - containerPort: 2181 name: client - containerPort: 2888 name: server - containerPort: 3888 name: leader-election command: - sh - -c - "start-zookeeper --servers=3 --data_dir=/var/lib/zookeeper/data --data_log_dir=/var/lib/zookeeper/data/log --conf_dir=/opt/zookeeper/conf --client_port=2181 --election_port=3888 --server_port=2888 --tick_time=2000 --init_limit=10 --sync_limit=5 --heap=512M --max_client_cnxns=60 --snap_retain_count=3 --purge_interval=12 --max_session_timeout=40000 --min_session_timeout=4000 --log_level=OFF" readinessProbe: exec: command: - sh - -c - "zookeeper-ready 2181" initialDelaySeconds: 10 timeoutSeconds: 5 livenessProbe: exec: command: - sh - -c - "zookeeper-ready 2181" initialDelaySeconds: 10 timeoutSeconds: 5 volumeMounts: - name: datazk mountPath: /var/lib/zookeeper volumeClaimTemplates: - metadata: name: datazk spec: accessModes: [ "ReadWriteOnce" ] storageClassName: ceph-rbd-database resources: requests: storage: 20Gi
ceph-secret.yaml
apiVersion: v1 kind: Secret metadata: name: ceph-secret-admin-testsubject namespace: testsubject type: "kubernetes.io/rbd" data: key: QVFERkcvQmF5ckFkSnhBQVVkM2VCdC82K3dOTnZIM3V0ZHpnTnc9PQo=
rbd-storage-data-class.yaml
apiVersion: storage.k8s.io/v1 kind: StorageClass metadata: name: ceph-rbd-database namespace: testsubject provisioner: kubernetes.io/rbd parameters: monitors: 192.168.200.11:6789,192.168.200.12:6789,192.168.200.13:6789 adminId: admin adminSecretName: ceph-secret-admin-testsubject adminSecretNamespace: "testsubject" pool: fengjian userId: admin userSecretName: ceph-secret-admin-testsubject imageFormat: "2" imageFeatures: "layering"
不建立pv, 直接 使用storageclass,然后建立pvc, deployment 指定 claimName
ceph-secret.yaml
apiVersion: v1
kind: Secret
metadata:
name: ceph-secret-admin
type: "kubernetes.io/rbd"
data:
key: QVFERkcvQmF5ckFkSnhBQVVkM2VCdC82K3dOTnZIM3V0ZHpnTnc9PQo=
rbd-storage-data-class.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
name: ceph-rbd-provisioner
provisioner: kubernetes.io/rbd
parameters:
monitors: 192.168.200.11:6789,192.168.200.12:6789,192.168.200.13:6789
adminId: admin
adminSecretName: ceph-secret-admin
adminSecretNamespace: default
pool: fengjian
userId: admin
userSecretName: ceph-secret-admin
imageFormat: "2"
imageFeatures: "layering"
redis.yaml
apiVersion: apps/v1 # for versions before 1.9.0 use apps/v1beta2 kind: Deployment metadata: name: redis-master spec: selector: matchLabels: app: redis role: master tier: backend replicas: 1 template: metadata: labels: app: redis role: master tier: backend spec: containers: - name: master image: 192.168.200.10/redis/redis:master resources: requests: cpu: 100m memory: 100Mi ports: - containerPort: 6379 volumeMounts: - name: datadir mountPath: /data volumes: - name: datadir persistentVolumeClaim: claimName: redis-master-rbd-pvc --- apiVersion: v1 kind: Service metadata: name: redis-master labels: app: redis role: master tier: backend spec: ports: - port: 6379 targetPort: 6379 selector: app: redis role: master tier: backend
kafka配置文件
参考 : https://kow3ns.github.io/kubernetes-kafka/manifests/
[root@master1 ceph_rbd]# cat kafka.yaml apiVersion: v1 kind: Service metadata: name: kafka-hs labels: app: kafka spec: ports: - port: 9093 name: server clusterIP: None selector: app: kafka --- apiVersion: policy/v1beta1 kind: PodDisruptionBudget metadata: name: kafka-pdb spec: selector: matchLabels: app: kafka maxUnavailable: 1 --- apiVersion: apps/v1beta1 kind: StatefulSet metadata: name: kafka spec: serviceName: kafka-hs replicas: 3 podManagementPolicy: Parallel updateStrategy: type: RollingUpdate template: metadata: labels: app: kafka spec: affinity: podAntiAffinity: requiredDuringSchedulingIgnoredDuringExecution: - labelSelector: matchExpressions: - key: "app" operator: In values: - kafka topologyKey: "kubernetes.io/hostname" podAffinity: preferredDuringSchedulingIgnoredDuringExecution: - weight: 1 podAffinityTerm: labelSelector: matchExpressions: - key: "app" operator: In values: - zk topologyKey: "kubernetes.io/hostname" terminationGracePeriodSeconds: 300 containers: - name: k8skafka imagePullPolicy: Always image: 192.168.200.10/source/kubernetes-kafka:1.0-10.2.1 resources: requests: memory: "12Gi" cpu: 4 ports: - containerPort: 9093 name: server command: - sh - -c - "exec kafka-server-start.sh /opt/kafka/config/server.properties --override broker.id=${HOSTNAME##*-} --override listeners=PLAINTEXT://:9093 --override zookeeper.connect=zk-cs.default.svc.cluster.local:2181 --override log.dir=/var/lib/kafka --override auto.create.topics.enable=true --override auto.leader.rebalance.enable=true --override background.threads=10 --override compression.type=producer --override delete.topic.enable=false --override leader.imbalance.check.interval.seconds=300 --override leader.imbalance.per.broker.percentage=10 --override log.flush.interval.messages=9223372036854775807 --override log.flush.offset.checkpoint.interval.ms=60000 --override log.flush.scheduler.interval.ms=9223372036854775807 --override log.retention.bytes=-1 --override log.retention.hours=168 --override log.roll.hours=168 --override log.roll.jitter.hours=0 --override log.segment.bytes=1073741824 --override log.segment.delete.delay.ms=60000 --override message.max.bytes=1000012 --override min.insync.replicas=1 --override num.io.threads=8 --override num.network.threads=3 --override num.recovery.threads.per.data.dir=1 --override num.replica.fetchers=1 --override offset.metadata.max.bytes=4096 --override offsets.commit.required.acks=-1 --override offsets.commit.timeout.ms=5000 --override offsets.load.buffer.size=5242880 --override offsets.retention.check.interval.ms=600000 --override offsets.retention.minutes=1440 --override offsets.topic.compression.codec=0 --override offsets.topic.num.partitions=50 --override offsets.topic.replication.factor=3 --override offsets.topic.segment.bytes=104857600 --override queued.max.requests=500 --override quota.consumer.default=9223372036854775807 --override quota.producer.default=9223372036854775807 --override replica.fetch.min.bytes=1 --override replica.fetch.wait.max.ms=500 --override replica.high.watermark.checkpoint.interval.ms=5000 --override replica.lag.time.max.ms=10000 --override replica.socket.receive.buffer.bytes=65536 --override replica.socket.timeout.ms=30000 --override request.timeout.ms=30000 --override socket.receive.buffer.bytes=102400 --override socket.request.max.bytes=104857600 --override socket.send.buffer.bytes=102400 --override unclean.leader.election.enable=true --override zookeeper.session.timeout.ms=6000 --override zookeeper.set.acl=false --override broker.id.generation.enable=true --override connections.max.idle.ms=600000 --override controlled.shutdown.enable=true --override controlled.shutdown.max.retries=3 --override controlled.shutdown.retry.backoff.ms=5000 --override controller.socket.timeout.ms=30000 --override default.replication.factor=1 --override fetch.purgatory.purge.interval.requests=1000 --override group.max.session.timeout.ms=300000 --override group.min.session.timeout.ms=6000 --override inter.broker.protocol.version=0.10.2-IV0 --override log.cleaner.backoff.ms=15000 --override log.cleaner.dedupe.buffer.size=134217728 --override log.cleaner.delete.retention.ms=86400000 --override log.cleaner.enable=true --override log.cleaner.io.buffer.load.factor=0.9 --override log.cleaner.io.buffer.size=524288 --override log.cleaner.io.max.bytes.per.second=1.7976931348623157E308 --override log.cleaner.min.cleanable.ratio=0.5 --override log.cleaner.min.compaction.lag.ms=0 --override log.cleaner.threads=1 --override log.cleanup.policy=delete --override log.index.interval.bytes=4096 --override log.index.size.max.bytes=10485760 --override log.message.timestamp.difference.max.ms=9223372036854775807 --override log.message.timestamp.type=CreateTime --override log.preallocate=false --override log.retention.check.interval.ms=300000 --override max.connections.per.ip=2147483647 --override num.partitions=1 --override producer.purgatory.purge.interval.requests=1000 --override replica.fetch.backoff.ms=1000 --override replica.fetch.max.bytes=1048576 --override replica.fetch.response.max.bytes=10485760 --override reserved.broker.max.id=1000 " env: - name: KAFKA_HEAP_OPTS value : "-Xmx2G -Xms2G" - name: KAFKA_OPTS value: "-Dlogging.level=INFO" volumeMounts: - name: datadir mountPath: /var/lib/kafka readinessProbe: tcpSocket: port: 9093 initialDelaySeconds: 30 periodSeconds: 10 securityContext: runAsUser: 1000 fsGroup: 1000 volumeClaimTemplates: - metadata: name: datadir annotations: volume.beta.kubernetes.io/storage-class: "ceph-rbd-provisioner" spec: accessModes: [ "ReadWriteOnce" ] resources: requests: storage: 100Gi
zookeeper.yaml
https://github.com/kow3ns apiVersion: v1 kind: Service metadata: name: zk-hs labels: app: zk spec: ports: - port: 2888 name: server - port: 3888 name: leader-election clusterIP: None selector: app: zk --- apiVersion: v1 kind: Service metadata: name: zk-cs labels: app: zk spec: ports: - port: 2181 name: client selector: app: zk --- apiVersion: policy/v1beta1 kind: PodDisruptionBudget metadata: name: zk-pdb spec: selector: matchLabels: app: zk maxUnavailable: 1 --- apiVersion: apps/v1beta1 kind: StatefulSet metadata: name: zk spec: serviceName: zk-hs replicas: 3 podManagementPolicy: Parallel updateStrategy: type: RollingUpdate template: metadata: labels: app: zk spec: affinity: podAntiAffinity: requiredDuringSchedulingIgnoredDuringExecution: - labelSelector: matchExpressions: - key: "app" operator: In values: - zk topologyKey: "kubernetes.io/hostname" containers: - name: kubernetes-zookeeper imagePullPolicy: Always image: "gcr.io/google_containers/kubernetes-zookeeper:1.0-3.4.10" resources: requests: memory: "4Gi" cpu: "2" ports: - containerPort: 2181 name: client - containerPort: 2888 name: server - containerPort: 3888 name: leader-election command: - sh - -c - "start-zookeeper --servers=3 --data_dir=/var/lib/zookeeper/data --data_log_dir=/var/lib/zookeeper/data/log --conf_dir=/opt/zookeeper/conf --client_port=2181 --election_port=3888 --server_port=2888 --tick_time=2000 --init_limit=10 --sync_limit=5 --heap=3G --max_client_cnxns=60 --snap_retain_count=3 --purge_interval=12 --max_session_timeout=40000 --min_session_timeout=4000 --log_level=INFO" readinessProbe: exec: command: - sh - -c - "zookeeper-ready 2181" initialDelaySeconds: 10 timeoutSeconds: 5 livenessProbe: exec: command: - sh - -c - "zookeeper-ready 2181" initialDelaySeconds: 10 timeoutSeconds: 5 volumeMounts: - name: datadir mountPath: /var/lib/zookeeper securityContext: runAsUser: 1000 fsGroup: 1000 volumeClaimTemplates: - metadata: name: datadir spec: accessModes: [ "ReadWriteOnce" ] resources: requests: storage: 250Gi