0
点赞
收藏
分享

微信扫一扫

利用K8S CronJob来实现etcd集群的自动备份

前言:

利用k8s CronJob 来实现etcd集群的自动备份,并通过sftp传输到本k8s集群外的服务器上,进行存储。


实验步骤:

基本环境情况:

服务器角色

IP

系统

ETCD版本

K8S集群操作服务器

192.168.1.136

Centos7.9

3.4.9

存储服务器

192.168.1.105

Centos7.9

-

编写Dockerfile并构建镜像:

# Create the working directory. -p also creates missing parent directories
# and does not fail when the directory already exists.
[root@k8s-master1 ~]# mkdir -p /software/k8s-yaml/etcd-backup/
[root@k8s-master1 ~]# cd /software/k8s-yaml/etcd-backup/

[root@k8s-master1 etcd-backup]# vim Dockerfile

# Backup image: ships etcdctl plus lftp/openssh, which the backup script uses
# to take an etcd snapshot and upload it over SFTP.
FROM python:3-alpine

# SSH client defaults for root. Host key checking is disabled and the legacy
# diffie-hellman-group1-sha1 / ssh-rsa algorithms are enabled for every host.
# NOTE(review): this trades security for compatibility with old SSH servers;
# tighten it if the storage server supports modern algorithms.
RUN mkdir -p /root/.ssh \
    && chmod 700 /root/.ssh \
    && printf 'Host *\n\tStrictHostKeyChecking no\n\tUserKnownHostsFile /dev/null\n\tKexAlgorithms +diffie-hellman-group1-sha1\n\tPubkeyAcceptedKeyTypes +ssh-rsa\n\tHostkeyAlgorithms +ssh-rsa\n' > /root/.ssh/config

# curl -f makes the build fail on an HTTP error instead of silently saving the
# error page as /usr/local/bin/etcdctl.
RUN apk add -U --no-cache curl lftp ca-certificates openssh \
    && curl -fL https://yunwei-software.oss-cn-zhangjiakou.aliyuncs.com/etcdctl -o /usr/local/bin/etcdctl \
    && chmod +x /usr/local/bin/etcdctl

PS:上面镜像中的etcdctl版本为3.4.9。如果集群的ETCD版本不是3.4.9,可以使用ADD将自己集群中的etcdctl打入镜像中,或者使用下面的Dockerfile,从GitHub上拉取对应版本。

GitHub拉取使用的Dockerfile:

# Backup image variant that downloads etcdctl from the official etcd GitHub
# release matching ETCD_VERSION.
FROM python:3-alpine

# Switch the Alpine package repositories to the Aliyun mirror.
RUN sed -i 's/dl-cdn.alpinelinux.org/mirrors.aliyun.com/g' /etc/apk/repositories

# etcd release to take etcdctl from; set it to the version of your own cluster
# (can be overridden with: docker build --build-arg ETCD_VERSION=vX.Y.Z).
ARG ETCD_VERSION=v3.4.9

RUN apk add -U --no-cache curl lftp ca-certificates openssh

# SSH client defaults for root. Host key checking is disabled and the legacy
# diffie-hellman-group1-sha1 / ssh-rsa algorithms are enabled for every host.
# NOTE(review): this trades security for compatibility with old SSH servers.
RUN mkdir -p /root/.ssh \
    && chmod 700 /root/.ssh \
    && printf 'Host *\n\tStrictHostKeyChecking no\n\tUserKnownHostsFile /dev/null\n\tKexAlgorithms +diffie-hellman-group1-sha1\n\tPubkeyAcceptedKeyTypes +ssh-rsa\n\tHostkeyAlgorithms +ssh-rsa\n' > /root/.ssh/config

# Install s3cmd from a source archive that must be present in the build context.
# NOTE(review): "setup.py install" needs setuptools in the base image; confirm
# it still works with the Python version behind the floating 3-alpine tag.
ADD s3cmd-master.zip /s3cmd-master.zip
RUN unzip /s3cmd-master.zip -d /opt \
    && cd /opt/s3cmd-master \
    && python setup.py install \
    && rm -rf /s3cmd-master.zip

# Download the release tarball, keep only etcdctl and remove the rest.
# curl -f makes the build fail on an HTTP error (e.g. a wrong ETCD_VERSION)
# instead of passing an error page on to tar.
RUN curl -fL https://github.com/etcd-io/etcd/releases/download/${ETCD_VERSION}/etcd-${ETCD_VERSION}-linux-amd64.tar.gz -o /opt/etcd-${ETCD_VERSION}-linux-amd64.tar.gz \
    && cd /opt && tar xzf etcd-${ETCD_VERSION}-linux-amd64.tar.gz \
    && mv etcd-${ETCD_VERSION}-linux-amd64/etcdctl /usr/local/bin/etcdctl \
    && rm -rf etcd-${ETCD_VERSION}-linux-amd64*

镜像创建并上传至镜像仓库(本地和云上都可,方便其他节点拉取该镜像)

# Build the image from the Dockerfile in the current directory.
[root@k8s-master1 etcd-backup]# docker build -t lws_etcd_backups:v1 .
# Tag it with the registry path that the CronJob manifest references.
[root@k8s-master1 etcd-backup]# docker tag lws_etcd_backups:v1 registry.cn-zhangjiakou.aliyuncs.com/newtime-test/etcd_backups:lws_v1
# Push it to the registry so that other nodes can pull it.
[root@k8s-master1 etcd-backup]# docker push registry.cn-zhangjiakou.aliyuncs.com/newtime-test/etcd_backups:lws_v1

ConfigMap创建:

[root@k8s-master1 etcd-backup]# vim etcd-backup-cm.yaml

# ConfigMap holding the backup script that the etcd-backup CronJob mounts at
# /entrypoint.sh.
apiVersion: v1
kind: ConfigMap
metadata:
  name: cron-sftp
  namespace: backups
data:
  entrypoint.sh: |
    #!/bin/sh
    #
    # Take an etcd snapshot, upload it to an SFTP server with lftp and prune
    # old snapshots on the server.
    #
    # Required environment:
    #   ENDPOINTS      etcd endpoint(s) passed to "etcdctl --endpoints"
    #   CLUSTER_NAME   sub-directory on the SFTP server for this cluster
    #   BACKUP_COUNTS  number of snapshots to keep on the SFTP server
    # Optional environment (defaults are the values below):
    #   SFTP_USER, SFTP_PASSWD, SFTP_URL
    #
    # Kept POSIX-sh compatible: the CronJob starts this file with "sh", so a
    # bash shebang would be ignored and bashisms would break.

    # Abort on the first failing command or unset variable, so a failed
    # snapshot or upload fails the Job instead of going on to prune backups.
    set -eu

    : "${ENDPOINTS:?ENDPOINTS must be set}"
    : "${CLUSTER_NAME:?CLUSTER_NAME must be set}"
    : "${BACKUP_COUNTS:?BACKUP_COUNTS must be set}"

    # variables
    # NOTE(review): prefer injecting SFTP_USER / SFTP_PASSWD from a Kubernetes
    # Secret; the literals are only kept as backward-compatible defaults.
    sftp_user="${SFTP_USER:-ftp01}"
    sftp_passwd="${SFTP_PASSWD:-Nisec123456}"
    sftp_url="${SFTP_URL:-sftp://192.168.1.105:22}"
    backup_dir="/home/ftp/etcd-backup/${CLUSTER_NAME}"
    snapshot_dir=/snapshot
    file="etcd-snapshot-$(date +%Y%m%d-%H%M%S).db"

    # Run lftp against the SFTP server; extra arguments (e.g. -e "cmds") are
    # passed through, and stdin is inherited for here-document sessions.
    sftp() {
      lftp -u "${sftp_user},${sftp_passwd}" "$sftp_url" "$@"
    }

    # Print the snapshot file names found in $backup_dir, oldest first.
    # Only names matching etcd-snapshot-YYYYmmdd-HHMMSS.db are kept, so the
    # "." and ".." entries of the raw listing are never counted or removed;
    # the timestamp in the name makes a plain sort chronological.
    list_snapshots() {
      sftp -e "ls $backup_dir; bye" \
        | awk '{print $NF}' \
        | grep -E '^etcd-snapshot-[0-9]{8}-[0-9]{6}\.db$' \
        | sort
    }

    # backup etcd data
    mkdir -p "$snapshot_dir"
    # remove the local snapshot (and etcdctl's temporary .part file) on every
    # exit path, including failures
    trap 'rm -f "${snapshot_dir}/${file}" "${snapshot_dir}/${file}.part"' EXIT
    chmod +x /usr/local/bin/etcdctl
    etcdctl --endpoints "$ENDPOINTS" \
      --cert=/opt/etcd/ssl/server.pem \
      --key=/opt/etcd/ssl/server-key.pem \
      --cacert=/opt/etcd/ssl/ca.pem \
      snapshot save "${snapshot_dir}/${file}"

    # upload etcd snapshot file
    # "mkdir -f" stays quiet when the directory already exists; fail-exit is
    # enabled only afterwards so that a failing cd/put ends lftp with a
    # non-zero status and "set -e" aborts the script.
    sftp <<EOF
    mkdir -p -f $backup_dir
    set cmd:fail-exit yes
    cd $backup_dir
    lcd $snapshot_dir
    put $file
    bye
    EOF

    # remove the expired snapshot files
    snapshots=$(list_snapshots)
    # grep -c exits 1 when it counts zero lines; "|| true" keeps set -e happy
    total_num=$(printf '%s\n' "$snapshots" | grep -c . || true)
    if [ "$total_num" -gt "$BACKUP_COUNTS" ]; then
      expired_num=$((total_num - BACKUP_COUNTS))
      expired_files=$(printf '%s\n' "$snapshots" | head -n "$expired_num")
      # Word splitting is intentional and safe here: list_snapshots only
      # returns names that match the strict snapshot pattern.
      for f in $expired_files; do
        to_remove="${backup_dir}/${f}"
        echo "start to remove $to_remove"
        # Pruning is best-effort: report a failed removal but keep going.
        sftp -e "rm -f $to_remove; bye" \
          || echo "failed to remove $to_remove" >&2
      done
    fi

PS:按实际情况修改SFTP段落的配置。

# Create the backups namespace, apply the cron-sftp ConfigMap into it, then
# list the ConfigMaps in that namespace to confirm it exists.
[root@k8s-master1 etcd-backup]# kubectl create ns backups
[root@k8s-master1 etcd-backup]# kubectl apply -f etcd-backup-cm.yaml
[root@k8s-master1 etcd-backup]# kubectl get cm -n backups
NAME               DATA   AGE
cron-sftp          1      6s
kube-root-ca.crt   1      11s

CronJob创建:

[root@k8s-master1 etcd-backup]# vim etcd-backup-cronjob.yaml

# CronJob that runs entrypoint.sh from the cron-sftp ConfigMap every 5 minutes.
# NOTE: batch/v1beta1 CronJob was removed in Kubernetes 1.25; on clusters
# running 1.21 or newer use "apiVersion: batch/v1" instead.
apiVersion: batch/v1beta1
kind: CronJob
metadata:
  name: etcd-backup-sftp
  namespace: backups
spec:
  schedule: "*/5 * * * *"
  # Never start a new backup while the previous run is still active.
  concurrencyPolicy: Forbid
  jobTemplate:
    spec:
      template:
        metadata:
          labels:
            app: etcd-backup
        spec:
          containers:
            - name: etcd-backup
              image: registry.cn-zhangjiakou.aliyuncs.com/newtime-test/etcd_backups:lws_v1
              imagePullPolicy: IfNotPresent
              workingDir: /
              # The script is started through "sh", so its shebang line is not used.
              command: ["sh", "./entrypoint.sh"]
              # Settings read by entrypoint.sh / etcdctl.
              env:
                - name: ENDPOINTS
                  value: "192.168.1.136:2379"
                - name: ETCDCTL_API
                  value: "3"
                # Number of snapshots to keep on the SFTP server.
                - name: BACKUP_COUNTS
                  value: "5"
                # Used as the sub-directory name on the SFTP server.
                - name: CLUSTER_NAME
                  value: "cluster1"
              volumeMounts:
                # subPath mounts only the script file instead of the whole volume.
                - mountPath: /entrypoint.sh
                  name: configmap-volume
                  readOnly: true
                  subPath: entrypoint.sh
                # etcd client certificates taken from the node the pod runs on.
                - mountPath: /opt/etcd/ssl
                  name: etcd-certs
                  readOnly: true
                # Host time zone, so the snapshot file names use local time.
                - mountPath: /etc/localtime
                  name: lt-config
                  readOnly: true
                - mountPath: /etc/timezone
                  name: tz-config
                  readOnly: true
          volumes:
            - name: configmap-volume
              configMap:
                # Read + execute only; the script never needs to be writable.
                defaultMode: 0555
                name: cron-sftp
            - name: etcd-certs
              hostPath:
                path: /opt/etcd/ssl
            - name: lt-config
              hostPath:
                path: /etc/localtime
            - name: tz-config
              hostPath:
                path: /etc/timezone
          hostNetwork: true
          restartPolicy: OnFailure

PS:可以通过nodeAffinity将执行etcd备份的CronJob调度到任意一个etcd节点上运行。示例如下:

# Node affinity example: only schedule the pod onto nodes that carry the
# node-role.kubernetes.io/etcd label.
# NOTE(review): presumably placed under the pod spec of the CronJob's job
# template (next to "containers") - confirm when merging it into the manifest.
affinity:
  nodeAffinity:
    # Hard requirement, evaluated when the pod is scheduled.
    requiredDuringSchedulingIgnoredDuringExecution:
      nodeSelectorTerms:
      - matchExpressions:
        # "Exists" matches on the presence of the label key, whatever its value.
        - key: node-role.kubernetes.io/etcd
          operator: Exists

我这边共有4个节点,是将ETCD的SSL证书放到了每个节点中,所以没有设置nodeAffinity。

# Copy the etcd SSL certificates to every node (repeat for each node IP).
# scp needs -r to copy a directory; -p alone only preserves times and modes.
# Copying the "ssl" directory into /opt/etcd/ gives /opt/etcd/ssl on the
# target whether or not that directory already exists there.
[root@k8s-master1 etcd-backup]# ssh 192.168.1.139 "mkdir -p /opt/etcd"
[root@k8s-master1 etcd-backup]# scp -rp /opt/etcd/ssl 192.168.1.139:/opt/etcd/

运行etcd-backup-cronjob.yaml:

# Create the CronJob, then list the CronJobs in the backups namespace.
[root@k8s-master1 etcd-backup]# kubectl apply -f etcd-backup-cronjob.yaml
[root@k8s-master1 etcd-backup]# kubectl get cj -n backups
NAME               SCHEDULE      SUSPEND   ACTIVE   LAST SCHEDULE   AGE
etcd-backup-sftp   */5 * * * *   False     0        <none>           7s

# After about 5 minutes, check the pods created by the CronJob.
[root@k8s-master1 etcd-backup]# kubectl get pods -n backups
NAME                                READY   STATUS      RESTARTS   AGE
etcd-backup-sftp-1677308100-cw4b8   0/1     Completed   0          1m51s

[root@k8s-master1 etcd-backup]# kubectl logs etcd-backup-sftp-1677308100-cw4b8 -n backups
{"level":"info","ts":1677308105.1600003,"caller":"snapshot/v3_snapshot.go:119","msg":"created temporary db file","path":"/snapshot/etcd-snapshot-20230225-145505.db.part"}
{"level":"info","ts":"2023-02-25T14:55:05.191+0800","caller":"clientv3/maintenance.go:200","msg":"opened snapshot stream; downloading"}
{"level":"info","ts":1677308105.1914499,"caller":"snapshot/v3_snapshot.go:127","msg":"fetching snapshot","endpoint":"192.168.1.136:2379"}
{"level":"info","ts":"2023-02-25T14:55:05.872+0800","caller":"clientv3/maintenance.go:208","msg":"completed snapshot read; closing"}
{"level":"info","ts":1677308106.153034,"caller":"snapshot/v3_snapshot.go:142","msg":"fetched snapshot","endpoint":"192.168.1.136:2379","size":"18 MB","took":0.992465311}
{"level":"info","ts":1677308106.1532946,"caller":"snapshot/v3_snapshot.go:152","msg":"saved","path":"/snapshot/etcd-snapshot-20230225-145505.db"}
Snapshot saved at /snapshot/etcd-snapshot-20230225-145505.db
mkdir: Access failed: Failure (/home/ftp/etcd-backup/cluster1)
start to remove /home/ftp/etcd-backup/cluster1/.
start to remove /home/ftp/etcd-backup/cluster1/..
start to remove /home/ftp/etcd-backup/cluster1/etcd-snapshot-20230225-143011.db

查看etcd备份情况:

利用K8S CronJob来实现etcd集群的自动备份_Dockerfile

因为机房的K8S集群目前没有出现过问题,自己目前也没有时间去测试使用snapshot.db文件恢复,等有时间了再去做实验吧。

恢复以及参考的链接如下:

参考链接


举报

相关推荐

0 条评论