• FTP 文件传输服务


    昨晚心血来潮,尝试用python写了一个ftp文件传输服务,可以接收指令,从远程ftp服务器同步指定目录数据,最后没用上,开源出来。

    https://github.com/jadepeng/ftp_transfer_service.git

    运行原理

    • 'task_server' 是一个web服务器,可以接收传入任务,接收到任务后,将task写入mysql
    • 启动任务后,'task_server'会扫描ftp文件列表,写入redis队列
    • transfer_client 是传输执行程序,可以多点部署,该程序会读取redis队列,进行文件下载

    使用

    配置

    修改 .env 文件, 配置mysql和redis地址

    REDIS_SERVER=""
    REDIS_PORT=6380
    REDIS_PASSWORD=""
    MYSQL_HOST=""
    MYSQL_PORT=3306
    MYSQL_PASSWORD=""
    MYSQL_USER=""
    MYSQL_DB=""
    

    启动服务

    server 端

    python3 task_server.py
    

    传输端,可以部署多个

    python3 transfer_client.py
    

    接收任务

    POST /task/

    {
      "taskId": "9",
      "serverPath": "/weblog",
      "storagePath": "/data",
      "host": "ftpServer",
      "port": 21,
      "user": "user",
      "password": "password"
    }
    

    启动传输

    GET /task/{taskId}/start

    查看进度

    GET /task/{taskId}/progress

    实现简介

    第一次用fastapi来写web服务,这里记录下有意思的地方。

    配置

    可以通过配置类实现app的配置参数,pydantic还可以加载env文件更新配置

    setting.py

    from pydantic import BaseSettings
    
    
    class APISettings(BaseSettings):
        """Connection settings for MySQL and Redis, loaded by pydantic.

        Values can be overridden from the ``.env`` file named in ``Config``.
        Fields declared without a default (``mysql_password``, ``mysql_user``,
        ``mysql_db``, ``redis_password``) have no fallback value here.
        """
        # MySQL connection parameters.
        mysql_host: str = "127.0.0.1"
        mysql_port: int = 3306
        mysql_password: str
        mysql_user: str
        mysql_db: str
        # Redis connection parameters (note the non-standard default port 6380).
        redis_server: str = "127.0.0.1"
        redis_port: int = 6380
        redis_password: str
    
        # NOTE(review): not referenced in the visible code; presumably a limit on
        # how many times a client waits for queue items -- confirm with callers.
        max_wait_time_count: int = 10
    
        class Config:
            # Read overrides from a UTF-8 encoded ".env" file.
            env_file = ".env"
            env_file_encoding = 'utf-8'
    
    

    redis 队列

    通过 Redis 的 list 实现队列:入队使用 rpush,出队使用 blpop(阻塞等待)或 lpop(不等待)

    import redis
    
    class RedisQueue(object):
        """FIFO queue backed by a Redis list stored under ``<namespace>:<name>``."""

        def __init__(self, name, namespace='queue', **redis_kwargs):
            """Create the Redis client and remember the list key.

            Args:
                name: Queue name; the second half of the Redis key.
                namespace: Key prefix; the first half of the Redis key.
                **redis_kwargs: Passed through unchanged to ``redis.Redis``.
            """
            self.__db = redis.Redis(**redis_kwargs)
            self.key = ':'.join(map(str, (namespace, name)))

        def qsize(self):
            """Return the number of items currently in the list."""
            return self.__db.llen(self.key)

        def put(self, item):
            """Append *item* to the right end (tail) of the list."""
            self.__db.rpush(self.key, item)

        def get_wait(self, timeout=None):
            """Pop from the left end with ``BLPOP`` and return its raw result.

            The client's ``blpop`` return value is passed through as-is.
            """
            return self.__db.blpop(self.key, timeout=timeout)

        def get_nowait(self):
            """Pop from the left end with ``LPOP`` and return its raw result."""
            return self.__db.lpop(self.key)
    

    redis BloomFilter

    BloomFilter 可以用来去重

    import mmh3
    import redis
    
    
    class BloomFilter(object):
        """Bloom filter whose bit array is stored in a single Redis key.

        Each item is hashed ``hash_count`` times with consecutive murmur3 seeds
        starting at ``start_seed``; every hash selects one bit in a bitmap of
        ``bit_size`` bits. As with any Bloom filter, ``exists`` can report false
        positives but not false negatives for items that were added.
        """

        # Container types treated as "a batch of items" by madd()/mexists().
        # Strings and bytes are deliberately excluded: they are single items.
        _BATCH_TYPES = (list, tuple, set, frozenset)

        def __init__(self, bf_key, bit_size=2000000, hash_count=4, start_seed=41, **redis_kwargs):
            """Store the filter parameters and open a Redis client.

            Args:
                bf_key: Redis key that holds the bitmap.
                bit_size: Number of bits in the bitmap.
                hash_count: Number of hash functions (seeds) per item.
                start_seed: First murmur3 seed; the others follow consecutively.
                **redis_kwargs: Passed through unchanged to ``redis.Redis``.
            """
            self.bit_size = bit_size
            self.hash_count = hash_count
            self.start_seed = start_seed
            self.client = redis.Redis(**redis_kwargs)
            self.bf_key = bf_key

        def add(self, data):
            """Set every bit that *data* hashes to."""
            for index in self._get_hash_points(data):
                self.client.setbit(self.bf_key, index, 1)

        def madd(self, m_data):
            """Add one item, or every item of a list/tuple/set."""
            if isinstance(m_data, self._BATCH_TYPES):
                for data in m_data:
                    self.add(data)
            else:
                self.add(m_data)

        def exists(self, data):
            """Return True when all bits for *data* are set (possibly present)."""
            # A generator lets all() stop at the first unset bit instead of
            # issuing a GETBIT for every hash position.
            return all(
                self.client.getbit(self.bf_key, index)
                for index in self._get_hash_points(data)
            )

        def mexists(self, m_data):
            """Return ``{item: exists(item)}`` for one item or a batch of items."""
            if isinstance(m_data, self._BATCH_TYPES):
                return {data: self.exists(data) for data in m_data}
            # Call exists(); the previous code subscripted the bound method
            # (``self.exists[m_data]``) and raised TypeError for single items.
            return {m_data: self.exists(m_data)}

        def _get_hash_points(self, data):
            """Return the ``hash_count`` bit offsets that *data* maps to."""
            return [
                mmh3.hash(data, seed) % self.bit_size
                for seed in range(self.start_seed, self.start_seed + self.hash_count)
            ]
    
    

    python的orm框架sqlalchemy

    sqlalchemy 需要先定义ORM类

    class TransferTask(Base):
        """ORM mapping for the ``transfer_task`` table: one row per submitted task.

        The columns carry the same names as the fields of the ``POST /task/``
        request body (task id, remote path, local storage path and the FTP
        connection details).
        """
        __tablename__ = 'transfer_task'
    
        # Task identifier; primary key.
        taskId = Column(String(255), primary_key=True, index=True)
        # Directory on the FTP server to synchronise.
        serverPath = Column(String(255), nullable=False)
        # Local directory the files are stored under.
        storagePath = Column(String(255), nullable=False)
        # FTP server address and credentials.
        host = Column(String(255), nullable=False)
        port = Column(Integer, nullable=False)
        user = Column(String(255), nullable=False)
        # NOTE(review): stored as a plain string column -- confirm this is acceptable.
        password = Column(String(255), nullable=False)
        # Defaults to datetime.now() evaluated when the row is inserted.
        time = Column(DateTime, nullable=False, default=datetime.now)
    
    class TransferFailedFile(Base):
        """ORM mapping for ``transfer_failed_file``: one row per file recorded as failed."""
        __tablename__ = 'transfer_failed_file'
        # Auto-incrementing surrogate key.
        id = Column(Integer, primary_key=True, index=True, autoincrement=True)
        # Task the file belongs to (indexed; no foreign key is declared).
        taskId = Column(String(255), index=True)
        # Path of the file that failed.
        filePath = Column(String(1024), nullable=False)
        # Defaults to datetime.now() evaluated when the row is inserted.
        time = Column(DateTime, nullable=False, default=datetime.now)
    
    class TransferProgress(Base):
        """ORM mapping for ``transfer_task_progress``: one progress row per task."""
        __tablename__ = 'transfer_task_progress'
    
        # Task identifier; primary key, so there is at most one row per task.
        taskId = Column(String(255), primary_key=True, index=True)
        # presumably the number of files found by the scan -- TODO confirm
        total = Column(Integer, nullable=False)
        # Integer status code; new rows are created with TaskStatus.SCANNING.value.
        status = Column(Integer, nullable=False)
        # Counters for finished and failed files.
        finished = Column(Integer, nullable=False)
        failed = Column(Integer, nullable=False)
        # Defaults to datetime.now() evaluated when the row is inserted.
        time = Column(DateTime, nullable=False, default=datetime.now)
    
    # When run as a script: build a Database from the application settings and
    # create every table registered on Base.metadata.
    if __name__ == '__main__':
        settings = APISettings()
        db = Database(settings.mysql_host, settings.mysql_port, settings.mysql_user, settings.mysql_password,
                      settings.mysql_db)
        # Issues CREATE TABLE for the mapped tables against db.engine.
        Base.metadata.create_all(db.engine)
    

    使用了sqlalchemy CRUD就比较方便了, 可以通过query,filter来查询和过滤

     def get_or_create_progress(self, task: TransferTask):
            """Return the progress row for *task*, inserting a fresh one if absent.

            A newly created row starts with ``total``, ``finished`` and ``failed``
            set to 0 and ``status`` set to ``TaskStatus.SCANNING.value``; it is
            added and committed before being returned. The session obtained from
            ``self.database.get_session()`` is not closed here.
            """
            session = self.database.get_session()
            progress = (
                session.query(TransferProgress)
                .filter(TransferProgress.taskId == task.taskId)
                .first()
            )
            if progress:
                return progress

            # No row yet: create one in the initial "scanning" state.
            progress = TransferProgress()
            progress.taskId = task.taskId
            progress.total = 0
            progress.status = TaskStatus.SCANNING.value
            progress.finished = 0
            progress.failed = 0
            session.add(progress)
            session.commit()
            return progress
    

    这里需要注意的是,session用完后需要close,否则未释放的session(及其占用的数据库连接)过多会报错。可以封装一个get_db生成器,利用yield配合try/finally,在生成器结束或被关闭时自动释放session,并在此基础上提供get_session

    见database.py

        def get_db(self):
            """Yield one new session from ``self.SessionLocal`` and close it afterwards.

            This is a generator: the ``finally`` clause (and therefore
            ``close()``) runs when the generator finishes, is closed, or is
            finalized.
            """
            session = self.SessionLocal()
            try:
                yield session
            finally:
                session.close()
    
        def get_session(self):
            """Return a new session created by ``self.SessionLocal``.

            The session is created directly instead of via
            ``next(self.get_db())``. That form discarded the generator right
            after the first ``next()``, so the generator's ``finally`` block
            called ``close()`` on the session whenever the generator object was
            finalized: immediately under reference counting, or at an arbitrary
            later point (possibly mid-transaction) under other garbage
            collectors. It never released the session *after* the caller was
            done with it.

            Returns:
                A session object. The caller owns it and should call
                ``close()`` when finished.
            """
            return self.SessionLocal()
    
    

    python ftp操作

    python有个ftplib,可以用来操作ftp,这里简单封装一个client类, 实现listfiles和下载文件

    
    import ftplib
    import os
    from datetime import datetime
    import ntpath
    
    
    class FtpClient:
        """Small wrapper around ``ftplib.FTP`` for recursive listing and downloads."""

        def __init__(self, host: str, port: int, user: str, password: str):
            """Remember the connection parameters and connect immediately.

            Args:
                host: FTP server host name or address.
                port: FTP server port.
                user: Login name.
                password: Login password.
            """
            self.host = host
            self.port = port
            self.user = user
            self.password = password
            self.connect()

        def connect(self):
            """Open the control connection and log in."""
            self.ftp = ftplib.FTP()
            # Select UTF-8 before any command is sent so that the login and
            # every later path are encoded consistently.
            self.ftp.encoding = "utf-8"
            self.ftp.connect(host=self.host, port=self.port)
            self.ftp.login(self.user, self.password)

        def list_files(self, dir):
            """Recursively yield ``(remote_path, size)`` for every file under *dir*.

            Args:
                dir: Remote directory to walk. Nested directories are entered
                    with ``dir + "/" + name``, so an absolute path is expected
                    when the tree contains subdirectories.

            Yields:
                Tuples of ``f"{dir}/{file_name}"`` and the value returned by
                ``FTP.size``. Files whose size lookup raises are printed and
                skipped.
            """
            self.ftp.cwd(dir)
            for file_name, meta in self.ftp.mlsd():
                file_type = (meta.get("type") or "").lower()
                if file_type == "file":
                    try:
                        # SIZE is only reliable in binary mode.
                        self.ftp.voidcmd("TYPE I")
                        file_size = self.ftp.size(file_name)
                    except Exception as e:
                        print(e)
                    else:
                        yield f"{dir}/{file_name}", file_size
                elif file_type not in ("cdir", "pdir"):
                    # "cdir"/"pdir" are the listed directory itself and its
                    # parent; descending into them would recurse forever.
                    yield from self.list_files(dir + "/" + file_name)
                    # The recursive walk changed the working directory; go back
                    # so that SIZE on the remaining entries resolves against
                    # *dir* again.
                    self.ftp.cwd(dir)

        def download_file(self, file_name:str, local_file_name:str):
            """Download remote *file_name* into *local_file_name*.

            A permanent FTP error (``ftplib.error_perm``) is printed and the
            local file is removed; the error is not re-raised, so callers that
            need to detect it must check whether the local file exists. Other
            exceptions propagate.
            """
            try:
                # The context manager closes (and flushes) the file before it
                # is inspected or unlinked; the handle was previously leaked.
                with open(local_file_name, 'wb') as local_file:
                    self.ftp.retrbinary('RETR %s' % file_name, local_file.write)
            except ftplib.error_perm:
                print('ERROR: cannot read file "%s"' % file_name)
                os.unlink(local_file_name)
    

    下载程序

    作为redis mq的消费者,要考虑的是下载失败了如何处理,异常退出如何处理?进度如何更新?

    针对异常退出:这里用一个简单的方案,获取mq消息后,先将item写入到本地文件,这样如果client程序异常退出,下次启动时还能从该文件继续处理。
    针对下载失败:失败后先将retryCount加1并重新放入队列;如果已达到最大重试次数(3次),则写入失败记录。
    针对进度更新:每处理完一个文件,就通过update对已完成数执行一次+1。

    def transfer_task_item(ftp, local_path, queque, task, task_item, max_retry_count=3):
        """Download one queued file and record the outcome.

        Args:
            ftp: Client exposing ``download_file(remote_name, local_name)``.
            local_path: Local directory prefix; the remote file name is appended
                to it verbatim.
            queque: Queue exposing ``put``; failed items are re-queued on it as
                JSON.
            task: Task object; only ``task.taskId`` is read.
            task_item: Dict with ``fileName``, ``fileSize`` and ``retryCount``.
                ``retryCount`` is incremented in place when the item is re-queued.
            max_retry_count: Number of re-queues allowed before the file is
                recorded as failed. Defaults to 3, the previously hard-coded
                value.

        Any exception is printed and turned into a retry or a failed-file
        record; ``remove_lock()`` is always called at the end.
        """
        try:
            local_file_name = local_path + task_item['fileName']
            print("transfer %s to %s" % (task_item['fileName'], local_file_name))

            # A local copy with the expected size already exists: count it as
            # finished without downloading again.
            if (os.path.exists(local_file_name)
                    and os.path.getsize(local_file_name) == task_item["fileSize"]):
                db_service.update_finished(task.taskId, 1)
                return

            target_dir = os.path.abspath(os.path.dirname(local_file_name))
            os.makedirs(target_dir, exist_ok=True)
            ftp.download_file(task_item['fileName'], local_file_name)
            # FtpClient.download_file swallows ftplib.error_perm and deletes the
            # local file instead of raising, so a missing file here means the
            # download failed; raise to go through the retry path below rather
            # than counting the file as finished.
            if not os.path.exists(local_file_name):
                raise IOError("download of %s produced no local file" % task_item['fileName'])
            # Update progress.
            db_service.update_finished(task.taskId, 1)
        except Exception as e:
            print(e)
            if task_item['retryCount'] < max_retry_count:
                task_item['retryCount'] = task_item['retryCount'] + 1
                queque.put(json.dumps(task_item))
            else:
                print(task_item['fileName'] + " transfer failed with max_retry_count")
                db_service.add_failed_file(task.taskId, task_item['fileName'])
        finally:
            remove_lock()
    
  • 相关阅读:
    scala-class
    uva-10422-骑士-搜索题
    HDU 5724
    HDU 5728
    CodeForces 414B
    CodeForces 698A
    Codeforces Round #363 (Div. 2)
    BestCoder 2nd Anniversary 1001 Oracle
    BestCoder 2nd Anniversary 1002 Arrange
    HDU 4798
  • 原文地址:https://www.cnblogs.com/xiaoqi/p/ftp_transfer_service.html
Copyright © 2020-2023  润新知