• python 操作hdfs


    from hdfs.client import Client
     
     
     
    # 关于python操作hdfs的API可以查看官网:
    # https://hdfscli.readthedocs.io/en/latest/api.html
     
     
    # 读取hdfs文件内容,将每行存入数组返回
    def read_hdfs_file(client, filename):
        """Read a UTF-8 text file through ``client`` and return its lines.

        Args:
            client: Object whose ``read(path, encoding=..., delimiter=...)``
                call returns a context manager yielding an iterable of text
                chunks (the script in this file passes an ``hdfs`` ``Client``).
            filename: Path of the file to read.

        Returns:
            A list with one entry per chunk produced by the reader, each
            stripped of leading and trailing whitespace.
        """
        # The delimiter must be written as the escape sequence '\n': a raw line
        # break inside a single-quoted literal is a syntax error.
        with client.read(filename, encoding='utf-8', delimiter='\n') as reader:
            # A comprehension is used instead of the ``list`` name because this
            # file rebinds ``list`` to its own directory-listing helper.
            return [line.strip() for line in reader]
     
     
    # 创建目录
    def mkdirs(client, hdfs_path):
        """Create the directory ``hdfs_path`` by calling ``client.makedirs``.

        Args:
            client: Object providing a ``makedirs(path)`` method.
            hdfs_path: Path of the directory to create.

        Returns:
            None. Whatever ``client.makedirs`` returns is discarded.
        """
        make_directories = client.makedirs
        make_directories(hdfs_path)
     
     
    # 删除hdfs文件
    def delete_hdfs_file(client, hdfs_path):
        """Delete ``hdfs_path`` by calling ``client.delete``.

        Args:
            client: Object providing a ``delete(path)`` method.
            hdfs_path: Path to remove.

        Returns:
            None. The result reported by ``client.delete`` is not propagated.
        """
        remove = client.delete
        remove(hdfs_path)
     
     
    # 上传文件到hdfs
    def put_to_hdfs(client, local_path, hdfs_path):
        """Upload ``local_path`` to ``hdfs_path`` via ``client.upload``.

        Note the argument order: this helper takes the local path first, while
        ``client.upload`` is called with the HDFS destination first.

        Args:
            client: Object providing ``upload(hdfs_path, local_path, cleanup=...)``.
            local_path: Source path on the local filesystem.
            hdfs_path: Destination path on HDFS.

        Returns:
            None. The value returned by ``client.upload`` is discarded.
        """
        destination, source = hdfs_path, local_path
        # cleanup=True is forwarded unchanged; presumably the hdfs library uses
        # it to remove partially uploaded files on failure -- confirm in its docs.
        client.upload(destination, source, cleanup=True)
     
     
    # 从hdfs获取文件到本地
    def get_from_hdfs(client, hdfs_path, local_path):
        """Download ``hdfs_path`` to ``local_path`` via ``client.download``.

        Args:
            client: Object providing ``download(hdfs_path, local_path, overwrite=...)``.
            hdfs_path: Source path on HDFS.
            local_path: Destination path on the local filesystem.

        Returns:
            None. The value returned by ``client.download`` is discarded.
        """
        source, destination = hdfs_path, local_path
        # overwrite=False is always passed, so replacing an existing local file
        # is never requested by this helper.
        client.download(source, destination, overwrite=False)
     
     
    # 追加数据到hdfs文件
    def append_to_hdfs(client, hdfs_path, data):
        """Append ``data`` to ``hdfs_path`` via ``client.write``.

        Args:
            client: Object providing
                ``write(path, data, overwrite=..., append=..., encoding=...)``.
            hdfs_path: Path of the file to append to.
            data: Content to append; it is forwarded with ``encoding='utf-8'``.

        Returns:
            None. The value returned by ``client.write`` is discarded.
        """
        # Append mode: never overwrite, always append.
        write_options = {
            'overwrite': False,
            'append': True,
            'encoding': 'utf-8',
        }
        client.write(hdfs_path, data, **write_options)
     
     
    # 覆盖数据写到hdfs文件
    def write_to_hdfs(client, hdfs_path, data):
        """Write ``data`` to ``hdfs_path`` via ``client.write``, overwriting.

        Args:
            client: Object providing
                ``write(path, data, overwrite=..., append=..., encoding=...)``.
            hdfs_path: Path of the file to write.
            data: Content to write; it is forwarded with ``encoding='utf-8'``.

        Returns:
            None. The value returned by ``client.write`` is discarded.
        """
        # Overwrite mode: replace the target, never append.
        write_options = {
            'overwrite': True,
            'append': False,
            'encoding': 'utf-8',
        }
        client.write(hdfs_path, data, **write_options)
     
     
    # 移动或者修改文件
    def move_or_rename(client, hdfs_src_path, hdfs_dst_path):
        """Move or rename a path by calling ``client.rename``.

        Args:
            client: Object providing a ``rename(src, dst)`` method.
            hdfs_src_path: Existing path to move.
            hdfs_dst_path: New path for the source.

        Returns:
            None. The value returned by ``client.rename`` is discarded.
        """
        rename = client.rename
        rename(hdfs_src_path, hdfs_dst_path)
     
     
    # 返回目录下的文件
    def list(client, hdfs_path):
        """Return the result of ``client.list`` for ``hdfs_path``.

        NOTE: this function shadows the builtin ``list`` for the rest of the
        module; the name is kept because callers use it.

        Args:
            client: Object providing a ``list(path, status=...)`` method.
            hdfs_path: Directory path to list.

        Returns:
            Whatever ``client.list(hdfs_path, status=False)`` returns.
        """
        entries = client.list(hdfs_path, status=False)
        return entries
     
    # Script section: builds a client at import time and runs one upload.
    # The remaining calls are kept commented out as usage examples.
    #
    # Constructor signature, for reference:
    # client = Client(url, root=None, proxy=None, timeout=None, session=None)
    # client = Client("http://hadoop:50070")
    client = Client("http://120.78.186.82:50070/", root="/", timeout=10000, session=False)
    # client = InsecureClient("http://120.78.186.82:50070", user='ann')

    # move_or_rename(client, '/input/2.csv', '/input/emp.csv')
    # read_hdfs_file(client, '/input/emp.csv')
    # Raw string: in a normal literal '\b' is the backspace escape, which would
    # silently corrupt this Windows path.
    put_to_hdfs(client, r'D:\bbb.txt', '/file')
    # append_to_hdfs(client, '/input/emp.csv', '我爱你' + '\n')
    # write_to_hdfs(client, '/emp.csv', "sadfafdadsf")
    # read_hdfs_file(client, '/input/emp.csv')
    # move_or_rename(client, '/input/emp.csv', '/input/2.csv')
    # mkdirs(client, '/input/python')
    # print(list(client, '/'))
    # NOTE(review): no chown helper is defined in the visible part of this file.
    # chown(client, '/input/1.csv', 'root')
  • 相关阅读:
    YII2操作mongodb笔记(转)
    MongoDB创建数据库和集合命令db.createCollection详解(转)
    MongoDB设置访问权限、设置用户(转)
    使用Robomongo 连接MongoDB 3.x 报 Authorization failed 解决办法(转)
    Yii2框架与MongoDB拓展、Redis拓展的安装流程
    YII2 Model 类切换数据库连接
    Centos6.5搭建java开发环境
    YII切换开发和生产环境(命令)
    YII2 实现后台操作记录日志(转)
    phpstorm2018激活方法--使用激活码
  • 原文地址:https://www.cnblogs.com/cxhzy/p/11051334.html
Copyright © 2020-2023  润新知