• Data_r_and_w(csv,json,xlsx)


    import os
    import sys
    import argparse

    try:
        import cStringIO as StringIO
    except:
        import StringIO
    import struct
    import json
    import csv

    def import_data(import_file):
        '''
        Read a fixed-width data file and return its rows as lists of fields.

        Each line is split by byte position into three fields of 9, 14 and 5
        bytes (28 bytes in total); surrounding whitespace is stripped from
        every field.
        Sample row: 161322597 0386544351896 0042

        import_file -- path of the file to read.

        Returns a list with one three-item list of strings per non-blank line.
        Raises struct.error if a non-blank line is shorter than 28 bytes.
        '''
        # Compile the layout once instead of once per line.
        row_layout = struct.Struct('9s14s5s')
        data = []
        # Binary mode hands struct the bytes it needs on Python 2 and 3 alike.
        with open(import_file, 'rb') as source:
            for line in source:
                # A blank line (typically a trailing newline at the end of
                # the file) carries no record, so skip it.
                if not line.strip():
                    continue
                record = []
                for raw in row_layout.unpack_from(line):
                    value = raw.strip()
                    if not isinstance(value, str):
                        # Python 3 yields bytes here; latin-1 maps every byte
                        # to exactly one character, so decoding cannot fail.
                        value = value.decode('latin-1')
                    record.append(value)
                data.append(record)
        return data

    def write_data(data, export_format):
        '''
        Dispatch data to the writer for export_format and return its output.

        data -- list of rows, passed unchanged to the selected writer.
        export_format -- one of 'csv', 'json' or 'xlsx'.

        Returns whatever the selected writer (write_csv, write_json or
        write_xlsx) returns.
        Raises ValueError for any other export_format.
        '''
        if export_format == 'csv':
            return write_csv(data)
        if export_format == 'json':
            return write_json(data)
        if export_format == 'xlsx':
            return write_xlsx(data)
        # ValueError subclasses Exception, so callers that catch the generic
        # Exception still handle this error.
        raise ValueError("Illegal format defined")

    def write_csv(data):
        '''
        Render data as CSV text.

        data -- iterable of rows; each row is written as one CSV record.

        Returns the complete CSV document as a string.
        '''
        # csv.writer needs a file-like target, so collect the output
        # in an in-memory buffer.
        buf = StringIO.StringIO()
        csv.writer(buf).writerows(data)
        return buf.getvalue()

    def write_json(data):
        '''
        Serialize data to a JSON string using json.dumps with its defaults.
        '''
        return json.dumps(data)

    def write_xlsx(data):
        '''
        Build a one-sheet workbook from data and return its file content.

        data -- iterable of rows; each row is an iterable of cell values.
            Item j of row i is written to cell (i, j) of "Sheet 1".

        At most 65536 rows (row indexes 0-65535) are written; if data holds
        more, a warning goes to stderr and the remaining rows are dropped.

        Returns the saved workbook content as a string. Nothing is written
        to disk here.
        NOTE(review): xlwt produces the legacy .xls format rather than
        .xlsx - confirm the expected file type with consumers.
        '''
        from xlwt import Workbook
        # Highest row index written to the sheet.
        max_row = 65535
        book = Workbook()
        sheet1 = book.add_sheet("Sheet 1")
        for row, line in enumerate(data):
            if row > max_row:
                # Warn only when rows are really being dropped; stderr keeps
                # the notice out of the data the script prints to stdout.
                sys.stderr.write("Hit limit of # of rows in one sheet (65535).\n")
                break
            for col, datum in enumerate(line):
                sheet1.write(row, col, datum)
        # Save into an in-memory buffer so the caller gets the content back.
        f = StringIO.StringIO()
        book.save(f)
        return f.getvalue()
       
       
    if __name__ == '__main__':
        # Command-line entry point: convert a fixed-width file to the
        # requested format and print the result to stdout.
        parser = argparse.ArgumentParser()
        parser.add_argument("import_file", help="Path to a fixed-width data file.")
        parser.add_argument("export_format", help="Export format: json, csv, xlsx.")
        args = parser.parse_args()

        # Both arguments are positional, so argparse normally rejects a
        # missing one itself; this check is kept as a safeguard.
        if args.import_file is None:
            sys.stderr.write("You must specify path to import from.\n")
            sys.exit(1)

        if args.export_format not in ('csv', 'json', 'xlsx'):
            sys.stderr.write("You must provide valid export file format.\n")
            sys.exit(1)

        # verify given path is an accessible file
        if not os.path.isfile(args.import_file):
            sys.stderr.write("Given path is not a file: %s\n" % args.import_file)
            sys.exit(1)

        # read from the formatted fixed-width file
        data = import_data(args.import_file)

        # export data to the specified format;
        # to keep this Unix-like and pipe-able we just print to stdout
        print(write_data(data, args.export_format))

  • 相关阅读:
    C#中 栈,堆你真的懂吗?不理解引用类型和值类型区别的程序员将会给代码引入诡异的bug和性能问题
    c# 可空类型,语法糖,lambda,命名规则(Pascal 帕斯卡命名,Camel 驼峰命名),注释,封装,继承,多态
    数据库事务,游标,触发器,存储过程,索引,数字,日期转换为字符,字符串操作,查询,分类,内连接,外连接,全连接,模糊查询,范围查询,5种聚合函数,分组查询,主键,外键,标识列
    html,css 知识汇总
    html,css,js,jquery
    数据库文件托管
    final,finally,finalize的区别
    Thread,Threadpool,task的区别
    ABP 一个开源的web开发框架
    redis 40问
  • 原文地址:https://www.cnblogs.com/cmnz/p/8329494.html
Copyright © 2020-2023  润新知