• 源数据的换行符


    from openpyxl import Workbook
    import time
    import sys
    import os
    import pprint
    import re
    
    curPath = os.path.abspath(os.path.dirname(__file__))
    rootPath = os.path.split(curPath)[0]
    sys.path.append(rootPath)
    
    db_db_code_dic = {}
    fname = 'jmtool0315_public_tbl_area_type.csv'
    fd = '%s\%s' % (curPath, fname)
    with open(fd, 'r', encoding='utf-8') as fo:
        for i in fo:
            l = i.split(',')
            db_db_code_dic[l[1]] = l[2]
    fo.close()
    
    curPath = os.path.abspath(os.path.dirname(__file__))
    rootPath = os.path.split(curPath)[0]
    sys.path.append(rootPath)
    
    tag_jmtool_list = ['(', '(', '-']
    tail_list = ['号楼', '栋', '幢', '单元']
    for i in range(0, 30, 1):
        for tail in tail_list:
            str_ = '%s%s' % (i, tail)
            tag_jmtool_list.append(str_)
    
    zh_num_list = ['零', '一', '二', '三', '四', '五', '六', '七', '八', '九']
    zh_num_zhk_dic = {}
    zh_num_numk_dic = {}
    for i in range(0, 20, 1):
        if i < 10:
            zh_num = zh_num_list[i]
        elif i == 10:
            zh_num = '十'
        else:
            ii = i - 10
            zh_num = '%s%s' % ('十', zh_num_list[ii])
        zh_num_numk_dic[str(i)] = zh_num
        zh_num_zhk_dic[zh_num] = str(i)
    
    for i in range(0, 30, 1):
        for tail in tail_list:
            str_ = '%s%s' % (i, tail)
            tag_jmtool_list.append(str_)
    
    for k in zh_num_zhk_dic:
        for tail in tail_list:
            str_ = '%s%s' % (k, tail)
            tag_jmtool_list.append(str_)
    
    
    def extract_name(name_):
        for i in tag_jmtool_list:
            name_ = name_.split(i)[0]
        # 润景怡园10号
        name_ = re.compile('d+').split(name_)[0]
        # '宝安智谷G座'
        name_ = re.compile('[a-zA-Z]+').split(name_)[0]
        return name_
    
    
    def gen_bd_query_name(name_, type_):
        res_ = name_
        if type_ == '住宅小区':
            res_ = extract_name(name_)
        if type_ == '写字楼':
            # 海星城市广场B座
            name_ = re.compile('[a-zA-Z]+').split(name_)[0]
        res_ = res_.replace('·', '').replace('.', '').replace('(', '(').replace(')', ')').replace('?', '').replace('?',
                                                                                                                   '').replace(
            ' ', '')
        return res_
    
    
    fname = '采务.csv'
    fd = '%s\%s' % (curPath, fname)
    wb = Workbook()
    worksheet = wb.active
    file_title_l = 'id,area_code,uid, name,type_,city,district,address,street,submit_time'.split(',')
    worksheet.append(file_title_l)
    
    res_dic = {}
    
    with open(fd, 'r', encoding='utf-8') as fc:
        str_ = ''
        for i in fc:
            if i.find('"id"') > -1:
                continue
            if i.find('"
    ') == -1:
                str_ = '%s%s' % (str_, i.replace('
    ', ''))
                continue
            else:
                if len(i.replace(' ', '')) < 6:
                    continue
                else:
                    str_ = '%s%s' % (str_, i.replace('
    ', ''))
            ii = str_.replace('
    ', '').replace('";', ';').split(';"')
            id, area_code, uid, name, db_code, city, district, address, street, submit_time = ii
            submit_time = submit_time.replace('"', '')
            type_ = db_db_code_dic[db_code]
            ll = id, area_code, uid, name, type_, city, district, address, street, submit_time
            worksheet.append(ll)
            str_ = ''
    
            name_reduction = gen_bd_query_name(name, type_)
            if city not in res_dic:
                res_dic[city] = {}
            if district not in res_dic[city]:
                res_dic[city][district] = {}
            if name_reduction not in res_dic[city][district]:
                res_dic[city][district][name_reduction] = []
            ll = id, area_code, type_, city, district, uid, name, address, street, name_reduction, submit_time
            res_dic[city][district][name_reduction].append(ll)
    
    fc.close()
    localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
    file_name = '%s%s%s' % (fname, '-TO-EXCEL', localtime_)
    file_name_save = '%s\%s%s' % (curPath, file_name, '.xlsx')
    wb.save(file_name_save)
    wb.close()
    
    wb = Workbook()
    worksheet = wb.active
    file_title_str = 'id, area_code,type_, city, district, uid, name,address, street, name_reduction, submit_time'
    file_title_l = file_title_str.replace(' ', '').split(',')
    worksheet.append(file_title_l)
    type_dic, uid_l = {}, []
    for city in res_dic:
        for district in res_dic[city]:
            for name_reduction in res_dic[city][district]:
                for l in res_dic[city][district][name_reduction]:
                    worksheet.append(l)
                    type_, uid = l[2], l[5]
                    if type_ not in type_dic:
                        type_dic[type_] = 0
                    type_dic[type_] += 1
                    if uid not in uid_l:
                        uid_l.append(uid)
    localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
    file_name = '%s%s%s' % (file_name, '-ORDERED', localtime_)
    file_name_save = '%s\%s%s' % (curPath, file_name, '.xlsx')
    wb.save(file_name_save)
    wb.close()
    
    sorted(type_dic.items(), key=lambda x: x[1])
    for type_ in type_dic:
        print(type_, ';', type_dic[type_])
    
    print(len(uid_l))
    

      

    from openpyxl import Workbook
    import time
    import sys
    import os
    import pprint
    
    curPath = os.path.abspath(os.path.dirname(__file__))
    rootPath = os.path.split(curPath)[0]
    sys.path.append(rootPath)
    
    db_code_dic = {}
    fname = 'j_area_type.csv'
    fd = '%s\%s' % (curPath, fname)
    with open(fd, 'r', encoding='utf-8') as fo:
        for i in fo:
            l = i.split(',')
            db_code_dic[l[1]] = l[2]
    fo.close()
    
    fname = '采任务.csv'
    fd = '%s\%s' % (curPath, fname)
    wb = Workbook()
    worksheet = wb.active
    file_title_l = 'id, area_code, uid, name, type_, district, dname, address, street, submit_time'.split(',')
    worksheet.append(file_title_l)
    with open(fd, 'r', encoding='utf-8') as fc:
        str_ = ''
        for i in fc:
            if i.find('"id"') > -1:
                continue
            if i.find('"
    ') == -1:
                str_ = '%s%s' % (str_, i.replace('
    ', ''))
                continue
            else:
                if len(i.replace(' ', '')) < 6:
                    continue
                else:
                    str_ = '%s%s' % (str_, i.replace('
    ', ''))
            ii = str_.replace('
    ', '').replace('";', ';').split(';"')
            id, area_code, uid, name, ref_area_type_code, district, dname, address, street, submit_time = ii
            type_ = db_code_dic[ref_area_type_code]
            ll = id, area_code, uid, name, type_, district, dname, address, street, submit_time.replace('"','')
            worksheet.append(ll)
            str_ = ''
    fc.close()
    localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
    file_name = '%s%s%s' % (fname, '-TO-EXCEL', localtime_)
    file_name_save = '%s\%s%s' % (curPath, file_name, '.xlsx')
    wb.save(file_name_save)
    wb.close()
    

      

  • 相关阅读:
    [原创] 基础中的基础(三):理解数据库的几种键和几个范式
    [原创] 思维导图笔记(一):设计模式
    [原创] 基础中的基础(二):C/C++ 中 const 修饰符用法总结
    [原创] 基础中的基础(一):简单排序算法总结(附代码)
    [转载] Linux 下产生和调试core文件
    [转载] C++异常处理机制
    spring cloud开发、部署注意
    使用Spring Sleuth和Zipkin跟踪微服务
    HttpClient4.5 SSL访问工具类
    多线程处理中Future的妙用
  • 原文地址:https://www.cnblogs.com/rsapaper/p/7455011.html
Copyright © 2020-2023  润新知