• 电商 Python html格式访客数据转为excel格式的数据 html格式的excel转换为excel


    原文:
    https://www.jianshu.com/p/4f38fe021fb3

    # -*- coding: utf-8 -*-
    
    
    
    
    
    # # 找出非excel格式的文件
    # import pandas as pd
    # import os
    # from shutil import copyfile
    
    # excel_dir = 'D:/工作/数据转换/访客数据/'
    # os.chdir(excel_dir)
    # for filename in os.listdir(excel_dir):
        # root_dir = ""
        # print(filename)    
        # try:
            # frame = pd.read_excel(excel_dir+filename)       
            # root_dir = "D:/工作/数据转换/excel格式/"        
        # except Exception as e:
            # print("异常:", e)
            # root_dir = "D:/工作/数据转换/html格式/"
        # copyfile(excel_dir+filename, root_dir+filename)
    
    
    
    
    
    
    
    
    
    # # html的table转换为excel
    # # 原文:https://www.jianshu.com/p/4f38fe021fb3
    
    # import pandas
    # import os
    # from shutil import copyfile
    
    # excel_dir = 'D:/工作/数据转换/html格式/'
    # os.chdir(excel_dir)
    # for filename in os.listdir(excel_dir):
        # print(filename)
        
        # try:
            # with open(excel_dir+filename, 'rb') as f:  
                # df = pandas.read_html(f.read(),encoding='utf-8')           
            # bb = pandas.ExcelWriter("D:/工作/数据转换/转换数据/" + filename)  
            # df[0].to_excel(bb,index=False)  
            # bb.close()  
        # except Exception as e:
            # print("异常:"+e)
            # # copyfile(excel_dir+filename, "D:/工作/数据转换/出错数据/" + filename)
        
    
    
    
    
    # # 单文件测试
    # import pandas
    # import os
    # with open('访客-2020-09-18 23_45_16.xlsx', 'rb') as f:  
        # df = pandas.read_html(f.read(),encoding='utf-8')           
    # bb = pandas.ExcelWriter('访访客-2020-09-18 23_45_16-1.xlsx')  
    # df[0].to_excel(bb,index=False)  
    # bb.close()  
    
    
    
    
    
    
    
    
    # # 批量文件改名
    # import os
    
    # excel_dir = 'D:/工作/数据转换/1/'
    # os.chdir(excel_dir)
    # for filename in os.listdir(excel_dir):
        # print(filename)
        # oldFullName = excel_dir+filename
        # newFullName = excel_dir+"1店-"+filename
        # os.rename(oldFullName, newFullName)
    
    
        
    
    
    
    
    
    # # excel转换为csv文件
    # import pandas as pd
    # import os
    # import re
    
    
    # excel_dir = 'D:/工作/数据转换/转换数据/'
    # os.chdir(excel_dir)
    # for filename in os.listdir(excel_dir):
        # print(filename)
       
        # # filename = "访客-2020-07-30 23_11_30.xlsx"
        # df = pd.read_excel(filename,index_col=0)
    
    
        # # 访问时间列 增加 日期
        # m = re.search("(d{4}-d{1,2}-d{1,2})", filename)   
        # strdate = m.group(1)
        # df["访问时间"] =['%s %s' % (strdate, s) for s in df["访问时间"]]
    
        # col_name = df.columns.tolist()
        # if '搜索关键字' not in col_name:
            # # 插入列
            # index = col_name.index('入店来源') + 1
            # col_name.insert(index, '搜索关键字')
            # df = df.reindex(columns = col_name)
    
            # # 修改值
            # df.loc[df['入店来源'].str.find('手淘搜索') > -1 , '搜索关键字'] = df['入店来源'].str.replace('手淘搜索','')
            # df.loc[df['入店来源'].str.find('手淘搜索') > -1 , '入店来源'] = '手淘搜索'
    
    
        # file_name = os.path.splitext(filename)[0]
        # df.to_csv('D:/工作/数据转换/CSV数据/'+file_name+'.csv',encoding='utf-8')
    
    
    
    
    
    
    
    # # 数据合并
    # import pandas as pd
    # import os
    
    # path = "./2店/"
    # files = os.listdir(path)
    # list_excel = []
    
    # for filename in files:
        # fullname = path + filename# excel的相对路径
        # df = pd.read_csv(fullname)# 默认读取Excel的第一个表单    
        # list_excel.append(df)# 把Excel追加到list中
    
    # writer = pd.ExcelWriter('合并后的数据.xlsx')
    # pd.concat(list_excel).to_excel(writer,'sheet1',index=False)
    # writer.save()
    # print('合并完成')
    
    
    
    
    
    
    
    
    
    
    
    
    
  • 相关阅读:
    Linux使用退格键时出现^H解决方法
    Linux centos7下php安装cphalcon扩展的方法
    Webstorm/Phpstorm中设置连接FTP,并快速进行文件比较,上传下载,同步等操作
    在Vmware中安装CentOS7
    php stomp.dll 下载地址
    WIN7 64位系统安装JDK并配置环境变量
    SVN如何将版本库url访问地址中的https改为http
    两个日期这间的间隔天数
    vi/vim 命令速查手册
    判断PC或mobile设备
  • 原文地址:https://www.cnblogs.com/guxingy/p/13999562.html
Copyright © 2020-2023  润新知