• get_data_use_notbom 自定义外部数据自动写入


    import urllib.request;
    from pandas import DataFrame;
    from pandas import Series;
    from bs4 import BeautifulSoup;
    
    import pandas as pd
    import chardet
    
    # Step 1: load a locally saved HTML export and turn its table into a
    # DataFrame named ``data`` (one row per <tr> that contains <td> cells).
    file_name = "2222-11.txt"
    # Alternative input: file_name = "2222.txt"
    file_path = 'file:///F:/python/untitled1/core/do_data/save2/'

    # Use a context manager so the handle is closed even if reading fails.
    with urllib.request.urlopen(file_path + file_name) as response:
        html = response.read()

    soup = BeautifulSoup(html, "html.parser")
    trs = soup.find_all('tr')
    if not trs:
        # Fail with a clear message instead of an IndexError on trs[0].
        raise ValueError("no <tr> rows found in " + file_path + file_name)

    # The <th> cells of the first row supply the column names.
    index_d = [th.getText() for th in trs[0].find_all('th')]
    print(index_d)

    # Collect all rows first and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    rows = []
    for tr in trs:
        td_datas = [td.getText() for td in tr.find_all('td')]
        if not td_datas:
            # Rows without <td> cells (e.g. the header row) carry no data.
            continue
        if len(td_datas) != len(index_d):
            # Keep the strictness of the per-row Series construction, which
            # rejected rows whose cell count differs from the header.
            raise ValueError(
                "row has %d cells but the header has %d columns"
                % (len(td_datas), len(index_d))
            )
        rows.append(td_datas)
    data = DataFrame(rows, columns=index_d)

    print(len(data))

    # Merge the three descriptive columns into one space-separated text column.
    data["new"] = (
        data["涨停时间"] + " " + data["历史涨停原因"] + " " + data["涨停选原因"]
    )

    # Keep only the last record for each stock code.
    data = data.drop_duplicates(subset=['股票代码'], keep='last')
    print(len(data))


    # Name of the existing pipe-separated file that the new rows are merged into.
    ofile = "extern_user.txt"
    
    
    def gb_trans_utf8(file_path):
        """Write a UTF-8 copy of a GB18030-encoded text file.

        The copy is placed in the same directory as the source and its file
        name is the source file name with "utf" prepended, e.g.
        "extern_user.txt" -> "utfextern_user.txt" and
        "save/extern_user.txt" -> "save/utfextern_user.txt".
        The source file itself is left untouched.

        Args:
            file_path: Path of the GB18030-encoded file to read.

        Raises:
            OSError: If the source cannot be read or the copy cannot be written.
            UnicodeDecodeError: If the source is not valid GB18030.
        """
        import os

        # Prefix only the base name; prefixing the whole path string would
        # redirect the output into a different directory whenever file_path
        # contains a directory component.
        directory, base_name = os.path.split(file_path)
        target_path = os.path.join(directory, "utf" + base_name)

        with open(file_path, 'r', encoding='gb18030') as f:
            content = f.read()
        with open(target_path, 'w', encoding='utf-8') as f:
            f.write(content)
    
    def utf8_trans_gb(file_path):
        """Re-encode a UTF-8 text file to GB18030 in place.

        The whole file is read into memory as UTF-8 text and then written
        back to the same path using the GB18030 encoding.

        Args:
            file_path: Path of the file to convert; it is overwritten.
        """
        with open(file_path, mode='r', encoding='utf-8') as source:
            text = source.read()

        # The source handle is closed before the same path is reopened for writing.
        with open(file_path, mode='w', encoding='gb18030') as target:
            target.write(text)
    
    # Step 2: merge the scraped rows into the existing pipe-separated file and
    # write the result to save/extern_user.txt in GB18030.

    # Work on a UTF-8 copy of the existing file (written as "utf" + ofile).
    gb_trans_utf8(ofile)

    new_data = pd.read_table(
        "utf" + ofile, header=None, sep="|", encoding="utf-8", dtype=str
    )

    # Keep the first four fields only and drop incomplete records.
    new_data = new_data.iloc[:, 0:4].dropna()
    # Note: the former ``new_data.columns.name = [...]`` assignment is gone on
    # purpose; an Index name must be hashable, so a list is rejected by
    # current pandas, and the column labels below are what the code relies on.
    new_data.columns = ["a", "b", "c", "d"]

    data = data.reset_index(drop=True)

    # Build one [flag, code, "31", text] record per scraped row.
    rows = []
    for code, note in zip(data["股票代码"], data["new"]):
        d_code = str(code)
        # Codes starting with "6" get flag "1", all others "0".
        # startswith() also copes with an empty code, where d_code[0] would raise.
        flag = "1" if d_code.startswith("6") else "0"
        row = [flag, d_code, "31", note]
        print(row)
        rows.append(row)

    # Append all records in one concat: DataFrame.append was removed in
    # pandas 2.0 and copied the whole frame on every iteration.
    if rows:
        new_data = pd.concat(
            [new_data, pd.DataFrame(rows, columns=new_data.columns)],
            ignore_index=True,
        )

    # For identical (b, c) pairs the last record wins, so freshly scraped rows
    # replace existing rows with the same key.
    new_data = new_data.drop_duplicates(subset=["b", "c"], keep='last')

    # Sort numerically on "c", then on "b".
    new_data["c"] = new_data["c"].astype(int)
    new_data = new_data.sort_values(by=["c", "b"], ascending=[True, True])

    print(new_data.columns)
    new_data = new_data.reset_index(drop=True)

    # Constant fifth field appended to every record.
    new_data["e"] = "0.00"


    # to_csv writes UTF-8 by default; the file is re-encoded to GB18030 afterwards.
    new_data.to_csv('save/extern_user.txt', sep='|', index=False, header=False)

    utf8_trans_gb('save/extern_user.txt')
  • 相关阅读:
    thinkphp在模型中自动完成session赋值
    highcharts实例教程二:结合php与mysql生成饼图
    程序员应该经常看看的网站
    highcharts实例教程一:结合php与mysql生成折线图
    2015-2-10 ecshop
    一个简单的javascript获取URL参数的代码
    table 西边框样式
    PHP 获取当前日期及格式化
    mysql 获取当前日期及格式化
    mysql时间int日期转换
  • 原文地址:https://www.cnblogs.com/rongye/p/12991155.html
Copyright © 2020-2023  润新知