• Python实现读取多个excel;以及统计词频;使用词典


    # -*- coding: utf8 -*-
    import xlrd
    import codecs
    import os
    def handExcel(path):
        """Dump two columns of every workbook found in *path* into text files.

        For each directory entry the sheet named ``Sheet1`` is opened and, for
        every row after the first, the values in column index 3 and column
        index 4 are appended (each followed by one space) to
        ``7.8_userQue.txt`` and ``7.8_StandQue.txt`` respectively. Both output
        files are created in the current working directory, which is left
        untouched: workbook paths are built by joining *path* with the entry
        name, so relative directories work as well as absolute ones.

        Workbooks without a sheet named ``Sheet1`` are reported and skipped.

        Args:
            path: Directory whose entries are opened as Excel workbooks.
        """
        # Define the output files; the context managers guarantee they are
        # flushed and closed even when a workbook fails to load.
        with codecs.open("7.8_userQue.txt", "w", "utf-8") as output1, \
                codecs.open("7.8_StandQue.txt", "w", "utf-8") as output2:
            # Define the input files.
            for j, name in enumerate(os.listdir(path), 1):
                print("读取第%d个excel" % j)
                print(name)
                bk = xlrd.open_workbook(os.path.join(path, name))
                try:
                    sh = bk.sheet_by_name("Sheet1")
                except xlrd.XLRDError:
                    # Without this skip the code below would run against an
                    # unbound (or previous workbook's) sheet object.
                    print("no sheet in %s named Sheet1" % name)
                    continue
                # Number of rows.
                nrows = sh.nrows
                # Number of columns.
                ncols = sh.ncols
                print("nrows %d, ncols %d" % (nrows, ncols))
                # Row 0 is skipped.
                for i in range(1, nrows):
                    # %-formatting accepts numeric cells, which plain string
                    # concatenation would reject with a TypeError.
                    output1.write("%s " % (sh.cell_value(i, 3),))
                    output2.write("%s " % (sh.cell_value(i, 4),))
    # Script entry point: run the export over the hard-coded workbook directory.
    handExcel("D:/Users/cassie.xiao/PycharmProjects/read_excel/three")



    ---------------------------------------统计词频----------------------------------
    # -*- coding: utf8 -*-

    import codecs
    def getfreq(freqdict):
        """Count word occurrences in ``xiaoi_userQue_seg_hanlp.txt``.

        The file is read as UTF-8 from the current working directory. Each
        line is split on runs of whitespace, so a trailing newline never
        becomes part of a word and empty tokens are not counted.

        Args:
            freqdict: Mapping of word -> count that is updated in place; pass
                an empty dict to start a fresh tally.

        Returns:
            The same ``freqdict`` object, with the counts from the file added.
        """
        # The context manager closes the input even if decoding fails midway.
        with codecs.open("xiaoi_userQue_seg_hanlp.txt", "r", "utf-8") as source:
            print("getfreq....")
            for line in source:
                for word in line.split():
                    freqdict[word] = freqdict.get(word, 0) + 1
        return freqdict

    def sort_out(dic, outfilename):
        """Write the entries of *dic* to *outfilename*, highest value first.

        Each entry is emitted as ``key:value`` followed by a single space, and
        the file is written as UTF-8. Entries with equal values keep the order
        in which the mapping yields them.

        Args:
            dic: Mapping of string keys to sortable values.
            outfilename: Path of the file to create or overwrite.
        """
        print("sort....")
        ranked = list(dic.items())
        ranked.sort(key=lambda pair: pair[1], reverse=True)
        print("out....")
        with codecs.open(outfilename, 'w', 'utf-8') as sink:
            for entry in ranked:
                word = entry[0]
                count = entry[1]
                sink.write(word + ":" + str(count) + " ")
    def main():
        """Tally word frequencies and write them, sorted, to the result file.

        Starts from an empty dict, lets ``getfreq`` fill it, and hands the
        result to ``sort_out`` together with the output file name.
        """
        outfilename = "xiaoi_userQue_seg_hanlp_freq.txt"
        counts = getfreq({})
        sort_out(counts, outfilename)


    if __name__ == "__main__":
        main()

    --------------------------------------使用词典操作-------------------------------------
    # -*- coding: utf8 -*-

    import codecs
    def _load_freq_table(filename):
        """Parse a UTF-8 ``word:count`` file into a dict of word -> count text."""
        table = {}
        with codecs.open(filename, "r", "utf-8") as source:
            for line in source:
                # A line starting with ":" would yield an empty key; skip it.
                if line.startswith(":"):
                    continue
                pair = line.strip().split(":")
                # Blank or colon-less lines carry no count to look up later.
                if len(pair) < 2:
                    continue
                table[pair[0]] = pair[1]
        return table


    def _write_noun_freqs(filename, noun_lines, table, echo=False):
        """Write ``noun:count `` to *filename* for every noun present in *table*."""
        with codecs.open(filename, "w", "utf-8") as sink:
            for line in noun_lines:
                if echo:
                    print(line)
                noun = line.strip()
                if noun in table:
                    if echo:
                        print(noun)
                    sink.write(noun + ":" + table[noun] + " ")


    def getfreq():
        """Filter two word-frequency files down to the words listed in noun.txt.

        Reads ``300W_xiaoi_jieba_UQ.txt`` and ``300W_xiaoi_jieba_SQ.txt`` (one
        ``word:count`` pair per line, UTF-8) and ``noun.txt`` (one word per
        line) from the current working directory. For every noun found in a
        frequency file, ``noun:count`` followed by a space is written to
        ``freq_xiaoi&Noun_userQ.txt`` and ``freq_xiaoi&Noun_standQ.txt``
        respectively. All inputs are read before any output file is created.

        While the standard-question file is processed, every noun line and
        every matched noun is echoed to stdout.
        """
        user_table = _load_freq_table(r"300W_xiaoi_jieba_UQ.txt")
        stand_table = _load_freq_table(r"300W_xiaoi_jieba_SQ.txt")
        # The noun list is read once and reused for both passes.
        with codecs.open("noun.txt", "r", "utf-8") as source:
            noun_lines = source.readlines()
        print("getfreq....")
        # Handle the user questions first.
        _write_noun_freqs(r"freq_xiaoi&Noun_userQ.txt", noun_lines, user_table)
        # Then handle the standard questions.
        _write_noun_freqs(r"freq_xiaoi&Noun_standQ.txt", noun_lines,
                          stand_table, echo=True)
    # Script entry point: build both noun-filtered frequency files.
    getfreq()

  • 相关阅读:
    转载:混淆包含SlidingMenu、gson等Android代码的proguard写法
    今天解决的两个问题
    C++中指针和引用的区别
    负载均衡服务器session共享的解决方案 (转载)
    Entity Framework的默认值BUG解决方法
    【转】SAPI中的IspeechRecoContext(接口)
    Sapi 添加语法的文章(转载)
    SAPI训练文件存储位置
    Flask第九篇 Flask 中的蓝图(BluePrint)
    Flask 第八篇 实例化Flask的参数 及 对app的配置
  • 原文地址:https://www.cnblogs.com/maowuyu-xb/p/7421495.html
Copyright © 2020-2023  润新知