• 综合练习:词频统计


    # Word-frequency statistics for the English text in peng.txt.
    #
    # Steps: read the file, turn punctuation into spaces, lower-case and split
    # into words, drop the words listed in `exclude`, count the rest, print
    # every (word, count) pair in descending order of count, and write the
    # 20 most frequent words to result.txt (fewer if the text has fewer).
    from collections import Counter

    # Punctuation characters that are replaced by a space before splitting.
    sep = ''',.?—!"'''

    # Words left out of the statistics.
    exclude = {'the', 'and', 'i', 'in', "i'm", 'a', 'of', 'an', 'on', 'to', 'with'}

    # `with` closes the file even if reading raises.
    with open("peng.txt", "r", encoding='utf-8') as f:
        song = f.read()

    for c in sep:
        song = song.replace(c, ' ')

    swl = song.lower().split()

    # One pass over the word list; calling swl.count(w) for every distinct
    # word would rescan the whole list each time.
    swd = Counter(w for w in swl if w not in exclude)

    # (word, count) pairs, most frequent first; equal counts keep the order in
    # which the words first appear in the text.
    fl = swd.most_common()

    for i in fl:
        print(i)

    # Slicing instead of range(20) avoids an IndexError when the text has
    # fewer than 20 distinct words. The encoding matches the input file.
    with open("result.txt", "w", encoding='utf-8') as f:
        for word, times in fl[:20]:
            f.write(word + "  " + str(times) + "\n")
    

      

    import jieba
    
    # Word-frequency statistics for the Chinese text in weicheng.txt.
    #
    # Steps: read the file, delete punctuation, segment the text with jieba,
    # drop the tokens listed in `a`, count the rest, print the intermediate
    # results, and append the 20 most frequent words to wcCount.txt (fewer if
    # the text has fewer).
    from collections import Counter

    # `with` closes the file even if reading raises.
    with open('weicheng.txt', 'r', encoding='utf-8') as f:
        text = f.read()

    # Punctuation (and the plain space) removed from the text before segmenting.
    p = ''',。‘’“”:;()!?、 '''

    # Tokens left out of the statistics: line breaks, the ideographic space
    # (U+3000) and a handful of very common single characters.
    a = {
        '的', '\n', '\u3000',
        '曰', '之', '不', '人', '一', '大', '马', '来', '有', '于', '下', '此',
    }
    for i in p:
        text = text.replace(i, '')

    # Segment once; jieba.lcut already returns a list.
    t = jieba.lcut(text)
    print(t)

    # Count tokens, not substrings: text.count(word) would also count a word
    # every time it occurs inside a longer word.
    count = Counter(w for w in t if w not in a)

    # Distinct words that survived the exclusion set.
    wl = list(count)
    print(wl)

    # (word, count) pairs, most frequent first.
    cl = count.most_common()
    print(cl)

    # Append mode is kept from the original, so repeated runs accumulate
    # results in the file. NOTE(review): confirm that appending rather than
    # overwriting is intended.
    # Slicing avoids an IndexError when there are fewer than 20 distinct
    # words; an explicit encoding is needed because the output is Chinese text.
    with open('wcCount.txt', 'a', encoding='utf-8') as f:
        for word, times in cl[:20]:
            f.write(word + ':' + str(times) + '\n')
    

      

  • 相关阅读:
    一位年轻女董事长的忠告:不想穷下去就请看
    Ajax拓展02
    Ajax拓展
    Ajax概念及基础
    PHP了解01
    CSS3新增功能03
    CSS3新增功能02
    CSS3新增功能01
    html5介绍
    jQuery学习03
  • 原文地址:https://www.cnblogs.com/phoenlix/p/8666515.html
Copyright © 2020-2023  润新知