利用jieba分析词语频数

# 导入依赖
import jieba
 
def fun(input_path="D:\\yx.txt", output_path="D:\\yxcount.txt"):
    """Count word frequencies in a UTF-8 text file and write a CSV-style report.

    The text is segmented with ``jieba.lcut``. Each token has its surrounding
    whitespace stripped; tokens that are shorter than two characters after
    stripping (single characters, empty strings, whitespace-only tokens) are
    ignored. The remaining words are written to ``output_path`` as
    ``word,count`` lines under a ``word,count`` header, ordered from the most
    to the least frequent word.

    Args:
        input_path: Path of the UTF-8 encoded text file to analyse.
        output_path: Path of the report file; it is created or overwritten.

    Raises:
        OSError: If either file cannot be opened.
        UnicodeDecodeError: If the input file is not valid UTF-8.
    """
    # Imported locally so the function is self-contained with respect to the
    # standard library names it needs.
    from collections import Counter

    # Context managers guarantee both files are closed (and the report is
    # flushed) even if segmentation or writing fails part-way through.
    with open(input_path, "r", encoding="utf-8") as source:
        text = source.read()

    counts = Counter()
    for token in jieba.lcut(text):
        word = token.strip()
        # Skip anything that is not a real multi-character word.
        if len(word) < 2:
            continue
        counts[word] += 1

    # Write with the same encoding the input was read with, so the report
    # does not depend on the platform's locale default encoding.
    with open(output_path, "w", encoding="utf-8") as report:
        report.write("word,count\n")
        # most_common() sorts by descending count and keeps first-seen order
        # for ties, matching a stable sort with reverse=True.
        report.writelines(
            "{},{}\n".format(word, count)
            for word, count in counts.most_common()
        )
 
 
# Script entry point: run the word-frequency analysis only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    fun()

相关阅读:
分享一个一直在用的golang单测小脚本
JakeCoffman/Cron定时任务库核心实现源码解析
uniapp h5部署二级目录
Selenium
Unable to connect to the server: x509: cannot validate certificate for 172.25.97.19 because it doesn't contain any IP SANs
python
chrome
edit-plus 添加单引号 ''
CALL_AND_RETRY_LAST Allocation failed
nacos 客户端异常：SocketTimeoutException: connect timed out

原文地址：https://www.cnblogs.com/a8047/p/15652606.html