• 统计词频


    
    
    import re
    from collections import Counter
    
    string = """   Lorem ipsum dolor sit amet, consectetur
        adipiscing elit. Nunc ut elit id mi ultricies
        adipiscing. Nulla facilisi. Praesent pulvinar,
        sapien vel feugiat vestibulum, nulla dui pretium orci,
        non ultricies elit lacus quis ante. Lorem ipsum dolor
        sit amet, consectetur adipiscing elit. Aliquam
        pretium ullamcorper urna quis iaculis. Etiam ac massa
        sed turpis tempor luctus. Curabitur sed nibh eu elit
        mollis congue. Praesent ipsum diam, consectetur vitae
        ornare a, aliquam a nunc. In id magna pellentesque
        tellus posuere adipiscing. Sed non mi metus, at lacinia
        augue. Sed magna nisi, ornare in mollis in, mollis
        sed nunc. Etiam at justo in leo congue mollis.
        Nullam in neque eget metus hendrerit scelerisque
        eu non enim. Ut malesuada lacus eu nulla bibendum
        id euismod urna sodales.  """
    
    words = re.findall(r'w+', string) #This finds words in the document
    
    lower_words = [word.lower() for word in words] #lower all the words
    
    word_counts = Counter(lower_words) #counts the number each time a word appears
    print word_counts
    
    # Counter({'elit': 5, 'sed': 5, 'in': 5, 'adipiscing': 4, 'mollis': 4, 'eu': 3, 
    # 'id': 3, 'nunc': 3, 'consectetur': 3, 'non': 3, 'ipsum': 3, 'nulla': 3, 'pretium':
    # 2, 'lacus': 2, 'ornare': 2, 'at': 2, 'praesent': 2, 'quis': 2, 'sit': 2, 'congue': 2, 'amet': 2, 
    # 'etiam': 2, 'urna': 2, 'a': 2, 'magna': 2, 'lorem': 2, 'aliquam': 2, 'ut': 2, 'ultricies': 2, 'mi': 2, 
    # 'dolor': 2, 'metus': 2, 'ac': 1, 'bibendum': 1, 'posuere': 1, 'enim': 1, 'ante': 1, 'sodales': 1, 'tellus': 1,
    # 'vitae': 1, 'dui': 1, 'diam': 1, 'pellentesque': 1, 'massa': 1, 'vel': 1, 'nullam': 1, 'feugiat': 1, 'luctus': 1, 
    # 'pulvinar': 1, 'iaculis': 1, 'hendrerit': 1, 'orci': 1, 'turpis': 1, 'nibh': 1, 'scelerisque': 1, 'ullamcorper': 1,
    # 'eget': 1, 'neque': 1, 'euismod': 1, 'curabitur': 1, 'leo': 1, 'sapien': 1, 'facilisi': 1, 'vestibulum': 1, 'nisi': 1, 
    # 'justo': 1, 'augue': 1, 'tempor': 1, 'lacinia': 1, 'malesuada': 1})
    
    
    
    
    
  • 相关阅读:
    09.安装Collabora Online服务
    08.nextcloud搭建
    07.安装及使用gitlub
    winmerge vs2010
    C#中时间计算汇总
    JS正则表达式大全 转
    js 验证正则
    js验证大全
    CSC 命令编译cs文件
    网站PV、UV以及查看方法(转)
  • 原文地址:https://www.cnblogs.com/tingshuo123/p/6917817.html
Copyright © 2020-2023  润新知