#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Demo script for the jieba Chinese word-segmentation library.

Reads the novel text stored in ``NOVEL_PATH`` (decoded as GB18030) once and
prints, in this order:

1. the top 20 keywords returned by ``jieba.analyse.extract_tags``;
2. the throughput of non-parallel ``jieba.cut`` over the whole text;
3. the segmentation of a short sample sentence (precise mode);
4. the segmentation of a second sentence with the HMM disabled, before and
   after ``jieba.suggest_freq`` is asked to split two characters apart;
5. the top 20 keywords returned by ``jieba.analyse.textrank``, restricted to
   the POS tags ``ns``, ``n``, ``v`` and ``vn``.

The script runs at import time; it has no ``__main__`` guard.
"""

# --- Scratch experiments kept for reference (never executed) ---------------
#
#     str1 = 'as,gh,rt,ujrk'
#     str2 = ','
#     str1 = str1[str1.find(str2)+1:]
#     print(str1)
#
#     s = 'as,gh,rt,ujrk'
#     print(s.split(','))
#
#     import re
#     pattern = re.compile(r'hello.*!')
#     match = pattern.match('hello,aklhgslhgfhg!gfdh')
#     if match:
#         print(match.group())

import sys
import time

import jieba
import jieba.analyse as analyse

# Input corpus and its on-disk encoding.
NOVEL_PATH = '西游记.txt'
NOVEL_ENCODING = 'gb18030'


def _read_text(path, encoding):
    """Return the full decoded contents of *path*, closing the file afterwards."""
    with open(path, 'r', encoding=encoding) as handle:
        return handle.read()


# Read the corpus once and reuse it for every step below.  Decoding is strict:
# a file that is not valid GB18030 raises UnicodeDecodeError here.
content = _read_text(NOVEL_PATH, NOVEL_ENCODING)

# 1. Keyword extraction with extract_tags; an empty allowPOS means no
#    part-of-speech filtering.
print(' '.join(analyse.extract_tags(content, topK=20, withWeight=False,
                                    allowPOS=())))

# --- Disabled parallel benchmark (never executed) --------------------------
# Parallel mode only supports POSIX systems.
#
#     jieba.enable_parallel(4)
#     content = open(u'西游记.txt', "r").read()
#     t1 = time.time()
#     words = "/".join(jieba.cut(content))
#     t2 = time.time()
#     tm_cost = t2 - t1
#     print('并行速度为:%s bytes/second' % (len(content) / tm_cost))

# 2. Time non-parallel segmentation of the whole text.
jieba.disable_parallel()
# perf_counter is monotonic and high-resolution, unlike wall-clock time.time().
started = time.perf_counter()
words = "/".join(jieba.cut(content))
tm_cost = time.perf_counter() - started
# NOTE: len(content) counts decoded characters, although the message says
# "bytes".  The guard avoids ZeroDivisionError if the clock did not advance.
speed = len(content) / tm_cost if tm_cost else float('inf')
print('非并行速度为:%s bytes/second' % speed)

# 3. Precise-mode segmentation of a sample sentence.  The first print shows
#    the object returned by jieba.cut itself (not the tokens); the tokens are
#    consumed by the join on the following line.
segments = jieba.cut('我在学习自然语言处理', cut_all=False)
print(segments)
print('/'.join(segments))

# 4. Segment the same sentence before and after the frequency adjustment that
#    asks jieba to keep '中' and '将' as separate tokens.
print("/".join(jieba.cut('如果放到旧字典中将出错', HMM=False)))
jieba.suggest_freq(('中', '将'), True)
print('/'.join(jieba.cut('如果放到旧字典中将出错', HMM=False)))

# 5. Keyword extraction with TextRank, limited to the listed POS tags.
print(" ".join(analyse.textrank(content, topK=20, withWeight=False,
                                allowPOS=('ns', 'n', 'v', 'vn'))))