• 【ES】简单使用


     # -*- coding: utf-8 -*-
     # Count word frequencies in words.txt (segmented with jieba, stop words
     # removed) and store one document per distinct word in the Elasticsearch
     # index "wordcount".
     import sys

     # Python 2 only: make UTF-8 the interpreter-wide default codec, exactly as
     # the original listing did. Python 3 has neither the reload() builtin nor
     # sys.setdefaultencoding, so the hack is skipped there.
     if sys.version_info[0] == 2:
         reload(sys)
         sys.setdefaultencoding('utf-8')

     import io
     from collections import Counter
     from datetime import datetime
     from elasticsearch import Elasticsearch
     from os import path
     import jieba
     import random

     # Created with no arguments, so the client library's defaults apply.
     es = Elasticsearch()

     # Both input files are looked up in the directory that holds this script.
     filePath = path.dirname(__file__)


     # index1:wordcount
     # stopwords
     stopWordFile = u'stopwords.txt'
     # io.open decodes UTF-8 while reading (one decode at the I/O boundary
     # instead of a .decode() call per line) and the with-block guarantees the
     # file handle is closed.
     with io.open(path.join(filePath, stopWordFile), encoding='utf-8') as stopFile:
         stopWordList = [line.strip() for line in stopFile]
     # Extra stop words; the empty string discards empty tokens.
     stopWordList.extend([u'腾讯', u'视频', u''])
     # Despite the name this ends up as a set, so the membership test in the
     # counting step below is O(1) per token.
     stopWordList = set(stopWordList)

     # information words
     new = 'words.txt'
     with io.open(path.join(filePath, new), encoding='utf-8') as wordFile:
         # Only leading/trailing newlines are trimmed; inner whitespace is kept.
         text = wordFile.read().strip(u'\n')

     # Counter is a dict subclass mapping each token to its occurrence count,
     # so wordDict can still be used wherever a plain dict was expected.
     wordDict = Counter(w for w in jieba.cut(text) if w not in stopWordList)

     # One Elasticsearch document per distinct word.
     for word, count in wordDict.items():
         data = {'word': word, 'count': count}
         es.index(index='wordcount', doc_type='test', body=data)
  • 相关阅读:
    many2many
    oneselfone
    one2one
    10-many2one
    08-one2many
    05-curd
    动态SQl
    文件系统缓存dirty_ratio与dirty_background_ratio两个参数区别
    expect用法举例
    通过命令修改mysql的提示符
  • 原文地址:https://www.cnblogs.com/colipso/p/6837845.html
Copyright © 2020-2023  润新知