• Lucene keyword highlighting and result sorting
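
The IndexServiceImpl class below covers the whole flow: it builds the index with IKAnalyzer, wraps matched keywords in red <font> tags through SimpleHTMLFormatter and Highlighter, sorts hits on the indexdate field with Sort and SortField, and pages through results via a QueryFilter. A minimal standalone sketch of the highlight-and-sort core follows the listing.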


    package com.zte.adc.search.service.impl;

    import java.io.File;
    import java.io.IOException;
    import java.io.StringReader;
    import java.text.SimpleDateFormat;
    import java.util.ArrayList;
    import java.util.Date;
    import java.util.List;

    import org.apache.commons.logging.Log;
    import org.apache.commons.logging.LogFactory;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.CorruptIndexException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.search.Filter;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.QueryFilter;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.search.BooleanClause.Occur;
    import org.apache.lucene.search.highlight.Highlighter;
    import org.apache.lucene.search.highlight.QueryScorer;
    import org.apache.lucene.search.highlight.SimpleFragmenter;
    import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
    import org.apache.lucene.store.LockObtainFailedException;
    import org.wltea.analyzer.lucene.IKAnalyzer;
    import org.wltea.analyzer.lucene.IKQueryParser;
    import org.wltea.analyzer.lucene.IKSimilarity;

    import com.zte.adc.search.dao.SearchStateBeanDAO;
    import com.zte.adc.search.dao.impl.IndexBeanDAOImpl;
    import com.zte.adc.search.dao.impl.SearchStateBeanDAOImpl;
    import com.zte.adc.search.entity.IndexBean;
    import com.zte.adc.search.entity.SearchStateBean;
    import com.zte.adc.search.entity.XmlBean;
    import com.zte.adc.search.service.IndexService;
    import com.zte.adc.search.service.XmlParserService;
    import com.zte.adc.search.utils.PageCondition;
    import com.zte.adc.search.utils.PageDataSet;
    import com.zte.adc.search.utils.PropertyManager;
    import com.zte.adc.search.utils.SearchStateEnum;
    import com.zte.adc.search.utils.StringTools;

    public class IndexServiceImpl implements IndexService {

     private static final Log log = LogFactory.getLog(IndexServiceImpl.class);
     // fields used when matching index documents by id and type
     private String[] ids = { "id", "type" };
     // whether each matched field is required (MUST)
     private Occur[] occur = { Occur.MUST, Occur.MUST };
     // index directory, read from configuration
     private String indexDir = PropertyManager
       .getPropertyByName("index.file.path");
     // XML parsing service
     private XmlParserService xmlService = null;
     // beans parsed from the XML configuration
     private List<XmlBean> beans = new ArrayList<XmlBean>();
     // DAO for index data
     private IndexBeanDAOImpl dao = new IndexBeanDAOImpl();
     // DAO for database row-state changes
     private SearchStateBeanDAO sdao = new SearchStateBeanDAOImpl();

     // create the index
     public void createIndex() {
      log.warn("开始创建索引");
      if (ifIndexExist()) {
       return;
      }
      File file = new File(indexDir);
      if (!file.exists()) {
       log.warn("索引目录不存,创建索引目录");
       file.mkdir();
      }
      xmlService = new SAXXmlParserServiceImpl();
      beans = xmlService.getXmlBean();
      IndexWriter.setDefaultWriteLockTimeout(Integer.MAX_VALUE);
      IndexWriter writer = null;
      List<IndexBean> list;
      try {
       writer = new IndexWriter(file, new IKAnalyzer());
       writeIndexBean(beans, writer);
       log.warn("创建索引成功");
      } catch (CorruptIndexException e) {
       log.error("创建索引失败,file不能写入或读取或不存在,不能创建索引" + e);
      } catch (LockObtainFailedException e) {
       log.error("创建索引失败,file不能写入或读取或不存在,不能创建索引" + e);
      } catch (IOException e) {
       log.error("创建索引失败,file不能写入或读取或不存在,不能创建索引" + e);
      } finally {
       try {
        if (writer != null)
         writer.close();
       } catch (CorruptIndexException e) {
        log.error(e.toString());
       } catch (IOException e) {
        log.error(e.toString());
       }
      }
     }

     // delete all index documents
     public void deleteAllIndex() {
      log.warn("开始删除所有索引");
      IndexWriter.setDefaultWriteLockTimeout(Integer.MAX_VALUE);
      IndexWriter writer = null;
      try {
       writer = new IndexWriter(indexDir, new IKAnalyzer(), false);
       writer.deleteAll();
       writer.optimize();
       log.warn("删除索引成功");
      } catch (CorruptIndexException e) {
       log.error("删除索引失败,错误的indexDir,不能创建indexWriter" + e);
      } catch (LockObtainFailedException e) {
       log.error("删除索引失败,错误的indexDir,不能创建indexWriter" + e);
      } catch (IOException e) {
       log.error("删除索引失败,错误的indexDir,不能创建indexWriter" + e);
      } finally {
       try {
        if (writer != null)
         writer.close();
       } catch (CorruptIndexException e) {
        log.error(e.toString());
       } catch (IOException e) {
        log.error(e.toString());
       }
      }
     }

     // query the database from the xmlBean list and return IndexBeans
     public List<IndexBean> getIndexBean(List<XmlBean> xmlBean) {
      List<IndexBean> list = new ArrayList<IndexBean>();
      XmlBean bean = null;
      String sql = null;
      List<IndexBean> listBean = null;
      for (int i = 0; i < xmlBean.size(); i++) {
       bean = xmlBean.get(i);
       sql = this.getSqlByXmlBean(bean);
       listBean = dao.getIndexBean(sql, null, bean);
       list.addAll(listBean);
      }
      return list;
     }

     // query the database from the xmlBean list and write the results into the index
     public void writeIndexBean(List<XmlBean> xmlBean, IndexWriter writer) {
      List<IndexBean> list = new ArrayList<IndexBean>();
      XmlBean bean;
      String sql;
      for (int i = 0; i < xmlBean.size(); i++) {
       bean = xmlBean.get(i);
       sql = this.getSqlByXmlBean(bean);
    //   if (bean.getTable().equalsIgnoreCase("nn_faq")) {
    //    sql = sql + " where INTSTATUS=1";
    //   }
    //   if (bean.getTable().equalsIgnoreCase("nn_news")
    //     || bean.getTable().equalsIgnoreCase("nn_affichedetail")) {
    //    sql = sql + " where strsendto = 'OP'";
    //   }
       log.warn("根据xmlBean得到sql语句: " + sql);
       dao.writeIndexBean(sql, null, bean, writer);
      }
     }

     // page size configured in the properties file
     public int getPageSize() {
      return Integer.parseInt(PropertyManager
        .getPropertyByName("index.pageSize"));
     }

     @Override
     // check whether the index already exists
     public boolean ifIndexExist() {
      File directory = new File(indexDir);
      if (!directory.exists())
       directory.mkdir();
      if (directory.listFiles() != null) {
       if (directory.listFiles().length >= 3) {
        return true;
       } else {
        return false;
       }
      }
      return false;
     }

     // search the index
     public List<IndexBean> searchIndex(String keyWord, int currentPage) {
      log.warn("开始查询索引");
      List<IndexBean> list = new ArrayList<IndexBean>();
      if (!ifIndexExist()) {
       createIndex();
       log.warn("索引不存在时创建索引");
      }
      IndexSearcher searcher;
      Query query;
      TopDocs doc;
      SimpleHTMLFormatter sHtmlF;
      Highlighter highlighter;
      try {
       log.warn("根据关键字检索索引文件");
       searcher = new IndexSearcher(this.getIndexDir());
       searcher.setSimilarity(new IKSimilarity());
       query = IKQueryParser.parse("content", keyWord);
       // configure highlighting: wrap keywords in red font tags
       sHtmlF = new SimpleHTMLFormatter("<b><font color='red'>",
         "</font></b>");
       highlighter = new Highlighter(sHtmlF, new QueryScorer(query));
       highlighter.setTextFragmenter(new SimpleFragmenter(300));
       doc = searcher.search(query, this.getIndexCount());
       ScoreDoc[] docArray = doc.scoreDocs;
       Document document;
       IndexBean bean;
       SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
       String indexdate;
       Date day;
       for (ScoreDoc d : docArray) {
        document = searcher.doc(d.doc);
        bean = new IndexBean();
        bean.setIndexId(document.getField("id").stringValue());
        bean.setTitle(document.getField("title").stringValue());
        bean.setContent(document.getField("content").stringValue());
        bean.setHigherContent(getHigherStr("content",
          bean.getContent(), highlighter));
        // read targetid as well so the URL below is built correctly
        bean.setTargetId(document.getField("targetid").stringValue());
        bean.setType(document.getField("type").stringValue());
        bean.setUrl(PropertyManager.getPropertyByName(bean.getType(),
          "urlConfig") == null ? null : PropertyManager
          .getPropertyByName(bean.getType(), "urlConfig")
          + bean.getTargetId());
        bean.setClickCount(Integer.parseInt(document.getField(
          "clickcount").stringValue()));
        bean.setDescription(document.getField("description")
          .stringValue());
        indexdate = document.getField("indexdate").stringValue();
        if (null != indexdate && !"".equals(indexdate)) {
         day = sdf.parse(document.getField("indexdate")
           .stringValue());
         bean.setIndexDate(day);
        } else {
         bean.setIndexDate(new Date());
        }
        list.add(bean);
       }
      } catch (CorruptIndexException e) {
       log.error("检索失败,检索目录出错");
      } catch (IOException e) {
       log.error("检索失败,解析关键字出错");
      } catch (java.text.ParseException e) {
       log.error("时间格式化错误");
      }
      return list;
     }

     // paged index search
     public PageDataSet searchIndex(String keyWord, PageCondition page) {
      log.warn("开始分页查询索引");
      PageDataSet pds = new PageDataSet();
      // rows per page
      pds.setPageSize(page.getPageSize());
      //List<Document> list = new ArrayList<Document>();
      List<IndexBean> l = new ArrayList<IndexBean>();
      if (!this.ifIndexExist()) {
       return pds;
      }
      IndexSearcher searcher;
      Query query;
      TopDocs doc;
      Sort sort;
      SimpleHTMLFormatter sHtmlF;
      Highlighter highlighter;
      try {
       log.warn("根据关键字取得索引并按每页记录数取得每页数据");
       searcher = new IndexSearcher(getIndexDir());
       searcher.setSimilarity(new IKSimilarity());
       if (null == keyWord) {
        keyWord = "";
       }
       query = IKQueryParser.parse("content", keyWord);
       Filter f = new QueryFilter(query);
       sort = new Sort(new SortField("indexdate", SortField.STRING, true));
       // configure highlighting: wrap keywords in red font tags
       sHtmlF = new SimpleHTMLFormatter("<b><font color='red'>",
         "</font></b>");
       highlighter = new Highlighter(sHtmlF, new QueryScorer(query));
       // set the highlight fragment length
       highlighter.setTextFragmenter(new SimpleFragmenter(PropertyManager
         .getPropertyByName("index.contentSize") == null ? 300
         : Integer.parseInt(PropertyManager
           .getPropertyByName("index.contentSize"))));
       doc = searcher.search(query, f, this.getIndexCount(), sort);
       ScoreDoc[] docArray = doc.scoreDocs;
       // total number of hits
       pds.setTotalCount(docArray.length);
       // current page
       pds.setCurrentPage(page.getCurrentPage());
       // page window: rows from startIndex to endIndex
       int startIndex = (page.getCurrentPage() - 1) * page.getPageSize();
       int endIndex = startIndex + page.getPageSize();
       if (endIndex > pds.getTotalCount()) {
        endIndex = pds.getTotalCount();
       }
       // System.out.println("开始:" + startIndex + ",到" + endIndex);
       // extract the documents needed for this page
       // (rows startIndex to endIndex-1)
       Document document;
       IndexBean bean;
       Date day;
       SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
       for (int i = startIndex; i < endIndex; i++) {
        document = searcher.doc(docArray[i].doc);
        bean = new IndexBean();
        bean.setIndexId(document.getField("id").stringValue());
        bean.setTitle(document.getField("title").stringValue());
        bean.setContent(document.getField("content").stringValue());
        bean.setHigherContent(getHigherStr("content", StringTools
          .getHtmlContent(bean.getContent()), highlighter));
        bean.setTargetId(document.getField("targetid").stringValue());
        bean.setType(document.getField("type").stringValue());
        bean.setUrl(PropertyManager.getPropertyByName(bean.getType(),
          "urlConfig") == null ? null : PropertyManager
          .getPropertyByName(bean.getType(), "urlConfig")
          + bean.getTargetId());
        bean.setClickCount(Integer.parseInt(document.getField(
          "clickcount").stringValue()));
        bean.setDescription(document.getField("description")
          .stringValue());
        String indexdate = document.getField("indexdate").stringValue();
        if (null != indexdate && !"".equals(indexdate)) {
         day = sdf.parse(document.getField("indexdate")
           .stringValue());
         bean.setIndexDate(day);
        } else {
         bean.setIndexDate(new Date());
        }
        l.add(bean);
       }
       // data for this page
       pds.setDataset(l);
      } catch (CorruptIndexException e) {
       log.error("分页检索失败,检索目录出错",e);
      } catch (IOException e) {
       log.error("分页检索失败",e);
      } catch (java.text.ParseException e) {
       log.error("时间格式化错误");
      }
      return pds;
     }

     // update the index
     public void updateIndex() {
      File file = new File(indexDir);
      if (!file.exists()) {
       file.mkdir();
       createIndex();
      }
      List<SearchStateBean> bean = sdao.getUpdateInfo();
      for (SearchStateBean b : bean) {
       // perform the corresponding operation based on the state value
       boolean isswitch = true;
       switch (b.getState()) {
       case SearchStateEnum.INSERT: {
        insert(b);
        break;
       }
       case SearchStateEnum.UPDATE: {
        update(b);
        break;
       }
       case SearchStateEnum.DELETE: {
        delete(b);
        break;
       }
       default: {
        System.out.println(b.getId() + "状态出现问题 请更正");
        isswitch = false;
       }
       }
       if (isswitch) {
        sdao.delete(b);
       }
      }
     }

     // build the indexing SQL statement from an xmlBean
     public String getSqlByXmlBean(XmlBean bean) {
      String sql = "select " + bean.getPrimaryKeyName() + ",";
      // title and content may reference the same column, so duplicates are filtered out
      for (String field : bean.getContents()) {
       sql = sql + field + ",";
      }
      for (String field : bean.getTitles()) {
       boolean ishave = true;
       for (String f : bean.getContents()) {
        if (field.equals(f)) {
         ishave = false;
         break;
        }
       }
       if (ishave)
        sql = sql + field + ",";
      }
      if (bean.getUpdateTime() != null && !bean.getUpdateTime().equals("")) {
       sql = sql + bean.getCreateTime() + "," + bean.getUpdateTime() + ",";
      } else {
       sql = sql + bean.getCreateTime() + ",";
      }
      sql = sql.substring(0, sql.length() - 1) + " ";
      sql = sql + "from " + bean.getTable() + " where 1=1";
      for (String field : bean.getConditions()) {
       if (field != null && !field.trim().equals("")) {
        sql = sql + " and " + field;
       }
      }
      return sql;
     }

     public int getIndexCountByCondition(IndexBean bean) {
      int i = 0;
      Query query = null;
      IndexSearcher s = null;
      try {
       query = IKQueryParser.parseMultiField(ids, new String[] {
         bean.getTargetId(), bean.getType() }, occur);
       s = new IndexSearcher(this.indexDir);
       i = s.search(query).length();
      } catch (IOException e) {
       log.error("取出索引个数错误:" + e);
      }
      return i;
     }

     // add index documents for the given beans
     public void addDocument(List<IndexBean> list, IndexWriter writer)
       throws CorruptIndexException, IOException {
      log.warn("开始写入索引字段");
      Document document;
      for (IndexBean bean : list) {
       if (getIndexCountByCondition(bean) == 0) {
        document = new Document();
        Field id = new Field("id", bean.getIndexId() == null ? ""
          : bean.getIndexId(), Field.Store.YES,
          Field.Index.ANALYZED);
        Field title = new Field("title", bean.getTitle() == null ? ""
          : bean.getTitle(), Field.Store.YES,
          Field.Index.ANALYZED);
        Field content = new Field("content",
          bean.getContent() == null ? "" : bean.getContent(),
          Field.Store.YES, Field.Index.ANALYZED);
        Field type = new Field("type", bean.getType() == null ? ""
          : bean.getType(), Field.Store.YES, Field.Index.ANALYZED);
        Field url = new Field("url", bean.getUrl() == null ? "" : bean
          .getUrl(), Field.Store.YES, Field.Index.ANALYZED);
        Field clickCount = new Field("clickcount",
          String.valueOf(bean.getClickCount()), Field.Store.YES,
          Field.Index.ANALYZED);
        Field indexDate = new Field("indexdate",
          bean.getIndexDate() == null ? "" : bean.getIndexDate()
            .toString(), Field.Store.YES,
          Field.Index.NOT_ANALYZED);
        Field description = new Field("description", bean
          .getDescription() == null ? "" : bean.getDescription(),
          Field.Store.YES, Field.Index.ANALYZED);
        document.add(new Field("targetid",
          bean.getTargetId() == null ? "" : bean.getTargetId(),
          Field.Store.YES, Field.Index.ANALYZED));
        document.add(id);
        document.add(title);
        document.add(content);
        document.add(type);
        document.add(url);
        document.add(clickCount);
        document.add(indexDate);
        document.add(description);
        writer.addDocument(document);
       }
      }
     }

     // convenience overload: return the single IndexBean for one xmlBean
     public IndexBean getIndexBean(XmlBean xmlBean) {
      List<XmlBean> xmlBeans = new ArrayList<XmlBean>();
      xmlBeans.add(xmlBean);
      List<IndexBean> indexbean = this.getIndexBean(xmlBeans);
      if (indexbean.size() > 0)
       return indexbean.get(0);
      return null;
     }

     // synchronize an update with the database
     public void update(SearchStateBean bean) {
      log.warn("先删除已更改的索引");
      deleteIndex(ids, new String[] { bean.getTargetId(),
        bean.getTargetTable() }, occur);
      log.warn("再加入更改后的索引");
      insert(bean);

     }

     // synchronize an insert with the database
     public void insert(SearchStateBean bean) {
      log.warn("开始同步数据");
      xmlService = new SAXXmlParserServiceImpl();
      XmlBean xmlBean = xmlService.getXmlBeanByType(bean.getTargetTable());
      String sql = this.getSqlByXmlBean(xmlBean) + " and "
        + xmlBean.getPrimaryKeyName() + "='" + bean.getTargetId() + "'";
      log.warn("需要更新的sql语句: " + sql);
      List<IndexBean> indexBeans = dao.getIndexBean(sql, null, xmlBean);
      IndexWriter.setDefaultWriteLockTimeout(Integer.MAX_VALUE);
      IndexWriter writer = null;
      try {
       writer = new IndexWriter(indexDir, new IKAnalyzer(), false);
       addDocument(indexBeans, writer);
       log.warn("同步数据成功,关闭writer");
      } catch (CorruptIndexException e) {
       log.error("同步数据失败" + e);
      } catch (LockObtainFailedException e) {
       log.error("同步数据失败" + e);
      } catch (IOException e) {
       log.error("同步数据失败" + e);
      } finally {
       try {
        if (writer != null)
         writer.close();
       } catch (CorruptIndexException e) {
        log.error(e.toString());
       } catch (IOException e) {
        log.error(e.toString());
       }
      }
     }

     // synchronize a delete with the database
     public void delete(SearchStateBean bean) {
      this.deleteIndex(ids, new String[] { bean.getTargetId(),
        bean.getTargetTable() }, occur);
     }

     // delete index documents matching the given fields and values
     public void deleteIndex(String[] fields, String[] values, Occur[] occur) {
      log.warn("开始根据条件删除索引");
      IndexWriter.setDefaultWriteLockTimeout(Integer.MAX_VALUE);
      IndexWriter writer = null;
      Query query;
      try {
       writer = new IndexWriter(indexDir, new IKAnalyzer(), false);
       query = IKQueryParser.parseMultiField(fields, values, occur);
       writer.deleteDocuments(query);
       writer.optimize();
       log.warn("删除索引成功,关闭writer");
      } catch (CorruptIndexException e) {
       log.error("删除索引失败" + e);
      } catch (LockObtainFailedException e) {
       log.error("删除索引失败" + e);
      } catch (IOException e) {
       log.error("删除索引失败" + e);
      } finally {
       try {
        if (writer != null)
         writer.close();
       } catch (CorruptIndexException e) {
        log.error(e.toString());
       } catch (IOException e) {
        log.error(e.toString());
       }
      }
     }

     // build the highlighted fragment for a field value
     public String getHigherStr(String field, String value,
       Highlighter highlighter) {
      TokenStream stream = new IKAnalyzer().tokenStream(field,
        new StringReader(value));
      String str = null;
      try {
       str = highlighter.getBestFragment(stream, value);
      } catch (IOException e) {
       log.error("获得高亮字段失败" + e);
      }
      return str;
     }

     public String getIndexDir() {
      return indexDir;
     }

     public void setIndexDir(String indexDir) {
      this.indexDir = indexDir;
     }

     // total number of rows currently in the database across all configured tables
     public int getDataCount() {
      String sql = "select count(*) from ";
      int count = 0;
      for (XmlBean bean : beans) {
       count = count + dao.getUniqueValue(sql + bean.getTable());
      }
      return count;
     }

     // number of documents currently in the index
     public int getIndexCount() {
      int count = 0;
      if (ifIndexExist()) {
       IndexReader read = null;
       try {
        read = IndexReader.open(this.indexDir);
        count = read.numDocs();
        read.close();
       } catch (CorruptIndexException e) {
        log.error("获得当前索引数量失败" + e);
       } catch (IOException e) {
        log.error("获得当前索引数量失败" + e);
       }
      }
      return count;
     }
    }
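
To isolate the two techniques in the title, here is a minimal standalone sketch of the highlight-and-sort core. It is not part of the original service and only reuses calls that already appear in the listing above (IndexWriter(File, Analyzer), IndexSearcher(String), IKQueryParser.parse, Highlighter with SimpleHTMLFormatter, Sort/SortField), so it assumes the same Lucene 2.x-era and IK Analyzer jars; the HighlightSortDemo class name and the demo-index directory are made-up illustrations.

    import java.io.File;
    import java.io.StringReader;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.search.highlight.Highlighter;
    import org.apache.lucene.search.highlight.QueryScorer;
    import org.apache.lucene.search.highlight.SimpleFragmenter;
    import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
    import org.wltea.analyzer.lucene.IKAnalyzer;
    import org.wltea.analyzer.lucene.IKQueryParser;
    import org.wltea.analyzer.lucene.IKSimilarity;

    public class HighlightSortDemo {

        public static void main(String[] args) throws Exception {
            File dir = new File("demo-index"); // hypothetical index directory

            // 1. Index two documents. "content" is analyzed by IKAnalyzer,
            //    "indexdate" is stored un-analyzed so it can be sorted on.
            IndexWriter writer = new IndexWriter(dir, new IKAnalyzer());
            writer.addDocument(buildDoc("Lucene 全文搜索与高亮示例", "2012-01-05"));
            writer.addDocument(buildDoc("按时间排序的搜索结果", "2012-03-20"));
            writer.close();

            // 2. Parse the keyword with the IK query parser and sort on
            //    indexdate, newest first (the same Sort the paged search uses).
            IndexSearcher searcher = new IndexSearcher(dir.getPath());
            searcher.setSimilarity(new IKSimilarity());
            Query query = IKQueryParser.parse("content", "搜索");
            Sort sort = new Sort(new SortField("indexdate", SortField.STRING, true));
            TopDocs top = searcher.search(query, null, 10, sort);

            // 3. Wrap matched terms in red <font> tags, as the service above does.
            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(
                    "<b><font color='red'>", "</font></b>");
            Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
            highlighter.setTextFragmenter(new SimpleFragmenter(300));

            for (ScoreDoc sd : top.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                String content = doc.getField("content").stringValue();
                TokenStream stream = new IKAnalyzer().tokenStream("content",
                        new StringReader(content));
                System.out.println(doc.getField("indexdate").stringValue() + " : "
                        + highlighter.getBestFragment(stream, content));
            }
            searcher.close();
        }

        // build a document with just the two fields the demo needs
        private static Document buildDoc(String content, String date) {
            Document doc = new Document();
            doc.add(new Field("content", content, Field.Store.YES,
                    Field.Index.ANALYZED));
            doc.add(new Field("indexdate", date, Field.Store.YES,
                    Field.Index.NOT_ANALYZED));
            return doc;
        }
    }

Running it should print the 2012-03-20 document first (indexdate sorted descending), with each occurrence of 搜索 wrapped in the red font tags; passing false as the third SortField argument would sort ascending instead.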

  • Original post: https://www.cnblogs.com/liaomin416100569/p/9331855.html