• Lucene 1.9 多目录搜索的一个bug


       这个问题解决有一段时间了。刚才在Lucene的群里有朋友问到了这个问题,为方便其他朋友遇到此问题时参考,特贴出来。

    在需要搜索多个索引目录,多个字段的时候,发现有错误,提示的大概意思是(具体忘记了),关键词已经存在字典中。研究Lucene的源代码发现,是Lucene.net 1.9 rc1的一个bug :

    具体代码在Lucene.Net.Search命名空间下的Query类的Combine method :

    修改前的代码如下:

    // Combines the given queries into one query (original, unfixed version).
    // Each input that is a BooleanQuery with IsCoordDisabled() true and only
    // SHOULD clauses is split into its clause queries; every other input is
    // taken as a whole. The collected queries are stored in a Hashtable (used
    // as a set, key == value) and either returned directly (single entry) or
    // added as SHOULD clauses to a new BooleanQuery(true).
    // NOTE: this version is defective; see the NOTE comments below.
    public virtual Query Combine(Query[] queries)
      {
       System.Collections.Hashtable uniques = new System.Collections.Hashtable();
       for (int i = 0; i < queries.Length; i++)
       {
        Query query = queries[i];
        BooleanClause[] clauses = null;
        // check if we can split the query into clauses
        bool splittable = (query is BooleanQuery);
        if (splittable)
        {
         BooleanQuery bq = (BooleanQuery) query;
         splittable = bq.IsCoordDisabled();
         clauses = bq.GetClauses();
         // The loop stops at the first clause that is not SHOULD, leaving splittable false.
         for (int j = 0; splittable && j < clauses.Length; j++)
         {
          splittable = (clauses[j].GetOccur() == BooleanClause.Occur.SHOULD);
         }
        }
        if (splittable)
        {
         for (int j = 0; j < clauses.Length; j++)
         {
                            Query tmp = clauses[j].GetQuery();
          // NOTE: Hashtable.Add throws ArgumentException when the key is already
          // present, so a query equal to an earlier one fails here.
          uniques.Add(tmp, tmp);
         }
        }
        else
        {
                        // NOTE: same duplicate-key problem as above.
                        uniques.Add(query, query);
        }
       }
       // optimization: if we have just one query, just return it
       if (uniques.Count == 1)
       {
                    // NOTE: Current is read without a prior MoveNext(), so the enumerator
                    // is not positioned on an element; also, a Hashtable enumerator
                    // yields DictionaryEntry values, not Query objects.
                    return (Query) uniques.GetEnumerator().Current;
       }
       System.Collections.IEnumerator it = uniques.GetEnumerator();
       BooleanQuery result = new BooleanQuery(true);
       while (it.MoveNext())
       {
        // NOTE: it.Current is a DictionaryEntry here, so this cast to Query fails.
        result.Add((Query) it.Current, BooleanClause.Occur.SHOULD);
       }
       return result;
      }

    修改后的代码(飘红的地方是更改过的):

    /// <summary>
    /// Combines the given queries into a single query, keeping each distinct
    /// query only once.
    /// </summary>
    /// <remarks>
    /// An input that is a <c>BooleanQuery</c> with <c>IsCoordDisabled()</c> true
    /// and only SHOULD clauses is split into its clause queries; any other
    /// input is taken as a whole. Duplicates are detected through the
    /// Hashtable's default key comparison (Equals/GetHashCode of the query).
    /// </remarks>
    /// <param name="queries">The queries to combine.</param>
    /// <returns>
    /// The single distinct query when exactly one remains; otherwise a
    /// <c>new BooleanQuery(true)</c> holding every distinct query as a SHOULD
    /// clause (empty when <paramref name="queries"/> is empty).
    /// </returns>
    public virtual Query Combine(Query[] queries)
    {
        // Used as a set: each distinct query is stored as both key and value.
        System.Collections.Hashtable uniques = new System.Collections.Hashtable();
        for (int i = 0; i < queries.Length; i++)
        {
            Query query = queries[i];
            BooleanClause[] clauses = null;
            // check if we can split the query into clauses
            bool splittable = (query is BooleanQuery);
            if (splittable)
            {
                BooleanQuery bq = (BooleanQuery) query;
                splittable = bq.IsCoordDisabled();
                clauses = bq.GetClauses();
                // Stops at the first clause that is not SHOULD.
                for (int j = 0; splittable && j < clauses.Length; j++)
                {
                    splittable = (clauses[j].GetOccur() == BooleanClause.Occur.SHOULD);
                }
            }
            if (splittable)
            {
                for (int j = 0; j < clauses.Length; j++)
                {
                    Query tmp = clauses[j].GetQuery();
                    // Hashtable.Add throws ArgumentException on an existing key,
                    // so duplicates must be skipped in this branch as well.
                    if (!uniques.Contains(tmp))
                    {
                        uniques.Add(tmp, tmp);
                    }
                }
            }
            else if (!uniques.Contains(query))
            {
                // Skip queries already collected (fix by kwklover, 2008/1/17).
                uniques.Add(query, query);
            }
        }
        // optimization: if we have just one query, just return it
        if (uniques.Count == 1)
        {
            // The enumerator starts before the first entry; advance it once
            // before reading (fix by kwklover, 2008/1/17).
            System.Collections.IDictionaryEnumerator only = uniques.GetEnumerator();
            only.MoveNext();
            return (Query) only.Key;
        }
        BooleanQuery result = new BooleanQuery(true);
        // Iterate the keys: enumerating the Hashtable itself yields
        // DictionaryEntry values, which cannot be cast to Query.
        foreach (Query unique in uniques.Keys)
        {
            result.Add(unique, BooleanClause.Occur.SHOULD);
        }
        return result;
    }

    另外在Lucene 2.0版已经修复了此问题。不过Lucene 2.0 和 Lucene 1.9的 API有比较大的变化

  • 相关阅读:
    6_10 下落的树叶(UVa699)<二叉树的DFS>
    6_9 天平(UVa839)<二叉树的DFS>
    6_8 树(UVa548)<从中序和后序恢复二叉树>
    6_7 树的层次遍历(UVa122)<二叉树的动态创建与BFS>
    6_6 小球下落(UVa679)<完全二叉树编号>
    6_4 破损的键盘(UVa11988)<链表>
    6_3 矩阵链乘(UVa424)<用栈实现简单的表达式解析>
    6_2 铁轨(UVa514)<栈>
    第五周课程总结&试验报告(三)
    第四周课程总结和实验报告
  • 原文地址:https://www.cnblogs.com/kwklover/p/1100556.html
Copyright © 2020-2023  润新知