Lucene是一个全文检索类库(Library),其基本原理是先为文档建立索引,再在索引上进行检索。Lucene在搜索引擎领域的影响非常大,相关介绍网上文章很多,这里不再赘述。本文主要通过一个本地文件检索程序,简单学习一下Lucene的基本使用。
利用Lucene建立检索系统主要分两步:首先建立索引,然后对索引进行检索;分析器(Analyzer)则贯穿建立索引和检索这两个阶段。
(1)建立索引
Code
/// <summary>
/// Recursively walks <paramref name="file"/> and indexes every entry that is not a directory.
/// </summary>
/// <param name="writer">Index writer passed on to <c>IndexFile</c> for each entry.</param>
/// <param name="file">A file-system entry; directories are expanded, anything else is indexed directly.</param>
private static void IndexDirectory(IndexWriter writer, FileInfo file)
{
    // Not a directory: index the entry itself and stop.
    if (!Directory.Exists(file.FullName))
    {
        IndexFile(writer, file);
        return;
    }

    // Directory: descend into every child entry (files and sub-directories alike).
    foreach (String entryPath in Directory.GetFileSystemEntries(file.FullName))
    {
        IndexDirectory(writer, new FileInfo(entryPath));
    }
}
/// <summary>
/// Adds one document describing <paramref name="file"/> to the index.
/// </summary>
/// <remarks>
/// The document has two stored, untokenized fields: <c>name</c> (the file name) and
/// <c>path</c> (the full path). File contents are not indexed.
/// </remarks>
/// <param name="writer">Open index writer that receives the document.</param>
/// <param name="file">The file to describe in the index.</param>
private static void IndexFile(IndexWriter writer, FileInfo file)
{
    Console.Out.WriteLine("创建索引" + file.FullName);
    filecount++; // one more file handed to the indexer

    Document doc = new Document();
    doc.Add(new Field("name", file.Name, Field.Store.YES, Field.Index.UN_TOKENIZED));
    doc.Add(new Field("path", file.FullName, Field.Store.YES, Field.Index.UN_TOKENIZED));

    // Write straight to the caller's writer. Building a throw-away RAMDirectory per file
    // and merging it with AddIndexes would trigger a segment merge for every single file
    // and leave the temporary writer unclosed if AddDocument threw. Both fields are
    // UN_TOKENIZED, so the writer's analyzer does not affect what gets indexed.
    writer.AddDocument(doc);
}
/// <summary>
/// Recursively walks <paramref name="file"/> and indexes every entry that is not a directory.
/// </summary>
/// <param name="writer">Index writer passed on to <c>IndexFile</c> for each entry.</param>
/// <param name="file">A file-system entry; directories are expanded, anything else is indexed directly.</param>
private static void IndexDirectory(IndexWriter writer, FileInfo file)
{
    // Not a directory: index the entry itself and stop.
    if (!Directory.Exists(file.FullName))
    {
        IndexFile(writer, file);
        return;
    }

    // Directory: descend into every child entry (files and sub-directories alike).
    foreach (String entryPath in Directory.GetFileSystemEntries(file.FullName))
    {
        IndexDirectory(writer, new FileInfo(entryPath));
    }
}
/// <summary>
/// Adds one document describing <paramref name="file"/> to the index.
/// </summary>
/// <remarks>
/// The document has two stored, untokenized fields: <c>name</c> (the file name) and
/// <c>path</c> (the full path). File contents are not indexed.
/// </remarks>
/// <param name="writer">Open index writer that receives the document.</param>
/// <param name="file">The file to describe in the index.</param>
private static void IndexFile(IndexWriter writer, FileInfo file)
{
    Console.Out.WriteLine("创建索引" + file.FullName);
    filecount++; // one more file handed to the indexer

    Document doc = new Document();
    doc.Add(new Field("name", file.Name, Field.Store.YES, Field.Index.UN_TOKENIZED));
    doc.Add(new Field("path", file.FullName, Field.Store.YES, Field.Index.UN_TOKENIZED));

    // Write straight to the caller's writer. Building a throw-away RAMDirectory per file
    // and merging it with AddIndexes would trigger a segment merge for every single file
    // and leave the temporary writer unclosed if AddDocument threw. Both fields are
    // UN_TOKENIZED, so the writer's analyzer does not affect what gets indexed.
    writer.AddDocument(doc);
}
(2)利用索引查询
Code
/// <summary>
/// Looks up indexed files by name and prints the stored path of every hit to the console.
/// </summary>
/// <remarks>
/// The query kind is chosen by the <c>type</c> member: 0 builds a prefix query,
/// 1 builds a fuzzy query, both against the <c>name</c> field.
/// </remarks>
/// <param name="filename">Text to match against the <c>name</c> field.</param>
/// <exception cref="Exception">Thrown when <c>type</c> is neither 0 nor 1.</exception>
private static void Search(string filename)
{
    IndexSearcher searcher = new IndexSearcher(Dest_Index_Path);
    try
    {
        Term term = new Term("name", filename);
        Query query;
        if (type == 0)
        {
            query = new PrefixQuery(term);
        }
        else if (type == 1)
        {
            query = new FuzzyQuery(term);
        }
        else
        {
            throw new Exception("查询类别参数不对!");
        }

        Hits hits = searcher.Search(query);
        Console.WriteLine("满足条件的查找结果:");
        for (int i = 0; i < hits.Length(); i++)
        {
            Console.WriteLine("路径为:" + hits.Doc(i).GetField("path").StringValue());
        }
    }
    finally
    {
        // Always release the searcher, including when the query type is invalid
        // or the search/printing throws; otherwise the index files stay open.
        searcher.Close();
    }
}
/// <summary>
/// Looks up indexed files by name and prints the stored path of every hit to the console.
/// </summary>
/// <remarks>
/// The query kind is chosen by the <c>type</c> member: 0 builds a prefix query,
/// 1 builds a fuzzy query, both against the <c>name</c> field.
/// </remarks>
/// <param name="filename">Text to match against the <c>name</c> field.</param>
/// <exception cref="Exception">Thrown when <c>type</c> is neither 0 nor 1.</exception>
private static void Search(string filename)
{
    IndexSearcher searcher = new IndexSearcher(Dest_Index_Path);
    try
    {
        Term term = new Term("name", filename);
        Query query;
        if (type == 0)
        {
            query = new PrefixQuery(term);
        }
        else if (type == 1)
        {
            query = new FuzzyQuery(term);
        }
        else
        {
            throw new Exception("查询类别参数不对!");
        }

        Hits hits = searcher.Search(query);
        Console.WriteLine("满足条件的查找结果:");
        for (int i = 0; i < hits.Length(); i++)
        {
            Console.WriteLine("路径为:" + hits.Doc(i).GetField("path").StringValue());
        }
    }
    finally
    {
        // Always release the searcher, including when the query type is invalid
        // or the search/printing throws; otherwise the index files stay open.
        searcher.Close();
    }
}
我对Lucene.Net的API文档目录下的2465个文件做了一个测试。运行程序时输入:MySearcher -file MSDN。
结果如下:
另附源代码: