• Lucene 中文分词器 和 删除 和修改词库


    导入jar包

    package com.bw.lucene;
    
    import java.nio.file.Paths;
    
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.LongPoint;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.MatchAllDocsQuery;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.store.FSDirectory;
    import org.wltea.analyzer.lucene.IKAnalyzer;
    
    /**
     * Small demo of maintaining a Lucene index with the IK Chinese analyzer:
     * adding, updating, deleting and querying {@code Artical} documents.
     *
     * <p>Every method opens the index at {@link #path}, performs one operation and
     * releases all Lucene resources (directory, reader/writer, analyzer) via
     * try-with-resources, so the index write lock is freed even when an
     * operation throws.
     */
    public class WriteDocument {
        /** Filesystem location of the Lucene index directory. */
        static String path = "E://lucene";

        /**
         * Entry point: runs the update demo and then prints the index contents.
         *
         * @param args unused
         * @throws Exception if any index operation fails
         */
        public static void main(String[] args) throws Exception {
            // Swap in one of the calls below to exercise the other operations:
            // writeDoc();
            // deleteDocById();
            update();
            queryAll();
        }

        /**
         * Parses the query text "大数据" against the {@code content} field with the
         * IK analyzer and prints the top 10 matching articles.
         *
         * @throws Exception if the index cannot be opened, parsed against or searched
         */
        public static void queryParser() throws Exception {
            try (FSDirectory directory = FSDirectory.open(Paths.get(path));
                    DirectoryReader reader = DirectoryReader.open(directory);
                    Analyzer analyzer = new IKAnalyzer(true)) {
                QueryParser parser = new QueryParser("content", analyzer);
                printHits(new IndexSearcher(reader), parser.parse("大数据"));
            }
        }

        /**
         * Adds one sample article (id 129) to the index.
         *
         * @throws Exception if the index cannot be opened or written
         */
        public static void writeDoc() throws Exception {
            Artical art = new Artical();
            art.setAuthor("海员");
            art.setContent("大数据那家强,北京找北网 学不会报销往返路费 学会后在交费");
            art.setId(129L);
            art.setTitle("招生简章");
            art.setUrl("www.txstory.com");

            // Per the original author's note, `true` turns on IK's "smart" segmentation mode.
            try (FSDirectory directory = FSDirectory.open(Paths.get(path));
                    Analyzer analyzer = new IKAnalyzer(true);
                    IndexWriter writer = openWriter(directory, analyzer)) {
                writer.addDocument(art.toDocument());
                // Closing the writer (end of this block) commits the pending change.
            }
        }

        /**
         * Replaces the documents matching the term {@code author = "海员"} with a
         * single new article (id 130).
         *
         * @throws Exception if the index cannot be opened or written
         */
        public static void update() throws Exception {
            Artical artical = new Artical();
            artical.setAuthor("海员");
            artical.setContent("大学生班开班了");
            artical.setId(130L);
            artical.setTitle("招生简章");
            artical.setUrl("www.txstory.com");

            try (FSDirectory directory = FSDirectory.open(Paths.get(path));
                    Analyzer analyzer = new IKAnalyzer(true);
                    IndexWriter writer = openWriter(directory, analyzer)) {
                // updateDocument deletes every document containing the term, then adds
                // the new one.
                // NOTE(review): this removes ALL documents whose author term is "海员",
                // not just one id; whether the term matches also depends on how
                // Artical.toDocument() indexes the author field - confirm this is intended.
                writer.updateDocument(new Term("author", "海员"), artical.toDocument());
                writer.commit();
            }
        }

        /**
         * Deletes the document whose {@code id} point value is exactly 129.
         *
         * @throws Exception if the index cannot be opened or written
         */
        public static void deleteDocById() throws Exception {
            try (FSDirectory directory = FSDirectory.open(Paths.get(path));
                    Analyzer analyzer = new IKAnalyzer(true);
                    IndexWriter writer = openWriter(directory, analyzer)) {
                // To delete an inclusive id range instead (e.g. 126 through 128), use
                // LongPoint.newRangeQuery("id", 126, 128).
                Query query = LongPoint.newExactQuery("id", 129);
                writer.deleteDocuments(query);
                writer.commit();
            }
        }

        /**
         * Deletes every document matched by parsing "招生" against the {@code title}
         * field. Deletions go through the {@link IndexWriter}, not the reader.
         *
         * @throws Exception if the index cannot be opened, parsed against or written
         */
        public static void deleteDoc() throws Exception {
            try (FSDirectory directory = FSDirectory.open(Paths.get(path));
                    Analyzer analyzer = new IKAnalyzer(true);
                    IndexWriter writer = openWriter(directory, analyzer)) {
                QueryParser parser = new QueryParser("title", analyzer);
                writer.deleteDocuments(parser.parse("招生"));
                writer.commit();
            }
        }

        /**
         * Prints up to 10 documents from the index using a match-all query.
         *
         * @throws Exception if the index cannot be opened or searched
         */
        public static void queryAll() throws Exception {
            try (FSDirectory directory = FSDirectory.open(Paths.get(path));
                    DirectoryReader reader = DirectoryReader.open(directory)) {
                printHits(new IndexSearcher(reader), new MatchAllDocsQuery());
            }
        }

        /** Creates an index writer over {@code directory} configured with {@code analyzer}. */
        private static IndexWriter openWriter(FSDirectory directory, Analyzer analyzer)
                throws Exception {
            return new IndexWriter(directory, new IndexWriterConfig(analyzer));
        }

        /** Runs {@code query}, then prints each of the top 10 hits converted to an article. */
        private static void printHits(IndexSearcher searcher, Query query) throws Exception {
            TopDocs topDocs = searcher.search(query, 10);
            for (ScoreDoc hit : topDocs.scoreDocs) {
                Document doc = searcher.doc(hit.doc);
                System.out.println(Artical.toAritical(doc));
            }
        }
    }
  • 相关阅读:
    【教程分享】嵌入式Linux+QT开发零基础入门+项目实战
    4412开发板图像识别项目-移植百度AI依赖库curl(三)
    8月10日学习日志
    每周进度汇总
    8月9日学习日志
    8月8日学习日志
    8月7日学习日志
    8月6日学习日志
    《大道至简》读后感
    8月5日学习日志
  • 原文地址:https://www.cnblogs.com/JBLi/p/10903714.html
Copyright © 2020-2023  润新知