• Lucene 3.5 example


    /*
     * To change this template, choose Tools | Templates
     * and open the template in the editor.
     */
    package com.jd.lucene;

    import java.io.File;
    import java.io.IOException;
    import java.sql.Date;

    import java.text.Format;
    import java.text.SimpleDateFormat;
    import java.util.ArrayList;
    import java.util.LinkedList;
    import java.util.List;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Fieldable;
    import org.apache.lucene.document.NumericField;
    import org.apache.lucene.index.CorruptIndexException;
    import org.apache.lucene.queryParser.ParseException;
    import org.apache.lucene.queryParser.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;
    import org.apache.lucene.search.TopFieldDocs;
    import org.apache.lucene.search.TopScoreDocCollector;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;

    public class Search {
        private static String indexPath = "/home/mlzboy/my/crawler/index";//索引存放目录  
        /**
         * @param args
         * @throws IOException
         * @throws CorruptIndexException
         * @throws ParseException
         */
        public static void main(String[] args) throws CorruptIndexException, IOException, ParseException {
            // TODO Auto-generated method stub
            IndexSearcher searcher = new IndexSearcher(FSDirectory.open(new File(indexPath)));
            System.out.println("total blogs:"+searcher.getIndexReader().numDocs());
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_34);
            QueryParser parser = new QueryParser(Version.LUCENE_34, "title", analyzer);//有变化的地方  
            Query query = parser.parse("诺基亚95");  
            query = parser.parse("lucene");
            
            
    //        SortField s1=new SortField("read",SortField.INT,true);
    List<SortField> sortFields = new ArrayList<SortField>();      
          sortFields.add(new SortField("read", SortField.INT, true));
          sortFields.add(new SortField("date", SortField.LONG, true));
          SortField[] aa=new SortField[2];
          sortFields.toArray(aa);
          Sort sort=new Sort(aa);

            //        Sort sort=new Sort();
    //        sort.setSort(s1);

            TopFieldDocs tfd=searcher.search(query,100,sort);
            ScoreDoc[] hits = tfd.scoreDocs;
    //        TopScoreDocCollector collector = TopScoreDocCollector.create(100,false);//有变化的地方  
    //        searcher.search(query, collector);  
    //        ScoreDoc[] hits = collector.topDocs().scoreDocs;            
            
            System.out.println(hits.length);  
            for (int i = 0; i < hits.length; i++) {  
                Document doc = searcher.doc(hits[i].doc);//new method is.doc()
                System.out.print(doc.getFieldable("id")+" "+doc.getFieldable("title")+"   "+hits[i].toString()+" ");  
                System.out.print("=="+hits[i].doc+"====");
                System.out.print(doc.getFieldable("link"));
                Format formatter;
      formatter = new SimpleDateFormat("yyyy-MM-dd hh:mm");
      String s = formatter.format(Long.parseLong(doc.get("date")));
     
      System.out.print(s+" ");
      System.out.println(Integer.parseInt(doc.get("read")));
            }  
          
    //        System.out.println("Found " + collector.getTotalHits());          
    System.out.println("Found "+tfd.totalHits);
           }

    }

    /*
     * To change this template, choose Tools | Templates
     * and open the template in the editor.
     */
    package com.jd.lucene;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.Scanner;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.NumericField;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.IndexWriterConfig.OpenMode;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;

    public class Index {
        private static String indexPath = "/home/mlzboy/my/crawler/index";//索引存放目录 
        /**
         * @param args
         */
        public static void main(String[] args) {
            // TODO Auto-generated method stub
            try {

                Directory dir = FSDirectory.open(new File(indexPath));
                Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_34);
                IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_34, analyzer);

    //            if (create) {
    //              // Create a new index in the directory, removing any
    //              // previously indexed documents:
    //              iwc.setOpenMode(OpenMode.CREATE);
    //            } else {
    //              // Add new documents to an existing index:
                  iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
    //            }

                // Optional: for better indexing performance, if you
                // are indexing many documents, increase the RAM
                // buffer.  But if you do this, increase the max heap
                // size to the JVM (eg add -Xmx512m or -Xmx1g):
                //
                // iwc.setRAMBufferSizeMB(256.0);

                IndexWriter writer = new IndexWriter(dir, iwc);
                writer.deleteAll();
                Scanner scanner = new Scanner(new FileInputStream("/home/mlzboy/my/crawler/d.txt"), "UTF-8");
        try {
          int ii=0;
          while (scanner.hasNextLine()){
            String line=scanner.nextLine();
            String[] elems=line.split(",");
            System.out.println(elems[0]);
             if (elems.length>2){
                 ii+=1;
                
                 Document doc = new Document();
            Field f0=new Field("id",Integer.toString(ii),Field.Store.YES,Field.Index.NOT_ANALYZED);
            Field f1=new Field("title",elems[0],Field.Store.YES,Field.Index.ANALYZED);
            Field f2=new Field("link",elems[1],Field.Store.YES,Field.Index.NO);

                System.out.println(elems[2]);
                        SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd hh:mm");
                 Date date=new Date();
                    String dateString = "2007-07-07 20:29";
                    dateString = elems[2];
                 try {
                   date = df.parse(dateString);
                   System.out.println(date.toLocaleString());
                 }
                 catch (Exception ex)
                 { System.out.println(ex.getMessage());}

                    NumericField f3=new NumericField("date",Field.Store.YES,true);
                    f3.setLongValue(date.getTime());
                    doc.add(f3);
                    System.out.println(elems[2]);
                   
                    doc.add(new NumericField("read",Field.Store.YES,true).setIntValue(Integer.parseInt(elems[3])));
                    doc.add(new NumericField("comment",Field.Store.YES,true).setIntValue(Integer.parseInt(elems[4])));

            doc.add(f0);
            doc.add(f1);
            doc.add(f2);

            writer.addDocument(doc);
                    }

          }
        }
        finally{
          scanner.close();
        }
    //            Document doc = new Document();
    //              Field f=new Field("title","诺基亚返乡贴补n95",Field.Store.YES,Field.Index.ANALYZED);
               
    //              doc.add(f);
    //            writer.addDocument(doc);
                // NOTE: if you want to maximize search performance,
                // you can optionally call optimize here.  This can be
                // a costly operation, so generally it's only worth
                // it when your index is relatively static (ie you're
                // done adding documents to it):
                //
                writer.forceMerge(1);
    //            Term term=new Term("link","http://www.cnblogs.com/lexus/archive/2011/09/30/2196819.html");
    //            writer.deleteDocuments(term);
                Term term=new Term("id","2162");
                writer.deleteDocuments(term);
                writer.close();

                System.out.println(" caught b " );
                System.out.println(new Date());
                System.out.println(new Date().getTime());

              } catch (IOException e) {
                System.out.println(" caught a " );
              }
           
           
           

           
           
           
           
        }

    }

    <!-- Maven build descriptor for the com.jd:lucene demo project (packaged as a jar). -->
    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"

      xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

      <modelVersion>4.0.0</modelVersion>

      <groupId>com.jd</groupId>

      <artifactId>lucene</artifactId>

      <version>1.0-SNAPSHOT</version>

      <packaging>jar</packaging>

      <name>lucene</name>

      <url>http://maven.apache.org</url>

      <properties>

        <!-- Source files are read as UTF-8. -->
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>

      </properties>

      <dependencies>

        <!-- JUnit 3.8.1, available on the test classpath only. -->
        <dependency>

          <groupId>junit</groupId>

          <artifactId>junit</artifactId>

          <version>3.8.1</version>

          <scope>test</scope>

        </dependency>

        <!-- Lucene core 3.5.0. -->
        <dependency>

          <groupId>org.apache.lucene</groupId>

          <artifactId>lucene-core</artifactId>

          <version>3.5.0</version>

          <type>jar</type>

        </dependency>

        <!-- Lucene demo artifact 3.5.0. -->
        <dependency>

          <groupId>org.apache.lucene</groupId>

          <artifactId>lucene-demo</artifactId>

          <version>3.5.0</version>

        </dependency>

      </dependencies>

    </project>

  • 相关阅读:
    234. Palindrome Linked List(判断链表是否回文)
    141. Linked List Cycle(判断链表是否有环)
    第二届“中国高校计算机大赛-大数据挑战赛” 20名
    Spark集群 Python Package管理
    Android中单选框RadioButton的基本用法
    【Android】进程间通信IPC——Binder
    Spring Boot 集成 JWT 实现单点登录授权
    pythonGUI编程——Qt库(1)
    Android获取SD卡路径/内存的几种方法
    Android主题更换换肤
  • 原文地址:https://www.cnblogs.com/lexus/p/2291732.html
Copyright © 2020-2023  润新知