• 开源搜索引擎Solr的快速搭建及集成到企业门户最佳实施方案--转载


    笔者查阅了solr官方相关资料,经过两周的研究,实现了毫秒级百万数据的搜索引擎的搭建,并将其引入到企业门户。现将实施心得和步骤分享如下。

    1.      jdk1.6

    安装jdk1.6到系统默认目录X:\qc\Java下(注意要点:配置好环境变量)。

    2.      tomcat安装

    a)      安装tomcat到X:\qc\tomcat6目录下。

    b)     配置server.xml

    <Connector port="80" protocol="HTTP/1.1" connectionTimeout="20000" redirectPort="8443" URIEncoding="UTF-8" />

    3.      下载并部署solr1.4.1

    a)      拷贝本地solr(注意该solr文件夹并非war包而是solr连接数据库的配置包)目录到目标服务器X:\qc目录下。

    l  data-config.xml

    <dataConfig>

        <dataSource name="itwhhsol" type="JdbcDataSource" driver="com.mysql.jdbc.Driver" url="jdbc:mysql://127.0.0.1:3306/itwhhsol" user="root" password="123ttt"/>

    <!--在这里配置要连接的数据库-->

        <document name="article">

                <entity name="article" dataSource="itwhhsol"

                      query="select * from article" pk="article_id"

                deltaQuery="select * from article where update_time > '${dataimporter.last_index_time}'">

                      <field column="article_id" name="id" />

                </entity>

        </document>

    <!--配置要索引的表-->

    </dataConfig>

    l  solrconfig.xml

    <dataDir>${solr.data.dir:f:/qc/solr/data}</dataDir>

    <!--配置搜索引擎索引后的数据-->

    <!--add for dih-->

        <requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">

               <lst name="defaults">

                      <str name="config">f:/qc/solr/conf/data-config.xml</str>

               </lst>

        </requestHandler>

    l  schema.xml

    <field name="id" type="string" indexed="true" stored="true" required="true" />

    <field name="user_id" type="string" indexed="true" stored="true"/>

    <field name="article_id" type="string" indexed="true" stored="true"/>

    <field name="update_time" type="date" indexed="true" stored="true"/>

    <field name="url" type="string" indexed="true" stored="true"/>

    <field name="title" type="textMaxWord" indexed="true" stored="true"/>

    <field name="content" type="textMaxWord" indexed="true" stored="true"/>

    <field name="in_out_flag" type="string" indexed="true" stored="true"/>

    <field name="article_state" type="string" indexed="true" stored="true"/>

    <field name="article_click" type="int" indexed="true" stored="true" />

    <field name="clerk_id" type="string" indexed="true" stored="true"/>

    <field name="sort_id" type="string" indexed="true" stored="true"/>

    <field name="keyword" type="textMaxWord" indexed="true" stored="true"/>

    <field name="clerk_suggest" type="textMaxWord" indexed="true" stored="true"/>

    <copyField source="title" dest="text"/>

    <copyField source="content" dest="text"/>

    <copyField source="keyword" dest="text"/>

    <copyField source="clerk_suggest" dest="text"/>

    b)     部署war包solr到tomcat下

    在X:\qc\tomcat6\webapps下放置apache-solr-1.4.0\example\webapps\solr.war。

    c)      配置solr.xml

    l  创建目录X:\qc\Tomcat6\conf\Catalina\localhost。

    l  增加solr.xml内容如下:

    <?xml version="1.0" encoding="UTF-8"?>

    <Context docBase="X:/qc/Tomcat6/webapps/solr.war" debug="0" crossContext="true" >

    <Environment name="solr/home" type="java.lang.String" value="X:/qc/solr/" override="true" />

    </Context>

    4.      启动f:\qc\tomcat6\bin\tomcat6.exe(带dos控制台)(注意:这里tomcat安装时目录名一定要改成tomcat6或者tomcat,不能有横线或者空格,否则可能导致solr无法正常索引)

    5.      索引全部数据:http://127.0.0.1/solr/dataimport?command=full-import(可能需要重新刷一次窗口才能真正的索引全部数据哦)

    6.测试url:http://127.0.0.1/solr/admin/如果可以看到solr管理界面说明搭建成功了

    7.把如下代码copy到本地另存成jsp放到你的工程里就可以实现毫秒级搜索百万数据量啦:

    <%@ page language="java" pageEncoding="UTF-8"%>
    <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
    <html:html locale="true">
    <STYLE>BODY {
     FONT-FAMILY: arial
    }
    TD {
     FONT-FAMILY: arial
    }
    .p1 {
     FONT-FAMILY: arial
    }
    .p2 {
     FONT-FAMILY: arial
    }
    .i {
     FONT-FAMILY: arial
    }
    BODY {
     PADDING-RIGHT: 0px; PADDING-LEFT: 0px; PADDING-BOTTOM: 0px; MARGIN: 0px; COLOR: #000; PADDING-TOP: 6px; POSITION: relative; BACKGROUND-COLOR: #fff
    }
    INPUT {
     PADDING-BOTTOM: 0px; PADDING-TOP: 0px; moz-box-sizing: border-box; webkit-box-sizing: border-box; box-sizing: border-box
    }
    TABLE {
     BORDER-TOP-WIDTH: 0px; BORDER-LEFT-WIDTH: 0px; BORDER-BOTTOM-WIDTH: 0px; BORDER-RIGHT-WIDTH: 0px
    }
    TD {
     FONT-SIZE: 9pt; LINE-HEIGHT: 18px
    }
    EM {
     COLOR: #cc0000; FONT-STYLE: normal
    }
    A EM {
     TEXT-DECORATION: underline
    }
    .f14 {
     FONT-SIZE: 14px
    }
    .f10 {
     FONT-SIZE: 10.5pt
    }
    .f16 {
     FONT-SIZE: 16px; FONT-FAMILY: Arial
    }
    .c {
     COLOR: #7777cc
    }
    .p1 {
     MARGIN-LEFT: -12pt; LINE-HEIGHT: 120%
    }
    .p2 {
     MARGIN-LEFT: -12pt; WIDTH: 100%; LINE-HEIGHT: 120%
    }
    .i {
     FONT-SIZE: 16px; LINE-HEIGHT: 1.4em; HEIGHT: 24px
    }
    .t {
     COLOR: #0000cc; TEXT-DECORATION: none
    }
    A.t:hover {
     TEXT-DECORATION: underline
    }
    .p {
     PADDING-LEFT: 18px; FONT-SIZE: 14px; MARGIN: 0px 0px 20px; WORD-SPACING: 4px
    }
    .f {
     PADDING-LEFT: 15px; FONT-SIZE: 100%; WIDTH: 33.7em; WORD-BREAK: break-all; LINE-HEIGHT: 120%; WORD-WRAP: break-word
    }
    .h {
     MARGIN-LEFT: 8px; WIDTH: 100%
    }
    .s {
     PADDING-LEFT: 10px; WIDTH: 8%; HEIGHT: 25px
    }
    .m {
     FONT-SIZE: 100%; COLOR: #666
    }
    A.m:link {
     FONT-SIZE: 100%; COLOR: #666
    }
    A.m:visited {
     COLOR: #660066
    }
    .g {
     FONT-SIZE: 12px; COLOR: #008000
    }
    .r {
     WIDTH: 238px; CURSOR: hand; WORD-BREAK: break-all
    }
    .bi {
     MARGIN-BOTTOM: 12px; HEIGHT: 20px; BACKGROUND-COLOR: #d9e1f7
    }
    .pl {
     PADDING-RIGHT: 2px; PADDING-LEFT: 3px; FONT-SIZE: 14px; HEIGHT: 8px
    }
    .Tit {
     FONT-SIZE: 14px; Z-INDEX: 200; POSITION: relative; HEIGHT: 21px
    }
    .Tit A {
     COLOR: #0000cc
    }
    .fB {
     FONT-WEIGHT: bold
    }
    .mo {
     FONT-SIZE: 100%; COLOR: #666666; LINE-HEIGHT: 10px
    }
    A.mo:link {
     FONT-SIZE: 100%; COLOR: #666666; LINE-HEIGHT: 10px
    }
    A.mo:visited {
     FONT-SIZE: 100%; COLOR: #666666; LINE-HEIGHT: 10px
    }
    .htb {
     MARGIN-BOTTOM: 5px
    }
    #ft {
     CLEAR: both; BACKGROUND: #e6e6e6; LINE-HEIGHT: 20px; TEXT-ALIGN: center
    }
    #ft {
     FONT-SIZE: 12px; COLOR: #77c; FONT-FAMILY: Arial
    }
    #ft  {
     FONT-SIZE: 12px; COLOR: #77c; FONT-FAMILY: Arial
    }
    #ft SPAN {
     COLOR: #666
    }
    FORM {
     Z-INDEX: 9; MARGIN: 0px; POSITION: relative
    }
    .jc A {
     COLOR: #cc0000
    }
    .btn {
     FONT-SIZE: 14px; MARGIN-LEFT: 3px; WIDTH: 5.6em; PADDING-TOP: 2px; HEIGHT: 2em
    }
    .i {
     VERTICAL-ALIGN: baseline
    }
    .btn {
     VERTICAL-ALIGN: baseline
    }
    UNKNOWN {
     TEXT-DECORATION: underline
    }
    #tb_mr {
     Z-INDEX: 200; CURSOR: pointer; COLOR: #0000cc; POSITION: relative
    }
    #tb_mr B {
     FONT-WEIGHT: normal; TEXT-DECORATION: underline
    }
    #tb_mr SMALL {
     FONT-SIZE: 11px
    }
    #more {
     BORDER-RIGHT: #9a99ff 1px solid; BORDER-TOP: #9a99ff 1px solid; DISPLAY: none; FONT-SIZE: 14px; Z-INDEX: 200; BACKGROUND: #fff; LEFT: 314px; OVERFLOW: hidden; BORDER-LEFT: #9a99ff 1px solid; WIDTH: 58px; BORDER-BOTTOM: #9a99ff 1px solid; POSITION: absolute; TOP: 22px; HEIGHT: 100px; outline: none
    }
    #more A {
     PADDING-RIGHT: 0px; DISPLAY: block; PADDING-LEFT: 7px; PADDING-BOTTOM: 0px; WIDTH: 4em; COLOR: #0001cf; LINE-HEIGHT: 24px; PADDING-TOP: 0px; HEIGHT: 25%; TEXT-DECORATION: none
    }
    #more A SPAN {
     FONT-FAMILY: "宋体"
    }
    #more A:hover {
     BACKGROUND: #d9e1f6
    }
    #more DIV {
     BACKGROUND: #ccccff; MARGIN: 0px 3px; OVERFLOW: hidden; HEIGHT: 1px
    }
    #out {
     MARGIN-LEFT: 880px; ZOOM: 1
    }
    #in {
     FLOAT: left; MARGIN-LEFT: -880px; POSITION: relative
    }
    #wrapper {
     ZOOM: 1; min-width: 880px
    }
    #sx {
     CURSOR: pointer; COLOR: #00c; TEXT-DECORATION: underline
    }
    #u {
     FONT-SIZE: 12px; Z-INDEX: 210; RIGHT: 10px; MARGIN: 0px; WHITE-SPACE: nowrap; POSITION: absolute; TOP: 0px; TEXT-ALIGN: right
    }
    .result {
     TABLE-LAYOUT: fixed; WIDTH: 34em
    }
    </STYLE>
    <head><html:base />    
    <title>搜索引擎</title>
     <meta http-equiv="pragma" content="no-cache">
     <meta http-equiv="cache-control" content="no-cache">
     <meta http-equiv="expires" content="0">    
     <meta http-equiv="keywords" content="keyword1,keyword2,keyword3">
     <meta http-equiv="description" content="This is my page">
       <script language="javascript" src="/OnLHS/jsp/lib/jquery.js"></script>
     <script language="javascript" src="/OnLHS/jsp/lib/pagination.js"></script>
     <link href="/OnLHS/jsp/lib/pagination.css" rel="stylesheet" type="text/css" />
      </head>
      <body onload="xmlhttpPost('/solr/select')">
      <div class="wa_mode" id=s_nav  align="center"  style="width:100%;overflow:hidden;white-space:nowrap;text-overflow:ellipsis;" >
      <span class="STYLE1">&nbsp;<img src="/OnLHS/jsp/img/mobile.jpg" height="50"/><img src="/OnLHS/jsp/img/Logom1.png"/></span>
    </div>
     <hr noshade="noshade"  color="#6699FF">
      <form action="show.jsp" name="f1" method="get" accept-charset="UTF-8" onSubmit="xmlhttpPost('/solr/select'); return false;">&nbsp;<h5>搜索内容:<input type="text" name="q" size="50" value="<%=request.getAttribute("key") %>">
          <input name="start" type="hidden" value="0">
          <input name="rows" type="hidden" value="10">
          <input name="indent" type="hidden" value="on">
        <input name="wt" type="hidden" value="">
          <input type="button" value=" 搜 索 " onClick="xmlhttpPost('/solr/select');">
          <input type="hidden" value=" get json " onClick="document.forms['f1'].wt.value='json';document.forms['f1'].submit();">
          <input type="hidden" value=" get xml " onClick="document.forms['f1'].wt.value='';document.forms['f1'].submit();">
          </h5>
      </form>
      <p>
       
        <div id="header" style="background-color: #D9E1F6; height: 15px;" align="left"></div>
        <div id="response">
          
        </div>
     <table>
     <tr>
     <td width="10"></td>
     <td>
       
         <table id="docs" class="tab" cellspacing="1">
                <tr>
                <td></td>
                <td></td>
                <td></td>
                <td></td>
                </tr>
            </table>
         
      </td> 
      </tr>
      <tr>
      <td colspan="2" align="center"><div align="center" id="pages"></div></td>
      </tr>
     </table>
        
       <script type="text/javascript">
       
       function xmlhttpPost(strURL) {
       if(document.forms['f1'].q.value.length==0||document.forms['f1'].q.value.replace(/(^s*)|(s*$)/g,"")=="")
        {
         alert("请输入您要检索的内容...");
            return false;
        }
        var xmlHttpReq = false;
        var self = this;
        if (window.XMLHttpRequest) { // Mozilla/Safari
            self.xmlHttpReq = new XMLHttpRequest(); 
        }
        else if (window.ActiveXObject) { // IE
            self.xmlHttpReq = new ActiveXObject("Microsoft.XMLHTTP");
        }
        
        var params = getstandardargs().concat(getquerystring());
        var strData = params.join('&');
        
        var header = document.getElementById("response");
        //header.innerHTML = strURL '?' strData;

        self.xmlHttpReq.open('get', strURL '?' strData '&time=' new Date().getTime(), true);
        self.xmlHttpReq.setRequestHeader('Content-Type', 'application/x-www-form-urlencoded');
        self.xmlHttpReq.onreadystatechange = function() {
            if (self.xmlHttpReq.readyState == 4) {
                updatepage(self.xmlHttpReq.responseText);
            }
        }
        self.xmlHttpReq.send(null);
    }

    // Builds the list of fixed 'name=value' request arguments that accompany
    // every search: wt, indent, hl, hl.fl (empty), fl, start and rows.
    function getstandardargs() {
        var fixedArgs = [];
        fixedArgs.push('wt=json');
        fixedArgs.push('indent=on');
        fixedArgs.push('hl=true');
        fixedArgs.push('hl.fl=');
        fixedArgs.push('fl=*,score');
        fixedArgs.push('start=0');
        fixedArgs.push('rows=100');
        return fixedArgs;
    }
    /**
     * Reads the search box of form 'f1' and returns it as a URL-encoded
     * 'q=...' request argument.
     *
     * @returns {string} The 'q' argument, e.g. 'q=solr%20search'.
     */
    function getquerystring() {
      var form = document.forms['f1'];
      var query = form.q.value;
      // encodeURIComponent (not encodeURI) so that '&', '=', '#' and '+' typed
      // by the user stay inside the q value instead of splitting the query string.
      var qstr = 'q=' + encodeURIComponent(query);
      return qstr;
    }

    // this function does all the work of parsing the solr response and updating the page.
    /**
     * Turns the response text into an object and passes it to parse(),
     * which updates the page.
     *
     * @param {string} str  Raw response body received from the select request.
     */
    function updatepage(str){
      //document.getElementById("response").innerHTML = str;
      // Wrapping the text in parentheses makes eval treat it as an expression
      // (an object literal) rather than a block statement.
      // NOTE(review): eval executes whatever the server sends back; if every
      // target browser provides JSON.parse, prefer it here.
      var rsp = eval("(" + str + ")");
      parse(rsp);
    }

    /**
     * Renders a parsed response: writes a summary line (query, hit count,
     * query time) into the #header element and passes the returned documents
     * to the pagination plugin.
     *
     * @param {Object} j  Parsed response; this function reads
     *                    j.responseHeader.params.q, j.responseHeader.QTime,
     *                    j.response.numFound and j.response.docs.
     */
    function parse(j) {
        // The echoed query is user input, so neutralise markup before it is
        // assigned to innerHTML.
        var escapeHtml = function (s) {
            return String(s).replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
        };
        var header = document.getElementById("header");
        var rh = j.responseHeader;
        var header_str = " 搜索关键字: \"" + escapeHtml(rh.params.q) + "\", 共为您找到: " + j.response.numFound + "条相关的数据, 耗时: " + rh.QTime + "ms";
        header.innerHTML = "<font color=#000000>" + "&nbsp;&nbsp;" + header_str + "</font>";
        var docs = j.response.docs;
        // Arguments as passed by the original listing: target element id, 1, 12,
        // number of docs, callback, docs (meaning of the numeric arguments is
        // defined by pagination.js — TODO confirm against that plugin).
        $.pagination('pages',1,12, docs.length, test, docs);
    }
    // Callback handed to $.pagination by parse(); presumably invoked by the
    // plugin on a page change (verify against pagination.js). It re-invokes
    // the plugin using the values carried in e.data.
    function test(e) {
        var state = e.data;
        var pager = state.pagination;
        $.pagination('pages', state.current, pager.pageSize, pager.totalRecord, test, state.docs);
    }
       </script>
      </body>
      
    </html:html>

     引用:http://blog.chinaunix.net/uid-25723371-id-3221379.html
  • 相关阅读:
    Python_命名空间和作用域_25
    Python_函数_复习_习题_24
    Python_每日习题_0001_数字组合
    Python_试题_23
    Python_初识函数和返回值_22
    linux-shell-引用-命令替换-命令退出状态-逻辑操作符
    linux-shell-变量参数
    Python-复习-文件操作-21
    Python-注册登陆-20
    linux-vim
  • 原文地址:https://www.cnblogs.com/fang-beny/p/3303736.html
Copyright © 2020-2023  润新知