• coreseek增量索引


    表 

    -- Bookkeeping table for main+delta indexing: the main source stores the
    -- highest indexed document id here (row counter_id = 1) and the delta
    -- source reads it back to select only newer rows.
    CREATE TABLE sph_counter (
        counter_id INT AUTO_INCREMENT PRIMARY KEY,
        max_id     INT
    ) ENGINE=MyISAM DEFAULT CHARSET=utf8;

    配置文件csft.conf

    #
    # Minimal Sphinx configuration sample (clean, simple, functional)
    #
    
    # Data source for the `post` index: pulls rows from table `post` in MySQL database `test`.
    source post
    {
        type                    = mysql

        sql_host                = 192.168.33.90
        sql_user                = root
        sql_pass                = root
        sql_db                  = test
        sql_port                = 3306    # optional, default is 3306
        # NOTE(review): a UNIX socket is presumably only used for local connections,
        # while sql_host above is a remote address - confirm which one is intended.
        sql_sock                = /tmp/mysql.sock

        # Pre-queries run on the connection before the main fetch query.
        sql_query_pre           = SET NAMES utf8
        sql_query_pre           = SET SESSION query_cache_type=OFF

        # Main fetch query. It must stay on the same line as the directive (or every
        # continued line must end with a backslash); a bare line break after "="
        # leaves the value empty and makes the next line unparsable.
        # The first selected column has to be the unique integer document id.
        sql_query               = SELECT * from post

        # Used by the command-line `search` tool to display a matched row; $id is
        # replaced with the document id.
        sql_query_info          = SELECT * FROM post WHERE id=$id
    }
    
    
    # Full-text index built from the `post` source.
    index post
    {
        source                    = post
        # Path prefix of the index files on disk.
        path                    = /usr/local/coreseek/var/data/post
        
        # Coreseek-specific settings: Chinese UTF-8 text, tokenized with the mmseg
        # dictionary found in charset_dictpath.
        charset_type            = zh_cn.utf-8
        charset_dictpath                = /usr/local/mmseg/etc/
    }
    
    
    # Settings for the `indexer` tool.
    indexer
    {
        # Memory limit for an indexing run.
        mem_limit                = 32M
    }
    
    
    # Settings for the `searchd` search daemon.
    searchd
    {
        # TCP port clients connect to (the PHP script below uses the same port).
        port                    = 9312
        # Daemon log and per-query log files.
        log                        = /usr/local/coreseek/var/log/searchd.log
        query_log                = /usr/local/coreseek/var/log/query.log
        # Client read timeout, in seconds.
        read_timeout            = 5
        # Maximum number of concurrently served searches.
        max_children            = 30
        pid_file                = /usr/local/coreseek/var/log/searchd.pid
        # Upper bound on the number of matches kept per query.
        max_matches                = 1000
        # Index rotation behaviour (relevant for `indexer --rotate`): load the new
        # index copy before switching, do not keep index files open at startup,
        # and delete the old index copy after a successful rotation.
        seamless_rotate            = 1
        preopen_indexes            = 0
        unlink_old                = 1
    }
    
    # Main data source for the `documents` index (MySQL database `spider`).
    # Each run records the current maximum document id in sph_counter and indexes
    # every row up to that id; newer rows are left to the `delta` source.
    source documents
    {
        type                    = mysql

        sql_host                = 192.168.33.90
        sql_user                = root
        sql_pass                = root
        sql_db                  = spider
        sql_port                = 3306    # optional, default is 3306
        # NOTE(review): a UNIX socket is presumably only used for local connections,
        # while sql_host above is a remote address - confirm which one is intended.
        sql_sock                = /tmp/mysql.sock

        # Pre-queries run on the connection before the main fetch query.
        sql_query_pre           = SET NAMES utf8
        sql_query_pre           = SET SESSION query_cache_type=OFF
        # Remember the highest id covered by this main indexing run (row counter_id = 1).
        sql_query_pre           = replace into sph_counter select 1,max(id) from documents

        # Main fetch query. It must stay on the same line as the directive (or every
        # continued line must end with a backslash); a bare line break after "="
        # leaves the value empty and makes the next line unparsable.
        # The first selected column has to be the unique integer document id.
        sql_query               = SELECT * from documents where id <= (select max_id from sph_counter where counter_id=1)

        # Used by the command-line `search` tool to display a matched row; $id is
        # replaced with the document id.
        sql_query_info          = SELECT * FROM documents WHERE id=$id
    }
    
    # Delta data source: inherits the connection settings of `documents` and
    # selects only the rows added after the last main indexing run.
    source delta : documents
    {
        # Overriding sql_query_pre replaces ALL inherited pre-queries. This is
        # intentional: the delta run must not execute the inherited
        # "replace into sph_counter ..." statement, otherwise it would move the
        # counter forward and the delta would always be empty.
        sql_query_pre           = set names utf8

        # Must stay on one line (or use trailing backslashes for continuation).
        sql_query               = SELECT * from documents where id > (select max_id from sph_counter where counter_id=1)
    }
    
    
    # Main full-text index built from the `documents` source.
    index documents
    {
        source                    = documents
        # Path prefix of the index files on disk.
        path                    = /usr/local/coreseek/var/data/documents
        
        # Coreseek-specific settings: Chinese UTF-8 text, tokenized with the mmseg
        # dictionary found in charset_dictpath.
        charset_type            = zh_cn.utf-8
        charset_dictpath                = /usr/local/mmseg/etc/
    }
    
    # Delta index: inherits every setting of the `documents` index and overrides
    # only the data source and the on-disk path, so it holds just the rows that
    # are newer than the last main indexing run.
    index delta : documents
    {
        source=delta
        path = /usr/local/coreseek/var/data/delta
        
    }

    php代码(运行前先在命令行执行下面的命令,重建 delta 增量索引并通知 searchd 轮换)

    ./indexer delta --rotate

    <?php
    // Demo search page: looks up the keyword passed as ?key=... in the Sphinx
    // "documents" (main) and "delta" indexes, loads the matching rows from MySQL
    // and prints highlighted excerpts.
    header("Content-type:text/html;charset=utf-8");
    // NOTE(review): verbose error display is fine for a demo, not for production.
    ini_set("display_errors",1);
    error_reporting(E_ALL);

    // A missing or non-string parameter falls back to an empty query instead of
    // raising an "undefined index" notice.
    $keyword = (isset($_GET['key']) && is_string($_GET['key'])) ? $_GET['key'] : '';

    // Create the Sphinx client and point it at the searchd server.
    $sphinx = new SphinxClient();
    $sphinx->SetServer("192.168.33.90", 9312);

    // SPH_MATCH_ALL only matches documents containing every query word, while
    // SPH_MATCH_ANY matches documents containing any of the words produced by
    // splitting the query. ANY is used here.
    $sphinx->SetMatchMode(SPH_MATCH_ANY);

    // Search the main index and the delta index together.
    $result = $sphinx->Query($keyword, "documents,delta");
    if ($result === false) {
        // A failed query is an error, not "no results" - report it as such.
        echo "搜索失败:" . htmlspecialchars($sphinx->GetLastError(), ENT_QUOTES, 'UTF-8');
        exit;
    }

    // Debug dump of the raw result. Escaped because it echoes query-derived text.
    echo "<pre>";
    echo htmlspecialchars(print_r($result, true), ENT_QUOTES, 'UTF-8');
    echo "</pre>";

    // The keys of $result['matches'] are the primary keys of the matched documents.
    // Only purely numeric ids are kept, since they are interpolated into SQL below.
    $idList = array();
    if (!empty($result['matches'])) {
        foreach (array_keys($result['matches']) as $docId) {
            if (preg_match('/^\d+$/', (string) $docId)) {
                $idList[] = (string) $docId;
            }
        }
    }
    if (count($idList) === 0) {
        echo "未搜索到结果";
        exit;
    }
    $ids = implode(',', $idList);
    //echo $ids; //6,7

    /* Database access (mysqli; the old mysql_* functions no longer exist in PHP 7+). */
    // Turn off exception mode so the explicit return-value checks below work
    // the same way on every PHP version.
    mysqli_report(MYSQLI_REPORT_OFF);
    $db = mysqli_connect("192.168.33.90", "root", "root", "spider");
    if (!$db) {
        echo "数据库连接失败:" . htmlspecialchars((string) mysqli_connect_error(), ENT_QUOTES, 'UTF-8');
        exit;
    }
    mysqli_set_charset($db, "utf8");

    // ORDER BY FIELD keeps the rows in the relevance order returned by Sphinx;
    // a plain IN (...) would return them in arbitrary order.
    $sql = "select * from documents where id in ($ids) order by field(id, $ids)";
    $rst = mysqli_query($db, $sql);
    if ($rst === false) {
        echo "数据库查询失败:" . htmlspecialchars(mysqli_error($db), ENT_QUOTES, 'UTF-8');
        mysqli_close($db);
        exit;
    }

    // Highlighting options for the excerpts.
    $opts = array(
        "before_match" => "<font color='red'>",
        "after_match"  => "</font>",
    );
    while ($row = mysqli_fetch_assoc($rst)) {
        // BuildExcerpts expects a plain list of strings: drop the column names and
        // turn NULL columns into empty strings.
        $docs = array_map('strval', array_values($row));
        $final = $sphinx->BuildExcerpts($docs, "documents", $keyword, $opts);
        if ($final === false) {
            echo htmlspecialchars($sphinx->GetLastError(), ENT_QUOTES, 'UTF-8') . '<hr>';
            continue;
        }

        // NOTE(review): positions 1 and 2 are presumably the title and body columns
        // of `documents`; this depends on the column order of SELECT * - confirm.
        // NOTE(review): excerpts are printed unescaped so the highlight tags render;
        // escape the row text first if the stored documents are not trusted.
        echo "标题:" . (isset($final[1]) ? $final[1] : '') . "<br>";
        echo (isset($final[2]) ? $final[2] : '') . '<hr>';
    }

    mysqli_free_result($rst);
    mysqli_close($db);

    ?>
  • 相关阅读:
    python .npy 存取 dict
    python 找零钱方案
    docker 容器里显示图形
    docker 安装vim
    软件测试基础知识
    Git命令——学习笔记2
    Git命令——学习笔记1
    Win10配置Git环境变量与基本使用
    Selenium Web自动化测试——基于unittest框架的PO设计模式
    Django+Celery学习笔记5——定时推送消息
  • 原文地址:https://www.cnblogs.com/brady-wang/p/6103423.html
Copyright © 2020-2023  润新知