• Sphinx(Coreseek)安装和使用指南


    1.安装

    1.1安装mmseg

    # Build and install mmseg; the relative paths mean these commands are run
    # from inside the mmseg source tree (presumably the unpacked 3.2.14 sources).
    ./bootstrap # required; the install fails without this step
    ./configure --prefix=/usr/local/mmseg-3.2.14 # choose the install directory
    make
    make install

    1.2安装coreseek

    # Before building, edit csft-4.1/configure.ac:
    #   find:    AM_INIT_AUTOMAKE([-Wall -Werror foreign])
    #   replace: AM_INIT_AUTOMAKE([-Wall foreign])
    # (presumably so automake warnings are no longer treated as errors - confirm)
    sh buildconf.sh # required; the install fails without this step
    ./configure --prefix=/usr/local/coreseek-4.1 --with-mysql --with-mysql-includes=/usr/local/mysql-5.6.21/include --with-mysql-libs=/usr/local/mysql-5.6.21/lib --with-mmseg --with-mmseg-includes=/usr/local/mmseg-3.2.14/include/mmseg --with-mmseg-libs=/usr/local/mmseg-3.2.14/lib
    # Before running make, edit csft-4.1/src/sphinxexpr.cpp:
    #   find:    T val = ExprEval ( this->m_pArg, tMatch );
    #   replace: T val = this->ExprEval ( this->m_pArg, tMatch );
    make
    make install

    2.配置

    测试数据表结构

    -- Bookkeeping table used by the indexer queries in the source sections:
    -- one row per `code`, storing the highest id recorded for that code (`max`)
    -- and the time the row was written (`add_time`).
    CREATE TABLE `sph_counter` (
      `id`       INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
      `code`     VARCHAR(50)      NOT NULL DEFAULT '',
      `max`      INT(10) UNSIGNED NOT NULL,
      `add_time` DATETIME         NOT NULL,
      PRIMARY KEY (`id`),
      -- REPLACE INTO relies on this unique key to overwrite the row for a code.
      UNIQUE KEY `code` (`code`)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
    
    -- Sample table that gets indexed: `movie_id` is the document id and `title`
    -- is the text column selected by the indexer queries.
    CREATE TABLE `movie` (
      `movie_id`     INT(10) UNSIGNED NOT NULL AUTO_INCREMENT COMMENT '影片ID',
      `old_id`       INT(10) UNSIGNED NOT NULL DEFAULT '0'    COMMENT '旧ID',
      `sn`           VARCHAR(50)      NOT NULL                COMMENT '影片番号',
      `title`        VARCHAR(255)     NOT NULL                COMMENT '片名',
      `cover`        VARCHAR(255)     NOT NULL DEFAULT ''     COMMENT '影片封面图',
      `publisher_id` INT(10) UNSIGNED NOT NULL                COMMENT '发行商ID',
      `publish_date` DATE             NOT NULL                COMMENT '发行日期',
      `add_time`     DATETIME         NOT NULL                COMMENT '添加时间',
      PRIMARY KEY (`movie_id`),
      UNIQUE KEY `sn` (`sn`)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8;

    coreseek主索引配置

    # Data source for the main index: snapshots the current highest movie_id
    # into sph_counter, then fetches every movie row up to that snapshot.
    source main_javzoo_movie
    {
        type            = mysql

        sql_host        = 127.0.0.1
        sql_user        = root
        sql_pass        =
        sql_db          = javzoo
        sql_port        = 3306    # optional, default is 3306

        # A value that spans several lines must end every line except the last
        # with a backslash; without it sql_query is empty and the SQL lines
        # that follow are rejected as invalid config.
        # $start and $end are filled in by the indexer for each id range.
        sql_query       = \
            SELECT movie_id, movie_id AS record_id, title \
            FROM movie WHERE movie_id >= $start AND movie_id <= $end ORDER BY movie_id ASC

        # Pre-queries run in the order listed, before the rows are fetched.
        sql_query_pre   = SET NAMES utf8
        # Record the highest movie_id present right now under code 'movie'.
        sql_query_pre   = REPLACE INTO sph_counter SET `code` = 'movie', `max` = (SELECT MAX(movie_id) FROM movie), add_time = NOW()

        # Fetch from the lowest movie_id up to the recorded snapshot,
        # sql_range_step ids at a time.
        sql_query_range = SELECT MIN(movie_id), (SELECT `max` FROM `sph_counter` WHERE `code` = 'movie') FROM movie
        sql_range_step  = 10000

        # Expose the id a second time as an unsigned-integer attribute.
        sql_attr_uint   = record_id

    }
    
    # Main full-text index built from the main_javzoo_movie source.
    index main_javzoo_movie_title
    {
        source           = main_javzoo_movie
        path             = /usr/local/coreseek-4.1/var/data/main_javzoo_movie_title

        # Storage options.
        docinfo          = extern
        mlock            = 0

        # Text processing: no morphology, no HTML stripping, keep 1-character words.
        morphology       = none
        html_strip       = 0
        min_word_len     = 1

        # Coreseek Chinese tokenizing; the dictionary path points into the
        # mmseg install prefix, and n-gram indexing is switched off.
        charset_type     = zh_cn.utf-8
        charset_dictpath = /usr/local/mmseg-3.2.14/etc
        ngram_len        = 0
    }

    3.增量配置

    # Data source for the delta index: fetches the movie rows from the id
    # stored in sph_counter up to the current highest movie_id, then moves
    # the stored id forward.
    source delta_javzoo_movie
    {
        type            = mysql

        # Same server, user and database as main_javzoo_movie, so the password
        # must be the same too. It is left blank here exactly as in the main
        # source; fill in your own and never publish a real one.
        sql_host        = 127.0.0.1
        sql_user        = root
        sql_pass        =
        sql_db          = javzoo
        sql_port        = 3306

        sql_query_pre   = SET NAMES utf8

        # The lower bound is the stored id itself (inclusive), so the range is
        # never empty even when no new rows exist; the boundary row is simply
        # fetched again.
        sql_query_range = SELECT (SELECT `max` FROM `sph_counter` WHERE `code` = 'movie'), MAX(movie_id) FROM movie
        sql_range_step  = 10000

        # A value that spans several lines must end every line except the last
        # with a backslash; without it sql_query is empty and the SQL lines
        # that follow are rejected as invalid config.
        # $start and $end are filled in by the indexer for each id range.
        sql_query       = \
            SELECT movie_id, movie_id AS record_id, title \
            FROM movie WHERE movie_id >= $start AND movie_id <= $end ORDER BY movie_id ASC

        # Move the stored id forward to the current highest movie_id.
        # NOTE(review): per the Sphinx docs sql_query_post runs once the fetch
        # succeeds, not once the index build finishes; sql_query_post_index may
        # be the safer hook - confirm before changing.
        # NOTE(review): because the stored id moves forward on every run, the
        # delta index should be merged into main before the next delta rebuild.
        sql_query_post  = REPLACE INTO sph_counter SET `code` = 'movie', `max` = (SELECT MAX(movie_id) FROM movie), add_time = NOW()

        sql_attr_uint   = record_id
    }
    
    # Delta full-text index built from the delta_javzoo_movie source; every
    # setting other than source and path has the same value as in
    # main_javzoo_movie_title.
    index delta_javzoo_movie_title
    {
        source           = delta_javzoo_movie
        path             = /usr/local/coreseek-4.1/var/data/delta_javzoo_movie_title

        # Storage options.
        docinfo          = extern
        mlock            = 0

        # Text processing: no morphology, no HTML stripping, keep 1-character words.
        morphology       = none
        html_strip       = 0
        min_word_len     = 1

        # Coreseek Chinese tokenizing; the dictionary path points into the
        # mmseg install prefix, and n-gram indexing is switched off.
        charset_type     = zh_cn.utf-8
        charset_dictpath = /usr/local/mmseg-3.2.14/etc
        ngram_len        = 0
    }

    4.定时更新脚本

     #!/bin/sh -
     # Rebuild or merge the Coreseek indexes selected by the first argument.
     #
     # Usage: sh sphinx_index.sh all|main|delta|merge
     #   all    reindex every index defined in the config file
     #   main   reindex main_javzoo_movie_title
     #   delta  reindex delta_javzoo_movie_title
     #   merge  merge delta_javzoo_movie_title into main_javzoo_movie_title
     #
     # Exit status: 1 for an unknown action, otherwise the indexer's own status.

     INDEXER=/usr/local/coreseek-4.1/bin/indexer
     CONFIG=/usr/local/coreseek-4.1/etc/sphinx.conf

     START_DATE=`date`
     echo "start $1 index @ $START_DATE"

     # The indexer's standard output is discarded; only this script's own
     # messages are printed.
     case "$1" in
         all)
             "$INDEXER" --config "$CONFIG" --all --rotate > /dev/null
             ;;
         main)
             "$INDEXER" --config "$CONFIG" main_javzoo_movie_title --rotate > /dev/null
             ;;
         delta)
             "$INDEXER" --config "$CONFIG" delta_javzoo_movie_title --rotate > /dev/null
             ;;
         merge)
             "$INDEXER" --config "$CONFIG" --merge main_javzoo_movie_title delta_javzoo_movie_title --rotate > /dev/null
             ;;
         *)
             echo error action!
             exit 1
             ;;
     esac
     # Save the indexer's exit status immediately: every later command,
     # including the `date` assignment below, overwrites $?.
     STATUS=$?

     END_DATE=`date`
     if [ "$STATUS" -eq 0 ]
     then
         echo "complete @ $END_DATE"
     else
         echo "error @ $END_DATE"
     fi
     echo
     # Hand the indexer's status to the caller (e.g. cron) instead of always 0.
     exit "$STATUS"

    用法

    # Rebuild all indexes
    sh sphinx_index.sh all
    # Rebuild the main index
    sh sphinx_index.sh main
    # Rebuild the delta index
    sh sphinx_index.sh delta
    # Merge the delta index into the main index
    sh sphinx_index.sh merge

    备注

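    配合crond定时执行上述脚本,实现增量索引的定时更新与合并。注意:delta 数据源的 sql_query_post 每次都会把 sph_counter 推进到最新的 movie_id,因此每次执行 delta 之后应先执行 merge,再进行下一次 delta,否则两次 delta 之间新增的记录既不在主索引中,也不在增量索引中。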

  • 相关阅读:
    Java学习笔记12---向上转型-父类的对象引用指向子类对象
    Java学习笔记11---静态成员变量、静态代码块、成员变量及构造方法的初始化或调用顺序
    Java学习笔记10---访问权限修饰符如何控制成员变量、成员方法及类的访问范围
    Java学习笔记9---类静态成员变量的存储位置及JVM的内存划分
    Java学习笔记8---类的静态成员变量与静态成员方法的访问与调用方式
    Java学习笔记7---父类构造方法有无参数对子类的影响
    Java学习笔记6---字符串比较方法compareTo(String str)
    地址总线、数据总线、寻址能力、字长及cpu位数等概念之间的关系
    Alpha事后诸葛亮
    第05组 Alpha冲刺(4/4)
  • 原文地址:https://www.cnblogs.com/koboshi/p/4298565.html
Copyright © 2020-2023  润新知