1.安装前请先确定安装了常用的组件
yum install -y python python-devel
2.安装sphinx
tar zxvf sphinx-2.2.10-release.tar.gz
cd sphinx-2.2.10-release
./configure --prefix=/usr/local/sphinx --with-mysql
make && make install
在make时如果出现undefined reference to libiconv的错误
错误描述
/usr/local/sphinx/src/sphinx.cpp:20060:undefined reference to `libiconv_open'
/usr/local/sphinx/src/sphinx.cpp:20078: undefined reference to `libiconv'
/usr/local/sphinx/src/sphinx.cpp:20084: undefined reference to `libiconv_close'
collect2: ld returned 1 exit status
make[2]: *** [indexer] Error 1
make[2]:Leaving directory `/home/sphinx/src'
make[1]: *** [all] Error 2
make[1]: Leaving directory `/home/sphinx/src'
make: *** [all-recursive] Error 1
解决办法:
打开configure文件,找到“#define USE_LIBICONV 1”,将注释去掉,并将1改成0。
3.libsphinxclient 安装(PHP模块需要)
cd api/libsphinxclient
./configure --prefix=/usr/local/sphinx
make && make install
4.安装PHP的Sphinx模块
下载地址:http://pecl.php.net/package/sphinx
wget http://pecl.php.net/get/sphinx-1.3.3.tgz
tar zxf sphinx-1.3.3.tgz
cd sphinx-1.3.3
/usr/local/php/bin/phpize
./configure --with-php-config=/usr/local/php/bin/php-config --with-sphinx=/usr/local/sphinx/
make && make install
5.vi /etc/php.ini
添加 :extension = sphinx.so
重启:service php-fpm restart(nginx服务器)
这样配置就完成了,接下来要生成索引和开启守护进程:
/usr/local/sphinx/bin/indexer -c /usr/local/sphinx/etc/test.conf --all
/usr/local/sphinx/bin/searchd -c /usr/local/sphinx/etc/test.conf
如果数据库更新了,需要重新建立索引,重输一遍上面建立索引的指令就行
如果重建索引时守护进程正在运行,会报错,需要运行下面的指令,会重建索引并且重开守护进程
/usr/local/sphinx/bin/indexer -c /usr/local/sphinx/etc/test.conf --all --rotate
关于test.conf的配置:
source message1
{
type = mysql
sql_host = 127.0.0.1
sql_user = root
sql_pass = 123456
sql_db = yh_comment
sql_port = 3308 # optional, default is 3306
sql_query_pre = SET NAMES utf8
sql_query =
SELECT id,bid,uid,content,valid,createtime
FROM tbl_cmt00
UNION SELECT id,bid,uid,content,valid,createtime
FROM tbl_cmt01
UNION SELECT id,bid,uid,content,valid,createtime
FROM tbl_cmt02
UNION SELECT id,bid,uid,content,valid,createtime
FROM tbl_cmt03
sql_field_string = bid
sql_attr_uint = uid
sql_field_string = content
sql_attr_string = valid
sql_field_string = createtime
}
index message1
{
source = message1
path = /usr/local/sphinx/var/data/message1
ngram_len = 1 #表示使用一元字符切分模式,从而得以对单个中文字符进行索引
ngram_chars = U+3000..U+2FA1F, U+FF41..U+FF5A->a..z, U+FF21..U+FF3A->a..z, A..Z->a..z, a..z #表示要进行一元字符切分模式的字符集
}
indexer
{
mem_limit = 128M
}
searchd
{
listen = 9412
log = /usr/local/sphinx/var/log/searchd-message1.log
query_log = /usr/local/sphinx/var/log/query-message1.log
read_timeout = 5
max_children = 30
pid_file = /usr/local/sphinx/var/log/searchd-message1.pid
#max_matches = 1000
seamless_rotate = 1
preopen_indexes = 1
unlink_old = 1
workers = threads # for RT to work
binlog_path = /usr/local/sphinx/var/data
}
上面的配置是已经开启“一元切分模式”。
详细的配置介绍:
#定义一个数据源
source search_main
{
#定义数据库类型
type = mysql
#定义数据库的IP或者计算机名
sql_host = localhost
#定义连接数据库的帐号
sql_user = root
#定义链接数据库的密码
sql_pass = test123
#定义数据库名称
sql_db = test
#定义连接数据库后取数据之前执行的SQL语句
sql_query_pre = SET NAMES utf8
sql_query_pre = SET SESSION query_cache_type=OFF
#创建一个sph_counter用于增量索引
sql_query_pre = CREATE TABLE IF NOT EXISTS sph_counter
( counter_id INTEGER PRIMARY KEY NOT NULL, max_doc_id INTEGER NOT NULL)
#取数据之前将表的最大id记录到sph_counter表中
sql_query_pre = REPLACE INTO sph_counter SELECT 1, MAX(searchid) FROM v9_search
#定义取数据的SQL,第一列ID列必须为唯一的正整数值
sql_query = SELECT searchid,typeid,id,adddate,data FROM v9_search where
searchid<(SELECT max_doc_id FROM sph_counter WHERE counter_id=1 )
and searchid>=$start AND searchid<=$end
#sql_attr_uint和sql_attr_timestamp用于定义用于api过滤或者排序,写多行制定多列
sql_attr_uint = typeid
sql_attr_uint = id
sql_attr_timestamp = adddate
#分区查询设置
sql_query_range = SELECT MIN(searchid),MAX(searchid) FROM v9_search
#分区查询的步长
sql_range_step = 1000
#设置分区查询的时间间隔
sql_ranged_throttle = 0
#用于CLI的调试
sql_query_info = SELECT * FROM v9_search WHERE searchid=$id
}
#定义一个增量的源
source search_main_delta : search_main
{
sql_query_pre = set names utf8
#增量源只查询上次主索引生成后新增加的数据
#如果新增加的searchid比主索引建立时的searchid还小那么会漏掉
sql_query = SELECT searchid,typeid,id,adddate,data FROM v9_search where
searchid>(SELECT max_doc_id FROM sph_counter WHERE counter_id=1 )
and searchid>=$start AND searchid<=$end
sql_query_range = SELECT MIN(searchid),MAX(searchid) FROM v9_search where
searchid>(SELECT max_doc_id FROM sph_counter WHERE counter_id=1 )
}
#定义一个index_search_main索引
index index_search_main
{
#设置索引的源
source = search_main
#设置生成的索引存放路径
path =/usr/local/coreseek/var/data/index_search_main
#定义文档信息的存储模式,extern表示文档信息和文档id分开存储
docinfo = extern
#设置已缓存数据的内存锁定,为0表示不锁定
mlock = 0
#设置词形处理器列表,设置为none表示不使用任何词形处理器
morphology = none
#定义最小索引词的长度
min_word_len = 1
#设置字符集编码类型,我这里采用的utf8编码和数据库的一致
charset_type = zh_cn.utf-8
#指定分词读取词典文件的位置
charset_dictpath =/usr/local/mmseg3/etc
#不被搜索的词文件里表。
stopwords =/usr/local/coreseek/var/data/stopwords.txt
#定义是否从输入全文数据中取出HTML标记
html_strip = 0
}
#定义增量索引
index index_search_main_delta : index_search_main
{
source = search_main_delta
path =/usr/local/coreseek/var/data/index_search_main_delta
}
#定义indexer配置选项
indexer
{
#定义生成索引过程使用索引的限制
mem_limit = 512M
}
#定义searchd守护进程的相关选项
searchd
{
#定义监听的IP和端口
#listen = 127.0.0.1
#listen =172.16.88.100:3312
listen = 3312
listen = /var/run/searchd.sock
#定义log的位置
log =/usr/local/coreseek/var/log/searchd.log
#定义查询log的位置
query_log =/usr/local/coreseek/var/log/query.log
#定义网络客户端请求的读超时时间
read_timeout = 5
#定义子进程的最大数量
max_children = 300
#设置searchd进程pid文件名
pid_file =/usr/local/coreseek/var/log/searchd.pid
#定义守护进程在内存中为每个索引所保持并返回给客户端的匹配数目的最大值
max_matches = 100000
#启用无缝seamless轮转,防止searchd轮转在需要预取大量数据的索引时停止响应
#也就是说在任何时刻查询都可用,或者使用旧索引,或者使用新索引
seamless_rotate = 1
#配置在启动时强制重新打开所有索引文件
preopen_indexes = 1
#设置索引轮转成功以后删除以.old为扩展名的索引拷贝
unlink_old = 1
# MVA更新池大小,这个参数不太明白
mva_updates_pool = 1M
#最大允许的包大小
max_packet_size = 32M
#最大允许的过滤器数
max_filters = 256
#每个过滤器最大允许的值的个数
max_filter_values = 4096
}