• Sphinx之配置文件


    #
    # Sphinx configuration file sample
    #
    # WARNING! While this sample file mentions all available options,
    # it contains (very) short helper descriptions only. Please refer to
    # doc/sphinx.html for details.
    #
    
    #############################################################################
    ## data source definition
    #############################################################################
    
    source src1
    {
        # data source type. mandatory, no default value
        # known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
        type            = mysql
    
        #####################################################################
        ## SQL settings (for 'mysql' and 'pgsql' types)
        #####################################################################
    
        # some straightforward parameters for SQL source types
        sql_host        = localhost
        sql_user        = test
        sql_pass        =
        sql_db            = test
        sql_port        = 3306    # optional, default is 3306
    
        # UNIX socket name
        # optional, default is empty (reuse client library defaults)
        # usually '/var/lib/mysql/mysql.sock' on Linux
        # usually '/tmp/mysql.sock' on FreeBSD
        #
        # sql_sock        = /tmp/mysql.sock
    
    
        # MySQL specific client connection flags
        # optional, default is 0
        #
        # mysql_connect_flags    = 32 # enable compression
    
        # MySQL specific SSL certificate settings
        # optional, defaults are empty
        #
        # mysql_ssl_cert        = /etc/ssl/client-cert.pem
        # mysql_ssl_key        = /etc/ssl/client-key.pem
        # mysql_ssl_ca        = /etc/ssl/cacert.pem
    
        # MS SQL specific Windows authentication mode flag
        # MUST be in sync with charset_type index-level setting
        # optional, default is 0
        #
        # mssql_winauth        = 1 # use currently logged on user credentials
    
    
        # ODBC specific DSN (data source name)
        # mandatory for odbc source type, no default value
        #
        # odbc_dsn        = DBQ=C:\data;DefaultDir=C:\data;Driver={Microsoft Text Driver (*.txt; *.csv)};
        # sql_query        = SELECT id, data FROM documents.csv
    
    
        # ODBC and MS SQL specific, per-column buffer sizes
        # optional, default is auto-detect
        #
        # sql_column_buffers    = content=12M, comments=1M
    
    
        # pre-query, executed before the main fetch query
        # multi-value, optional, default is empty list of queries
        # sql_query_pre:取数前执行的预处理查询,比如下面设置字符集的语句
        # sql_query_pre        = SET NAMES utf8
        # sql_query_pre        = SET SESSION query_cache_type=OFF
    
    
        # main document fetch query
        # mandatory, integer document ID field MUST be the first selected column
        sql_query        = \
            SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \
            FROM documents
    
    
        # joined/payload field fetch query
        # joined fields let you avoid (slow) JOIN and GROUP_CONCAT
        # payload fields let you attach custom per-keyword values (eg. for ranking)
        #
        # syntax is FIELD-NAME 'from'  ( 'query' | 'payload-query' ); QUERY
        # joined field QUERY should return 2 columns (docid, text)
        # payload field QUERY should return 3 columns (docid, keyword, weight)
        #
        # REQUIRES that query results are in ascending document ID order!
        # multi-value, optional, default is empty list of queries
        #
        # sql_joined_field    = tags from query; SELECT docid, CONCAT('tag',tagid) FROM tags ORDER BY docid ASC
        # sql_joined_field    = wtags from payload-query; SELECT docid, tag, tagweight FROM tags ORDER BY docid ASC
    
    
        # file based field declaration
        #
        # content of this field is treated as a file name
        # and the file gets loaded and indexed in place of a field
        #
        # max file size is limited by max_file_field_buffer indexer setting
        # file IO errors are non-fatal and get reported as warnings
        #
        # sql_file_field        = content_file_path
    
    
        # range query setup, query that must return min and max ID values
        # optional, default is empty
        #
        # sql_query will need to reference $start and $end boundaries
        # if using ranged query:
        #
        # sql_query        = \
        #    SELECT doc.id, doc.id AS group, doc.title, doc.data \
        #    FROM documents doc \
        #    WHERE id>=$start AND id<=$end
        # 区间查询 防止锁表 查询出最小的ID 和最大的ID
        # sql_query_range        = SELECT MIN(id),MAX(id) FROM documents
        # range query step
        # optional, default is 1024
        # 在最大和最小的ID中 加入步长默认1024 下面注释的是1000
        # sql_range_step        = 1000
        # e.g. ID range (1,2550) with step 1000 => 3 queries: (1,1000), (1001,2000), (2001,2550)
        # sql_query = SELECT * FROM documents WHERE id>=$start AND id<=$end
        # unsigned integer attribute declaration
        # multi-value (an arbitrary number of attributes is allowed), optional
        # optional bit size can be specified, default is 32
        #
        # sql_attr_uint        = author_id
        # sql_attr_uint        = forum_id:9 # 9 bits for forum_id
        sql_attr_uint        = group_id
    
        # boolean attribute declaration
        # multi-value (an arbitrary number of attributes is allowed), optional
        # equivalent to sql_attr_uint with 1-bit size
        #
        # sql_attr_bool        = is_deleted
    
    
        # bigint attribute declaration
        # multi-value (an arbitrary number of attributes is allowed), optional
        # declares a signed (unlike uint!) 64-bit attribute
        #
        # sql_attr_bigint        = my_bigint_id
    
    
        # UNIX timestamp attribute declaration
        # multi-value (an arbitrary number of attributes is allowed), optional
        # similar to integer, but can also be used in date functions
        #
        # sql_attr_timestamp    = posted_ts
        # sql_attr_timestamp    = last_edited_ts
        sql_attr_timestamp    = date_added
    
    
        # floating point attribute declaration
        # multi-value (an arbitrary number of attributes is allowed), optional
        # values are stored in single precision, 32-bit IEEE 754 format
        #
        # sql_attr_float        = lat_radians
        # sql_attr_float        = long_radians
    
    
        # multi-valued attribute (MVA) attribute declaration
        # multi-value (an arbitrary number of attributes is allowed), optional
        # MVA values are variable length lists of unsigned 32-bit integers
        #
        # syntax is ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY]
        # ATTR-TYPE is 'uint' or 'timestamp'
        # SOURCE-TYPE is 'field', 'query', or 'ranged-query'
        # QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs
        # RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range'
        #
        # sql_attr_multi        = uint tag from query; SELECT docid, tagid FROM tags
        # sql_attr_multi        = uint tag from ranged-query; \
        #    SELECT docid, tagid FROM tags WHERE id>=$start AND id<=$end; \
        #    SELECT MIN(docid), MAX(docid) FROM tags
    
    
        # string attribute declaration
        # multi-value (an arbitrary number of these is allowed), optional
        # lets you store and retrieve strings
        #
        # sql_attr_string        = stitle
    
    
        # JSON attribute declaration
        # multi-value (an arbitrary number of these is allowed), optional
        # lets you store a JSON document as an (in-memory) attribute for later use
        #
        # sql_attr_json        = properties
    
    
        # combined field plus attribute declaration (from a single column)
        # stores column as an attribute, but also indexes it as a full-text field
        #
        # sql_field_string    = author
    
        
        # post-query, executed on sql_query completion
        # optional, default is empty
        # sql_query_post查询完后的操作 比如做清理操作
        # sql_query_post        =
        # 注释说明:sql_query_post 和 sql_query_post_index 有什么区别?
        # 前者是当Sphinx从数据库中得到数据后,立刻就会运行,而后者只有当索引真正成功建立后才会运行,
        # 这个区别还是很重要的。对于真正严格的程序,不应该在sql_query_pre和sql_query_post中
        # 更新增量时间,而应该在sql_query_post_index中更新增量时间。
        # 还有一个区别是sql_query_post和sql_query_post_index是存在于两个不同的tcp连接中,
        # 因为Sphinx从数据库中得到数据后去建索引,将会花费很长时间,所以它会将数据库连接关闭,
        # 等到索引建好之后,再去连接数据库,所以sql_query_post_index会在另一个连接中
        # post-index-query, executed on successful indexing completion
        # optional, default is empty
        # $maxid expands to max document ID actually fetched from DB
        # 以执行任何必要的最后的清理; 下面就是插入索引的最大值
        # sql_query_post_index    = REPLACE INTO counters ( id, val ) \
        #    VALUES ( 'max_indexed_id', $maxid )
    
    
        # ranged query throttling, in milliseconds
        # optional, default is 0 which means no delay
        # enforces given delay before each query step
        sql_ranged_throttle    = 0
    
    
        # kill-list query, fetches the document IDs for kill-list
        # k-list will suppress matches from preceding indexes in the same query
        # optional, default is empty
        #
        # sql_query_killlist    = SELECT id FROM documents WHERE edited>=@last_reindex
    
    
        # columns to unpack on indexer side when indexing
        # multi-value, optional, default is empty list
        #
        # unpack_zlib        = zlib_column
        # unpack_mysqlcompress    = compressed_column
        # unpack_mysqlcompress    = compressed_column_2
    
    
        # maximum unpacked length allowed in MySQL COMPRESS() unpacker
        # optional, default is 16M
        #
        # unpack_mysqlcompress_maxsize    = 16M
    
    
        # hook command to run when SQL connection succeeds
        # optional, default value is empty (do nothing)
        #
        # hook_connect            = bash sql_connect.sh
    
    
        # hook command to run after (any) SQL range query
        # it may print out "minid maxid" (w/o quotes) to override the range
        # optional, default value is empty (do nothing)
        #
        # hook_query_range        = bash sql_query_range.sh
    
    
        # hook command to run on successful indexing completion
        # $maxid expands to max document ID actually fetched from DB
        # optional, default value is empty (do nothing)
        #
        # hook_post_index        = bash sql_post_index.sh $maxid
    
        #####################################################################
        ## xmlpipe2 settings
        #####################################################################
    
        # type            = xmlpipe2
    
        # shell command to invoke xmlpipe stream producer
        # mandatory
        #
        # xmlpipe_command        = cat /var/test.xml
    
        # xmlpipe2 field declaration
        # multi-value, optional, default is empty
        #
        # xmlpipe_field        = subject
        # xmlpipe_field        = content
    
    
        # xmlpipe2 attribute declaration
        # multi-value, optional, default is empty
        # all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX
        # examples:
        #
        # xmlpipe_attr_timestamp    = published
        # xmlpipe_attr_uint    = author_id
        # xmlpipe_attr_bool    = is_enabled
        # xmlpipe_attr_float    = latitude
        # xmlpipe_attr_bigint    = guid
        # xmlpipe_attr_multi    = tags
        # xmlpipe_attr_multi_64    = tags64
        # xmlpipe_attr_string    = title
        # xmlpipe_attr_json    = extra_data
        # xmlpipe_field_string    = content
    
    
        # perform UTF-8 validation, and filter out incorrect codes
        # avoids XML parser choking on non-UTF-8 documents
        # optional, default is 0
        #
        # xmlpipe_fixup_utf8    = 1
    }
    
    
    # inherited source example
    #
    # all the parameters are copied from the parent source,
    # and may then be overridden in this source definition
    source src1throttled : src1
    {
        sql_ranged_throttle    = 100
    }
    
    #############################################################################
    ## index definition
    #############################################################################
    
    # local index example
    #
    # this is an index which is stored locally in the filesystem
    #
    # all indexing-time options (such as morphology and charsets)
    # are configured per local index
    index test1
    {
        # index type
        # optional, default is 'plain'
        # known values are 'plain', 'distributed', and 'rt' (see samples below)
        # type            = plain
    
        # document source(s) to index
        # multi-value, mandatory
        # document IDs must be globally unique across all sources
        source            = src1
    
        # index files path and file name, without extension
        # mandatory, path must be writable, extensions will be auto-appended
        path            = /var/data/test1
    
        # document attribute values (docinfo) storage mode
        # optional, default is 'extern'
        # known values are 'none', 'extern' and 'inline'
        docinfo            = extern
    
        # dictionary type, 'crc' or 'keywords'
        # crc is faster to index when no substring/wildcards searches are needed
        # crc with substrings might be faster to search but is much slower to index
        # (because all substrings are pre-extracted as individual keywords)
        # keywords is much faster to index with substrings, and index is much (3-10x) smaller
        # keywords supports wildcards, crc does not, and never will
        # optional, default is 'keywords'
        dict            = keywords
    
        # memory locking for cached data (.spa and .spi), to prevent swapping
        # optional, default is 0 (do not mlock)
        # requires searchd to be run from root
        mlock            = 0
    
        # a list of morphology preprocessors to apply
        # optional, default is empty
        #
        # builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
        # 'soundex', and 'metaphone'; additional preprocessors available from
        # libstemmer are 'libstemmer_XXX', where XXX is algorithm code
        # (see libstemmer_c/libstemmer/modules.txt)
        #
        # morphology        = stem_en, stem_ru, soundex
        # morphology        = libstemmer_german
        # morphology        = libstemmer_sv
        morphology        = none
    
        # minimum word length at which to enable stemming
        # optional, default is 1 (stem everything)
        #
        # min_stemming_len    = 1
    
    
        # stopword files list (space separated)
        # optional, default is empty
        # contents are plain text, charset_table and stemming are both applied
        #
        # stopwords        = /var/data/stopwords.txt
    
    
        # wordforms file, in "mapfrom > mapto" plain text format
        # optional, default is empty
        #
        # wordforms        = /var/data/wordforms.txt
    
    
        # tokenizing exceptions file
        # optional, default is empty
        #
        # plain text, case sensitive, space insensitive in map-from part
        # one "Map Several Words => ToASingleOne" entry per line
        #
        # exceptions        = /var/data/exceptions.txt
    
    
        # embedded file size limit
        # optional, default is 16K
        #
        # exceptions, wordforms, and stopwords files smaller than this limit
        # are stored in the index; otherwise, their paths and sizes are stored
        #
        # embedded_limit        = 16K
    
        # minimum indexed word length
        # default is 1 (index everything)
        min_word_len        = 1
    
    
        # ignored characters list
        # optional, default value is empty
        #
        # ignore_chars        = U+00AD
    
    
        # minimum word prefix length to index
        # optional, default is 0 (do not index prefixes)
        #
        # min_prefix_len        = 0
    
    
        # minimum word infix length to index
        # optional, default is 0 (do not index infixes)
        #
        # min_infix_len        = 0
    
    
        # maximum substring (prefix or infix) length to index
        # optional, default is 0 (do not limit substring length)
        #
        # max_substring_len    = 8
    
    
        # list of fields to limit prefix/infix indexing to
        # optional, default value is empty (index all fields in prefix/infix mode)
        #
        # prefix_fields        = filename
        # infix_fields        = url, domain
    
    
        # expand keywords with exact forms and/or stars when searching fit indexes
        # search-time only, does not affect indexing, can be 0 or 1
        # optional, default is 0 (do not expand keywords)
        #
        # expand_keywords        = 1
    
        
        # n-gram length to index, for CJK indexing
        # only supports 0 and 1 for now, other lengths to be implemented
        # optional, default is 0 (disable n-grams)
        #
        # ngram_len        = 1
    
    
        # n-gram characters list, for CJK indexing
        # optional, default is empty
        #
        # ngram_chars        = U+3000..U+2FA1F
    
    
        # phrase boundary characters list
        # optional, default is empty
        #
        # phrase_boundary        = ., ?, !, U+2026 # horizontal ellipsis
    
    
        # phrase boundary word position increment
        # optional, default is 0
        #
        # phrase_boundary_step    = 100
    
    
        # blended characters list
        # blended chars are indexed both as separators and valid characters
        # for instance, AT&T will result in 3 tokens ("at", "t", and "at&t")
        # optional, default is empty
        #
        # blend_chars        = +, &, U+23
    
    
        # blended token indexing mode
        # a comma separated list of blended token indexing variants
        # known variants are trim_none, trim_head, trim_tail, trim_both, skip_pure
        # optional, default is trim_none
        #
        # blend_mode        = trim_tail, skip_pure
    
    
        # whether to strip HTML tags from incoming documents
        # known values are 0 (do not strip) and 1 (do strip)
        # optional, default is 0
        html_strip        = 0
    
        # what HTML attributes to index if stripping HTML
        # optional, default is empty (do not index anything)
        #
        # html_index_attrs    = img=alt,title; a=title;
    
    
        # what HTML elements contents to strip
        # optional, default is empty (do not strip element contents)
        #
        # html_remove_elements    = style, script
    
    
        # whether to preopen index data files on startup
        # optional, default is 0 (do not preopen), searchd-only
        #
        # preopen            = 1
    
    
        # whether to enable in-place inversion (2x less disk, 90-95% speed)
        # optional, default is 0 (use separate temporary files), indexer-only
        #
        # inplace_enable        = 1
    
    
        # in-place fine-tuning options
        # optional, defaults are listed below
        #
        # inplace_hit_gap        = 0 # preallocated hitlist gap size
        # inplace_docinfo_gap    = 0 # preallocated docinfo gap size
        # inplace_reloc_factor    = 0.1 # relocation buffer size within arena
        # inplace_write_factor    = 0.1 # write buffer size within arena
    
    
        # whether to index original keywords along with stemmed versions
        # enables "=exactform" operator to work
        # optional, default is 0
        #
        # index_exact_words    = 1
    
    
        # position increment on overshort (less than min_word_len) words
        # optional, allowed values are 0 and 1, default is 1
        #
        # overshort_step        = 1
    
    
        # position increment on stopword
        # optional, allowed values are 0 and 1, default is 1
        #
        # stopword_step        = 1
    
    
        # hitless words list
        # positions for these keywords will not be stored in the index
        # optional, allowed values are 'all', or a list file name
        #
        # hitless_words        = all
        # hitless_words        = hitless.txt
    
    
        # detect and index sentence and paragraph boundaries
        # required for the SENTENCE and PARAGRAPH operators to work
        # optional, allowed values are 0 and 1, default is 0
        #
        # index_sp            = 1
    
    
        # index zones, delimited by HTML/XML tags
        # a comma separated list of tags and wildcards
        # required for the ZONE operator to work
        # optional, default is empty string (do not index zones)
        #
        # index_zones        = title, h*, th
    
    
        # index per-document and average per-index field lengths, in tokens
        # required for the BM25A(), BM25F() in expression ranker
        # optional, default is 0 (do not index field lengths)
        #
        # index_field_lengths    = 1
    
    
        # regular expressions (regexps) to filter the fields and queries with
        # gets applied to data source fields when indexing
        # gets applied to search queries when searching
        # multi-value, optional, default is empty list of regexps
        #
        # regexp_filter        = (\d+)\" => \1inch
        # regexp_filter        = (blue|red) => color
    
    
        # list of the words considered frequent with respect to bigram indexing
        # optional, default is empty
        #
        # bigram_freq_words    = the, a, i, you, my
    
    
        # bigram indexing mode
        # known values are none, all, first_freq, both_freq
        # optional, default is none (do not index bigrams)
        #
        # bigram_index        = both_freq
    
    
        # snippet document file name prefix
        # prepended to file names when generating snippets using load_files option
        # WARNING, this is a prefix (not a path), trailing slash matters!
        # optional, default is empty
        #
        # snippets_file_prefix    = /mnt/mydocs/server1
    
    
        # whether to apply stopwords before or after stemming
        # optional, default is 0 (apply stopwords after stemming)
        #
        # stopwords_unstemmed    = 0
    
    
        # path to a global (cluster-wide) keyword IDFs file
        # optional, default is empty (use local IDFs)
        #
        # global_idf        = /usr/local/sphinx/var/global.idf
    }
    
    
    # inherited index example
    #
    # all the parameters are copied from the parent index,
    # and may then be overridden in this index definition
    index test1stemmed : test1
    {
        path            = /var/data/test1stemmed
        morphology        = stem_en
    }
    
    
    # distributed index example
    #
    # this is a virtual index which can NOT be directly indexed,
    # and only contains references to other local and/or remote indexes
    index dist1
    {
        # 'distributed' index type MUST be specified
        type            = distributed
    
        # local index to be searched
        # there can be many local indexes configured
        local            = test1
        local            = test1stemmed
    
        # remote agent
        # multiple remote agents may be specified
        # syntax for TCP connections is 'hostname:port:index1,[index2[,...]]'
        # syntax for local UNIX connections is '/path/to/socket:index1,[index2[,...]]'
        agent            = localhost:9313:remote1
        agent            = localhost:9314:remote2,remote3
        # agent            = /var/run/searchd.sock:remote4
    
        # remote agent mirrors groups, aka mirrors, aka HA agents
        # defines 2 or more interchangeable mirrors for a given index part
        #
        # agent            = server3:9312 | server4:9312 :indexchunk2
        # agent            = server3:9312:chunk2server3 | server4:9312:chunk2server4
        # agent            = server3:chunk2server3 | server4:chunk2server4
        # agent            = server21|server22|server23:chunk2
    
    
        # blackhole remote agent, for debugging/testing
        # network errors and search results will be ignored
        #
        # agent_blackhole        = testbox:9312:testindex1,testindex2
    
    
        # persistently connected remote agent
        # reduces connect() pressure, requires workers = threads in searchd
        #
        # agent_persistent        = testbox:9312:testindex1,testindex2
    
    
        # remote agent connection timeout, milliseconds
        # optional, default is 1000 ms, ie. 1 sec
        agent_connect_timeout    = 1000
    
        # remote agent query timeout, milliseconds
        # optional, default is 3000 ms, ie. 3 sec
        agent_query_timeout        = 3000
    
        # HA mirror agent strategy
        # optional, default is random (pick a random mirror)
        # known values are nodeads, noerrors, roundrobin, nodeadstm, noerrorstm
        #
        # ha_strategy                = nodeads
    
        # path to RLP context file
        # optional, default is empty
        #
        # rlp_context = /usr/local/share/sphinx/rlp/rlp-context.xml
    }
    
    
    # realtime index example
    #
    # you can run INSERT, REPLACE, and DELETE on this index on the fly
    # using MySQL protocol (see 'listen' directive below)
    index rt
    {
        # 'rt' index type must be specified to use RT index
        type            = rt
    
        # index files path and file name, without extension
        # mandatory, path must be writable, extensions will be auto-appended
        path            = /var/data/rt
    
        # RAM chunk size limit
        # RT index will keep at most this much data in RAM, then flush to disk
        # optional, default is 128M
        #
        # rt_mem_limit        = 512M
    
        # full-text field declaration
        # multi-value, mandatory
        rt_field        = title
        rt_field        = content
    
        # unsigned integer attribute declaration
        # multi-value (an arbitrary number of attributes is allowed), optional
        # declares an unsigned 32-bit attribute
        rt_attr_uint        = gid
    
        # RT indexes currently support the following attribute types:
        # uint, bigint, float, timestamp, string, mva, mva64, json
        #
        # rt_attr_bigint        = guid
        # rt_attr_float        = gpa
        # rt_attr_timestamp    = ts_added
        # rt_attr_string        = author
        # rt_attr_multi        = tags
        # rt_attr_multi_64    = tags64
        # rt_attr_json        = extra_data
    }
    
    #############################################################################
    ## indexer settings
    #############################################################################
    
    indexer
    {
        # memory limit, in bytes, kilobytes (16384K) or megabytes (256M)
        # optional, default is 128M, max is 2047M, recommended is 256M to 1024M
        mem_limit        = 128M
    
        # maximum IO calls per second (for I/O throttling)
        # optional, default is 0 (unlimited)
        #
        # max_iops        = 40
    
    
        # maximum IO call size, bytes (for I/O throttling)
        # optional, default is 0 (unlimited)
        #
        # max_iosize        = 1048576
    
    
        # maximum xmlpipe2 field length, bytes
        # optional, default is 2M
        #
        # max_xmlpipe2_field    = 4M
    
    
        # write buffer size, bytes
        # several (currently up to 4) buffers will be allocated
        # write buffers are allocated in addition to mem_limit
        # optional, default is 1M
        #
        # write_buffer        = 1M
    
    
        # maximum file field adaptive buffer size
        # optional, default is 8M, minimum is 1M
        #
        # max_file_field_buffer    = 32M
    
    
        # how to handle IO errors in file fields
        # known values are 'ignore_field', 'skip_document', and 'fail_index'
        # optional, default is 'ignore_field'
        #
        # on_file_field_error = skip_document
    
    
        # lemmatizer cache size
        # improves the indexing time when the lemmatization is enabled
        # optional, default is 256K
        #
        # lemmatizer_cache = 512M
    }
    
    #############################################################################
    ## searchd settings
    #############################################################################
    
    searchd
    {
        # [hostname:]port[:protocol], or /unix/socket/path to listen on
        # known protocols are 'sphinx' (SphinxAPI) and 'mysql41' (SphinxQL)
        #
        # multi-value, multiple listen points are allowed
        # optional, defaults are 9312:sphinx and 9306:mysql41, as below
        #
        # listen            = 127.0.0.1
        # listen            = 192.168.0.1:9312
        # listen            = 9312
        # listen            = /var/run/searchd.sock
        listen            = 9312
        listen            = 9306:mysql41
    
        # log file, searchd run info is logged here
        # optional, default is 'searchd.log'
        log            = /var/log/searchd.log
    
        # query log file, all search queries are logged here
        # optional, default is empty (do not log queries)
        query_log        = /var/log/query.log
    
        # client read timeout, seconds
        # optional, default is 5
        read_timeout        = 5
    
        # request timeout, seconds
        # optional, default is 5 minutes
        client_timeout        = 300
    
        # maximum amount of children to fork (concurrent searches to run)
        # optional, default is 0 (unlimited)
        max_children        = 30
    
        # maximum amount of persistent connections from this master to each agent host
        # optional, but necessary if you use agent_persistent. It is reasonable to set the value
        # as max_children, or less on the agent's hosts.
        persistent_connections_limit    = 30
    
        # PID file, searchd process ID file name
        # mandatory
        pid_file        = /var/log/searchd.pid
    
        # seamless rotate, prevents rotate stalls if precaching huge datasets
        # optional, default is 1
        seamless_rotate        = 1
    
        # whether to forcibly preopen all indexes on startup
        # optional, default is 1 (preopen everything)
        preopen_indexes        = 1
    
        # whether to unlink .old index copies on successful rotation.
        # optional, default is 1 (do unlink)
        unlink_old        = 1
    
        # attribute updates periodic flush timeout, seconds
        # updates will be automatically dumped to disk this frequently
        # optional, default is 0 (disable periodic flush)
        #
        # attr_flush_period    = 900
    
    
        # MVA updates pool size
        # shared between all instances of searchd, disables attr flushes!
        # optional, default size is 1M
        mva_updates_pool    = 1M
    
        # max allowed network packet size
        # limits both query packets from clients, and responses from agents
        # optional, default size is 8M
        max_packet_size        = 8M
    
        # max allowed per-query filter count
        # optional, default is 256
        max_filters        = 256
    
        # max allowed per-filter values count
        # optional, default is 4096
        max_filter_values    = 4096
    
    
        # socket listen queue length
        # optional, default is 5
        #
        # listen_backlog        = 5
    
    
        # per-keyword read buffer size
        # optional, default is 256K
        #
        # read_buffer        = 256K
    
    
        # unhinted read size (currently used when reading hits)
        # optional, default is 32K
        #
        # read_unhinted        = 32K
    
    
        # max allowed per-batch query count (aka multi-query count)
        # optional, default is 32
        max_batch_queries    = 32
    
    
        # max common subtree document cache size, per-query
        # optional, default is 0 (disable subtree optimization)
        #
        # subtree_docs_cache    = 4M
    
    
        # max common subtree hit cache size, per-query
        # optional, default is 0 (disable subtree optimization)
        #
        # subtree_hits_cache    = 8M
    
    
        # multi-processing mode (MPM)
        # known values are none, fork, prefork, and threads
        # threads is required for RT backend to work
        # optional, default is threads
        workers            = threads # for RT to work
    
    
        # max threads to create for searching local parts of a distributed index
        # optional, default is 0, which means disable multi-threaded searching
        # should work with all MPMs (ie. does NOT require workers=threads)
        #
        # dist_threads        = 4
    
    
        # binlog files path; use empty string to disable binlog
        # optional, default is build-time configured data directory
        #
        # binlog_path        = # disable logging
        # binlog_path        = /var/data # binlog.001 etc will be created there
    
    
        # binlog flush/sync mode
        # 0 means flush and sync every second
        # 1 means flush and sync every transaction
        # 2 means flush every transaction, sync every second
        # optional, default is 2
        #
        # binlog_flush        = 2
    
    
        # binlog per-file size limit
        # optional, default is 128M, 0 means no limit
        #
        # binlog_max_log_size    = 256M
    
    
        # per-thread stack size, only affects workers=threads mode
        # optional, default is 64K
        #
        # thread_stack            = 128K
    
    
        # per-keyword expansion limit (for dict=keywords prefix searches)
        # optional, default is 0 (no limit)
        #
        # expansion_limit        = 1000
    
    
        # RT RAM chunks flush period
        # optional, default is 0 (no periodic flush)
        #
        # rt_flush_period        = 900
    
    
        # query log file format
        # optional, known values are plain and sphinxql, default is plain
        #
        # query_log_format        = sphinxql
    
    
        # version string returned to MySQL network protocol clients
        # optional, default is empty (use Sphinx version)
        #
        # mysql_version_string    = 5.0.37
    
    
        # default server-wide collation
        # optional, default is libc_ci
        #
        # collation_server        = utf8_general_ci
    
    
        # server-wide locale for libc based collations
        # optional, default is C
        #
        # collation_libc_locale    = ru_RU.UTF-8
    
    
        # threaded server watchdog (only used in workers=threads mode)
        # optional, values are 0 and 1, default is 1 (watchdog on)
        #
        # watchdog                = 1
    
        
        # costs for max_predicted_time model, in (imaginary) nanoseconds
        # optional, default is "doc=64, hit=48, skip=2048, match=64"
        #
        # predicted_time_costs    = doc=64, hit=48, skip=2048, match=64
    
    
        # current SphinxQL state (uservars etc) serialization path
        # optional, default is none (do not serialize SphinxQL state)
        #
        # sphinxql_state            = sphinxvars.sql
    
    
        # maximum RT merge thread IO calls per second, and per-call IO size
        # useful for throttling (the background) OPTIMIZE INDEX impact
        # optional, default is 0 (unlimited)
        #
        # rt_merge_iops            = 40
        # rt_merge_maxiosize        = 1M
    
    
        # interval between agent mirror pings, in milliseconds
        # 0 means disable pings
        # optional, default is 1000
        #
        # ha_ping_interval        = 0
    
    
        # agent mirror statistics window size, in seconds
        # stats older than the window size (karma) are retired
        # that is, they will not affect master choice of agents in any way
        # optional, default is 60 seconds
        #
        # ha_period_karma            = 60
    
    
        # delay between preforked children restarts on rotation, in milliseconds
        # optional, default is 0 (no delay)
        #
        # prefork_rotation_throttle    = 100
    
    
        # a prefix to prepend to the local file names when creating snippets
        # with load_files and/or load_files_scatter options
        # optional, default is empty
        #
        # snippets_file_prefix        = /mnt/common/server1/
    }
    
    #############################################################################
    ## common settings
    #############################################################################
    
    common
    {
        # NOTE: every option in this section is commented out in this sample
    
        # lemmatizer dictionaries base path
        # optional, default is /usr/local/share (see ./configure --datadir)
        #
        # lemmatizer_base = /usr/local/share/sphinx/dicts
    
    
        # how to handle syntax errors in JSON attributes
        # known values are 'ignore_attr' and 'fail_index'
        # optional, default is 'ignore_attr'
        #
        # on_json_attr_error = fail_index
    
    
        # whether to auto-convert numeric values from strings in JSON attributes
        # with auto-conversion, string value with actually numeric data
        # (as in {"key":"12345"}) gets stored as a number, rather than string
        # optional, allowed values are 0 and 1, default is 0 (do not convert)
        #
        # json_autoconv_numbers = 1
    
    
        # whether and how to auto-convert key names in JSON attributes
        # known value is 'lowercase'
        # optional, default is unspecified (do nothing)
        #
        # json_autoconv_keynames = lowercase
    
    
        # path to RLP root directory
        # optional, default is /usr/local/share (see ./configure --datadir)
        #
        # rlp_root = /usr/local/share/sphinx/rlp
    
    
        # path to RLP environment file
        # optional, default is /usr/local/share/rlp-environment.xml (see ./configure --datadir)
        #
        # rlp_environment = /usr/local/share/sphinx/rlp/rlp/etc/rlp-environment.xml
    
    
        # maximum total size of documents batched before processing them by the RLP
        # optional, default is 51200
        #
        # rlp_max_batch_size = 100k
    
    
        # maximum number of documents batched before processing them by the RLP
        # optional, default is 50
        #
        # rlp_max_batch_docs = 100
    
    
        # trusted plugin directory
        # optional, default is empty (disable UDFs)
        #
        # plugin_dir            = /usr/local/sphinx/lib
    
    }
    
    # --eof--
  • 相关阅读:
    完成一个MVC+Nhibernate+Jquery-EasyUI信息发布系统
    SQL Server监控清单
    三大线性排序之基数排序
    使用C#实现DHT磁力搜索的BT种子后端管理程序+数据库设计(开源)
    复制中的大批量更新
    碎片解决方案
    老猫总结的如何发布文件到手机中
    FMX对象释放
    MFC 窗口重绘问题
    如何将内存中的位图数据绘制在DC上
  • 原文地址:https://www.cnblogs.com/similarface/p/5133723.html
Copyright © 2020-2023  润新知