Linux Sphinx/Coreseek安裝 Mysql全文檢索

百聯達發表於2014-09-29

背景:

Coreseek版本:coreseek-4.1-beta.tar.gz

Mysql版本:  mysql-5.5.31.tar.gz

 

一:安裝

tar -zxvf coreseek-4.1-beta.tar.gz

cd coreseek-4.1-beta

cd mmseg-3.2.14

./configure --prefix=/usr/local/mmseg

make

make install

如果報錯,執行下面的操作

aclocal

libtoolize --force

automake --add-missing

autoconf

autoheader

make clean

./configure --prefix=/usr/local/mmseg

make

make install

 

cd csft-4.1/

./buildconf.sh

./configure --prefix=/usr/local/coreseek --with-mysql=/usr/local/mysql --with-mmseg=/usr/local/mmseg --with-mmseg-includes=/usr/local/mmseg/include/mmseg/ --with-mmseg-libs=/usr/local/mmseg/lib/ --without-unixodbc

make

make install

 

二:sphinx.conf引數配置

cd /usr/local/coreseek/etc

cp sphinx.conf.dist  sphinx.conf

vi sphinx.conf

 

三:Mysql資料來源配置

 

#MySQL資料來源配置,詳情請檢視:

#請先將var/test/documents.sql匯入資料庫,並配置好以下的MySQL使用者密碼資料庫

 

#源定義

source mysql

{

    type                    = mysql

 

    sql_host                = 10.1.58.191

    sql_user                = root

    sql_pass                = 123456

    sql_db                    = test

    sql_port                = 3306

    sql_query_pre            = SET NAMES utf8

 

    sql_query                = SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title,title as title1, content FROM documents

                                                              #sql_query第一列id需為整數

                                                              #title、content作為字串/文字欄位,被全文索引

    sql_attr_uint            = group_id           #SQL讀取到的值必須為整數

    sql_attr_timestamp       = date_added #SQL讀取到的值必須為整數,作為時間屬性

    sql_attr_str2ordinal     =  title

 

    sql_query_info_pre      = SET NAMES utf8                                        #命令列查詢時,設定正確的字符集

    sql_query_info            = SELECT * FROM documents WHERE id=$id #命令列查詢時,從資料庫讀取原始資料資訊

}

 

#index定義

index mysql

{

    source            = mysql             #對應的source名稱

    path            = /data/coreseek/mysql/ #請修改為實際使用的絕對路徑,例如:/usr/local/coreseek/var/...

    docinfo            = extern

    mlock            = 0

    morphology        = none

    min_word_len        = 1

    html_strip                = 0

 

    #中文分詞配置,詳情請檢視:

    charset_dictpath = /usr/local/mmseg/etc/ #BSD、Linux環境下設定,/符號結尾

    #charset_dictpath = etc/                             #Windows環境下設定,/符號結尾,最好給出絕對路徑,例如:C:/usr/local/coreseek/etc/...

    charset_type        = zh_cn.utf-8

    ngram_len           = 0

}

 

#全域性index定義

indexer

{

    mem_limit            = 128M

}

 

#searchd服務定義

searchd

{

    listen                  =   9312

    read_timeout        = 5

    max_children        = 30

    max_matches            = 1000

    seamless_rotate        = 0

    preopen_indexes        = 0

    unlink_old            = 1

    pid_file = /data/coreseek/logs/searchd_mysql.pid  #請修改為實際使用的絕對路徑,例如:/usr/local/coreseek/var/...

    log = /data/coreseek/logs/searchd_mysql.log        #請修改為實際使用的絕對路徑,例如:/usr/local/coreseek/var/...

    query_log = /data/coreseek/logs/query_mysql.log #請修改為實際使用的絕對路徑,例如:/usr/local/coreseek/var/...

}

 

 

四:測試

cd /usr/local/src/coreseek-4.1-beta/testpack

/usr/local/coreseek/bin/indexer -c etc/csft_mysql.conf --all

/usr/local/coreseek/bin/search  -c etc/csft_mysql.conf  搜尋關鍵字

五:啟動與停止

/usr/local/coreseek/bin/searchd -c etc/csft_mysql.conf

/usr/local/coreseek/bin/searchd -c etc/csft_mysql.conf --stop

 

六:java客戶端API

         public static void main ( String[] argv ) throws SphinxException

         {

 

 

                   StringBuffer q = new StringBuffer();

                   String host = "10.1.58.191";

                   int port = 9312;

                   int mode = SphinxClient.SPH_MATCH_ALL;

                   String index = "*";

                   int offset = 0;

                   int limit = 20;

                   int sortMode = SphinxClient.SPH_MATCH_EXTENDED;

                   String sortClause = "@relevance DESC,@id DESC";

                   String groupBy = "";

                   String groupSort = "";

 

                   SphinxClient cl = new SphinxClient();

 

 

 

                   cl.SetServer (host, port );

                   cl.SetWeights ( new int[] { 100, 1 } );

                   cl.SetMatchMode ( mode );

                   cl.SetLimits ( offset, limit );

                   cl.SetSortMode ( sortMode, sortClause );

                   if ( groupBy.length()>0 )

                            cl.SetGroupBy ( groupBy, SphinxClient.SPH_GROUPBY_ATTR, groupSort );

                  

                   cl.SetSelect("*");

 

                   SphinxResult res = cl.Query("300", index);

                   if ( res==null )

                   {

                            System.err.println ( "Error: " + cl.GetLastError() );

                            System.exit ( 1 );

                   }

                   if ( cl.GetLastWarning()!=null && cl.GetLastWarning().length()>0 )

                            System.out.println ( "WARNING: " + cl.GetLastWarning() + "\n" );

 

                   /* print me out */

                   System.out.println ( "Query '" + q + "' retrieved " + res.total + " of " + res.totalFound + " matches in " + res.time + " sec." );

                   System.out.println ( "Query stats:" );

                   for ( int i=0; i

                   {

                            SphinxWordInfo wordInfo = res.words[i];

                            System.out.println ( "\t'" + wordInfo.word + "' found " + wordInfo.hits + " times in " + wordInfo.docs + " documents" );

                   }

 

                   System.out.println ( "\nMatches:" );

                   for ( int i=0; i

                   {

                            SphinxMatch info = res.matches[i];

                            //System.out.print ( (i+1) + ". id=" + info.docId + ", weight=" + info.weight );

 

                            //獲取蒐集結果欄位值

                            if ( res.attrNames==null || res.attrTypes==null )

                                     continue;

                           

        

 

                            for ( int a=0; a

                            {

                                     System.out.print ( ", " + res.attrNames[a] + "=" );

 

        

                                               switch ( res.attrTypes[a] )

                                               {

                                                   case SphinxClient.SPH_ATTR_INTEGER:

                                                        case SphinxClient.SPH_ATTR_ORDINAL:

                                                                 System.out.print ( info.attrValues.get(a) );

                                                                 break;

                                                        case SphinxClient.SPH_ATTR_FLOAT:

                                                        case SphinxClient.SPH_ATTR_STRING:

                                                                 System.out.print ( info.attrValues.get(a) );

                                                                 break;

                                                        case SphinxClient.SPH_ATTR_BIGINT:

                                                                 /* longs or floats; print as is */

                                                                 System.out.print ( info.attrValues.get(a) );

                                                                 break;

 

                                                        case SphinxClient.SPH_ATTR_TIMESTAMP:

                                                                 Long iStamp = (Long) info.attrValues.get(a);

                                                                 Date date = new Date ( iStamp.longValue()*1000 );

                                                                 System.out.print ( date.toString() );

                                                                 break;

 

                                                        default:

                                                                 System.out.print ( "(unknown-attr-type=" + res.attrTypes[a] + ")" );

                                               }

                                    

                            }

 

                            System.out.println();

                   }

         }

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/28624388/viewspace-1284505/,如需轉載,請註明出處,否則將追究法律責任。

相關文章