【搜尋引擎】Solr全文檢索近實時查詢優化

monkjavaer發表於2019-06-27

設定多個搜尋建議查詢演算法

 <!-- Suggest component registering two suggesters. Both read suggestions from
      the suggest_name field, return gid as the payload, and analyze text with
      the text_suggest field type. -->
 <searchComponent name="suggest" class="solr.SuggestComponent">
    <!-- Suggester 1: AnalyzingLookupFactory lookup over a document dictionary. -->
    <lst name="suggester">
      <str name="name">AnalyzingSuggester</str>
      <str name="lookupImpl">AnalyzingLookupFactory</str>      
      <str name="dictionaryImpl">DocumentDictionaryFactory</str>
      <str name="field">suggest_name</str>
      <!-- NOTE(review): weightField is the same field as "field"; a weight is
           normally a numeric field, so confirm this is intended. -->
      <str name="weightField">suggest_name</str>
      <str name="payloadField">gid</str>
      <str name="suggestAnalyzerFieldType">text_suggest</str>
      <!-- Do not build on startup; rebuild on commit instead. -->
      <str name="buildOnStartup">false</str>
      <str name="buildOnCommit">true</str>
    </lst>
    
    <!-- Suggester 2: AnalyzingInfixLookupFactory lookup, same dictionary and
         fields as above, with highlighting of the matched text turned off. -->
    <lst name="suggester">
      <str name="name">AnalyzingInfixSuggester</str>
      <str name="lookupImpl">AnalyzingInfixLookupFactory</str>      
      <str name="dictionaryImpl">DocumentDictionaryFactory</str>
      <str name="field">suggest_name</str>
      <!-- NOTE(review): same weightField caveat as the first suggester. -->
      <str name="weightField">suggest_name</str>
      <str name="highlight">false</str>
      <str name="payloadField">gid</str>
      <str name="suggestAnalyzerFieldType">text_suggest</str>
      <str name="buildOnStartup">false</str>
      <str name="buildOnCommit">true</str>
    </lst>
  </searchComponent>
  • 設定AnalyzingLookupFactory和AnalyzingInfixLookupFactory兩種查詢演算法。AnalyzingLookupFactory會先分析傳入的文字,並將分析後的形式加入加權FST,查詢時再對輸入執行相同的分析後進行匹配;若查到的結果不足你需要的數量,再通過AnalyzingInfixLookupFactory對文字中任意詞元做字首匹配來補充。
  • 例如 AnalyzingInfixLookupFactory "aaa bbb ccc",可通過bbb,或者ccc搜尋到,而 AnalyzingLookupFactory必須是先從a開始匹配才能出結果。
  • AnalyzingInfixLookupFactory可通過標籤 <str name="highlight">false</str> 關閉高亮提示。
  • <str name="buildOnCommit">true</str>:可通過此標籤設定在軟提交時才重新構建suggest索引。注意此設定只適合文件提交不頻繁的場景。

    設定軟提交時間

  • 配置在自己core下的conf資料夾中的solrconfig.xml檔案

vim solrconfig.xml
    <!-- Automatic soft-commit interval in milliseconds. The value comes from
         the solr.autoSoftCommit.maxTime property and falls back to -1 when
         that property is not set (presumably "disabled" - confirm against the
         Solr documentation). -->
    <autoSoftCommit>
      <maxTime>${solr.autoSoftCommit.maxTime:-1}</maxTime>
    </autoSoftCommit>

可以將maxTime設定成你需要的時間,單位是毫秒(ms);預設值-1表示不啟用自動軟提交。

  • 也可以在solr啟動的時候通過命令設定軟提交:
bin/solr start -force -Dsolr.autoSoftCommit.maxTime=10000

設定了軟提交時間後,當有新的文件提交時,會達到設定的軟提交時間才真正提交。

關閉停用詞過濾器

在建立索引的時候,fieldType定義的欄位可不加入停用詞過濾器,因為我們要檢索的詞很短,加入會影響檢索結果。

 <!-- Stop-word filter: uses the word list in stopwords.txt, matched
      case-insensitively. Leave this filter out of field types used for short
      suggest terms. -->
 <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />

Java伺服器呼叫suggest介面時,禁用suggest.build=true

加入suggest.build=true這個條件,每輸入一個字元檢索的時候都會去重新構建suggest索引,檢索效率大大減低。通過上面的軟提交方式達到近實時檢索。

Java伺服器測試用例

/**
 * @author monkjavaer
 * @version V1.0
 * @date 2019/6/21 0021 22:42
 */
public class SolJTest {
    /**
     * 日誌
     */
    private static Logger logger = LoggerFactory.getLogger(SolJTest.class);

    /**
     * solr 地址
     */
    private static String SOLR_URL = PropertyReaderUtils.getProValue("solr.address_url");

    /**
     * suggest AnalyzingLookupFactory
     */
    public final static String SOLR_ANALYZINGSUGGESTER = PropertyReaderUtils.getProValue("solr.AnalyzingSuggester");

    /**
     * suggest AnalyzingInfixLookupFactory
     */
    public final static String SOLR_ANALYZINGINFIXSUGGESTER = PropertyReaderUtils.getProValue("solr.AnalyzingInfixSuggester");

    /**
     * HttpSolrClient
     */
    private HttpSolrClient httpSolrClient;

    /**
     * default socket connection timeout in ms
     */
    private static int DEFAULT_CONNECTION_TIMEOUT = 60000;

    /**
     * @return void
     * @author monkjavaer
     * @description get HttpSolrClient
     * @date 13:27 2019/6/19
     * @param: []
     **/
    @Before
    public void getHttpSolrClient() {
        logger.info("start getHttpSolrClient......");
        try {
            if (httpSolrClient == null) {
                httpSolrClient = new HttpSolrClient.Builder(SOLR_URL).build();
                httpSolrClient.setConnectionTimeout(DEFAULT_CONNECTION_TIMEOUT);
                httpSolrClient.setDefaultMaxConnectionsPerHost(100);
                httpSolrClient.setMaxTotalConnections(100);
            }
        } catch (Exception e) {
            e.printStackTrace();
            logger.error(e.getMessage());
        }
        logger.info("end getHttpSolrClient......");
    }

    /**
     * @return void
     * @author monkjavaer
     * @description test suggester response object
     * @date 13:27 2019/6/19
     * @param: []
     **/
    @Test
    public void testSuggesterResponseObject() throws IOException, SolrServerException {
        SolrQuery query = new SolrQuery("*:*");
        query.set(CommonParams.QT, "/suggest");
        query.set("suggest.dictionary", SOLR_ANALYZINGSUGGESTER, SOLR_ANALYZINGINFIXSUGGESTER);
        query.set("suggest.q", "aoa");
        query.set("suggest.build", true);
        QueryRequest request = new QueryRequest(query);
        QueryResponse queryResponse = request.process(httpSolrClient);
        SuggesterResponse response = queryResponse.getSuggesterResponse();
        Map<String, List<Suggestion>> suggestionsMap = response.getSuggestions();
        assertTrue(suggestionsMap.keySet().contains(SOLR_ANALYZINGSUGGESTER));

        List<Suggestion> mySuggester = suggestionsMap.get(SOLR_ANALYZINGSUGGESTER);
        logger.info(mySuggester.get(0).getTerm());
        logger.info(mySuggester.get(0).getPayload());
    }

    /**
     * @return void
     * @author monkjavaer
     * @description test suggester response terms
     * @date 13:27 2019/6/19
     * @param: []
     **/
    @Test
    public void testSuggesterResponseTerms() throws Exception {
        SolrQuery query = new SolrQuery("*:*");
        query.set(CommonParams.QT, "/suggest");
        query.set("suggest.dictionary", SOLR_ANALYZINGSUGGESTER, SOLR_ANALYZINGINFIXSUGGESTER);
        query.set("suggest.q", "aoa");
//        query.set("suggest.build", true);
        QueryRequest request = new QueryRequest(query);
        QueryResponse queryResponse = request.process(httpSolrClient);
        SuggesterResponse response = queryResponse.getSuggesterResponse();
        Map<String, List<String>> dictionary2suggestions = response.getSuggestedTerms();
        assertTrue(dictionary2suggestions.keySet().contains(SOLR_ANALYZINGSUGGESTER));

        List<String> mySuggester = dictionary2suggestions.get(SOLR_ANALYZINGSUGGESTER);
        assertEquals("aoa", mySuggester.get(0));
        assertEquals("aoa bob", mySuggester.get(1));
    }

    /**
     * @return void
     * @author monkjavaer
     * @description 簡單查詢自動轉換為bean
     * @date 13:27 2019/6/19
     * @param: []
     **/
    @Test
    public void testSolrQueryGetBeans() throws IOException, SolrServerException {
        final SolrQuery query = new SolrQuery();
        query.setQuery("Zhong Hua Yuan");
        //設定查詢列
        query.addField("id");
        query.addField("name");
        //排序
        query.setSort("id", SolrQuery.ORDER.asc);

        final QueryResponse response = httpSolrClient.query("adress", query);
        final List<Adress> adresses = response.getBeans(Adress.class);

        logger.info("Found " + adresses.size() + " documents");
        for (Adress adress : adresses) {
            logger.info("id:{} ; name:{}; ", adress.getId(), adress.getName());
        }
    }

    /**
     * @return void
     * @author monkjavaer
     * @description 批量新增
     * @date 13:27 2019/6/19
     * @param: []
     **/
    @Test
    public void testAddIndex() throws IOException, SolrServerException {
        List<Adress> lists = new ArrayList<>();
        Adress adress = new Adress();
        adress.setId(1);
        adress.setName("aoa");
        lists.add(adress);
        //向solr批量新增索引資料
        long startTime = TimeUnit.MILLISECONDS.convert(System.nanoTime(), TimeUnit.NANOSECONDS);
        httpSolrClient.addBeans(lists);
        httpSolrClient.commit();
        long endTime = TimeUnit.MILLISECONDS.convert(System.nanoTime(), TimeUnit.NANOSECONDS);
        logger.info("commit solr data cost {} ms.", endTime - startTime);
    }
}

相關文章