FIX: search within topic not working correctly in CJK

We were splitting the term prior to searching, which caused every search to miss.
This commit is contained in:
Sam 2018-11-07 09:41:55 +11:00
parent 30501b6660
commit 06b9d8223a
2 changed files with 29 additions and 14 deletions

View File

@ -55,20 +55,26 @@ class Search
end
def self.prepare_data(search_data, purpose = :query)
data = search_data.squish
# TODO cppjieba_rb is designed for Chinese, we need something else for Japanese
# Korean appears to be safe because words are already space separated
# For Japanese we should investigate using kakasi
if ['zh_TW', 'zh_CN', 'ja'].include?(SiteSetting.default_locale) || SiteSetting.search_tokenize_chinese_japanese_korean
require 'cppjieba_rb' unless defined? CppjiebaRb
mode = (purpose == :query ? :query : :mix)
data = CppjiebaRb.segment(search_data, mode: mode)
data = CppjiebaRb.filter_stop_word(data).join(' ')
end
purpose ||= :query
data = search_data.dup
data.force_encoding("UTF-8")
if SiteSetting.search_ignore_accents
data = strip_diacritics(data)
if purpose != :topic
# TODO cppjieba_rb is designed for Chinese, we need something else for Japanese
# Korean appears to be safe because words are already space separated
# For Japanese we should investigate using kakasi
if ['zh_TW', 'zh_CN', 'ja'].include?(SiteSetting.default_locale) || SiteSetting.search_tokenize_chinese_japanese_korean
require 'cppjieba_rb' unless defined? CppjiebaRb
mode = (purpose == :query ? :query : :mix)
data = CppjiebaRb.segment(search_data, mode: mode)
data = CppjiebaRb.filter_stop_word(data).join(' ')
else
data.squish!
end
if SiteSetting.search_ignore_accents
data = strip_diacritics(data)
end
end
data
end
@ -155,7 +161,7 @@ class Search
term = process_advanced_search!(term)
if term.present?
@term = Search.prepare_data(term)
@term = Search.prepare_data(term, Topic === @search_context ? :topic : nil)
@original_term = PG::Connection.escape_string(@term)
end

View File

@ -246,10 +246,19 @@ describe Search do
context 'search within topic' do
def new_post(raw, topic)
def new_post(raw, topic = nil)
topic ||= Fabricate(:topic)
Fabricate(:post, topic: topic, topic_id: topic.id, user: topic.user, raw: raw)
end
it 'works in Chinese' do
SiteSetting.search_tokenize_chinese_japanese_korean = true
post = new_post('I am not in English 何点になると思いますか')
results = Search.execute('何点になると思', search_context: post.topic)
expect(results.posts.map(&:id)).to eq([post.id])
end
it 'displays multiple results within a topic' do
topic = Fabricate(:topic)
topic2 = Fabricate(:topic)