mirror of
https://github.com/discourse/discourse.git
synced 2025-02-25 18:55:32 -06:00
FIX: search within topic not working correctly in CJK
We were splitting the search term into tokens before searching within a topic, which caused every CJK query to miss.
This commit is contained in:
parent
30501b6660
commit
06b9d8223a
@ -55,20 +55,26 @@ class Search
|
|||||||
end
|
end
|
||||||
|
|
||||||
def self.prepare_data(search_data, purpose = :query)
|
def self.prepare_data(search_data, purpose = :query)
|
||||||
data = search_data.squish
|
purpose ||= :query
|
||||||
# TODO cppjieba_rb is designed for chinese, we need something else for Japanese
|
|
||||||
# Korean appears to be safe because words are already space separated
|
|
||||||
# For Japanese we should investigate using kakasi
|
|
||||||
if ['zh_TW', 'zh_CN', 'ja'].include?(SiteSetting.default_locale) || SiteSetting.search_tokenize_chinese_japanese_korean
|
|
||||||
require 'cppjieba_rb' unless defined? CppjiebaRb
|
|
||||||
mode = (purpose == :query ? :query : :mix)
|
|
||||||
data = CppjiebaRb.segment(search_data, mode: mode)
|
|
||||||
data = CppjiebaRb.filter_stop_word(data).join(' ')
|
|
||||||
end
|
|
||||||
|
|
||||||
|
data = search_data.dup
|
||||||
data.force_encoding("UTF-8")
|
data.force_encoding("UTF-8")
|
||||||
if SiteSetting.search_ignore_accents
|
if purpose != :topic
|
||||||
data = strip_diacritics(data)
|
# TODO cppjieba_rb is designed for chinese, we need something else for Japanese
|
||||||
|
# Korean appears to be safe because words are already space separated
|
||||||
|
# For Japanese we should investigate using kakasi
|
||||||
|
if ['zh_TW', 'zh_CN', 'ja'].include?(SiteSetting.default_locale) || SiteSetting.search_tokenize_chinese_japanese_korean
|
||||||
|
require 'cppjieba_rb' unless defined? CppjiebaRb
|
||||||
|
mode = (purpose == :query ? :query : :mix)
|
||||||
|
data = CppjiebaRb.segment(search_data, mode: mode)
|
||||||
|
data = CppjiebaRb.filter_stop_word(data).join(' ')
|
||||||
|
else
|
||||||
|
data.squish!
|
||||||
|
end
|
||||||
|
|
||||||
|
if SiteSetting.search_ignore_accents
|
||||||
|
data = strip_diacritics(data)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
data
|
data
|
||||||
end
|
end
|
||||||
@ -155,7 +161,7 @@ class Search
|
|||||||
term = process_advanced_search!(term)
|
term = process_advanced_search!(term)
|
||||||
|
|
||||||
if term.present?
|
if term.present?
|
||||||
@term = Search.prepare_data(term)
|
@term = Search.prepare_data(term, Topic === @search_context ? :topic : nil)
|
||||||
@original_term = PG::Connection.escape_string(@term)
|
@original_term = PG::Connection.escape_string(@term)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -246,10 +246,19 @@ describe Search do
|
|||||||
|
|
||||||
context 'search within topic' do
|
context 'search within topic' do
|
||||||
|
|
||||||
def new_post(raw, topic)
|
def new_post(raw, topic = nil)
|
||||||
|
topic ||= Fabricate(:topic)
|
||||||
Fabricate(:post, topic: topic, topic_id: topic.id, user: topic.user, raw: raw)
|
Fabricate(:post, topic: topic, topic_id: topic.id, user: topic.user, raw: raw)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it 'works in Chinese' do
|
||||||
|
SiteSetting.search_tokenize_chinese_japanese_korean = true
|
||||||
|
post = new_post('I am not in English 何点になると思いますか')
|
||||||
|
|
||||||
|
results = Search.execute('何点になると思', search_context: post.topic)
|
||||||
|
expect(results.posts.map(&:id)).to eq([post.id])
|
||||||
|
end
|
||||||
|
|
||||||
it 'displays multiple results within a topic' do
|
it 'displays multiple results within a topic' do
|
||||||
topic = Fabricate(:topic)
|
topic = Fabricate(:topic)
|
||||||
topic2 = Fabricate(:topic)
|
topic2 = Fabricate(:topic)
|
||||||
|
Loading…
Reference in New Issue
Block a user