diff --git a/app/services/search_indexer.rb b/app/services/search_indexer.rb index e4f16c52f19..7ccc46d6f9b 100644 --- a/app/services/search_indexer.rb +++ b/app/services/search_indexer.rb @@ -190,8 +190,10 @@ class SearchIndexer end end + DIACRITICS ||= /([\u0300-\u036f]|[\u1AB0-\u1AFF]|[\u1DC0-\u1DFF]|[\u20D0-\u20FF])/ + def characters(string) - scrubbed << " #{ActiveSupport::Inflector.transliterate(string).strip} " + scrubbed << " #{string.unicode_normalize(:nfd).gsub(DIACRITICS, "").strip} " end end end diff --git a/spec/services/search_indexer_spec.rb b/spec/services/search_indexer_spec.rb index 8b6bcae498f..42a61fa3675 100644 --- a/spec/services/search_indexer_spec.rb +++ b/spec/services/search_indexer_spec.rb @@ -30,11 +30,11 @@ describe SearchIndexer do end it 'removes diacritics' do - html = "
Hétérogénéité
" + html = "HELLO Hétérogénéité Здравствуйте هتاف للترحيب 你好
" scrubbed = SearchIndexer::HtmlScrubber.scrub(html) - expect(scrubbed).to eq(" Heterogeneite ") + expect(scrubbed).to eq(" HELLO Heterogeneite Здравствуите هتاف للترحيب 你好 ") end it 'correctly indexes a post according to version' do