diff --git a/app/services/search_indexer.rb b/app/services/search_indexer.rb
index d81facf7ae4..fd7f65cddf9 100644
--- a/app/services/search_indexer.rb
+++ b/app/services/search_indexer.rb
@@ -121,7 +121,8 @@ class SearchIndexer
def self.index(obj, force: false)
return if @disabled
- category_name, tag_names = nil
+ category_name = nil
+ tag_names = nil
topic = nil
if Topic === obj
@@ -148,8 +149,7 @@ class SearchIndexer
if Topic === obj && (obj.saved_change_to_title? || force)
if obj.posts
- post = obj.posts.find_by(post_number: 1)
- if post
+ if post = obj.posts.find_by(post_number: 1)
SearchIndexer.update_posts_index(post.id, obj.title, category_name, tag_names, post.cooked)
SearchIndexer.update_topics_index(obj.id, obj.title, post.cooked)
end
@@ -175,43 +175,22 @@ class SearchIndexer
def self.scrub(html)
me = new
parser = Nokogiri::HTML::SAX::Parser.new(me)
- begin
- copy = +"
"
- copy << html unless html.nil?
- copy << "
"
- parser.parse(html) unless html.nil?
- end
+ parser.parse("#{html}
") if html.present?
me.scrubbed
end
- def start_element(name, attributes = [])
+ ATTRIBUTES ||= %w{alt title href data-youtube-title}.freeze # attribute names whose values start_element feeds to #characters, in this order
+
+ def start_element(_, attributes = [])
attributes = Hash[*attributes.flatten]
- if attributes["alt"]
- scrubbed << " "
- scrubbed << attributes["alt"]
- scrubbed << " "
- end
- if attributes["title"]
- scrubbed << " "
- scrubbed << attributes["title"]
- scrubbed << " "
- end
- if attributes["data-youtube-title"]
- scrubbed << " "
- scrubbed << attributes["data-youtube-title"]
- scrubbed << " "
- end
- if attributes["href"]
- scrubbed << " "
- scrubbed << attributes["href"]
- scrubbed << " "
+
+ ATTRIBUTES.each do |name|
+ characters(attributes[name]) if attributes[name].present?
end
end
def characters(string)
- scrubbed << " "
- scrubbed << string
- scrubbed << " "
+ scrubbed << " #{string.unicode_normalize(:nfc).gsub(/[^\x00-\x7F]/) { |c| ActiveSupport::Inflector.transliterate(c, c) }.strip} " # per-char transliteration with the char itself as fallback: accents are still stripped, but text with no ASCII approximation (CJK, Cyrillic, ...) is kept instead of becoming "?"
end
end
end
diff --git a/spec/services/search_indexer_spec.rb b/spec/services/search_indexer_spec.rb
index 5be853bab89..8b6bcae498f 100644
--- a/spec/services/search_indexer_spec.rb
+++ b/spec/services/search_indexer_spec.rb
@@ -29,6 +29,14 @@ describe SearchIndexer do
expect(scrubbed).to eq(" http://meta.discourse.org/ link ")
end
+ it 'removes diacritics' do
+ html = "Hétérogénéité
"
+
+ scrubbed = SearchIndexer::HtmlScrubber.scrub(html)
+
+ expect(scrubbed).to eq(" Heterogeneite ")
+ end
+
it 'correctly indexes a post according to version' do
# Preparing so that they can be indexed to right version
SearchIndexer.update_posts_index(post_id, "dummy", "", nil, nil)