diff --git a/app/services/search_indexer.rb b/app/services/search_indexer.rb
index d81facf7ae4..fd7f65cddf9 100644
--- a/app/services/search_indexer.rb
+++ b/app/services/search_indexer.rb
@@ -121,7 +121,8 @@ class SearchIndexer
   def self.index(obj, force: false)
     return if @disabled
 
-    category_name, tag_names = nil
+    category_name = nil
+    tag_names = nil
     topic = nil
 
     if Topic === obj
@@ -148,8 +149,7 @@ class SearchIndexer
 
     if Topic === obj && (obj.saved_change_to_title? || force)
       if obj.posts
-        post = obj.posts.find_by(post_number: 1)
-        if post
+        if post = obj.posts.find_by(post_number: 1)
           SearchIndexer.update_posts_index(post.id, obj.title, category_name, tag_names, post.cooked)
           SearchIndexer.update_topics_index(obj.id, obj.title, post.cooked)
         end
@@ -175,43 +175,28 @@ class SearchIndexer
     def self.scrub(html)
       me = new
       parser = Nokogiri::HTML::SAX::Parser.new(me)
-      begin
-        copy = +"<div>"
-        copy << html unless html.nil?
-        copy << "</div>"
-        parser.parse(html) unless html.nil?
-      end
+      parser.parse("<div>#{html}</div>") if html.present?
       me.scrubbed
     end
 
-    def start_element(name, attributes = [])
+    # Attributes whose values are appended to the scrubbed text.
+    ATTRIBUTES ||= %w{alt title href data-youtube-title}
+
+    def start_element(_, attributes = [])
       attributes = Hash[*attributes.flatten]
-      if attributes["alt"]
-        scrubbed << " "
-        scrubbed << attributes["alt"]
-        scrubbed << " "
-      end
-      if attributes["title"]
-        scrubbed << " "
-        scrubbed << attributes["title"]
-        scrubbed << " "
-      end
-      if attributes["data-youtube-title"]
-        scrubbed << " "
-        scrubbed << attributes["data-youtube-title"]
-        scrubbed << " "
-      end
-      if attributes["href"]
-        scrubbed << " "
-        scrubbed << attributes["href"]
-        scrubbed << " "
+
+      ATTRIBUTES.each do |name|
+        characters(attributes[name]) if attributes[name].present?
       end
     end
 
     def characters(string)
-      scrubbed << " "
-      scrubbed << string
-      scrubbed << " "
+      # Fold accents one character at a time: transliterate swaps any character
+      # without an ASCII approximation for "?", so those are kept unchanged.
+      folded = string.unicode_normalize(:nfc).gsub(/[^[:ascii:]]/) do |char|
+        ActiveSupport::Inflector.transliterate(char, "").presence || char
+      end
+      scrubbed << " #{folded.strip} "
     end
   end
 end
diff --git a/spec/services/search_indexer_spec.rb b/spec/services/search_indexer_spec.rb
index 5be853bab89..8b6bcae498f 100644
--- a/spec/services/search_indexer_spec.rb
+++ b/spec/services/search_indexer_spec.rb
@@ -29,6 +29,14 @@ describe SearchIndexer do
     expect(scrubbed).to eq(" http://meta.discourse.org/ link ")
   end
 
+  it 'removes diacritics' do
+    html = "<p>Hétérogénéité</p>"
+
+    scrubbed = SearchIndexer::HtmlScrubber.scrub(html)
+
+    expect(scrubbed).to eq(" Heterogeneite ")
+  end
+
   it 'correctly indexes a post according to version' do
     # Preparing so that they can be indexed to right version
     SearchIndexer.update_posts_index(post_id, "dummy", "", nil, nil)