PERF: Replace video and audio links in search blurb while indexing.

In the near future, we will be switching to PG headlines to generate the
search blurb. As such, we need to replace audio and video links in the
raw data used for headline generation. This also means that we avoid
replacing links each time we need to generate the blurb.
This commit is contained in:
Guo Xiang Tan
2020-08-06 12:25:03 +08:00
parent 06ef87da51
commit 255b0e9f14
6 changed files with 68 additions and 22 deletions

View File

@@ -106,23 +106,24 @@ class Search
end
end
URI_REGEXP = URI.regexp(%w{http https})
def self.blurb_for(cooked: nil, term: nil, blurb_length: BLURB_LENGTH, scrub: true)
blurb = nil
cooked = SearchIndexer.scrub_html_for_search(cooked) if scrub
urls = Set.new
cooked.scan(URI_REGEXP) { urls << $& }
urls.each do |url|
begin
case File.extname(URI(url).path || "")
when Oneboxer::VIDEO_REGEX
cooked.gsub!(url, I18n.t("search.video"))
when Oneboxer::AUDIO_REGEX
cooked.gsub!(url, I18n.t("search.audio"))
if scrub
cooked = SearchIndexer.scrub_html_for_search(cooked)
urls = Set.new
cooked.scan(Discourse::Utils::URI_REGEXP) { urls << $& }
urls.each do |url|
begin
case File.extname(URI(url).path || "")
when Oneboxer::VIDEO_REGEX
cooked.gsub!(url, I18n.t("search.video"))
when Oneboxer::AUDIO_REGEX
cooked.gsub!(url, I18n.t("search.audio"))
end
rescue URI::InvalidURIError
end
rescue URI::InvalidURIError
end
end