mirror of
https://github.com/discourse/discourse.git
synced 2024-11-25 18:30:26 -06:00
PERF: Limit characters used to generate headline for search blurb.
We determined, using the following benchmark script, that limiting to 2500 chars would mean a maximum of 25ms spent generating headlines.

```
require 'benchmark/ips'

string = <<~STRING
Far far away, behind the word mountains...
STRING

def sql_excerpt(string, l = 1000000)
  DB.query_single(<<~SQL)
  SELECT TS_HEADLINE('english', left('#{string}', #{l}), PLAINTO_TSQUERY('mountains'))
  SQL
end

def ruby_excerpt(string)
  output = DB.query_single("SELECT '#{string}'")[0]
  Search::GroupedSearchResults::TextHelper.excerpt(output, 'mountains', radius: 100)
end

puts "Ruby Excerpt: #{ruby_excerpt(string)}"
puts "SQL Excerpt: #{sql_excerpt(string)}"
puts

Benchmark.ips do |x|
  x.time = 10

  [1000, 2500, 5000, 10000, 20000, 50000].each do |l|
    short_string = string[0..l]

    x.report("ts_headline excerpt #{l}") do
      sql_excerpt(short_string, l)
    end

    x.report("actionview excerpt #{l}") do
      ruby_excerpt(short_string)
    end
  end

  x.compare!
end
```

```
actionview excerpt 1000:  20570.7 i/s
actionview excerpt 2500:  17863.1 i/s - 1.15x (± 0.00) slower
actionview excerpt 5000:  14228.9 i/s - 1.45x (± 0.00) slower
actionview excerpt 10000: 10906.2 i/s - 1.89x (± 0.00) slower
actionview excerpt 20000:  6255.0 i/s - 3.29x (± 0.00) slower
ts_headline excerpt 1000:  4337.5 i/s - 4.74x (± 0.00) slower
actionview excerpt 50000:  3222.7 i/s - 6.38x (± 0.00) slower
ts_headline excerpt 2500:  2240.4 i/s - 9.18x (± 0.00) slower
ts_headline excerpt 5000:  1258.7 i/s - 16.34x (± 0.00) slower
ts_headline excerpt 10000:  667.2 i/s - 30.83x (± 0.00) slower
ts_headline excerpt 20000:  348.7 i/s - 58.98x (± 0.00) slower
ts_headline excerpt 50000:  131.9 i/s - 155.91x (± 0.00) slower
```
This commit is contained in:
parent
cf2797bf58
commit
053cbe3112
@ -1164,6 +1164,10 @@ class Search
|
||||
query.includes(topic: topic_eager_loads)
|
||||
end
|
||||
|
||||
# Limited for performance reasons since `TS_HEADLINE` is slow when the text
|
||||
# document is too long.
|
||||
MAX_LENGTH_FOR_HEADLINE = 2500
|
||||
|
||||
def posts_scope(default_scope = Post.all)
|
||||
if SiteSetting.use_pg_headlines_for_excerpt
|
||||
search_term = @term.present? ? PG::Connection.escape_string(@term) : nil
|
||||
@ -1174,7 +1178,7 @@ class Search
|
||||
.joins("INNER JOIN topics t1 ON t1.id = posts.topic_id")
|
||||
.select(
|
||||
"TS_HEADLINE(#{ts_config}, t1.fancy_title, PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'), 'StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>''') AS topic_title_headline",
|
||||
"TS_HEADLINE(#{ts_config}, pd.raw_data, PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'), 'ShortWord=0, MaxFragments=1, MinWords=50, MaxWords=51, StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>''') AS headline",
|
||||
"TS_HEADLINE(#{ts_config}, LEFT(pd.raw_data, #{MAX_LENGTH_FOR_HEADLINE}), PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'), 'ShortWord=0, MaxFragments=1, MinWords=50, MaxWords=51, StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>''') AS headline",
|
||||
default_scope.arel.projections
|
||||
)
|
||||
else
|
||||
|
@ -429,6 +429,20 @@ describe Search do
|
||||
expect(post.topic_title_headline).to eq(topic.fancy_title)
|
||||
end
|
||||
|
||||
it "it limits the headline to #{Search::MAX_LENGTH_FOR_HEADLINE} characters" do
|
||||
SiteSetting.use_pg_headlines_for_excerpt = true
|
||||
|
||||
reply.update!(raw: "#{'a' * Search::MAX_LENGTH_FOR_HEADLINE} #{reply.raw}")
|
||||
|
||||
result = Search.execute('elephant')
|
||||
|
||||
expect(result.posts.map(&:id)).to contain_exactly(reply.id)
|
||||
|
||||
post = result.posts.first
|
||||
|
||||
expect(post.headline.include?('elephant')).to eq(false)
|
||||
end
|
||||
|
||||
it 'returns the right post and blurb for searches with phrase' do
|
||||
SiteSetting.use_pg_headlines_for_excerpt = true
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user