mirror of
https://github.com/discourse/discourse.git
synced 2025-02-25 18:55:32 -06:00
PERF: Limit characters used to generate headline for search blurb.
Using the following benchmark script, we determined that limiting the text to 2500 chars means a maximum of
25ms is spent generating headlines.
```
require 'benchmark/ips'
# Sample document that both excerpt implementations below are run against.
string = <<~STRING
Far far away, behind the word mountains...
STRING
# Asks the database for a TS_HEADLINE of the first `l` characters of `string`
# against the search term 'mountains', and returns whatever
# DB.query_single returns for that statement.
#
# NOTE(review): `string` and `l` are interpolated directly into the SQL text,
# so this is only suitable for trusted benchmark input.
def sql_excerpt(string, l = 1000000)
  headline_query = <<~SQL
    SELECT TS_HEADLINE('english', left('#{string}', #{l}), PLAINTO_TSQUERY('mountains'))
  SQL

  DB.query_single(headline_query)
end
# Selects `string` back out of the database, then builds an excerpt around
# 'mountains' (radius 100) with Search::GroupedSearchResults::TextHelper.
#
# NOTE(review): the SELECT round trip is presumably there so this path also
# pays for a database query, like `sql_excerpt` does -- confirm.
def ruby_excerpt(string)
  fetched_text = DB.query_single("SELECT '#{string}'").first
  Search::GroupedSearchResults::TextHelper.excerpt(fetched_text, 'mountains', radius: 100)
end
# Print one sample of each excerpt so the two outputs can be compared by eye.
puts "Ruby Excerpt: #{ruby_excerpt(string)}"
puts "SQL Excerpt: #{sql_excerpt(string)}"
puts

# Benchmark both excerpt strategies across a range of input lengths.
Benchmark.ips do |x|
  x.time = 10

  [1000, 2500, 5000, 10000, 20000, 50000].each do |l|
    # `string[0, l]` yields at most `l` characters. The inclusive range
    # `string[0..l]` would yield `l + 1`, so the ActionView path would be
    # measured on one more character than its label (and than the SQL path,
    # which truncates with `left(..., l)`).
    short_string = string[0, l]

    x.report("ts_headline excerpt #{l}") { sql_excerpt(short_string, l) }
    x.report("actionview excerpt #{l}") { ruby_excerpt(short_string) }
  end

  x.compare!
end
```
```
actionview excerpt 1000: 20570.7 i/s
actionview excerpt 2500: 17863.1 i/s - 1.15x (± 0.00) slower
actionview excerpt 5000: 14228.9 i/s - 1.45x (± 0.00) slower
actionview excerpt 10000: 10906.2 i/s - 1.89x (± 0.00) slower
actionview excerpt 20000: 6255.0 i/s - 3.29x (± 0.00) slower
ts_headline excerpt 1000: 4337.5 i/s - 4.74x (± 0.00) slower
actionview excerpt 50000: 3222.7 i/s - 6.38x (± 0.00) slower
ts_headline excerpt 2500: 2240.4 i/s - 9.18x (± 0.00) slower
ts_headline excerpt 5000: 1258.7 i/s - 16.34x (± 0.00) slower
ts_headline excerpt 10000: 667.2 i/s - 30.83x (± 0.00) slower
ts_headline excerpt 20000: 348.7 i/s - 58.98x (± 0.00) slower
ts_headline excerpt 50000: 131.9 i/s - 155.91x (± 0.00) slower
```
This commit is contained in:
@@ -1164,6 +1164,10 @@ class Search
|
||||
query.includes(topic: topic_eager_loads)
|
||||
end
|
||||
|
||||
# Limited for performance reasons since `TS_HEADLINE` is slow when the text
|
||||
# document is too long.
|
||||
MAX_LENGTH_FOR_HEADLINE = 2500
|
||||
|
||||
def posts_scope(default_scope = Post.all)
|
||||
if SiteSetting.use_pg_headlines_for_excerpt
|
||||
search_term = @term.present? ? PG::Connection.escape_string(@term) : nil
|
||||
@@ -1174,7 +1178,7 @@ class Search
|
||||
.joins("INNER JOIN topics t1 ON t1.id = posts.topic_id")
|
||||
.select(
|
||||
"TS_HEADLINE(#{ts_config}, t1.fancy_title, PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'), 'StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>''') AS topic_title_headline",
|
||||
"TS_HEADLINE(#{ts_config}, pd.raw_data, PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'), 'ShortWord=0, MaxFragments=1, MinWords=50, MaxWords=51, StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>''') AS headline",
|
||||
"TS_HEADLINE(#{ts_config}, LEFT(pd.raw_data, #{MAX_LENGTH_FOR_HEADLINE}), PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'), 'ShortWord=0, MaxFragments=1, MinWords=50, MaxWords=51, StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>''') AS headline",
|
||||
default_scope.arel.projections
|
||||
)
|
||||
else
|
||||
|
||||
@@ -429,6 +429,20 @@ describe Search do
|
||||
expect(post.topic_title_headline).to eq(topic.fancy_title)
|
||||
end
|
||||
|
||||
# Checks that text beyond Search::MAX_LENGTH_FOR_HEADLINE characters is not
# used when generating the PG headline.
it "limits the headline to #{Search::MAX_LENGTH_FOR_HEADLINE} characters" do
  SiteSetting.use_pg_headlines_for_excerpt = true

  # Prefix the reply with MAX_LENGTH_FOR_HEADLINE filler characters so that its
  # original text starts beyond the limit.
  # NOTE(review): assumes the `reply` fixture's raw text contains "elephant";
  # the fixture is defined outside this view.
  reply.update!(raw: "#{'a' * Search::MAX_LENGTH_FOR_HEADLINE} #{reply.raw}")

  result = Search.execute('elephant')

  # The post is still found by the search...
  expect(result.posts.map(&:id)).to contain_exactly(reply.id)

  post = result.posts.first

  # ...but the search term does not appear in the generated headline.
  expect(post.headline).not_to include('elephant')
end
|
||||
|
||||
it 'returns the right post and blurb for searches with phrase' do
|
||||
SiteSetting.use_pg_headlines_for_excerpt = true
|
||||
|
||||
|
||||
Reference in New Issue
Block a user