mirror of
https://github.com/discourse/discourse.git
synced 2025-02-25 18:55:32 -06:00
FIX: Limit PG headline-based search blurb generation to 200 characters.
* Restores the omission characters '...' in the blurb as well.
This commit is contained in:
@@ -1177,8 +1177,28 @@ class Search
|
|||||||
.joins("INNER JOIN post_search_data pd ON pd.post_id = posts.id")
|
.joins("INNER JOIN post_search_data pd ON pd.post_id = posts.id")
|
||||||
.joins("INNER JOIN topics t1 ON t1.id = posts.topic_id")
|
.joins("INNER JOIN topics t1 ON t1.id = posts.topic_id")
|
||||||
.select(
|
.select(
|
||||||
"TS_HEADLINE(#{ts_config}, t1.fancy_title, PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'), 'StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>''') AS topic_title_headline",
|
"TS_HEADLINE(
|
||||||
"TS_HEADLINE(#{ts_config}, LEFT(pd.raw_data, #{MAX_LENGTH_FOR_HEADLINE}), PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'), 'ShortWord=0, MaxFragments=1, MinWords=50, MaxWords=51, StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>''') AS headline",
|
#{ts_config},
|
||||||
|
t1.fancy_title,
|
||||||
|
PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'),
|
||||||
|
'StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>'''
|
||||||
|
) AS topic_title_headline",
|
||||||
|
"TS_HEADLINE(
|
||||||
|
#{ts_config},
|
||||||
|
LEFT(
|
||||||
|
TS_HEADLINE(
|
||||||
|
#{ts_config},
|
||||||
|
LEFT(pd.raw_data, #{MAX_LENGTH_FOR_HEADLINE}),
|
||||||
|
PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'),
|
||||||
|
'ShortWord=0, MaxFragments=1, MinWords=50, MaxWords=51, StartSel='''', StopSel='''''
|
||||||
|
),
|
||||||
|
#{Search::GroupedSearchResults::BLURB_LENGTH}
|
||||||
|
),
|
||||||
|
PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'),
|
||||||
|
'HighlightAll=true, StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>'''
|
||||||
|
) AS headline",
|
||||||
|
"LEFT(pd.raw_data, 50) AS leading_raw_data",
|
||||||
|
"RIGHT(pd.raw_data, 50) AS trailing_raw_data",
|
||||||
default_scope.arel.projections
|
default_scope.arel.projections
|
||||||
)
|
)
|
||||||
else
|
else
|
||||||
|
|||||||
@@ -78,6 +78,9 @@ class Search
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
OMISSION = '...'
|
||||||
|
SCRUB_HEADLINE_REGEXP = /<span(?: \w+="[^"]+")* class="#{Search::HIGHLIGHT_CSS_CLASS}"(?: \w+="[^"]+")*>([^<]*)<\/span>/
|
||||||
|
|
||||||
def blurb(post)
|
def blurb(post)
|
||||||
opts = {
|
opts = {
|
||||||
term: @blurb_term,
|
term: @blurb_term,
|
||||||
@@ -86,7 +89,10 @@ class Search
|
|||||||
|
|
||||||
if post.post_search_data.version > SearchIndexer::MIN_POST_REINDEX_VERSION
|
if post.post_search_data.version > SearchIndexer::MIN_POST_REINDEX_VERSION
|
||||||
if SiteSetting.use_pg_headlines_for_excerpt
|
if SiteSetting.use_pg_headlines_for_excerpt
|
||||||
return post.headline
|
scrubbed_headline = post.headline.gsub(SCRUB_HEADLINE_REGEXP, '\1')
|
||||||
|
prefix_omission = scrubbed_headline.start_with?(post.leading_raw_data) ? '' : OMISSION
|
||||||
|
postfix_omission = scrubbed_headline.end_with?(post.trailing_raw_data) ? '' : OMISSION
|
||||||
|
return "#{prefix_omission}#{post.headline}#{postfix_omission}"
|
||||||
else
|
else
|
||||||
opts[:cooked] = post.post_search_data.raw_data
|
opts[:cooked] = post.post_search_data.raw_data
|
||||||
opts[:scrub] = false
|
opts[:scrub] = false
|
||||||
|
|||||||
@@ -410,7 +410,7 @@ describe Search do
|
|||||||
end
|
end
|
||||||
|
|
||||||
let(:expected_blurb) do
|
let(:expected_blurb) do
|
||||||
"hundred characters to satisfy any test conditions that require content longer than the typical test post raw content. It really is some long content, folks. <span class=\"search-highlight\">elephant</span>"
|
"#{Search::GroupedSearchResults::OMISSION}hundred characters to satisfy any test conditions that require content longer than the typical test post raw content. It really is some long content, folks. <span class=\"#{Search::HIGHLIGHT_CSS_CLASS}\">elephant</span>"
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'returns the post' do
|
it 'returns the post' do
|
||||||
@@ -429,7 +429,7 @@ describe Search do
|
|||||||
expect(post.topic_title_headline).to eq(topic.fancy_title)
|
expect(post.topic_title_headline).to eq(topic.fancy_title)
|
||||||
end
|
end
|
||||||
|
|
||||||
it "it limits the headline to #{Search::MAX_LENGTH_FOR_HEADLINE} characters" do
|
it "only applies highlighting to the first #{Search::MAX_LENGTH_FOR_HEADLINE} characters" do
|
||||||
SiteSetting.use_pg_headlines_for_excerpt = true
|
SiteSetting.use_pg_headlines_for_excerpt = true
|
||||||
|
|
||||||
reply.update!(raw: "#{'a' * Search::MAX_LENGTH_FOR_HEADLINE} #{reply.raw}")
|
reply.update!(raw: "#{'a' * Search::MAX_LENGTH_FOR_HEADLINE} #{reply.raw}")
|
||||||
@@ -443,6 +443,20 @@ describe Search do
|
|||||||
expect(post.headline.include?('elephant')).to eq(false)
|
expect(post.headline.include?('elephant')).to eq(false)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "limits the search headline to #{Search::GroupedSearchResults::BLURB_LENGTH} characters" do
|
||||||
|
SiteSetting.use_pg_headlines_for_excerpt = true
|
||||||
|
|
||||||
|
reply.update!(raw: "#{'a' * Search::GroupedSearchResults::BLURB_LENGTH} elephant")
|
||||||
|
|
||||||
|
result = Search.execute('elephant')
|
||||||
|
|
||||||
|
expect(result.posts.map(&:id)).to contain_exactly(reply.id)
|
||||||
|
|
||||||
|
post = result.posts.first
|
||||||
|
|
||||||
|
expect(result.blurb(post)).to eq("#{'a' * Search::GroupedSearchResults::BLURB_LENGTH}#{Search::GroupedSearchResults::OMISSION}")
|
||||||
|
end
|
||||||
|
|
||||||
it 'returns the right post and blurb for searches with phrase' do
|
it 'returns the right post and blurb for searches with phrase' do
|
||||||
SiteSetting.use_pg_headlines_for_excerpt = true
|
SiteSetting.use_pg_headlines_for_excerpt = true
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user