FEATURE: allow consumers to parse a search string (#23528)

This extends search so that external consumers can:

1. Split the plain "term" off from the various advanced filters and orders in a search string
2. Build a relation that applies either the order or the filters

It also moves a lot of stuff around in the search class for clarity.

Two new APIs are exposed:

`#apply_filters` to apply all the special filters to a posts/topics relation
`#apply_order` to force a particular order (e.g. `order:latest`)

This can then be used by semantic search in Discourse AI
This commit is contained in:
Sam 2023-09-12 16:21:01 +10:00 committed by GitHub
parent f08c6d2756
commit f25849501d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 153 additions and 107 deletions

View File

@ -745,6 +745,71 @@ class Search
posts.where("topics.views <= ?", match.to_i) posts.where("topics.views <= ?", match.to_i)
end end
# Runs every registered advanced filter against +posts+ and returns the
# resulting relation.
#
# @filters is iterated as (callable, match) pairs. Each callable is evaluated
# via instance_exec, so +self+ inside it is this object. Callables whose
# arity is exactly 1 receive only the relation; all others also receive the
# matched value. A filter that returns nil/false leaves the relation as is.
#
# @param posts the relation to narrow down
# @return the filtered relation (+posts+ itself when @filters is not set)
def apply_filters(posts)
  return posts unless @filters

  @filters.each do |filter, matched|
    filter_args = filter.arity == 1 ? [posts] : [posts, matched]
    narrowed = instance_exec(*filter_args, &filter)
    posts = narrowed if narrowed
  end

  posts
end
# Applies the ordering selected by @order to +posts+ and returns the result.
#
# Built-in orders (:latest, :oldest, :latest_topic, :oldest_topic, :views,
# :likes) each have two forms: when +aggregate_search+ is truthy the sort
# column is wrapped in MAX(...), otherwise the bare column is used. The
# non-aggregate :latest / :oldest forms call +reorder+; every other form
# calls +order+.
#
# When @order is none of the built-ins and +allow_relevance_search+ is
# truthy, ordering is delegated to sort_by_relevance.
#
# Finally, if @order is set and Search.advanced_orders has a callable
# registered under that key, the callable is invoked with the relation built
# so far and its return value is what this method returns.
#
# @param posts the relation to order
# @param aggregate_search [Boolean] use the MAX(...) form of each sort column
# @param allow_relevance_search [Boolean] fall back to relevance ordering
# @param type_filter [String] forwarded to sort_by_relevance
# @return the ordered relation
def apply_order(
  posts,
  aggregate_search: false,
  allow_relevance_search: true,
  type_filter: "all_topics"
)
  ordered =
    case @order
    when :latest
      if aggregate_search
        posts.order("MAX(posts.created_at) DESC")
      else
        posts.reorder("posts.created_at DESC")
      end
    when :oldest
      if aggregate_search
        posts.order("MAX(posts.created_at) ASC")
      else
        posts.reorder("posts.created_at ASC")
      end
    when :latest_topic
      if aggregate_search
        posts.order("MAX(topics.created_at) DESC")
      else
        posts.order("topics.created_at DESC")
      end
    when :oldest_topic
      if aggregate_search
        posts.order("MAX(topics.created_at) ASC")
      else
        posts.order("topics.created_at ASC")
      end
    when :views
      if aggregate_search
        posts.order("MAX(topics.views) DESC")
      else
        posts.order("topics.views DESC")
      end
    when :likes
      if aggregate_search
        posts.order("MAX(posts.like_count) DESC")
      else
        posts.order("posts.like_count DESC")
      end
    else
      if allow_relevance_search
        sort_by_relevance(posts, type_filter: type_filter, aggregate_search: aggregate_search)
      else
        posts
      end
    end

  # The registered advanced order (if any) runs on top of whatever was
  # selected above, including the built-in orders.
  custom_order = Search.advanced_orders&.fetch(@order, nil) if @order
  custom_order ? custom_order.call(ordered) : ordered
end
private private
def search_tags(posts, match, positive:) def search_tags(posts, match, positive:)
@ -1059,11 +1124,6 @@ class Search
"%#{term_without_quote}%", "%#{term_without_quote}%",
) )
else else
# A is for title
# B is for category
# C is for tags
# D is for cooked
weights = @in_title ? "A" : (SiteSetting.tagging_enabled ? "ABCD" : "ABD")
posts = posts.where(post_number: 1) if @in_title posts = posts.where(post_number: 1) if @in_title
posts = posts.where("post_search_data.search_data @@ #{ts_query(weight_filter: weights)}") posts = posts.where("post_search_data.search_data @@ #{ts_query(weight_filter: weights)}")
exact_terms = @term.scan(Regexp.new(PHRASE_MATCH_REGEXP_PATTERN)).flatten exact_terms = @term.scan(Regexp.new(PHRASE_MATCH_REGEXP_PATTERN)).flatten
@ -1075,13 +1135,7 @@ class Search
end end
end end
@filters.each do |block, match| posts = apply_filters(posts)
if block.arity == 1
posts = instance_exec(posts, &block) || posts
else
posts = instance_exec(posts, match, &block) || posts
end
end if @filters
# If we have a search context, prioritize those posts first # If we have a search context, prioritize those posts first
posts = posts =
@ -1120,53 +1174,51 @@ class Search
posts posts
end end
if @order == :latest if type_filter != "private_messages"
if aggregate_search posts =
posts = posts.order("MAX(posts.created_at) DESC") if secure_category_ids.present?
else posts.where(
posts = posts.reorder("posts.created_at DESC") "(categories.id IS NULL) OR (NOT categories.read_restricted) OR (categories.id IN (?))",
end secure_category_ids,
elsif @order == :oldest ).references(:categories)
if aggregate_search else
posts = posts.order("MAX(posts.created_at) ASC") posts.where("(categories.id IS NULL) OR (NOT categories.read_restricted)").references(
else :categories,
posts = posts.reorder("posts.created_at ASC") )
end end
elsif @order == :latest_topic end
if aggregate_search
posts = posts.order("MAX(topics.created_at) DESC")
else
posts = posts.order("topics.created_at DESC")
end
elsif @order == :oldest_topic
if aggregate_search
posts = posts.order("MAX(topics.created_at) ASC")
else
posts = posts.order("topics.created_at ASC")
end
elsif @order == :views
if aggregate_search
posts = posts.order("MAX(topics.views) DESC")
else
posts = posts.order("topics.views DESC")
end
elsif @order == :likes
if aggregate_search
posts = posts.order("MAX(posts.like_count) DESC")
else
posts = posts.order("posts.like_count DESC")
end
elsif !is_topic_search
exact_rank = nil
if SiteSetting.prioritize_exact_search_title_match posts =
exact_rank = ts_rank_cd(weight_filter: "A", prefix_match: false) apply_order(
end posts,
aggregate_search: aggregate_search,
allow_relevance_search: !is_topic_search,
type_filter: type_filter,
)
rank = ts_rank_cd(weight_filter: weights) posts = posts.offset(offset)
posts.limit(limit)
end
if type_filter != "private_messages" def weights
category_search_priority = <<~SQL # A is for title
# B is for category
# C is for tags
# D is for cooked
@in_title ? "A" : (SiteSetting.tagging_enabled ? "ABCD" : "ABD")
end
def sort_by_relevance(posts, type_filter:, aggregate_search:)
exact_rank = nil
if SiteSetting.prioritize_exact_search_title_match
exact_rank = ts_rank_cd(weight_filter: "A", prefix_match: false)
end
rank = ts_rank_cd(weight_filter: weights)
if type_filter != "private_messages"
category_search_priority = <<~SQL
( (
CASE categories.search_priority CASE categories.search_priority
WHEN #{Searchable::PRIORITIES[:very_high]} WHEN #{Searchable::PRIORITIES[:very_high]}
@ -1178,16 +1230,16 @@ class Search
) )
SQL SQL
rank_sort_priorities = [["topics.archived", 0.85], ["topics.closed", 0.9]] rank_sort_priorities = [["topics.archived", 0.85], ["topics.closed", 0.9]]
rank_sort_priorities = rank_sort_priorities =
DiscoursePluginRegistry.apply_modifier( DiscoursePluginRegistry.apply_modifier(
:search_rank_sort_priorities, :search_rank_sort_priorities,
rank_sort_priorities, rank_sort_priorities,
self, self,
) )
category_priority_weights = <<~SQL category_priority_weights = <<~SQL
( (
CASE categories.search_priority CASE categories.search_priority
WHEN #{Searchable::PRIORITIES[:low]} WHEN #{Searchable::PRIORITIES[:low]}
@ -1204,61 +1256,38 @@ class Search
) )
SQL SQL
posts = posts =
if aggregate_search if aggregate_search
posts.order("MAX(#{category_search_priority}) DESC") posts.order("MAX(#{category_search_priority}) DESC")
else else
posts.order("#{category_search_priority} DESC") posts.order("#{category_search_priority} DESC")
end
if @term.present? && exact_rank
posts =
if aggregate_search
posts.order("MAX(#{exact_rank} * #{category_priority_weights}) DESC")
else
posts.order("#{exact_rank} * #{category_priority_weights} DESC")
end
end end
data_ranking = if @term.present? && exact_rank
if @term.blank?
"(#{category_priority_weights})"
else
"(#{rank} * #{category_priority_weights})"
end
posts = posts =
if aggregate_search if aggregate_search
posts.order("MAX(#{data_ranking}) DESC") posts.order("MAX(#{exact_rank} * #{category_priority_weights}) DESC")
else else
posts.order("#{data_ranking} DESC") posts.order("#{exact_rank} * #{category_priority_weights} DESC")
end end
end end
posts = posts.order("topics.bumped_at DESC") data_ranking =
end if @term.blank?
"(#{category_priority_weights})"
if type_filter != "private_messages"
posts =
if secure_category_ids.present?
posts.where(
"(categories.id IS NULL) OR (NOT categories.read_restricted) OR (categories.id IN (?))",
secure_category_ids,
).references(:categories)
else else
posts.where("(categories.id IS NULL) OR (NOT categories.read_restricted)").references( "(#{rank} * #{category_priority_weights})"
:categories, end
)
posts =
if aggregate_search
posts.order("MAX(#{data_ranking}) DESC")
else
posts.order("#{data_ranking} DESC")
end end
end end
if @order posts.order("topics.bumped_at DESC")
advanced_order = Search.advanced_orders&.fetch(@order, nil)
posts = advanced_order.call(posts) if advanced_order
end
posts = posts.offset(offset)
posts.limit(limit)
end end
def ts_rank_cd(weight_filter:, prefix_match: true) def ts_rank_cd(weight_filter:, prefix_match: true)

View File

@ -2748,4 +2748,21 @@ RSpec.describe Search do
expect(result.posts.pluck(:id)).to eq([post1.id, post2.id]) expect(result.posts.pluck(:id)).to eq([post1.id, post2.id])
end end
end end
describe "Extensibility features of search" do
  # Exercises the public parsing API: build a Search from a raw query, then
  # apply its filters and its order to an arbitrary posts relation.
  it "is possible to parse queries" do
    search = Search.new("hello l status:closed")

    relation = Post.all.includes(:topic)
    relation = search.apply_order(search.apply_filters(relation))
    generated_sql = relation.to_sql

    # Only the free-text part is expected to remain as the term; the
    # assertions below show that "l" and "status:closed" end up as an
    # ordering and a filter respectively.
    # NOTE(review): "l" is presumably the shortcut for order:latest - confirm.
    expect(search.term).to eq("hello")
    expect(generated_sql).to include("ORDER BY posts.created_at DESC")
    expect(generated_sql).to match(/where.*topics.closed/i)
  end
end
end end