FEATURE: allow consumers to parse a search string (#23528)

This extends search so it can have consumers that:

1. Can split off "term" from various advanced filters and orders
2. Can build a relation of either order or filter

It also moves a lot of stuff around in the search class for clarity.

Two new APIs are exposed:

`#apply_filters` to apply all the special filters to a posts/topics relation
`#apply_order` to force a particular order (e.g. `order:latest`)

This can then be used by semantic search in Discourse AI.
This commit is contained in:
Sam 2023-09-12 16:21:01 +10:00 committed by GitHub
parent f08c6d2756
commit f25849501d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 153 additions and 107 deletions

View File

@ -745,6 +745,71 @@ class Search
posts.where("topics.views <= ?", match.to_i)
end
# Runs every entry of @filters against the given relation and returns the result.
#
# Each entry is a (block, match) pair. A block declaring exactly one parameter
# receives only the relation; any other block receives the relation and the
# match. Blocks are evaluated with instance_exec, so `self` inside them is this
# object. A block that returns nil or false leaves the relation unchanged.
#
# posts - the relation to narrow down.
#
# Returns posts untouched when @filters is not set.
def apply_filters(posts)
  return posts unless @filters

  @filters.reduce(posts) do |relation, (filter, match)|
    args = filter.arity == 1 ? [relation] : [relation, match]
    instance_exec(*args, &filter) || relation
  end
end
# Applies the ordering selected by @order to the given relation.
#
# posts                  - the relation to order.
# aggregate_search       - when true, the built-in orderings wrap the sort
#                          column in MAX(...).
# allow_relevance_search - when true and @order is not one of the built-in
#                          orderings, falls back to sort_by_relevance.
# type_filter            - forwarded to sort_by_relevance.
#
# Built-in orderings are :latest, :oldest, :latest_topic, :oldest_topic, :views
# and :likes. For non-aggregate searches, :latest and :oldest use `reorder`
# while the other built-ins use `order`.
#
# Afterwards, if @order is set and Search.advanced_orders has an entry for it,
# that callable is invoked with the relation and its result is returned.
def apply_order(
  posts,
  aggregate_search: false,
  allow_relevance_search: true,
  type_filter: "all_topics"
)
  posts =
    case @order
    when :latest
      aggregate_search ? posts.order("MAX(posts.created_at) DESC") : posts.reorder("posts.created_at DESC")
    when :oldest
      aggregate_search ? posts.order("MAX(posts.created_at) ASC") : posts.reorder("posts.created_at ASC")
    when :latest_topic
      aggregate_search ? posts.order("MAX(topics.created_at) DESC") : posts.order("topics.created_at DESC")
    when :oldest_topic
      aggregate_search ? posts.order("MAX(topics.created_at) ASC") : posts.order("topics.created_at ASC")
    when :views
      aggregate_search ? posts.order("MAX(topics.views) DESC") : posts.order("topics.views DESC")
    when :likes
      aggregate_search ? posts.order("MAX(posts.like_count) DESC") : posts.order("posts.like_count DESC")
    else
      if allow_relevance_search
        sort_by_relevance(posts, type_filter: type_filter, aggregate_search: aggregate_search)
      else
        posts
      end
    end

  # Registered advanced orders are applied on top of whatever was chosen above.
  custom_order = @order && Search.advanced_orders&.fetch(@order, nil)
  custom_order ? custom_order.call(posts) : posts
end
private
def search_tags(posts, match, positive:)
@ -1059,11 +1124,6 @@ class Search
"%#{term_without_quote}%",
)
else
# A is for title
# B is for category
# C is for tags
# D is for cooked
weights = @in_title ? "A" : (SiteSetting.tagging_enabled ? "ABCD" : "ABD")
posts = posts.where(post_number: 1) if @in_title
posts = posts.where("post_search_data.search_data @@ #{ts_query(weight_filter: weights)}")
exact_terms = @term.scan(Regexp.new(PHRASE_MATCH_REGEXP_PATTERN)).flatten
@ -1075,13 +1135,7 @@ class Search
end
end
@filters.each do |block, match|
if block.arity == 1
posts = instance_exec(posts, &block) || posts
else
posts = instance_exec(posts, match, &block) || posts
end
end if @filters
posts = apply_filters(posts)
# If we have a search context, prioritize those posts first
posts =
@ -1120,53 +1174,51 @@ class Search
posts
end
if @order == :latest
if aggregate_search
posts = posts.order("MAX(posts.created_at) DESC")
else
posts = posts.reorder("posts.created_at DESC")
end
elsif @order == :oldest
if aggregate_search
posts = posts.order("MAX(posts.created_at) ASC")
else
posts = posts.reorder("posts.created_at ASC")
end
elsif @order == :latest_topic
if aggregate_search
posts = posts.order("MAX(topics.created_at) DESC")
else
posts = posts.order("topics.created_at DESC")
end
elsif @order == :oldest_topic
if aggregate_search
posts = posts.order("MAX(topics.created_at) ASC")
else
posts = posts.order("topics.created_at ASC")
end
elsif @order == :views
if aggregate_search
posts = posts.order("MAX(topics.views) DESC")
else
posts = posts.order("topics.views DESC")
end
elsif @order == :likes
if aggregate_search
posts = posts.order("MAX(posts.like_count) DESC")
else
posts = posts.order("posts.like_count DESC")
end
elsif !is_topic_search
exact_rank = nil
if type_filter != "private_messages"
posts =
if secure_category_ids.present?
posts.where(
"(categories.id IS NULL) OR (NOT categories.read_restricted) OR (categories.id IN (?))",
secure_category_ids,
).references(:categories)
else
posts.where("(categories.id IS NULL) OR (NOT categories.read_restricted)").references(
:categories,
)
end
end
if SiteSetting.prioritize_exact_search_title_match
exact_rank = ts_rank_cd(weight_filter: "A", prefix_match: false)
end
posts =
apply_order(
posts,
aggregate_search: aggregate_search,
allow_relevance_search: !is_topic_search,
type_filter: type_filter,
)
rank = ts_rank_cd(weight_filter: weights)
posts = posts.offset(offset)
posts.limit(limit)
end
if type_filter != "private_messages"
category_search_priority = <<~SQL
# Returns the weight labels used as the `weight_filter` for full-text matching
# and ranking.
#
# Legend:
#   A - title
#   B - category
#   C - tags
#   D - cooked
#
# When @in_title is set only "A" is used; otherwise "C" is included only when
# SiteSetting.tagging_enabled is truthy.
def weights
  return "A" if @in_title

  SiteSetting.tagging_enabled ? "ABCD" : "ABD"
end
def sort_by_relevance(posts, type_filter:, aggregate_search:)
exact_rank = nil
if SiteSetting.prioritize_exact_search_title_match
exact_rank = ts_rank_cd(weight_filter: "A", prefix_match: false)
end
rank = ts_rank_cd(weight_filter: weights)
if type_filter != "private_messages"
category_search_priority = <<~SQL
(
CASE categories.search_priority
WHEN #{Searchable::PRIORITIES[:very_high]}
@ -1178,16 +1230,16 @@ class Search
)
SQL
rank_sort_priorities = [["topics.archived", 0.85], ["topics.closed", 0.9]]
rank_sort_priorities = [["topics.archived", 0.85], ["topics.closed", 0.9]]
rank_sort_priorities =
DiscoursePluginRegistry.apply_modifier(
:search_rank_sort_priorities,
rank_sort_priorities,
self,
)
rank_sort_priorities =
DiscoursePluginRegistry.apply_modifier(
:search_rank_sort_priorities,
rank_sort_priorities,
self,
)
category_priority_weights = <<~SQL
category_priority_weights = <<~SQL
(
CASE categories.search_priority
WHEN #{Searchable::PRIORITIES[:low]}
@ -1204,61 +1256,38 @@ class Search
)
SQL
posts =
if aggregate_search
posts.order("MAX(#{category_search_priority}) DESC")
else
posts.order("#{category_search_priority} DESC")
end
if @term.present? && exact_rank
posts =
if aggregate_search
posts.order("MAX(#{exact_rank} * #{category_priority_weights}) DESC")
else
posts.order("#{exact_rank} * #{category_priority_weights} DESC")
end
posts =
if aggregate_search
posts.order("MAX(#{category_search_priority}) DESC")
else
posts.order("#{category_search_priority} DESC")
end
data_ranking =
if @term.blank?
"(#{category_priority_weights})"
else
"(#{rank} * #{category_priority_weights})"
end
if @term.present? && exact_rank
posts =
if aggregate_search
posts.order("MAX(#{data_ranking}) DESC")
posts.order("MAX(#{exact_rank} * #{category_priority_weights}) DESC")
else
posts.order("#{data_ranking} DESC")
posts.order("#{exact_rank} * #{category_priority_weights} DESC")
end
end
posts = posts.order("topics.bumped_at DESC")
end
if type_filter != "private_messages"
posts =
if secure_category_ids.present?
posts.where(
"(categories.id IS NULL) OR (NOT categories.read_restricted) OR (categories.id IN (?))",
secure_category_ids,
).references(:categories)
data_ranking =
if @term.blank?
"(#{category_priority_weights})"
else
posts.where("(categories.id IS NULL) OR (NOT categories.read_restricted)").references(
:categories,
)
"(#{rank} * #{category_priority_weights})"
end
posts =
if aggregate_search
posts.order("MAX(#{data_ranking}) DESC")
else
posts.order("#{data_ranking} DESC")
end
end
if @order
advanced_order = Search.advanced_orders&.fetch(@order, nil)
posts = advanced_order.call(posts) if advanced_order
end
posts = posts.offset(offset)
posts.limit(limit)
posts.order("topics.bumped_at DESC")
end
def ts_rank_cd(weight_filter:, prefix_match: true)

View File

@ -2748,4 +2748,21 @@ RSpec.describe Search do
expect(result.posts.pluck(:id)).to eq([post1.id, post2.id])
end
end
describe "Extensibility features of search" do
  # Exercises the public apply_filters / apply_order API against a caller-built
  # relation instead of going through a full search execution.
  it "is possible to parse queries" do
    search = Search.new("hello l status:closed")

    relation = search.apply_filters(Post.all.includes(:topic))
    sql = search.apply_order(relation).to_sql

    # Only the free-text part is expected to remain as the term.
    expect(search.term).to eq("hello")
    # NOTE(review): the trailing "l" presumably selects the :latest order; confirm against the parser.
    expect(sql).to include("ORDER BY posts.created_at DESC")
    expect(sql).to match(/where.*topics.closed/i)
  end
end
end