From f25849501d34bf4bf650cc3fe0ab95b9cfce1c0b Mon Sep 17 00:00:00 2001 From: Sam Date: Tue, 12 Sep 2023 16:21:01 +1000 Subject: [PATCH] FEATURE: allow consumers to parse a search string (#23528) This extends search so it can have consumers that: 1. Can split off "term" from various advanced filters and orders 2. Can build a relation of either order or filter It also moves a lot of stuff around in the search class for clarity. Two new APIs are exposed: `.apply_filters` to apply all the special filters to a posts/topics relation `.apply_order` to force a particular order (eg: order:latest) This can then be used by semantic search in Discourse AI --- lib/search.rb | 243 ++++++++++++++++++++++------------------ spec/lib/search_spec.rb | 17 +++ 2 files changed, 153 insertions(+), 107 deletions(-) diff --git a/lib/search.rb b/lib/search.rb index 748cc9bd991..3183f756afe 100644 --- a/lib/search.rb +++ b/lib/search.rb @@ -745,6 +745,71 @@ class Search posts.where("topics.views <= ?", match.to_i) end + def apply_filters(posts) + @filters.each do |block, match| + if block.arity == 1 + posts = instance_exec(posts, &block) || posts + else + posts = instance_exec(posts, match, &block) || posts + end + end if @filters + posts + end + + def apply_order( + posts, + aggregate_search: false, + allow_relevance_search: true, + type_filter: "all_topics" + ) + if @order == :latest + if aggregate_search + posts = posts.order("MAX(posts.created_at) DESC") + else + posts = posts.reorder("posts.created_at DESC") + end + elsif @order == :oldest + if aggregate_search + posts = posts.order("MAX(posts.created_at) ASC") + else + posts = posts.reorder("posts.created_at ASC") + end + elsif @order == :latest_topic + if aggregate_search + posts = posts.order("MAX(topics.created_at) DESC") + else + posts = posts.order("topics.created_at DESC") + end + elsif @order == :oldest_topic + if aggregate_search + posts = posts.order("MAX(topics.created_at) ASC") + else + posts = posts.order("topics.created_at 
ASC") + end + elsif @order == :views + if aggregate_search + posts = posts.order("MAX(topics.views) DESC") + else + posts = posts.order("topics.views DESC") + end + elsif @order == :likes + if aggregate_search + posts = posts.order("MAX(posts.like_count) DESC") + else + posts = posts.order("posts.like_count DESC") + end + elsif allow_relevance_search + posts = sort_by_relevance(posts, type_filter: type_filter, aggregate_search: aggregate_search) + end + + if @order + advanced_order = Search.advanced_orders&.fetch(@order, nil) + posts = advanced_order.call(posts) if advanced_order + end + + posts + end + private def search_tags(posts, match, positive:) @@ -1059,11 +1124,6 @@ class Search "%#{term_without_quote}%", ) else - # A is for title - # B is for category - # C is for tags - # D is for cooked - weights = @in_title ? "A" : (SiteSetting.tagging_enabled ? "ABCD" : "ABD") posts = posts.where(post_number: 1) if @in_title posts = posts.where("post_search_data.search_data @@ #{ts_query(weight_filter: weights)}") exact_terms = @term.scan(Regexp.new(PHRASE_MATCH_REGEXP_PATTERN)).flatten @@ -1075,13 +1135,7 @@ class Search end end - @filters.each do |block, match| - if block.arity == 1 - posts = instance_exec(posts, &block) || posts - else - posts = instance_exec(posts, match, &block) || posts - end - end if @filters + posts = apply_filters(posts) # If we have a search context, prioritize those posts first posts = @@ -1120,53 +1174,51 @@ class Search posts end - if @order == :latest - if aggregate_search - posts = posts.order("MAX(posts.created_at) DESC") - else - posts = posts.reorder("posts.created_at DESC") - end - elsif @order == :oldest - if aggregate_search - posts = posts.order("MAX(posts.created_at) ASC") - else - posts = posts.reorder("posts.created_at ASC") - end - elsif @order == :latest_topic - if aggregate_search - posts = posts.order("MAX(topics.created_at) DESC") - else - posts = posts.order("topics.created_at DESC") - end - elsif @order == :oldest_topic 
- if aggregate_search - posts = posts.order("MAX(topics.created_at) ASC") - else - posts = posts.order("topics.created_at ASC") - end - elsif @order == :views - if aggregate_search - posts = posts.order("MAX(topics.views) DESC") - else - posts = posts.order("topics.views DESC") - end - elsif @order == :likes - if aggregate_search - posts = posts.order("MAX(posts.like_count) DESC") - else - posts = posts.order("posts.like_count DESC") - end - elsif !is_topic_search - exact_rank = nil + if type_filter != "private_messages" + posts = + if secure_category_ids.present? + posts.where( + "(categories.id IS NULL) OR (NOT categories.read_restricted) OR (categories.id IN (?))", + secure_category_ids, + ).references(:categories) + else + posts.where("(categories.id IS NULL) OR (NOT categories.read_restricted)").references( + :categories, + ) + end + end - if SiteSetting.prioritize_exact_search_title_match - exact_rank = ts_rank_cd(weight_filter: "A", prefix_match: false) - end + posts = + apply_order( + posts, + aggregate_search: aggregate_search, + allow_relevance_search: !is_topic_search, + type_filter: type_filter, + ) - rank = ts_rank_cd(weight_filter: weights) + posts = posts.offset(offset) + posts.limit(limit) + end - if type_filter != "private_messages" - category_search_priority = <<~SQL + def weights + # A is for title + # B is for category + # C is for tags + # D is for cooked + @in_title ? "A" : (SiteSetting.tagging_enabled ? 
"ABCD" : "ABD") + end + + def sort_by_relevance(posts, type_filter:, aggregate_search:) + exact_rank = nil + + if SiteSetting.prioritize_exact_search_title_match + exact_rank = ts_rank_cd(weight_filter: "A", prefix_match: false) + end + + rank = ts_rank_cd(weight_filter: weights) + + if type_filter != "private_messages" + category_search_priority = <<~SQL ( CASE categories.search_priority WHEN #{Searchable::PRIORITIES[:very_high]} @@ -1178,16 +1230,16 @@ class Search ) SQL - rank_sort_priorities = [["topics.archived", 0.85], ["topics.closed", 0.9]] + rank_sort_priorities = [["topics.archived", 0.85], ["topics.closed", 0.9]] - rank_sort_priorities = - DiscoursePluginRegistry.apply_modifier( - :search_rank_sort_priorities, - rank_sort_priorities, - self, - ) + rank_sort_priorities = + DiscoursePluginRegistry.apply_modifier( + :search_rank_sort_priorities, + rank_sort_priorities, + self, + ) - category_priority_weights = <<~SQL + category_priority_weights = <<~SQL ( CASE categories.search_priority WHEN #{Searchable::PRIORITIES[:low]} @@ -1204,61 +1256,38 @@ class Search ) SQL - posts = - if aggregate_search - posts.order("MAX(#{category_search_priority}) DESC") - else - posts.order("#{category_search_priority} DESC") - end - - if @term.present? && exact_rank - posts = - if aggregate_search - posts.order("MAX(#{exact_rank} * #{category_priority_weights}) DESC") - else - posts.order("#{exact_rank} * #{category_priority_weights} DESC") - end + posts = + if aggregate_search + posts.order("MAX(#{category_search_priority}) DESC") + else + posts.order("#{category_search_priority} DESC") end - data_ranking = - if @term.blank? - "(#{category_priority_weights})" - else - "(#{rank} * #{category_priority_weights})" - end - + if @term.present? 
&& exact_rank posts = if aggregate_search - posts.order("MAX(#{data_ranking}) DESC") + posts.order("MAX(#{exact_rank} * #{category_priority_weights}) DESC") else - posts.order("#{data_ranking} DESC") + posts.order("#{exact_rank} * #{category_priority_weights} DESC") end end - posts = posts.order("topics.bumped_at DESC") - end - - if type_filter != "private_messages" - posts = - if secure_category_ids.present? - posts.where( - "(categories.id IS NULL) OR (NOT categories.read_restricted) OR (categories.id IN (?))", - secure_category_ids, - ).references(:categories) + data_ranking = + if @term.blank? + "(#{category_priority_weights})" else - posts.where("(categories.id IS NULL) OR (NOT categories.read_restricted)").references( - :categories, - ) + "(#{rank} * #{category_priority_weights})" + end + + posts = + if aggregate_search + posts.order("MAX(#{data_ranking}) DESC") + else + posts.order("#{data_ranking} DESC") end end - if @order - advanced_order = Search.advanced_orders&.fetch(@order, nil) - posts = advanced_order.call(posts) if advanced_order - end - - posts = posts.offset(offset) - posts.limit(limit) + posts.order("topics.bumped_at DESC") end def ts_rank_cd(weight_filter:, prefix_match: true) diff --git a/spec/lib/search_spec.rb b/spec/lib/search_spec.rb index b7efa888f23..774b127d1e3 100644 --- a/spec/lib/search_spec.rb +++ b/spec/lib/search_spec.rb @@ -2748,4 +2748,21 @@ RSpec.describe Search do expect(result.posts.pluck(:id)).to eq([post1.id, post2.id]) end end + + describe "Extensibility features of search" do + it "is possible to parse queries" do + term = "hello l status:closed" + search = Search.new(term) + + posts = Post.all.includes(:topic) + posts = search.apply_filters(posts) + posts = search.apply_order(posts) + + sql = posts.to_sql + + expect(search.term).to eq("hello") + expect(sql).to include("ORDER BY posts.created_at DESC") + expect(sql).to match(/where.*topics.closed/i) + end + end end