mirror of
https://github.com/discourse/discourse.git
synced 2024-11-29 20:24:05 -06:00
597d542c33
This changes PG text search to only match the given title against lexemes that are formed from the title. Likewise, the given raw will only be matched against lexemes that are formed from the post's raw.
1154 lines
33 KiB
Ruby
1154 lines
33 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
class Search
|
|
# Matches a single combining mark from the code point ranges U+0300-036F,
# U+1AB0-1AFF, U+1DC0-1DFF and U+20D0-20FF. Used by strip_diacritics to drop
# accents once a string has been decomposed.
DIACRITICS ||= /([\u0300-\u036f]|[\u1AB0-\u1AFF]|[\u1DC0-\u1DFF]|[\u20D0-\u20FF])/

# Names of topic custom fields to preload for the topics of returned posts
# (consumed at the end of #execute). Starts out empty.
cattr_accessor :preloaded_topic_custom_fields
self.preloaded_topic_custom_fields = Set.new
|
|
|
|
# Base page size used by #limit when the search has no type filter.
#
# @return [Integer]
def self.per_facet
  5
end
|
|
|
|
# Returns a copy of +str+ without accents: the string is NFKD-normalized,
# every character matching DIACRITICS is removed, and surrounding whitespace
# is stripped. The argument itself is not modified.
#
# @param str [String]
# @return [String]
def self.strip_diacritics(str)
  str.unicode_normalize(:nfkd).gsub(DIACRITICS, "").strip
end
|
|
|
|
# Page size used by #limit and #offset when the search is restricted to a
# single type filter.
#
# @return [Integer]
def self.per_filter
  50
end
|
|
|
|
# The type filters a search may be restricted to. #find_grouped_results
# rejects any type filter that is not in this list.
#
# @return [Array<String>]
def self.facets
  %w[topic category user private_messages tags all_topics]
end
|
|
|
|
# Returns the name of the PostgreSQL text search configuration to use for a
# locale. Only the language part of the locale (the text before the first
# "_") is considered; anything unrecognised, including a blank or nil locale,
# maps to 'simple'.
#
# @param locale [String] locale code such as "en" or "pt_BR"
# @return [String] text search configuration name
def self.ts_config(locale = SiteSetting.default_locale)
  # if adding a text search configuration, you should check PG beforehand:
  # SELECT cfgname FROM pg_ts_config;
  # As an aside, dictionaries can be listed by `\dFd`, the
  # physical locations are in /usr/share/postgresql/<version>/tsearch_data.
  # But it may not appear there based on pg extension configuration.
  # base docker config
  #
  # Compare strings instead of calling to_sym: a blank locale used to end up
  # in nil.to_sym and raise NoMethodError.
  case locale.to_s.split("_").first
  when "da" then 'danish'
  when "nl" then 'dutch'
  when "en" then 'english'
  when "fi" then 'finnish'
  when "fr" then 'french'
  when "de" then 'german'
  when "hu" then 'hungarian'
  when "it" then 'italian'
  when "nb" then 'norwegian'
  when "pt" then 'portuguese'
  when "ro" then 'romanian'
  when "ru" then 'russian'
  when "es" then 'spanish'
  when "sv" then 'swedish'
  when "tr" then 'turkish'
  else 'simple' # use the 'simple' stemmer for other languages
  end
end
|
|
|
|
# Normalizes text before it is turned into a search query or indexed.
#
# Works on a UTF-8-forced copy of +search_data+. Unless +purpose+ is :topic,
# the text is either segmented with CppjiebaRb (when the default locale is
# zh_TW, zh_CN or ja, or the search_tokenize_chinese_japanese_korean setting
# is on) or whitespace-squished, and accents are stripped when the
# search_ignore_accents setting is on. In every case the query string is
# removed from URLs found in the text.
#
# @param search_data [String] raw text
# @param purpose [Symbol, nil] :query (default, also used for nil), :topic,
#   or any other value, which selects CppjiebaRb's :mix mode
# @return [String] the prepared text
def self.prepare_data(search_data, purpose = :query)
  purpose ||= :query

  data = search_data.dup
  data.force_encoding("UTF-8")

  if purpose != :topic
    # TODO cppjieba_rb is designed for chinese, we need something else for Japanese
    # Korean appears to be safe cause words are already space seperated
    # For Japanese we should investigate using kakasi
    if ['zh_TW', 'zh_CN', 'ja'].include?(SiteSetting.default_locale) || SiteSetting.search_tokenize_chinese_japanese_korean
      require 'cppjieba_rb' unless defined? CppjiebaRb
      mode = (purpose == :query ? :query : :mix)
      # Segment the UTF-8-forced copy rather than the raw argument.
      data = CppjiebaRb.segment(data, mode: mode)

      # TODO: we still want to tokenize here but the current stopword list is too wide
      # in cppjieba leading to words such as volume to be skipped. PG already has an English
      # stopword list so use that vs relying on cppjieba
      if ts_config != 'english'
        data = CppjiebaRb.filter_stop_word(data)
      else
        data = data.filter { |s| s.present? }
      end

      data = data.join(' ')
    else
      data.squish!
    end

    if SiteSetting.search_ignore_accents
      data = strip_diacritics(data)
    end
  end

  data.gsub!(EmailCook.url_regexp) do |url|
    begin
      uri = URI.parse(url)
      uri.query = nil
      uri.to_s
    rescue URI::Error
      # Don't fail even if URL turns out to be invalid. Return the matched
      # text unchanged; a nil block result would make gsub! delete it.
      url
    end
  end

  data
end
|
|
|
|
# Converts a date word from a search filter into a time, using Time.zone.
#
# Recognised forms, tried in this order:
# * 1-3 digits: that many days before the beginning of today
# * YYYY, YYYY-M or YYYY-M-D: that date (missing month/day default to 1);
#   nil when month or day is out of range or the date fails to parse
# * "yesterday" (any case)
# * an English weekday name: beginning of the current week counted from
#   that weekday
# * an English full or abbreviated month name: beginning of the most recent
#   such month (the current month if it is the one named)
#
# @param str [String]
# @return [ActiveSupport::TimeWithZone, nil] nil when nothing matched
def self.word_to_date(str)
  return Time.zone.now.beginning_of_day.days_ago(str.to_i) if /^[0-9]{1,3}$/.match?(str)

  date_parts = /^([12][0-9]{3})(-([0-1]?[0-9]))?(-([0-3]?[0-9]))?$/.match(str)
  if date_parts
    year = date_parts[1].to_i
    month = date_parts[2] ? date_parts[3].to_i : 1
    day = date_parts[4] ? date_parts[5].to_i : 1

    return if day == 0 || month == 0 || day > 31 || month > 12

    begin
      return Time.zone.parse("#{year}-#{month}-#{day}")
    rescue ArgumentError
      return nil
    end
  end

  lowered = str.downcase
  return Time.zone.now.beginning_of_day.yesterday if lowered == "yesterday"

  titlecase = lowered.titlecase
  return Time.zone.now.beginning_of_week(lowered.to_sym) if Date::DAYNAMES.include?(titlecase)

  month_index = Date::MONTHNAMES.find_index(titlecase) || Date::ABBR_MONTHNAMES.find_index(titlecase)
  return unless month_index

  # Step back to the most recent occurrence of the named month.
  months_back = Time.zone.now.month - month_index
  months_back += 12 if months_back < 0
  Time.zone.now.beginning_of_month.months_ago(months_back)
end
|
|
|
|
# Uncached lookup of the post id that separates "recent" posts from the rest.
#
# Reads up to two post ids, newest first, starting at position
# search_recent_posts_size. The first is the boundary; it is returned only
# if a second (older) post exists. Returns 0 otherwise, or when the
# search_prefer_recent_posts setting is off.
#
# @return [Integer]
def self.min_post_id_no_cache
  return 0 unless SiteSetting.search_prefer_recent_posts?

  boundary_id, older_id = Post.unscoped
    .order('id desc')
    .offset(SiteSetting.search_recent_posts_size - 1)
    .limit(2)
    .pluck(:id)

  older_id ? boundary_id : 0
end
|
|
|
|
# Cached variant of min_post_id_no_cache. The value is kept in
# Discourse.cache for one week under a key that includes the current
# search_recent_posts_size. Returns 0 without touching the cache when the
# search_prefer_recent_posts setting is off.
#
# @param opts [Object] accepted but not used
# @return [Integer]
def self.min_post_id(opts = nil)
  return 0 unless SiteSetting.search_prefer_recent_posts?

  # It can be quite slow to count all the posts so let's cache it
  cache_key = "search-min-post-id:#{SiteSetting.search_recent_posts_size}"
  Discourse.cache.fetch(cache_key, expires_in: 1.week) { min_post_id_no_cache }
end
|
|
|
|
# The prepared search term (set by #initialize only when something is left
# after the advanced filters have been extracted).
attr_accessor :term
# The term after character clean-up but before filters are extracted.
attr_reader :clean_term

# Builds a search for +term+.
#
# Option keys read here: :guardian, :search_context, :blurb_length, :page
# and :type_filter. Other methods of this class read further keys from the
# same hash. Note that +opts+ is stored as-is and :type_filter may be
# overwritten on it.
#
# @param term [#to_s] raw search input
# @param opts [Hash, nil]
def initialize(term, opts = nil)
  @opts = opts || {}
  @guardian = @opts[:guardian] || Guardian.new
  @search_context = @opts[:search_context]
  @blurb_length = @opts[:blurb_length]
  @page = @opts[:page]
  @valid = true
  @in_title = false

  cleaned = term.to_s.dup
  # Removes any zero-width characters from search terms
  cleaned.gsub!(/[\u200B-\u200D\uFEFF]/, '')
  # Replace curly quotes to regular quotes
  cleaned.gsub!(/[\u201c\u201d]/, '"')
  @clean_term = cleaned

  # Pulls filters/ordering words out of the term; may also set
  # @search_context, @search_pms and @search_all_topics.
  remaining = process_advanced_search!(cleaned)

  if remaining.present?
    purpose = @search_context.is_a?(Topic) ? :topic : nil
    @term = Search.prepare_data(remaining, purpose)
    @original_term = PG::Connection.escape_string(@term)
  end

  if @search_pms && @guardian.user
    @opts[:type_filter] = "private_messages"
    @search_context = @guardian.user
  end

  @opts[:type_filter] = "all_topics" if @search_all_topics && @guardian.user

  @results = GroupedSearchResults.new(
    type_filter: @opts[:type_filter],
    term: clean_term,
    blurb_term: remaining,
    search_context: @search_context,
    blurb_length: @blurb_length
  )
end
|
|
|
|
# Number of rows to request: the page size for the current mode plus one.
# Uses Search.per_filter when a type filter is set, Search.per_facet
# otherwise.
#
# @return [Integer]
def limit
  page_size = @opts[:type_filter].present? ? Search.per_filter : Search.per_facet
  page_size + 1
end
|
|
|
|
# Row offset for the requested page. Paging only applies when both a page
# and a type filter were given; otherwise the offset is 0.
#
# @return [Integer]
def offset
  return 0 unless @page && @opts[:type_filter].present?

  (@page - 1) * Search.per_filter
end
|
|
|
|
# Whether the search is considered valid. Starts out true and is set to
# false by #execute when no sufficiently long term remains.
def valid?
  @valid
end
|
|
|
|
# Convenience wrapper: builds a search for +term+ / +opts+ and runs it.
#
# @return whatever #execute returns
def self.execute(term, opts = nil)
  new(term, opts).execute
end
|
|
|
|
# Query a term
#
# Steps, in order:
# 1. Logs the query through SearchLog when logging is enabled, a
#    :search_type option was given and the site is not in readonly mode.
# 2. Unless filters are present or :search_for_id is set, requires at least
#    one term of the minimum length; otherwise marks the search invalid and
#    returns nil.
# 3. With :search_for_id and a topic-like type filter, treats a numeric
#    term or a topic URL as a direct topic lookup.
# 4. Falls back to the grouped search when no posts were found so far.
# 5. Preloads the registered topic custom fields for the found posts.
#
# @return [GroupedSearchResults, nil]
def execute
  if SiteSetting.log_search_queries? && @opts[:search_type].present? && !Discourse.readonly_mode?
    status, search_log_id = SearchLog.log(
      term: @term,
      search_type: @opts[:search_type],
      ip_address: @opts[:ip_address],
      user_id: @opts[:user_id]
    )
    @results.search_log_id = search_log_id unless status == :error
  end

  unless @filters.present? || @opts[:search_for_id]
    min_length = @opts[:min_search_term_length] || SiteSetting.min_search_term_length
    # Split on whitespace that is outside double quotes.
    long_enough = (@term || '')
      .split(/\s(?=(?:[^"]|"[^"]*")*$)/)
      .select { |t| t.length >= min_length }

    if long_enough.blank?
      @term = ''
      @valid = false
      return
    end
  end

  # If the term is a number or url to a topic, just include that topic
  if @opts[:search_for_id] && ['topic', 'private_messages', 'all_topics'].include?(@results.type_filter)
    if /^\d+$/.match?(@term)
      single_topic(@term.to_i)
    else
      begin
        route = Discourse.route_for(@term)
        if route && route[:controller] == "topics" && route[:action] == "show"
          topic_id = (route[:id] || route[:topic_id]).to_i
          single_topic(topic_id) if topic_id > 0
        end
      rescue ActionController::RoutingError
        # the term is not a routable URL; continue with a normal search
      end
    end
  end

  find_grouped_results if @results.posts.blank?

  if preloaded_topic_custom_fields.present? && @results.posts.present?
    topics = @results.posts.map(&:topic)
    Topic.preload_custom_fields(topics, preloaded_topic_custom_fields)
  end

  @results
end
|
|
|
|
# Registers an advanced search filter.
#
# @param trigger [Regexp] pattern matched against each word of the term;
#   its first capture group, if any, is later handed to the block
# @yield [posts, match] block that narrows the posts relation
# @return [Proc] the registered block
def self.advanced_filter(trigger, &block)
  @advanced_filters ||= {}
  @advanced_filters[trigger] = block
end
|
|
|
|
# All filters registered through advanced_filter, keyed by trigger pattern.
#
# @return [Hash{Regexp => Proc}, nil] nil until the first filter is registered
def self.advanced_filters
  @advanced_filters
end
|
|
|
|
# Built-in advanced filters, part 1. Each block is run with instance_exec on
# the Search instance (see #posts_query); returning nil leaves the relation
# unchanged. The number of block parameters matters: one-parameter blocks
# are called without the captured match.

# in:personal-direct -- personal messages with no allowed group where the
# current user has exactly one other allowed user.
advanced_filter(/^in:personal-direct$/) do |posts|
  if @guardian.user
    posts
      .joins("LEFT JOIN topic_allowed_groups tg ON posts.topic_id = tg.topic_id")
      .where(<<~SQL, user_id: @guardian.user.id)
        tg.id IS NULL
        AND posts.topic_id IN (
          SELECT tau.topic_id
          FROM topic_allowed_users tau
          JOIN topic_allowed_users tau2
          ON tau2.topic_id = tau.topic_id
          AND tau2.id != tau.id
          WHERE tau.user_id = :user_id
          GROUP BY tau.topic_id
          HAVING COUNT(*) = 1
        )
      SQL
  end
end

# in:tagged -- topics that have at least one tag.
advanced_filter(/^in:tagged$/) do |posts|
  posts
    .where('EXISTS (SELECT 1 FROM topic_tags WHERE topic_tags.topic_id = posts.topic_id)')
end

# in:untagged -- topics that have no tag.
advanced_filter(/^in:untagged$/) do |posts|
  posts
    .joins("LEFT JOIN topic_tags ON
      topic_tags.topic_id = posts.topic_id")
    .where("topic_tags.id IS NULL")
end

# status:open -- neither closed nor archived.
advanced_filter(/^status:open$/) do |posts|
  posts.where('NOT topics.closed AND NOT topics.archived')
end

advanced_filter(/^status:closed$/) do |posts|
  posts.where('topics.closed')
end

# status:public -- topics in categories that are not read restricted.
advanced_filter(/^status:public$/) do |posts|
  category_ids = Category
    .where(read_restricted: false)
    .pluck(:id)

  posts.where("topics.category_id in (?)", category_ids)
end

advanced_filter(/^status:archived$/) do |posts|
  posts.where('topics.archived')
end

# status:noreplies -- topics consisting of a single post.
advanced_filter(/^status:noreplies$/) do |posts|
  posts.where("topics.posts_count = 1")
end

# status:single_user -- topics with a single participant.
advanced_filter(/^status:single_user$/) do |posts|
  posts.where("topics.participant_count = 1")
end

# posts_count:N -- topics with exactly N posts.
advanced_filter(/^posts_count:(\d+)$/) do |posts, match|
  posts.where("topics.posts_count = ?", match.to_i)
end

# min_post_count:N -- topics with at least N posts.
advanced_filter(/^min_post_count:(\d+)$/) do |posts, match|
  posts.where("topics.posts_count >= ?", match.to_i)
end

# in:first / f -- first post of each topic only. Both alternatives are
# anchored at both ends; previously only "f" had an end anchor, so any word
# merely starting with "in:first" was consumed as this filter.
advanced_filter(/^(?:in:first|f)$/) do |posts|
  posts.where("posts.post_number = 1")
end

advanced_filter(/^in:pinned$/) do |posts|
  posts.where("topics.pinned_at IS NOT NULL")
end

advanced_filter(/^in:wiki$/) do |posts, match|
  posts.where(wiki: true)
end

# badge:NAME_OR_ID -- posts by users holding the badge; no results when the
# badge cannot be found.
advanced_filter(/^badge:(.*)$/) do |posts, match|
  badge_id = Badge.where('name ilike ? OR id = ?', match, match.to_i).pluck_first(:id)
  if badge_id
    posts.where('posts.user_id IN (SELECT ub.user_id FROM user_badges ub WHERE ub.badge_id = ?)', badge_id)
  else
    posts.where("1 = 0")
  end
end
|
|
|
|
# Restricts +posts+ to those on which the current user has a non-deleted
# post action of the given type. Callers must make sure @guardian.user is
# present.
#
# @param posts the posts relation to narrow
# @param post_action_type [Integer] a PostActionType id
# @return the narrowed relation
def post_action_type_filter(posts, post_action_type)
  posts.where("posts.id IN (
    SELECT pa.post_id FROM post_actions pa
    WHERE pa.user_id = #{@guardian.user.id} AND
    pa.post_action_type_id = #{post_action_type} AND
    deleted_at IS NULL
  )")
end
|
|
|
|
# Built-in advanced filters, part 2. Blocks run with instance_exec on the
# Search instance; returning nil leaves the relation unchanged. Keep the
# number of block parameters as-is: it decides whether the captured match
# is passed in.

# in:likes -- posts the current user liked.
advanced_filter(/^in:(likes)$/) do |posts, match|
  if @guardian.user
    post_action_type_filter(posts, PostActionType.types[:like])
  end
end

# in:bookmarks -- posts the current user bookmarked.
advanced_filter(/^in:(bookmarks)$/) do |posts, match|
  if @guardian.user
    posts.where("posts.id IN (SELECT post_id FROM bookmarks WHERE bookmarks.user_id = #{@guardian.user.id})")
  end
end

# in:posted -- posts written by the current user.
advanced_filter(/^in:posted$/) do |posts|
  posts.where("posts.user_id = #{@guardian.user.id}") if @guardian.user
end

# in:created -- first posts written by the current user.
advanced_filter(/^in:created$/) do |posts|
  posts.where(user_id: @guardian.user.id, post_number: 1) if @guardian.user
end

# created:@username -- first posts written by the named user.
# The lookup goes through username_lower, like the user: and @ filters, so
# the downcased input also matches mixed-case usernames. An unknown user
# yields no results instead of matching rows whose user_id is NULL.
advanced_filter(/^created:@(.*)$/) do |posts, match|
  user_id = User.where(username_lower: match.downcase).pluck_first(:id)
  if user_id
    posts.where(user_id: user_id, post_number: 1)
  else
    posts.where("1 = 0")
  end
end

# in:watching / in:tracking -- topics where the current user's notification
# level is at least the named level.
advanced_filter(/^in:(watching|tracking)$/) do |posts, match|
  if @guardian.user
    level = TopicUser.notification_levels[match.to_sym]
    posts.where("posts.topic_id IN (
      SELECT tu.topic_id FROM topic_users tu
      WHERE tu.user_id = :user_id AND
      tu.notification_level >= :level
    )", user_id: @guardian.user.id, level: level)
  end
end

# in:seen -- posts for which the current user has a post_timings row.
advanced_filter(/^in:seen$/) do |posts|
  if @guardian.user
    posts
      .joins("INNER JOIN post_timings ON
        post_timings.topic_id = posts.topic_id
        AND post_timings.post_number = posts.post_number
        AND post_timings.user_id = #{ActiveRecord::Base.connection.quote(@guardian.user.id)}
      ")
  end
end

# in:unseen -- posts for which the current user has no post_timings row.
advanced_filter(/^in:unseen$/) do |posts|
  if @guardian.user
    posts
      .joins("LEFT JOIN post_timings ON
        post_timings.topic_id = posts.topic_id
        AND post_timings.post_number = posts.post_number
        AND post_timings.user_id = #{ActiveRecord::Base.connection.quote(@guardian.user.id)}
      ")
      .where("post_timings.user_id IS NULL")
  end
end

advanced_filter(/^with:images$/) do |posts|
  posts.where("posts.image_upload_id IS NOT NULL")
end

# category:NAME -- matches slug, name or id. Direct subcategories of the
# first match are included unless the value starts with "=".
advanced_filter(/^category:(.+)$/) do |posts, match|
  exact = false

  if match[0] == "="
    exact = true
    match = match[1..-1]
  end

  category_ids = Category.where('slug ilike ? OR name ilike ? OR id = ?',
                                match, match, match.to_i).pluck(:id)
  if category_ids.present?

    unless exact
      category_ids +=
        Category.where('parent_category_id = ?', category_ids.first).pluck(:id)
    end

    @category_filter_matched ||= true
    posts.where("topics.category_id IN (?)", category_ids)
  else
    posts.where("1 = 0")
  end
end

# #slug or #parent:child -- category by slug; when no category matches,
# tries a tag, then a tag group, and finally searches for the literal
# "#word" if it is long enough.
advanced_filter(/^\#([\p{L}0-9\-:=]+)$/) do |posts, match|

  exact = true

  category_slug, subcategory_slug = match.to_s.split(":")
  next unless category_slug

  if subcategory_slug

    category_id, _ = DB.query_single(<<~SQL, category_slug.downcase, subcategory_slug.downcase)
      SELECT sub.id
      FROM categories sub
      JOIN categories c ON sub.parent_category_id = c.id
      WHERE LOWER(c.slug) = ? AND LOWER(sub.slug) = ?
      ORDER BY c.id
      LIMIT 1
    SQL

  else
    # main category
    if category_slug[0] == "="
      category_slug = category_slug[1..-1]
    else
      exact = false
    end

    category_id = Category.where("lower(slug) = ?", category_slug.downcase)
      .order('case when parent_category_id is null then 0 else 1 end')
      .pluck(:id)
      .first
  end

  if category_id
    category_ids = [category_id]

    unless exact
      category_ids +=
        Category.where('parent_category_id = ?', category_id).pluck(:id)
    end

    @category_filter_matched ||= true
    posts.where("topics.category_id IN (?)", category_ids)
  else
    # try a possible tag match
    tag_id = Tag.where_name(category_slug).pluck_first(:id)
    if (tag_id)
      posts.where(<<~SQL, tag_id)
        topics.id IN (
          SELECT DISTINCT(tt.topic_id)
          FROM topic_tags tt
          WHERE tt.tag_id = ?
        )
      SQL
    else
      if tag_group_id = TagGroup.find_id_by_slug(category_slug)
        posts.where(<<~SQL, tag_group_id)
          topics.id IN (
            SELECT DISTINCT(tt.topic_id)
            FROM topic_tags tt
            WHERE tt.tag_id in (
              SELECT tag_id
              FROM tag_group_memberships
              WHERE tag_group_id = ?
            )
          )
        SQL

      # a bit yucky but we got to add the term back in
      elsif match.to_s.length >= SiteSetting.min_search_term_length
        posts.where <<~SQL
          posts.id IN (
            SELECT post_id FROM post_search_data pd1
            WHERE pd1.search_data @@ #{Search.ts_query(term: "##{match}")})
        SQL
      end
    end
  end
end

# group:NAME_OR_ID -- posts by members of the group.
advanced_filter(/^group:(.+)$/) do |posts, match|
  group_id = Group.where('name ilike ? OR (id = ? AND id > 0)', match, match.to_i).pluck_first(:id)
  if group_id
    posts.where("posts.user_id IN (select gu.user_id from group_users gu where gu.group_id = ?)", group_id)
  else
    posts.where("1 = 0")
  end
end

# user:NAME_OR_ID -- posts by a non-staged user.
advanced_filter(/^user:(.+)$/) do |posts, match|
  user_id = User.where(staged: false).where('username_lower = ? OR id = ?', match.downcase, match.to_i).pluck_first(:id)
  if user_id
    posts.where("posts.user_id = #{user_id}")
  else
    posts.where("1 = 0")
  end
end

# @username -- posts by a non-staged user.
advanced_filter(/^\@([a-zA-Z0-9_\-.]+)$/) do |posts, match|
  user_id = User.where(staged: false).where(username_lower: match.downcase).pluck_first(:id)
  if user_id
    posts.where("posts.user_id = #{user_id}")
  else
    posts.where("1 = 0")
  end
end

# before:DATE -- ignored when the date word is not understood.
advanced_filter(/^before:(.*)$/) do |posts, match|
  if date = Search.word_to_date(match)
    posts.where("posts.created_at < ?", date)
  else
    posts
  end
end

# after:DATE -- ignored when the date word is not understood.
advanced_filter(/^after:(.*)$/) do |posts, match|
  if date = Search.word_to_date(match)
    posts.where("posts.created_at > ?", date)
  else
    posts
  end
end

advanced_filter(/^tags?:([\p{L}0-9,\-_+]+)$/) do |posts, match|
  search_tags(posts, match, positive: true)
end

advanced_filter(/^\-tags?:([\p{L}0-9,\-_+]+)$/) do |posts, match|
  search_tags(posts, match, positive: false)
end

# filetype:ext[,ext] -- posts that link to, or have an upload with, one of
# the given extensions.
advanced_filter(/^filetypes?:([a-zA-Z0-9,\-_]+)$/) do |posts, match|
  file_extensions = match.split(",").map(&:downcase)
  posts.where("posts.id IN (
    SELECT post_id
    FROM topic_links
    WHERE extension IN (:file_extensions)

    UNION

    SELECT post_uploads.post_id
    FROM uploads
    JOIN post_uploads ON post_uploads.upload_id = uploads.id
    WHERE lower(uploads.extension) IN (:file_extensions)
  )", file_extensions: file_extensions)
end
|
|
|
|
private
|
|
|
|
# Narrows +posts+ by topic tags.
#
# A value containing "+" requires all listed tags (combined with "&" in a
# tsquery over the topic's lowercased tag names); otherwise the value is a
# comma-separated list and any one tag suffices.
#
# @param posts the posts relation to narrow
# @param match [String, nil] tag list captured from the search word
# @param positive [Boolean] true for tags:, false for -tags: (NOT IN)
# @return the narrowed relation, or nil when +match+ is nil
def search_tags(posts, match, positive:)
  return if match.nil?

  # Work on a lowercased copy so the caller's string is left untouched.
  lowered = match.downcase
  modifier = positive ? "" : "NOT"

  if lowered.include?('+')
    tags = lowered.split('+')

    posts.where("topics.id #{modifier} IN (
      SELECT tt.topic_id
      FROM topic_tags tt, tags
      WHERE tt.tag_id = tags.id
      GROUP BY tt.topic_id
      HAVING to_tsvector(#{default_ts_config}, array_to_string(array_agg(lower(tags.name)), ' ')) @@ to_tsquery(#{default_ts_config}, ?)
    )", tags.join('&'))
  else
    tags = lowered.split(",")

    posts.where("topics.id #{modifier} IN (
      SELECT DISTINCT(tt.topic_id)
      FROM topic_tags tt, tags
      WHERE tt.tag_id = tags.id AND lower(tags.name) IN (?)
    )", tags)
  end
end
|
|
|
|
# Splits +term+ into words (a double-quoted phrase stays attached to its
# word), extracts everything that is a search directive and returns the
# remaining words joined by a space.
#
# Side effects: matching advanced filters are appended to @filters together
# with their first capture; ordering words set @order; "in:title"/"t" sets
# @in_title; "topic:ID" may set @search_context; the in:all / in:private /
# in:personal / in:personal-direct / personal_messages: words set
# @search_all_topics, @search_pms and @direct_pms_only.
#
# @param term [#to_s]
# @return [String] the term without the recognised directives
def process_advanced_search!(term)
  term.to_s.scan(/(([^" \t\n\x0B\f\r]+)?(("[^"]+")?))/).to_a.map do |(word, _)|
    next if word.blank?

    found = false
    # Filters are matched against the word without quotes. Computed once per
    # word rather than once per registered filter.
    cleaned = word.gsub(/["']/, "")

    Search.advanced_filters.each do |matcher, block|
      if cleaned =~ matcher
        (@filters ||= []) << [block, $1]
        found = true
      end
    end

    if word == 'order:latest' || word == 'l'
      @order = :latest
      nil
    elsif word == 'order:latest_topic'
      @order = :latest_topic
      nil
    elsif word == 'in:title' || word == 't'
      @in_title = true
      nil
    elsif word =~ /topic:(\d+)/
      topic_id = $1.to_i
      if topic_id > 1
        topic = Topic.find_by(id: topic_id)
        if @guardian.can_see?(topic)
          @search_context = topic
        end
      end
      nil
    elsif word == 'order:views'
      @order = :views
      nil
    elsif word == 'order:likes'
      @order = :likes
      nil
    elsif word == 'in:all'
      @search_all_topics = true
      nil
    elsif %w{in:private in:personal}.include?(word) # remove private after 2.4 release
      @search_pms = true
      nil
    elsif word == "in:personal-direct"
      @search_pms = true
      @direct_pms_only = true
      nil
    elsif word =~ /^personal_messages:(.+)$/
      @search_pms = true
      nil
    else
      # Words consumed by an advanced filter are dropped from the term.
      found ? nil : word
    end
  end.compact.join(' ')
end
|
|
|
|
# Runs the searches appropriate for the current type filter and context.
#
# With a type filter, only "<type_filter>_search" is run, after checking the
# filter against Search.facets. Without one, user, category, tag and group
# searches run first (only when there is a term and no search context),
# followed by the topic search.
#
# @return [GroupedSearchResults, nil] nil when the database rejects the
#   statement
# @raise [Discourse::InvalidAccess] for a type filter not in Search.facets
def find_grouped_results
  type_filter = @results.type_filter

  if type_filter.present?
    raise Discourse::InvalidAccess.new("invalid type filter") unless Search.facets.include?(type_filter)
    # calling protected methods
    send("#{type_filter}_search")
  else
    if !@search_context && @term.present?
      user_search
      category_search
      tags_search
      groups_search
    end
    topic_search
  end

  @results
rescue ActiveRecord::StatementInvalid
  # In the event of a PG:Error return nothing, it is likely they used a foreign language whose
  # locale is not supported by postgres
  nil
end
|
|
|
|
# If we're searching for a single topic
#
# Adds the first post of topic +id+ to the results when the guardian may
# see it. With the :restrict_to_archetype option, the topic must also have
# the matching archetype (the default one if the option equals
# Archetype.default, private message otherwise).
#
# @param id [Integer] topic id
# @return [GroupedSearchResults, nil] nil when the post cannot be seen
def single_topic(id)
  restriction = @opts[:restrict_to_archetype]

  post =
    if restriction.present?
      archetype = restriction == Archetype.default ? Archetype.default : Archetype.private_message
      Post.joins(:topic)
        .where("topics.id = :id AND topics.archetype = :archetype AND posts.post_number = 1", id: id, archetype: archetype)
        .first
    else
      Post.find_by(topic_id: id, post_number: 1)
    end

  return nil unless @guardian.can_see?(post)

  @results.add(post)
  @results
end
|
|
|
|
# Memoized @guardian.secure_category_ids. Any non-nil result is cached.
def secure_category_ids
  @secure_category_ids = @guardian.secure_category_ids if @secure_category_ids.nil?
  @secure_category_ids
end
|
|
|
|
# Adds categories whose search data matches the term to the results,
# ordered by topics_month descending, restricted to what the guardian may
# see and capped at #limit.
def category_search
  # scope is leaking onto Category, this is not good and probably a bug in Rails
  # the secure_category_ids will invoke the same method on User, it calls Category.where
  # however the scope from the query below is leaking in to Category, this works around
  # the issue while we figure out what is up in Rails
  secure_category_ids

  matching = Category.includes(:category_search_data)
    .where("category_search_data.search_data @@ #{ts_query}")
    .references(:category_search_data)
    .order("topics_month DESC")
    .secured(@guardian)
    .limit(limit)

  matching.each { |category| @results.add(category) }
end
|
|
|
|
# Adds active, non-staged users whose search data matches the term (using
# the "simple" text search configuration) to the results. An exact
# username match sorts first, then most recent last_posted_at; capped at
# #limit. Does nothing for anonymous users when the
# hide_user_profiles_from_public setting is on.
def user_search
  return if SiteSetting.hide_user_profiles_from_public && !@guardian.user

  # @original_term was passed through PG::Connection.escape_string in
  # #initialize.
  exact_match_first = "CASE WHEN username_lower = '#{@original_term.downcase}' THEN 0 ELSE 1 END"

  matching = User.includes(:user_search_data)
    .references(:user_search_data)
    .where(active: true)
    .where(staged: false)
    .where("user_search_data.search_data @@ #{ts_query("simple")}")
    .order(exact_match_first)
    .order("last_posted_at DESC")
    .limit(limit)

  matching.each { |user| @results.add(user) }
end
|
|
|
|
# Adds groups visible to the current user (the "everyone" group excluded)
# whose name or full name contains the term, case-insensitively.
def groups_search
  pattern = "%#{@term}%"

  Group
    .visible_groups(@guardian.user, "name ASC", include_everyone: false)
    .where("name ILIKE :term OR full_name ILIKE :term", term: pattern)
    .each { |group| @results.add(group) }
end
|
|
|
|
# Adds tags whose search data matches the term to the results, ordered by
# name and capped at #limit. Does nothing when tagging is disabled.
def tags_search
  return unless SiteSetting.tagging_enabled

  matching = Tag.includes(:tag_search_data)
    .where("tag_search_data.search_data @@ #{ts_query}")
    .references(:tag_search_data)
    .order("name asc")
    .limit(limit)

  matching.each { |tag| @results.add(tag) }
end
|
|
|
|
# Source of the regexp used to pull double-quoted phrases out of the term.
PHRASE_MATCH_REGEXP_PATTERN = '"([^"]+)"'

# Builds the posts relation for the current search.
#
# Applies, in order: post type / visibility / archetype restrictions, the
# term match, the advanced filters collected in @filters, the search
# context, the ordering selected by @order, category security, and finally
# offset and limit.
#
# With +aggregate_search+ the filtered posts become a subquery named
# "subquery" that is grouped by topic: the result rows carry the topic id
# as "id" plus the post_number of the best post per topic under the chosen
# ordering. An optional block may adjust the inner posts relation before it
# is wrapped. Note that the :likes ordering adds no aggregate columns.
#
# @param limit [Integer] maximum number of rows
# @param type_filter [String, nil] "private_messages", "all_topics" or nil
# @param aggregate_search [Boolean] group results by topic
# @return the relation described above
def posts_query(limit, type_filter: nil, aggregate_search: false)
  posts = Post.where(post_type: Topic.visible_post_types(@guardian.user))
    .joins(:post_search_data, :topic)
    .joins("LEFT JOIN categories ON categories.id = topics.category_id")

  is_topic_search = @search_context.present? && @search_context.is_a?(Topic)

  posts = posts.where("topics.visible") unless is_topic_search

  if type_filter === "private_messages" || (is_topic_search && @search_context.private_message?)
    posts = posts.where("topics.archetype = ?", Archetype.private_message)

    unless @guardian.is_admin?
      posts = posts.private_posts_for_user(@guardian.user)
    end
  elsif type_filter === "all_topics"
    private_posts = posts.where("topics.archetype = ?", Archetype.private_message)
    private_posts = private_posts.private_posts_for_user(@guardian.user)

    posts = posts.where("topics.archetype <> ?", Archetype.private_message).or(private_posts)
  else
    posts = posts.where("topics.archetype <> ?", Archetype.private_message)
  end

  if @term.present?
    if is_topic_search

      # Inside a topic the term is matched as a plain substring; a quoted
      # part of the term, if any, is used on its own.
      term_without_quote = @term
      if @term =~ /"(.+)"/
        term_without_quote = $1
      end

      if @term =~ /'(.+)'/
        term_without_quote = $1
      end

      posts = posts.joins('JOIN users u ON u.id = posts.user_id')
      posts = posts.where("posts.raw || ' ' || u.username || ' ' || COALESCE(u.name, '') ilike ?", "%#{term_without_quote}%")
    else
      # A is for title
      # B is for category
      # C is for tags
      # D is for cooked
      weights = @in_title ? 'A' : (SiteSetting.tagging_enabled ? 'ABCD' : 'ABD')
      posts = posts.where(post_number: 1) if @in_title
      posts = posts.where("post_search_data.search_data @@ #{ts_query(weight_filter: weights)}")
      exact_terms = @term.scan(Regexp.new(PHRASE_MATCH_REGEXP_PATTERN)).flatten

      # Quoted phrases must additionally appear verbatim in raw or title.
      exact_terms.each do |exact|
        posts = posts.where("posts.raw ilike :exact OR topics.title ilike :exact", exact: "%#{exact}%")
      end
    end
  end

  # A filter block returning nil leaves the relation unchanged.
  @filters.each do |block, match|
    if block.arity == 1
      posts = instance_exec(posts, &block) || posts
    else
      posts = instance_exec(posts, match, &block) || posts
    end
  end if @filters

  # If we have a search context, prioritize those posts first
  posts =
    if @search_context.present?
      if @search_context.is_a?(User)
        if type_filter === "private_messages"
          @direct_pms_only ? posts : posts.private_posts_for_user(@search_context)
        else
          posts.where("posts.user_id = #{@search_context.id}")
        end
      elsif @search_context.is_a?(Category)
        category_ids = Category
          .where(parent_category_id: @search_context.id)
          .pluck(:id)
          .push(@search_context.id)

        posts.where("topics.category_id in (?)", category_ids)
      elsif @search_context.is_a?(Topic)
        posts.where("topics.id = #{@search_context.id}")
          .order("posts.post_number #{@order == :latest ? "DESC" : ""}")
      elsif @search_context.is_a?(Tag)
        posts = posts
          .joins("LEFT JOIN topic_tags ON topic_tags.topic_id = topics.id")
          .joins("LEFT JOIN tags ON tags.id = topic_tags.tag_id")
        posts.where("tags.id = #{@search_context.id}")
      end
    else
      posts = categories_ignored(posts) unless @category_filter_matched
      posts
    end

  if aggregate_search
    aggregate_relation = Post.unscoped
      .select("subquery.topic_id id")
      .group("subquery.topic_id")

    posts = posts.select(posts.arel.projections)
  end

  if @order == :latest
    posts = posts.reorder("posts.created_at DESC")

    if aggregate_search
      aggregate_relation = aggregate_relation
        .select(
          "(ARRAY_AGG(subquery.post_number ORDER BY subquery.created_at DESC))[1] post_number",
          "MAX(subquery.created_at) created_at"
        )
        .order("created_at DESC")
    end
  elsif @order == :latest_topic
    if aggregate_search
      posts = posts.order("topic_created_at DESC")
      posts = posts.select("topics.created_at topic_created_at")

      aggregate_relation = aggregate_relation
        .select(
          "(ARRAY_AGG(subquery.post_number ORDER BY subquery.topic_created_at DESC))[1] post_number",
          "MAX(subquery.topic_created_at) topic_created_at"
        )
        .order("topic_created_at DESC")
    else
      # The topic_created_at alias is only selected in aggregate mode, so
      # order by the real column here.
      posts = posts.order("topics.created_at DESC")
    end
  elsif @order == :views
    if aggregate_search
      posts = posts.order("topic_views DESC")
      posts = posts.select("topics.views topic_views")

      aggregate_relation = aggregate_relation
        .select(
          "(ARRAY_AGG(subquery.post_number ORDER BY subquery.topic_views DESC))[1] post_number",
          "MAX(subquery.topic_views) topic_views"
        )
        .order("topic_views DESC")
    else
      # The topic_views alias is only selected in aggregate mode, so order
      # by the real column here.
      posts = posts.order("topics.views DESC")
    end
  elsif @order == :likes
    posts = posts.order("posts.like_count DESC")
  else
    # Default ordering: text search rank weighted by category priority.
    # `rank` is only interpolated below when a term is present.
    rank = <<~SQL
      TS_RANK_CD(
        post_search_data.search_data,
        #{@term.blank? ? '' : ts_query(weight_filter: weights)},
        #{SiteSetting.search_ranking_normalization}|32
      )
    SQL

    category_priority_weights = <<~SQL
      (
        CASE categories.search_priority
        WHEN #{Searchable::PRIORITIES[:very_low]}
        THEN #{SiteSetting.category_search_priority_very_low_weight}
        WHEN #{Searchable::PRIORITIES[:low]}
        THEN #{SiteSetting.category_search_priority_low_weight}
        WHEN #{Searchable::PRIORITIES[:high]}
        THEN #{SiteSetting.category_search_priority_high_weight}
        WHEN #{Searchable::PRIORITIES[:very_high]}
        THEN #{SiteSetting.category_search_priority_very_high_weight}
        ELSE
          CASE WHEN topics.closed
          THEN 0.9
          ELSE 1
          END
        END
      )
    SQL

    data_ranking =
      if @term.blank?
        "(#{category_priority_weights})"
      else
        "(#{rank} * #{category_priority_weights})"
      end

    if aggregate_search
      posts = posts.select("#{data_ranking} rank", "topics.bumped_at topic_bumped_at")
        .order("rank DESC", "topic_bumped_at DESC")

      aggregate_relation = aggregate_relation
        .select(
          "(ARRAY_AGG(subquery.post_number ORDER BY subquery.rank DESC, subquery.topic_bumped_at DESC))[1] post_number",
          "MAX(subquery.rank) rank", "MAX(subquery.topic_bumped_at) topic_bumped_at"
        )
        .order("rank DESC", "topic_bumped_at DESC")
    else
      posts = posts.order("#{data_ranking} DESC", "topics.bumped_at DESC")
    end
  end

  # Uncategorized topics and unrestricted categories are always allowed;
  # restricted categories only when listed in secure_category_ids.
  if secure_category_ids.present?
    posts = posts.where("(categories.id IS NULL) OR (NOT categories.read_restricted) OR (categories.id IN (?))", secure_category_ids).references(:categories)
  else
    posts = posts.where("(categories.id IS NULL) OR (NOT categories.read_restricted)").references(:categories)
  end

  if aggregate_search
    posts = yield(posts) if block_given?
    posts = aggregate_relation.from(posts)
  end

  posts = posts.offset(offset)
  posts.limit(limit)
end
|
|
|
|
# Adds a condition to +posts+ that excludes rows whose category has the
# Searchable::PRIORITIES[:ignore] search priority. Expects the relation to
# already join the categories table.
def categories_ignored(posts)
  ignored_priority = Searchable::PRIORITIES[:ignore]

  posts.where(<<~SQL, ignored_priority)
    categories.id NOT IN (
      SELECT categories.id WHERE categories.search_priority = ?
    )
  SQL
end
|
|
|
|
# The text search configuration for the default locale, wrapped in single
# quotes so it can be interpolated into SQL as a string literal.
#
# @return [String] e.g. "'english'"
def self.default_ts_config
  "'#{Search.ts_config}'"
end
|
|
|
|
# Instance-level shortcut for the class method of the same name.
def default_ts_config
  self.class.default_ts_config
end
|
|
|
|
# Builds the SQL for a tsquery expression for +term+.
#
# The term is first quoted, escaped and given the prefix/weight suffix by
# set_tsquery_weight_filter, then handed to to_tsquery.
#
# @param term [String]
# @param ts_config [String, nil] configuration name; default config when nil
# @param joiner [String, nil] replacement for "&" in the resulting query
# @param weight_filter [String, nil] weight letters such as "A" or "ABD"
# @return [String] SQL fragment
def self.ts_query(term: , ts_config: nil, joiner: nil, weight_filter: nil)
  weighted_term = set_tsquery_weight_filter(term, weight_filter)
  to_tsquery(ts_config: ts_config, term: weighted_term, joiner: joiner)
end
|
|
|
|
# Wraps +term+ in a TO_TSQUERY(...) SQL call.
#
# +term+ is interpolated between single quotes as-is, so it must already be
# escaped. A given +ts_config+ is quoted through the database connection;
# otherwise default_ts_config is used. With +joiner+, every "&" in the text
# form of the query is replaced by the (escaped) joiner.
#
# @return [String] SQL fragment
def self.to_tsquery(ts_config: nil, term:, joiner: nil)
  config_sql = ts_config ? ActiveRecord::Base.connection.quote(ts_config) : default_ts_config
  tsquery = "TO_TSQUERY(#{config_sql}, '#{term}')"
  return tsquery unless joiner

  "REPLACE(#{tsquery}::text, '&', '#{PG::Connection.escape_string(joiner)}')::tsquery"
end
|
|
|
|
# Turns +term+ into a quoted tsquery operand with prefix matching and an
# optional weight restriction: single quotes in the term are doubled, the
# result is passed through PG::Connection.escape_string, and it is wrapped
# as ''<term>'':*<weight_filter>.
#
# @param term [String]
# @param weight_filter [String, nil]
# @return [String]
def self.set_tsquery_weight_filter(term, weight_filter)
  doubled = term.gsub("'", "''")
  escaped = PG::Connection.escape_string(doubled)
  "''#{escaped}'':*#{weight_filter}"
end
|
|
|
|
# Per-instance memoized Search.ts_query for the current @term. The cache
# key combines the configuration, the term and the weight filter.
#
# @param ts_config [String, nil]
# @param weight_filter [String, nil]
# @return [String] SQL fragment
def ts_query(ts_config = nil, weight_filter: nil)
  @ts_query_cache ||= {}
  cache_key = "#{ts_config || default_ts_config} #{@term} #{weight_filter}"
  @ts_query_cache[cache_key] ||=
    Search.ts_query(term: @term, ts_config: ts_config, weight_filter: weight_filter)
end
|
|
|
|
# Wraps the SQL of +query+ in an outer SELECT that adds a row_number column.
#
# @param query [#to_sql]
# @return [String] SQL
def wrap_rows(query)
  inner_sql = query.to_sql
  "SELECT *, row_number() over() row_number FROM (#{inner_sql}) xxx"
end
|
|
|
|
# Builds the SQL used by #aggregate_search.
#
# For the :likes ordering the plain posts query is used (selecting topic id
# and post number); otherwise the per-topic aggregate query is used. When
# Search.min_post_id is positive the query is split at that post id into a
# :default part (ids at or above it) and a :remaining part (ids below it).
#
# @param opts [Hash] only :type_filter is read
# @return [Hash{Symbol => String}] :default and, when split, :remaining
def aggregate_post_sql(opts)
  type_filter = opts[:type_filter]
  min_id = Search.min_post_id

  if @order == :likes
    # likes are a pain to aggregate so skip
    id_column = "post_search_data.post_id"
    query = posts_query(limit, type_filter: type_filter).select('topics.id', 'posts.post_number')
  else
    id_column = "subquery.post_search_data_post_id"
    query = posts_query(limit, type_filter: type_filter, aggregate_search: true) do |posts|
      # Expose the post id to the outer query only when it is needed for
      # the split below.
      min_id > 0 ? posts.select("post_search_data.post_id post_search_data_post_id") : posts
    end
  end

  return { default: wrap_rows(query) } unless min_id > 0

  low_set = query.dup.where("#{id_column} < #{min_id}")
  high_set = query.where("#{id_column} >= #{min_id}")

  { default: wrap_rows(high_set), remaining: wrap_rows(low_set) }
end
|
|
|
|
# Loads the posts identified by +post_sql+ (rows of topic id, post number
# and row_number), with associations eager loaded, in row_number order.
#
# @param post_sql [String, nil]
# @return the posts relation, or [] when +post_sql+ is nil
def aggregate_posts(post_sql)
  return [] unless post_sql

  join_sql = "JOIN (#{post_sql}) x ON x.id = posts.topic_id AND x.post_number = posts.post_number"
  posts_eager_loads(Post).joins(join_sql).order('row_number')
end
|
|
|
|
# Runs the per-topic search and adds the posts to the results. The
# :default set is added first; the :remaining set is only queried when the
# first one produced fewer than #limit posts.
#
# @param opts [Hash] passed on to #aggregate_post_sql
def aggregate_search(opts = {})
  sql_parts = aggregate_post_sql(opts)

  added = 0
  aggregate_posts(sql_parts[:default]).each do |post|
    @results.add(post)
    added += 1
  end

  return if added >= limit

  aggregate_posts(sql_parts[:remaining]).each { |post| @results.add(post) }
end
|
|
|
|
# Searches personal messages only.
#
# @raise [Discourse::InvalidAccess] when there is no current user
def private_messages_search
  unless @guardian.user
    raise Discourse::InvalidAccess.new("anonymous can not search PMs")
  end

  aggregate_search(type_filter: "private_messages")
end
|
|
|
|
# Searches with the "all_topics" type filter (see #posts_query for how it
# combines regular topics and personal messages).
def all_topics_search
  aggregate_search(type_filter: "all_topics")
end
|
|
|
|
# Searches posts. Within a topic context every matching post of that topic
# is added to the results; otherwise the per-topic aggregate search runs.
def topic_search
  return aggregate_search unless @search_context.is_a?(Topic)

  posts_eager_loads(posts_query(limit))
    .where('posts.topic_id = ?', @search_context.id)
    .each { |post| @results.add(post) }
end
|
|
|
|
# Adds eager loading of user, post search data and the topic (with its
# category, plus tags when tagging is enabled) to +query+.
#
# @param query the posts relation or model class
# @return the relation with includes applied
def posts_eager_loads(query)
  topic_associations = [:category]
  topic_associations << :tags if SiteSetting.tagging_enabled

  query
    .includes(:user, :post_search_data)
    .includes(topic: topic_associations)
end
|
|
|
|
end
|