From 16b70047671448f77cda76f36055a76be7703595 Mon Sep 17 00:00:00 2001
From: Sam <sam.saffron@gmail.com>
Date: Fri, 8 Aug 2014 12:12:53 +1000
Subject: [PATCH] PERF: optimise and improve topic similarity search FIX: similar
 topics message was shown with blank results

---
 .../controllers/composer-messages.js.es6      |  9 +++++
 .../discourse/controllers/composer.js.es6     | 16 +++++++--
 app/models/topic.rb                           | 35 ++++++++++++++-----
 lib/search.rb                                 |  6 ++--
 spec/models/topic_spec.rb                     |  6 +++-
 5 files changed, 56 insertions(+), 16 deletions(-)

diff --git a/app/assets/javascripts/discourse/controllers/composer-messages.js.es6 b/app/assets/javascripts/discourse/controllers/composer-messages.js.es6
index d23ba606645..0a49c13313f 100644
--- a/app/assets/javascripts/discourse/controllers/composer-messages.js.es6
+++ b/app/assets/javascripts/discourse/controllers/composer-messages.js.es6
@@ -29,6 +29,15 @@ export default Ember.ArrayController.extend({
     **/
     closeMessage: function(message) {
       this.removeObject(message);
+    },
+
+    hideMessage: function(message) {
+      var messagesByTemplate = this.get('messagesByTemplate'),
+        templateName = message.get('templateName');
+
+      // kind of hacky but the visibility depends on this
+      messagesByTemplate[templateName] = undefined;
+      this.removeObject(message);
     }
   },
 
diff --git a/app/assets/javascripts/discourse/controllers/composer.js.es6 b/app/assets/javascripts/discourse/controllers/composer.js.es6
index 0eb36d3ab1a..d0f4c6e5cb5 100644
--- a/app/assets/javascripts/discourse/controllers/composer.js.es6
+++ b/app/assets/javascripts/discourse/controllers/composer.js.es6
@@ -215,7 +215,9 @@ export default Discourse.Controller.extend({
     if (!this.get('model.creatingTopic')) return;
 
     var body = this.get('model.reply'),
-        title = this.get('model.title');
+        title = this.get('model.title'),
+        self = this,
+        message;
 
     // Ensure the fields are of the minimum length
     if (body.length < Discourse.SiteSettings.min_body_similar_length ||
@@ -229,11 +231,19 @@ export default Discourse.Controller.extend({
       similarTopics.pushObjects(newTopics);
 
       if (similarTopics.get('length') > 0) {
-        messageController.popup(Discourse.ComposerMessage.create({
+        message = Discourse.ComposerMessage.create({
           templateName: 'composer/similar_topics',
           similarTopics: similarTopics,
           extraClass: 'similar-topics'
-        }));
+        });
+
+        self.set('similarTopicsMessage', message);
+        messageController.popup(message);
+      } else {
+        message = self.get('similarTopicsMessage');
+        if (message) {
+          messageController.send('hideMessage', message);
+        }
       }
     });
 
diff --git a/app/models/topic.rb b/app/models/topic.rb
index 260cf3f9910..5c5edfbf175 100644
--- a/app/models/topic.rb
+++ b/app/models/topic.rb
@@ -363,21 +363,42 @@ class Topic < ActiveRecord::Base
     return [] unless title.present?
     return [] unless raw.present?
 
-    similar = Topic.select(sanitize_sql_array(["topics.*, similarity(topics.title, :title) + similarity(p.raw, :raw) AS similarity", title: title, raw: raw]))
-                     .visible
-                     .where(closed: false, archived: false)
-                     .secured(Guardian.new(user))
-                     .listable_topics
-                     .joins("LEFT OUTER JOIN posts AS p ON p.topic_id = topics.id AND p.post_number = 1")
-                     .limit(SiteSetting.max_similar_results)
-                     .order('similarity desc')
+    # Stage 1: shortlist candidates with a full-text match. The "|" joiner ORs the
+    # terms, so a topic qualifies when its first post matches any word taken from
+    # the title or the first 200 characters of raw.
+    filter_words = Search.prepare_data(title + " " + raw[0...200])
+    ts_query = Search.ts_query(filter_words, nil, "|")
 
     # Exclude category definitions from similar topic suggestions
+
+    candidates = Topic.visible
+       .secured(Guardian.new(user))
+       .listable_topics
+       .joins('JOIN posts p ON p.topic_id = topics.id AND p.post_number = 1')
+       .joins('JOIN post_search_data s ON p.id = s.post_id')
+       .where("search_data @@ #{ts_query}")
+       .order("ts_rank(search_data, #{ts_query}) DESC")
+       .limit(SiteSetting.max_similar_results * 3)
+
-    exclude_topic_ids = Category.pluck(:topic_id).compact!
+    # compact, not compact!: the bang form returns nil when there was no nil to
+    # remove, which made the exclusion below get skipped.
+    exclude_topic_ids = Category.pluck(:topic_id).compact
     if exclude_topic_ids.present?
-      similar = similar.where("topics.id NOT IN (?)", exclude_topic_ids)
+      candidates = candidates.where("topics.id NOT IN (?)", exclude_topic_ids)
     end
 
+    candidate_ids = candidates.pluck(:id)
+    return [] unless candidate_ids.present?
+
+    # Stage 2: score only the shortlisted topics. The score adds the similarity of
+    # the topic title to the new title and to the new raw; results must exceed 0.2.
+    similar = Topic.select(sanitize_sql_array(["topics.*, similarity(topics.title, :title) + similarity(topics.title, :raw) AS similarity", title: title, raw: raw]))
+                     .joins("JOIN posts AS p ON p.topic_id = topics.id AND p.post_number = 1")
+                     .limit(SiteSetting.max_similar_results)
+                     .where("topics.id IN (?)", candidate_ids)
+                     .where("similarity(topics.title, :title) + similarity(topics.title, :raw) > 0.2", raw: raw, title: title)
+                     .order('similarity desc')
+
     similar
   end
 
diff --git a/lib/search.rb b/lib/search.rb
index 3c5d1462fcb..7a1423d73c7 100644
--- a/lib/search.rb
+++ b/lib/search.rb
@@ -240,10 +240,19 @@ class Search
       self.class.query_locale
     end
 
-    def self.ts_query(term, locale = nil)
+    # Builds a TO_TSQUERY(...) SQL fragment in which every word of term is a
+    # prefix match (":*").
+    #
+    # term   - search text; tsquery operator characters are stripped from it
+    # locale - text search configuration; query_locale is used when nil
+    # joiner - tsquery operator put between the words: "&" (default) requires
+    #          all of them, "|" accepts any of them
+    def self.ts_query(term, locale = nil, joiner = "&")
       locale = Post.sanitize(locale) if locale
-      all_terms = term.gsub(/[:()&!'"]/,'').split
-      query = Post.sanitize(all_terms.map {|t| "#{PG::Connection.escape_string(t)}:*"}.join(" & "))
+      # "|" is stripped as well: it is a tsquery operator, and a stray one left in
+      # the text would become a term of its own and make TO_TSQUERY fail to parse.
+      all_terms = term.gsub(/[*:()&|!'"]/,'').squish.split
+      query = Post.sanitize(all_terms.map {|t| "#{PG::Connection.escape_string(t)}:*"}.join(" #{joiner} "))
       "TO_TSQUERY(#{locale || query_locale}, #{query})"
     end
 
diff --git a/spec/models/topic_spec.rb b/spec/models/topic_spec.rb
index 2d67a3c6b88..2222d25701b 100644
--- a/spec/models/topic_spec.rb
+++ b/spec/models/topic_spec.rb
@@ -211,7 +211,11 @@ describe Topic do
     end
 
     context 'with a similar topic' do
-      let!(:topic) { Fabricate(:topic, title: "Evil trout is the dude who posted this topic") }
+      let!(:topic) {
+        ActiveRecord::Base.observers.enable :search_observer
+        post = create_post(title: "Evil trout is the dude who posted this topic")
+        post.topic
+      }
 
       it 'returns the similar topic if the title is similar' do
         Topic.similar_to("has evil trout made any topics?", "i am wondering has evil trout made any topics?").should == [topic]