From a57f80204821381d801b9d228d6cda08247487a4 Mon Sep 17 00:00:00 2001
From: Robin Ward <robin.ward@gmail.com>
Date: Thu, 17 Apr 2014 14:00:22 -0400
Subject: [PATCH] If there's a `TopicEmbed` record for a URL, we don't have to
 crawl it. This should help sites like Boing Boing, where links are sometimes
 crawled before they are saved in WordPress.

---
 app/jobs/regular/crawl_topic_link.rb | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/app/jobs/regular/crawl_topic_link.rb b/app/jobs/regular/crawl_topic_link.rb
index deb15bef548..c8273473dba 100644
--- a/app/jobs/regular/crawl_topic_link.rb
+++ b/app/jobs/regular/crawl_topic_link.rb
@@ -83,10 +83,17 @@ module Jobs
     def execute(args)
       raise Discourse::InvalidParameters.new(:topic_link_id) unless args[:topic_link_id].present?
 
-      begin
-        topic_link = TopicLink.where(id: args[:topic_link_id], internal: false, crawled_at: nil).first
-        return if topic_link.blank?
+      topic_link = TopicLink.where(id: args[:topic_link_id], internal: false, crawled_at: nil).first
+      return if topic_link.blank?
 
+      # Look for a topic embed for the URL. If it exists, use its title and don't crawl
+      topic_embed = TopicEmbed.where(embed_url: topic_link.url).includes(:topic).references(:topic).first
+      if topic_embed.present?
+        TopicLink.where(id: topic_link.id).update_all(['title = ?, crawled_at = CURRENT_TIMESTAMP', topic_embed.topic.title[0..255]])
+        return
+      end
+
+      begin
         crawled = false
 
         # Special case: Images