From e80851b0fa2f94971812fcf3e3b41fc5cc15f3a8 Mon Sep 17 00:00:00 2001
From: Robin Ward <robin.ward@gmail.com>
Date: Thu, 10 Apr 2014 13:45:13 -0400
Subject: [PATCH] Special case: When crawling a link to an image, just put the
 filename as the title.

---
 app/jobs/regular/crawl_topic_link.rb | 29 +++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/app/jobs/regular/crawl_topic_link.rb b/app/jobs/regular/crawl_topic_link.rb
index 5d7186550d9..deb15bef548 100644
--- a/app/jobs/regular/crawl_topic_link.rb
+++ b/app/jobs/regular/crawl_topic_link.rb
@@ -89,16 +89,27 @@ module Jobs
 
         crawled = false
 
-        result = CrawlTopicLink.fetch_beginning(topic_link.url)
-        doc = Nokogiri::HTML(result)
-        if doc
-          title = doc.at('title').try(:inner_text)
-          if title.present?
-            title.gsub!(/\n/, ' ')
-            title.gsub!(/ +/, ' ')
-            title.strip!
+        # Special case: Images
+        # If the link is to an image, put the filename as the title
+        if topic_link.url =~ /\.(jpg|gif|png)$/
+          uri = URI(topic_link.url)
+          filename = File.basename(uri.path)
+          crawled = (TopicLink.where(id: topic_link.id).update_all(["title = ?, crawled_at = CURRENT_TIMESTAMP", filename]) == 1)
+        end
+
+        unless crawled
+          # Fetch the beginning of the document to find the title
+          result = CrawlTopicLink.fetch_beginning(topic_link.url)
+          doc = Nokogiri::HTML(result)
+          if doc
+            title = doc.at('title').try(:inner_text)
             if title.present?
-              crawled = (TopicLink.where(id: topic_link.id).update_all(['title = ?, crawled_at = CURRENT_TIMESTAMP', title[0..255]]) == 1)
+              title.gsub!(/\n/, ' ')
+              title.gsub!(/ +/, ' ')
+              title.strip!
+              if title.present?
+                crawled = (TopicLink.where(id: topic_link.id).update_all(['title = ?, crawled_at = CURRENT_TIMESTAMP', title[0..255]]) == 1)
+              end
             end
           end
         end