Special case: When crawling a link to an image, just put the filename as the title.
2025-02-25 18:55:32 -06:00 · 2014-04-10 13:45:13 -04:00 · 2014-04-10 13:45:13 -04:00 · e80851b0fa
commit e80851b0fa
parent 99e2bab62d
1 changed files with 20 additions and 9 deletions
--- a/app/jobs/regular/crawl_topic_link.rb
+++ b/app/jobs/regular/crawl_topic_link.rb
@@ -89,16 +89,27 @@ module Jobs
        crawled = false
-        result = CrawlTopicLink.fetch_beginning(topic_link.url)
+        # Special case: Images
-        doc = Nokogiri::HTML(result)
+        # If the link is to an image, put the filename as the title
-        if doc
+        if topic_link.url =~ /\.(jpg|gif|png)$/
-          title = doc.at('title').try(:inner_text)
+          uri = URI(topic_link.url)
-          if title.present?
+          filename = File.basename(uri.path)
-            title.gsub!(/\n/, ' ')
+          crawled = (TopicLink.where(id: topic_link.id).update_all(["title = ?, crawled_at = CURRENT_TIMESTAMP", filename]) == 1)
-            title.gsub!(/ +/, ' ')
+        end
-            title.strip!
+
        unless crawled
          # Fetch the beginning of the document to find the title
          result = CrawlTopicLink.fetch_beginning(topic_link.url)
          doc = Nokogiri::HTML(result)
          if doc
            title = doc.at('title').try(:inner_text)
            if title.present?
-              crawled = (TopicLink.where(id: topic_link.id).update_all(['title = ?, crawled_at = CURRENT_TIMESTAMP', title[0..255]]) == 1)
+              title.gsub!(/\n/, ' ')
              title.gsub!(/ +/, ' ')
              title.strip!
              if title.present?
                crawled = (TopicLink.where(id: topic_link.id).update_all(['title = ?, crawled_at = CURRENT_TIMESTAMP', title[0..255]]) == 1)
              end
            end
          end
        end