mirror of
https://github.com/discourse/discourse.git
synced 2025-02-25 18:55:32 -06:00
Special case: When crawling a link to an image, just put the filename as
the title.
This commit is contained in:
parent
99e2bab62d
commit
e80851b0fa
@@ -89,16 +89,27 @@ module Jobs
|
|||||||
|
|
||||||
crawled = false
|
crawled = false
|
||||||
|
|
||||||
result = CrawlTopicLink.fetch_beginning(topic_link.url)
|
# Special case: Images
|
||||||
doc = Nokogiri::HTML(result)
|
# If the link is to an image, put the filename as the title
|
||||||
if doc
|
if topic_link.url =~ /\.(jpg|gif|png)$/
|
||||||
title = doc.at('title').try(:inner_text)
|
uri = URI(topic_link.url)
|
||||||
if title.present?
|
filename = File.basename(uri.path)
|
||||||
title.gsub!(/\n/, ' ')
|
crawled = (TopicLink.where(id: topic_link.id).update_all(["title = ?, crawled_at = CURRENT_TIMESTAMP", filename]) == 1)
|
||||||
title.gsub!(/ +/, ' ')
|
end
|
||||||
title.strip!
|
|
||||||
|
unless crawled
|
||||||
|
# Fetch the beginning of the document to find the title
|
||||||
|
result = CrawlTopicLink.fetch_beginning(topic_link.url)
|
||||||
|
doc = Nokogiri::HTML(result)
|
||||||
|
if doc
|
||||||
|
title = doc.at('title').try(:inner_text)
|
||||||
if title.present?
|
if title.present?
|
||||||
crawled = (TopicLink.where(id: topic_link.id).update_all(['title = ?, crawled_at = CURRENT_TIMESTAMP', title[0..255]]) == 1)
|
title.gsub!(/\n/, ' ')
|
||||||
|
title.gsub!(/ +/, ' ')
|
||||||
|
title.strip!
|
||||||
|
if title.present?
|
||||||
|
crawled = (TopicLink.where(id: topic_link.id).update_all(['title = ?, crawled_at = CURRENT_TIMESTAMP', title[0..255]]) == 1)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
Loading…
Reference in New Issue
Block a user