diff --git a/app/assets/javascripts/discourse/helpers/application_helpers.js b/app/assets/javascripts/discourse/helpers/application_helpers.js
index ef7ab697eee..2d26922b2b5 100644
--- a/app/assets/javascripts/discourse/helpers/application_helpers.js
+++ b/app/assets/javascripts/discourse/helpers/application_helpers.js
@@ -385,3 +385,25 @@ Handlebars.registerHelper('customHTML', function(name, contextString, options) {
Ember.Handlebars.registerBoundHelper('humanSize', function(size) {
return new Handlebars.SafeString(I18n.toHumanSize(size));
});
+
+/**
+ Renders the domain for a link if it's not internal and has a title.
+
+ @method link-domain
+ @for Handlebars
+**/
+Handlebars.registerHelper('link-domain', function(property, options) {
+ var link = Em.get(this, property, options);
+ if (link) {
+ var internal = Em.get(link, 'internal'),
+ hasTitle = (!Em.isEmpty(Em.get(link, 'title')));
+ if (hasTitle && !internal) {
+ var domain = Em.get(link, 'domain');
+ if (!Em.isEmpty(domain)) {
+ var s = domain.split('.');
+ domain = s[s.length-2] + "." + s[s.length-1];
+ return new Handlebars.SafeString("(" + domain + ")");
+ }
+ }
+ }
+});
diff --git a/app/assets/javascripts/discourse/templates/components/topic-map.js.handlebars b/app/assets/javascripts/discourse/templates/components/topic-map.js.handlebars
index 2f49ac5945b..8fa62796c4a 100644
--- a/app/assets/javascripts/discourse/templates/components/topic-map.js.handlebars
+++ b/app/assets/javascripts/discourse/templates/components/topic-map.js.handlebars
@@ -65,6 +65,7 @@
{{#if title}}{{title}}{{else}}{{shortenUrl url}}{{/if}}
{{#unless internal}}{{/unless}}
+ {{link-domain this}}
{{/groupedEach}}
diff --git a/app/assets/stylesheets/desktop/topic-post.scss b/app/assets/stylesheets/desktop/topic-post.scss
index aea3e3be873..44741759c62 100644
--- a/app/assets/stylesheets/desktop/topic-post.scss
+++ b/app/assets/stylesheets/desktop/topic-post.scss
@@ -286,6 +286,11 @@ a.star {
list-style: none;
}
+ span.domain {
+ font-size: 10px;
+ color: $secondary_text_color;
+ }
+
.avatars {
> div {
float: left;
diff --git a/app/jobs/regular/crawl_topic_link.rb b/app/jobs/regular/crawl_topic_link.rb
new file mode 100644
index 00000000000..7ce42781820
--- /dev/null
+++ b/app/jobs/regular/crawl_topic_link.rb
@@ -0,0 +1,94 @@
+require 'open-uri'
+require 'nokogiri'
+require 'excon'
+
+module Jobs
+ class CrawlTopicLink < Jobs::Base
+
+ class ReadEnough < Exception; end
+
+ # Retrieve a header regardless of case sensitivity
+ def self.header_for(head, name)
+ header = head.headers.detect do |k, v|
+ name == k.downcase
+ end
+ header[1] if header
+ end
+
+ def self.request_headers(uri)
+ { "User-Agent" => "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36",
+ "Accept" => "text/html",
+ "Host" => uri.host }
+ end
+
+ # Follow any redirects that might exist
+ def self.final_uri(url, limit=5)
+ return if limit < 0
+
+ uri = URI(url)
+ return if uri.blank? || uri.host.blank?
+ headers = CrawlTopicLink.request_headers(uri)
+ head = Excon.head(url, read_timeout: 20, headers: headers)
+ if head.status == 200
+ uri = nil unless header_for(head, 'content-type') =~ /text\/html/
+ return uri
+ end
+
+ location = header_for(head, 'location')
+ if location
+ location = "#{uri.scheme}://#{uri.host}#{location}" if location[0] == "/"
+ return final_uri(location, limit - 1)
+ end
+
+ nil
+ end
+
+ # Fetch the beginning of a HTML document at a url
+ def self.fetch_beginning(url)
+ uri = final_uri(url)
+ return "" unless uri
+
+ result = ""
+ streamer = lambda do |chunk, remaining_bytes, total_bytes|
+ result << chunk
+
+ # Using exceptions for flow control is really bad, but there really seems to
+ # be no sane way to get a stream to stop reading in Excon (or Net::HTTP for
+ # that matter!)
+ raise ReadEnough.new if result.size > 1024
+ end
+ Excon.get(uri.to_s, response_block: streamer, read_timeout: 20, headers: CrawlTopicLink.request_headers(uri))
+ result
+
+ rescue ReadEnough
+ result
+ end
+
+ def execute(args)
+ raise Discourse::InvalidParameters.new(:topic_link_id) unless args[:topic_link_id].present?
+ topic_link = TopicLink.where(id: args[:topic_link_id], internal: false, crawled_at: nil).first
+ return if topic_link.blank?
+
+ crawled = false
+
+ result = CrawlTopicLink.fetch_beginning(topic_link.url)
+ doc = Nokogiri::HTML(result)
+ if doc
+ title = doc.at('title').try(:inner_text)
+ if title.present?
+ title.gsub!(/\n/, ' ')
+ title.gsub!(/ +/, ' ')
+ title.strip!
+ if title.present?
+ crawled = topic_link.update_attributes(title: title[0..255], crawled_at: Time.now)
+ end
+ end
+ end
+ rescue Exception
+ # If there was a connection error, do nothing
+ ensure
+ topic_link.update_column(:crawled_at, Time.now) if !crawled && topic_link.present?
+ end
+
+ end
+end
diff --git a/app/models/topic_link.rb b/app/models/topic_link.rb
index cc171dbde10..b1fd9f04048 100644
--- a/app/models/topic_link.rb
+++ b/app/models/topic_link.rb
@@ -18,6 +18,8 @@ class TopicLink < ActiveRecord::Base
validate :link_to_self
+ after_commit :crawl_link_title
+
# Make sure a topic can't link to itself
def link_to_self
errors.add(:base, "can't link to the same topic") if (topic_id == link_topic_id)
@@ -27,17 +29,18 @@ class TopicLink < ActiveRecord::Base
# Sam: complicated reports are really hard in AR
builder = SqlBuilder.new("SELECT ftl.url,
- ft.title,
+ COALESCE(ft.title, ftl.title) AS title,
ftl.link_topic_id,
ftl.reflection,
ftl.internal,
+ ftl.domain,
MIN(ftl.user_id) AS user_id,
SUM(clicks) AS clicks
FROM topic_links AS ftl
LEFT JOIN topics AS ft ON ftl.link_topic_id = ft.id
LEFT JOIN categories AS c ON c.id = ft.category_id
/*where*/
- GROUP BY ftl.url, ft.title, ftl.link_topic_id, ftl.reflection, ftl.internal
+ GROUP BY ftl.url, ft.title, ftl.title, ftl.link_topic_id, ftl.reflection, ftl.internal, ftl.domain
ORDER BY clicks DESC")
builder.where('ftl.topic_id = :topic_id', topic_id: topic_id)
@@ -58,9 +61,10 @@ class TopicLink < ActiveRecord::Base
l.post_id,
l.url,
l.clicks,
- t.title,
+ COALESCE(t.title, l.title) AS title,
l.internal,
- l.reflection
+ l.reflection,
+ l.domain
FROM topic_links l
LEFT JOIN topics t ON t.id = l.link_topic_id
LEFT JOIN categories AS c ON c.id = t.category_id
@@ -87,6 +91,7 @@ class TopicLink < ActiveRecord::Base
def self.extract_from(post)
return unless post.present?
+ added_urls = []
TopicLink.transaction do
added_urls = []
@@ -184,6 +189,11 @@ class TopicLink < ActiveRecord::Base
end
end
end
+
+ # Crawl a link's title after it's saved
+ def crawl_link_title
+ Jobs.enqueue(:crawl_topic_link, topic_link_id: id)
+ end
end
# == Schema Information
diff --git a/app/serializers/topic_link_serializer.rb b/app/serializers/topic_link_serializer.rb
index f73972a4138..e14c9e8d554 100644
--- a/app/serializers/topic_link_serializer.rb
+++ b/app/serializers/topic_link_serializer.rb
@@ -6,7 +6,8 @@ class TopicLinkSerializer < ApplicationSerializer
:internal,
:reflection,
:clicks,
- :user_id
+ :user_id,
+ :domain
def url
object['url']
@@ -40,4 +41,8 @@ class TopicLinkSerializer < ApplicationSerializer
object['user_id'].present?
end
+ def domain
+ object['domain']
+ end
+
end
diff --git a/db/migrate/20140404143501_add_title_to_topic_links.rb b/db/migrate/20140404143501_add_title_to_topic_links.rb
new file mode 100644
index 00000000000..90d142782d0
--- /dev/null
+++ b/db/migrate/20140404143501_add_title_to_topic_links.rb
@@ -0,0 +1,6 @@
+class AddTitleToTopicLinks < ActiveRecord::Migration
+ def change
+ add_column :topic_links, :title, :string
+ add_column :topic_links, :crawled_at, :datetime
+ end
+end
diff --git a/spec/jobs/crawl_topic_link_spec.rb b/spec/jobs/crawl_topic_link_spec.rb
new file mode 100644
index 00000000000..9224c499e4d
--- /dev/null
+++ b/spec/jobs/crawl_topic_link_spec.rb
@@ -0,0 +1,12 @@
+require 'spec_helper'
+require_dependency 'jobs/base'
+require_dependency 'jobs/regular/crawl_topic_link'
+
+describe Jobs::CrawlTopicLink do
+
+ let(:job) { Jobs::CrawlTopicLink.new }
+
+ it "needs a topic_link_id" do
+ -> { job.execute({}) }.should raise_error(Discourse::InvalidParameters)
+ end
+end