mirror of
https://github.com/discourse/discourse.git
synced 2025-02-25 18:55:32 -06:00
FEATURE: Whitelists for inline oneboxing
This commit is contained in:
@@ -1,38 +1,50 @@
|
||||
require_dependency 'retrieve_title'
|
||||
|
||||
class InlineOneboxer
|
||||
|
||||
# Builds an inline oneboxer for a batch of URLs.
#
# @param urls [Array] URLs to look up (each is passed to InlineOneboxer.lookup by #process)
# @param opts [Hash, nil] options forwarded to InlineOneboxer.lookup; nil is treated as {}
def initialize(urls, opts = nil)
  @urls = urls
  @opts = opts || {}
end
|
||||
|
||||
# Looks up every URL given to the constructor, passing along the stored
# options, and drops the URLs for which the lookup returned nil.
#
# @return [Array] the non-nil results of InlineOneboxer.lookup, in input order
def process
  @urls.map { |url| InlineOneboxer.lookup(url, @opts) }.compact
end
|
||||
|
||||
# Removes the cached inline onebox entry for a single URL.
#
# @param url [String] the URL whose cache entry should be deleted
def self.purge(url)
  Rails.cache.delete(cache_key(url))
end
|
||||
|
||||
# Reads the cached inline onebox for a URL.
#
# @param url [String] the URL to look up
# @return [Object, nil] whatever Rails.cache holds under the URL's cache key, or nil
def self.cache_lookup(url)
  Rails.cache.read(cache_key(url))
end
|
||||
|
||||
def self.lookup(url)
|
||||
cached = cache_lookup(url)
|
||||
return cached if cached.present?
|
||||
def self.lookup(url, opts=nil)
|
||||
opts ||= {}
|
||||
|
||||
unless opts[:skip_cache]
|
||||
cached = cache_lookup(url)
|
||||
return cached if cached.present?
|
||||
end
|
||||
|
||||
if route = Discourse.route_for(url)
|
||||
if route[:controller] == "topics" &&
|
||||
route[:action] == "show" &&
|
||||
topic = (Topic.where(id: route[:topic_id].to_i).first rescue nil)
|
||||
|
||||
# Only public topics
|
||||
if Guardian.new.can_see?(topic)
|
||||
onebox = {
|
||||
url: url,
|
||||
title: Emoji.gsub_emoji_to_unicode(topic.title)
|
||||
}
|
||||
Rails.cache.write(cache_key(url), onebox, expires_in: 1.day)
|
||||
return onebox
|
||||
end
|
||||
return onebox_for(url, topic.title, opts) if Guardian.new.can_see?(topic)
|
||||
end
|
||||
end
|
||||
|
||||
if whitelist = SiteSetting.inline_onebox_domains_whitelist
|
||||
uri = URI(url) rescue nil
|
||||
|
||||
domains = whitelist.split('|')
|
||||
if uri.present? &&
|
||||
uri.hostname.present? &&
|
||||
domains.include?(uri.hostname) &&
|
||||
title = RetrieveTitle.crawl(url)
|
||||
return onebox_for(url, title, opts)
|
||||
end
|
||||
end
|
||||
|
||||
@@ -41,6 +53,18 @@ class InlineOneboxer
|
||||
|
||||
private
|
||||
|
||||
# Builds the inline onebox payload for a URL and, unless caching is skipped,
# writes it to the Rails cache with a one-day expiry.
#
# @param url [String] the URL being oneboxed
# @param title [String] title to show; passed through Emoji.gsub_emoji_to_unicode
# @param opts [Hash, nil] options; a truthy :skip_cache bypasses the cache write.
#   nil is tolerated and treated like an empty hash.
# @return [Hash] a hash with :url and :title keys
def self.onebox_for(url, title, opts)
  onebox = {
    url: url,
    title: Emoji.gsub_emoji_to_unicode(title)
  }

  skip_cache = opts && opts[:skip_cache]
  Rails.cache.write(cache_key(url), onebox, expires_in: 1.day) unless skip_cache

  onebox
end
|
||||
|
||||
# Cache key under which the inline onebox for a URL is stored.
#
# @param url [String] the URL being oneboxed
# @return [String] the URL prefixed with "inline_onebox:"
def self.cache_key(url)
  "inline_onebox:#{url}"
end
|
||||
|
||||
70
lib/retrieve_title.rb
Normal file
70
lib/retrieve_title.rb
Normal file
@@ -0,0 +1,70 @@
|
||||
require_dependency 'final_destination'
|
||||
|
||||
# Downloads the beginning of a remote HTML document and extracts a title from it.
module RetrieveTitle
  # Raised inside the streaming callback to abort the download once enough of
  # the document has been read.
  class ReadEnough < StandardError; end

  # Fetches the start of the page at +url+ and returns its title.
  #
  # @param url [String] the page to crawl
  # @return [String, nil] the normalised title, or nil when none was found or
  #   the fetch/parse failed
  def self.crawl(url)
    extract_title(fetch_beginning(url))
  rescue StandardError
    # Best effort: connection and parsing errors simply mean "no title".
    # Only StandardError is rescued so Interrupt/SystemExit still propagate.
    nil
  end

  # Extracts a title from an HTML fragment, preferring the og:title meta tag
  # and falling back to the <title> element.
  #
  # @param html [String, nil] the (possibly truncated) HTML
  # @return [String, nil] the title with newlines replaced by spaces, runs of
  #   spaces collapsed and surrounding whitespace stripped; nil if blank
  def self.extract_title(html)
    doc = Nokogiri::HTML(html)
    return nil unless doc

    og_node = doc.at('meta[property="og:title"]')
    # A blank og:title must not prevent the fallback to <title>.
    title = og_node && og_node['content'].presence
    title ||= doc.at('title')&.inner_text
    return nil unless title.present?

    title.gsub(/\n/, ' ').squeeze(' ').strip
  end

  # NOTE: `private` does not affect methods defined with `def self.`; the
  # methods below remain callable from outside the module.
  private

  # Maximum number of kilobytes to read from a host before giving up on
  # finding the title.
  #
  # @param uri [#host] the resolved URI
  # @return [Integer] size in KB (multiplied by 1024 by the caller)
  def self.max_chunk_size(uri)
    # Amazon leaves the title until very late. Normally it's a bad idea to make an exception for
    # one host but amazon is a big one. The pattern is anchored to the whole host
    # so that lookalike hosts (e.g. "notamazon.com") do not qualify.
    return 80 if uri.host =~ /(?:\A|\.)amazon\.(?:com|ca|co\.uk|es|fr|de|it|com\.au|com\.br|cn|in|co\.jp|com\.mx)\z/

    # default is 10k
    10
  end

  # Fetch the beginning of a HTML document at a url
  #
  # @param url [String] the page to fetch
  # @return [String, nil] the bytes read so far; "" when the URL could not be
  #   resolved; nil in the test environment
  def self.fetch_beginning(url)
    # Never crawl in test mode
    return if Rails.env.test?

    fd = FinalDestination.new(url)
    uri = fd.resolve
    return "" unless uri

    result = ""
    byte_limit = max_chunk_size(uri) * 1024

    streamer = lambda do |chunk, _, _|
      result << chunk

      # Using exceptions for flow control is really bad, but there really seems to
      # be no sane way to get a stream to stop reading in Excon (or Net::HTTP for
      # that matter!)
      raise ReadEnough if result.size > byte_limit
    end
    Excon.get(uri.to_s, response_block: streamer, read_timeout: 20, headers: fd.request_headers)
    result

  rescue Excon::Errors::SocketError => ex
    # Excon may wrap the ReadEnough raised by the streamer in a SocketError.
    return result if ex.socket_error.is_a?(ReadEnough)
    raise
  rescue ReadEnough
    result
  end
end
|
||||
Reference in New Issue
Block a user