mirror of
https://github.com/discourse/discourse.git
synced 2025-02-25 18:55:32 -06:00
DEV: Oneboxer wildcard subdomains (#13015)
* DEV: Allow wildcards in Oneboxer optional domain Site Settings Allows a wildcard to be used as a subdomain on Oneboxer-related SiteSettings, e.g.: - `force_get_hosts` - `cache_onebox_response_body_domains` - `force_custom_user_agent_hosts` * DEV: fix typos * FIX: Try doing a GET after receiving a 500 error from a HEAD By default we try to do a `HEAD` requests. If this results in a 500 error response, we should try to do a `GET` * DEV: `force_get_hosts` should be a hidden setting * DEV: Oneboxer Strategies Have an alternative oneboxing ‘strategy’ (i.e., set of options) to use when an attempt to generate a Onebox fails. Keep track of any non-default strategies that were used on a particular host, and use that strategy for that host in the future. Initially, the alternate strategy (`force_get_and_ua`) forces the FinalDestination step of Oneboxing to do a `GET` rather than `HEAD`, and forces a custom user agent. * DEV: change stubbed return code The stubbed status code needs to be a value not recognized by FinalDestination
This commit is contained in:
parent
a62ad0fa4d
commit
19182b1386
@ -1637,6 +1637,10 @@ onebox:
|
|||||||
force_custom_user_agent_hosts:
|
force_custom_user_agent_hosts:
|
||||||
default: "http://codepen.io"
|
default: "http://codepen.io"
|
||||||
type: list
|
type: list
|
||||||
|
force_get_hosts:
|
||||||
|
default: "us.battle.net|news.yahoo.com|*.medium.com"
|
||||||
|
type: list
|
||||||
|
hidden: true
|
||||||
facebook_app_access_token:
|
facebook_app_access_token:
|
||||||
default: ""
|
default: ""
|
||||||
secret: true
|
secret: true
|
||||||
|
@ -215,7 +215,7 @@ class FinalDestination
|
|||||||
|
|
||||||
@status = :resolved
|
@status = :resolved
|
||||||
return @uri
|
return @uri
|
||||||
when 400, 405, 406, 409, 501
|
when 400, 405, 406, 409, 500, 501
|
||||||
response_status, small_headers = small_get(request_headers)
|
response_status, small_headers = small_get(request_headers)
|
||||||
|
|
||||||
if response_status == 200
|
if response_status == 200
|
||||||
@ -300,7 +300,17 @@ class FinalDestination
|
|||||||
|
|
||||||
def hostname_matches?(url)
|
def hostname_matches?(url)
|
||||||
url = uri(url)
|
url = uri(url)
|
||||||
@uri && url.present? && @uri.hostname == url&.hostname
|
|
||||||
|
if @uri&.hostname.present? && url&.hostname.present?
|
||||||
|
hostname_parts = url.hostname.split('.')
|
||||||
|
has_wildcard = hostname_parts.first == '*'
|
||||||
|
|
||||||
|
if has_wildcard
|
||||||
|
@uri.hostname.end_with?(hostname_parts[1..-1].join('.'))
|
||||||
|
else
|
||||||
|
@uri.hostname == url.hostname
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def is_dest_valid?
|
def is_dest_valid?
|
||||||
|
@ -32,7 +32,8 @@ module Oneboxer
|
|||||||
end
|
end
|
||||||
|
|
||||||
def self.force_get_hosts
|
def self.force_get_hosts
|
||||||
hosts = ['http://us.battle.net', 'https://news.yahoo.com']
|
hosts = []
|
||||||
|
hosts += SiteSetting.force_get_hosts.split('|').collect { |domain| "https://#{domain}" }
|
||||||
hosts += SiteSetting.cache_onebox_response_body_domains.split('|').collect { |domain| "https://www.#{domain}" }
|
hosts += SiteSetting.cache_onebox_response_body_domains.split('|').collect { |domain| "https://www.#{domain}" }
|
||||||
hosts += amazon_domains
|
hosts += amazon_domains
|
||||||
|
|
||||||
@ -394,8 +395,13 @@ module Oneboxer
|
|||||||
allowed += SiteSetting.allowed_iframes.split("|")
|
allowed += SiteSetting.allowed_iframes.split("|")
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.external_onebox(url)
|
def self.external_onebox(url, available_strategies = nil)
|
||||||
Discourse.cache.fetch(onebox_cache_key(url), expires_in: 1.day) do
|
Discourse.cache.fetch(onebox_cache_key(url), expires_in: 1.day) do
|
||||||
|
|
||||||
|
uri = URI(url)
|
||||||
|
available_strategies ||= Oneboxer.ordered_strategies(uri.hostname)
|
||||||
|
strategy = available_strategies.shift
|
||||||
|
|
||||||
fd_options = {
|
fd_options = {
|
||||||
ignore_redirects: ignore_redirects,
|
ignore_redirects: ignore_redirects,
|
||||||
ignore_hostnames: blocked_domains,
|
ignore_hostnames: blocked_domains,
|
||||||
@ -404,6 +410,13 @@ module Oneboxer
|
|||||||
preserve_fragment_url_hosts: preserve_fragment_url_hosts
|
preserve_fragment_url_hosts: preserve_fragment_url_hosts
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if strategy && Oneboxer.strategies[strategy][:force_get_host]
|
||||||
|
fd_options[:force_get_hosts] = ["https://#{uri.hostname}"]
|
||||||
|
end
|
||||||
|
if strategy && Oneboxer.strategies[strategy][:force_custom_user_agent_host]
|
||||||
|
fd_options[:force_custom_user_agent_hosts] = ["https://#{uri.hostname}"]
|
||||||
|
end
|
||||||
|
|
||||||
user_agent_override = SiteSetting.cache_onebox_user_agent if Oneboxer.cache_response_body?(url) && SiteSetting.cache_onebox_user_agent.present?
|
user_agent_override = SiteSetting.cache_onebox_user_agent if Oneboxer.cache_response_body?(url) && SiteSetting.cache_onebox_user_agent.present?
|
||||||
fd_options[:default_user_agent] = user_agent_override if user_agent_override
|
fd_options[:default_user_agent] = user_agent_override if user_agent_override
|
||||||
|
|
||||||
@ -415,6 +428,11 @@ module Oneboxer
|
|||||||
if fd.status == :invalid_address
|
if fd.status == :invalid_address
|
||||||
args[:error_message] = I18n.t("errors.onebox.invalid_address", hostname: fd.hostname)
|
args[:error_message] = I18n.t("errors.onebox.invalid_address", hostname: fd.hostname)
|
||||||
elsif fd.status_code
|
elsif fd.status_code
|
||||||
|
# Try a different oneboxing strategy, if we have any options left:
|
||||||
|
if available_strategies.present?
|
||||||
|
return external_onebox(url, available_strategies)
|
||||||
|
end
|
||||||
|
|
||||||
args[:error_message] = I18n.t("errors.onebox.error_response", status_code: fd.status_code)
|
args[:error_message] = I18n.t("errors.onebox.error_response", status_code: fd.status_code)
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -466,6 +484,8 @@ module Oneboxer
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
Oneboxer.cache_preferred_strategy(uri.hostname, strategy)
|
||||||
|
|
||||||
result
|
result
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@ -490,4 +510,44 @@ module Oneboxer
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def self.ordered_strategies(hostname)
|
||||||
|
all = strategies.keys
|
||||||
|
preferred = Oneboxer.preferred_strategy(hostname)
|
||||||
|
|
||||||
|
all.insert(0, all.delete(preferred)) if all.include?(preferred)
|
||||||
|
|
||||||
|
all
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.strategies
|
||||||
|
{
|
||||||
|
default: {}, # don't override anything by default
|
||||||
|
force_get_and_ua: {
|
||||||
|
force_get_host: true,
|
||||||
|
force_custom_user_agent_host: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.cache_preferred_strategy(hostname, strategy)
|
||||||
|
return if strategy == :default
|
||||||
|
|
||||||
|
key = redis_oneboxer_strategy_key(hostname)
|
||||||
|
Discourse.redis.without_namespace.setex(key, 2.weeks.to_i, strategy.to_s)
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.clear_preferred_strategy!(hostname)
|
||||||
|
key = redis_oneboxer_strategy_key(hostname)
|
||||||
|
Discourse.redis.without_namespace.del(key)
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.preferred_strategy(hostname)
|
||||||
|
key = redis_oneboxer_strategy_key(hostname)
|
||||||
|
Discourse.redis.without_namespace.get(key)&.to_sym
|
||||||
|
end
|
||||||
|
|
||||||
|
def self.redis_oneboxer_strategy_key(hostname)
|
||||||
|
"ONEBOXER_STRATEGY_#{hostname}"
|
||||||
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
@ -9,7 +9,7 @@ describe FinalDestination do
|
|||||||
{
|
{
|
||||||
ignore_redirects: ['https://ignore-me.com'],
|
ignore_redirects: ['https://ignore-me.com'],
|
||||||
|
|
||||||
force_get_hosts: ['https://force.get.com'],
|
force_get_hosts: ['https://force.get.com', 'https://*.ihaveawildcard.com/'],
|
||||||
|
|
||||||
preserve_fragment_url_hosts: ['https://eviltrout.com'],
|
preserve_fragment_url_hosts: ['https://eviltrout.com'],
|
||||||
|
|
||||||
@ -17,6 +17,7 @@ describe FinalDestination do
|
|||||||
lookup_ip: lambda do |host|
|
lookup_ip: lambda do |host|
|
||||||
case host
|
case host
|
||||||
when 'eviltrout.com' then '52.84.143.152'
|
when 'eviltrout.com' then '52.84.143.152'
|
||||||
|
when 'particularly.eviltrout.com' then '52.84.143.152'
|
||||||
when 'codinghorror.com' then '91.146.108.148'
|
when 'codinghorror.com' then '91.146.108.148'
|
||||||
when 'discourse.org' then '104.25.152.10'
|
when 'discourse.org' then '104.25.152.10'
|
||||||
when 'some_thing.example.com' then '104.25.152.10'
|
when 'some_thing.example.com' then '104.25.152.10'
|
||||||
@ -24,6 +25,7 @@ describe FinalDestination do
|
|||||||
when 'internal-ipv6.com' then '2001:abc:de:01:3:3d0:6a65:c2bf'
|
when 'internal-ipv6.com' then '2001:abc:de:01:3:3d0:6a65:c2bf'
|
||||||
when 'ignore-me.com' then '53.84.143.152'
|
when 'ignore-me.com' then '53.84.143.152'
|
||||||
when 'force.get.com' then '22.102.29.40'
|
when 'force.get.com' then '22.102.29.40'
|
||||||
|
when 'any-subdomain.ihaveawildcard.com' then '104.25.152.11'
|
||||||
when 'wikipedia.com' then '1.2.3.4'
|
when 'wikipedia.com' then '1.2.3.4'
|
||||||
else
|
else
|
||||||
as_ip = IPAddr.new(host)
|
as_ip = IPAddr.new(host)
|
||||||
@ -170,8 +172,11 @@ describe FinalDestination do
|
|||||||
before do
|
before do
|
||||||
stub_request(:head, 'https://force.get.com/posts?page=4')
|
stub_request(:head, 'https://force.get.com/posts?page=4')
|
||||||
stub_request(:get, 'https://force.get.com/posts?page=4')
|
stub_request(:get, 'https://force.get.com/posts?page=4')
|
||||||
|
stub_request(:get, 'https://any-subdomain.ihaveawildcard.com/some/other/content')
|
||||||
stub_request(:head, 'https://eviltrout.com/posts?page=2')
|
stub_request(:head, 'https://eviltrout.com/posts?page=2')
|
||||||
stub_request(:get, 'https://eviltrout.com/posts?page=2')
|
stub_request(:get, 'https://eviltrout.com/posts?page=2')
|
||||||
|
stub_request(:head, 'https://particularly.eviltrout.com/has/a/secret/plan')
|
||||||
|
stub_request(:get, 'https://particularly.eviltrout.com/has/a/secret/plan')
|
||||||
end
|
end
|
||||||
|
|
||||||
it "will do a GET when forced" do
|
it "will do a GET when forced" do
|
||||||
@ -189,6 +194,23 @@ describe FinalDestination do
|
|||||||
expect(WebMock).to_not have_requested(:get, 'https://eviltrout.com/posts?page=2')
|
expect(WebMock).to_not have_requested(:get, 'https://eviltrout.com/posts?page=2')
|
||||||
expect(WebMock).to have_requested(:head, 'https://eviltrout.com/posts?page=2')
|
expect(WebMock).to have_requested(:head, 'https://eviltrout.com/posts?page=2')
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "will do a GET when forced on a wildcard subdomain" do
|
||||||
|
final = FinalDestination.new('https://any-subdomain.ihaveawildcard.com/some/other/content', opts)
|
||||||
|
expect(final.resolve.to_s).to eq('https://any-subdomain.ihaveawildcard.com/some/other/content')
|
||||||
|
expect(final.status).to eq(:resolved)
|
||||||
|
expect(WebMock).to have_requested(:get, 'https://any-subdomain.ihaveawildcard.com/some/other/content')
|
||||||
|
expect(WebMock).to_not have_requested(:head, 'https://any-subdomain.ihaveawildcard.com/some/other/content')
|
||||||
|
end
|
||||||
|
|
||||||
|
it "will do a HEAD if on a subdomain of a forced get domain without a wildcard" do
|
||||||
|
final = FinalDestination.new('https://particularly.eviltrout.com/has/a/secret/plan', opts)
|
||||||
|
expect(final.resolve.to_s).to eq('https://particularly.eviltrout.com/has/a/secret/plan')
|
||||||
|
expect(final.status).to eq(:resolved)
|
||||||
|
expect(WebMock).to_not have_requested(:get, 'https://particularly.eviltrout.com/has/a/secret/plan')
|
||||||
|
expect(WebMock).to have_requested(:head, 'https://particularly.eviltrout.com/has/a/secret/plan')
|
||||||
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
context "HEAD not supported" do
|
context "HEAD not supported" do
|
||||||
|
@ -355,6 +355,60 @@ describe Oneboxer do
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
context 'strategies' do
|
||||||
|
it "has a 'default' strategy" do
|
||||||
|
expect(Oneboxer.strategies.keys.first).to eq(:default)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "has a strategy with overrides" do
|
||||||
|
strategy = Oneboxer.strategies.keys[1]
|
||||||
|
expect(Oneboxer.strategies[strategy].keys).not_to eq([])
|
||||||
|
end
|
||||||
|
|
||||||
|
context "using a non-default strategy" do
|
||||||
|
let(:hostname) { "my.interesting.site" }
|
||||||
|
let(:url) { "https://#{hostname}/cool/content" }
|
||||||
|
let(:html) do
|
||||||
|
<<~HTML
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta property="og:title" content="Page Title">
|
||||||
|
<meta property="og:description" content="Here is some cool content">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<p>body</p>
|
||||||
|
</body>
|
||||||
|
<html>
|
||||||
|
HTML
|
||||||
|
end
|
||||||
|
|
||||||
|
before do
|
||||||
|
stub_request(:head, url).to_return(status: 509)
|
||||||
|
stub_request(:get, url).to_return(status: 200, body: html)
|
||||||
|
end
|
||||||
|
|
||||||
|
after do
|
||||||
|
Oneboxer.clear_preferred_strategy!(hostname)
|
||||||
|
end
|
||||||
|
|
||||||
|
it "uses mutiple strategies" do
|
||||||
|
default_ordered = Oneboxer.strategies.keys
|
||||||
|
custom_ordered = Oneboxer.ordered_strategies(hostname)
|
||||||
|
expect(custom_ordered).to eq(default_ordered)
|
||||||
|
|
||||||
|
expect(Oneboxer.preferred_strategy(hostname)).to eq(nil)
|
||||||
|
expect(Oneboxer.preview(url, invalidate_oneboxes: true)).to include("Here is some cool content")
|
||||||
|
|
||||||
|
custom_ordered = Oneboxer.ordered_strategies(hostname)
|
||||||
|
|
||||||
|
expect(custom_ordered.count).to eq(default_ordered.count)
|
||||||
|
expect(custom_ordered).not_to eq(default_ordered)
|
||||||
|
|
||||||
|
expect(Oneboxer.preferred_strategy(hostname)).not_to eq(:default)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
describe 'cache_onebox_response_body' do
|
describe 'cache_onebox_response_body' do
|
||||||
let(:html) do
|
let(:html) do
|
||||||
<<~HTML
|
<<~HTML
|
||||||
|
Loading…
Reference in New Issue
Block a user