mirror of
https://github.com/discourse/discourse.git
synced 2025-02-25 18:55:32 -06:00
FIX: Allow to follow non-ASCII canonical links for oneboxes
This commit is contained in:
parent
324857c4c4
commit
5055a071b8
@ -28,7 +28,7 @@ module Onebox
|
||||
)
|
||||
|
||||
doc = Nokogiri.HTML(response)
|
||||
uri = Addressable::URI.parse(url)
|
||||
uri = Addressable::URI.parse(url).normalize!
|
||||
|
||||
ignore_canonical_tag = doc.at('meta[property="og:ignore_canonical"]')
|
||||
should_ignore_canonical =
|
||||
@ -38,13 +38,13 @@ module Onebox
|
||||
!should_ignore_canonical
|
||||
# prefer canonical link
|
||||
canonical_link = doc.at('//link[@rel="canonical"]/@href')
|
||||
canonical_uri = Addressable::URI.parse(canonical_link)
|
||||
canonical_uri = Addressable::URI.parse(canonical_link)&.normalize!
|
||||
if canonical_link && canonical_uri &&
|
||||
"#{canonical_uri.host}#{canonical_uri.path}" != "#{uri.host}#{uri.path}"
|
||||
uri =
|
||||
FinalDestination.new(
|
||||
canonical_link,
|
||||
Oneboxer.get_final_destination_options(canonical_link),
|
||||
canonical_uri,
|
||||
Oneboxer.get_final_destination_options(canonical_uri),
|
||||
).resolve
|
||||
if uri.present?
|
||||
response =
|
||||
|
@ -76,8 +76,9 @@ RSpec.describe Onebox::Helpers do
|
||||
end
|
||||
|
||||
context "with canonical link" do
|
||||
let(:uri) { "https://www.example.com" }
|
||||
|
||||
it "follows canonical link" do
|
||||
uri = "https://www.example.com"
|
||||
stub_request(:get, uri).to_return(
|
||||
status: 200,
|
||||
body: "<!DOCTYPE html><link rel='canonical' href='http://foobar.com/'/><p>invalid</p>",
|
||||
@ -92,7 +93,6 @@ RSpec.describe Onebox::Helpers do
|
||||
end
|
||||
|
||||
it "does not follow canonical link pointing at localhost" do
|
||||
uri = "https://www.example.com"
|
||||
FinalDestination::SSRFDetector
|
||||
.stubs(:lookup_ips)
|
||||
.with { |h| h == "localhost" }
|
||||
@ -104,6 +104,25 @@ RSpec.describe Onebox::Helpers do
|
||||
|
||||
expect(described_class.fetch_html_doc(uri).to_s).to match("success")
|
||||
end
|
||||
|
||||
context "when canonical link contains non-ASCII characters" do
|
||||
before do
|
||||
stub_request(:get, uri).to_return(
|
||||
status: 200,
|
||||
body:
|
||||
"<!DOCTYPE html><link rel='canonical' href='http://foobar.com/héhé'/><p>invalid</p>",
|
||||
)
|
||||
stub_request(:get, "http://foobar.com/héhé").to_return(
|
||||
status: 200,
|
||||
body: "<!DOCTYPE html><p>success</p>",
|
||||
)
|
||||
stub_request(:head, "http://foobar.com/héhé").to_return(status: 200, body: "")
|
||||
end
|
||||
|
||||
it "does not break" do
|
||||
expect(described_class.fetch_html_doc(uri).to_s).to match("success")
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -286,10 +286,65 @@ RSpec.describe Oneboxer do
|
||||
"<a href='http://test.localhost/g/somegroup#'onerror=''>http://test.localhost/g/somegroup#'onerror='</a>",
|
||||
)
|
||||
end
|
||||
|
||||
context "when URL contains non-ASCII characters" do
|
||||
let(:html) { <<~HTML }
|
||||
<html>
|
||||
<head>
|
||||
<meta property="og:title" content="Cats">
|
||||
<meta property="og:description" content="Meow">
|
||||
</head>
|
||||
<body>
|
||||
<p>body</p>
|
||||
</body>
|
||||
<html>
|
||||
HTML
|
||||
|
||||
context "when response is not a redirect" do
|
||||
let(:url) { "https://its.me/héhé" }
|
||||
|
||||
before do
|
||||
stub_request(:get, url).to_return(status: 200, body: html, headers: {})
|
||||
stub_request(:head, url).to_return(status: 200, body: "", headers: {})
|
||||
end
|
||||
|
||||
it "does not break" do
|
||||
expect(described_class.onebox_raw(url)[:onebox]).to be_present
|
||||
end
|
||||
end
|
||||
|
||||
context "when response is a redirect" do
|
||||
let(:url) { "https://its.me" }
|
||||
let(:non_ascii_url) { "#{url}/h%C3%A9h%C3%A9" }
|
||||
|
||||
before do
|
||||
stub_request(:get, url).to_return(
|
||||
status: 301,
|
||||
body: "",
|
||||
headers: {
|
||||
"location" => non_ascii_url,
|
||||
},
|
||||
)
|
||||
stub_request(:head, url).to_return(
|
||||
status: 301,
|
||||
body: "",
|
||||
headers: {
|
||||
"location" => non_ascii_url,
|
||||
},
|
||||
)
|
||||
stub_request(:get, non_ascii_url).to_return(status: 200, body: html, headers: {})
|
||||
stub_request(:head, non_ascii_url).to_return(status: 200, body: "", headers: {})
|
||||
end
|
||||
|
||||
it "does not break" do
|
||||
expect(described_class.onebox_raw(url)[:onebox]).to be_present
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe ".external_onebox" do
|
||||
html = <<~HTML
|
||||
let(:html) { <<~HTML }
|
||||
<html>
|
||||
<head>
|
||||
<meta property="og:title" content="Cats">
|
||||
|
Loading…
Reference in New Issue
Block a user