mirror of
https://github.com/discourse/discourse.git
synced 2024-11-22 08:57:10 -06:00
FIX: Hack our title retriever so that it parses YouTube URLs
This commit is contained in:
parent
cd6dff58dd
commit
07e84a3afa
@ -15,6 +15,12 @@ module RetrieveTitle
|
|||||||
|
|
||||||
title = doc.at('title')&.inner_text
|
title = doc.at('title')&.inner_text
|
||||||
|
|
||||||
|
# A horrible hack - YouTube uses `document.title` to populate the title
|
||||||
|
# for some reason. For any other site than YouTube this wouldn't be worth it.
|
||||||
|
if title == "YouTube" && html =~ /document\.title *= *"(.*)";/
|
||||||
|
title = Regexp.last_match[1].sub(/ - YouTube$/, '')
|
||||||
|
end
|
||||||
|
|
||||||
if !title && node = doc.at('meta[property="og:title"]')
|
if !title && node = doc.at('meta[property="og:title"]')
|
||||||
title = node['content']
|
title = node['content']
|
||||||
end
|
end
|
||||||
@ -32,9 +38,11 @@ module RetrieveTitle
|
|||||||
private
|
private
|
||||||
|
|
||||||
def self.max_chunk_size(uri)
|
def self.max_chunk_size(uri)
|
||||||
# Amazon leaves the title until very late. Normally it's a bad idea to make an exception for
|
|
||||||
# one host but amazon is a big one.
|
# Amazon and YouTube leave the title until very late. Exceptions are bad
|
||||||
|
# but these are large sites.
|
||||||
return 80 if uri.host =~ /amazon\.(com|ca|co\.uk|es|fr|de|it|com\.au|com\.br|cn|in|co\.jp|com\.mx)$/
|
return 80 if uri.host =~ /amazon\.(com|ca|co\.uk|es|fr|de|it|com\.au|com\.br|cn|in|co\.jp|com\.mx)$/
|
||||||
|
return 300 if uri.host =~ /youtube\.com$/ || uri.host =~ /youtu.be/
|
||||||
|
|
||||||
# default is 10k
|
# default is 10k
|
||||||
10
|
10
|
||||||
|
@ -44,6 +44,17 @@ describe RetrieveTitle do
|
|||||||
expect(title).to eq("Good Title")
|
expect(title).to eq("Good Title")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "will parse a YouTube url from javascript" do
|
||||||
|
title = RetrieveTitle.extract_title(<<~HTML
|
||||||
|
<html>
|
||||||
|
<title>YouTube</title>
|
||||||
|
<script>document.title = "Video Title";</script>
|
||||||
|
</html>
|
||||||
|
HTML
|
||||||
|
)
|
||||||
|
expect(title).to eq("Video Title")
|
||||||
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
Loading…
Reference in New Issue
Block a user