DEV: Absorb onebox gem into core (#12979)

* Move onebox gem into core library

* Update template file path

* Remove warning for onebox gem caching

* Remove onebox version file

* Remove onebox gem

* Add sanitize gem

* Require onebox library in lazy-yt plugin

* Remove onebox web specific code

This code was used in the standalone onebox Sinatra application

* Merge Discourse specific AllowlistedGenericOnebox engine in core

* Fix onebox engine filenames to match class name casing

* Move onebox specs from gem into core

* DEV: Rename `response` helper to `onebox_response`

Fixes a naming collision.

* Require rails_helper

* Don't use `before/after(:all)`

* Whitespace

* Remove fakeweb

* Remove poor unit tests

* DEV: Re-add fakeweb, plugins are using it

* Move onebox helpers

* Stub Instagram API

* FIX: Follow additional redirect status codes (#476)

Don’t throw errors if we encounter 303, 307 or 308 HTTP status codes in responses

* Remove an empty file

* DEV: Update the license file

Using the copy from https://choosealicense.com/licenses/gpl-2.0/#

Hopefully this will enable GitHub to show the license UI?

* DEV: Update embedded copyrights

* DEV: Add Onebox copyright notice

* DEV: Add MIT license, convert COPYRIGHT.txt to md

* DEV: Remove an incorrect copyright claim

Co-authored-by: Jarek Radosz <jradosz@gmail.com>
Co-authored-by: jbrw <jamie@goatforce5.org>
This commit is contained in:
Arpit Jalan
2021-05-26 15:11:35 +05:30
committed by GitHub
parent d0779a87bb
commit 283b08d45f
211 changed files with 78330 additions and 74 deletions

View File

@@ -1,21 +1,267 @@
# frozen_string_literal: true
require 'htmlentities'
require "ipaddr"
module Onebox
module Engine
class AllowlistedGenericOnebox
include Engine
include StandardEmbed
include LayoutSupport
# Ordering weight of this engine, returned as a plain Integer.
# NOTE(review): presumably engines with smaller values are tried first by the
# matcher (not visible here) — confirm before changing the number.
def self.priority
200
end
# Providers whose embedded `html` payload is used as-is.
#
# For many sites (some blogs, for instance) the `html` attribute contains the
# entire page and is not what we want, but for providers like Flickr it is what
# lets us return gifv and galleries.
#
# @return [Array<String>] a new array of provider names on every call
def self.default_html_providers
  %w[Flickr Meetup]
end
# Current list of providers whose `html` payload is trusted. The list is seeded
# with a copy of `default_html_providers` the first time it is read, so callers
# may mutate it without touching the defaults.
#
# @return [Array<String>]
def self.html_providers
  return @html_providers if @html_providers

  @html_providers = default_html_providers.dup
end
# Replaces the list returned by `html_providers`. The value is stored as given
# (no copy is made).
def self.html_providers=(new_provs)
@html_providers = new_provs
end
# Hosts whose generated HTML gets every "http://" rewritten to "https://".
# Seeded with a copy of `https_hosts` on first access.
#
# @return [Array<String>]
def self.rewrites
  return @rewrites if @rewrites

  @rewrites = https_hosts.dup
end
# Replaces the list returned by `rewrites`. The value is stored as given
# (no copy is made).
def self.rewrites=(new_list)
@rewrites = new_list
end
# Hosts used as the default for `rewrites`.
#
# @return [Array<String>] bare host names, without scheme or "www."
def self.https_hosts
  %w[
    slideshare.net
    dailymotion.com
    livestream.com
    imgur.com
    flickr.com
  ]
end
# Reports whether the host of +uri+ is equal to, or a subdomain of, any entry
# in +list+.
#
# @param uri [#host] URI-like object whose +host+ is tested
# @param list [Enumerable<String>] bare host names such as "imgur.com"
# @return [Boolean] false when the host is nil or nothing matches
def self.host_matches(uri, list)
  host = uri.host
  return false if host.nil?

  # \A and \z anchor the whole string. ^ and $ only anchor a line, which let a
  # host such as "imgur.com\nevil.example" pass the check.
  list.any? { |h| /(\A|\.)#{Regexp.escape(h)}\z/.match?(host) }
end
# Label fragments that `data` accepts for the label1/label2 card fields; a
# label is kept when it matches one of these case-insensitively.
#
# @return [Array<String>]
def self.allowed_twitter_labels
  [
    'brand',
    'price',
    'usd',
    'cad',
    'reading time',
    'likes'
  ]
end
# Overrides the allowlist check: anything that is not a URI matches, and a URI
# matches unless its hostname parses as an IP address.
#
# @param other [Object]
# @return [Boolean]
def self.===(other)
  return true unless other.is_a?(URI)

  parsed_ip =
    begin
      IPAddr.new(other.hostname)
    rescue StandardError
      # Not an IP literal (or no hostname at all).
      nil
    end

  parsed_ip.nil?
end
# ensure we're the last engine to be used
def self.priority
Float::INFINITY
# Renders the onebox: whatever `generic_html` selects, passed through
# `rewrite_https`. Returns nil when `generic_html` returns nil.
def to_html
rewrite_https(generic_html)
end
# Picks the placeholder markup for this URL. Articles and images use their own
# markup, videos and player cards get the shared video placeholder, other
# embeds the generic placeholder, and everything else falls back to `to_html`.
def placeholder_html
  if is_article?
    article_html
  elsif is_image?
    image_html
  elsif is_video? || is_card?
    Onebox::Helpers.video_placeholder_html
  elsif is_embedded?
    Onebox::Helpers.generic_placeholder_html
  else
    to_html
  end
end
# Builds, and memoizes in @data, the hash of values used to render this onebox.
#
# Starts from `raw` merged over `{ link: link }` and then normalizes, in order:
# title, description, domain, image, video, publication time and up to two
# label/data pairs. Finally every key except :video whose value is nil or ''
# gets an 'is blank' entry appended to `errors[key]`.
#
# Raises whatever Time.parse raises when :published_time is present but not
# parseable.
def data
@data ||= begin
html_entities = HTMLEntities.new
d = { link: link }.merge(raw)
# Title: Helpers.truncate(…, 80), then HTML entities decoded.
if !Onebox::Helpers.blank?(d[:title])
d[:title] = html_entities.decode(Onebox::Helpers.truncate(d[:title], 80))
end
# Description falls back to :summary; Helpers.truncate(…, 250), then decoded.
d[:description] ||= d[:summary]
if !Onebox::Helpers.blank?(d[:description])
d[:description] = html_entities.decode(Onebox::Helpers.truncate(d[:description], 250))
end
# Domain: the site name wins when present; otherwise the existing :domain is
# reduced to its host with a leading "www." removed (nil if it cannot be parsed).
if !Onebox::Helpers.blank?(d[:site_name])
d[:domain] = html_entities.decode(Onebox::Helpers.truncate(d[:site_name], 80))
elsif !Onebox::Helpers.blank?(d[:domain])
d[:domain] = "http://#{d[:domain]}" unless d[:domain] =~ /^https?:\/\//
d[:domain] = URI(d[:domain]).host.to_s.sub(/^www\./, '') rescue nil
end
# prefer secure URLs
d[:image] = d[:image_secure_url] || d[:image_url] || d[:thumbnail_url] || d[:image]
d[:image] = Onebox::Helpers::get_absolute_image_url(d[:image], @url)
d[:image] = Onebox::Helpers::normalize_url_for_output(html_entities.decode(d[:image]))
d[:image] = nil if Onebox::Helpers.blank?(d[:image])
d[:video] = d[:video_secure_url] || d[:video_url] || d[:video]
d[:video] = nil if Onebox::Helpers.blank?(d[:video])
# :article_published_time (when present) overrides :published_time, and is
# then itself replaced by the short formatted date below.
d[:published_time] = d[:article_published_time] unless Onebox::Helpers.blank?(d[:article_published_time])
if !Onebox::Helpers.blank?(d[:published_time])
d[:article_published_time] = Time.parse(d[:published_time]).strftime("%-d %b %y")
d[:article_published_time_title] = Time.parse(d[:published_time]).strftime("%I:%M%p - %d %B %Y")
end
# Twitter labels
# A label/data pair is only kept when the label matches an allowed fragment.
if !Onebox::Helpers.blank?(d[:label1]) && !Onebox::Helpers.blank?(d[:data1]) && !!AllowlistedGenericOnebox.allowed_twitter_labels.find { |l| d[:label1] =~ /#{l}/i }
d[:label_1] = Onebox::Helpers.truncate(d[:label1])
d[:data_1] = Onebox::Helpers.truncate(d[:data1])
end
if !Onebox::Helpers.blank?(d[:label2]) && !Onebox::Helpers.blank?(d[:data2]) && !!AllowlistedGenericOnebox.allowed_twitter_labels.find { |l| d[:label2] =~ /#{l}/i }
# The second pair is promoted to slot 1 when the first pair was not kept.
unless Onebox::Helpers.blank?(d[:label_1])
d[:label_2] = Onebox::Helpers.truncate(d[:label2])
d[:data_2] = Onebox::Helpers.truncate(d[:data2])
else
d[:label_1] = Onebox::Helpers.truncate(d[:label2])
d[:data_1] = Onebox::Helpers.truncate(d[:data2])
end
end
# With no label in slot 1, fall back to a "Price" pair built from the
# price currency and amount.
if Onebox::Helpers.blank?(d[:label_1]) && !Onebox::Helpers.blank?(d[:price_amount]) && !Onebox::Helpers.blank?(d[:price_currency])
d[:label_1] = "Price"
d[:data_1] = Onebox::Helpers.truncate("#{d[:price_currency].strip} #{d[:price_amount].strip}")
end
# Record blank values as errors; a missing :video is not reported.
skip_missing_tags = [:video]
d.each do |k, v|
next if skip_missing_tags.include?(k)
if v == nil || v == ''
errors[k] ||= []
errors[k] << 'is blank'
end
end
d
end
end
private
# Upgrades every "http://" in +html+ to "https://" when this onebox's host is
# one of `AllowlistedGenericOnebox.rewrites`; otherwise returns +html+ as is.
#
# @param html [String, nil]
# @return [String, nil] nil when +html+ is nil or false
def rewrite_https(html)
  return unless html
  return html unless AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.rewrites)

  html.gsub("http://", "https://")
end
# Chooses the markup for this URL. The first matching kind wins, in this order:
# article, video, image, embed, player card, then article markup again for
# anything that at least has text or a titled image. Returns nil when nothing
# applies.
def generic_html
  if is_article?
    article_html
  elsif is_video?
    video_html
  elsif is_image?
    image_html
  elsif is_embedded?
    embedded_html
  elsif is_card?
    card_html
  elsif has_text? || is_image_article?
    article_html
  end
end
# Truthy when the metadata describes a "player" card whose player URL looks
# like a URI and matches one of options[:allowed_iframe_regexes].
#
# @return [Boolean, nil] false when the card type differs; nil when the player
#   value is not URI-like or no allowed regexes are configured
def is_card?
  return false unless data[:card] == 'player'

  player_url = data[:player]
  return unless player_url =~ URI::regexp

  options[:allowed_iframe_regexes]&.any? { |pattern| player_url =~ pattern }
end
# Truthy when either the :type or the :asset_type mentions "article" and the
# page has both a title and a description.
def is_article?
  article_like = data[:type] =~ /article/ || data[:asset_type] =~ /article/
  article_like && has_text?
end
# True when the data has a non-blank title and a non-blank :description.
def has_text?
has_title? && !Onebox::Helpers.blank?(data[:description])
end
# True when data[:title] is not blank (as judged by Onebox::Helpers.blank?).
def has_title?
!Onebox::Helpers.blank?(data[:title])
end
# True when the data has both a title and an image, regardless of description.
def is_image_article?
has_title? && has_image?
end
# Truthy when the :type mentions "photo" or "image" (but not "photostream")
# and an image URL is available.
#
# @return [Boolean, nil] nil when the type does not look like an image at all
def is_image?
  kind = data[:type]
  return unless kind =~ /photo|image/
  return false if kind =~ /photostream/

  has_image?
end
# True when data[:image] is not blank (as judged by Onebox::Helpers.blank?).
def has_image?
!Onebox::Helpers.blank?(data[:image])
end
# Truthy when the :type starts with "video/" or "video.", the declared video
# type is exactly "video/mp4" and a video URL is present.
#
# @return [Boolean, nil] nil when the type is not a video type at all
def is_video?
  return unless data[:type] =~ /^video[\/\.]/
  # Many sites include 'videos' with text/html types (i.e. iframes)
  return false unless data[:video_type] == "video/mp4"

  !Onebox::Helpers.blank?(data[:video])
end
# Decides whether the provider-supplied `html` may be embedded.
#
# Requires both :html and :height. Providers listed in
# `AllowlistedGenericOnebox.html_providers` are accepted outright; otherwise the
# html must contain an iframe whose src matches one of
# options[:allowed_iframe_regexes].
#
# @return [Boolean, nil] nil when no allowed iframe regexes are configured
def is_embedded?
  embed_html = data[:html]
  return false unless embed_html && data[:height]
  return true if AllowlistedGenericOnebox.html_providers.include?(data[:provider_name])
  return false unless embed_html["iframe"]

  iframe_src = Nokogiri::HTML5::fragment(embed_html).at_css('iframe')&.[]("src")
  options[:allowed_iframe_regexes]&.any? { |pattern| iframe_src =~ pattern }
end
# Markup for a player card: an iframe pointing at the player URL. The width
# falls back to "100%" when the card declares none.
def card_html
  player_src = ::Onebox::Helpers.normalize_url_for_output(data[:player])
  width = data[:player_width] || "100%"
  height = data[:player_height]

  <<~RAW
    <iframe src="#{player_src}"
    width="#{width}"
    height="#{height}"
    scrolling="no"
    frameborder="0">
    </iframe>
  RAW
end
# Article markup is delegated to the layout (`layout.to_html`).
def article_html
layout.to_html
end
# Markup for a bare image onebox, or nil when there is no image URL.
# Alt text prefers the description over the title; dimensions fall back from
# the image's own size to the thumbnail size to the generic width/height.
def image_html
  image = data[:image]
  return if Onebox::Helpers.blank?(image)

  src = ::Onebox::Helpers.normalize_url_for_output(image)
  alt_text = data[:description] || data[:title]
  w = data[:image_width] || data[:thumbnail_width] || data[:width]
  h = data[:image_height] || data[:thumbnail_height] || data[:height]

  "<img src='#{src}' alt='#{alt_text}' width='#{w}' height='#{h}' class='onebox'>"
end
# Markup for a direct video: a <video> element with the image as poster and a
# single <source>. Both URLs go through Helpers.normalize_url_for_output.
# NOTE(review): title, width and height are interpolated as-is — presumably the
# result is sanitized further downstream; confirm.
def video_html
escaped_video_src = ::Onebox::Helpers.normalize_url_for_output(data[:video])
escaped_image_src = ::Onebox::Helpers.normalize_url_for_output(data[:image])
<<-HTML
<video
title='#{data[:title]}'
width='#{data[:video_width]}'
height='#{data[:video_height]}'
style='max-width:100%'
poster='#{escaped_image_src}'
controls=''
>
<source src='#{escaped_video_src}'>
</video>
HTML
end
# Re-serializes the provider's html after tidying it: every <img> gets the
# "thumbnail" class, and the first <iframe> (if any) loses its inline style and
# gets fixed width/height/scrolling/frameborder attributes.
def embedded_html
  fragment = Nokogiri::HTML5::fragment(data[:html])

  fragment.css("img").each do |img|
    img["class"] = "thumbnail"
  end

  iframe = fragment.at_css("iframe")
  if iframe
    iframe.remove_attribute("style")
    iframe["width"] = data[:width] || "100%"
    iframe["height"] = data[:height]
    iframe["scrolling"] = "no"
    iframe["frameborder"] = "0"
  end

  fragment.to_html
end
end
end
end

View File

@@ -0,0 +1,198 @@
# frozen_string_literal: true
require 'json'
require "onebox/open_graph"
module Onebox
module Engine
class AmazonOnebox
include Engine
include LayoutSupport
include HTML
always_https
matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx|nl|pl|sa|sg|se|com\.tr|ae)\//)
# The URL to fetch for this product.
#
# Prefers the canonical link of the already-cached page body (fetched here if a
# cached body exists and nothing was loaded yet), then a normalized
# "https://www.amazon.<tld>/dp/<id>" URL built from the product id, and finally
# the URL as given.
def url
  @raw ||= nil

  # If possible, fetch the cached HTML body immediately so we can
  # try to grab the canonical URL from that document,
  # rather than guess at the best URL structure to use
  @raw = Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher) if !@raw && has_cached_body

  canonical_link = @raw&.at('//link[@rel="canonical"]/@href')
  return canonical_link.to_s if canonical_link

  product_id = match && match[:id]
  return @url unless product_id

  id = Addressable::URI.encode_component(product_id, Addressable::URI::CharacterClasses::PATH)
  "https://www.amazon.#{tld}/dp/#{id}"
end
# The "tld" named capture of the class matcher applied to @url (for example
# "com" or "co.uk"), memoized in @tld.
# NOTE(review): @@matcher is presumably set by `matches_regexp` in the Engine
# mixin — confirm.
def tld
@tld ||= @@matcher.match(@url)["tld"]
end
# Extra HTTP headers for the fetch: a User-Agent header when
# @options[:user_agent] is set, otherwise nil.
#
# @return [Hash{String => String}, nil]
def http_params
  user_agent = @options && @options[:user_agent]
  { 'User-Agent' => user_agent } if user_agent
end
private
# Truthy when a body cacher is available, is willing to cache this URI, and
# already holds a cached response body for it.
#
# @return [Boolean, nil] nil when there is no body cacher
def has_cached_body
  cacher = body_cacher
  return if cacher.nil?
  return false unless cacher.respond_to?('cache_response_body?')

  cacher.cache_response_body?(uri.to_s) && cacher.cached_response_body_exists?(uri.to_s)
end
# Product id lookup: matches "dp/" or "gp/" paths (optionally followed by
# "product/" or "video/detail/") in @url and exposes the id as the :id capture.
# Memoized in @match; a failed match is retried on the next call.
#
# @return [MatchData, nil]
def match
  return @match if @match

  @match = @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[A-Z0-9]+)(?:\/|\?|$)/mi)
end
# Finds the best product image URL in the fetched page.
#
# Looks, in order, at #main-image (hi-res attribute, then the first key of the
# dynamic-image JSON), #landingImage (old hi-res attribute, then src) and
# #ebooksImgBlkFront (first key of the dynamic-image JSON).
#
# @return [String, nil] nil when none of the known elements yields an image
# @raise [JSON::ParserError] when a dynamic-image attribute is not valid JSON
def image
  main_image = raw.css("#main-image").first
  if main_image
    attributes = main_image.attributes
    return attributes["data-a-hires"].to_s if attributes["data-a-hires"]

    if attributes["data-a-dynamic-image"]
      return ::JSON.parse(attributes["data-a-dynamic-image"].value).keys.first
    end
  end

  landing_image = raw.css("#landingImage").first
  if landing_image
    old_hires = landing_image.attributes["data-old-hires"]
    return old_hires.to_s if old_hires

    return landing_image["src"].to_s
  end

  # The attribute may be missing from the ebook cover element; without this
  # guard a missing attribute raised NoMethodError instead of yielding nil.
  dynamic_image = raw.css("#ebooksImgBlkFront").first&.attributes&.[]("data-a-dynamic-image")
  ::JSON.parse(dynamic_image.value).keys.first if dynamic_image
end
# Extracts the item price text from the page. Amazon's markup is inconsistent,
# so several layouts are tried in turn: the split "our price" layout, the deal
# price, the plain "our price" text and finally the active media-matrix entry.
#
# @return [String] the price text, or "" when no known element is present
def price
  rest_of_price = raw.css("#priceblock_ourprice .restOfPrice")
  buying_price = raw.css("#priceblock_ourprice .buyingPrice")[0]

  # The split layout needs all three parts. Previously only the first was
  # checked, so a partial layout raised NoMethodError on nil.
  if rest_of_price[0] && rest_of_price[1] && buying_price
    return "#{rest_of_price[0].inner_text}#{buying_price.inner_text}.#{rest_of_price[1].inner_text}"
  end

  deal_price = raw.css("#priceblock_dealprice span")[0]
  return deal_price.inner_text if deal_price

  our_price = raw.css("#priceblock_ourprice").inner_text
  return our_price unless our_price.empty?

  raw.css(".mediaMatrixListItem.a-active .a-color-price").inner_text
end
# Joins the stripped text of every node matched by +authors_xpath+ into a
# single comma-separated string.
#
# @param authors_xpath [String] XPath selecting one node per author
# @return [String] "" when nothing matches
def multiple_authors(authors_xpath)
  names = raw.xpath(authors_xpath).map { |node| node.inner_text.strip }
  names.join(", ")
end
# Builds the hash used to render the Amazon onebox.
#
# Three page layouts are handled: printed books (#dp.book_mobile), ebooks
# (#dp.ebooks_mobile) and everything else. The book layouts read publisher
# details from fixed row/item positions and discard them all when the
# ISBN/ASIN found there does not look valid. A price starting with "$0" is
# dropped at the end.
def data
og = ::Onebox::OpenGraph.new(raw)
if raw.at_css('#dp.book_mobile') # printed books
title = raw.at("h1#title")&.inner_text
authors = raw.at_css('#byline_secondary_view_div') ? multiple_authors("//div[@id='byline_secondary_view_div']//span[@class='a-text-bold']") : raw.at("#byline")&.inner_text
rating = raw.at("#averageCustomerReviews_feature_div .a-icon")&.inner_text || raw.at("#cmrsArcLink .a-icon")&.inner_text
table_xpath = "//div[@id='productDetails_secondary_view_div']//table[@id='productDetails_techSpec_section_1']"
isbn = raw.xpath("#{table_xpath}//tr[8]//td").inner_text.strip
# if ISBN is misplaced or absent it's hard to find out which data is
# available and where to find it so just set it all to nil
if /^\d(\-?\d){12}$/.match(isbn)
publisher = raw.xpath("#{table_xpath}//tr[1]//td").inner_text.strip
published = raw.xpath("#{table_xpath}//tr[2]//td").inner_text.strip
book_length = raw.xpath("#{table_xpath}//tr[6]//td").inner_text.strip
else
isbn = publisher = published = book_length = nil
end
# NOTE(review): `!isbn&.empty?` is also true when isbn is nil, so the ", "
# separator is appended after the rating even with no ISBN — confirm intended.
# book_length is computed above but not included in the result.
result = {
link: url,
title: title,
by_info: authors,
image: og.image || image,
description: raw.at("#productDescription")&.inner_text,
rating: "#{rating}#{', ' if rating && (!isbn&.empty? || !price&.empty?)}",
price: price,
isbn_asin_text: "ISBN",
isbn_asin: isbn,
publisher: publisher,
published: "#{published}#{', ' if published && !price&.empty?}"
}
elsif raw.at_css('#dp.ebooks_mobile') # ebooks
title = raw.at("#ebooksTitle")&.inner_text
authors = raw.at_css('#a-popover-mobile-udp-contributor-popover-id') ? multiple_authors("//div[@id='a-popover-mobile-udp-contributor-popover-id']//span[contains(@class,'a-text-bold')]") : (raw.at("#byline")&.inner_text&.strip || raw.at("#bylineInfo")&.inner_text&.strip)
rating = raw.at("#averageCustomerReviews_feature_div .a-icon")&.inner_text || raw.at("#cmrsArcLink .a-icon")&.inner_text || raw.at("#acrCustomerReviewLink .a-icon")&.inner_text
table_xpath = "//div[@id='detailBullets_secondary_view_div']//ul"
asin = raw.xpath("#{table_xpath}//li[4]/span/span[2]").inner_text
# if ASIN is misplaced or absent it's hard to find out which data is
# available and where to find it so just set it all to nil
if /^[0-9A-Z]{10}$/.match(asin)
publisher = raw.xpath("#{table_xpath}//li[2]/span/span[2]").inner_text
published = raw.xpath("#{table_xpath}//li[1]/span/span[2]").inner_text
else
asin = publisher = published = nil
end
result = {
link: url,
title: title,
by_info: authors,
image: og.image || image,
description: raw.at("#productDescription")&.inner_text,
rating: "#{rating}#{', ' if rating && (!asin&.empty? || !price&.empty?)}",
price: price,
isbn_asin_text: "ASIN",
isbn_asin: asin,
publisher: publisher,
published: "#{published}#{', ' if published && !price&.empty?}"
}
else
# Any other product page: OpenGraph values first, page markup as fallback.
title = og.title || CGI.unescapeHTML(raw.css("title").inner_text)
result = {
link: url,
title: title,
image: og.image || image,
price: price
}
result[:by_info] = raw.at("#by-line")
result[:by_info] = Onebox::Helpers.clean(result[:by_info].inner_html) if result[:by_info]
summary = raw.at("#productDescription")
description = og.description || summary&.inner_text
description ||= raw.css("meta[name=description]").first&.[]("content")
result[:description] = CGI.unescapeHTML(Onebox::Helpers.truncate(description, 250)) if description
end
# NOTE(review): this assumes result[:price] is a String (it calls start_with?);
# the `== 0` comparison can then never be true — confirm.
result[:price] = nil if result[:price].start_with?("$0") || result[:price] == 0
result
end
end
end
end

View File

@@ -0,0 +1,27 @@
# frozen_string_literal: true
module Onebox
module Engine
# Onebox for asciinema.org recordings: embeds the recording's player script,
# with its PNG preview as the placeholder.
class AsciinemaOnebox
  include Engine
  include StandardEmbed

  always_https
  matches_regexp(/^https?:\/\/asciinema\.org\/a\/[\p{Alnum}_\-]+$/)

  # Script tag that loads the player for this recording.
  def to_html
    recording_id = match[:asciinema_id]
    "<script type='text/javascript' src='https://asciinema.org/a/#{recording_id}.js' id='asciicast-#{recording_id}' async></script>"
  end

  # Static preview image for this recording.
  def placeholder_html
    recording_id = match[:asciinema_id]
    "<img src='https://asciinema.org/a/#{recording_id}.png'>"
  end

  private

  # Memoized match exposing the recording id as :asciinema_id.
  def match
    @match ||= @url.match(/asciinema\.org\/a\/(?<asciinema_id>[\p{Alnum}_\-]+)$/)
  end
end
end
end

View File

@@ -0,0 +1,30 @@
# frozen_string_literal: true
module Onebox
module Engine
# Onebox for direct links to audio files (mp3, ogg, opus, wav, m4a), with or
# without a query string and with or without a scheme.
class AudioOnebox
include Engine
matches_regexp(/^(https?:)?\/\/.*\.(mp3|ogg|opus|wav|m4a)(\?.*)?$/i)
# HTTPS is only forced for hosts listed in AllowlistedGenericOnebox.https_hosts.
def always_https?
AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
end
# An <audio> element with a plain link as fallback content. The download
# control is disabled when @options[:disable_media_download_controls] is set.
# NOTE(review): the link text interpolates @url unescaped — presumably
# sanitized downstream; confirm.
def to_html
escaped_url = ::Onebox::Helpers.normalize_url_for_output(@url)
<<-HTML
<audio controls #{@options[:disable_media_download_controls] ? 'controlslist="nodownload"' : ""}>
<source src="#{escaped_url}">
<a href="#{escaped_url}">#{@url}</a>
</audio>
HTML
end
# Shared audio placeholder from Onebox::Helpers.
def placeholder_html
::Onebox::Helpers.audio_placeholder_html
end
end
end
end

View File

@@ -0,0 +1,29 @@
# frozen_string_literal: true
module Onebox
module Engine
# Onebox for audioboom.com posts, rendered from the page's oEmbed data.
class AudioboomOnebox
include Engine
include StandardEmbed
matches_regexp(/^https?:\/\/audioboom\.com\/posts\/\d+/)
always_https
# Thumbnail image constrained to the oEmbed width and height.
def placeholder_html
oembed = get_oembed
<<-HTML
<img
src="#{oembed.thumbnail_url}"
style="max-width: #{oembed.width}px; max-height: #{oembed.height}px;"
#{oembed.title_attr}
>
HTML
end
# The embed markup exactly as supplied by the oEmbed response.
def to_html
get_oembed.html
end
end
end
end

View File

@@ -0,0 +1,35 @@
# frozen_string_literal: true
module Onebox
module Engine
# Onebox for Bandcamp album and track pages, rendered from OpenGraph data as an
# iframe player.
class BandCampOnebox
include Engine
include StandardEmbed
matches_regexp(/^https?:\/\/.*\.bandcamp\.com\/(album|track)\//)
always_https
requires_iframe_origins "https://bandcamp.com"
# Cover image sized to the player height.
def placeholder_html
og = get_opengraph
"<img src='#{og.image}' height='#{og.video_height}' #{og.title_attr}>"
end
# Iframe pointing at the OpenGraph video URL, preferring the secure one.
# NOTE(review): despite its name, escaped_src is used as returned by the
# OpenGraph object; no escaping is visible here — confirm it happens upstream.
def to_html
og = get_opengraph
escaped_src = og.video_secure_url || og.video
<<-HTML
<iframe
src="#{escaped_src}"
width="#{og.video_width}"
height="#{og.video_height}"
scrolling="no"
frameborder="0"
allowfullscreen
></iframe>
HTML
end
end
end
end

View File

@@ -0,0 +1,53 @@
# frozen_string_literal: true
module Onebox
module Engine
# Onebox for cl.ly (CloudApp) share links. Renders an image, a video or a plain
# link depending on what the page's OpenGraph data offers.
class CloudAppOnebox
include Engine
include StandardEmbed
matches_regexp(/^https?:\/\/cl\.ly/)
always_https
# Image when OpenGraph has one; video when the title ends in a known video
# extension; a plain link otherwise.
def to_html
og = get_opengraph
if !og.image.nil?
image_html(og)
elsif og.title.to_s[/\.(mp4|ogv|webm)$/]
video_html(og)
else
link_html(og)
end
end
private
# Plain link labelled with the OpenGraph title.
def link_html(og)
<<-HTML
<a href='#{og.url}' target='_blank' rel='noopener'>
#{og.title}
</a>
HTML
end
# Video element whose source is "<og url>/<og title>".
# The source is always declared as video/mp4, whatever the file extension.
def video_html(og)
direct_src = ::Onebox::Helpers.normalize_url_for_output("#{og.get(:url)}/#{og.title}")
<<-HTML
<video width='480' height='360' #{og.title_attr} controls loop>
<source src='#{direct_src}' type='video/mp4'>
</video>
HTML
end
# Linked preview image, 480 pixels wide.
def image_html(og)
<<-HTML
<a href='#{og.url}' target='_blank' class='onebox' rel='noopener'>
<img src='#{og.image}' #{og.title_attr} alt='CloudApp' width='480'>
</a>
HTML
end
end
end
end

View File

@@ -0,0 +1,22 @@
# frozen_string_literal: true
module Onebox
module Engine
# Onebox for coub.com videos, rendered from the page's oEmbed data.
class CoubOnebox
  include Engine
  include StandardEmbed

  matches_regexp(/^https?:\/\/coub\.com\/view\//)
  always_https

  # Thumbnail image at the size reported by oEmbed.
  def placeholder_html
    embed = get_oembed
    "<img src='#{embed.thumbnail_url}' height='#{embed.thumbnail_height}' width='#{embed.thumbnail_width}' #{embed.title_attr}>"
  end

  # The embed markup exactly as supplied by the oEmbed response.
  def to_html
    embed = get_oembed
    embed.html
  end
end
end
end

View File

@@ -0,0 +1,34 @@
# frozen_string_literal: true
module Onebox
module Engine
# Onebox for Facebook video pages. Uses the Twitter "player" card when the page
# provides one and otherwise falls back to the generic engine.
class FacebookMediaOnebox
include Engine
include StandardEmbed
matches_regexp(/^https?:\/\/.*\.facebook\.com\/(\w+)\/(videos|\?).*/)
always_https
requires_iframe_origins "https://www.facebook.com"
# Iframe for the player card, or the generic onebox markup for the same URL.
# Returns nil when the generic engine produces nothing.
def to_html
metadata = get_twitter
if metadata.present? && metadata[:card] == "player" && metadata[:player].present?
<<-HTML
<iframe
src="#{metadata[:player]}"
width="#{metadata[:player_width]}"
height="#{metadata[:player_height]}"
scrolling="no"
frameborder="0"
allowfullscreen
></iframe>
HTML
else
html = Onebox::Engine::AllowlistedGenericOnebox.new(@url, @timeout).to_html
return if Onebox::Helpers.blank?(html)
html
end
end
end
end
end

View File

@@ -0,0 +1,18 @@
# frozen_string_literal: true
module Onebox
module Engine
# Onebox for 500px.com photo pages: a single image taken from OpenGraph data.
class FiveHundredPxOnebox
  include Engine
  include StandardEmbed

  matches_regexp(/^https?:\/\/500px\.com\/photo\/\d+\//)
  always_https

  # Image tag using the OpenGraph image and its reported dimensions.
  def to_html
    graph = get_opengraph
    "<img src='#{graph.image}' width='#{graph.image_width}' height='#{graph.image_height}' class='onebox' #{graph.title_attr}>"
  end
end
end
end

View File

@@ -0,0 +1,52 @@
# frozen_string_literal: true
require_relative './opengraph_image'
module Onebox
module Engine
class FlickrOnebox
include Engine
include StandardEmbed
matches_regexp(/^https?:\/\/www\.flickr\.com\/photos\//)
always_https
# Album markup when the OpenGraph URL points at a set, image markup when an
# image is available, nil otherwise.
def to_html
  graph = get_opengraph

  if graph.url =~ /\/sets\//
    album_html(graph)
  elsif !graph.image.nil?
    image_html(graph)
  end
end
private
# Markup for a Flickr album: the cover image wrapped in a link to the album,
# with an overlay box carrying the title prefixed by "[Album]".
def album_html(og)
escaped_url = ::Onebox::Helpers.normalize_url_for_output(url)
album_title = "[Album] #{og.title}"
<<-HTML
<div class='onebox flickr-album'>
<a href='#{escaped_url}' target='_blank' rel='noopener'>
<span class='outer-box' style='max-width:#{og.image_width}px'>
<span class='inner-box'>
<span class='album-title'>#{album_title}</span>
</span>
</span>
<img src='#{og.secure_image_url}' #{og.title_attr} height='#{og.image_height}' width='#{og.image_width}'>
</a>
</div>
HTML
end
# Markup for a single Flickr photo: the image wrapped in a link to its page.
#
# @param og [#secure_image_url, #title_attr, #image_height, #image_width]
#   OpenGraph data of the photo page
# @return [String]
def image_html(og)
  escaped_url = ::Onebox::Helpers.normalize_url_for_output(url)

  # The alt text used to read 'Imgur', a leftover from the engine this
  # method was copied from.
  <<-HTML
<a href='#{escaped_url}' target='_blank' rel='noopener' class="onebox">
<img src='#{og.secure_image_url}' #{og.title_attr} alt='Flickr' height='#{og.image_height}' width='#{og.image_width}'>
</a>
  HTML
end
end
end
end

View File

@@ -0,0 +1,16 @@
# frozen_string_literal: true
require_relative './opengraph_image'
module Onebox
module Engine
# Onebox for shortened flic.kr photo links. Defines no methods of its own; all
# rendering comes from the included modules.
# NOTE(review): presumably OpengraphImage supplies to_html — confirm.
class FlickrShortenedOnebox
include Engine
include StandardEmbed
include OpengraphImage
matches_regexp(/^https?:\/\/flic\.kr\/p\//)
always_https
end
end
end

View File

@@ -0,0 +1,113 @@
# frozen_string_literal: true
module Onebox
module Engine
# Onebox for gfycat.com: renders a looping muted video with title, author and
# keyword links, using the JSON-LD block embedded in the page.
class GfycatOnebox
include Engine
include JSON
matches_regexp(/^https?:\/\/gfycat\.com\//)
always_https
# This engine should have priority over AllowlistedGenericOnebox.
def self.priority
1
end
# Full onebox: header, title/author line, video with webm and mp4 sources and
# a keyword line.
def to_html
<<-HTML
<aside class="onebox gfycat">
<header class="source">
<img src="https://gfycat.com/static/favicons/favicon-96x96.png" class="site-icon" width="64" height="64">
<a href="#{data[:url]}" target="_blank" rel="nofollow ugc noopener">Gfycat.com</a>
</header>
<article class="onebox-body">
<h4>
#{data[:title]} by
<a href="https://gfycat.com/@#{data[:author]}" target="_blank" rel="nofollow ugc noopener">
<span>#{data[:author]}</span>
</a>
</h4>
<div class="video" style="--aspect-ratio: #{data[:width]}/#{data[:height]}">
<video controls loop muted poster="#{data[:posterUrl]}">
<source id="webmSource" src="#{data[:webmUrl]}" type="video/webm">
<source id="mp4Source" src="#{data[:mp4Url]}" type="video/mp4">
<img title="Sorry, your browser doesn't support HTML5 video." src="#{data[:posterUrl]}">
</video>
</div>
<p>
<span class="label1">#{data[:keywords]}</span>
</p>
</article>
<div style="clear: both"></div>
</aside>
HTML
end
# Linked poster image followed by the name taken from the URL.
def placeholder_html
<<-HTML
<a href="#{data[:url]}">
<img src="#{data[:posterUrl]}" width="#{data[:width]}" height="#{data[:height]}"><br/>
#{data[:name]}
</a>
HTML
end
private
# Memoized match exposing everything after the host (minus an optional
# "gifs/detail/" prefix) as :name.
def match
@match ||= @url.match(/^https?:\/\/gfycat\.com\/(gifs\/detail\/)?(?<name>.+)/)
end
# Parsed JSON-LD metadata of the page with symbolized keys, or {} when the page
# has no such script tag. Memoized, including the empty result.
# A failed fetch is swallowed and treated as an empty response.
def og_data
return @og_data if defined?(@og_data)
response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
page = Nokogiri::HTML(response)
script = page.at_css('script[type="application/ld+json"]')
if json_string = script&.text
@og_data = Onebox::Helpers.symbolize_keys(::MultiJson.load(json_string))
else
@og_data = {}
end
end
# Values used by the templates above. Memoized in @data.
def data
return @data if defined?(@data)
@data = {
name: match[:name],
title: og_data[:headline] || 'No Title',
author: og_data[:author],
url: @url,
}
# Each comma-separated keyword becomes a link to its gfycat search page.
# NOTE(review): keywords are interpolated into markup unescaped — presumably
# sanitized downstream; confirm.
if keywords = og_data[:keywords]&.split(',')
@data[:keywords] = keywords
.map { |keyword| "<a href='https://gfycat.com/gifs/search/#{keyword}'>##{keyword}</a>" }
.join(' ')
end
if og_data[:video]
content_url = ::Onebox::Helpers.normalize_url_for_output(og_data[:video][:contentUrl])
# Pathname is used only to swap the file extension of the content URL.
video_url = Pathname.new(content_url)
@data[:webmUrl] = video_url.sub_ext(".webm").to_s
@data[:mp4Url] = video_url.sub_ext(".mp4").to_s
thumbnail_url = ::Onebox::Helpers.normalize_url_for_output(og_data[:video][:thumbnailUrl])
@data[:posterUrl] = thumbnail_url
@data[:width] = og_data[:video][:width]
@data[:height] = og_data[:video][:height]
end
@data
end
end
end
end

View File

@@ -0,0 +1,23 @@
# frozen_string_literal: true
module Onebox
module Engine
# Onebox for giphy.com GIF pages and gph.is short links, rendered from the
# page's oEmbed data.
class GiphyOnebox
include Engine
include StandardEmbed
matches_regexp(/^https?:\/\/(giphy\.com\/gifs|gph\.is)\//)
always_https
# The GIF as an image that links to itself; both href and src use the oEmbed
# url.
def to_html
oembed = get_oembed
<<-HTML
<a href="#{oembed.url}" target="_blank" rel="noopener" class="onebox">
<img src="#{oembed.url}" width="#{oembed.width}" height="#{oembed.height}" #{oembed.title_attr}>
</a>
HTML
end
end
end
end

View File

@@ -0,0 +1,31 @@
# frozen_string_literal: true
require_relative '../mixins/git_blob_onebox'
module Onebox
module Engine
# Onebox for files ("blobs") on github.com. The GitHub-specific pieces
# (patterns, raw URL and title) are defined here; everything else comes from
# Onebox::Mixins::GitBlobOnebox.
class GithubBlobOnebox
  # URLs handled by this engine.
  def self.git_regexp
    /^https?:\/\/(www\.)?github\.com.*\/blob\//
  end

  # Name identifying this engine.
  def self.onebox_name
    "githubblob"
  end

  include Onebox::Mixins::GitBlobOnebox

  # Pattern splitting a blob URL into user, repo, sha1, file and an optional
  # "#Lfrom-Lto" line range.
  def raw_regexp
    /github\.com\/(?<user>[^\/]+)\/(?<repo>[^\/]+)\/blob\/(?<sha1>[^\/]+)\/(?<file>[^#]+)(#(L(?<from>[^-]*)(-L(?<to>.*))?))?/mi
  end

  # URL of the raw file content for a match of `raw_regexp`.
  def raw_template(m)
    user, repo, sha1, file = m[:user], m[:repo], m[:sha1], m[:file]
    "https://raw.githubusercontent.com/#{user}/#{repo}/#{sha1}/#{file}"
  end

  # Sanitized, unencoded link with the leading "https://github.com/" removed.
  def title
    readable_link = Onebox::Helpers.uri_unencode(link)
    Sanitize.fragment(readable_link.sub(/^https?\:\/\/github\.com\//, ''))
  end
end
end
end

View File

@@ -0,0 +1,51 @@
# frozen_string_literal: true
require_relative '../mixins/github_body'
module Onebox
module Engine
class GithubCommitOnebox
include Engine
include LayoutSupport
include JSON
include Onebox::Mixins::GithubBody
matches_regexp(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?(github)\.com(?:\/)?(?:.)*\/commit\//)
always_https
# GitHub API URL of the commit identified by the :owner, :repository and :sha
# captures of `match`.
def url
"https://api.github.com/repos/#{match[:owner]}/#{match[:repository]}/commits/#{match[:sha]}"
end
private
# Extracts owner, repository and commit SHA from @url. Plain commit URLs are
# tried first, then commit URLs inside a pull request (which also capture :pr).
# The result, including a failed match, is memoized in @match.
#
# @return [MatchData, nil]
def match
  return @match if defined?(@match)

  # These have to be real regexps. Written as %{...} they were Strings, in
  # which "\." collapses to "." before String#match compiles the pattern, so
  # the dot in "github.com" matched any character.
  @match = @url.match(%r{github\.com/(?<owner>[^/]+)/(?<repository>[^/]+)/commit/(?<sha>[^/]+)})
  @match ||= @url.match(%r{github\.com/(?<owner>[^/]+)/(?<repository>[^/]+)/pull/(?<pr>[^/]+)/commit/(?<sha>[^/]+)})
  @match
end
# Builds the hash used to render a commit: a copy of the API response plus
# title (first line of the message), body/excerpt (the remaining lines, via
# `compute_body`), formatted commit times, the link and an
# "<host>/<owner>/<repo>" domain label.
#
# @return [Hash]
def data
  result = raw.clone

  lines = result['commit']['message'].split("\n")
  result['title'] = lines.first
  # drop(1) yields [] for an empty message, where lines[1..lines.length] was
  # nil and made the join raise.
  result['body'], result['excerpt'] = compute_body(lines.drop(1).join("\n"))

  committed_at = Time.parse(result['commit']['author']['date'])
  result['committed_at'] = committed_at.strftime("%I:%M%p - %d %b %y %Z")
  result['committed_at_date'] = committed_at.strftime("%F")
  result['committed_at_time'] = committed_at.strftime("%T")

  result['link'] = link
  ulink = URI(link)
  result['domain'] = "#{ulink.host}/#{ulink.path.split('/')[1]}/#{ulink.path.split('/')[2]}"

  result
end
end
end
end

View File

@@ -0,0 +1,78 @@
# frozen_string_literal: true
module Onebox
module Engine
class GithubFolderOnebox
include Engine
include StandardEmbed
include LayoutSupport
matches_regexp(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?(github)\.com[\:\d]*(\/[^\/]+){2}/)
always_https
# Ordering weight of this engine, returned as a plain Integer.
def self.priority
# This engine should have lower priority than the other Github engines
150
end
private
# Builds the hash used to render a GitHub folder/repository link from the
# page's OpenGraph data. When the URL carries a fragment, the heading it points
# at (in a rendered markdown or rdoc document) is appended to the title.
#
# @return [Hash] with :link, :image, :title, :path, :description and :favicon
def data
  og = get_opengraph

  max_length = 250

  display_path = extract_path(og.url, max_length)
  display_description = clean_description(og.description, og.title, max_length)

  title = og.title

  fragment = Addressable::URI.parse(url).fragment
  if fragment
    fragment = Addressable::URI.unencode(fragment)

    # at_css returns nil when nothing matches. css returns a node set, which is
    # truthy even when empty, so the rdoc branch used to be unreachable.
    if html_doc.at_css('.Box.md')
      # For links to markdown docs
      node = html_doc.css('a.anchor').find { |n| n['href'] == "##{fragment}" }
      subtitle = node&.parent&.text
    elsif html_doc.at_css('.Box.rdoc')
      # For links to rdoc docs
      node = html_doc.css('h3').find { |n| n['id'] == "user-content-#{fragment.downcase}" }
      subtitle = node&.css('text()')&.first&.text
    end

    title = "#{title} - #{subtitle}" if subtitle
  end

  {
    link: url,
    image: og.image,
    title: Onebox::Helpers.truncate(title, 250),
    path: display_path,
    description: display_description,
    favicon: get_favicon
  }
end
# The part of `url` that follows "<root>/tree/", ignoring any fragment or query
# string and keeping only the last +max_length+ characters.
#
# @param root [String] repository URL the path is relative to
# @param max_length [Integer]
# @return [String, nil] nil when `url` is too short to contain such a path
def extract_path(root, max_length)
  without_fragment = url.split('#')[0]
  without_query = without_fragment.split('?')[0]

  tail = without_query["#{root}/tree/".length..-1]
  return unless tail
  return tail if tail.length <= max_length

  tail[-max_length..-1]
end
# Removes a trailing " - <title>" from +description+ (if present) and truncates
# the rest via Onebox::Helpers.truncate.
#
# @param description [String, nil]
# @param title [String, nil]
# @param max_length [Integer]
# @return [String, nil] nil when +description+ is nil
def clean_description(description, title, max_length)
  return unless description

  without_title = description.delete_suffix(" - #{title}")
  Onebox::Helpers.truncate(without_title, max_length)
end
end
end
end

View File

@@ -0,0 +1,81 @@
# frozen_string_literal: true
module Onebox
module Engine
class GithubGistOnebox
include Engine
include LayoutSupport
include JSON
MAX_FILES = 3
matches_regexp(/^http(?:s)?:\/\/gist\.(?:(?:\w)+\.)?(github)\.com(?:\/)?/)
always_https
# GitHub API URL of the gist identified by the :sha capture of `match`.
def url
"https://api.github.com/gists/#{match[:sha]}"
end
private
# Values used to render the gist, memoized in @data: a fixed title, the link,
# at most MAX_FILES files and a flag telling whether more files exist.
def data
@data ||= {
title: 'gist.github.com',
link: link,
gist_files: gist_files.take(MAX_FILES),
truncated_files?: truncated_files?
}
end
# True when the gist has more than MAX_FILES files.
def truncated_files?
gist_files.size > MAX_FILES
end
# The gist's files wrapped as GistFile objects, memoized in @gist_files.
# Returns an empty array (without memoizing) when the API response is
# unavailable.
#
# @return [Array<GistFile>]
def gist_files
  return [] unless gist_api

  @gist_files ||= gist_api["files"].values.map { |file_json| GistFile.new(file_json) }
end
# A copy of the API response, memoized in @raw, or nil when fetching it raised
# OpenURI::HTTPError.
def gist_api
@raw ||= raw.clone
rescue OpenURI::HTTPError
# The Gist API rate limit of 60 requests per hour was reached.
nil
end
# Memoized match of @url exposing the gist id (a run of hex digits, optionally
# preceded by a "<user>/" path segment) as the :sha capture.
def match
@match ||= @url.match(%r{gist\.github\.com/([^/]+/)?(?<sha>[0-9a-f]+)})
end
# One file of a gist, built from its entry in the API response.
class GistFile
  # Number of lines shown before the content is cut off.
  MAX_LINES = 10

  attr_reader :filename, :language

  # @param json [Hash] file entry with "filename", "language" and "content"
  def initialize(json)
    @json = json
    @filename = json["filename"]
    @language = json["language"]
  end

  # The first MAX_LINES lines of the file, joined with newlines.
  def content
    lines.first(MAX_LINES).join("\n")
  end

  # True when the file has more than MAX_LINES lines.
  def truncated?
    lines.length > MAX_LINES
  end

  private

  # The file content split into lines, memoized.
  def lines
    @lines ||= @json["content"].split("\n")
  end
end
end
end
end

View File

@@ -0,0 +1,54 @@
# frozen_string_literal: true
require_relative '../mixins/github_body'
module Onebox
module Engine
# Onebox for GitHub issues, rendered from the GitHub issues API.
class GithubIssueOnebox
  #Author Lidlanca 2014
  include Engine
  include LayoutSupport
  include JSON
  include Onebox::Mixins::GithubBody

  matches_regexp(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?github\.com\/(?<org>.+)\/(?<repo>.+)\/issues\/([[:digit:]]+)/)
  always_https

  # GitHub API URL of the issue identified by `match`.
  def url
    parts = match
    "https://api.github.com/repos/#{parts["org"]}/#{parts["repo"]}/issues/#{parts["item_id"]}"
  end

  private

  # Memoized match exposing the org, repo, type and item_id captures.
  def match
    @match ||= @url.match(/^http(?:s)?:\/\/(?:www\.)?(?:(?:\w)+\.)?github\.com\/(?<org>.+)\/(?<repo>.+)\/(?<type>issues)\/(?<item_id>[\d]+)/)
  end

  # Builds the hash used to render the issue: title, body and excerpt (via
  # `compute_body`), labels, user, formatted creation and closing times, the
  # author's avatar and an "<host>/<org>/<repo>" domain label. The closing
  # fields are nil for open issues.
  def data
    created_at = Time.parse(raw['created_at'])
    closed_at = raw['closed_at'] ? Time.parse(raw['closed_at']) : nil
    body, excerpt = compute_body(raw['body'])
    ulink = URI(link)
    domain = "#{ulink.host}/#{ulink.path.split('/')[1]}/#{ulink.path.split('/')[2]}"
    avatar = "https://avatars1.githubusercontent.com/u/#{raw['user']['id']}?v=2&s=96"

    {
      link: @url,
      title: raw["title"],
      body: body,
      excerpt: excerpt,
      labels: raw["labels"],
      user: raw['user'],
      created_at: created_at.strftime("%I:%M%p - %d %b %y %Z"),
      created_at_date: created_at.strftime("%F"),
      created_at_time: created_at.strftime("%T"),
      closed_at: closed_at&.strftime("%I:%M%p - %d %b %y %Z"),
      closed_at_date: closed_at&.strftime("%F"),
      closed_at_time: closed_at&.strftime("%T"),
      closed_by: raw['closed_by'],
      avatar: avatar,
      domain: domain,
    }
  end
end
end
end

View File

@@ -0,0 +1,46 @@
# frozen_string_literal: true
require_relative '../mixins/github_body'
module Onebox
module Engine
# Onebox for GitHub pull requests, rendered from the GitHub pulls API.
class GithubPullRequestOnebox
  include Engine
  include LayoutSupport
  include JSON
  include Onebox::Mixins::GithubBody

  GITHUB_COMMENT_REGEX = /(<!--.*?-->\r\n)/

  matches_regexp(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?(github)\.com(?:\/)?(?:.)*\/pull/)
  always_https

  # GitHub API URL of the pull request identified by `match`.
  def url
    owner, repository, number = match[:owner], match[:repository], match[:number]
    "https://api.github.com/repos/#{owner}/#{repository}/pulls/#{number}"
  end

  private

  # Memoized match exposing the owner, repository and number captures.
  def match
    @match ||= @url.match(%r{github\.com/(?<owner>[^/]+)/(?<repository>[^/]+)/pull/(?<number>[^/]+)})
  end

  # A copy of the API response extended with the link, formatted creation
  # times, an "<host>/<owner>/<repo>" domain label and the body/excerpt pair
  # produced by `compute_body`.
  def data
    result = raw.clone
    result['link'] = link

    created = Time.parse(result['created_at'])
    result['created_at'] = created.strftime("%I:%M%p - %d %b %y %Z")
    result['created_at_date'] = created.strftime("%F")
    result['created_at_time'] = created.strftime("%T")

    ulink = URI(link)
    segments = ulink.path.split('/')
    result['domain'] = "#{ulink.host}/#{segments[1]}/#{segments[2]}"

    result['body'], result['excerpt'] = compute_body(result['body'])

    result
  end
end
end
end

View File

@@ -0,0 +1,31 @@
# frozen_string_literal: true
require_relative '../mixins/git_blob_onebox'
module Onebox
module Engine
# Onebox for files ("blobs") on gitlab.com. The GitLab-specific pieces
# (patterns, raw URL and title) are defined here; everything else comes from
# Onebox::Mixins::GitBlobOnebox.
class GitlabBlobOnebox
  # URLs handled by this engine.
  def self.git_regexp
    /^https?:\/\/(www\.)?gitlab\.com.*\/blob\//
  end

  # Name identifying this engine.
  def self.onebox_name
    "gitlabblob"
  end

  include Onebox::Mixins::GitBlobOnebox

  # Pattern splitting a blob URL into user, repo, sha1, file and an optional
  # "#Lfrom-Lto" line range.
  def raw_regexp
    /gitlab\.com\/(?<user>[^\/]+)\/(?<repo>[^\/]+)\/blob\/(?<sha1>[^\/]+)\/(?<file>[^#]+)(#(L(?<from>[^-]*)(-L(?<to>.*))?))?/mi
  end

  # URL of the raw file content for a match of `raw_regexp`.
  def raw_template(m)
    user, repo, sha1, file = m[:user], m[:repo], m[:sha1], m[:file]
    "https://gitlab.com/#{user}/#{repo}/raw/#{sha1}/#{file}"
  end

  # Sanitized, unencoded link with the leading "https://gitlab.com/" removed.
  def title
    readable_link = Onebox::Helpers.uri_unencode(link)
    Sanitize.fragment(readable_link.sub(/^https?\:\/\/gitlab\.com\//, ''))
  end
end
end
end

View File

@@ -0,0 +1,32 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine embedding a Google Calendar page in an iframe.
    class GoogleCalendarOnebox
      include Engine

      matches_regexp(/^(https?:)?\/\/((www|calendar)\.google\.[\w.]{2,}|goo\.gl)\/calendar\/.+$/)
      always_https
      requires_iframe_origins "https://calendar.google.com"

      # Iframe markup. Everything from the first "&" of the pasted URL onwards
      # is dropped, and "&rm=minimal" is appended to the normalized remainder.
      def to_html
        first_segment = @url.split('&').first
        iframe_src = "#{::Onebox::Helpers.normalize_url_for_output(first_segment)}&rm=minimal"
        "<iframe src='#{iframe_src}' style='border: 0' width='800' height='600' frameborder='0' scrolling='no'>#{placeholder_html}</iframe>"
      end

      # Static splash markup; #to_html also places it inside the iframe element.
      def placeholder_html
        <<~HTML
          <div placeholder>
          <div class='gdocs-onebox gdocs-onebox-splash' style='display:table-cell;vertical-align:middle;width:800px;height:600px'>
          <div style='text-align:center;'>
          <div class='gdocs-onebox-logo g-calendar-logo'></div>
          <p>Google Calendar</p>
          </div>
          </div>
          </div>
        HTML
      end
    end
  end
end

View File

@@ -0,0 +1,46 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine for Google Docs, Sheets, Slides and Forms links.
    class GoogleDocsOnebox
      include Engine
      include StandardEmbed
      include LayoutSupport

      # URL path segments recognised after docs.google.com/.
      SUPPORTED_ENDPOINTS = %w(spreadsheets document forms presentation)

      # Maps a URL endpoint to the short product name used in the output.
      SHORT_TYPES = {
        spreadsheets: :sheets,
        document: :docs,
        presentation: :slides,
        forms: :forms,
      }

      matches_regexp(/^(https?:)?\/\/(docs\.google\.com)\/(?<endpoint>(#{SUPPORTED_ENDPOINTS.join('|')}))\/d\/((?<key>[\w-]*)).+$/)
      always_https

      private

      # Template data built from the page's OpenGraph tags. A blank OpenGraph
      # description is replaced by a "This <Type> is private" notice.
      def data
        og_data = get_opengraph
        short_type = SHORT_TYPES[match[:endpoint].to_sym]
        type_label = short_type.to_s

        description =
          if Onebox::Helpers.blank?(og_data.description)
            # chop removes the trailing letter of the type name ("sheets" -> "Sheet").
            "This #{type_label.chop.capitalize} is private"
          else
            Onebox::Helpers.truncate(og_data.description, 250)
          end

        {
          link: link,
          title: og_data.title || "Google #{type_label.capitalize}",
          description: description,
          type: short_type
        }
      end

      # Memoized match of the URL against the class-level matcher.
      def match
        @match ||= @url.match(@@matcher)
      end
    end
  end
end

View File

@@ -0,0 +1,30 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine for files shared through Google Drive.
    class GoogleDriveOnebox
      include Engine
      include StandardEmbed
      include LayoutSupport

      matches_regexp(/^(https?:)?\/\/(drive\.google\.com)\/file\/d\/(?<key>[\w-]*)\/.+$/)
      always_https

      protected

      # Template data built from the page's OpenGraph tags.
      #
      # The title falls back to "Google Drive" when the page has no og:title,
      # and " (video)" is appended when og:type starts with "video/" or
      # "video.". The suffix is applied to the fallback title as well, so an
      # untitled video renders as "Google Drive (video)" rather than " (video)".
      def data
        og_data = get_opengraph
        title = og_data.title || "Google Drive"
        title = "#{title} (video)" if og_data.type =~ /^video[\/\.]/
        description = og_data.description || "Google Drive file."

        {
          link: link,
          title: title,
          description: Onebox::Helpers.truncate(description, 250),
          image: og_data.image
        }
      end
    end
  end
end

View File

@@ -0,0 +1,184 @@
# frozen_string_literal: true
module Onebox
module Engine
# Onebox engine that turns Google Maps URLs into an embeddable iframe.
#
# Unlike most engines this class keeps a list of several keyed regexps
# (@@matchers) instead of a single matcher, and rewrites @url during
# construction (see #resolve_url!) so that #to_html can embed it directly.
class GoogleMapsOnebox
include Engine
class << self
# For URI arguments, true when any registered matcher regexp matches the
# URI's string form; every other argument is delegated to super.
def ===(other)
if other.kind_of? URI
@@matchers && @@matchers.any? { |m| other.to_s =~ m[:regexp] }
else
super
end
end
private
# Registers a regexp under a symbolic key. Matchers are tried in
# registration order by #match_url, so the order of the calls below matters.
def matches_regexp(key, regexp)
(@@matchers ||= []) << { key: key, regexp: regexp }
end
end
always_https
requires_iframe_origins("https://maps.google.com", "https://google.com")
# Matches shortened Google Maps URLs
matches_regexp :short, %r"^(https?:)?//goo\.gl/maps/"
# Matches URLs for custom-created maps
matches_regexp :custom, %r"^(?:https?:)?//www\.google(?:\.(?:\w{2,}))+/maps/d/(?:edit|viewer|embed)\?mid=.+$"
# Matches URLs with streetview data
matches_regexp :streetview, %r"^(?:https?:)?//www\.google(?:\.(?:\w{2,}))+/maps[^@]+@(?<lon>-?[\d.]+),(?<lat>-?[\d.]+),(?:\d+)a,(?<zoom>[\d.]+)y,(?<heading>[\d.]+)h,(?<pitch>[\d.]+)t.+?data=.*?!1s(?<pano>[^!]{22})"
# Matches "normal" Google Maps URLs with arbitrary data
matches_regexp :standard, %r"^(?:https?:)?//www\.google(?:\.(?:\w{2,}))+/maps"
# Matches URLs for the old Google Maps domain which we occasionally get redirected to
matches_regexp :canonical, %r"^(?:https?:)?//maps\.google(?:\.(?:\w{2,}))+/maps\?"
# Runs the Engine initializer, then resolves the URL into its embeddable
# form. The network/HTTP errors listed in the rescue clause, and any
# RuntimeError raised while resolving, are re-raised as ArgumentError.
def initialize(url, timeout = nil)
super
resolve_url!
rescue Net::HTTPServerException, Timeout::Error, Net::HTTPError, Errno::ECONNREFUSED, RuntimeError => err
raise ArgumentError, "malformed url or unresolveable: #{err.message}"
end
# True when the URL was resolved as a street view link.
def streetview?
!!@streetview
end
# Iframe markup pointing at the resolved link, with the placeholder markup
# nested inside the iframe element.
def to_html
"<div class='maps-onebox'><iframe src=\"#{link}\" width=\"690\" height=\"400\" frameborder=\"0\" style=\"border:0\">#{placeholder_html}</iframe></div>"
end
def placeholder_html
::Onebox::Helpers.map_placeholder_html
end
private
def data
{ link: url, title: url }
end
# Classifies @url with the registered matchers and rewrites @url (and, for
# most types, @placeholder) into embeddable URLs. Raises when the URL cannot
# be classified or lacks the data needed to build an embed.
def resolve_url!
@streetview = false
type, match = match_url
# Resolve shortened URL, if necessary
if type == :short
follow_redirect!
type, match = match_url
end
# Try to get the old-maps URI, it is far easier to embed.
if type == :standard
# The counter is decremented before the comparison, so at most 9 redirect
# attempts are made, with a 0.1s pause after each unsuccessful one.
retry_count = 10
while (retry_count -= 1) > 0
follow_redirect!
type, match = match_url
break if type != :standard
sleep 0.1
end
end
case type
when :standard
# Fallback for map URLs that don't resolve into an easily embeddable old-style URI
# Roadmaps use a "z" zoomlevel, satellite maps use "m" the horizontal width in meters
# TODO: tilted satellite maps using "a,y,t"
# NOTE(review): Google Maps URLs normally read "@<lat>,<lng>"; the capture
# names lon/lat look swapped but are used consistently below -- confirm.
match = @url.match(/@(?<lon>[\d.-]+),(?<lat>[\d.-]+),(?<zoom>\d+)(?<mz>[mz])/)
raise "unexpected standard url #{@url}" unless match
zoom = match[:mz] == "z" ? match[:zoom] : Math.log2(57280048.0 / match[:zoom].to_f).round
location = "#{match[:lon]},#{match[:lat]}"
url = "https://maps.google.com/maps?ll=#{location}&z=#{zoom}&output=embed&dg=ntvb"
# $1 below is the place name captured by the match in the modifier condition.
url += "&q=#{$1}" if match = @url.match(/\/place\/([^\/\?]+)/)
# The two 16-digit hex groups are concatenated and converted to a decimal cid.
url += "&cid=#{($1 + $2).to_i(16)}" if @url.match(/!3m1!1s0x(\h{16}):0x(\h{16})/)
@url = url
@placeholder = "https://maps.googleapis.com/maps/api/staticmap?maptype=roadmap&center=#{location}&zoom=#{zoom}&size=690x400&sensor=false"
when :custom
# Same URL with its last path segment swapped for "embed" / "thumbnail".
url = @url.dup
@url = rewrite_custom_url(url, "embed")
@placeholder = rewrite_custom_url(url, "thumbnail")
@placeholder_height = @placeholder_width = 120
when :streetview
@streetview = true
panoid = match[:pano]
lon = match[:lon].to_f.to_s
lat = match[:lat].to_f.to_s
heading = match[:heading].to_f.round(4).to_s
# Pitch and field of view are rescaled (divided by 10 and 100) for the embed URL.
pitch = (match[:pitch].to_f / 10.0).round(4).to_s
fov = (match[:zoom].to_f / 100.0).round(4).to_s
zoom = match[:zoom].to_f.round
@url = "https://www.google.com/maps/embed?pb=!3m2!2sen!4v0!6m8!1m7!1s#{panoid}!2m2!1d#{lon}!2d#{lat}!3f#{heading}!4f#{pitch}!5f#{fov}"
@placeholder = "https://maps.googleapis.com/maps/api/streetview?size=690x400&location=#{lon},#{lat}&pano=#{panoid}&fov=#{zoom}&heading=#{heading}&pitch=#{pitch}&sensor=false"
when :canonical
query = URI::decode_www_form(uri.query).to_h
# Fall back to "sll" when "ll" is absent, appending it to the URL as "ll".
if !query.has_key?("ll")
raise ArgumentError, "canonical url lacks location argument" unless query.has_key?("sll")
query["ll"] = query["sll"]
@url += "&ll=#{query["sll"]}"
end
location = query["ll"]
# Without an explicit zoom ("z"), derive one from the span ("spn", falling
# back to "sspn"), using the first comma-separated component as the angle.
if !query.has_key?("z")
raise ArgumentError, "canonical url has incomplete query arguments" unless query.has_key?("spn") || query.has_key?("sspn")
if !query.has_key?("spn")
query["spn"] = query["sspn"]
@url += "&spn=#{query["sspn"]}"
end
angle = query["spn"].split(",").first.to_f
zoom = (Math.log(690.0 * 360.0 / angle / 256.0) / Math.log(2)).round
else
zoom = query["z"]
end
@url = @url.sub('output=classic', 'output=embed')
@placeholder = "https://maps.googleapis.com/maps/api/staticmap?maptype=roadmap&size=690x400&sensor=false&center=#{location}&zoom=#{zoom}"
else
raise "unexpected url type #{type.inspect}"
end
end
# Returns [key, MatchData] for the first registered matcher that matches
# @url; raises ArgumentError when none does.
def match_url
@@matchers.each do |matcher|
if m = matcher[:regexp].match(@url)
return matcher[:key], m
end
end
raise ArgumentError, "\"#{@url}\" does not match any known pattern"
end
# Replaces the final word of a "/maps/d/<word>" path with `target`.
def rewrite_custom_url(url, target)
uri = URI(url)
uri.path = uri.path.sub(/(?<=^\/maps\/d\/)\w+$/, target)
uri.to_s
end
# Issues a HEAD request for the current URI and updates @url/@uri: the URI
# is kept on a 200 and replaced by the Location header on 301/302. Any
# other status raises a RuntimeError.
# NOTE(review): only uri.path is sent, so the query string is not part of
# the request -- confirm this is intended.
def follow_redirect!
begin
http = Net::HTTP.start(
uri.host,
uri.port,
use_ssl: uri.scheme == 'https',
open_timeout: timeout,
read_timeout: timeout
)
response = http.head(uri.path)
raise "unexpected response code #{response.code}" unless %w(200 301 302).include?(response.code)
@url = response.code == "200" ? uri.to_s : response["Location"]
@uri = URI(@url)
ensure
# Best-effort close; errors (including http being nil) are swallowed.
http.finish rescue nil
end
end
end
end
end

View File

@@ -0,0 +1,73 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine for Google Photos links (single videos, albums, images).
    class GooglePhotosOnebox
      include Engine
      include StandardEmbed

      matches_regexp(/^https?:\/\/(photos)\.(app\.goo\.gl|google\.com)/)
      always_https

      # Picks the markup variant from the page's OpenGraph data: video first,
      # then album, then plain image. Returns nil when none applies.
      def to_html
        og = get_opengraph

        if og.video_secure_url
          video_html(og)
        elsif og.type == "google_photos:photo_album"
          album_html(og)
        elsif og.image
          image_html(og)
        end
      end

      private

      # Card markup for a video: source header, title and a linked preview image.
      def video_html(og)
        escaped_url = ::Onebox::Helpers.normalize_url_for_output(url)

        <<~HTML
          <aside class="onebox google-photos">
          <header class="source">
          <img src="#{raw[:favicon]}" class="site-icon" width="16" height="16">
          <a href="#{escaped_url}" target="_blank" rel="nofollow ugc noopener">#{raw[:site_name]}</a>
          </header>
          <article class="onebox-body">
          <h3><a href="#{escaped_url}" target="_blank" rel="nofollow ugc noopener">#{og.title}</a></h3>
          <div class="aspect-image-full-size">
          <a href="#{escaped_url}" target="_blank" rel="nofollow ugc noopener">
          <img src="#{og.secure_image_url}" class="scale-image"/>
          <span class="instagram-video-icon"></span>
          </a>
          </div>
          </article>
          </aside>
        HTML
      end

      # Markup for an album: cover image with a title overlay. The description,
      # when present, is shown in brackets before the title.
      def album_html(og)
        escaped_url = ::Onebox::Helpers.normalize_url_for_output(url)
        album_title =
          if og.description.nil?
            og.title
          else
            "[#{og.description}] #{og.title}"
          end

        <<~HTML
          <div class='onebox google-photos-album'>
          <a href='#{escaped_url}' target='_blank' rel='noopener'>
          <span class='outer-box' style='width:#{og.image_width}px'>
          <span class='inner-box'>
          <span class='album-title'>#{Onebox::Helpers.truncate(album_title, 80)}</span>
          </span>
          </span>
          <img src='#{og.secure_image_url}' #{og.title_attr} height='#{og.image_height}' width='#{og.image_width}'>
          </a>
          </div>
        HTML
      end

      # Markup for a single image: a link wrapping the image.
      def image_html(og)
        escaped_url = ::Onebox::Helpers.normalize_url_for_output(url)

        <<~HTML
          <a href='#{escaped_url}' target='_blank' rel='noopener' class="onebox">
          <img src='#{og.secure_image_url}' #{og.title_attr} alt='Google Photos' height='#{og.image_height}' width='#{og.image_width}'>
          </a>
        HTML
      end
    end
  end
end

View File

@@ -0,0 +1,31 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine for Google Play store app pages, built from the page's
    # <meta> tags.
    class GooglePlayAppOnebox
      include Engine
      include LayoutSupport
      include HTML

      DEFAULTS = {
        MAX_DESCRIPTION_CHARS: 500
      }

      matches_regexp(/^https?:\/\/play\.(?:(?:\w)+\.)?(google)\.com(?:\/)?\/store\/apps\//)
      always_https

      private

      # Template data. The price defaults to "Free" when the price meta tag
      # cannot be read or when its content is "0".
      def data
        price =
          begin
            meta_content("meta[itemprop=price]")
          rescue StandardError
            "Free"
          end

        {
          link: link,
          title: meta_content("meta[property='og:title']").gsub(" - Apps on Google Play", ""),
          image: ::Onebox::Helpers.normalize_url_for_output(meta_content("meta[property='og:image']")),
          description: "#{meta_content("meta[name=description]")[0..DEFAULTS[:MAX_DESCRIPTION_CHARS]].chop}...",
          price: price == "0" ? "Free" : price
        }
      end

      # "content" attribute of the first element matching the CSS selector.
      # Raises when no element matches.
      def meta_content(selector)
        raw.css(selector).first["content"]
      end
    end
  end
end

26
lib/onebox/engine/html.rb Normal file
View File

@@ -0,0 +1,26 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Mixin for engines that scrape the target page's HTML.
    #
    # It reads `url` and `options` from the including object.
    module HTML
      private

      # Extra HTTP headers sent with the page request.
      # Overwrite for any custom headers
      def http_params
        {}
      end

      # Memoized document fetched through Onebox::Helpers.fetch_html_doc.
      def raw
        @raw ||= Onebox::Helpers.fetch_html_doc(url, http_params, body_cacher)
      end

      # Optional :body_cacher entry from the engine options; nil when the
      # options themselves are nil.
      def body_cacher
        self.options&.[](:body_cacher)
      end

      # True when the fetched document responds to #css.
      # (Previously called the non-existent `respond_to`, which raised
      # NoMethodError on every call.)
      def html?
        raw.respond_to?(:css)
      end
    end
  end
end

View File

@@ -0,0 +1,29 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine for direct links to image files (matched by extension).
    class ImageOnebox
      include Engine

      matches_regexp(/^(https?:)?\/\/.+\.(png|jpg|jpeg|gif|bmp|tif|tiff)(\?.*)?$/i)

      # HTTPS is forced only for hosts on the generic engine's https list.
      def always_https?
        AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
      end

      # Link-wrapped <img> markup for the image URL.
      #
      # Dropbox share links are rewritten to the direct-download host first.
      # The rewrite reassigns @url instead of mutating it in place, so the
      # string object handed in by the caller is left untouched and a frozen
      # URL no longer raises FrozenError.
      def to_html
        # Fix Dropbox image links
        if @url[/^https:\/\/www.dropbox.com\/s\//]
          @url = @url.sub("https://www.dropbox.com", "https://dl.dropboxusercontent.com")
        end

        escaped_url = ::Onebox::Helpers.normalize_url_for_output(@url)

        <<~HTML
          <a href="#{escaped_url}" target="_blank" rel="noopener" class="onebox">
          <img src="#{escaped_url}">
          </a>
        HTML
      end
    end
  end
end

View File

@@ -0,0 +1,67 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine for imgur.com links (videos, albums and single images).
    class ImgurOnebox
      include Engine
      include StandardEmbed

      matches_regexp(/^https?:\/\/(www\.)?imgur\.com/)
      always_https

      # Picks the markup variant: video when OpenGraph exposes a secure video
      # URL, album when the oEmbed data says so, image when an og:image
      # exists. Returns nil otherwise.
      def to_html
        og = get_opengraph

        if !og.video_secure_url.nil?
          video_html(og)
        elsif is_album?
          album_html(og)
        elsif !og.image.nil?
          image_html(og)
        end
      end

      private

      # <video> markup with an mp4 source and a webm source derived from it by
      # replacing "mp4" with "webm" in the URL.
      def video_html(og)
        <<~HTML
          <video width='#{og.video_width}' height='#{og.video_height}' #{og.title_attr} controls loop>
          <source src='#{og.video_secure_url}' type='video/mp4'>
          <source src='#{og.video_secure_url.gsub('mp4', 'webm')}' type='video/webm'>
          </video>
        HTML
      end

      # Markup for an album: cover image with an "[Album] <title>" overlay.
      def album_html(og)
        escaped_url = ::Onebox::Helpers.normalize_url_for_output(url)
        album_title = "[Album] #{og.title}"

        <<~HTML
          <div class='onebox imgur-album'>
          <a href='#{escaped_url}' target='_blank' rel='noopener'>
          <span class='outer-box' style='width:#{og.image_width}px'>
          <span class='inner-box'>
          <span class='album-title'>#{album_title}</span>
          </span>
          </span>
          <img src='#{og.secure_image_url}' #{og.title_attr} height='#{og.image_height}' width='#{og.image_width}'>
          </a>
          </div>
        HTML
      end

      # Truthy when the oEmbed blockquote's data-id contains "a/". A failed
      # oEmbed request is treated as an empty JSON document.
      def is_album?
        response =
          begin
            Onebox::Helpers.fetch_response("https://api.imgur.com/oembed.json?url=#{url}")
          rescue StandardError
            "{}"
          end

        oembed_data = Onebox::Helpers.symbolize_keys(::MultiJson.load(response))
        blockquotes = Nokogiri::HTML(oembed_data[:html]).xpath("//blockquote")
        blockquotes.attr("data-id").to_s[/a\//]
      end

      # Markup for a single image; a trailing "?fb" is removed from the image URL.
      def image_html(og)
        escaped_url = ::Onebox::Helpers.normalize_url_for_output(url)

        <<~HTML
          <a href='#{escaped_url}' target='_blank' rel='noopener' class="onebox">
          <img src='#{og.secure_image_url.chomp("?fb")}' #{og.title_attr} alt='Imgur'>
          </a>
        HTML
      end
    end
  end
end

View File

@@ -0,0 +1,46 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine for Instagram posts and IGTV links, backed by oEmbed.
    class InstagramOnebox
      include Engine
      include StandardEmbed
      include LayoutSupport

      matches_regexp(/^https?:\/\/(?:www\.)?(?:instagram\.com|instagr\.am)\/?(?:.*)\/(?:p|tv)\/[a-zA-Z\d_-]+/)
      always_https

      # The portion of the URL up to and including the post identifier, or nil
      # when the URL does not match.
      def clean_url
        url[/^https?:\/\/(?:www\.)?(?:instagram\.com|instagr\.am)\/?(?:.*)\/(?:p|tv)\/[a-zA-Z\d_-]+/]
      end

      # Template data from the oEmbed response. Raises when the response
      # carries no data.
      def data
        oembed = get_oembed
        if oembed.data.empty?
          raise "No oEmbed data found. Ensure 'facebook_app_access_token' is valid"
        end

        author = oembed.author_name

        {
          # Drops an "/<author>/" segment from the link when present.
          link: clean_url.gsub("/#{author}/", "/"),
          title: "@#{oembed.author_name}",
          image: oembed.thumbnail_url,
          description: Onebox::Helpers.truncate(oembed.title, 250),
        }
      end

      protected

      # Token from the per-call options, falling back to the global Onebox
      # options; always a String (empty when unset).
      def access_token
        (options[:facebook_app_access_token] || Onebox.options.facebook_app_access_token).to_s
      end

      # oEmbed endpoint: the Graph API when a token is configured, otherwise
      # the legacy Instagram endpoint.
      def get_oembed_url
        if access_token.empty?
          # The following is officially deprecated by Instagram, but works in some limited circumstances.
          "https://api.instagram.com/oembed/?url=#{clean_url}"
        else
          "https://graph.facebook.com/v9.0/instagram_oembed?url=#{clean_url}&access_token=#{access_token}"
        end
      end
    end
  end
end

13
lib/onebox/engine/json.rb Normal file
View File

@@ -0,0 +1,13 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Mixin for engines whose `url` points at a JSON API endpoint.
    #
    # It reads `url` and `timeout` from the including object.
    module JSON
      private

      # Memoized, decoded JSON document fetched from `url`.
      #
      # The block form of URI.open is used so the underlying handle is closed
      # as soon as the body has been read, instead of staying open until it
      # is garbage-collected.
      def raw
        @raw ||= URI.open(url, read_timeout: timeout) { |io| ::MultiJson.load(io.read) }
      end
    end
  end
end

View File

@@ -0,0 +1,36 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine embedding Kaltura-hosted videos via their OpenGraph data.
    class KalturaOnebox
      include Engine
      include StandardEmbed

      always_https
      matches_regexp(/^https?:\/\/[a-z0-9]+\.kaltura\.com\/id\/[a-zA-Z0-9]+/)
      requires_iframe_origins "https://*.kaltura.com"

      # Static preview: the OpenGraph image sized to the video's dimensions.
      def preview_html
        graph = get_opengraph
        poster = graph.image_secure_url
        width = graph.video_width
        height = graph.video_height

        <<~HTML
          <img src="#{poster}" width="#{width}" height="#{height}">
        HTML
      end

      # Iframe player pointing at the OpenGraph secure video URL.
      def to_html
        graph = get_opengraph
        player = graph.video_secure_url
        width = graph.video_width
        height = graph.video_height

        <<~HTML
          <iframe
          src="#{player}"
          width="#{width}"
          height="#{height}"
          frameborder='0'
          allowfullscreen
          ></iframe>
        HTML
      end
    end
  end
end

View File

@@ -0,0 +1,22 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine for mixcloud.com, rendered from the oEmbed response.
    class MixcloudOnebox
      include Engine
      include StandardEmbed

      matches_regexp(/^https?:\/\/www\.mixcloud\.com\//)
      always_https

      # Image placeholder built from the oEmbed image, height and title attribute.
      def placeholder_html
        embed = get_oembed
        image = embed.image
        height = embed.height
        "<img src='#{image}' height='#{height}' #{embed.title_attr}>"
      end

      # The embed markup supplied by the oEmbed response.
      def to_html
        embed = get_oembed
        embed.html
      end
    end
  end
end

View File

@@ -0,0 +1,13 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Mixin rendering a page as a bare <img> tag from its OpenGraph image.
    #
    # It calls `get_opengraph` on the including object.
    module OpengraphImage
      # <img> markup carrying the OpenGraph image URL, its dimensions, the
      # "onebox" class and the title attribute.
      def to_html
        graph = get_opengraph
        attributes = [
          "src='#{graph.image}'",
          "width='#{graph.image_width}'",
          "height='#{graph.image_height}'",
          "class='onebox'",
          graph.title_attr.to_s
        ]
        "<img #{attributes.join(' ')}>"
      end
    end
  end
end

View File

@@ -0,0 +1,55 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine showing the first lines of a pastebin.com paste.
    class PastebinOnebox
      include Engine
      include LayoutSupport

      # Maximum number of paste lines shown in the preview.
      MAX_LINES = 10

      # `https?` (rather than the former `http?`, which only accepted "htt:"
      # and "http:") so that https:// links are recognised as well.
      matches_regexp(/^https?:\/\/pastebin\.com/)

      private

      # Memoized template data: fixed title, link, preview text and a flag
      # telling whether the paste was longer than the preview.
      def data
        @data ||= {
          title: 'pastebin.com',
          link: link,
          content: content,
          truncated?: truncated?
        }
      end

      # The first MAX_LINES lines of the paste, joined with newlines.
      def content
        lines.take(MAX_LINES).join("\n")
      end

      # True when the paste has more than MAX_LINES lines.
      def truncated?
        lines.size > MAX_LINES
      end

      # Memoized lines of the raw paste. A failed fetch yields no lines.
      def lines
        return @lines if defined?(@lines)
        response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", redirect_limit: 1) rescue ""
        @lines = response.split("\n")
      end

      # Paste identifier taken from the URL path: the segment following
      # /raw/, /download/ or /embed/ when present, otherwise the first path
      # segment. Returns nil when nothing matches.
      #
      # The `case` subject is the path string; the URI object itself never
      # matches a Regexp in `when`, which made every URL fall through to the
      # generic pattern (so "/raw/KEY" produced the key "raw").
      def paste_key
        regex = case uri.path
                when /\/raw\//
                  /\/raw\/([^\/]+)/
                when /\/download\//
                  /\/download\/([^\/]+)/
                when /\/embed\//
                  /\/embed\/([^\/]+)/
                else
                  /\/([^\/]+)/
        end

        match = uri.path.match(regex)
        match[1] if match && match[1]
      end
    end
  end
end

View File

@@ -0,0 +1,29 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine for direct links to PDF files.
    class PdfOnebox
      include Engine
      include LayoutSupport

      matches_regexp(/^(https?:)?\/\/.*\.pdf(\?.*)?$/i)
      always_https

      private

      # Template data: link, file name taken from the URL path, and a
      # human-readable file size (nil when the length is unknown).
      def data
        size = content_length
        filesize = (Onebox::Helpers.pretty_filesize(size.to_i) if size)

        {
          link: link,
          title: File.basename(uri.path),
          filesize: filesize,
        }
      end

      # Content length reported for the file; any failure while fetching it
      # is re-raised as a RuntimeError naming the URL.
      def content_length
        Onebox::Helpers.fetch_content_length(@url)
      rescue StandardError
        raise "Unable to read pdf file: #{@url}"
      end
    end
  end
end

View File

@@ -0,0 +1,60 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine for PubMed article pages, built from the article's XML report.
    class PubmedOnebox
      include Engine
      include LayoutSupport

      matches_regexp(/^https?:\/\/(?:(?:\w)+\.)?(www.ncbi.nlm.nih)\.gov(?:\/)?\/pubmed\/\d+/)

      private

      # Memoized XML document. The report page is fetched, the text of its
      # <pre> elements is extracted and re-parsed inside a synthetic <root>.
      def xml
        return @xml if defined?(@xml)

        report = Nokogiri::XML(URI.open(URI.join(@url, "?report=xml&format=text")))
        embedded = report.xpath("//pre").text
        @xml = Nokogiri::XML("<root>#{embedded}</root>")
      end

      # Author list formatted as "A B, C D and E F": initials are paired with
      # last names by position, and the last two entries are joined by " and ".
      def authors
        initials = xml.css("Initials").map(&:content)
        last_names = xml.css("LastName").map(&:content)
        names = initials.zip(last_names).map { |initial, last_name| initial + " " + last_name }

        if names.length > 1
          final = names.pop
          names[-1] = names[-1] + " and " + final
        end

        names.join(", ")
      end

      # Publication date tokens joined by spaces.
      def date
        tokens = xml.css("PubDate")
          .children
          .map(&:content)
          .reject { |text| text.match(/^\s+$/) }
          .flat_map(&:split)

        # Reverse sort so month before year.
        tokens.sort.reverse.join(" ")
      end

      # Template data for the article card.
      def data
        {
          title: xml.css("ArticleTitle").text,
          authors: authors,
          journal: xml.css("Title").text,
          abstract: xml.css("AbstractText").text,
          date: date,
          link: @url,
          pmid: match[:pmid]
        }
      end

      # Memoized MatchData exposing the numeric :pmid from the URL.
      def match
        @match ||= @url.match(%r{www\.ncbi\.nlm\.nih\.gov/pubmed/(?<pmid>[0-9]+)})
      end
    end
  end
end

View File

@@ -0,0 +1,55 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine for reddit.com links. Image and video posts get a
    # dedicated card; everything else is delegated to the generic engine.
    class RedditMediaOnebox
      include Engine
      include StandardEmbed

      matches_regexp(/^https?:\/\/(www\.)?reddit\.com/)

      # Card markup chosen by the post type found in the scraped metadata.
      # Returns nil when the generic engine produces blank output.
      def to_html
        kind = raw[:type]

        if kind == "image"
          image_post_html
        elsif kind =~ /^video[\/\.]/
          video_post_html
        else
          html = Onebox::Engine::AllowlistedGenericOnebox.new(@url, @timeout).to_html
          Onebox::Helpers.blank?(html) ? nil : html
        end
      end

      private

      # Card for image posts: source header, title, image and description.
      def image_post_html
        <<~HTML
          <aside class="onebox reddit">
          <header class="source">
          <img src="#{raw[:favicon]}" class="site-icon" width="16" height="16">
          <a href="#{raw[:url]}" target="_blank" rel="nofollow ugc noopener">#{raw[:site_name]}</a>
          </header>
          <article class="onebox-body">
          <h3><a href="#{raw[:url]}" target="_blank" rel="nofollow ugc noopener">#{raw[:title]}</a></h3>
          <div class="scale-images">
          <img src="#{raw[:image]}" class="scale-image"/>
          </div>
          <div class="description"><p>#{raw[:description]}</p></div>
          </article>
          </aside>
        HTML
      end

      # Card for video posts: like the image card, but the preview image links
      # to the post and carries a video icon overlay.
      def video_post_html
        <<~HTML
          <aside class="onebox reddit">
          <header class="source">
          <img src="#{raw[:favicon]}" class="site-icon" width="16" height="16">
          <a href="#{raw[:url]}" target="_blank" rel="nofollow ugc noopener">#{raw[:site_name]}</a>
          </header>
          <article class="onebox-body">
          <h3><a href="#{raw[:url]}" target="_blank" rel="nofollow ugc noopener">#{raw[:title]}</a></h3>
          <div class="aspect-image-full-size">
          <a href="#{raw[:url]}" target="_blank" rel="nofollow ugc noopener">
          <img src="#{raw[:image]}" class="scale-image"/>
          <span class="instagram-video-icon"></span>
          </a>
          </div>
          <div class="description"><p>#{raw[:description]}</p></div>
          </article>
          </aside>
        HTML
      end
    end
  end
end

View File

@@ -0,0 +1,25 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine for repl.it links, rendered from the oEmbed response.
    class ReplitOnebox
      include Engine
      include StandardEmbed

      matches_regexp(/^https?:\/\/repl\.it\/.+/)
      always_https

      # Thumbnail placeholder constrained to the oEmbed width and height.
      def placeholder_html
        embed = get_oembed
        thumbnail = embed.thumbnail_url
        max_width = embed.width
        max_height = embed.height

        <<~HTML
          <img src="#{thumbnail}" style="max-width: #{max_width}px; max-height: #{max_height}px;" #{embed.title_attr}>
        HTML
      end

      # The embed markup supplied by the oEmbed response.
      def to_html
        embed = get_oembed
        embed.html
      end
    end
  end
end

View File

@@ -0,0 +1,36 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine for Simplecast episodes, rendered from the oEmbed response.
    class SimplecastOnebox
      include Engine
      include StandardEmbed

      matches_regexp(/https?:\/\/(.+)?simplecast.com\/(episodes|s)\/.*/)
      always_https
      requires_iframe_origins("https://embed.simplecast.com")

      # The embed markup supplied by the oEmbed response.
      def to_html
        get_oembed.html
      end

      # Thumbnail placeholder; nil when the oEmbed response has no thumbnail.
      def placeholder_html
        oembed = get_oembed
        return if Onebox::Helpers.blank?(oembed.thumbnail_url)
        "<img src='#{oembed.thumbnail_url}' #{oembed.title_attr}>"
      end

      private

      # oEmbed endpoint for the episode.
      #
      # The trailing run of alphanumerics in the URL is used as the episode id
      # and turned into a short "https://simplecast.com/s/<id>" link. When the
      # URL does not end in an alphanumeric run (e.g. it ends with "/"), the
      # original URL is passed through unchanged. The former `*` quantifier
      # always produced at least an empty id, which made this fallback
      # unreachable and yielded "https://simplecast.com/s/".
      def get_oembed_url
        id = url[/[a-zA-Z0-9]+\Z/]
        oembed_url = id ? "https://simplecast.com/s/#{id}" : url

        "https://simplecast.com/oembed?url=#{oembed_url}"
      end
    end
  end
end

View File

@@ -0,0 +1,34 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine embedding Sketchfab 3D models via their OpenGraph data.
    class SketchFabOnebox
      include Engine
      include StandardEmbed

      matches_regexp(/^https?:\/\/sketchfab\.com\/(?:models\/|3d-models\/(?:[^\/\s]+-)?)([a-z0-9]{32})/)
      always_https
      requires_iframe_origins("https://sketchfab.com")

      # Iframe player; "autostart=1" is removed from the OpenGraph video URL.
      def to_html
        graph = get_opengraph
        player_src = graph.video_url.gsub("autostart=1", "")

        <<~HTML
          <iframe
          src="#{player_src}"
          width="#{graph.video_width}"
          height="#{graph.video_height}"
          scrolling="no"
          frameborder="0"
          allowfullscreen
          ></iframe>
        HTML
      end

      # Placeholder showing the OpenGraph image.
      def placeholder_html
        preview = get_opengraph.image
        "<img src='#{preview}'>"
      end
    end
  end
end

View File

@@ -0,0 +1,33 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine embedding slides.com presentations.
    class SlidesOnebox
      include Engine
      include StandardEmbed

      matches_regexp(/^https?:\/\/slides\.com\/[\p{Alnum}_\-]+\/[\p{Alnum}_\-]+$/)
      requires_iframe_origins "https://slides.com"

      # Iframe pointing at the deck's embed page (light style).
      def to_html
        deck_path = uri.path

        <<~HTML
          <iframe
          src="https://slides.com#{deck_path}/embed?style=light"
          width="576"
          height="420"
          scrolling="no"
          frameborder="0"
          webkitallowfullscreen
          mozallowfullscreen
          allowfullscreen
          ></iframe>
        HTML
      end

      # Placeholder showing the image found in the scraped metadata.
      def placeholder_html
        preview = raw[:image]
        "<img src='#{::Onebox::Helpers.normalize_url_for_output(preview)}'>"
      end
    end
  end
end

View File

@@ -0,0 +1,33 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine for soundcloud.com, rendered from the oEmbed response.
    class SoundCloudOnebox
      include Engine
      include StandardEmbed

      matches_regexp(/^https?:\/\/soundcloud\.com/)
      requires_iframe_origins "https://w.soundcloud.com"
      always_https

      # oEmbed markup with 'visual=true' switched to 'visual=false'.
      def to_html
        get_oembed.html.gsub('visual=true', 'visual=false')
      end

      # Thumbnail placeholder; nil when the oEmbed response has no thumbnail.
      def placeholder_html
        embed = get_oembed
        thumbnail = embed.thumbnail_url
        return if Onebox::Helpers.blank?(thumbnail)

        "<img src='#{embed.thumbnail_url}' #{embed.title_attr}>"
      end

      protected

      # oEmbed endpoint; a max height of 166 is requested unless the URL
      # contains "/sets/".
      def get_oembed_url
        height_param = url["/sets/"] ? "" : "&maxheight=166"
        "https://soundcloud.com/oembed.json?url=#{url}#{height_param}"
      end
    end
  end
end

View File

@@ -0,0 +1,56 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine for questions and answers on Stack Exchange network sites,
    # backed by the Stack Exchange API.
    class StackExchangeOnebox
      include Engine
      include LayoutSupport
      include JSON

      # Regexp-escaped list of supported site domains.
      def self.domains
        %w(stackexchange.com stackoverflow.com superuser.com serverfault.com askubuntu.com stackapps.com mathoverflow.net)
          .map { |domain| Regexp.escape(domain) }
      end

      matches_regexp(/^https?:\/\/(?:(?:(?<subsubdomain>\w*)\.)?(?<subdomain>\w*)\.)?(?<domain>#{domains.join('|')})\/((?:questions|q)\/(?<question_id>\d*)(\/.*\/(?<answer_id1>\d*))?|(a\/(?<answer_id2>\d*)))/)

      # HTTPS is forced only for hosts made of at most three dot-separated labels.
      def always_https?
        labels = uri.host.split('.')
        labels.length <= 3
      end

      private

      # Memoized match of the URL against the class-level matcher.
      def match
        @match ||= @url.match(@@matcher)
      end

      # API endpoint: the answer endpoint when the URL carries an answer id,
      # the question endpoint otherwise.
      def url
        site = uri.host
        question = match[:question_id]
        answer = match[:answer_id2] || match[:answer_id1]

        return "https://api.stackexchange.com/2.2/questions/#{question}?site=#{site}&filter=!5-duuxrJa-iw9oVvOA(JNimB5VIisYwZgwcfNI" unless answer

        "https://api.stackexchange.com/2.2/answers/#{answer}?site=#{site}&filter=!.FjueITQdx6-Rq3Ue9PWG.QZ2WNdW"
      end

      # Memoized template data: the first API item with a formatted creation
      # date, at most four tags joined by ", " and answer/question flags.
      # nil when the API returned no items.
      def data
        return @data if defined?(@data)

        item = raw['items'][0]
        unless item.nil? || item == false
          created = Time.at(item['creation_date'].to_i)
          item['creation_date'] = created.strftime("%I:%M%p - %d %b %y %Z")
          item['tags'] = item['tags'].take(4).join(', ')
          item['is_answer'] = item.key?('answer_id')
          item['is_question'] = item.key?('question_id')
        end

        @data = item
      end
    end
  end
end

View File

@@ -0,0 +1,145 @@
# frozen_string_literal: true
require "cgi"
require "onebox/open_graph"
require 'onebox/oembed'
module Onebox
  module Engine
    # Mixin that collects a page's embed metadata (OpenGraph, Twitter card
    # tags, oEmbed and favicon) and merges it into a single hash (#raw).
    #
    # It reads `url`, `uri` and `options` from the including object.
    module StandardEmbed
      # Registry mapping URL patterns to explicit oEmbed endpoints.
      def self.oembed_providers
        @@oembed_providers ||= {}
      end

      # Registers an oEmbed endpoint for URLs matching `regexp`.
      def self.add_oembed_provider(regexp, endpoint)
        oembed_providers[regexp] = endpoint
      end

      # Registry of URL patterns flagged as OpenGraph providers.
      def self.opengraph_providers
        @@opengraph_providers ||= []
      end

      # Registers a URL pattern as an OpenGraph provider.
      def self.add_opengraph_provider(regexp)
        opengraph_providers << regexp
      end

      # Built-in oEmbed endpoints, registered in this order:
      # - meetup.com / mixcloud.com: some oembed providers don't provide links to themselves
      # - vimeo.com: in order to support Private Videos
      # - nytimes.com: NYT requires login so use oembed only
      {
        /www\.meetup\.com\// => 'http://api.meetup.com/oembed',
        /www\.mixcloud\.com\// => 'https://www.mixcloud.com/oembed/',
        /vimeo\.com\// => 'https://vimeo.com/api/oembed.json',
        /nytimes\.com\// => 'https://www.nytimes.com/svc/oembed/json/',
      }.each { |regexp, endpoint| add_oembed_provider(regexp, endpoint) }

      # True for hosts on the generic engine's https list; otherwise defers to super.
      def always_https?
        AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts) || super
      end

      # Memoized merged metadata. Sources are applied in priority order
      # (OpenGraph, Twitter tags, oEmbed); a later source never overrides a
      # key already set by an earlier one. The favicon is stored under :favicon.
      def raw
        return @raw if defined?(@raw)

        graph = get_opengraph
        twitter_meta = get_twitter
        embed = get_oembed

        @raw = {}

        graph.data.each do |key, _|
          next if key == "title_attr"
          value = graph.send(key)
          next if value.nil?
          @raw[key] ||= value
        end

        twitter_meta.each do |key, value|
          next if Onebox::Helpers.blank?(value)
          @raw[key] ||= value
        end

        embed.data.each do |key, _|
          value = embed.send(key)
          next if value.nil?
          @raw[key] ||= value
        end

        icon = get_favicon
        @raw[:favicon] = icon unless Onebox::Helpers.blank?(icon)

        @raw
      end

      protected

      # Memoized page document; a Cookie header is sent when options[:cookie] is set.
      def html_doc
        return @html_doc if defined?(@html_doc)

        cookie = options[:cookie]
        headers = cookie ? { 'Cookie' => cookie } : nil
        @html_doc = Onebox::Helpers.fetch_html_doc(url, headers)
      end

      # Memoized oEmbed wrapper around the JSON response.
      def get_oembed
        @oembed ||= Onebox::Oembed.new(get_json_response)
      end

      # Memoized OpenGraph wrapper around the page document.
      def get_opengraph
        @opengraph ||= ::Onebox::OpenGraph.new(html_doc)
      end

      # Hash of "twitter:*" meta tags keyed by the symbolized suffix ("-" and
      # ":" become "_"). Blank values and "0 minutes" are skipped; the first
      # occurrence of a key wins.
      def get_twitter
        return {} unless html_doc

        pattern = /^twitter:(.+)$/i

        html_doc.css('meta').each_with_object({}) do |meta, found|
          suffix = (meta["property"] && meta["property"][pattern, 1]) ||
            (meta["name"] && meta["name"][pattern, 1])
          next unless suffix

          value = (meta["content"] || meta["value"]).to_s
          next if Onebox::Helpers.blank?(value) || value == "0 minutes"

          found[suffix.tr('-:' , '_').to_sym] ||= value
        end
      end

      # Absolute URL of the page's favicon, resolved against the page URL;
      # nil when there is no page document.
      def get_favicon
        return nil unless html_doc

        icon_link = html_doc.css('link[rel="shortcut icon"], link[rel="icon shortcut"], link[rel="shortcut"], link[rel="icon"]').first
        href = icon_link&.[]('href')&.strip
        Onebox::Helpers.get_absolute_image_url(href, url)
      end

      # Raw oEmbed JSON body, or "{}" when there is no endpoint or the request fails.
      def get_json_response
        oembed_url = get_oembed_url
        return "{}" if Onebox::Helpers.blank?(oembed_url)

        begin
          Onebox::Helpers.fetch_response(oembed_url)
        rescue StandardError
          "{}"
        end
      rescue Errno::ECONNREFUSED, Net::HTTPError, Net::HTTPFatalError, MultiJson::LoadError
        "{}"
      end

      # oEmbed endpoint for the page: a registered provider when the URL
      # matches one, otherwise the endpoint advertised by the page's
      # application/json+oembed or text/json+oembed <link>. May be nil.
      def get_oembed_url
        oembed_url = nil

        provider = StandardEmbed.oembed_providers.find { |regexp, _| url =~ regexp }
        oembed_url = "#{provider[1]}?url=#{url}" if provider

        if html_doc
          [
            "//link[@type='application/json+oembed']/@href",
            "//link[@type='text/json+oembed']/@href",
          ].each do |xpath|
            next unless Onebox::Helpers.blank?(oembed_url)

            href = html_doc.at(xpath)
            oembed_url ||= href.value if href
          end
        end

        oembed_url
      end
    end
  end
end

View File

@@ -0,0 +1,41 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine embedding the Steam store widget for an app page.
    class SteamStoreOnebox
      include Engine
      include StandardEmbed

      always_https
      matches_regexp(/^https?:\/\/store\.steampowered\.com\/app\/\d+/)
      requires_iframe_origins "https://store.steampowered.com"

      # Static card built from the OpenGraph title, image and description.
      def placeholder_html
        graph = get_opengraph

        <<~HTML
          <div style='width:100%; height:190px; background-color:#262626; color:#9e9e9e; margin:15px 0;'>
          <div style='padding:10px'>
          <h3 style='color:#fff; margin:10px 0 10px 5px;'>#{graph.title}</h3>
          <img src='#{graph.image}' style='float:left; max-width:184px; margin:5px 15px 0 5px'/>
          <p>#{graph.description}</p>
          </div>
          </div>
        HTML
      end

      # Iframe pointing at the widget URL, obtained by replacing "/app/" with
      # "/widget/" in the matched part of the page URL.
      def to_html
        app_url = @url[/https?:\/\/store\.steampowered\.com\/app\/\d+/]
        widget_src = ::Onebox::Helpers.normalize_url_for_output(app_url.gsub("/app/", "/widget/"))

        <<~HTML
          <iframe
          src='#{widget_src}'
          frameborder='0'
          width='100%'
          height='190'
          ></iframe>
        HTML
      end
    end
  end
end

View File

@@ -0,0 +1,34 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine embedding Trello boards and cards in an iframe.
    class TrelloOnebox
      include Engine
      include StandardEmbed

      matches_regexp(/^https:\/\/trello\.com\/[bc]\/\W*/)
      requires_iframe_origins "https://trello.com"
      always_https

      # Iframe pointing at the ".html" view of the board or card; boards
      # (type "b") get a taller frame than cards.
      def to_html
        src = "https://trello.com/#{match[:type]}/#{match[:key]}.html"
        height = match[:type] == 'b' ? 400 : 200

        <<~HTML
          <iframe src="#{src}" width="100%" height="#{height}" frameborder="0" style="border:0"></iframe>
        HTML
      end

      def placeholder_html
        ::Onebox::Helpers.generic_placeholder_html
      end

      private

      # Memoized MatchData exposing :type and :key from the URL.
      #
      # The pattern is a real regexp literal (%r{}). It used to be a %{}
      # string literal, in which "\." and "\W" silently lose their
      # backslashes before the string is converted to a regexp.
      def match
        return @match if defined?(@match)
        @match = @url.match(%r{trello\.com/(?<type>[^/]+)/(?<key>[^/]+)/?\W*})
      end
    end
  end
end

View File

@@ -0,0 +1,20 @@
# frozen_string_literal: true
require_relative '../mixins/twitch_onebox'
# Onebox engine for clips hosted on clips.twitch.tv.
class Onebox::Engine::TwitchClipsOnebox
  class << self
    # Pattern identifying clip URLs; the first capture group is the clip path.
    # Defined before the mixin is included, as in the original ordering.
    def twitch_regexp
      /^https?:\/\/clips\.twitch\.tv\/([a-zA-Z0-9_]+\/?[^#\?\/]+)/
    end
  end

  include Onebox::Mixins::TwitchOnebox
  requires_iframe_origins "https://clips.twitch.tv"

  # Query string selecting the clip.
  def query_params
    "clip=" + twitch_id.to_s
  end

  # Host and path of the clip embed player.
  def base_url
    "clips.twitch.tv/embed?"
  end
end

View File

@@ -0,0 +1,15 @@
# frozen_string_literal: true
require_relative '../mixins/twitch_onebox'
# Onebox engine for Twitch channel (live stream) URLs.
class Onebox::Engine::TwitchStreamOnebox
  class << self
    # Pattern identifying channel URLs; "directory" is excluded and the first
    # capture group is the 4-25 character channel name.
    # Defined before the mixin is included, as in the original ordering.
    def twitch_regexp
      /^https?:\/\/(?:www\.|go\.)?twitch\.tv\/(?!directory)([a-zA-Z0-9_]{4,25})$/
    end
  end

  include Onebox::Mixins::TwitchOnebox

  # Query string selecting the channel.
  def query_params
    "channel=" + twitch_id.to_s
  end
end

View File

@@ -0,0 +1,15 @@
# frozen_string_literal: true
require_relative '../mixins/twitch_onebox'
# Onebox engine for Twitch video (VOD) URLs.
class Onebox::Engine::TwitchVideoOnebox
  class << self
    # Pattern identifying video URLs; the first capture group is the numeric
    # video id. Defined before the mixin is included, as in the original ordering.
    def twitch_regexp
      /^https?:\/\/(?:www\.)?twitch\.tv\/videos\/([0-9]+)/
    end
  end

  include Onebox::Mixins::TwitchOnebox

  # Query string selecting the video; the id is prefixed with "v".
  def query_params
    "video=v" + twitch_id.to_s
  end
end

View File

@@ -0,0 +1,172 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine for individual tweet URLs on twitter.com.
    #
    # Every field has two sources: when a Twitter client with credentials is
    # available (see #twitter_api_credentials_present?) values are read from
    # the API status; otherwise they are scraped from the fetched page
    # (OpenGraph meta tags and CSS selectors).
    class TwitterStatusOnebox
      include Engine
      include LayoutSupport
      include HTML

      matches_regexp(/^https?:\/\/(mobile\.|www\.)?twitter\.com\/.+?\/status(es)?\/\d+(\/(video|photo)\/\d?+)?+(\/?\?.*)?\/?$/)
      always_https

      # Headers sent when fetching the tweet page.
      def http_params
        { 'User-Agent' => 'DiscourseBot/1.0' }
      end

      private

      # Fetches the tweet page and collects its OpenGraph <meta> tags.
      #
      # @return [Hash{Symbol=>String}] content keyed by the property name with
      #   its leading "og:" removed; empty when the fetch fails.
      def get_twitter_data
        response =
          begin
            Onebox::Helpers.fetch_response(url, headers: http_params)
          rescue StandardError
            # Best effort: a failed fetch yields an empty document below.
            nil
          end

        html = Nokogiri::HTML(response)
        twitter_data = {}

        html.css('meta').each do |m|
          property = m.attribute('property').to_s
          next unless property.match?(/^og:/i)

          # Strip only the leading prefix, with the same case-insensitivity as
          # the check above, so later "og:" substrings are left untouched.
          key = property.sub(/^og:/i, '')
          twitter_data[key.to_sym] = m.attribute('content').to_s.strip
        end

        twitter_data
      end

      # MatchData exposing the tweet id as `match[:id]`.
      def match
        @match ||= @url.match(%r{twitter\.com/.+?/status(es)?/(?<id>\d+)})
      end

      # Memoized OpenGraph data scraped from the page.
      def twitter_data
        @twitter_data ||= get_twitter_data
      end

      # Twitter client configured through Onebox options (may be nil).
      def client
        Onebox.options.twitter_client
      end

      # True when a client exists and it reports its credentials as present.
      def twitter_api_credentials_present?
        client && !client.twitter_credentials_missing?
      end

      # API mode: the status wrapped in an OpenStruct (memoized).
      # Otherwise defers to `super`.
      # NOTE(review): presumably the HTML mixin's parsed document, since
      # callers below use `css`/`at_css` on it - confirm.
      def raw
        if twitter_api_credentials_present?
          @raw ||= OpenStruct.new(client.status(match[:id]).to_hash)
        else
          super
        end
      end

      # Walks nested keys in `raw`, trying each key as given and then as a
      # String. Returns nil as soon as a level is missing.
      def access(*keys)
        keys.reduce(raw) do |memo, key|
          next unless memo
          memo[key] || memo[key.to_s]
        end
      end

      # Tweet text. In scrape mode the curly quotes around the OpenGraph
      # description are removed; nil when no description is present.
      def tweet
        if twitter_api_credentials_present?
          client.prettify_tweet(raw)&.strip
        else
          twitter_data[:description].gsub(/“(.+?)”/im) { $1 } if twitter_data[:description]
        end
      end

      # Human readable timestamp. In API mode `created_at` is shifted by the
      # author's `utc_offset` (seconds) before formatting.
      def timestamp
        if twitter_api_credentials_present?
          date = DateTime.strptime(access(:created_at), "%a %b %d %H:%M:%S %z %Y")
          user_offset = access(:user, :utc_offset).to_i
          offset = (user_offset >= 0 ? "+" : "-") + Time.at(user_offset.abs).gmtime.strftime("%H%M")
          date.new_offset(offset).strftime("%-l:%M %p - %-d %b %Y")
        else
          attr_at_css(".tweet-timestamp", 'title')
        end
      end

      # "Name (screen_name)" of the author.
      def title
        if twitter_api_credentials_present?
          "#{access(:user, :name)} (#{access(:user, :screen_name)})"
        else
          "#{attr_at_css('.tweet.permalink-tweet', 'data-name')} (#{attr_at_css('.tweet.permalink-tweet', 'data-screen-name')})"
        end
      end

      # Avatar URL. In API mode "normal" in the URL is swapped for "400x400";
      # nil when the API value is missing rather than raising on nil.
      def avatar
        if twitter_api_credentials_present?
          access(:user, :profile_image_url_https)&.sub('normal', '400x400')
        elsif twitter_data[:image]
          twitter_data[:image]
        end
      end

      # Like count, or nil when it is zero (API mode).
      def likes
        if twitter_api_credentials_present?
          prettify_number(access(:favorite_count).to_i)
        else
          attr_at_css(".request-favorited-popup", 'data-compact-localized-count')
        end
      end

      # Retweet count, or nil when it is zero (API mode).
      def retweets
        if twitter_api_credentials_present?
          prettify_number(access(:retweet_count).to_i)
        else
          attr_at_css(".request-retweeted-popup", 'data-compact-localized-count')
        end
      end

      # Display name of the quoted tweet's author, if any.
      def quoted_full_name
        if twitter_api_credentials_present?
          access(:quoted_status, :user, :name)
        else
          raw.css('.QuoteTweet-fullname')[0]&.text
        end
      end

      # Screen name of the quoted tweet's author, if any.
      def quoted_screen_name
        if twitter_api_credentials_present?
          access(:quoted_status, :user, :screen_name)
        else
          attr_at_css(".QuoteTweet-innerContainer", "data-screen-name")
        end
      end

      # Text of the quoted tweet, if any.
      def quoted_tweet
        if twitter_api_credentials_present?
          access(:quoted_status, :full_text)
        else
          raw.css('.QuoteTweet-text')[0]&.text
        end
      end

      # Link to the quoted tweet. A string is always returned, even when the
      # tweet quotes nothing.
      def quoted_link
        if twitter_api_credentials_present?
          "https://twitter.com/#{quoted_screen_name}/status/#{access(:quoted_status, :id)}"
        else
          "https://twitter.com#{attr_at_css(".QuoteTweet-innerContainer", "href")}"
        end
      end

      # Formats a positive count through the client; nil for zero or less.
      def prettify_number(count)
        count > 0 ? client.prettify_number(count) : nil
      end

      # Attribute of the first node matching the selector, or nil.
      def attr_at_css(css_property, attribute_name)
        raw.at_css(css_property)&.attr(attribute_name)
      end

      # Memoized hash of the values collected above.
      def data
        @data ||= {
          link: link,
          tweet: tweet,
          timestamp: timestamp,
          title: title,
          avatar: avatar,
          likes: likes,
          retweets: retweets,
          quoted_tweet: quoted_tweet,
          quoted_full_name: quoted_full_name,
          quoted_screen_name: quoted_screen_name,
          quoted_link: quoted_link
        }
      end
    end
  end
end

View File

@@ -0,0 +1,48 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine that embeds Typeform forms (<subdomain>.typeform.com/to/<id>)
    # in an iframe.
    class TypeformOnebox
      include Engine

      matches_regexp(/^https?:\/\/[a-z0-9\-_]+\.typeform\.com\/to\/[a-zA-Z0-9]+/)
      requires_iframe_origins "https://*.typeform.com"
      always_https

      # Iframe markup pointing at the embed-enabled form URL.
      def to_html
        <<~HTML
          <iframe
          src="#{build_typeform_src}"
          width="100%"
          height="600px"
          scrolling="no"
          frameborder="0"
          ></iframe>
        HTML
      end

      # Generic placeholder supplied by the shared helpers.
      def placeholder_html
        ::Onebox::Helpers.generic_placeholder_html
      end

      private

      # Returns the normalized form URL with `typeform-embed=embed-widget`
      # appended, unless a `typeform-embed` parameter is already present.
      def build_typeform_src
        src = ::Onebox::Helpers.normalize_url_for_output(@url)
        existing_params = CGI.parse(URI.parse(src).query || '')

        return src if existing_params.key?('typeform-embed')

        # Pick the joiner: "&" after existing parameters, otherwise "?"
        # unless the URL already ends with one.
        joiner =
          if !existing_params.empty?
            '&'
          elsif src.end_with?('?')
            ''
          else
            '?'
          end

        "#{src}#{joiner}typeform-embed=embed-widget"
      end
    end
  end
end

View File

@@ -0,0 +1,36 @@
# frozen_string_literal: true
require 'cgi'

module Onebox
  module Engine
    # Onebox engine for direct links to video files (.mov, .mp4, .webm, .ogv),
    # rendered with an HTML5 <video> element.
    class VideoOnebox
      include Engine

      matches_regexp(/^(https?:)?\/\/.*\.(mov|mp4|webm|ogv)(\?.*)?$/i)

      # HTTPS is forced only for hosts on the generic engine's https list.
      def always_https?
        AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
      end

      # Builds the <video> markup with a fallback link.
      def to_html
        # Fix Dropbox image links: share pages are swapped for the direct
        # download host. The string is reassigned rather than mutated in place
        # so the caller's object is untouched.
        if @url.match?(/^https:\/\/www\.dropbox\.com\/s\//)
          @url = @url.sub("https://www.dropbox.com", "https://dl.dropboxusercontent.com")
        end

        escaped_url = ::Onebox::Helpers.normalize_url_for_output(@url)
        # The URL is user supplied and the matcher allows "<" and ">", so the
        # visible link text must be HTML-escaped.
        link_text = CGI.escapeHTML(@url)

        # Heredoc body is kept flush-left because `<<-` preserves leading whitespace.
        <<-HTML
<div class="onebox video-onebox">
<video width='100%' height='100%' controls #{@options[:disable_media_download_controls] ? 'controlslist="nodownload"' : ""}>
<source src='#{escaped_url}'>
<a href='#{escaped_url}'>#{link_text}</a>
</video>
</div>
        HTML
      end

      # Video placeholder supplied by the shared helpers.
      def placeholder_html
        ::Onebox::Helpers.video_placeholder_html
      end
    end
  end
end

View File

@@ -0,0 +1,55 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine that embeds Vimeo videos (vimeo.com/<numeric id>) through
    # the Vimeo player iframe.
    class VimeoOnebox
      include Engine
      include StandardEmbed

      matches_regexp(/^https?:\/\/(www\.)?vimeo\.com\/\d+/)
      requires_iframe_origins "https://player.vimeo.com"
      always_https

      WIDTH ||= 640
      HEIGHT ||= 360

      # Video placeholder supplied by the shared helpers.
      def placeholder_html
        ::Onebox::Helpers.video_placeholder_html
      end

      # Iframe markup for the player. The video id comes from the oEmbed
      # response, falling back to the first number in the URL path.
      def to_html
        video_id = oembed_data[:video_id]
        if video_id.nil?
          # for private videos
          video_id = uri.path[/\/(\d+)/, 1]
        end

        video_src = "https://player.vimeo.com/video/#{video_id}"
        # Only has an effect if the id itself contained this text.
        video_src = video_src.gsub('autoplay=1', '').chomp("?")

        # Heredoc body is kept flush-left because `<<-` preserves leading whitespace.
        <<-HTML
<iframe
width="#{WIDTH}"
height="#{HEIGHT}"
src="#{video_src}"
data-original-href="#{link}"
frameborder="0"
allowfullscreen
></iframe>
        HTML
      end

      private

      # Fetches and parses Vimeo's oEmbed JSON for this URL (memoized).
      #
      # @return [Hash] parsed response with symbol keys, or an empty Hash when
      #   the request or parsing fails. The failure value must be a Hash:
      #   callers index the result with a Symbol, which raises TypeError on a
      #   String.
      def oembed_data
        @oembed_data ||=
          begin
            response = Onebox::Helpers.fetch_response("https://vimeo.com/api/oembed.json?url=#{url}")
            Onebox::Helpers.symbolize_keys(::MultiJson.load(response))
          rescue StandardError
            {}
          end
      end

      # OpenGraph data for the page, via `get_opengraph`.
      def og_data
        @og_data = get_opengraph
      end
    end
  end
end

View File

@@ -0,0 +1,43 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine for Wikimedia Commons file pages
    # (commons.wikimedia.org/wiki/File:...).
    class WikimediaOnebox
      include Engine
      include LayoutSupport
      include JSON

      matches_regexp(/^https?:\/\/commons\.wikimedia\.org\/wiki\/(File:.+)/)
      always_https

      class << self
        # Wikimedia links end in an image extension.
        # E.g. https://commons.wikimedia.org/wiki/File:Stones_members_montage2.jpg
        # This engine should have priority over the generic ImageOnebox.
        def priority
          1
        end
      end

      # API endpoint queried for the image info of the linked file.
      def url
        file_name = match[:name]
        "https://en.wikipedia.org/w/api.php?action=query&titles=#{file_name}&prop=imageinfo&iilimit=50&iiprop=timestamp|user|url&iiurlwidth=500&format=json"
      end

      private

      # MatchData exposing the "File:..." part of the URL as `match[:name]`.
      def match
        @match ||= @url.match(/^https?:\/\/commons\.wikimedia\.org\/wiki\/(?<name>File:.+)/)
      end

      # Layout data taken from the first page of the API response and the
      # first entry of its image info list.
      def data
        pages = raw['query']['pages']
        # `pages.first` is a [key, value] pair; index 1 is the page itself.
        first_page = pages.first[1]
        image_info = first_page['imageinfo'].first

        {
          link: image_info['descriptionurl'],
          title: first_page['title'],
          image: image_info['url'],
          thumbnail: image_info['thumburl']
        }
      end
    end
  end
end

View File

@@ -0,0 +1,97 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine for Wikipedia articles. Produces a title, a text excerpt
    # and, when available, an image. A URL fragment (#Section) narrows the
    # excerpt to that section.
    class WikipediaOnebox
      include Engine
      include LayoutSupport
      include HTML

      matches_regexp(/^https?:\/\/.*\.wikipedia\.(com|org)/)
      always_https

      private

      # Builds the layout data from the parsed article (`raw`).
      #
      # @return [Hash] with :link, :title, :description and, when a suitable
      #   image is found, :image.
      def data
        paras = []
        text = ""

        # Detect a section hash in the URL and retrieve the related paragraphs.
        # If no hash is provided the first few paragraphs will be used.
        # Author Lidlanca
        # Date 9/8/2014
        if (m_url_hash = @url.match(/#([^\/?]+)/)) # extract url hash
          m_url_hash_name = m_url_hash[1]
        end

        unless m_url_hash.nil?
          # The fragment is user supplied: pass it as an XPath variable rather
          # than interpolating it, so a quote in the fragment cannot break the
          # expression.
          section_header_title = raw.xpath("//span[@id=$section_id]", nil, { section_id: m_url_hash_name })

          if section_header_title.empty?
            paras = raw.search("p") # default get all the paras
          else
            section_title_text = section_header_title.inner_text
            section_header = section_header_title[0].parent # parent element of the section span element should be an <h3> node
            cur_element = section_header

            # p|text|div covers the general case. We assume presence of at least 1 P node. If the section has no P node we may end up with a P node from the next section.
            # A div tag is commonly used as an assets wrapper in an article section, often as the first element holding an image.
            # ul support will improve the output generated for a section with a list as the main content (for example: an Author Bibliography, A musician Discography, etc)
            # NOTE(review): the pattern is unanchored, so any node name merely
            # containing one of these substrings (e.g. "span") also matches - confirm intent.
            first_p_found = false
            loop do
              next_sibling = cur_element.next_sibling
              # Ran past the last sibling: stop instead of calling #name on nil.
              break if next_sibling.nil?
              # From the section header keep taking siblings until a breaker
              # tag, but never stop before the first paragraph/list is found.
              break unless next_sibling.name =~ /p|text|div|ul/ || !first_p_found

              cur_element = next_sibling
              if cur_element.name == "p" || cur_element.name == "ul" # a list is treated like a paragraph
                first_p_found = true
                paras.push(cur_element)
              end
            end
          end
        else # no hash found in url
          paras = raw.search("p") # default get all the paras
        end

        unless paras.empty?
          cnt = 0
          # Concatenate at most four paragraphs, stopping once the text limit is reached.
          while text.length < Onebox::LayoutSupport.max_text && cnt <= 3
            break if cnt >= paras.size
            text += " " unless cnt == 0

            if paras[cnt].name == "ul" # Handle UL tag. Generate a textual ordered list (1.item | 2.item | 3.item). Unfortunately no newline allowed in output
              li_index = 1
              list_items = []
              paras[cnt].children.css("li").each do |li|
                list_items.push "#{li_index}." + li.inner_text
                li_index += 1
              end
              paragraph = (list_items.join " |\n ")[0..Onebox::LayoutSupport.max_text]
            else
              paragraph = paras[cnt].inner_text[0..Onebox::LayoutSupport.max_text]
            end

            # Drop footnote markers such as "[12]".
            paragraph = paragraph.gsub(/\[\d+\]/mi, "")
            text += paragraph
            cnt += 1
          end
        end

        text = "#{text[0..Onebox::LayoutSupport.max_text]}..." if text.length > Onebox::LayoutSupport.max_text

        result = {
          link: link,
          title: raw.css("html body h1").inner_text + (section_title_text ? " | " + section_title_text : ""), # if a section sub title exists add it to the main article title
          description: text
        }

        # Use the first image whose source is not the "Question_book" icon.
        img = raw.css(".image img")
        if img && img.size > 0
          img.each do |i|
            src = i["src"]
            if src !~ /Question_book/
              result[:image] = src
              break
            end
          end
        end

        result
      end
    end
  end
end

View File

@@ -0,0 +1,30 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine for Wistia media and embed URLs, rendered from Wistia's
    # oEmbed response.
    class WistiaOnebox
      include Engine
      include StandardEmbed

      matches_regexp(/https?:\/\/(.+)?(wistia.com|wi.st)\/(medias|embed)\/.*/)
      requires_iframe_origins("https://fast.wistia.com", "https://fast.wistia.net")
      always_https

      # Embed markup as provided by the oEmbed response.
      def to_html
        embed = get_oembed
        embed.html
      end

      # Thumbnail image tag, or nil when the oEmbed response has no thumbnail.
      def placeholder_html
        embed = get_oembed
        thumbnail = embed.thumbnail_url
        return if Onebox::Helpers.blank?(thumbnail)

        "<img src='#{thumbnail}' #{embed.title_attr}>"
      end

      private

      # oEmbed endpoint requesting an iframe embed for this URL.
      def get_oembed_url
        target = url
        "https://fast.wistia.com/oembed?embedType=iframe&url=#{target}"
      end
    end
  end
end

View File

@@ -0,0 +1,32 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine for xkcd comics (xkcd.com/<number>), backed by the comic's
    # JSON info endpoint.
    class XkcdOnebox
      include Engine
      include LayoutSupport
      include JSON

      matches_regexp(/^https?:\/\/(www\.)?(m\.)?xkcd\.com\/\d+/)

      # JSON endpoint describing the linked comic.
      def url
        "https://xkcd.com/#{match[:comic_id]}/info.0.json"
      end

      private

      # MatchData exposing the comic number as `match[:comic_id]`.
      # A real Regexp literal is used: with a `%{...}` String literal the `\.`
      # escape collapses to a bare dot that matches any character.
      def match
        @match ||= @url.match(%r{xkcd\.com/(?<comic_id>\d+)})
      end

      # Layout data read from the JSON response.
      def data
        {
          link: @url,
          title: raw['safe_title'],
          image: raw['img'],
          description: raw['alt']
        }
      end
    end
  end
end

View File

@@ -0,0 +1,35 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine that embeds Youku videos through the Youku player iframe.
    class YoukuOnebox
      include Engine
      include HTML

      matches_regexp(/^(https?:\/\/)?([\da-z\.-]+)(youku.com\/)(.)+\/?$/)
      requires_iframe_origins "https://player.youku.com"

      # Try to get the video ID. Works for URLs of the form:
      # * http://v.youku.com/v_show/id_XNjM3MzAxNzc2.html
      # * http://v.youku.com/v_show/id_XMTQ5MjgyMjMyOA==.html?from=y1.3-tech-index3-232-10183.89969-89963.3-1
      #
      # Returns nil when the path does not match or any error is raised.
      def video_id
        uri.path[/\/v_show\/id_([a-zA-Z0-9_=\-]+)(\.html)?.*/, 1]
      rescue StandardError
        nil
      end

      # Iframe markup for the Youku player.
      def to_html
        player_src = "https://player.youku.com/embed/#{video_id}"

        <<~HTML
          <iframe
          src="#{player_src}"
          width="640"
          height="430"
          frameborder='0'
          allowfullscreen
          ></iframe>
        HTML
      end
    end
  end
end

View File

@@ -0,0 +1,173 @@
# frozen_string_literal: true
module Onebox
  module Engine
    # Onebox engine for YouTube. Video and playlist URLs are embedded with the
    # YouTube iframe player; any other URL (e.g. a channel page) is delegated
    # to the generic allowlisted engine.
    class YoutubeOnebox
      include Engine
      include StandardEmbed

      matches_regexp(/^https?:\/\/(?:www\.)?(?:m\.)?(?:youtube\.com|youtu\.be)\/.+$/)
      requires_iframe_origins "https://www.youtube.com"
      always_https

      WIDTH ||= 480
      HEIGHT ||= 360

      # Extracts the title from the embed page's `ytcfg.set(...)` script and
      # pairs it with the video's default thumbnail URL.
      #
      # @return [Hash, nil] `{ image:, title: }`, or nil when there is no video
      #   id or the embed page cannot be parsed. A successful result is memoized.
      def parse_embed_response
        return unless video_id
        return @parse_embed_response if defined?(@parse_embed_response)

        embed_url = "https://www.youtube.com/embed/#{video_id}"
        @embed_doc ||= Onebox::Helpers.fetch_html_doc(embed_url)

        begin
          script_tag = @embed_doc.xpath('//script').find { |tag| tag.to_s.include?('ytcfg.set') }.to_s
          match = script_tag.to_s.match(/ytcfg\.set\((?<json>.*)\)/)

          yt_json = ::JSON.parse(match[:json])
          renderer = ::JSON.parse(yt_json['PLAYER_VARS']['embedded_player_response'])['embedPreview']['thumbnailPreviewRenderer']

          title = renderer['title']['runs'].first['text']
          image = "https://img.youtube.com/vi/#{video_id}/hqdefault.jpg"
        rescue
          # Any parsing problem means "no embed data"; callers fall back to OpenGraph.
          return
        end

        @parse_embed_response = { image: image, title: title }
      end

      # Thumbnail placeholder for videos and playlists; otherwise the full HTML.
      # NOTE(review): image and title are interpolated into the attributes
      # as-is; whether they are already HTML-safe is not visible here - confirm.
      def placeholder_html
        if video_id || list_id
          result = parse_embed_response
          result ||= get_opengraph.data

          "<img src='#{result[:image]}' width='#{WIDTH}' height='#{HEIGHT}' title='#{result[:title]}'>"
        else
          to_html
        end
      end

      # Iframe markup for a video or playlist, or the generic onebox HTML for
      # other pages (nil when that is blank).
      #
      # Ids can come straight from the user's query string, so they are
      # URL-escaped before being placed inside the `src` attribute.
      # Heredoc bodies are kept flush-left because `<<-` preserves leading whitespace.
      def to_html
        if video_id
          <<-HTML
<iframe
src="https://www.youtube.com/embed/#{CGI.escape(video_id)}?#{embed_params}"
width="#{WIDTH}"
height="#{HEIGHT}"
frameborder="0"
allowfullscreen
></iframe>
          HTML
        elsif list_id
          <<-HTML
<iframe
src="https://www.youtube.com/embed/videoseries?list=#{CGI.escape(list_id)}&wmode=transparent&rel=0&autohide=1&showinfo=1&enablejsapi=1"
width="#{WIDTH}"
height="#{HEIGHT}"
frameborder="0"
allowfullscreen
></iframe>
          HTML
        else
          # for channel pages
          html = Onebox::Engine::AllowlistedGenericOnebox.new(@url, @timeout).to_html
          return if Onebox::Helpers.blank?(html)

          # Turn protocol-relative URLs into https ones, keeping the opening
          # quote so the attribute stays well-formed.
          html.gsub(/(['"])\/\//, '\1https://')
        end
      end

      # Title from the embed page, falling back to OpenGraph data (memoized).
      def video_title
        @video_title ||= begin
          result = parse_embed_response || get_opengraph.data
          result[:title]
        end
      end

      private

      # Video id taken, in order, from a youtu.be path, an /embed/ path, or
      # the `v` query parameter. Written as one expression so that every
      # branch is memoized.
      def video_id
        @video_id ||=
          if uri.host["youtu.be"] && (short_id = uri.path[/\/([\w\-]+)/, 1])
            # http://youtu.be/afyK1HSFfgw
            short_id
          elsif uri.path["/embed/"] && (embed_id = uri.path[/\/embed\/([\w\-]+)/, 1])
            # https://www.youtube.com/embed/vsF0K3Ou1v0
            embed_id
          else
            # https://www.youtube.com/watch?v=Z0UISCEe52Y
            params['v']
          end
      end

      # Playlist id from the `list` query parameter, if any.
      def list_id
        @list_id ||= params['list']
      end

      # Builds the player query string from the recognised URL parameters.
      def embed_params
        query = { 'feature' => 'oembed', 'wmode' => 'opaque' }

        query['list'] = list_id if list_id

        # Parse timestrings, and assign the result as a start= parameter
        start =
          if params['start']
            params['start']
          elsif params['t']
            params['t']
          elsif uri.fragment && uri.fragment.start_with?('t=')
            # remove the t= from the start
            uri.fragment[2..-1]
          end

        query['start'] = parse_timestring(start) if start
        query['end'] = parse_timestring(params['end']) if params['end']

        # Official workaround for looping videos
        # https://developers.google.com/youtube/player_parameters#loop
        # use params.include? so that you can just add "&loop"
        if params.include?('loop')
          query['loop'] = 1
          query['playlist'] = video_id
        end

        # https://developers.google.com/youtube/player_parameters#rel
        query['rel'] = 0 if params.include?('rel')

        # https://developers.google.com/youtube/player_parameters#enablejsapi
        query['enablejsapi'] = params['enablejsapi'] if params.include?('enablejsapi')

        URI.encode_www_form(query)
      end

      # Converts "1h2m3s" style strings (or plain seconds) into seconds.
      # Every part of the pattern is optional, so unparsable text yields 0.
      def parse_timestring(string)
        if string =~ /(\d+h)?(\d+m)?(\d+s?)?/
          ($1.to_i * 3600) + ($2.to_i * 60) + $3.to_i
        end
      end

      # Query parameters as a flat Hash; an empty Hash when there is no query
      # string or parsing raises.
      def params
        return {} unless uri.query

        # This mapping is necessary because CGI.parse returns a hash of keys to arrays.
        # And *that* is necessary because querystrings support arrays, so they
        # force you to deal with it to avoid security issues that would pop up
        # if one day it suddenly gave you an array.
        #
        # However, we aren't interested. Just take the first one.
        @params ||= begin
          first_values = {}
          CGI.parse(uri.query).each { |k, v| first_values[k] = v.first }
          first_values
        end
      rescue
        {}
      end
    end
  end
end