DEV: Absorb onebox gem into core (#12979)

* Move onebox gem in core library * Update template file path * Remove warning for onebox gem caching * Remove onebox version file * Remove onebox gem * Add sanitize gem * Require onebox library in lazy-yt plugin * Remove onebox web specific code This code was used in standalone onebox Sinatra application * Merge Discourse specific AllowlistedGenericOnebox engine in core * Fix onebox engine filenames to match class name casing * Move onebox specs from gem into core * DEV: Rename `response` helper to `onebox_response` Fixes a naming collision. * Require rails_helper * Don't use `before/after(:all)` * Whitespace * Remove fakeweb * Remove poor unit tests * DEV: Re-add fakeweb, plugins are using it * Move onebox helpers * Stub Instagram API * FIX: Follow additional redirect status codes (#476) Don’t throw errors if we encounter 303, 307 or 308 HTTP status codes in responses * Remove an empty file * DEV: Update the license file Using the copy from https://choosealicense.com/licenses/gpl-2.0/# Hopefully this will enable GitHub to show the license UI? * DEV: Update embedded copyrights * DEV: Add Onebox copyright notice * DEV: Add MIT license, convert COPYRIGHT.txt to md * DEV: Remove an incorrect copyright claim Co-authored-by: Jarek Radosz <jradosz@gmail.com> Co-authored-by: jbrw <jamie@goatforce5.org>
2025-02-25 18:55:32 -06:00 · 2021-05-26 15:11:35 +05:30
parent d0779a87bb
commit 283b08d45f
211 changed files with 78330 additions and 74 deletions
--- a/lib/onebox/engine/allowlisted_generic_onebox.rb
+++ b/lib/onebox/engine/allowlisted_generic_onebox.rb
@@ -1,21 +1,267 @@
 # frozen_string_literal: true

+require 'cgi'
+require 'htmlentities'
 require "ipaddr"

 module Onebox
  module Engine
    class AllowlistedGenericOnebox
+      include Engine
+      include StandardEmbed
+      include LayoutSupport
+
+      def self.priority
+        200
+      end
+
+      # Often using the `html` attribute is not what we want, like for some blogs that
+      # include the entire page HTML. However for some providers like Flickr it allows us
+      # to return gifv and galleries.
+      def self.default_html_providers
+        ['Flickr', 'Meetup']
+      end
+
+      def self.html_providers
+        @html_providers ||= default_html_providers.dup
+      end
+
+      def self.html_providers=(new_provs)
+        @html_providers = new_provs
+      end
+
+      # A re-written URL converts http:// -> https://
+      def self.rewrites
+        @rewrites ||= https_hosts.dup
+      end
+
+      def self.rewrites=(new_list)
+        @rewrites = new_list
+      end
+
+      def self.https_hosts
+        %w(slideshare.net dailymotion.com livestream.com imgur.com flickr.com)
+      end
+
+      # Reports whether the host of `uri` is equal to, or a subdomain of,
+      # any of the domain names in `list`.
+      def self.host_matches(uri, list)
+        list.any? do |domain|
+          %r((^|\.)#{Regexp.escape(domain)}$).match?(uri.host)
+        end
+      end
+
+      def self.allowed_twitter_labels
+        ['brand', 'price', 'usd', 'cad', 'reading time', 'likes']
+      end

-      # overwrite the allowlist
      def self.===(other)
        other.is_a?(URI) ? (IPAddr.new(other.hostname) rescue nil).nil? : true
      end

-      # ensure we're the last engine to be used
-      def self.priority
-        Float::INFINITY
+      def to_html
+        rewrite_https(generic_html)
      end

+      # Returns the HTML used as a placeholder for this onebox.
+      #
+      # Articles and images render with their regular article/image markup;
+      # videos and player cards use the shared video placeholder and embeds
+      # use the shared generic placeholder from Onebox::Helpers. Anything
+      # else falls back to the full #to_html output.
+      def placeholder_html
+        return article_html if is_article?
+        return image_html if is_image?
+        return Onebox::Helpers.video_placeholder_html if is_video? || is_card?
+        return Onebox::Helpers.generic_placeholder_html if is_embedded?
+        to_html
+      end
+
+      # Builds and memoizes the normalized data hash used by the renderers.
+      #
+      # Starts from `{ link: link }` merged with `raw`, then:
+      # * truncates and entity-decodes the title, description and site name;
+      # * derives :domain from :site_name, or from the host of :domain;
+      # * picks the preferred image/video URL (secure variants first);
+      # * formats the article publication time;
+      # * extracts up to two allowed Twitter labels, or a price label;
+      # * records an 'is blank' error for every nil/empty value except :video.
+      #
+      # Returns the resulting Hash.
+      def data
+        @data ||= begin
+          html_entities = HTMLEntities.new
+          d = { link: link }.merge(raw)
+
+          if !Onebox::Helpers.blank?(d[:title])
+            d[:title] = html_entities.decode(Onebox::Helpers.truncate(d[:title], 80))
+          end
+
+          d[:description] ||= d[:summary]
+          if !Onebox::Helpers.blank?(d[:description])
+            d[:description] = html_entities.decode(Onebox::Helpers.truncate(d[:description], 250))
+          end
+
+          if !Onebox::Helpers.blank?(d[:site_name])
+            d[:domain] = html_entities.decode(Onebox::Helpers.truncate(d[:site_name], 80))
+          elsif !Onebox::Helpers.blank?(d[:domain])
+            d[:domain] = "http://#{d[:domain]}" unless d[:domain] =~ /^https?:\/\//
+            d[:domain] = URI(d[:domain]).host.to_s.sub(/^www\./, '') rescue nil
+          end
+
+          # prefer secure URLs
+          d[:image] = d[:image_secure_url] || d[:image_url] || d[:thumbnail_url] || d[:image]
+          d[:image] = Onebox::Helpers::get_absolute_image_url(d[:image], @url)
+          d[:image] = Onebox::Helpers::normalize_url_for_output(html_entities.decode(d[:image]))
+          d[:image] = nil if Onebox::Helpers.blank?(d[:image])
+
+          d[:video] = d[:video_secure_url] || d[:video_url] || d[:video]
+          d[:video] = nil if Onebox::Helpers.blank?(d[:video])
+
+          d[:published_time] = d[:article_published_time] unless Onebox::Helpers.blank?(d[:article_published_time])
+          if !Onebox::Helpers.blank?(d[:published_time])
+            # The timestamp comes from the remote page, so it may be malformed.
+            # Parse it once and leave the values untouched when it cannot be parsed
+            # instead of letting the exception abort the whole onebox.
+            published_at = begin
+              Time.parse(d[:published_time])
+            rescue ArgumentError, TypeError
+              nil
+            end
+
+            if published_at
+              d[:article_published_time] = published_at.strftime("%-d %b %y")
+              d[:article_published_time_title] = published_at.strftime("%I:%M%p - %d %B %Y")
+            end
+          end
+
+          # Twitter labels
+          if !Onebox::Helpers.blank?(d[:label1]) && !Onebox::Helpers.blank?(d[:data1]) && !!AllowlistedGenericOnebox.allowed_twitter_labels.find { |l| d[:label1] =~ /#{l}/i }
+            d[:label_1] = Onebox::Helpers.truncate(d[:label1])
+            d[:data_1]  = Onebox::Helpers.truncate(d[:data1])
+          end
+          if !Onebox::Helpers.blank?(d[:label2]) && !Onebox::Helpers.blank?(d[:data2]) && !!AllowlistedGenericOnebox.allowed_twitter_labels.find { |l| d[:label2] =~ /#{l}/i }
+            # The second label takes the first slot when the first label was not usable.
+            unless Onebox::Helpers.blank?(d[:label_1])
+              d[:label_2] = Onebox::Helpers.truncate(d[:label2])
+              d[:data_2]  = Onebox::Helpers.truncate(d[:data2])
+            else
+              d[:label_1] = Onebox::Helpers.truncate(d[:label2])
+              d[:data_1]  = Onebox::Helpers.truncate(d[:data2])
+            end
+          end
+
+          # Fall back to a price label when no Twitter label claimed the first slot.
+          if Onebox::Helpers.blank?(d[:label_1]) && !Onebox::Helpers.blank?(d[:price_amount]) && !Onebox::Helpers.blank?(d[:price_currency])
+            d[:label_1] = "Price"
+            d[:data_1] = Onebox::Helpers.truncate("#{d[:price_currency].strip} #{d[:price_amount].strip}")
+          end
+
+          skip_missing_tags = [:video]
+          d.each do |k, v|
+            next if skip_missing_tags.include?(k)
+            if v == nil || v == ''
+              errors[k] ||= []
+              errors[k] << 'is blank'
+            end
+          end
+
+          d
+        end
+      end
+
+      private
+
+      # Upgrades every "http://" occurrence in `html` to "https://" when this
+      # onebox's host is on the rewrite list. Returns nil for missing html and
+      # the html unchanged for hosts that are not on the list.
+      def rewrite_https(html)
+        return unless html
+        return html unless AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.rewrites)
+
+        html.gsub("http://", "https://")
+      end
+
+      # Chooses a renderer by testing the content kinds in priority order.
+      # Returns the rendered HTML, or nil when no kind applies.
+      def generic_html
+        if is_article?
+          article_html
+        elsif is_video?
+          video_html
+        elsif is_image?
+          image_html
+        elsif is_embedded?
+          embedded_html
+        elsif is_card?
+          card_html
+        elsif has_text? || is_image_article?
+          article_html
+        end
+      end
+
+      # Truthy when the data describes a "player" card whose player URL looks
+      # like a URI and matches one of the configured :allowed_iframe_regexes.
+      # Falsy (false or nil) otherwise, including when no regexes are configured.
+      def is_card?
+        data[:card] == 'player' &&
+          data[:player] =~ URI::regexp &&
+          options[:allowed_iframe_regexes]&.any? { |r| data[:player] =~ r }
+      end
+
+      def is_article?
+        (data[:type] =~ /article/ || data[:asset_type] =~ /article/) &&
+        has_text?
+      end
+
+      def has_text?
+        has_title? && !Onebox::Helpers.blank?(data[:description])
+      end
+
+      def has_title?
+        !Onebox::Helpers.blank?(data[:title])
+      end
+
+      def is_image_article?
+        has_title? && has_image?
+      end
+
+      def is_image?
+        data[:type] =~ /photo|image/ &&
+        data[:type] !~ /photostream/ &&
+        has_image?
+      end
+
+      def has_image?
+        !Onebox::Helpers.blank?(data[:image])
+      end
+
+      def is_video?
+        data[:type] =~ /^video[\/\.]/ &&
+          data[:video_type] == "video/mp4" && # Many sites include 'videos' with text/html types (i.e. iframes)
+          !Onebox::Helpers.blank?(data[:video])
+      end
+
+      # Decides whether the provider-supplied :html may be embedded as-is.
+      #
+      # Requires both :html and :height. Providers listed in `html_providers`
+      # are always accepted. For everyone else the html must contain an iframe
+      # whose src matches one of the configured :allowed_iframe_regexes.
+      def is_embedded?
+        return false unless data[:html] && data[:height]
+        return true if AllowlistedGenericOnebox.html_providers.include?(data[:provider_name])
+        # Cheap substring check before paying for an HTML parse.
+        return false unless data[:html]["iframe"]
+
+        fragment = Nokogiri::HTML5::fragment(data[:html])
+        src = fragment.at_css('iframe')&.[]("src")
+        options[:allowed_iframe_regexes]&.any? { |r| src =~ r }
+      end
+
+      # Renders the player card as an iframe pointing at the normalized
+      # :player URL. Width falls back to "100%" when :player_width is absent.
+      def card_html
+        escaped_url = ::Onebox::Helpers.normalize_url_for_output(data[:player])
+
+        <<~RAW
+        <iframe src="#{escaped_url}"
+                width="#{data[:player_width] || "100%"}"
+                height="#{data[:player_height]}"
+                scrolling="no"
+                frameborder="0">
+        </iframe>
+        RAW
+      end
+
+      def article_html
+        layout.to_html
+      end
+
+      # Renders the onebox as a single <img> tag, or returns nil when there is
+      # no image. The alt text comes from the description (or the title) and
+      # the dimensions from the first available image/thumbnail/generic size.
+      def image_html
+        return if Onebox::Helpers.blank?(data[:image])
+
+        escaped_src = ::Onebox::Helpers.normalize_url_for_output(data[:image])
+
+        # The description/title have had their HTML entities decoded, so they can
+        # contain quotes or angle brackets; escape them before placing them inside
+        # a single-quoted attribute, otherwise an apostrophe cuts the attribute short.
+        alt    = CGI.escapeHTML((data[:description] || data[:title]).to_s)
+        width  = data[:image_width]  || data[:thumbnail_width]  || data[:width]
+        height = data[:image_height] || data[:thumbnail_height] || data[:height]
+
+        "<img src='#{escaped_src}' alt='#{alt}' width='#{width}' height='#{height}' class='onebox'>"
+      end
+
+      # Renders the onebox as a <video> element using the normalized :video URL
+      # as its source and the normalized :image URL as its poster.
+      def video_html
+        escaped_video_src = ::Onebox::Helpers.normalize_url_for_output(data[:video])
+        escaped_image_src = ::Onebox::Helpers.normalize_url_for_output(data[:image])
+        # The title has had its HTML entities decoded; escape it so quotes cannot
+        # terminate the single-quoted attribute early.
+        escaped_title = CGI.escapeHTML(data[:title].to_s)
+
+        <<-HTML
+          <video
+            title='#{escaped_title}'
+            width='#{data[:video_width]}'
+            height='#{data[:video_height]}'
+            style='max-width:100%'
+            poster='#{escaped_image_src}'
+            controls=''
+          >
+            <source src='#{escaped_video_src}'>
+          </video>
+        HTML
+      end
+
+      # Returns the provider-supplied :html after light normalization: every
+      # <img> gets the "thumbnail" class, and the first <iframe> (if any) loses
+      # its inline style and gets explicit width/height, no scrolling and no border.
+      def embedded_html
+        fragment = Nokogiri::HTML5::fragment(data[:html])
+        fragment.css("img").each { |img| img["class"] = "thumbnail" }
+        if iframe = fragment.at_css("iframe")
+          iframe.remove_attribute("style")
+          iframe["width"] = data[:width] || "100%"
+          iframe["height"] = data[:height]
+          iframe["scrolling"] = "no"
+          iframe["frameborder"] = "0"
+        end
+        fragment.to_html
+      end
    end
  end
 end
--- a/lib/onebox/engine/amazon_onebox.rb
+++ b/lib/onebox/engine/amazon_onebox.rb
@@ -0,0 +1,198 @@
+# frozen_string_literal: true
+
+require 'json'
+require "onebox/open_graph"
+
+module Onebox
+  module Engine
+    class AmazonOnebox
+      include Engine
+      include LayoutSupport
+      include HTML
+
+      always_https
+      matches_regexp(/^https?:\/\/(?:www\.)?(?:smile\.)?(amazon|amzn)\.(?<tld>com|ca|de|it|es|fr|co\.jp|co\.uk|cn|in|com\.br|com\.mx|nl|pl|sa|sg|se|com\.tr|ae)\//)
+
+      # Returns the URL to fetch for this product.
+      #
+      # Preference order: the canonical link found in the already-fetched (or
+      # cached) document, then a normalized "/dp/<id>" URL built from the
+      # product id in the original URL, and finally the original URL itself.
+      def url
+        # Make sure the ivar exists without triggering a fetch.
+        @raw ||= nil
+
+        # If possible, fetch the cached HTML body immediately so we can
+        # try to grab the canonical URL from that document,
+        # rather than guess at the best URL structure to use
+        if !@raw && has_cached_body
+          @raw = Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher)
+        end
+
+        if @raw
+          canonical_link = @raw.at('//link[@rel="canonical"]/@href')
+          return canonical_link.to_s if canonical_link
+        end
+
+        if match && match[:id]
+          id = Addressable::URI.encode_component(match[:id], Addressable::URI::CharacterClasses::PATH)
+          return "https://www.amazon.#{tld}/dp/#{id}"
+        end
+
+        @url
+      end
+
+      def tld
+        @tld ||= @@matcher.match(@url)["tld"]
+      end
+
+      def http_params
+        if @options && @options[:user_agent]
+          { 'User-Agent' => @options[:user_agent] }
+        end
+      end
+
+      private
+
+      def has_cached_body
+        body_cacher&.respond_to?('cache_response_body?') &&
+          body_cacher.cache_response_body?(uri.to_s) &&
+          body_cacher.cached_response_body_exists?(uri.to_s)
+      end
+
+      def match
+        @match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[A-Z0-9]+)(?:\/|\?|$)/mi)
+      end
+
+      # Finds the product image URL in the page markup.
+      #
+      # Tries, in order, the "#main-image", "#landingImage" and
+      # "#ebooksImgBlkFront" elements, preferring high-resolution attributes.
+      # Returns a String URL, or nil when none of them yields an image.
+      def image
+        if (main_image = raw.css("#main-image")) && main_image.any?
+          attributes = main_image.first.attributes
+
+          if attributes["data-a-hires"]
+            return attributes["data-a-hires"].to_s
+          elsif attributes["data-a-dynamic-image"]
+            # The attribute holds a JSON object keyed by image URL.
+            return ::JSON.parse(attributes["data-a-dynamic-image"].value).keys.first
+          end
+        end
+
+        if (landing_image = raw.css("#landingImage")) && landing_image.any?
+          attributes = landing_image.first.attributes
+
+          if attributes["data-old-hires"]
+            return attributes["data-old-hires"].to_s
+          else
+            return landing_image.first["src"].to_s
+          end
+        end
+
+        if (ebook_image = raw.css("#ebooksImgBlkFront")) && ebook_image.any?
+          # Guard the attribute like the "#main-image" branch does, so a missing
+          # attribute yields nil instead of raising NoMethodError.
+          dynamic_image = ebook_image.first.attributes["data-a-dynamic-image"]
+          ::JSON.parse(dynamic_image.value).keys.first if dynamic_image
+        end
+      end
+
+      # Extracts the item price text from the page.
+      #
+      # Amazon markup is inconsistent, so several layouts are tried in turn:
+      # the split "our price" layout, the deal price, the plain "our price"
+      # block and finally the active media-matrix entry. Returns a String
+      # (possibly empty when nothing matched).
+      def price
+        split_rest = raw.css("#priceblock_ourprice .restOfPrice")
+        split_main = raw.css("#priceblock_ourprice .buyingPrice")[0]
+
+        # Only use the split layout when all three pieces are present; otherwise
+        # fall through to the other layouts instead of raising on a nil node.
+        if split_rest[0] && split_rest[1] && split_main
+          "#{split_rest[0].inner_text}#{split_main.inner_text}.#{split_rest[1].inner_text}"
+        elsif (dealprice = raw.css("#priceblock_dealprice span")[0])
+          dealprice.inner_text
+        elsif !raw.css("#priceblock_ourprice").inner_text.empty?
+          raw.css("#priceblock_ourprice").inner_text
+        else
+          raw.css(".mediaMatrixListItem.a-active .a-color-price").inner_text
+        end
+      end
+
+      def multiple_authors(authors_xpath)
+        raw
+          .xpath(authors_xpath)
+          .map { |a| a.inner_text.strip }
+          .join(", ")
+      end
+
+      def data
+        og = ::Onebox::OpenGraph.new(raw)
+
+        if raw.at_css('#dp.book_mobile') # printed books
+          title = raw.at("h1#title")&.inner_text
+          authors = raw.at_css('#byline_secondary_view_div') ? multiple_authors("//div[@id='byline_secondary_view_div']//span[@class='a-text-bold']") : raw.at("#byline")&.inner_text
+          rating = raw.at("#averageCustomerReviews_feature_div .a-icon")&.inner_text || raw.at("#cmrsArcLink .a-icon")&.inner_text
+
+          table_xpath = "//div[@id='productDetails_secondary_view_div']//table[@id='productDetails_techSpec_section_1']"
+          isbn = raw.xpath("#{table_xpath}//tr[8]//td").inner_text.strip
+
+          # if ISBN is misplaced or absent it's hard to find out which data is
+          # available and where to find it so just set it all to nil
+          if /^\d(\-?\d){12}$/.match(isbn)
+            publisher = raw.xpath("#{table_xpath}//tr[1]//td").inner_text.strip
+            published = raw.xpath("#{table_xpath}//tr[2]//td").inner_text.strip
+            book_length = raw.xpath("#{table_xpath}//tr[6]//td").inner_text.strip
+          else
+            isbn = publisher = published = book_length = nil
+          end
+
+          result = {
+            link: url,
+            title: title,
+            by_info: authors,
+            image: og.image || image,
+            description: raw.at("#productDescription")&.inner_text,
+            rating: "#{rating}#{', ' if rating && (!isbn&.empty? || !price&.empty?)}",
+            price: price,
+            isbn_asin_text: "ISBN",
+            isbn_asin: isbn,
+            publisher: publisher,
+            published: "#{published}#{', ' if published && !price&.empty?}"
+          }
+
+        elsif raw.at_css('#dp.ebooks_mobile') # ebooks
+          title = raw.at("#ebooksTitle")&.inner_text
+          authors = raw.at_css('#a-popover-mobile-udp-contributor-popover-id') ? multiple_authors("//div[@id='a-popover-mobile-udp-contributor-popover-id']//span[contains(@class,'a-text-bold')]") : (raw.at("#byline")&.inner_text&.strip || raw.at("#bylineInfo")&.inner_text&.strip)
+          rating = raw.at("#averageCustomerReviews_feature_div .a-icon")&.inner_text || raw.at("#cmrsArcLink .a-icon")&.inner_text || raw.at("#acrCustomerReviewLink .a-icon")&.inner_text
+
+          table_xpath = "//div[@id='detailBullets_secondary_view_div']//ul"
+          asin = raw.xpath("#{table_xpath}//li[4]/span/span[2]").inner_text
+
+          # if ASIN is misplaced or absent it's hard to find out which data is
+          # available and where to find it so just set it all to nil
+          if /^[0-9A-Z]{10}$/.match(asin)
+            publisher = raw.xpath("#{table_xpath}//li[2]/span/span[2]").inner_text
+            published = raw.xpath("#{table_xpath}//li[1]/span/span[2]").inner_text
+          else
+            asin = publisher = published = nil
+          end
+
+          result = {
+            link: url,
+            title: title,
+            by_info: authors,
+            image: og.image || image,
+            description: raw.at("#productDescription")&.inner_text,
+            rating: "#{rating}#{', ' if rating && (!asin&.empty? || !price&.empty?)}",
+            price: price,
+            isbn_asin_text: "ASIN",
+            isbn_asin: asin,
+            publisher: publisher,
+            published: "#{published}#{', ' if published && !price&.empty?}"
+          }
+
+        else
+          title = og.title || CGI.unescapeHTML(raw.css("title").inner_text)
+          result = {
+            link: url,
+            title: title,
+            image: og.image || image,
+            price: price
+          }
+
+          result[:by_info] = raw.at("#by-line")
+          result[:by_info] = Onebox::Helpers.clean(result[:by_info].inner_html) if result[:by_info]
+
+          summary = raw.at("#productDescription")
+
+          description = og.description || summary&.inner_text
+          description ||= raw.css("meta[name=description]").first&.[]("content")
+          result[:description] = CGI.unescapeHTML(Onebox::Helpers.truncate(description, 250)) if description
+        end
+
+        result[:price] = nil if result[:price].start_with?("$0") || result[:price] == 0
+
+        result
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/asciinema_onebox.rb
+++ b/lib/onebox/engine/asciinema_onebox.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class AsciinemaOnebox
+      include Engine
+      include StandardEmbed
+
+      always_https
+      matches_regexp(/^https?:\/\/asciinema\.org\/a\/[\p{Alnum}_\-]+$/)
+
+      # Renders the asciicast as the embed <script> tag for the matched recording id.
+      def to_html
+        cast_id = match[:asciinema_id]
+        "<script type='text/javascript' src='https://asciinema.org/a/#{cast_id}.js' id='asciicast-#{cast_id}' async></script>"
+      end
+
+      def placeholder_html
+        "<img src='https://asciinema.org/a/#{match[:asciinema_id]}.png'>"
+      end
+
+      private
+
+      def match
+        @match ||= @url.match(/asciinema\.org\/a\/(?<asciinema_id>[\p{Alnum}_\-]+)$/)
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/audio_onebox.rb
+++ b/lib/onebox/engine/audio_onebox.rb
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class AudioOnebox
+      include Engine
+
+      matches_regexp(/^(https?:)?\/\/.*\.(mp3|ogg|opus|wav|m4a)(\?.*)?$/i)
+
+      def always_https?
+        AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
+      end
+
+      # Renders an <audio> player for the URL, with a plain link as fallback
+      # content. The download control is suppressed when the
+      # :disable_media_download_controls option is set.
+      #
+      # NOTE(review): the fallback link text interpolates the raw @url rather
+      # than the normalized one — confirm the output is sanitized downstream.
+      def to_html
+        escaped_url = ::Onebox::Helpers.normalize_url_for_output(@url)
+
+        <<-HTML
+          <audio controls #{@options[:disable_media_download_controls] ? 'controlslist="nodownload"' : ""}>
+            <source src="#{escaped_url}">
+            <a href="#{escaped_url}">#{@url}</a>
+          </audio>
+        HTML
+      end
+
+      def placeholder_html
+        ::Onebox::Helpers.audio_placeholder_html
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/audioboom_onebox.rb
+++ b/lib/onebox/engine/audioboom_onebox.rb
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class AudioboomOnebox
+      include Engine
+      include StandardEmbed
+
+      matches_regexp(/^https?:\/\/audioboom\.com\/posts\/\d+/)
+      always_https
+
+      def placeholder_html
+        oembed = get_oembed
+
+        <<-HTML
+          <img
+            src="#{oembed.thumbnail_url}"
+            style="max-width: #{oembed.width}px; max-height: #{oembed.height}px;"
+            #{oembed.title_attr}
+          >
+        HTML
+      end
+
+      def to_html
+        get_oembed.html
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/band_camp_onebox.rb
+++ b/lib/onebox/engine/band_camp_onebox.rb
@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class BandCampOnebox
+      include Engine
+      include StandardEmbed
+
+      matches_regexp(/^https?:\/\/.*\.bandcamp\.com\/(album|track)\//)
+      always_https
+      requires_iframe_origins "https://bandcamp.com"
+
+      def placeholder_html
+        og = get_opengraph
+        "<img src='#{og.image}' height='#{og.video_height}' #{og.title_attr}>"
+      end
+
+      # Renders the Bandcamp player as an iframe whose source is the
+      # OpenGraph video URL (secure variant preferred) and whose size is the
+      # OpenGraph video width/height.
+      #
+      # NOTE(review): despite its name, `escaped_src` is not passed through
+      # Onebox::Helpers.normalize_url_for_output here, unlike in other engines —
+      # confirm whether the OpenGraph accessors already return a safe URL.
+      def to_html
+        og = get_opengraph
+        escaped_src = og.video_secure_url || og.video
+
+        <<-HTML
+          <iframe
+            src="#{escaped_src}"
+            width="#{og.video_width}"
+            height="#{og.video_height}"
+            scrolling="no"
+            frameborder="0"
+            allowfullscreen
+          ></iframe>
+        HTML
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/cloud_app_onebox.rb
+++ b/lib/onebox/engine/cloud_app_onebox.rb
@@ -0,0 +1,53 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class CloudAppOnebox
+      include Engine
+      include StandardEmbed
+
+      matches_regexp(/^https?:\/\/cl\.ly/)
+      always_https
+
+      def to_html
+        og = get_opengraph
+
+        if !og.image.nil?
+          image_html(og)
+        elsif og.title.to_s[/\.(mp4|ogv|webm)$/]
+          video_html(og)
+        else
+          link_html(og)
+        end
+      end
+
+      private
+
+      def link_html(og)
+        <<-HTML
+          <a href='#{og.url}' target='_blank' rel='noopener'>
+            #{og.title}
+          </a>
+        HTML
+      end
+
+      # Renders a looping <video> element for a CloudApp drop whose title is a
+      # video file name. The direct source URL is the OpenGraph URL followed by
+      # the title, and the MIME type is derived from the file extension.
+      def video_html(og)
+        direct_src = ::Onebox::Helpers.normalize_url_for_output("#{og.get(:url)}/#{og.title}")
+
+        # #to_html routes .mp4, .ogv and .webm titles here, so advertise the
+        # matching MIME type instead of always claiming MP4.
+        extension = og.title.to_s[/\.(mp4|ogv|webm)$/, 1]
+        mime_type = { "mp4" => "video/mp4", "ogv" => "video/ogg", "webm" => "video/webm" }.fetch(extension, "video/mp4")
+
+        <<-HTML
+          <video width='480' height='360' #{og.title_attr} controls loop>
+            <source src='#{direct_src}' type='#{mime_type}'>
+          </video>
+        HTML
+      end
+
+      def image_html(og)
+        <<-HTML
+          <a href='#{og.url}' target='_blank' class='onebox' rel='noopener'>
+            <img src='#{og.image}' #{og.title_attr} alt='CloudApp' width='480'>
+          </a>
+        HTML
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/coub_onebox.rb
+++ b/lib/onebox/engine/coub_onebox.rb
@@ -0,0 +1,22 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class CoubOnebox
+      include Engine
+      include StandardEmbed
+
+      matches_regexp(/^https?:\/\/coub\.com\/view\//)
+      always_https
+
+      def placeholder_html
+        oembed = get_oembed
+        "<img src='#{oembed.thumbnail_url}' height='#{oembed.thumbnail_height}' width='#{oembed.thumbnail_width}' #{oembed.title_attr}>"
+      end
+
+      def to_html
+        get_oembed.html
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/facebook_media_onebox.rb
+++ b/lib/onebox/engine/facebook_media_onebox.rb
@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class FacebookMediaOnebox
+      include Engine
+      include StandardEmbed
+
+      matches_regexp(/^https?:\/\/.*\.facebook\.com\/(\w+)\/(videos|\?).*/)
+      always_https
+      requires_iframe_origins "https://www.facebook.com"
+
+      # Renders the Facebook media as a player iframe when the Twitter card
+      # metadata describes a player; otherwise delegates to the generic
+      # allowlisted onebox and returns nil if that produces nothing.
+      def to_html
+        card = get_twitter
+        if card.present? && card[:card] == "player" && card[:player].present?
+          <<-HTML
+            <iframe
+              src="#{card[:player]}"
+              width="#{card[:player_width]}"
+              height="#{card[:player_height]}"
+              scrolling="no"
+              frameborder="0"
+              allowfullscreen
+            ></iframe>
+          HTML
+        else
+          fallback = Onebox::Engine::AllowlistedGenericOnebox.new(@url, @timeout).to_html
+          Onebox::Helpers.blank?(fallback) ? nil : fallback
+        end
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/five_hundred_px_onebox.rb
+++ b/lib/onebox/engine/five_hundred_px_onebox.rb
@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class FiveHundredPxOnebox
+      include Engine
+      include StandardEmbed
+
+      matches_regexp(/^https?:\/\/500px\.com\/photo\/\d+\//)
+      always_https
+
+      def to_html
+        og = get_opengraph
+        "<img src='#{og.image}' width='#{og.image_width}' height='#{og.image_height}' class='onebox' #{og.title_attr}>"
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/flickr_onebox.rb
+++ b/lib/onebox/engine/flickr_onebox.rb
@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+
+require_relative './opengraph_image'
+
+module Onebox
+  module Engine
+    class FlickrOnebox
+      include Engine
+      include StandardEmbed
+
+      matches_regexp(/^https?:\/\/www\.flickr\.com\/photos\//)
+      always_https
+
+      # Renders album markup when the OpenGraph URL points at a set, image
+      # markup when an OpenGraph image is available, and nil otherwise.
+      def to_html
+        og = get_opengraph
+        return album_html(og) if og.url =~ /\/sets\//
+        return image_html(og) if !og.image.nil?
+        nil
+      end
+
+      private
+
+      def album_html(og)
+        escaped_url = ::Onebox::Helpers.normalize_url_for_output(url)
+        album_title = "[Album] #{og.title}"
+
+        <<-HTML
+          <div class='onebox flickr-album'>
+            <a href='#{escaped_url}' target='_blank' rel='noopener'>
+              <span class='outer-box' style='max-width:#{og.image_width}px'>
+                <span class='inner-box'>
+                  <span class='album-title'>#{album_title}</span>
+                </span>
+              </span>
+              <img src='#{og.secure_image_url}' #{og.title_attr} height='#{og.image_height}' width='#{og.image_width}'>
+            </a>
+          </div>
+        HTML
+      end
+
+      # Renders a single Flickr photo as a linked image, sized from the
+      # OpenGraph image dimensions.
+      def image_html(og)
+        escaped_url = ::Onebox::Helpers.normalize_url_for_output(url)
+
+        # The alt text names the provider; this is the Flickr engine, not Imgur.
+        <<-HTML
+          <a href='#{escaped_url}' target='_blank' rel='noopener' class="onebox">
+            <img src='#{og.secure_image_url}' #{og.title_attr} alt='Flickr' height='#{og.image_height}' width='#{og.image_width}'>
+          </a>
+        HTML
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/flickr_shortened_onebox.rb
+++ b/lib/onebox/engine/flickr_shortened_onebox.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+require_relative './opengraph_image'
+
+module Onebox
+  module Engine
+    class FlickrShortenedOnebox
+      include Engine
+      include StandardEmbed
+      include OpengraphImage
+
+      matches_regexp(/^https?:\/\/flic\.kr\/p\//)
+      always_https
+    end
+  end
+end
--- a/lib/onebox/engine/gfycat_onebox.rb
+++ b/lib/onebox/engine/gfycat_onebox.rb
@@ -0,0 +1,113 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class GfycatOnebox
+      include Engine
+      include JSON
+
+      matches_regexp(/^https?:\/\/gfycat\.com\//)
+      always_https
+
+      # This engine should have priority over AllowlistedGenericOnebox.
+      def self.priority
+        1
+      end
+
+      def to_html
+        <<-HTML
+          <aside class="onebox gfycat">
+            <header class="source">
+              <img src="https://gfycat.com/static/favicons/favicon-96x96.png" class="site-icon" width="64" height="64">
+              <a href="#{data[:url]}" target="_blank" rel="nofollow ugc noopener">Gfycat.com</a>
+            </header>
+
+            <article class="onebox-body">
+              <h4>
+                #{data[:title]} by
+                <a href="https://gfycat.com/@#{data[:author]}" target="_blank" rel="nofollow ugc noopener">
+                  <span>#{data[:author]}</span>
+                </a>
+              </h4>
+
+              <div class="video" style="--aspect-ratio: #{data[:width]}/#{data[:height]}">
+                <video controls loop muted poster="#{data[:posterUrl]}">
+                  <source id="webmSource" src="#{data[:webmUrl]}" type="video/webm">
+                  <source id="mp4Source" src="#{data[:mp4Url]}" type="video/mp4">
+                  <img title="Sorry, your browser doesn't support HTML5 video." src="#{data[:posterUrl]}">
+                </video>
+              </div>
+
+              <p>
+                <span class="label1">#{data[:keywords]}</span>
+              </p>
+            </article>
+
+            <div style="clear: both"></div>
+          </aside>
+        HTML
+      end
+
+      def placeholder_html
+        <<-HTML
+          <a href="#{data[:url]}">
+            <img src="#{data[:posterUrl]}" width="#{data[:width]}" height="#{data[:height]}"><br/>
+            #{data[:name]}
+          </a>
+        HTML
+      end
+
+      private
+
+      def match
+        @match ||= @url.match(/^https?:\/\/gfycat\.com\/(gifs\/detail\/)?(?<name>.+)/)
+      end
+
+      # Fetches the page and returns its JSON-LD metadata as a symbol-keyed
+      # Hash, memoized in @og_data. Yields an empty Hash when the page has no
+      # `application/ld+json` script.
+      def og_data
+        # defined? (rather than ||=) so the result is computed only once.
+        return @og_data if defined?(@og_data)
+
+        # Best effort: a failed fetch leaves `response` nil.
+        response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
+        page = Nokogiri::HTML(response)
+        script = page.at_css('script[type="application/ld+json"]')
+
+        if json_string = script&.text
+          @og_data = Onebox::Helpers.symbolize_keys(::MultiJson.load(json_string))
+        else
+          @og_data = {}
+        end
+      end
+
+      # Builds and memoizes the Hash consumed by #to_html and #placeholder_html.
+      #
+      # Always contains :name, :title, :author and :url. Adds :keywords (a
+      # string of search links) when the metadata lists keywords, and the
+      # video URLs, poster URL and dimensions when the metadata has a :video entry.
+      def data
+        return @data if defined?(@data)
+
+        @data = {
+          name: match[:name],
+          title: og_data[:headline] || 'No Title',
+          author: og_data[:author],
+          url: @url,
+        }
+
+        # NOTE(review): keywords are interpolated into markup without trimming or
+        # escaping — confirm the metadata is trusted or sanitized downstream.
+        if keywords = og_data[:keywords]&.split(',')
+          @data[:keywords] = keywords
+            .map { |keyword| "<a href='https://gfycat.com/gifs/search/#{keyword}'>##{keyword}</a>" }
+            .join(' ')
+        end
+
+        if og_data[:video]
+          content_url = ::Onebox::Helpers.normalize_url_for_output(og_data[:video][:contentUrl])
+          # Pathname is used only to swap the file extension of the URL.
+          video_url = Pathname.new(content_url)
+          @data[:webmUrl] = video_url.sub_ext(".webm").to_s
+          @data[:mp4Url] = video_url.sub_ext(".mp4").to_s
+
+          thumbnail_url = ::Onebox::Helpers.normalize_url_for_output(og_data[:video][:thumbnailUrl])
+          @data[:posterUrl] = thumbnail_url
+
+          @data[:width] = og_data[:video][:width]
+          @data[:height] = og_data[:video][:height]
+        end
+
+        @data
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/giphy_onebox.rb
+++ b/lib/onebox/engine/giphy_onebox.rb
@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class GiphyOnebox
+      include Engine
+      include StandardEmbed
+
+      matches_regexp(/^https?:\/\/(giphy\.com\/gifs|gph\.is)\//)
+      always_https
+
+      def to_html
+        oembed = get_oembed
+
+        <<-HTML
+          <a href="#{oembed.url}" target="_blank" rel="noopener" class="onebox">
+            <img src="#{oembed.url}" width="#{oembed.width}" height="#{oembed.height}" #{oembed.title_attr}>
+          </a>
+        HTML
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/github_blob_onebox.rb
+++ b/lib/onebox/engine/github_blob_onebox.rb
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+require_relative '../mixins/git_blob_onebox'
+
+module Onebox
+  module Engine
+    class GithubBlobOnebox
+      def self.git_regexp
+        /^https?:\/\/(www\.)?github\.com.*\/blob\//
+      end
+
+      def self.onebox_name
+        "githubblob"
+      end
+
+      include Onebox::Mixins::GitBlobOnebox
+
+      def raw_regexp
+        /github\.com\/(?<user>[^\/]+)\/(?<repo>[^\/]+)\/blob\/(?<sha1>[^\/]+)\/(?<file>[^#]+)(#(L(?<from>[^-]*)(-L(?<to>.*))?))?/mi
+      end
+
+      def raw_template(m)
+        "https://raw.githubusercontent.com/#{m[:user]}/#{m[:repo]}/#{m[:sha1]}/#{m[:file]}"
+      end
+
+      def title
+        Sanitize.fragment(Onebox::Helpers.uri_unencode(link).sub(/^https?\:\/\/github\.com\//, ''))
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/github_commit_onebox.rb
+++ b/lib/onebox/engine/github_commit_onebox.rb
@@ -0,0 +1,51 @@
+# frozen_string_literal: true
+
+require_relative '../mixins/github_body'
+
+module Onebox
+  module Engine
+    class GithubCommitOnebox
+      include Engine
+      include LayoutSupport
+      include JSON
+      include Onebox::Mixins::GithubBody
+
+      matches_regexp(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?(github)\.com(?:\/)?(?:.)*\/commit\//)
+      always_https
+
+      def url
+        "https://api.github.com/repos/#{match[:owner]}/#{match[:repository]}/commits/#{match[:sha]}"
+      end
+
+      private
+
+      def match
+        return @match if defined?(@match)
+
+        @match = @url.match(%{github\.com/(?<owner>[^/]+)/(?<repository>[^/]+)/commit/(?<sha>[^/]+)})
+        @match ||= @url.match(%{github\.com/(?<owner>[^/]+)/(?<repository>[^/]+)/pull/(?<pr>[^/]+)/commit/(?<sha>[^/]+)})
+
+        @match
+      end
+
+      def data
+        result = raw.clone
+
+        lines = result['commit']['message'].split("\n")
+        result['title'] = lines.first
+        result['body'], result['excerpt'] = compute_body(lines[1..lines.length].join("\n"))
+
+        committed_at = Time.parse(result['commit']['author']['date'])
+        result['committed_at'] = committed_at.strftime("%I:%M%p - %d %b %y %Z")
+        result['committed_at_date'] = committed_at.strftime("%F")
+        result['committed_at_time'] = committed_at.strftime("%T")
+
+        result['link'] = link
+        ulink = URI(link)
+        result['domain'] = "#{ulink.host}/#{ulink.path.split('/')[1]}/#{ulink.path.split('/')[2]}"
+
+        result
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/github_folder_onebox.rb
+++ b/lib/onebox/engine/github_folder_onebox.rb
@@ -0,0 +1,78 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class GithubFolderOnebox
+      include Engine
+      include StandardEmbed
+      include LayoutSupport
+
+      matches_regexp(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?(github)\.com[\:\d]*(\/[^\/]+){2}/)
+      always_https
+
+      def self.priority
+        # This engine should have lower priority than the other Github engines
+        150
+      end
+
+      private
+
+      def data
+        og = get_opengraph
+
+        max_length = 250
+
+        display_path = extract_path(og.url, max_length)
+        display_description = clean_description(og.description, og.title, max_length)
+
+        title = og.title
+
+        fragment = Addressable::URI.parse(url).fragment
+        if fragment
+          fragment = Addressable::URI.unencode(fragment)
+
+          if html_doc.css('.Box.md')
+            # For links to markdown docs
+            node = html_doc.css('a.anchor').find { |n| n['href'] == "##{fragment}" }
+            subtitle = node&.parent&.text
+          elsif html_doc.css('.Box.rdoc')
+            # For links to rdoc docs
+            node = html_doc.css('h3').find { |n| n['id'] == "user-content-#{fragment.downcase}" }
+            subtitle = node&.css('text()')&.first&.text
+          end
+
+          title = "#{title} - #{subtitle}" if subtitle
+        end
+
+        {
+          link: url,
+          image: og.image,
+          title: Onebox::Helpers.truncate(title, 250),
+          path: display_path,
+          description: display_description,
+          favicon: get_favicon
+        }
+      end
+
+      def extract_path(root, max_length)
+        path = url.split('#')[0].split('?')[0]
+        path = path["#{root}/tree/".length..-1]
+
+        return unless path
+
+        path.length > max_length ? path[-max_length..-1] : path
+      end
+
+      def clean_description(description, title, max_length)
+        return unless description
+
+        desc_end = " - #{title}"
+        if description[-desc_end.length..-1] == desc_end
+          description = description[0...-desc_end.length]
+        end
+
+        Onebox::Helpers.truncate(description, max_length)
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/github_gist_onebox.rb
+++ b/lib/onebox/engine/github_gist_onebox.rb
@@ -0,0 +1,81 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class GithubGistOnebox
+      include Engine
+      include LayoutSupport
+      include JSON
+
+      MAX_FILES = 3
+
+      matches_regexp(/^http(?:s)?:\/\/gist\.(?:(?:\w)+\.)?(github)\.com(?:\/)?/)
+      always_https
+
+      def url
+        "https://api.github.com/gists/#{match[:sha]}"
+      end
+
+      private
+
+      def data
+        @data ||= {
+          title: 'gist.github.com',
+          link: link,
+          gist_files: gist_files.take(MAX_FILES),
+          truncated_files?: truncated_files?
+        }
+      end
+
+      def truncated_files?
+        gist_files.size > MAX_FILES
+      end
+
+      def gist_files
+        return [] unless gist_api
+
+        @gist_files ||= gist_api["files"].values.map do |file_json|
+          GistFile.new(file_json)
+        end
+      end
+
+      def gist_api
+        @raw ||= raw.clone
+      rescue OpenURI::HTTPError
+        # The Gist API rate limit of 60 requests per hour was reached.
+        nil
+      end
+
+      def match
+        @match ||= @url.match(%r{gist\.github\.com/([^/]+/)?(?<sha>[0-9a-f]+)})
+      end
+
+      class GistFile
+        attr_reader :filename
+        attr_reader :language
+
+        MAX_LINES = 10
+
+        def initialize(json)
+          @json = json
+          @filename = @json["filename"]
+          @language = @json["language"]
+        end
+
+        def content
+          lines.take(MAX_LINES).join("\n")
+        end
+
+        def truncated?
+          lines.size > MAX_LINES
+        end
+
+        private
+
+        def lines
+          @lines ||= @json["content"].split("\n")
+        end
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/github_issue_onebox.rb
+++ b/lib/onebox/engine/github_issue_onebox.rb
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+
+require_relative '../mixins/github_body'
+
+module Onebox
+  module Engine
+    class GithubIssueOnebox
+      #Author Lidlanca 2014
+      include Engine
+      include LayoutSupport
+      include JSON
+      include Onebox::Mixins::GithubBody
+
+      matches_regexp(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?github\.com\/(?<org>.+)\/(?<repo>.+)\/issues\/([[:digit:]]+)/)
+      always_https
+
+      def url
+        m = match
+        "https://api.github.com/repos/#{m["org"]}/#{m["repo"]}/issues/#{m["item_id"]}"
+      end
+
+      private
+
+      def match
+        @match ||= @url.match(/^http(?:s)?:\/\/(?:www\.)?(?:(?:\w)+\.)?github\.com\/(?<org>.+)\/(?<repo>.+)\/(?<type>issues)\/(?<item_id>[\d]+)/)
+      end
+
+      def data
+        created_at = Time.parse(raw['created_at'])
+        closed_at = Time.parse(raw['closed_at']) if raw['closed_at']
+        body, excerpt = compute_body(raw['body'])
+        ulink = URI(link)
+
+        {
+          link: @url,
+          title: raw["title"],
+          body: body,
+          excerpt: excerpt,
+          labels: raw["labels"],
+          user: raw['user'],
+          created_at: created_at.strftime("%I:%M%p - %d %b %y %Z"),
+          created_at_date: created_at.strftime("%F"),
+          created_at_time: created_at.strftime("%T"),
+          closed_at: closed_at&.strftime("%I:%M%p - %d %b %y %Z"),
+          closed_at_date: closed_at&.strftime("%F"),
+          closed_at_time: closed_at&.strftime("%T"),
+          closed_by: raw['closed_by'],
+          avatar: "https://avatars1.githubusercontent.com/u/#{raw['user']['id']}?v=2&s=96",
+          domain: "#{ulink.host}/#{ulink.path.split('/')[1]}/#{ulink.path.split('/')[2]}",
+        }
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/github_pull_request_onebox.rb
+++ b/lib/onebox/engine/github_pull_request_onebox.rb
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+
+require_relative '../mixins/github_body'
+
+module Onebox
+  module Engine
+    class GithubPullRequestOnebox
+      include Engine
+      include LayoutSupport
+      include JSON
+      include Onebox::Mixins::GithubBody
+
+      GITHUB_COMMENT_REGEX = /(<!--.*?-->\r\n)/
+
+      matches_regexp(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?(github)\.com(?:\/)?(?:.)*\/pull/)
+      always_https
+
+      def url
+        "https://api.github.com/repos/#{match[:owner]}/#{match[:repository]}/pulls/#{match[:number]}"
+      end
+
+      private
+
+      def match
+        @match ||= @url.match(%r{github\.com/(?<owner>[^/]+)/(?<repository>[^/]+)/pull/(?<number>[^/]+)})
+      end
+
+      def data
+        result = raw.clone
+        result['link'] = link
+
+        created_at = Time.parse(result['created_at'])
+        result['created_at'] = created_at.strftime("%I:%M%p - %d %b %y %Z")
+        result['created_at_date'] = created_at.strftime("%F")
+        result['created_at_time'] = created_at.strftime("%T")
+
+        ulink = URI(link)
+        result['domain'] = "#{ulink.host}/#{ulink.path.split('/')[1]}/#{ulink.path.split('/')[2]}"
+
+        result['body'], result['excerpt'] = compute_body(result['body'])
+
+        result
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/gitlab_blob_onebox.rb
+++ b/lib/onebox/engine/gitlab_blob_onebox.rb
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+require_relative '../mixins/git_blob_onebox'
+
+module Onebox
+  module Engine
+    class GitlabBlobOnebox
+      def self.git_regexp
+        /^https?:\/\/(www\.)?gitlab\.com.*\/blob\//
+      end
+
+      def self.onebox_name
+        "gitlabblob"
+      end
+
+      include Onebox::Mixins::GitBlobOnebox
+
+      def raw_regexp
+        /gitlab\.com\/(?<user>[^\/]+)\/(?<repo>[^\/]+)\/blob\/(?<sha1>[^\/]+)\/(?<file>[^#]+)(#(L(?<from>[^-]*)(-L(?<to>.*))?))?/mi
+      end
+
+      def raw_template(m)
+        "https://gitlab.com/#{m[:user]}/#{m[:repo]}/raw/#{m[:sha1]}/#{m[:file]}"
+      end
+
+      def title
+        Sanitize.fragment(Onebox::Helpers.uri_unencode(link).sub(/^https?\:\/\/gitlab\.com\//, ''))
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/google_calendar_onebox.rb
+++ b/lib/onebox/engine/google_calendar_onebox.rb
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class GoogleCalendarOnebox
+      include Engine
+
+      matches_regexp(/^(https?:)?\/\/((www|calendar)\.google\.[\w.]{2,}|goo\.gl)\/calendar\/.+$/)
+      always_https
+      requires_iframe_origins "https://calendar.google.com"
+
+      def to_html
+        url = @url.split('&').first
+        src = ::Onebox::Helpers.normalize_url_for_output(url)
+        "<iframe src='#{src}&rm=minimal' style='border: 0' width='800' height='600' frameborder='0' scrolling='no'>#{placeholder_html}</iframe>"
+      end
+
+      def placeholder_html
+        <<-HTML
+          <div placeholder>
+            <div class='gdocs-onebox gdocs-onebox-splash' style='display:table-cell;vertical-align:middle;width:800px;height:600px'>
+              <div style='text-align:center;'>
+                <div class='gdocs-onebox-logo g-calendar-logo'></div>
+                <p>Google Calendar</p>
+              </div>
+            </div>
+          </div>
+        HTML
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/google_docs_onebox.rb
+++ b/lib/onebox/engine/google_docs_onebox.rb
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class GoogleDocsOnebox
+      include Engine
+      include StandardEmbed
+      include LayoutSupport
+
+      SUPPORTED_ENDPOINTS = %w(spreadsheets document forms presentation)
+      SHORT_TYPES = {
+        spreadsheets: :sheets,
+        document: :docs,
+        presentation: :slides,
+        forms: :forms,
+      }
+
+      matches_regexp(/^(https?:)?\/\/(docs\.google\.com)\/(?<endpoint>(#{SUPPORTED_ENDPOINTS.join('|')}))\/d\/((?<key>[\w-]*)).+$/)
+      always_https
+
+      private
+
+      def data
+        og_data = get_opengraph
+        short_type = SHORT_TYPES[match[:endpoint].to_sym]
+
+        description = if Onebox::Helpers.blank?(og_data.description)
+          "This #{short_type.to_s.chop.capitalize} is private"
+        else
+          Onebox::Helpers.truncate(og_data.description, 250)
+        end
+
+        {
+          link: link,
+          title: og_data.title || "Google #{short_type.to_s.capitalize}",
+          description: description,
+          type: short_type
+        }
+      end
+
+      def match
+        @match ||= @url.match(@@matcher)
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/google_drive_onebox.rb
+++ b/lib/onebox/engine/google_drive_onebox.rb
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class GoogleDriveOnebox
+      include Engine
+      include StandardEmbed
+      include LayoutSupport
+
+      matches_regexp(/^(https?:)?\/\/(drive\.google\.com)\/file\/d\/(?<key>[\w-]*)\/.+$/)
+      always_https
+
+      protected
+
+      def data
+        og_data = get_opengraph
+        title = og_data.title || "Google Drive"
+        title = "#{og_data.title} (video)" if og_data.type =~ /^video[\/\.]/
+        description = og_data.description || "Google Drive file."
+
+        {
+          link: link,
+          title: title,
+          description: Onebox::Helpers.truncate(description, 250),
+          image: og_data.image
+        }
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/google_maps_onebox.rb
+++ b/lib/onebox/engine/google_maps_onebox.rb
@@ -0,0 +1,184 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    # Onebox that embeds Google Maps links in an iframe.
+    #
+    # The link is classified against a list of keyed patterns (:short, :custom,
+    # :streetview, :standard, :canonical) and rewritten at construction time
+    # into an embeddable URL (@url) plus a static placeholder image URL
+    # (@placeholder). Resolving may perform HTTP HEAD requests.
+    class GoogleMapsOnebox
+      include Engine
+
+      class << self
+        # For a URI, true when any registered pattern matches its string form;
+        # any other argument is handled by the inherited `===`.
+        def ===(other)
+          if other.kind_of? URI
+            @@matchers && @@matchers.any? { |m| other.to_s =~ m[:regexp] }
+          else
+            super
+          end
+        end
+
+        private
+
+        # Registers a pattern under a key. Unlike the single-argument
+        # `matches_regexp` used by the other engines, this one keeps an
+        # ordered list; `match_url` returns the first pattern that matches.
+        def matches_regexp(key, regexp)
+          (@@matchers ||= []) << { key: key, regexp: regexp }
+        end
+      end
+
+      always_https
+      requires_iframe_origins("https://maps.google.com", "https://google.com")
+
+      # Matches shortened Google Maps URLs
+      matches_regexp :short,      %r"^(https?:)?//goo\.gl/maps/"
+
+      # Matches URLs for custom-created maps
+      matches_regexp :custom,     %r"^(?:https?:)?//www\.google(?:\.(?:\w{2,}))+/maps/d/(?:edit|viewer|embed)\?mid=.+$"
+
+      # Matches URLs with streetview data
+      matches_regexp :streetview, %r"^(?:https?:)?//www\.google(?:\.(?:\w{2,}))+/maps[^@]+@(?<lon>-?[\d.]+),(?<lat>-?[\d.]+),(?:\d+)a,(?<zoom>[\d.]+)y,(?<heading>[\d.]+)h,(?<pitch>[\d.]+)t.+?data=.*?!1s(?<pano>[^!]{22})"
+
+      # Matches "normal" Google Maps URLs with arbitrary data
+      matches_regexp :standard,   %r"^(?:https?:)?//www\.google(?:\.(?:\w{2,}))+/maps"
+
+      # Matches URLs for the old Google Maps domain which we occasionally get redirected to
+      matches_regexp :canonical,  %r"^(?:https?:)?//maps\.google(?:\.(?:\w{2,}))+/maps\?"
+
+      # Resolves the URL eagerly. The HTTP, timeout, connection and runtime
+      # errors listed in the rescue clause are re-raised as ArgumentError.
+      def initialize(url, timeout = nil)
+        super
+        resolve_url!
+      rescue Net::HTTPServerException, Timeout::Error, Net::HTTPError, Errno::ECONNREFUSED, RuntimeError => err
+        raise ArgumentError, "malformed url or unresolveable: #{err.message}"
+      end
+
+      # True when the link was resolved as a street view URL.
+      def streetview?
+        !!@streetview
+      end
+
+      # Fixed-size (690x400) iframe with the placeholder markup nested inside.
+      def to_html
+        "<div class='maps-onebox'><iframe src=\"#{link}\" width=\"690\" height=\"400\" frameborder=\"0\" style=\"border:0\">#{placeholder_html}</iframe></div>"
+      end
+
+      def placeholder_html
+        ::Onebox::Helpers.map_placeholder_html
+      end
+
+      private
+
+      def data
+        { link: url, title: url }
+      end
+
+      # Classifies @url and rewrites it in place into an embeddable URL,
+      # setting @placeholder (and @streetview) along the way.
+      # Raises ArgumentError or RuntimeError when the URL cannot be handled.
+      def resolve_url!
+        @streetview = false
+        type, match = match_url
+
+        # Resolve shortened URL, if necessary
+        if type == :short
+          follow_redirect!
+          type, match = match_url
+        end
+
+        # Try to get the old-maps URI, it is far easier to embed.
+        if type == :standard
+          # The counter is decremented before each check, so at most 9
+          # redirects are followed, with a 0.1s pause between attempts.
+          retry_count = 10
+          while (retry_count -= 1) > 0
+            follow_redirect!
+            type, match = match_url
+            break if type != :standard
+            sleep 0.1
+          end
+        end
+
+        case type
+        when :standard
+          # Fallback for map URLs that don't resolve into an easily embeddable old-style URI
+          # Roadmaps use a "z" zoomlevel, satellite maps use "m" the horizontal width in meters
+          # TODO: tilted satellite maps using "a,y,t"
+          # NOTE(review): the first captured number is named `lon` and the second
+          # `lat`; they are only ever emitted in captured order, so the naming
+          # does not affect the output -- confirm against Google's URL format.
+          match = @url.match(/@(?<lon>[\d.-]+),(?<lat>[\d.-]+),(?<zoom>\d+)(?<mz>[mz])/)
+          raise "unexpected standard url #{@url}" unless match
+          # For "m" URLs the width in meters is converted to a zoom level
+          # (presumably 57280048.0 is the map width at zoom 0 -- TODO confirm).
+          zoom = match[:mz] == "z" ? match[:zoom] : Math.log2(57280048.0 / match[:zoom].to_f).round
+          location = "#{match[:lon]},#{match[:lat]}"
+          url = "https://maps.google.com/maps?ll=#{location}&z=#{zoom}&output=embed&dg=ntvb"
+          # Carry over the place name and, when present, a cid built from the
+          # two 16-digit hex ids concatenated and parsed as one hex number.
+          url += "&q=#{$1}" if match = @url.match(/\/place\/([^\/\?]+)/)
+          url += "&cid=#{($1 + $2).to_i(16)}" if @url.match(/!3m1!1s0x(\h{16}):0x(\h{16})/)
+          @url = url
+          @placeholder = "https://maps.googleapis.com/maps/api/staticmap?maptype=roadmap&center=#{location}&zoom=#{zoom}&size=690x400&sensor=false"
+
+        when :custom
+          # Same map id, with the last path segment swapped for the embed and
+          # thumbnail endpoints respectively.
+          url = @url.dup
+          @url = rewrite_custom_url(url, "embed")
+          @placeholder = rewrite_custom_url(url, "thumbnail")
+          @placeholder_height = @placeholder_width = 120
+
+        when :streetview
+          @streetview = true
+          panoid = match[:pano]
+          lon = match[:lon].to_f.to_s
+          lat = match[:lat].to_f.to_s
+          heading = match[:heading].to_f.round(4).to_s
+          # The embed URL takes pitch scaled down by 10 and fov (from the URL's
+          # "y" value) scaled down by 100; the placeholder uses the rounded
+          # "y" value as its fov.
+          pitch = (match[:pitch].to_f / 10.0).round(4).to_s
+          fov = (match[:zoom].to_f / 100.0).round(4).to_s
+          zoom = match[:zoom].to_f.round
+          @url = "https://www.google.com/maps/embed?pb=!3m2!2sen!4v0!6m8!1m7!1s#{panoid}!2m2!1d#{lon}!2d#{lat}!3f#{heading}!4f#{pitch}!5f#{fov}"
+          @placeholder = "https://maps.googleapis.com/maps/api/streetview?size=690x400&location=#{lon},#{lat}&pano=#{panoid}&fov=#{zoom}&heading=#{heading}&pitch=#{pitch}&sensor=false"
+
+        when :canonical
+          query = URI::decode_www_form(uri.query).to_h
+          # "ll" (location) is required; fall back to "sll" and add it to the URL.
+          if !query.has_key?("ll")
+            raise ArgumentError, "canonical url lacks location argument" unless query.has_key?("sll")
+            query["ll"] = query["sll"]
+            @url += "&ll=#{query["sll"]}"
+          end
+          location = query["ll"]
+          # Without an explicit zoom ("z"), derive one from the span ("spn",
+          # falling back to "sspn") for a 690px wide map.
+          if !query.has_key?("z")
+            raise ArgumentError, "canonical url has incomplete query arguments" unless query.has_key?("spn") || query.has_key?("sspn")
+            if !query.has_key?("spn")
+              query["spn"] = query["sspn"]
+              @url += "&spn=#{query["sspn"]}"
+            end
+            angle = query["spn"].split(",").first.to_f
+            zoom = (Math.log(690.0 * 360.0 / angle / 256.0) / Math.log(2)).round
+          else
+            zoom = query["z"]
+          end
+          @url = @url.sub('output=classic', 'output=embed')
+          @placeholder = "https://maps.googleapis.com/maps/api/staticmap?maptype=roadmap&size=690x400&sensor=false&center=#{location}&zoom=#{zoom}"
+
+        else
+          raise "unexpected url type #{type.inspect}"
+        end
+      end
+
+      # Returns [key, MatchData] for the first registered pattern matching
+      # @url; raises ArgumentError when none does.
+      def match_url
+        @@matchers.each do |matcher|
+          if m = matcher[:regexp].match(@url)
+            return matcher[:key], m
+          end
+        end
+        raise ArgumentError, "\"#{@url}\" does not match any known pattern"
+      end
+
+      # Replaces the final word of a "/maps/d/<word>" path with `target`.
+      def rewrite_custom_url(url, target)
+        uri = URI(url)
+        uri.path = uri.path.sub(/(?<=^\/maps\/d\/)\w+$/, target)
+        uri.to_s
+      end
+
+      # Issues a HEAD request for the current URI's path (the query string is
+      # not sent) and updates @url/@uri: unchanged on 200, the Location header
+      # on 301/302. Any other status raises a RuntimeError.
+      def follow_redirect!
+        begin
+          http = Net::HTTP.start(
+            uri.host,
+            uri.port,
+            use_ssl: uri.scheme == 'https',
+            open_timeout: timeout,
+            read_timeout: timeout
+          )
+
+          response = http.head(uri.path)
+          raise "unexpected response code #{response.code}" unless %w(200 301 302).include?(response.code)
+
+          @url = response.code == "200" ? uri.to_s : response["Location"]
+          @uri = URI(@url)
+        ensure
+          # `http` is nil when the connection could not be opened; the
+          # resulting error is deliberately swallowed here.
+          http.finish rescue nil
+        end
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/google_photos_onebox.rb
+++ b/lib/onebox/engine/google_photos_onebox.rb
@@ -0,0 +1,73 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class GooglePhotosOnebox
+      include Engine
+      include StandardEmbed
+
+      matches_regexp(/^https?:\/\/(photos)\.(app\.goo\.gl|google\.com)/)
+      always_https
+
+      def to_html
+        og = get_opengraph
+        return video_html(og) if og.video_secure_url
+        return album_html(og) if og.type == "google_photos:photo_album"
+        return image_html(og) if og.image
+        nil
+      end
+
+      private
+
+      def video_html(og)
+        escaped_url = ::Onebox::Helpers.normalize_url_for_output(url)
+
+        <<-HTML
+          <aside class="onebox google-photos">
+            <header class="source">
+              <img src="#{raw[:favicon]}" class="site-icon" width="16" height="16">
+              <a href="#{escaped_url}" target="_blank" rel="nofollow ugc noopener">#{raw[:site_name]}</a>
+            </header>
+            <article class="onebox-body">
+              <h3><a href="#{escaped_url}" target="_blank" rel="nofollow ugc noopener">#{og.title}</a></h3>
+              <div class="aspect-image-full-size">
+                <a href="#{escaped_url}" target="_blank" rel="nofollow ugc noopener">
+                  <img src="#{og.secure_image_url}" class="scale-image"/>
+                  <span class="instagram-video-icon"></span>
+                </a>
+              </div>
+            </article>
+          </aside>
+        HTML
+      end
+
+      def album_html(og)
+        escaped_url = ::Onebox::Helpers.normalize_url_for_output(url)
+        album_title = og.description.nil? ? og.title : "[#{og.description}] #{og.title}"
+
+        <<-HTML
+          <div class='onebox google-photos-album'>
+            <a href='#{escaped_url}' target='_blank' rel='noopener'>
+              <span class='outer-box' style='width:#{og.image_width}px'>
+                <span class='inner-box'>
+                  <span class='album-title'>#{Onebox::Helpers.truncate(album_title, 80)}</span>
+                </span>
+              </span>
+              <img src='#{og.secure_image_url}' #{og.title_attr} height='#{og.image_height}' width='#{og.image_width}'>
+            </a>
+          </div>
+        HTML
+      end
+
+      def image_html(og)
+        escaped_url = ::Onebox::Helpers.normalize_url_for_output(url)
+
+        <<-HTML
+          <a href='#{escaped_url}' target='_blank' rel='noopener' class="onebox">
+            <img src='#{og.secure_image_url}' #{og.title_attr} alt='Google Photos' height='#{og.image_height}' width='#{og.image_width}'>
+          </a>
+        HTML
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/google_play_app_onebox.rb
+++ b/lib/onebox/engine/google_play_app_onebox.rb
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class GooglePlayAppOnebox
+      include Engine
+      include LayoutSupport
+      include HTML
+
+      DEFAULTS = {
+        MAX_DESCRIPTION_CHARS: 500
+      }
+
+      matches_regexp(/^https?:\/\/play\.(?:(?:\w)+\.)?(google)\.com(?:\/)?\/store\/apps\//)
+      always_https
+
+      private
+
+      def data
+        price = raw.css("meta[itemprop=price]").first["content"] rescue "Free"
+        {
+          link: link,
+          title: raw.css("meta[property='og:title']").first["content"].gsub(" - Apps on Google Play", ""),
+          image: ::Onebox::Helpers.normalize_url_for_output(raw.css("meta[property='og:image']").first["content"]),
+          description: raw.css("meta[name=description]").first["content"][0..DEFAULTS[:MAX_DESCRIPTION_CHARS]].chop + "...",
+          price: price == "0" ? "Free" : price
+        }
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/html.rb
+++ b/lib/onebox/engine/html.rb
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    module HTML
+      private
+
+      # Overwrite for any custom headers
+      def http_params
+        {}
+      end
+
+      def raw
+        @raw ||= Onebox::Helpers.fetch_html_doc(url, http_params, body_cacher)
+      end
+
+      def body_cacher
+        self.options&.[](:body_cacher)
+      end
+
+      def html?
+        raw.respond_to(:css)
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/image_onebox.rb
+++ b/lib/onebox/engine/image_onebox.rb
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class ImageOnebox
+      include Engine
+
+      matches_regexp(/^(https?:)?\/\/.+\.(png|jpg|jpeg|gif|bmp|tif|tiff)(\?.*)?$/i)
+
+      def always_https?
+        AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
+      end
+
+      def to_html
+        # Fix Dropbox image links
+        if @url[/^https:\/\/www.dropbox.com\/s\//]
+          @url.sub!("https://www.dropbox.com", "https://dl.dropboxusercontent.com")
+        end
+
+        escaped_url = ::Onebox::Helpers.normalize_url_for_output(@url)
+        <<-HTML
+          <a href="#{escaped_url}" target="_blank" rel="noopener" class="onebox">
+            <img src="#{escaped_url}">
+          </a>
+        HTML
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/imgur_onebox.rb
+++ b/lib/onebox/engine/imgur_onebox.rb
@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class ImgurOnebox
+      include Engine
+      include StandardEmbed
+
+      matches_regexp(/^https?:\/\/(www\.)?imgur\.com/)
+      always_https
+
+      def to_html
+        og = get_opengraph
+        return video_html(og) if !og.video_secure_url.nil?
+        return album_html(og) if is_album?
+        return image_html(og) if !og.image.nil?
+        nil
+      end
+
+      private
+
+      def video_html(og)
+        <<-HTML
+            <video width='#{og.video_width}' height='#{og.video_height}' #{og.title_attr} controls loop>
+              <source src='#{og.video_secure_url}' type='video/mp4'>
+              <source src='#{og.video_secure_url.gsub('mp4', 'webm')}' type='video/webm'>
+            </video>
+          HTML
+      end
+
+      def album_html(og)
+        escaped_url = ::Onebox::Helpers.normalize_url_for_output(url)
+        album_title = "[Album] #{og.title}"
+
+        <<-HTML
+            <div class='onebox imgur-album'>
+              <a href='#{escaped_url}' target='_blank' rel='noopener'>
+                <span class='outer-box' style='width:#{og.image_width}px'>
+                  <span class='inner-box'>
+                    <span class='album-title'>#{album_title}</span>
+                  </span>
+                </span>
+                <img src='#{og.secure_image_url}' #{og.title_attr} height='#{og.image_height}' width='#{og.image_width}'>
+              </a>
+            </div>
+          HTML
+      end
+
+      def is_album?
+        response = Onebox::Helpers.fetch_response("https://api.imgur.com/oembed.json?url=#{url}") rescue "{}"
+        oembed_data = Onebox::Helpers.symbolize_keys(::MultiJson.load(response))
+        imgur_data_id = Nokogiri::HTML(oembed_data[:html]).xpath("//blockquote").attr("data-id")
+        imgur_data_id.to_s[/a\//]
+      end
+
+      def image_html(og)
+        escaped_url = ::Onebox::Helpers.normalize_url_for_output(url)
+
+        <<-HTML
+            <a href='#{escaped_url}' target='_blank' rel='noopener' class="onebox">
+              <img src='#{og.secure_image_url.chomp("?fb")}' #{og.title_attr} alt='Imgur'>
+            </a>
+          HTML
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/instagram_onebox.rb
+++ b/lib/onebox/engine/instagram_onebox.rb
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class InstagramOnebox
+      include Engine
+      include StandardEmbed
+      include LayoutSupport
+
+      matches_regexp(/^https?:\/\/(?:www\.)?(?:instagram\.com|instagr\.am)\/?(?:.*)\/(?:p|tv)\/[a-zA-Z\d_-]+/)
+      always_https
+
+      def clean_url
+        url.scan(/^https?:\/\/(?:www\.)?(?:instagram\.com|instagr\.am)\/?(?:.*)\/(?:p|tv)\/[a-zA-Z\d_-]+/).flatten.first
+      end
+
+      def data
+        oembed = get_oembed
+        raise "No oEmbed data found. Ensure 'facebook_app_access_token' is valid" if oembed.data.empty?
+
+        {
+          link: clean_url.gsub("/#{oembed.author_name}/", "/"),
+          title: "@#{oembed.author_name}",
+          image: oembed.thumbnail_url,
+          description: Onebox::Helpers.truncate(oembed.title, 250),
+        }
+
+      end
+
+      protected
+
+      def access_token
+        (options[:facebook_app_access_token] || Onebox.options.facebook_app_access_token).to_s
+      end
+
+      def get_oembed_url
+        if access_token != ''
+          "https://graph.facebook.com/v9.0/instagram_oembed?url=#{clean_url}&access_token=#{access_token}"
+        else
+          # The following is officially deprecated by Instagram, but works in some limited circumstances.
+          "https://api.instagram.com/oembed/?url=#{clean_url}"
+        end
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/json.rb
+++ b/lib/onebox/engine/json.rb
@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    module JSON
+      private
+
+      def raw
+        @raw ||= ::MultiJson.load(URI.open(url, read_timeout: timeout))
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/kaltura_onebox.rb
+++ b/lib/onebox/engine/kaltura_onebox.rb
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class KalturaOnebox
+      include Engine
+      include StandardEmbed
+
+      always_https
+      matches_regexp(/^https?:\/\/[a-z0-9]+\.kaltura\.com\/id\/[a-zA-Z0-9]+/)
+      requires_iframe_origins "https://*.kaltura.com"
+
+      def preview_html
+        og = get_opengraph
+
+        <<~HTML
+          <img src="#{og.image_secure_url}" width="#{og.video_width}" height="#{og.video_height}">
+        HTML
+      end
+
+      def to_html
+        og = get_opengraph
+
+        <<~HTML
+          <iframe
+            src="#{og.video_secure_url}"
+            width="#{og.video_width}"
+            height="#{og.video_height}"
+            frameborder='0'
+            allowfullscreen
+          ></iframe>
+        HTML
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/mixcloud_onebox.rb
+++ b/lib/onebox/engine/mixcloud_onebox.rb
@@ -0,0 +1,22 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class MixcloudOnebox
+      include Engine
+      include StandardEmbed
+
+      matches_regexp(/^https?:\/\/www\.mixcloud\.com\//)
+      always_https
+
+      def placeholder_html
+        oembed = get_oembed
+        "<img src='#{oembed.image}' height='#{oembed.height}' #{oembed.title_attr}>"
+      end
+
+      def to_html
+        get_oembed.html
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/opengraph_image.rb
+++ b/lib/onebox/engine/opengraph_image.rb
@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    module OpengraphImage
+
+      def to_html
+        og = get_opengraph
+        "<img src='#{og.image}' width='#{og.image_width}' height='#{og.image_height}' class='onebox' #{og.title_attr}>"
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/pastebin_onebox.rb
+++ b/lib/onebox/engine/pastebin_onebox.rb
@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class PastebinOnebox
+      include Engine
+      include LayoutSupport
+
+      MAX_LINES = 10
+
+      matches_regexp(/^http?:\/\/pastebin\.com/)
+
+      private
+
+      def data
+        @data ||= {
+          title: 'pastebin.com',
+          link: link,
+          content: content,
+          truncated?: truncated?
+        }
+      end
+
+      def content
+        lines.take(MAX_LINES).join("\n")
+      end
+
+      def truncated?
+        lines.size > MAX_LINES
+      end
+
+      def lines
+        return @lines if defined?(@lines)
+        response = Onebox::Helpers.fetch_response("http://pastebin.com/raw/#{paste_key}", redirect_limit: 1) rescue ""
+        @lines = response.split("\n")
+      end
+
+      def paste_key
+        regex = case uri
+                when /\/raw\//
+                  /\/raw\/([^\/]+)/
+                when /\/download\//
+                  /\/download\/([^\/]+)/
+                when /\/embed\//
+                  /\/embed\/([^\/]+)/
+        else
+                  /\/([^\/]+)/
+        end
+
+        match = uri.path.match(regex)
+        match[1] if match && match[1]
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/pdf_onebox.rb
+++ b/lib/onebox/engine/pdf_onebox.rb
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class PdfOnebox
+      include Engine
+      include LayoutSupport
+
+      matches_regexp(/^(https?:)?\/\/.*\.pdf(\?.*)?$/i)
+      always_https
+
+      private
+
+      def data
+        begin
+          size = Onebox::Helpers.fetch_content_length(@url)
+        rescue
+          raise "Unable to read pdf file: #{@url}"
+        end
+
+        {
+          link: link,
+          title: File.basename(uri.path),
+          filesize: size ? Onebox::Helpers.pretty_filesize(size.to_i) : nil,
+        }
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/pubmed_onebox.rb
+++ b/lib/onebox/engine/pubmed_onebox.rb
@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class PubmedOnebox
+      include Engine
+      include LayoutSupport
+
+      matches_regexp(/^https?:\/\/(?:(?:\w)+\.)?(www.ncbi.nlm.nih)\.gov(?:\/)?\/pubmed\/\d+/)
+
+      private
+
+      def xml
+        return @xml if defined?(@xml)
+        doc = Nokogiri::XML(URI.open(URI.join(@url, "?report=xml&format=text")))
+        pre = doc.xpath("//pre")
+        @xml = Nokogiri::XML("<root>" + pre.text + "</root>")
+      end
+
+      def authors
+        initials = xml.css("Initials").map { |x| x.content }
+        last_names = xml.css("LastName").map { |x| x.content }
+        author_list = (initials.zip(last_names)).map { |i, l| i + " " + l }
+        if author_list.length > 1 then
+          author_list[-2] = author_list[-2] + " and " + author_list[-1]
+          author_list.pop
+        end
+        author_list.join(", ")
+      end
+
+      def date
+        xml.css("PubDate")
+          .children
+          .map { |x| x.content }
+          .select { |s| !s.match(/^\s+$/) }
+          .map { |s| s.split }
+          .flatten
+          .sort
+          .reverse
+          .join(" ") # Reverse sort so month before year.
+      end
+
+      def data
+        {
+          title: xml.css("ArticleTitle").text,
+          authors: authors,
+          journal: xml.css("Title").text,
+          abstract: xml.css("AbstractText").text,
+          date: date,
+          link: @url,
+          pmid: match[:pmid]
+        }
+      end
+
+      def match
+        @match ||= @url.match(%r{www\.ncbi\.nlm\.nih\.gov/pubmed/(?<pmid>[0-9]+)})
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/reddit_media_onebox.rb
+++ b/lib/onebox/engine/reddit_media_onebox.rb
@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class RedditMediaOnebox
+      include Engine
+      include StandardEmbed
+
+      matches_regexp(/^https?:\/\/(www\.)?reddit\.com/)
+
+      def to_html
+        if raw[:type] == "image"
+          <<-HTML
+            <aside class="onebox reddit">
+              <header class="source">
+                <img src="#{raw[:favicon]}" class="site-icon" width="16" height="16">
+                <a href="#{raw[:url]}" target="_blank" rel="nofollow ugc noopener">#{raw[:site_name]}</a>
+              </header>
+              <article class="onebox-body">
+                <h3><a href="#{raw[:url]}" target="_blank" rel="nofollow ugc noopener">#{raw[:title]}</a></h3>
+                <div class="scale-images">
+                  <img src="#{raw[:image]}" class="scale-image"/>
+                </div>
+                <div class="description"><p>#{raw[:description]}</p></div>
+              </article>
+            </aside>
+          HTML
+        elsif  raw[:type] =~ /^video[\/\.]/
+          <<-HTML
+            <aside class="onebox reddit">
+              <header class="source">
+                <img src="#{raw[:favicon]}" class="site-icon" width="16" height="16">
+                <a href="#{raw[:url]}" target="_blank" rel="nofollow ugc noopener">#{raw[:site_name]}</a>
+              </header>
+              <article class="onebox-body">
+                <h3><a href="#{raw[:url]}" target="_blank" rel="nofollow ugc noopener">#{raw[:title]}</a></h3>
+                <div class="aspect-image-full-size">
+                  <a href="#{raw[:url]}" target="_blank" rel="nofollow ugc noopener">
+                    <img src="#{raw[:image]}" class="scale-image"/>
+                    <span class="instagram-video-icon"></span>
+                  </a>
+                </div>
+                <div class="description"><p>#{raw[:description]}</p></div>
+              </article>
+            </aside>
+          HTML
+        else
+          html = Onebox::Engine::AllowlistedGenericOnebox.new(@url, @timeout).to_html
+          return if Onebox::Helpers.blank?(html)
+          html
+        end
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/replit_onebox.rb
+++ b/lib/onebox/engine/replit_onebox.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class ReplitOnebox
+      include Engine
+      include StandardEmbed
+
+      matches_regexp(/^https?:\/\/repl\.it\/.+/)
+      always_https
+
+      def placeholder_html
+        oembed = get_oembed
+
+        <<-HTML
+          <img src="#{oembed.thumbnail_url}" style="max-width: #{oembed.width}px; max-height: #{oembed.height}px;" #{oembed.title_attr}>
+        HTML
+      end
+
+      def to_html
+        get_oembed.html
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/simplecast_onebox.rb
+++ b/lib/onebox/engine/simplecast_onebox.rb
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class SimplecastOnebox
+      include Engine
+      include StandardEmbed
+
+      matches_regexp(/https?:\/\/(.+)?simplecast.com\/(episodes|s)\/.*/)
+      always_https
+      requires_iframe_origins("https://embed.simplecast.com")
+
+      def to_html
+        get_oembed.html
+      end
+
+      def placeholder_html
+        oembed = get_oembed
+        return if Onebox::Helpers.blank?(oembed.thumbnail_url)
+        "<img src='#{oembed.thumbnail_url}' #{oembed.title_attr}>"
+      end
+
+      private
+
+      def get_oembed_url
+        if id = url.scan(/([a-zA-Z0-9]*)\Z/).flatten.first
+          oembed_url = "https://simplecast.com/s/#{id}"
+        else
+          oembed_url = url
+        end
+
+        "https://simplecast.com/oembed?url=#{oembed_url}"
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/sketch_fab_onebox.rb
+++ b/lib/onebox/engine/sketch_fab_onebox.rb
@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class SketchFabOnebox
+      include Engine
+      include StandardEmbed
+
+      matches_regexp(/^https?:\/\/sketchfab\.com\/(?:models\/|3d-models\/(?:[^\/\s]+-)?)([a-z0-9]{32})/)
+      always_https
+      requires_iframe_origins("https://sketchfab.com")
+
+      def to_html
+        og = get_opengraph
+        src = og.video_url.gsub("autostart=1", "")
+
+        <<-HTML
+          <iframe
+            src="#{src}"
+            width="#{og.video_width}"
+            height="#{og.video_height}"
+            scrolling="no"
+            frameborder="0"
+            allowfullscreen
+          ></iframe>
+        HTML
+      end
+
+      def placeholder_html
+        "<img src='#{get_opengraph.image}'>"
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/slides_onebox.rb
+++ b/lib/onebox/engine/slides_onebox.rb
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class SlidesOnebox
+      include Engine
+      include StandardEmbed
+
+      matches_regexp(/^https?:\/\/slides\.com\/[\p{Alnum}_\-]+\/[\p{Alnum}_\-]+$/)
+      requires_iframe_origins "https://slides.com"
+
+      def to_html
+        <<-HTML
+          <iframe
+            src="https://slides.com#{uri.path}/embed?style=light"
+            width="576"
+            height="420"
+            scrolling="no"
+            frameborder="0"
+            webkitallowfullscreen
+            mozallowfullscreen
+            allowfullscreen
+          ></iframe>
+        HTML
+      end
+
+      def placeholder_html
+        escaped_src = ::Onebox::Helpers.normalize_url_for_output(raw[:image])
+        "<img src='#{escaped_src}'>"
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/sound_cloud_onebox.rb
+++ b/lib/onebox/engine/sound_cloud_onebox.rb
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class SoundCloudOnebox
+      include Engine
+      include StandardEmbed
+
+      matches_regexp(/^https?:\/\/soundcloud\.com/)
+      requires_iframe_origins "https://w.soundcloud.com"
+      always_https
+
+      def to_html
+        oembed = get_oembed
+        oembed.html.gsub('visual=true', 'visual=false')
+      end
+
+      def placeholder_html
+        oembed = get_oembed
+        return if Onebox::Helpers.blank?(oembed.thumbnail_url)
+        "<img src='#{oembed.thumbnail_url}' #{oembed.title_attr}>"
+      end
+
+      protected
+
+      def get_oembed_url
+        oembed_url = "https://soundcloud.com/oembed.json?url=#{url}"
+        oembed_url += "&maxheight=166" unless url["/sets/"]
+        oembed_url
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/stack_exchange_onebox.rb
+++ b/lib/onebox/engine/stack_exchange_onebox.rb
@@ -0,0 +1,56 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class StackExchangeOnebox
+      include Engine
+      include LayoutSupport
+      include JSON
+
+      def self.domains
+        %w(stackexchange.com stackoverflow.com superuser.com serverfault.com askubuntu.com stackapps.com mathoverflow.net)
+          .map { |domain| Regexp.escape(domain) }
+      end
+
+      matches_regexp(/^https?:\/\/(?:(?:(?<subsubdomain>\w*)\.)?(?<subdomain>\w*)\.)?(?<domain>#{domains.join('|')})\/((?:questions|q)\/(?<question_id>\d*)(\/.*\/(?<answer_id1>\d*))?|(a\/(?<answer_id2>\d*)))/)
+
+      def always_https?
+        uri.host.split('.').length <= 3
+      end
+
+      private
+
+      def match
+        @match ||= @url.match(@@matcher)
+      end
+
+      def url
+        domain = uri.host
+        question_id = match[:question_id]
+        answer_id = match[:answer_id2] || match[:answer_id1]
+
+        if answer_id
+          "https://api.stackexchange.com/2.2/answers/#{answer_id}?site=#{domain}&filter=!.FjueITQdx6-Rq3Ue9PWG.QZ2WNdW"
+        else
+          "https://api.stackexchange.com/2.2/questions/#{question_id}?site=#{domain}&filter=!5-duuxrJa-iw9oVvOA(JNimB5VIisYwZgwcfNI"
+        end
+      end
+
+      def data
+        return @data if defined?(@data)
+
+        result = raw['items'][0]
+        if result
+          result['creation_date'] =
+              Time.at(result['creation_date'].to_i).strftime("%I:%M%p - %d %b %y %Z")
+
+          result['tags'] = result['tags'].take(4).join(', ')
+          result['is_answer'] = result.key?('answer_id')
+          result['is_question'] = result.key?('question_id')
+        end
+
+        @data = result
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/standard_embed.rb
+++ b/lib/onebox/engine/standard_embed.rb
@@ -0,0 +1,145 @@
+# frozen_string_literal: true
+
+require "cgi"
+require "onebox/open_graph"
+require 'onebox/oembed'
+
+module Onebox
+  module Engine
+    module StandardEmbed
+      def self.oembed_providers
+        @@oembed_providers ||= {}
+      end
+
+      def self.add_oembed_provider(regexp, endpoint)
+        oembed_providers[regexp] = endpoint
+      end
+
+      def self.opengraph_providers
+        @@opengraph_providers ||= []
+      end
+
+      def self.add_opengraph_provider(regexp)
+        opengraph_providers << regexp
+      end
+
+      # Some oembed providers (like meetup.com) don't provide links to themselves
+      add_oembed_provider(/www\.meetup\.com\//, 'http://api.meetup.com/oembed')
+      add_oembed_provider(/www\.mixcloud\.com\//, 'https://www.mixcloud.com/oembed/')
+      # In order to support Private Videos
+      add_oembed_provider(/vimeo\.com\//, 'https://vimeo.com/api/oembed.json')
+      # NYT requires login so use oembed only
+      add_oembed_provider(/nytimes\.com\//, 'https://www.nytimes.com/svc/oembed/json/')
+
+      def always_https?
+        AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts) || super
+      end
+
+      def raw
+        return @raw if defined?(@raw)
+
+        og = get_opengraph
+        twitter = get_twitter
+        oembed = get_oembed
+
+        @raw = {}
+
+        og.data.each do |k, v|
+          next if k == "title_attr"
+          v = og.send(k)
+          @raw[k] ||= v unless v.nil?
+        end
+
+        twitter.each { |k, v| @raw[k] ||= v unless Onebox::Helpers::blank?(v) }
+
+        oembed.data.each do |k, v|
+          v = oembed.send(k)
+          @raw[k] ||= v unless v.nil?
+        end
+
+        favicon = get_favicon
+        @raw["favicon".to_sym] = favicon unless Onebox::Helpers::blank?(favicon)
+
+        @raw
+      end
+
+      protected
+
+      def html_doc
+        return @html_doc if defined?(@html_doc)
+
+        headers = nil
+        headers = { 'Cookie' => options[:cookie] } if options[:cookie]
+
+        @html_doc = Onebox::Helpers.fetch_html_doc(url, headers)
+      end
+
+      def get_oembed
+        @oembed ||= Onebox::Oembed.new(get_json_response)
+      end
+
+      def get_opengraph
+        @opengraph ||= ::Onebox::OpenGraph.new(html_doc)
+      end
+
+      def get_twitter
+        return {} unless html_doc
+
+        twitter = {}
+
+        html_doc.css('meta').each do |m|
+          if (m["property"] && m["property"][/^twitter:(.+)$/i]) || (m["name"] && m["name"][/^twitter:(.+)$/i])
+            value = (m["content"] || m["value"]).to_s
+            twitter[$1.tr('-:' , '_').to_sym] ||= value unless (Onebox::Helpers::blank?(value) || value == "0 minutes")
+          end
+        end
+
+        twitter
+      end
+
+      def get_favicon
+        return nil unless html_doc
+
+        favicon = html_doc.css('link[rel="shortcut icon"], link[rel="icon shortcut"], link[rel="shortcut"], link[rel="icon"]').first
+        favicon = favicon.nil? ? nil : (favicon['href'].nil? ? nil : favicon['href'].strip)
+
+        Onebox::Helpers::get_absolute_image_url(favicon, url)
+      end
+
+      def get_json_response
+        oembed_url = get_oembed_url
+
+        return "{}" if Onebox::Helpers.blank?(oembed_url)
+
+        Onebox::Helpers.fetch_response(oembed_url) rescue "{}"
+      rescue Errno::ECONNREFUSED, Net::HTTPError, Net::HTTPFatalError, MultiJson::LoadError
+        "{}"
+      end
+
+      def get_oembed_url
+        oembed_url = nil
+
+        StandardEmbed.oembed_providers.each do |regexp, endpoint|
+          if url =~ regexp
+            oembed_url = "#{endpoint}?url=#{url}"
+            break
+          end
+        end
+
+        if html_doc
+          if Onebox::Helpers.blank?(oembed_url)
+            application_json = html_doc.at("//link[@type='application/json+oembed']/@href")
+            oembed_url = application_json.value if application_json
+          end
+
+          if Onebox::Helpers.blank?(oembed_url)
+            text_json = html_doc.at("//link[@type='text/json+oembed']/@href")
+            oembed_url ||= text_json.value if text_json
+          end
+        end
+
+        oembed_url
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/steam_store_onebox.rb
+++ b/lib/onebox/engine/steam_store_onebox.rb
@@ -0,0 +1,41 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class SteamStoreOnebox
+      include Engine
+      include StandardEmbed
+
+      always_https
+      matches_regexp(/^https?:\/\/store\.steampowered\.com\/app\/\d+/)
+      requires_iframe_origins "https://store.steampowered.com"
+
+      def placeholder_html
+        og = get_opengraph
+        <<-HTML
+          <div style='width:100%; height:190px; background-color:#262626; color:#9e9e9e; margin:15px 0;'>
+            <div style='padding:10px'>
+              <h3 style='color:#fff; margin:10px 0 10px 5px;'>#{og.title}</h3>
+              <img src='#{og.image}' style='float:left; max-width:184px; margin:5px 15px 0 5px'/>
+              <p>#{og.description}</p>
+            </div>
+          </div>
+        HTML
+      end
+
+      def to_html
+        iframe_url = @url[/https?:\/\/store\.steampowered\.com\/app\/\d+/].gsub("/app/", "/widget/")
+        escaped_src = ::Onebox::Helpers.normalize_url_for_output(iframe_url)
+
+        <<-HTML
+          <iframe
+            src='#{escaped_src}'
+            frameborder='0'
+            width='100%'
+            height='190'
+          ></iframe>
+        HTML
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/trello_onebox.rb
+++ b/lib/onebox/engine/trello_onebox.rb
@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class TrelloOnebox
+      include Engine
+      include StandardEmbed
+
+      matches_regexp(/^https:\/\/trello\.com\/[bc]\/\W*/)
+      requires_iframe_origins "https://trello.com"
+      always_https
+
+      def to_html
+        src = "https://trello.com/#{match[:type]}/#{match[:key]}.html"
+        height = match[:type] == 'b' ? 400 : 200
+
+        <<-HTML
+          <iframe src="#{src}" width="100%" height="#{height}" frameborder="0" style="border:0"></iframe>
+        HTML
+      end
+
+      def placeholder_html
+        ::Onebox::Helpers.generic_placeholder_html
+      end
+
+      private
+
+      def match
+        return @match if defined?(@match)
+        @match = @url.match(%{trello\.com/(?<type>[^/]+)/(?<key>[^/]+)/?\W*})
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/twitch_clips_onebox.rb
+++ b/lib/onebox/engine/twitch_clips_onebox.rb
@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+require_relative '../mixins/twitch_onebox'
+
+class Onebox::Engine::TwitchClipsOnebox
+  def self.twitch_regexp
+    /^https?:\/\/clips\.twitch\.tv\/([a-zA-Z0-9_]+\/?[^#\?\/]+)/
+  end
+
+  include Onebox::Mixins::TwitchOnebox
+  requires_iframe_origins "https://clips.twitch.tv"
+
+  def query_params
+    "clip=#{twitch_id}"
+  end
+
+  def base_url
+    "clips.twitch.tv/embed?"
+  end
+end
--- a/lib/onebox/engine/twitch_stream_onebox.rb
+++ b/lib/onebox/engine/twitch_stream_onebox.rb
@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+
+require_relative '../mixins/twitch_onebox'
+
+class Onebox::Engine::TwitchStreamOnebox
+  def self.twitch_regexp
+    /^https?:\/\/(?:www\.|go\.)?twitch\.tv\/(?!directory)([a-zA-Z0-9_]{4,25})$/
+  end
+
+  include Onebox::Mixins::TwitchOnebox
+
+  def query_params
+    "channel=#{twitch_id}"
+  end
+end
--- a/lib/onebox/engine/twitch_video_onebox.rb
+++ b/lib/onebox/engine/twitch_video_onebox.rb
@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+
+require_relative '../mixins/twitch_onebox'
+
+class Onebox::Engine::TwitchVideoOnebox
+  def self.twitch_regexp
+    /^https?:\/\/(?:www\.)?twitch\.tv\/videos\/([0-9]+)/
+  end
+
+  include Onebox::Mixins::TwitchOnebox
+
+  def query_params
+    "video=v#{twitch_id}"
+  end
+end
--- a/lib/onebox/engine/twitter_status_onebox.rb
+++ b/lib/onebox/engine/twitter_status_onebox.rb
@@ -0,0 +1,172 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class TwitterStatusOnebox
+      include Engine
+      include LayoutSupport
+      include HTML
+
+      matches_regexp(/^https?:\/\/(mobile\.|www\.)?twitter\.com\/.+?\/status(es)?\/\d+(\/(video|photo)\/\d?+)?+(\/?\?.*)?\/?$/)
+      always_https
+
+      def http_params
+        { 'User-Agent' => 'DiscourseBot/1.0' }
+      end
+
+      private
+
+      def get_twitter_data
+        response = Onebox::Helpers.fetch_response(url, headers: http_params) rescue nil
+        html = Nokogiri::HTML(response)
+        twitter_data = {}
+        html.css('meta').each do |m|
+          if m.attribute('property') && m.attribute('property').to_s.match(/^og:/i)
+            m_content = m.attribute('content').to_s.strip
+            m_property = m.attribute('property').to_s.gsub('og:', '')
+            twitter_data[m_property.to_sym] = m_content
+          end
+        end
+        twitter_data
+      end
+
+      def match
+        @match ||= @url.match(%r{twitter\.com/.+?/status(es)?/(?<id>\d+)})
+      end
+
+      def twitter_data
+        @twitter_data ||= get_twitter_data
+      end
+
+      def client
+        Onebox.options.twitter_client
+      end
+
+      def twitter_api_credentials_present?
+        client && !client.twitter_credentials_missing?
+      end
+
+      def raw
+        if twitter_api_credentials_present?
+          @raw ||= OpenStruct.new(client.status(match[:id]).to_hash)
+        else
+          super
+        end
+      end
+
+      def access(*keys)
+        keys.reduce(raw) do |memo, key|
+          next unless memo
+          memo[key] || memo[key.to_s]
+        end
+      end
+
+      def tweet
+        if twitter_api_credentials_present?
+          client.prettify_tweet(raw)&.strip
+        else
+          twitter_data[:description].gsub(/“(.+?)”/im) { $1 } if twitter_data[:description]
+        end
+      end
+
+      def timestamp
+        if twitter_api_credentials_present?
+          date = DateTime.strptime(access(:created_at), "%a %b %d %H:%M:%S %z %Y")
+          user_offset = access(:user, :utc_offset).to_i
+          offset = (user_offset >= 0 ? "+" : "-") + Time.at(user_offset.abs).gmtime.strftime("%H%M")
+          date.new_offset(offset).strftime("%-l:%M %p - %-d %b %Y")
+        else
+          attr_at_css(".tweet-timestamp", 'title')
+        end
+      end
+
+      def title
+        if twitter_api_credentials_present?
+          "#{access(:user, :name)} (#{access(:user, :screen_name)})"
+        else
+          "#{attr_at_css('.tweet.permalink-tweet', 'data-name')} (#{attr_at_css('.tweet.permalink-tweet', 'data-screen-name')})"
+        end
+      end
+
+      def avatar
+        if twitter_api_credentials_present?
+          access(:user, :profile_image_url_https).sub('normal', '400x400')
+        elsif twitter_data[:image]
+          twitter_data[:image]
+        end
+      end
+
+      def likes
+        if twitter_api_credentials_present?
+          prettify_number(access(:favorite_count).to_i)
+        else
+          attr_at_css(".request-favorited-popup", 'data-compact-localized-count')
+        end
+      end
+
+      def retweets
+        if twitter_api_credentials_present?
+          prettify_number(access(:retweet_count).to_i)
+        else
+          attr_at_css(".request-retweeted-popup", 'data-compact-localized-count')
+        end
+      end
+
+      def quoted_full_name
+        if twitter_api_credentials_present?
+          access(:quoted_status, :user, :name)
+        else
+          raw.css('.QuoteTweet-fullname')[0]&.text
+        end
+      end
+
+      def quoted_screen_name
+        if twitter_api_credentials_present?
+          access(:quoted_status, :user, :screen_name)
+        else
+          attr_at_css(".QuoteTweet-innerContainer", "data-screen-name")
+        end
+      end
+
+      def quoted_tweet
+        if twitter_api_credentials_present?
+          access(:quoted_status, :full_text)
+        else
+          raw.css('.QuoteTweet-text')[0]&.text
+        end
+      end
+
+      def quoted_link
+        if twitter_api_credentials_present?
+          "https://twitter.com/#{quoted_screen_name}/status/#{access(:quoted_status, :id)}"
+        else
+          "https://twitter.com#{attr_at_css(".QuoteTweet-innerContainer", "href")}"
+        end
+      end
+
+      def prettify_number(count)
+        count > 0 ? client.prettify_number(count) : nil
+      end
+
+      def attr_at_css(css_property, attribute_name)
+        raw.at_css(css_property)&.attr(attribute_name)
+      end
+
+      def data
+        @data ||= {
+          link: link,
+          tweet: tweet,
+          timestamp: timestamp,
+          title: title,
+          avatar: avatar,
+          likes: likes,
+          retweets: retweets,
+          quoted_tweet: quoted_tweet,
+          quoted_full_name: quoted_full_name,
+          quoted_screen_name: quoted_screen_name,
+          quoted_link: quoted_link
+        }
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/typeform_onebox.rb
+++ b/lib/onebox/engine/typeform_onebox.rb
@@ -0,0 +1,48 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class TypeformOnebox
+      include Engine
+
+      matches_regexp(/^https?:\/\/[a-z0-9\-_]+\.typeform\.com\/to\/[a-zA-Z0-9]+/)
+      requires_iframe_origins "https://*.typeform.com"
+      always_https
+
+      def to_html
+        typeform_src = build_typeform_src
+
+        <<~HTML
+          <iframe
+            src="#{typeform_src}"
+            width="100%"
+            height="600px"
+            scrolling="no"
+            frameborder="0"
+          ></iframe>
+        HTML
+      end
+
+      def placeholder_html
+        ::Onebox::Helpers.generic_placeholder_html
+      end
+
+      private
+
+      def build_typeform_src
+        escaped_src = ::Onebox::Helpers.normalize_url_for_output(@url)
+        query_params = CGI::parse(URI::parse(escaped_src).query || '')
+
+        return escaped_src if query_params.has_key?('typeform-embed')
+
+        if query_params.empty?
+          escaped_src += '?' unless escaped_src.end_with?('?')
+        else
+          escaped_src += '&'
+        end
+
+        escaped_src += 'typeform-embed=embed-widget'
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/video_onebox.rb
+++ b/lib/onebox/engine/video_onebox.rb
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class VideoOnebox
+      include Engine
+
+      matches_regexp(/^(https?:)?\/\/.*\.(mov|mp4|webm|ogv)(\?.*)?$/i)
+
+      def always_https?
+        AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.https_hosts)
+      end
+
+      def to_html
+        # Fix Dropbox image links
+        if @url[/^https:\/\/www.dropbox.com\/s\//]
+          @url.sub!("https://www.dropbox.com", "https://dl.dropboxusercontent.com")
+        end
+
+        escaped_url = ::Onebox::Helpers.normalize_url_for_output(@url)
+        <<-HTML
+          <div class="onebox video-onebox">
+            <video width='100%' height='100%' controls #{@options[:disable_media_download_controls] ? 'controlslist="nodownload"' : ""}>
+              <source src='#{escaped_url}'>
+              <a href='#{escaped_url}'>#{@url}</a>
+            </video>
+          </div>
+        HTML
+      end
+
+      def placeholder_html
+        ::Onebox::Helpers.video_placeholder_html
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/vimeo_onebox.rb
+++ b/lib/onebox/engine/vimeo_onebox.rb
@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class VimeoOnebox
+      include Engine
+      include StandardEmbed
+
+      matches_regexp(/^https?:\/\/(www\.)?vimeo\.com\/\d+/)
+      requires_iframe_origins "https://player.vimeo.com"
+      always_https
+
+      WIDTH  ||= 640
+      HEIGHT ||= 360
+
+      def placeholder_html
+        ::Onebox::Helpers.video_placeholder_html
+      end
+
+      def to_html
+        video_id = oembed_data[:video_id]
+        if video_id.nil?
+          # for private videos
+          video_id = uri.path[/\/(\d+)/, 1]
+        end
+        video_src = "https://player.vimeo.com/video/#{video_id}"
+        video_src = video_src.gsub('autoplay=1', '').chomp("?")
+
+        <<-HTML
+          <iframe
+            width="#{WIDTH}"
+            height="#{HEIGHT}"
+            src="#{video_src}"
+            data-original-href="#{link}"
+            frameborder="0"
+            allowfullscreen
+          ></iframe>
+        HTML
+      end
+
+      private
+
+      def oembed_data
+        response = Onebox::Helpers.fetch_response("https://vimeo.com/api/oembed.json?url=#{url}")
+        @oembed_data = Onebox::Helpers.symbolize_keys(::MultiJson.load(response))
+      rescue
+        "{}"
+      end
+
+      def og_data
+        @og_data = get_opengraph
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/wikimedia_onebox.rb
+++ b/lib/onebox/engine/wikimedia_onebox.rb
@@ -0,0 +1,43 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class WikimediaOnebox
+      include Engine
+      include LayoutSupport
+      include JSON
+
+      matches_regexp(/^https?:\/\/commons\.wikimedia\.org\/wiki\/(File:.+)/)
+      always_https
+
+      def self.priority
+        # Wikimedia links end in an image extension.
+        # E.g. https://commons.wikimedia.org/wiki/File:Stones_members_montage2.jpg
+        # This engine should have priority over the generic ImageOnebox.
+
+        1
+      end
+
+      def url
+        "https://en.wikipedia.org/w/api.php?action=query&titles=#{match[:name]}&prop=imageinfo&iilimit=50&iiprop=timestamp|user|url&iiurlwidth=500&format=json"
+      end
+
+      private
+
+      def match
+        @match ||= @url.match(/^https?:\/\/commons\.wikimedia\.org\/wiki\/(?<name>File:.+)/)
+      end
+
+      def data
+        first_page = raw['query']['pages'].first[1]
+
+        {
+          link: first_page['imageinfo'].first['descriptionurl'],
+          title: first_page['title'],
+          image: first_page['imageinfo'].first['url'],
+          thumbnail: first_page['imageinfo'].first['thumburl']
+        }
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/wikipedia_onebox.rb
+++ b/lib/onebox/engine/wikipedia_onebox.rb
@@ -0,0 +1,97 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class WikipediaOnebox
+      include Engine
+      include LayoutSupport
+      include HTML
+
+      matches_regexp(/^https?:\/\/.*\.wikipedia\.(com|org)/)
+      always_https
+
+      private
+
+      def data
+        paras = []
+        text = ""
+
+        # Detect section Hash in the url and retrive the related paragraphs. if no hash provided the first few paragraphs will be used
+        # Author Lidlanca
+        # Date 9/8/2014
+        if (m_url_hash = @url.match(/#([^\/?]+)/)) # extract url hash
+          m_url_hash_name = m_url_hash[1]
+        end
+
+        unless m_url_hash.nil?
+          section_header_title = raw.xpath("//span[@id='#{m_url_hash_name}']")
+
+          if section_header_title.empty?
+            paras = raw.search("p") # default get all the paras
+          else
+            section_title_text = section_header_title.inner_text
+            section_header = section_header_title[0].parent # parent element of the section span element should be an <h3> node
+            cur_element = section_header
+
+            # p|text|div covers the general case. We assume presence of at least 1 P node. if section has no P node we may end up with a P node from the next section.
+            # div tag is commonly used as an assets wraper in an article section. often as the first element holding an image.
+            # ul support will imporve the output generated for a section with a list as the main content (for example: an Author Bibliography, A musician Discography, etc)
+            first_p_found = nil
+            while (((next_sibling = cur_element.next_sibling).name =~ /p|text|div|ul/) || first_p_found.nil?) do  # from section header get the next sibling until it is a breaker tag
+              cur_element = next_sibling
+              if (cur_element.name == "p" || cur_element.name == "ul") #we treat a list as we detect a p to avoid showing
+                first_p_found = true
+                paras.push(cur_element)
+              end
+            end
+          end
+        else # no hash found in url
+          paras = raw.search("p") # default get all the paras
+        end
+
+        unless paras.empty?
+          cnt = 0
+          while text.length < Onebox::LayoutSupport.max_text && cnt <= 3
+            break if cnt >= paras.size
+            text += " " unless cnt == 0
+
+            if paras[cnt].name == "ul" # Handle UL tag. Generate a textual ordered list (1.item | 2.item | 3.item). Unfortunately no newline allowed in output
+              li_index = 1
+              list_items = []
+              paras[cnt].children.css("li").each { |li| list_items.push "#{li_index}." + li.inner_text ; li_index += 1 }
+              paragraph = (list_items.join " |\n ")[0..Onebox::LayoutSupport.max_text]
+            else
+              paragraph = paras[cnt].inner_text[0..Onebox::LayoutSupport.max_text]
+            end
+
+            paragraph.gsub!(/\[\d+\]/mi, "")
+            text += paragraph
+            cnt += 1
+          end
+        end
+
+        text = "#{text[0..Onebox::LayoutSupport.max_text]}..." if text.length > Onebox::LayoutSupport.max_text
+
+        result = {
+          link: link,
+          title: raw.css("html body h1").inner_text + (section_title_text ? " | " + section_title_text : ""),  #if a section sub title exists add it to the main article title
+          description: text
+        }
+
+        img = raw.css(".image img")
+
+        if img && img.size > 0
+          img.each do |i|
+            src = i["src"]
+            if src !~ /Question_book/
+              result[:image] = src
+              break
+            end
+          end
+        end
+
+        result
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/wistia_onebox.rb
+++ b/lib/onebox/engine/wistia_onebox.rb
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class WistiaOnebox
+      include Engine
+      include StandardEmbed
+
+      matches_regexp(/https?:\/\/(.+)?(wistia.com|wi.st)\/(medias|embed)\/.*/)
+      requires_iframe_origins("https://fast.wistia.com", "https://fast.wistia.net")
+      always_https
+
+      def to_html
+        get_oembed.html
+      end
+
+      def placeholder_html
+        oembed = get_oembed
+        return if Onebox::Helpers.blank?(oembed.thumbnail_url)
+        "<img src='#{oembed.thumbnail_url}' #{oembed.title_attr}>"
+      end
+
+      private
+
+      def get_oembed_url
+        "https://fast.wistia.com/oembed?embedType=iframe&url=#{url}"
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/xkcd_onebox.rb
+++ b/lib/onebox/engine/xkcd_onebox.rb
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class XkcdOnebox
+      include Engine
+      include LayoutSupport
+      include JSON
+
+      matches_regexp(/^https?:\/\/(www\.)?(m\.)?xkcd\.com\/\d+/)
+
+      def url
+        "https://xkcd.com/#{match[:comic_id]}/info.0.json"
+      end
+
+      private
+
+      def match
+        @match ||= @url.match(%{xkcd\.com/(?<comic_id>\\d+)})
+      end
+
+      def data
+        {
+          link: @url,
+          title: raw['safe_title'],
+          image: raw['img'],
+          description: raw['alt']
+        }
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/youku_onebox.rb
+++ b/lib/onebox/engine/youku_onebox.rb
@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class YoukuOnebox
+      include Engine
+      include HTML
+
+      matches_regexp(/^(https?:\/\/)?([\da-z\.-]+)(youku.com\/)(.)+\/?$/)
+      requires_iframe_origins "https://player.youku.com"
+
+      # Try to get the video ID. Works for URLs of the form:
+      # * http://v.youku.com/v_show/id_XNjM3MzAxNzc2.html
+      # * http://v.youku.com/v_show/id_XMTQ5MjgyMjMyOA==.html?from=y1.3-tech-index3-232-10183.89969-89963.3-1
+      def video_id
+        match = uri.path.match(/\/v_show\/id_([a-zA-Z0-9_=\-]+)(\.html)?.*/)
+        match && match[1]
+      rescue
+        nil
+      end
+
+      def to_html
+        <<~HTML
+          <iframe
+            src="https://player.youku.com/embed/#{video_id}"
+            width="640"
+            height="430"
+            frameborder='0'
+            allowfullscreen
+          ></iframe>
+        HTML
+      end
+    end
+  end
+end
--- a/lib/onebox/engine/youtube_onebox.rb
+++ b/lib/onebox/engine/youtube_onebox.rb
@@ -0,0 +1,173 @@
+# frozen_string_literal: true
+
+module Onebox
+  module Engine
+    class YoutubeOnebox
+      include Engine
+      include StandardEmbed
+
+      matches_regexp(/^https?:\/\/(?:www\.)?(?:m\.)?(?:youtube\.com|youtu\.be)\/.+$/)
+      requires_iframe_origins "https://www.youtube.com"
+      always_https
+
+      WIDTH  ||= 480
+      HEIGHT ||= 360
+
+      def parse_embed_response
+        return unless video_id
+        return @parse_embed_response if defined?(@parse_embed_response)
+
+        embed_url = "https://www.youtube.com/embed/#{video_id}"
+        @embed_doc ||= Onebox::Helpers.fetch_html_doc(embed_url)
+
+        begin
+          script_tag = @embed_doc.xpath('//script').find { |tag| tag.to_s.include?('ytcfg.set') }.to_s
+          match = script_tag.to_s.match(/ytcfg\.set\((?<json>.*)\)/)
+
+          yt_json = ::JSON.parse(match[:json])
+          renderer = ::JSON.parse(yt_json['PLAYER_VARS']['embedded_player_response'])['embedPreview']['thumbnailPreviewRenderer']
+
+          title = renderer['title']['runs'].first['text']
+
+          image = "https://img.youtube.com/vi/#{video_id}/hqdefault.jpg"
+        rescue
+          return
+        end
+
+        @parse_embed_response = { image: image, title: title }
+      end
+
+      def placeholder_html
+        if video_id || list_id
+          result = parse_embed_response
+          result ||= get_opengraph.data
+
+          "<img src='#{result[:image]}' width='#{WIDTH}' height='#{HEIGHT}' title='#{result[:title]}'>"
+        else
+          to_html
+        end
+      end
+
+      def to_html
+        if video_id
+          <<-HTML
+            <iframe
+              src="https://www.youtube.com/embed/#{video_id}?#{embed_params}"
+              width="#{WIDTH}"
+              height="#{HEIGHT}"
+              frameborder="0"
+              allowfullscreen
+            ></iframe>
+          HTML
+        elsif list_id
+          <<-HTML
+            <iframe
+              src="https://www.youtube.com/embed/videoseries?list=#{list_id}&wmode=transparent&rel=0&autohide=1&showinfo=1&enablejsapi=1"
+              width="#{WIDTH}"
+              height="#{HEIGHT}"
+              frameborder="0"
+              allowfullscreen
+            ></iframe>
+          HTML
+        else
+          # for channel pages
+          html = Onebox::Engine::AllowlistedGenericOnebox.new(@url, @timeout).to_html
+          return if Onebox::Helpers.blank?(html)
+          html.gsub!(/['"]\/\//, "https://")
+          html
+        end
+      end
+
+      def video_title
+        @video_title ||= begin
+          result = parse_embed_response || get_opengraph.data
+          result[:title]
+        end
+      end
+
+      private
+
+      def video_id
+        @video_id ||= begin
+          # http://youtu.be/afyK1HSFfgw
+          if uri.host["youtu.be"]
+            id = uri.path[/\/([\w\-]+)/, 1]
+            return id if id
+          end
+
+          # https://www.youtube.com/embed/vsF0K3Ou1v0
+          if uri.path["/embed/"]
+            id = uri.path[/\/embed\/([\w\-]+)/, 1]
+            return id if id
+          end
+
+          # https://www.youtube.com/watch?v=Z0UISCEe52Y
+          params['v']
+        end
+      end
+
+      def list_id
+        @list_id ||= params['list']
+      end
+
+      def embed_params
+        p = { 'feature' => 'oembed', 'wmode' => 'opaque' }
+
+        p['list'] = list_id if list_id
+
+        # Parse timestrings, and assign the result as a start= parameter
+        start = if params['start']
+          params['start']
+        elsif params['t']
+          params['t']
+        elsif uri.fragment && uri.fragment.start_with?('t=')
+          # referencing uri is safe here because any throws were already caught by video_id returning nil
+          # remove the t= from the start
+          uri.fragment[2..-1]
+        end
+
+        p['start'] = parse_timestring(start)        if start
+        p['end']   = parse_timestring params['end'] if params['end']
+
+        # Official workaround for looping videos
+        # https://developers.google.com/youtube/player_parameters#loop
+        # use params.include? so that you can just add "&loop"
+        if params.include?('loop')
+          p['loop'] = 1
+          p['playlist'] = video_id
+        end
+
+        # https://developers.google.com/youtube/player_parameters#rel
+        p['rel'] = 0 if params.include?('rel')
+
+        # https://developers.google.com/youtube/player_parameters#enablejsapi
+        p['enablejsapi'] = params['enablejsapi'] if params.include?('enablejsapi')
+
+        URI.encode_www_form(p)
+      end
+
+      def parse_timestring(string)
+        if string =~ /(\d+h)?(\d+m)?(\d+s?)?/
+          ($1.to_i * 3600) + ($2.to_i * 60) + $3.to_i
+        end
+      end
+
+      def params
+        return {} unless uri.query
+        # This mapping is necessary because CGI.parse returns a hash of keys to arrays.
+        # And *that* is necessary because querystrings support arrays, so they
+        # force you to deal with it to avoid security issues that would pop up
+        # if one day it suddenly gave you an array.
+        #
+        # However, we aren't interested. Just take the first one.
+        @params ||= begin
+          p = {}
+          CGI.parse(uri.query).each { |k, v| p[k] = v.first }
+          p
+        end
+      rescue
+        {}
+      end
+    end
+  end
+end