FEATURE: Show more context in Discourse topic oneboxes

Currently when generating a onebox for Discourse topics, some important
context is missing such as categories and tags.

This patch addresses this issue by introducing a new onebox engine
dedicated to display this information when available. Indeed to get this
new information, categories and tags are exposed in the topic metadata
as opengraph tags.
This commit is contained in:
Loïc Guitaut
2022-11-24 16:28:21 +01:00
committed by Loïc Guitaut
parent d2e9ea6193
commit 14d97f9cf1
16 changed files with 298 additions and 26 deletions

View File

@@ -738,7 +738,8 @@ aside.onebox.xkcd .onebox-body img {
// allowlistedgeneric twitter labels // allowlistedgeneric twitter labels
.onebox.allowlistedgeneric, .onebox.allowlistedgeneric,
.onebox.whitelistedgeneric { .onebox.whitelistedgeneric,
.onebox.discoursetopic {
.label1, .label1,
.label2 { .label2 {
color: var(--primary-med-or-secondary-med); color: var(--primary-med-or-secondary-med);
@@ -754,6 +755,7 @@ aside.onebox.xkcd .onebox-body img {
.onebox { .onebox {
&.allowlistedgeneric, &.allowlistedgeneric,
&.whitelistedgeneric, &.whitelistedgeneric,
&.discoursetopic,
&.gfycat, &.gfycat,
&.githubfolder { &.githubfolder {
.site-icon { .site-icon {
@@ -769,6 +771,24 @@ aside.onebox.xkcd .onebox-body img {
} }
} }
.onebox.discoursetopic {
h3 {
width: 100%;
margin-bottom: 0.2rem !important;
}
.d-icon-tag {
width: 0.75rem;
padding-top: 0.3rem;
position: absolute;
color: var(--primary-medium);
}
.discourse-tags .discourse-tag:first-of-type {
padding-left: 1rem;
}
}
.onebox.gfycat p { .onebox.gfycat p {
span.label1 a { span.label1 a {
white-space: nowrap; white-space: nowrap;

View File

@@ -131,7 +131,8 @@
.discourse-tags { .discourse-tags {
display: inline-flex; display: inline-flex;
flex-wrap: wrap; flex-wrap: wrap;
a { a,
span {
margin-right: 0.25em; margin-right: 0.25em;
} }
} }

View File

@@ -311,6 +311,15 @@ module ApplicationHelper
result << tag(:meta, { name: "twitter:#{property}", content: content }, nil, true) result << tag(:meta, { name: "twitter:#{property}", content: content }, nil, true)
end end
end end
Array
.wrap(opts[:breadcrumbs])
.each do |breadcrumb|
result << tag(:meta, property: "og:article:section", content: breadcrumb[:name])
result << tag(:meta, property: "og:article:section:color", content: breadcrumb[:color])
end
Array
.wrap(opts[:tags])
.each { |tag_name| result << tag(:meta, property: "og:article:tag", content: tag_name) }
if opts[:read_time] && opts[:read_time] > 0 && opts[:like_count] && opts[:like_count] > 0 if opts[:read_time] && opts[:read_time] > 0 && opts[:like_count] && opts[:like_count] > 0
result << tag(:meta, name: "twitter:label1", value: I18n.t("reading_time")) result << tag(:meta, name: "twitter:label1", value: I18n.t("reading_time"))

View File

@@ -132,7 +132,7 @@
<% content_for :head do %> <% content_for :head do %>
<%= auto_discovery_link_tag(@topic_view, {action: :feed, slug: @topic_view.topic.slug, topic_id: @topic_view.topic.id}, rel: 'alternate nofollow', title: t('rss_posts_in_topic', topic: @topic_view.title), type: 'application/rss+xml') %> <%= auto_discovery_link_tag(@topic_view, {action: :feed, slug: @topic_view.topic.slug, topic_id: @topic_view.topic.id}, rel: 'alternate nofollow', title: t('rss_posts_in_topic', topic: @topic_view.title), type: 'application/rss+xml') %>
<%= raw crawlable_meta_data(title: @topic_view.title, description: @topic_view.summary(strip_images: true), image: @topic_view.image_url, read_time: @topic_view.read_time, like_count: @topic_view.like_count, ignore_canonical: true, published_time: @topic_view.published_time) %> <%= raw crawlable_meta_data(title: @topic_view.title, description: @topic_view.summary(strip_images: true), image: @topic_view.image_url, read_time: @topic_view.read_time, like_count: @topic_view.like_count, ignore_canonical: true, published_time: @topic_view.published_time, breadcrumbs: @breadcrumbs, tags: @topic_view.tags) %>
<% if @topic_view.prev_page || @topic_view.next_page %> <% if @topic_view.prev_page || @topic_view.next_page %>
<% if @topic_view.prev_page %> <% if @topic_view.prev_page %>

View File

@@ -70,7 +70,7 @@ module CookedProcessorMixin
found = false found = false
parent = img parent = img
while parent = parent.parent while parent = parent.parent
if parent["class"] && parent["class"].include?("allowlistedgeneric") if parent["class"] && parent["class"].match?(/\b(allowlistedgeneric|discoursetopic)\b/)
found = true found = true
break break
end end

View File

@@ -0,0 +1,59 @@
# frozen_string_literal: true
module Onebox
module Engine
class DiscourseTopicOnebox
include Engine
include StandardEmbed
include LayoutSupport
matches_regexp(%r{/t/.*(/\d+)?})
def data
@data ||= {
categories: categories,
link: link,
article_published_time: published_time.strftime("%-d %b %y"),
article_published_time_title: published_time.strftime("%I:%M%p - %d %B %Y"),
domain: html_entities.decode(raw[:site_name].truncate(80, separator: " ")),
description: html_entities.decode(raw[:description].truncate(250, separator: " ")),
title: html_entities.decode(raw[:title].truncate(80, separator: " ")),
image: image,
render_tags?: render_tags?,
render_category_block?: render_category_block?,
}.reverse_merge(raw)
end
alias verified_data data
private
def categories
Array
.wrap(raw[:article_sections])
.map
.with_index { |name, index| { name: name, color: raw[:article_section_colors][index] } }
end
def published_time
@published_time ||= Time.parse(raw[:published_time])
end
def html_entities
@html_entities ||= HTMLEntities.new
end
def image
image = Onebox::Helpers.get_absolute_image_url(raw[:image], @url)
Onebox::Helpers.normalize_url_for_output(html_entities.decode(image))
end
def render_tags?
raw[:article_tags].present?
end
def render_category_block?
render_tags? || categories.present?
end
end
end
end

View File

@@ -4,19 +4,11 @@ module Onebox
class Normalizer class Normalizer
attr_reader :data attr_reader :data
def get(attr, length = nil, sanitize = true) def get(attr, *args)
return nil if Onebox::Helpers.blank?(data)
value = data[attr] value = data[attr]
return if value.blank?
return nil if Onebox::Helpers.blank?(value) return value.map { |v| sanitize_value(v, *args) } if value.is_a?(Array)
sanitize_value(value, *args)
value = html_entities.decode(value)
value = Sanitize.fragment(value) if sanitize
value.strip!
value = Onebox::Helpers.truncate(value, length) unless length.nil?
value
end end
def method_missing(attr, *args, &block) def method_missing(attr, *args, &block)
@@ -48,5 +40,13 @@ module Onebox
def html_entities def html_entities
@html_entities ||= HTMLEntities.new @html_entities ||= HTMLEntities.new
end end
def sanitize_value(value, length = nil, sanitize = true)
value = html_entities.decode(value)
value = Sanitize.fragment(value) if sanitize
value.strip!
value = Onebox::Helpers.truncate(value, length) if length
value
end
end end
end end

View File

@@ -22,6 +22,8 @@ module Onebox
private private
COLLECTIONS = %i[article_section article_section_color article_tag]
def extract(doc) def extract(doc)
return {} if Onebox::Helpers.blank?(doc) return {} if Onebox::Helpers.blank?(doc)
@@ -33,7 +35,14 @@ module Onebox
if (m["property"] && m["property"][/^(?:og|article|product):(.+)$/i]) || if (m["property"] && m["property"][/^(?:og|article|product):(.+)$/i]) ||
(m["name"] && m["name"][/^(?:og|article|product):(.+)$/i]) (m["name"] && m["name"][/^(?:og|article|product):(.+)$/i])
value = (m["content"] || m["value"]).to_s value = (m["content"] || m["value"]).to_s
data[$1.tr("-:", "_").to_sym] ||= value unless Onebox::Helpers.blank?(value) next if Onebox::Helpers.blank?(value)
key = $1.tr("-:", "_").to_sym
data[key] ||= value
if key.in?(COLLECTIONS)
collection_name = "#{key}s".to_sym
data[collection_name] ||= []
data[collection_name] << value
end
end end
end end

View File

@@ -10,7 +10,7 @@ module Onebox
Sanitize::Config::RELAXED, Sanitize::Config::RELAXED,
elements: elements:
Sanitize::Config::RELAXED[:elements] + Sanitize::Config::RELAXED[:elements] +
%w[audio details embed iframe source video svg path], %w[audio details embed iframe source video svg path use],
attributes: { attributes: {
"a" => Sanitize::Config::RELAXED[:attributes]["a"] + %w[target], "a" => Sanitize::Config::RELAXED[:attributes]["a"] + %w[target],
"audio" => %w[controls controlslist], "audio" => %w[controls controlslist],
@@ -40,7 +40,8 @@ module Onebox
"path" => %w[d fill-rule], "path" => %w[d fill-rule],
"svg" => %w[aria-hidden width height viewbox], "svg" => %w[aria-hidden width height viewbox],
"div" => [:data], # any data-* attributes, "div" => [:data], # any data-* attributes,
"span" => [:data], # any data-* attributes "span" => [:data], # any data-* attributes,
"use" => %w[href],
}, },
add_attributes: { add_attributes: {
"iframe" => { "iframe" => {
@@ -89,6 +90,9 @@ module Onebox
"source" => { "source" => {
"src" => HTTP_PROTOCOLS, "src" => HTTP_PROTOCOLS,
}, },
"use" => {
"href" => [:relative],
},
}, },
css: { css: {
properties: Sanitize::Config::RELAXED[:css][:properties] + %w[--aspect-ratio], properties: Sanitize::Config::RELAXED[:css][:properties] + %w[--aspect-ratio],

View File

@@ -0,0 +1,42 @@
{{#image}}<img src="{{image}}" class="thumbnail"/>{{/image}}
<div class="title-wrapper">
<h3><a href="{{link}}" target="_blank" rel="noopener">{{title}}</a></h3>
{{#render_category_block?}}
<div class="topic-category">
{{#categories}}
<span class="badge-wrapper bullet">
<span class="badge-category-bg" style="background-color: #{{color}};"></span>
<span class="badge-category clear-badge">
<span class="category-name">{{name}}</span>
</span>
</span>
{{/categories}}
{{#render_tags?}}
<div class="topic-header-extra">
<div class="list-tags">
<div class="discourse-tags">
<svg class="fa d-icon d-icon-tag svg-icon svg-string" xmlns="http://www.w3.org/2000/svg"><use href="#tag" /></svg>
{{#article_tags}}
<span class="discourse-tag simple">{{.}}</span>
{{/article_tags}}
</div>
</div>
</div>
{{/render_tags?}}
</div>
{{/render_category_block?}}
</div>
{{#description}}
<p>{{description}}</p>
{{/description}}
{{#data1}}
<p>
<span class="label1">{{label1}}: {{data1}}</span>
{{#data2}}
<span class="label2">{{label2}}: {{data2}}</span>
{{/data2}}
</p>
{{/data1}}

View File

@@ -713,6 +713,10 @@ class TopicView
@mentioned_users = mentioned_users.to_h { |u| [u.username, u] } @mentioned_users = mentioned_users.to_h { |u| [u.username, u] }
end end
def tags
@topic.tags.map(&:name)
end
protected protected
def read_posts_set def read_posts_set
@@ -814,13 +818,9 @@ class TopicView
end end
def find_topic(topic_or_topic_id) def find_topic(topic_or_topic_id)
if topic_or_topic_id.is_a?(Topic) return topic_or_topic_id if topic_or_topic_id.is_a?(Topic)
topic_or_topic_id # with_deleted covered in #check_and_raise_exceptions
else Topic.with_deleted.includes(:category, :tags).find_by(id: topic_or_topic_id)
# with_deleted covered in #check_and_raise_exceptions
finder = Topic.with_deleted.where(id: topic_or_topic_id).includes(:category)
finder.first
end
end end
def unfiltered_posts def unfiltered_posts

View File

@@ -194,6 +194,10 @@ And that too in just over an year, way to go! [boom]">
<meta name="twitter:data2" value="9 ❤" /> <meta name="twitter:data2" value="9 ❤" />
<meta property="article:published_time" content="2014-02-06T04:55:19+00:00" /> <meta property="article:published_time" content="2014-02-06T04:55:19+00:00" />
<meta property="og:ignore_canonical" content="true" /> <meta property="og:ignore_canonical" content="true" />
<meta property="og:article:section" content="praise" />
<meta property="og:article:section:color" content="9EB83B" />
<meta property="og:article:tag" content="how-to" />
<meta property="og:article:tag" content="sso" />

View File

@@ -640,6 +640,38 @@ RSpec.describe ApplicationHelper do
expect(helper.crawlable_meta_data).not_to include("twitter:image") expect(helper.crawlable_meta_data).not_to include("twitter:image")
end end
end end
context "with breadcrumbs" do
subject(:metadata) { helper.crawlable_meta_data(breadcrumbs: breadcrumbs) }
let(:breadcrumbs) do
[{ name: "section1", color: "ff0000" }, { name: "section2", color: "0000ff" }]
end
let(:tags) { <<~HTML.strip }
<meta property="og:article:section" content="section1" />
<meta property="og:article:section:color" content="ff0000" />
<meta property="og:article:section" content="section2" />
<meta property="og:article:section:color" content="0000ff" />
HTML
it "generates section and color tags" do
expect(metadata).to include tags
end
end
context "with tags" do
subject(:metadata) { helper.crawlable_meta_data(tags: tags) }
let(:tags) { %w[tag1 tag2] }
let(:output_tags) { <<~HTML.strip }
<meta property="og:article:tag" content="tag1" />
<meta property="og:article:tag" content="tag2" />
HTML
it "generates tag tags" do
expect(metadata).to include output_tags
end
end
end end
describe "discourse_color_scheme_stylesheets" do describe "discourse_color_scheme_stylesheets" do

View File

@@ -0,0 +1,51 @@
# frozen_string_literal: true
RSpec.describe Onebox::Engine::DiscourseTopicOnebox do
subject(:onebox) { described_class.new(url) }
describe "#data" do
subject(:data) { onebox.data }
let(:url) do
"https://meta.discourse.org/t/congratulations-most-stars-in-2013-github-octoverse/12483"
end
let(:expected_data) do
{
article_published_time: "6 Feb 14",
article_published_time_title: "04:55AM - 06 February 2014",
article_tags: %w[how-to sso],
card: "summary",
categories: [{ name: "praise", color: "9EB83B" }],
data1: "1 mins 🕑",
data2: "9 ❤",
description:
"Congratulations Discourse for qualifying Repositories with the most stars on GitHub Octoverse. And that too in just over an year, way to go! 💥",
domain: "Discourse Meta",
favicon:
"https://d11a6trkgmumsb.cloudfront.net/optimized/3X/b/3/b33be9538df3547fcf9d1a51a4637d77392ac6f9_2_32x32.png",
ignore_canonical: "true",
image:
"https://d11a6trkgmumsb.cloudfront.net/optimized/2X/d/d063b3b0807377d98695ee08042a9ba0a8c593bd_2_690x362.png",
label1: "Reading time",
label2: "Likes",
link:
"https://meta.discourse.org/t/congratulations-most-stars-in-2013-github-octoverse/12483",
published_time: "2014-02-06T04:55:19+00:00",
render_category_block?: true,
render_tags?: true,
site_name: "Discourse Meta",
title: "Congratulations, most stars in 2013 GitHub Octoverse!",
url:
"https://meta.discourse.org/t/congratulations-most-stars-in-2013-github-octoverse/12483",
}
end
before do
stub_request(:get, url).to_return(status: 200, body: onebox_response("discourse_topic"))
end
it "returns the expected data" do
expect(data).to include expected_data
end
end
end

View File

@@ -24,4 +24,32 @@ RSpec.describe Onebox::OpenGraph do
og = described_class.new(doc) og = described_class.new(doc)
expect(og.image).to eq("http://test.com/test&apos;ing.mp3") expect(og.image).to eq("http://test.com/test&apos;ing.mp3")
end end
describe "Collections" do
subject(:graph) { described_class.new(doc) }
let(:doc) { Nokogiri.HTML(<<-HTML) }
<html>
<title>test</title>
<meta property="og:article:tag" content="&lt;b&gt;tag1&lt;/b&gt;" />
<meta property="og:article:tag" content="tag2" />
<meta property="og:article:section" content="category1" />
<meta property="og:article:section" content="category2" />
<meta property="og:article:section:color" content="ff0000" />
<meta property="og:article:section:color" content="0000ff" />
</html>
HTML
it "handles multiple article:tag tags" do
expect(graph.article_tags).to eq %w[tag1 tag2]
end
it "handles multiple article:section tags" do
expect(graph.article_sections).to eq %w[category1 category2]
end
it "handles multiple article:section:color tags" do
expect(graph.article_section_colors).to eq %w[ff0000 0000ff]
end
end
end end

View File

@@ -1072,4 +1072,17 @@ RSpec.describe TopicView do
end end
end end
end end
describe "#tags" do
subject(:topic_view_tags) { topic_view.tags }
let(:topic_view) { described_class.new(topic, user) }
let(:topic) { Fabricate.build(:topic, tags: tags) }
let(:tags) { Fabricate.build_times(2, :tag) }
let(:user) { Fabricate(:user) }
it "returns the tags names" do
expect(topic_view_tags).to match tags.map(&:name)
end
end
end end