mirror of
https://github.com/discourse/discourse.git
synced 2024-11-23 01:16:38 -06:00
9d50790530
When doing local oneboxes we sometimes want to allow SVGs in the final preview HTML. The main case currently is for the new cooked hashtags, which include an SVG icon. SVGs will be included in local oneboxes via `ExcerptParser` _only_ if they have the d-icon class, and if the caller for `post.excerpt` specifies the `keep_svg: true` option.
240 lines
6.9 KiB
Ruby
240 lines
6.9 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# SAX handler that turns cooked post HTML into a length-limited excerpt.
#
# The HTML SAX parser drives #start_element, #end_element and #characters.
# Output accumulates in #excerpt; parsing is aborted by throwing :done once
# the length limit is exceeded or a custom excerpt element has been closed.
class ExcerptParser < Nokogiri::XML::SAX::Document
  attr_reader :excerpt

  # Opening <span>/<div> tag whose class attribute is exactly "excerpt".
  CUSTOM_EXCERPT_REGEX = /<\s*(span|div)[^>]*class\s*=\s*['"]excerpt['"][^>]*>/

  # @param length [Integer] maximum number of counted characters to emit
  # @param options [Hash, nil] feature flags; a flag is enabled only when its
  #   value is exactly +true+. Recognised keys: :strip_links, :strip_images,
  #   :text_entities, :markdown_images, :keep_newlines, :keep_emoji_images,
  #   :keep_onebox_source, :keep_onebox_body, :keep_quotes, :keep_svg,
  #   :remap_emoji.
  def initialize(length, options = nil)
    @length = length
    @excerpt = +""
    @current_length = 0

    # Must be an assignment: a bare `options || {}` leaves options nil and the
    # lookups below would raise NoMethodError.
    options ||= {}

    @strip_links = options[:strip_links] == true
    @strip_images = options[:strip_images] == true
    @text_entities = options[:text_entities] == true
    @markdown_images = options[:markdown_images] == true
    @keep_newlines = options[:keep_newlines] == true
    @keep_emoji_images = options[:keep_emoji_images] == true
    @keep_onebox_source = options[:keep_onebox_source] == true
    @keep_onebox_body = options[:keep_onebox_body] == true
    @keep_quotes = options[:keep_quotes] == true
    @keep_svg = options[:keep_svg] == true
    @remap_emoji = options[:remap_emoji] == true

    # Parser state, updated as elements open and close.
    @start_excerpt = false
    @in_a = false
    @in_quote = false
    @in_summary = false
    @in_svg = false
    @in_details_depth = 0
    @summary_contents = +""
    @detail_contents = +""
  end

  # Parses +html+ and returns its excerpt.
  #
  # @param html [String, nil] HTML to excerpt; nil is treated as ""
  # @param length [Integer] character limit; replaced by the full HTML length
  #   when the HTML contains a custom excerpt element
  # @param options [Hash, nil] see #initialize
  # @return [String] the stripped excerpt
  def self.get_excerpt(html, length, options)
    html ||= ""
    options ||= {}

    # The cheap substring test avoids running the regex on most input.
    length = html.length if html.include?("excerpt") && CUSTOM_EXCERPT_REGEX.match?(html)

    me = new(length, options)
    parser = Nokogiri::HTML::SAX::Parser.new(me)
    catch(:done) { parser.parse(html) }

    excerpt = me.excerpt.strip
    if options[:keep_onebox_source] || options[:keep_onebox_body]
      # Collapse every run of newlines (and surrounding whitespace) into one blank line.
      excerpt = excerpt.gsub(/\s*\n+\s*/, "\n\n")
    end
    excerpt = CGI.unescapeHTML(excerpt) if options[:text_entities] == true
    excerpt
  end

  # Escapes a value for use inside a double-quoted HTML attribute.
  #
  # @param v [String, nil]
  # @return [String] escaped copy of +v+, or "" when +v+ is nil/false
  def escape_attribute(v)
    return "" unless v

    v = v.dup
    # "&" has to be replaced first so the entities added below are not re-escaped.
    v.gsub!("&", "&amp;")
    v.gsub!("\"", "&quot;")
    v.gsub!("<", "&lt;")
    v.gsub!(">", "&gt;")
    v
  end

  # Writes an opening tag with its attributes straight into the output. The
  # tag is neither counted against the limit, truncated, nor HTML-encoded.
  #
  # @param name [String] element name
  # @param attributes [Array<Array>, Hash] attribute name/value pairs
  def include_tag(name, attributes)
    rendered = attributes.map { |k, v| "#{k}=\"#{escape_attribute(v)}\"" }.join(" ")
    characters("<#{name} #{rendered}>", truncate: false, count_it: false, encode: false)
  end

  # SAX callback for an opening tag.
  #
  # @param name [String] element name
  # @param attributes [Array<Array>] attribute name/value pairs
  def start_element(name, attributes = [])
    case name
    when "img"
      attributes = Hash[*attributes.flatten]

      if attributes["class"]&.include?("emoji")
        if @remap_emoji
          title = (attributes["alt"] || "").gsub(":", "")
          title = Emoji.lookup_unicode(title) || attributes["alt"]
          return characters(title)
        elsif @keep_emoji_images
          return include_tag(name, attributes)
        else
          return characters(attributes["alt"])
        end
      end

      unless @strip_images
        # With markdown_images set the placeholder is rendered as ![label](src).
        characters("!") if @markdown_images

        if !attributes["alt"].blank?
          characters("[#{attributes["alt"]}]")
        elsif !attributes["title"].blank?
          characters("[#{attributes["title"]}]")
        else
          characters("[#{I18n.t 'excerpt_image'}]")
        end

        characters("(#{attributes['src']})") if @markdown_images
      end

    when "a"
      unless @strip_links
        include_tag(name, attributes)
        @in_a = true
      end

    when "aside"
      attributes = Hash[*attributes.flatten]

      # Asides are suppressed unless they are oneboxes we were asked to keep...
      unless (@keep_onebox_source || @keep_onebox_body) && attributes["class"]&.include?("onebox")
        @in_quote = true
      end

      # ...or quotes we were asked to keep.
      if attributes["class"]&.include?("quote")
        if @keep_quotes || (@keep_onebox_body && attributes["data-topic"].present?)
          @in_quote = false
        end
      end

    when "article"
      @in_quote = !@keep_onebox_body if attributes.include?(["class", "onebox-body"])

    when "header"
      @in_quote = !@keep_onebox_source if attributes.include?(["class", "source"])

    when "div", "span"
      if attributes.include?(["class", "excerpt"])
        # A custom excerpt element discards everything collected so far.
        @excerpt = +""
        @current_length = 0
        @start_excerpt = true
      end

    when "details"
      @detail_contents = +"" if @in_details_depth == 0
      @in_details_depth += 1

    when "summary"
      # Only the summary of the outermost <details> is captured.
      if @in_details_depth == 1 && !@in_summary
        @summary_contents = +""
        @in_summary = true
      end

    when "svg"
      attributes = Hash[*attributes.flatten]
      # Only icon SVGs are kept, and only on request; the class may be absent.
      if @keep_svg && attributes["class"]&.include?("d-icon")
        include_tag(name, attributes)
        @in_svg = true
      end

    when "use"
      include_tag(name, attributes) if @in_svg && @keep_svg
    end
  end

  # SAX callback for a closing tag.
  #
  # @param name [String] element name
  def end_element(name)
    case name
    when "a"
      unless @strip_links
        characters("</a>", truncate: false, count_it: false, encode: false)
        @in_a = false
      end

    when "p", "br"
      if @keep_newlines
        characters("<br>", truncate: false, count_it: false, encode: false)
      else
        characters(" ")
      end

    when "aside"
      @in_quote = false

    when "details"
      @in_details_depth -= 1

      if @in_details_depth == 0
        @summary_contents = clean(@summary_contents)
        @detail_contents = clean(@detail_contents)

        if @current_length + @summary_contents.length >= @length
          # The summary alone reaches the limit: emit it (possibly truncated)
          # inside a disabled <details> and leave the body out.
          characters(
            @summary_contents,
            encode: false,
            before_string: "<details class='disabled'><summary>",
            after_string: "</summary></details>",
          )
        else
          characters(
            @summary_contents,
            truncate: false,
            encode: false,
            before_string: "<details><summary>",
            after_string: "</summary>",
          )

          characters(@detail_contents, encode: false, after_string: "</details>")
        end
      end

    when "summary"
      @in_summary = false if @in_details_depth == 1

    when "div", "span"
      # Once a custom excerpt element has been opened, the first closing
      # div/span ends parsing.
      throw :done if @start_excerpt

    when "svg"
      # Close only what #start_element actually opened, otherwise a stray
      # closing tag would leak into the excerpt.
      characters("</svg>", truncate: false, count_it: false, encode: false) if @in_svg
      @in_svg = false

    when "use"
      characters("</use>", truncate: false, count_it: false, encode: false) if @in_svg && @keep_svg
    end
  end

  # @param str [String]
  # @return [String] +str+ stripped of surrounding whitespace and HTML-escaped
  def clean(str)
    ERB::Util.html_escape(str.strip)
  end

  # SAX callback for text; also used internally to append markup.
  #
  # @param string [#to_s] text to append
  # @param truncate [Boolean] when the limit is exceeded, emit the leading
  #   part of +string+ before the ellipsis
  # @param count_it [Boolean] count +string+ against the length limit
  # @param encode [Boolean] HTML-escape +string+ before appending
  # @param before_string [String, nil] raw text appended before +string+
  # @param after_string [String, nil] raw text appended after +string+
  # @return [void] throws :done when the length limit is exceeded
  def characters(string, truncate: true, count_it: true, encode: true, before_string: nil, after_string: nil)
    return if @in_quote

    # we call length on this so might as well ensure we have a string
    string = string.to_s

    # Inside <details> the text is buffered and emitted when the element closes.
    if @in_details_depth > 0
      buffer = @in_summary ? @summary_contents : @detail_contents
      buffer << string
      return
    end

    @excerpt << before_string if before_string

    encoder = encode ? lambda { |s| ERB::Util.html_escape(s) } : lambda { |s| s }

    if count_it && @current_length + string.length > @length
      length = [0, @length - @current_length - 1].max
      # Emoji shortcodes (":name:") are never cut in half; they are dropped instead.
      @excerpt << encoder.call(string[0..length]) if truncate && !emoji?(string)
      # Plain-text output gets three dots, HTML output the ellipsis entity.
      @excerpt << (@text_entities ? "..." : "&hellip;")
      @excerpt << "</a>" if @in_a
      @excerpt << after_string if after_string
      throw :done
    end

    @excerpt << encoder.call(string)
    @excerpt << after_string if after_string
    @current_length += string.length if count_it
  end

  # @param string [String]
  # @return [Boolean] true when +string+ is a single ":name:" emoji shortcode
  def emoji?(string)
    string.match?(/\A:\w+:\Z/)
  end
end
|