# Source: discourse/lib/pretty_text.rb
# (captured from a repository web view: 453 lines, 13 KiB, Ruby)
2013-02-05 13:16:51 -06:00
require 'v8'
require 'nokogiri'
require_dependency 'url_helper'
2013-05-27 18:48:47 -05:00
require_dependency 'excerpt_parser'
require_dependency 'post'
2013-02-05 13:16:51 -06:00
module PrettyText
class Helpers
# Translation helper exposed to the JS side.
#
# Looks up `key` under the "js." prefix. When `opts` is given it is passed to
# I18n as symbol-keyed interpolation values, and any `{{name}}` placeholders
# left in the result are replaced with the matching option value.
def t(key, opts)
  scoped_key = "js." + key
  return I18n.t(scoped_key) unless opts

  translated = I18n.t(scoped_key, Hash[opts.entries].symbolize_keys).dup
  opts.each do |name, value|
    translated.gsub!("{{#{name}}}", value.to_s)
  end
  translated
end
# functions here are available to v8
2013-02-05 13:16:51 -06:00
# Returns the schemaless, absolute avatar URL template for `username`.
#
# Yields "" when no username is given or no user has that (downcased)
# username. Users without an uploaded avatar get the default template.
def avatar_template(username)
  return "" unless username

  user = User.find_by(username_lower: username.downcase)
  return "" unless user.present?

  # TODO: Add support for ES6 and call `avatar-template` directly
  template = user.uploaded_avatar_id ? user.avatar_template : User.default_template(username)

  UrlHelper.schemaless(UrlHelper.absolute(template))
end
# Classifies a mentioned name for the markdown engine.
#
# Returns false when no name is given, "group" when exactly one row in
# `groups` has that name, "user" when exactly one row in `users` has the
# downcased name as `username_lower`, and nil otherwise.
def mention_lookup(username)
  return false unless username

  if Group.exec_sql('SELECT 1 FROM groups WHERE name = ?', username).values.length == 1
    "group"
  elsif User.exec_sql('SELECT 1 FROM users WHERE username_lower = ?', username.downcase).values.length == 1
    "user"
  end
end
# Resolves a category hashtag slug.
#
# Returns `[url_with_id, category_slug]` for the category found by
# `Category.query_from_hashtag_slug`, or nil when there is none.
def category_hashtag_lookup(category_slug)
  category = Category.query_from_hashtag_slug(category_slug)
  [category.url_with_id, category_slug] if category
end
# Returns `{ title:, href: }` for a topic an anonymous Guardian can see.
#
# Returns nil when `topic_id` is not an Integer, when no topic has that id,
# or when the topic is not visible to `Guardian.new`.
def get_topic_info(topic_id)
  # Integer rather than Fixnum: Fixnum was folded into Integer in Ruby 2.4
  # and the constant no longer exists on current Rubies.
  return unless topic_id.is_a?(Integer)

  # TODO this only handles public topics, secured one do not get this
  topic = Topic.find_by(id: topic_id)
  return unless topic && Guardian.new.can_see?(topic)

  {
    title: topic.title,
    href: topic.url
  }
end
# Resolves a hashtag that may name either a category or a tag.
#
# Text ending in "::tag" is treated as a tag: the postfix is removed and the
# remainder is looked up in TopicCustomField under TAGS_FIELD_NAME, giving
# `[tag_url, tag_name]`. Any other text is looked up as a category slug,
# giving `[category_url, text]`. Returns nil when nothing matches.
#
# The incoming string is left untouched (a frozen string is accepted).
def category_tag_hashtag_lookup(text)
  tag_postfix = '::tag'
  is_tag = text =~ /#{tag_postfix}$/

  if is_tag
    # non-mutating counterpart of the gsub! this replaces
    tag_name = text.gsub(tag_postfix, '')
    tag = TopicCustomField.find_by(name: TAGS_FIELD_NAME, value: tag_name)
    ["#{Discourse.base_url}/tags/#{tag.value}", tag_name] if tag
  else
    category = Category.query_from_hashtag_slug(text)
    [category.url_with_id, text] if category
  end
end
# Registers a :markdown_context handler that re-points the JS-side
# categoryHashtagLookup option at the combined category/tag helper.
DiscourseEvent.on(:markdown_context) do |js_context|
  js_context.eval('opts["categoryHashtagLookup"] = function(c){return helpers.category_tag_hashtag_lookup(c);}')
end
2013-02-05 13:16:51 -06:00
end
# Taken by `protect` around every block that works with the V8 context.
@mutex = Mutex.new
# Taken by `v8` and `reset_context` while the memoized context is created or cleared.
@ctx_init = Mutex.new
2013-02-05 13:16:51 -06:00
# Root directory used to locate the JavaScript assets loaded into V8;
# simply delegates to Rails.
def self.app_root
  Rails.root
end
# Builds a fresh V8 context with everything needed to cook markdown:
# the Ruby `helpers` object, vendor libraries, Discourse/window/I18n stubs,
# the markdown engine with its dialects, emoji data and any server side
# javascripts registered by plugins. Returns the context.
def self.create_new_context
  # timeout any eval that takes longer than 15 seconds
  ctx = V8::Context.new(timeout: 15000)

  ctx["helpers"] = Helpers.new

  ctx_load(ctx,
    "vendor/assets/javascripts/md5.js",
    "vendor/assets/javascripts/lodash.js",
    "vendor/assets/javascripts/Markdown.Converter.js",
    "lib/headless-ember.js",
    "vendor/assets/javascripts/rsvp.js",
    Rails.configuration.ember.handlebars_location
  )

  ctx.eval("var Discourse = {}; Discourse.SiteSettings = {};")
  ctx.eval("var window = {}; window.devicePixelRatio = 2;") # hack to make code think stuff is retina
  ctx.eval("var I18n = {}; I18n.t = function(a,b){ return helpers.t(a,b); }")

  ctx.eval("var modules = {};")

  decorate_context(ctx)

  ctx_load(ctx,
    "vendor/assets/javascripts/better_markdown.js",
    "app/assets/javascripts/defer/html-sanitizer-bundle.js",
    "app/assets/javascripts/discourse/lib/utilities.js",
    "app/assets/javascripts/discourse/dialects/dialect.js",
    "app/assets/javascripts/discourse/lib/censored-words.js",
    "app/assets/javascripts/discourse/lib/markdown.js"
  )

  # dialect.js itself was loaded above, so skip it here
  Dir["#{app_root}/app/assets/javascripts/discourse/dialects/**.js"].sort.each do |dialect|
    ctx.load(dialect) unless dialect =~ /\/dialect\.js$/
  end

  # emojis
  emoji = ERB.new(File.read("#{app_root}/app/assets/javascripts/discourse/lib/emoji/emoji.js.erb"))
  ctx.eval(emoji.result)

  # Load server side javascripts
  if DiscoursePluginRegistry.server_side_javascripts.present?
    DiscoursePluginRegistry.server_side_javascripts.each do |ssjs|
      if ssjs =~ /\.erb/
        # ERB sources are rendered first and the result is evaluated
        erb = ERB.new(File.read(ssjs))
        erb.filename = ssjs
        ctx.eval(erb.result)
      else
        ctx.load(ssjs)
      end
    end
  end

  ctx
end
# Returns the memoized V8 context, creating it on first use.
#
# The unlocked check is the fast path; creation happens under @ctx_init and
# re-checks @ctx so that only one context is ever built.
def self.v8
  return @ctx if @ctx

  # ensure we only init one of these
  @ctx_init.synchronize do
    @ctx ||= create_new_context
  end

  @ctx
end
# Drops the memoized V8 context (under @ctx_init) so the next call to `v8`
# builds a new one.
def self.reset_context
  @ctx_init.synchronize { @ctx = nil }
end
# Pushes the current site's values into the given JS context: CDN host,
# base URL (current hostname with a trailing ":port" removed), base URI and
# client site settings, and (re)defines Discourse.getURL / getURLWithCDN.
def self.decorate_context(context)
  context.eval("Discourse.CDN = '#{Rails.configuration.action_controller.asset_host}';")
  # `\\d`, not `\d`: inside a double-quoted Ruby string a bare `\d` collapses
  # to the letter `d`, which left the JS regex unable to match a port number.
  context.eval("Discourse.BaseUrl = '#{RailsMultisite::ConnectionManagement.current_hostname}'.replace(/:[\\d]*$/,'');")
  context.eval("Discourse.BaseUri = '#{Discourse::base_uri}';")
  context.eval("Discourse.SiteSettings = #{SiteSetting.client_settings_json};")

  context.eval("Discourse.getURL = function(url) {
    if (!url) return url;
    if (!/^\\/[^\\/]/.test(url)) return url;
    var u = (Discourse.BaseUri === undefined ? '/' : Discourse.BaseUri);
    if (u[u.length-1] === '/') u = u.substring(0, u.length-1);
    if (url.indexOf(u) !== -1) return url;
    if (u.length > 0 && url[0] !== '/') url = '/' + url;
    return u + url;
  };")

  context.eval("Discourse.getURLWithCDN = function(url) {
    url = this.getURL(url);
    if (Discourse.CDN && /^\\/[^\\/]/.test(url)) {
      url = Discourse.CDN + url;
    } else if (Discourse.S3CDN) {
      url = url.replace(Discourse.S3BaseUrl, Discourse.S3CDN);
    }
    return url;
  };")
end
2013-02-05 13:16:51 -06:00
# Converts raw markdown to HTML inside the shared V8 context.
#
# `opts` is handed to the JS converter; :sanitize defaults to true unless it
# is explicitly false. The caller's hash is never modified. When the result
# is blank (and :skip_blank_test is not set) a trivial document is cooked as
# a health check; if that is blank too the context is reset and the text is
# cooked once more.
#
# Raises JavaScriptError (via `protect`) when the JS side fails.
def self.markdown(text, opts=nil)
  # we use the exact same markdown converter as the client
  # TODO: use the same extensions on both client and server (in particular the template for mentions)
  baked = nil

  protect do
    context = v8
    # we need to do this to work in a multi site environment, many sites, many settings
    decorate_context(context)

    # work on a copy so the :sanitize default does not leak into the caller's hash
    context_opts = (opts || {}).dup
    context_opts[:sanitize] = true unless context_opts[:sanitize] == false

    context['opts'] = context_opts
    context['raw'] = text

    if Post.white_listed_image_classes.present?
      Post.white_listed_image_classes.each do |klass|
        context.eval("Discourse.Markdown.whiteListClass('#{klass}')")
      end
    end

    if SiteSetting.enable_emoji?
      context.eval("Discourse.Dialect.setUnicodeReplacements(#{Emoji.unicode_replacements_json})")
    else
      context.eval("Discourse.Dialect.setUnicodeReplacements(null)")
    end

    # reset emojis (v8 context is shared amongst multisites)
    context.eval("Discourse.Dialect.resetEmojis();")
    # custom emojis
    Emoji.custom.each do |emoji|
      context.eval("Discourse.Dialect.registerEmoji('#{emoji.name}', '#{emoji.url}');")
    end
    # plugin emojis
    context.eval("Discourse.Emoji.applyCustomEmojis();")

    context.eval('opts["mentionLookup"] = function(u){return helpers.mention_lookup(u);}')
    context.eval('opts["categoryHashtagLookup"] = function(c){return helpers.category_hashtag_lookup(c);}')
    context.eval('opts["lookupAvatar"] = function(p){return Discourse.Utilities.avatarImg({size: "tiny", avatarTemplate: helpers.avatar_template(p)});}')
    context.eval('opts["getTopicInfo"] = function(i){return helpers.get_topic_info(i)};')

    DiscourseEvent.trigger(:markdown_context, context)
    baked = context.eval('Discourse.Markdown.markdownConverter(opts).makeHtml(raw)')
  end

  if baked.blank? && !(opts || {})[:skip_blank_test]
    # we may have a js engine issue
    test = markdown("a", skip_blank_test: true)
    if test.blank?
      Rails.logger.warn("Markdown engine appears to have crashed, resetting context")
      reset_context
      retry_opts = (opts || {}).dup
      retry_opts[:skip_blank_test] = true
      baked = markdown(text, retry_opts)
    end
  end

  baked
end
# leaving this here, cause it invokes v8, don't want to implement twice
#
# Returns whatever the JS `Discourse.Utilities.avatarImg` produces for the
# given avatar template and size. The context is fetched once so every step
# runs against the same V8 instance, as `markdown` does.
def self.avatar_img(avatar_template, size)
  protect do
    context = v8
    context['avatarTemplate'] = avatar_template
    context['size'] = size
    decorate_context(context)
    context.eval("Discourse.Utilities.avatarImg({ avatarTemplate: avatarTemplate, size: size });")
  end
end
2015-10-15 02:59:29 -05:00
# Runs `title` through the JS `Discourse.Emoji.unescape` and returns the
# result. The context is fetched once so the variable assignment, decoration
# and eval all hit the same V8 instance, as `markdown` does.
def self.unescape_emoji(title)
  protect do
    context = v8
    context["title"] = title
    decorate_context(context)
    context.eval("Discourse.Emoji.unescape(title)")
  end
end
2013-02-05 13:16:51 -06:00
# Cooks raw post text into HTML.
#
# Runs `markdown` on a copy of the text, then post-processes the parsed
# fragment: rel="nofollow" is added to user links when the site setting is on
# and :omit_nofollow is not given, and image sources are pointed at the S3 CDN
# when S3 uploads and an S3 CDN URL are configured. Returns the HTML string.
def self.cook(text, opts={})
  options = opts.dup

  # we have a minor inconsistency
  options[:topicId] = opts[:topic_id]

  sanitized = markdown(text.dup, options)
  doc = Nokogiri::HTML.fragment(sanitized)

  if !options[:omit_nofollow] && SiteSetting.add_rel_nofollow_to_user_content
    add_rel_nofollow_to_user_content(doc)
  end

  if SiteSetting.s3_cdn_url.present? && SiteSetting.enable_s3_uploads
    add_s3_cdn(doc)
  end

  doc.to_html
end
# Rewrites every <img> that has a src so the first occurrence of the store's
# absolute base URL is replaced by the configured S3 CDN URL.
def self.add_s3_cdn(doc)
  doc.css("img").each do |image|
    source = image["src"]
    image["src"] = source.sub(Discourse.store.absolute_base_url, SiteSetting.s3_cdn_url) if source
  end
end
2013-02-25 10:42:20 -06:00
# Adds rel="nofollow" to every link in `doc` that points off-site.
#
# A link is left alone when it has no host, when its host is the site's host
# or a subdomain of it, or when it matches (or is a subdomain of) one of the
# "|"-separated domains in SiteSetting.exclude_rel_nofollow_domains.
# Links whose href cannot be parsed as a URI also get nofollow.
def self.add_rel_nofollow_to_user_content(doc)
  domains = SiteSetting.exclude_rel_nofollow_domains
  whitelist = domains.present? ? domains.split('|') : []

  site_uri = nil
  doc.css("a").each do |l|
    href = l["href"].to_s
    begin
      uri = URI(href)
      site_uri ||= URI(Discourse.base_url)
      host = uri.host

      # interpolation instead of `"." << host`, which mutated a string literal
      trusted = !host.present? ||
        host == site_uri.host ||
        host.end_with?(".#{site_uri.host}") ||
        whitelist.any? { |domain| host == domain || host.end_with?(".#{domain}") }

      l["rel"] = "nofollow" unless trusted
    rescue URI::InvalidURIError, URI::InvalidComponentError
      # add a nofollow anyway
      l["rel"] = "nofollow"
    end
  end
end
2014-07-10 23:17:01 -05:00
# A link found in cooked HTML: its URL plus a flag telling whether it was
# derived from a quote.
class DetectedLink
  attr_accessor :url, :is_quote

  def initialize(url, is_quote = false)
    self.url = url
    self.is_quote = is_quote
  end
end
2013-02-05 13:16:51 -06:00
# Collects the links of a cooked post as DetectedLink objects.
#
# Hrefs inside quotes and elided parts are blanked first so they are not
# reported; blank hrefs and in-page anchors ("#...") are skipped. Each quote
# carrying a data-topic attribute then contributes a "/t/topic/<id>[/<post>]"
# link flagged as a quote.
def self.extract_links(html)
  links = []
  doc = Nokogiri::HTML.fragment(html)

  # remove href inside quotes & elided part
  doc.css("aside.quote a, .elided a").each { |l| l["href"] = "" }

  # extract all links from the post
  doc.css("a").each do |l|
    href = l["href"]
    next if href.blank? || href.start_with?("#")
    links << DetectedLink.new(href)
  end

  # extract links to quotes
  doc.css("aside.quote[data-topic]").each do |a|
    url = "/t/topic/#{a['data-topic']}"
    post_number = a['data-post']
    url = "#{url}/#{post_number}" if post_number
    links << DetectedLink.new(url, true)
  end

  links
end
2013-05-27 18:48:47 -05:00
# Produces an excerpt of `html` via ExcerptParser, after removing lightbox
# meta markup from it. `max_length` and `options` are passed straight through.
def self.excerpt(html, max_length, options={})
  # TODO: properly fix this HACK in ExcerptParser without introducing XSS
  doc = Nokogiri::HTML.fragment(html)
  strip_image_wrapping(doc)

  ExcerptParser.get_excerpt(doc.to_html, max_length, options)
end
2013-02-05 13:16:51 -06:00
# Returns `string` with every <a> element replaced by its inner HTML.
# Blank input is handed back unchanged.
def self.strip_links(string)
  return string if string.blank?

  # If the user is not basic, strip links from their bio
  doc = Nokogiri::HTML.fragment(string)
  doc.css('a').each do |anchor|
    anchor.replace(anchor.inner_html)
  end
  doc.to_html
end
# Given a Nokogiri doc, convert all links to absolute
#
# Hrefs without a host are prefixed with Discourse.base_url. Hrefs that carry
# their own scheme (mailto:, tel:, ...) are already absolute and left alone,
# as are hrefs that cannot be parsed as a URI.
def self.make_all_links_absolute(doc)
  site_uri = nil
  doc.css("a").each do |link|
    href = link["href"].to_s
    begin
      uri = URI(href)
      site_uri ||= URI(Discourse.base_url)

      # a scheme without a host (e.g. mailto:) must not get the site prefix
      next if uri.host.present? || uri.scheme.present?

      link["href"] = "#{site_uri}#{link['href']}"
    rescue URI::InvalidURIError, URI::InvalidComponentError
      # leave it
    end
  end
end
# Removes the ".meta" nodes found inside ".lightbox-wrapper" elements of `doc`.
def self.strip_image_wrapping(doc)
  meta_nodes = doc.css(".lightbox-wrapper .meta")
  meta_nodes.remove
end
# Prepares cooked HTML for an email: triggers the :reduce_cooked event with
# the parsed fragment and the post, makes links absolute, removes lightbox
# meta markup and returns the resulting HTML.
def self.format_for_email(html, post = nil)
  fragment = Nokogiri::HTML.fragment(html)

  DiscourseEvent.trigger(:reduce_cooked, fragment, post)
  make_all_links_absolute(fragment)
  strip_image_wrapping(fragment)

  fragment.to_html
end
2013-05-27 18:48:47 -05:00
protected
2013-02-05 13:16:51 -06:00
# Plain-Ruby stand-in for an error raised by the JS engine; it only carries
# the message and backtrace it is constructed with.
class JavaScriptError < StandardError
  attr_accessor :message, :backtrace

  def initialize(message, backtrace)
    @message, @backtrace = message, backtrace
  end
end
# Runs the given block while holding @mutex and returns the block's value.
#
# A V8::Error raised inside the block is re-raised as a JavaScriptError that
# carries only the message and backtrace.
def self.protect
  @mutex.synchronize do
    begin
      yield
    rescue V8::Error => e
      # This may seem a bit odd, but we don't want to leak out
      # objects that require locks on the v8 vm, to get a backtrace
      # you need a lock, if this happens in the wrong spot you can
      # deadlock a process
      raise JavaScriptError.new(e.message, e.backtrace)
    end
  end
end
# Loads each of `files` (paths relative to `app_root`) into the given context,
# in the order given.
def self.ctx_load(ctx, *files)
  files.each { |file| ctx.load(app_root + file) }
end
end