diff --git a/app/assets/javascripts/defer/html-sanitizer-bundle.js b/app/assets/javascripts/defer/html-sanitizer-bundle.js index 12b212e535e..adcde0e4efa 100644 --- a/app/assets/javascripts/defer/html-sanitizer-bundle.js +++ b/app/assets/javascripts/defer/html-sanitizer-bundle.js @@ -2060,6 +2060,10 @@ var html = (function(html4) { html4.ATTRIBS.hasOwnProperty(attribKey))) { atype = html4.ATTRIBS[attribKey]; } + + // Discourse modification: give us more flexibility with whitelists + if (opt_nmTokenPolicy && opt_nmTokenPolicy(tagName, attribName, value)) { continue; } + if (atype !== null) { switch (atype) { case html4.atype['NONE']: break; @@ -2108,17 +2112,6 @@ var html = (function(html4) { log(opt_logger, tagName, attribName, oldValue, value); } break; - case html4.atype['ID']: - case html4.atype['IDREF']: - case html4.atype['IDREFS']: - case html4.atype['GLOBAL_NAME']: - case html4.atype['LOCAL_NAME']: - case html4.atype['CLASSES']: - value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value; - if (opt_logger) { - log(opt_logger, tagName, attribName, oldValue, value); - } - break; case html4.atype['URI']: value = safeUri(value, getUriEffect(tagName, attribName), @@ -2135,7 +2128,6 @@ var html = (function(html4) { case html4.atype['URI_FRAGMENT']: if (value && '#' === value.charAt(0)) { value = value.substring(1); // remove the leading '#' - value = opt_nmTokenPolicy ? opt_nmTokenPolicy(value) : value; if (value !== null && value !== void 0) { value = '#' + value; // restore the leading '#' } diff --git a/app/assets/javascripts/discourse/dialects/autolink_dialect.js b/app/assets/javascripts/discourse/dialects/autolink_dialect.js index d231089cd92..57130511be0 100644 --- a/app/assets/javascripts/discourse/dialects/autolink_dialect.js +++ b/app/assets/javascripts/discourse/dialects/autolink_dialect.js @@ -4,7 +4,7 @@ **/ var urlReplacerArgs = { matcher: /^((?:https?:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.])(?:[^\s()<>]+|\([^\s()<>]+\))+(?:\([^\s()<>]+\)|[^`!()\[\]{};:'".,<>?«»“”‘’\s]))/gm, - spaceBoundary: true, + spaceOrTagBoundary: true, emitter: function(matches) { var url = matches[1], diff --git a/app/assets/javascripts/discourse/dialects/dialect.js b/app/assets/javascripts/discourse/dialects/dialect.js index 040e11028dd..86e1adf8e0b 100644 --- a/app/assets/javascripts/discourse/dialects/dialect.js +++ b/app/assets/javascripts/discourse/dialects/dialect.js @@ -42,10 +42,6 @@ function processTextNodes(node, event, emitter) { for (var j=1; j)$/))) { return true; } } /** @@ -147,9 +144,19 @@ Discourse.Dialect = { cook: function(text, opts) { if (!initialized) { initializeDialects(); } dialect.options = opts; - var tree = parser.toHTMLTree(text, 'Discourse'); + var tree = parser.toHTMLTree(text, 'Discourse'), + result = parser.renderJsonML(parseTree(tree)); - return parser.renderJsonML(parseTree(tree)); + // This feature is largely for MDTest. We prefer to strip comments + // in Discourse + + if (opts.sanitize) { + result = Discourse.Markdown.sanitize(result); + } else if (opts.sanitizerFunction) { + result = opts.sanitizerFunction(result); + } + + return result.trim(); }, /** diff --git a/app/assets/javascripts/discourse/lib/markdown.js b/app/assets/javascripts/discourse/lib/markdown.js index e3a629314ca..5212dbc5a96 100644 --- a/app/assets/javascripts/discourse/lib/markdown.js +++ b/app/assets/javascripts/discourse/lib/markdown.js @@ -7,10 +7,51 @@ @namespace Discourse @module Discourse **/ +var _validClasses = {}, + _validIframes = [], + _validTags = {}; + +function validateAttribute(tagName, attribName, value) { + var tag = _validTags[tagName]; + + // Handle classes + if (attribName === "class") { + if (_validClasses[value]) { return value; } + + if (tag) { + var classes = tag['class']; + if (classes && (classes.indexOf(value) !== -1 || classes.indexOf('*') !== -1)) { + return value; + } + } + } else if (attribName.indexOf('data-') === 0) { + // data-* attributes + if (tag) { + var allowed = tag[attribName] || tag['data-*']; + if (allowed === value || allowed.indexOf('*') !== -1) { return value; } + } + } + + if (tag) { + var attrs = tag[attribName]; + if (attrs && (attrs.indexOf(value) !== -1 || attrs.indexOf('*') !== -1)) { return value; } + } +} + Discourse.Markdown = { - validClasses: {}, - validIframes: [], + /** + Whitelist class for only a certain tag + + @param {String} tagName to whitelist + @param {String} attribName to whitelist + @param {String} value to whitelist + **/ + whiteListTag: function(tagName, attribName, value) { + _validTags[tagName] = _validTags[tagName] || {}; + _validTags[tagName][attribName] = _validTags[tagName][attribName] || []; + _validTags[tagName][attribName].push(value || '*'); + }, /** Whitelists more classes for sanitization. @@ -19,10 +60,8 @@ Discourse.Markdown = { @method whiteListClass **/ whiteListClass: function() { - var args = Array.prototype.slice.call(arguments), - validClasses = Discourse.Markdown.validClasses; - - args.forEach(function (a) { validClasses[a] = true; }); + var args = Array.prototype.slice.call(arguments); + args.forEach(function (a) { _validClasses[a] = true; }); }, /** @@ -32,7 +71,7 @@ Discourse.Markdown = { @param {Regexp} regexp The regexp to whitelist. **/ whiteListIframe: function(regexp) { - Discourse.Markdown.validIframes.push(regexp); + _validIframes.push(regexp); }, /** @@ -124,8 +163,8 @@ Discourse.Markdown = { // whitelist some iframe only if (hints && hints.XML_TAG === "iframe" && hints.XML_ATTR === "src") { - for (var i = 0, length = Discourse.Markdown.validIframes.length; i < length; i++) { - if(Discourse.Markdown.validIframes[i].test(url)) { return url; } + for (var i = 0, length = _validIframes.length; i < length; i++) { + if(_validIframes[i].test(url)) { return url; } } return; } @@ -144,12 +183,10 @@ Discourse.Markdown = { Checks to see if a name, class or id is allowed in the cooked content @method nameIdClassAllowed - @param {String} val The name, class or id to check - @return {String} val the transformed name class or id + @param {String} tagName to check + @param {String} attribName to check + @param {String} value to check **/ - nameIdClassAllowed: function(val) { - if (Discourse.Markdown.validClasses[val]) { return val; } - }, /** @@ -161,8 +198,11 @@ Discourse.Markdown = { **/ sanitize: function(text) { if (!window.html_sanitize || !text) return ""; - text = text.replace(/<([^A-Za-z\/]|$)/g, "<$1"); - return window.html_sanitize(text, Discourse.Markdown.urlAllowed, Discourse.Markdown.nameIdClassAllowed); + + // Allow things like <3 and <_< + text = text.replace(/<([^A-Za-z\/\!]|$)/g, "<$1"); + + return window.html_sanitize(text, Discourse.Markdown.urlAllowed, validateAttribute); }, /** @@ -185,5 +225,14 @@ Discourse.Markdown = { }; RSVP.EventTarget.mixin(Discourse.Markdown); -Discourse.Markdown.whiteListClass("attachment"); +Discourse.Markdown.whiteListTag('a', 'class', 'attachment'); +Discourse.Markdown.whiteListTag('a', 'target', '_blank'); +Discourse.Markdown.whiteListTag('a', 'class', 'onebox'); +Discourse.Markdown.whiteListTag('a', 'class', 'mention'); +Discourse.Markdown.whiteListTag('div', 'class', 'title'); +Discourse.Markdown.whiteListTag('div', 'class', 'quote-controls'); +Discourse.Markdown.whiteListTag('code', 'class', '*'); +Discourse.Markdown.whiteListTag('span', 'class', 'mention'); +Discourse.Markdown.whiteListTag('aside', 'class', 'quote'); +Discourse.Markdown.whiteListTag('aside', 'data-*'); Discourse.Markdown.whiteListIframe(/^(https?:)?\/\/www\.google\.com\/maps\/embed\?.+/i); diff --git a/plugins/emoji/assets/javascripts/emoji.js.erb b/plugins/emoji/assets/javascripts/emoji.js.erb index ccfbcfdff0a..2eeef638aa4 100644 --- a/plugins/emoji/assets/javascripts/emoji.js.erb +++ b/plugins/emoji/assets/javascripts/emoji.js.erb @@ -156,5 +156,5 @@ }); } - Discourse.Markdown.whiteListClass("emoji"); + Discourse.Markdown.whiteListTag('img', 'class', 'emoji'); }).call(this); diff --git a/spec/components/pretty_text_spec.rb b/spec/components/pretty_text_spec.rb index 0a078e619f3..df9e2e15d50 100644 --- a/spec/components/pretty_text_spec.rb +++ b/spec/components/pretty_text_spec.rb @@ -258,20 +258,11 @@ describe PrettyText do PrettyText.cook("**a*_b**").should match_html "

a*_b

" end - pending "small links" do - PrettyText.cook("\nhttp://a.com\n").should match_html "


http://a.com

" - end - - it "does not apply italics when there is a space inside" do PrettyText.cook("** hello**").should match_html "

** hello**

" PrettyText.cook("**hello **").should match_html "

**hello **

" end - pending "allows comments through" do - PrettyText.cook("boom ").should match_html "

boom

" - end - it "allows does not bold chinese intra word" do PrettyText.cook("你**hello**").should match_html "

你**hello**

" end @@ -279,11 +270,6 @@ describe PrettyText do it "allows bold chinese" do PrettyText.cook("**你hello**").should match_html "

你hello

" end - - pending "does not break a streak for mentions" do - Fabricate(:user, username: 'sam') - PrettyText.cook("a @sam c").should match_html "

a @sam c

" - end end end diff --git a/test/javascripts/lib/markdown_test.js b/test/javascripts/lib/markdown_test.js index 34d4e9f2753..b153c7175c9 100644 --- a/test/javascripts/lib/markdown_test.js +++ b/test/javascripts/lib/markdown_test.js @@ -6,6 +6,8 @@ module("Discourse.Markdown", { var cooked = function(input, expected, text) { var result = Discourse.Markdown.cook(input, {sanitize: true}); + expected = expected.replace(/\/>/g, ">"); + // result = result.replace("/>", ">"); equal(result, expected, text); }; @@ -138,6 +140,8 @@ test("Links", function() { cooked("User [MOD]: Hello!", "

User [MOD]: Hello!

", "It does not consider references that are obviously not URLs"); + + cooked("http://eviltrout.com", "

http://eviltrout.com

", "Links within HTML tags"); }); test("simple quotes", function() { @@ -240,6 +244,9 @@ test("Mentions", function() { "

@eviltrout

", "it doesn't onebox mentions"); + cookedOptions("a @sam c", alwaysTrue, + "

a @sam c

", + "it allows mentions within HTML tags"); }); @@ -370,7 +377,7 @@ test("sanitize", function() { cooked("[the answer](javascript:alert(42))", "

the answer

", "it prevents XSS"); - cooked("\n", "


<!-- -->

", "it doesn't circumvent XSS with comments"); + cooked("\n", "


", "it doesn't circumvent XSS with comments"); }); test("URLs in BBCode tags", function() { diff --git a/test/javascripts/mdtest/fixtures/Images.xhtml b/test/javascripts/mdtest/fixtures/Images.xhtml index 925bc1476eb..fdec8ebcb6c 100755 --- a/test/javascripts/mdtest/fixtures/Images.xhtml +++ b/test/javascripts/mdtest/fixtures/Images.xhtml @@ -12,7 +12,7 @@

alt text.

-

Empty

+

Empty

this is a stupid URL

diff --git a/test/javascripts/mdtest/fixtures/Links, inline style.xhtml b/test/javascripts/mdtest/fixtures/Links, inline style.xhtml index 2d5a29ff25f..70d02398ec8 100755 --- a/test/javascripts/mdtest/fixtures/Links, inline style.xhtml +++ b/test/javascripts/mdtest/fixtures/Links, inline style.xhtml @@ -10,9 +10,9 @@

URL wrapped in angle brackets.

-

URL w/ angle brackets + title.

+

URL w/ angle brackets + title.

-

Empty.

+

Empty.

With parens in the URL

diff --git a/test/javascripts/mdtest/mdtest.js.erb b/test/javascripts/mdtest/mdtest.js.erb index 5bf4c22fe05..f51a94ab2e9 100644 --- a/test/javascripts/mdtest/mdtest.js.erb +++ b/test/javascripts/mdtest/mdtest.js.erb @@ -8,13 +8,42 @@ module("MDTest", { // do not affect formatting. function normalize(str) { return str.replace(/\n\s*/g, ''). - replace(/ \/\>/g, '/>'). + replace(/ \/\>/g, '>'). replace(/ ?/g, "\t"). replace(/"/g, '"'); } +// We use a custom sanitizer for MD test that hoists out comments. In Discourse +// they are stripped, but to be compliant with the spec they should not be. +function hoistingSanitizer(result) { + var hoisted, + m = result.match(//g); + if (m && m.length) { + hoisted = []; + for (var i=0; i