SECURITY: Strip unrendered unicode bidirectional chars in code blocks (#15032)

When rendering Markdown code blocks, we replace each offending character in the output string with a span that highlights a textual representation of the character, along with a title attribute explaining why the character was highlighted. The characters replaced by this fix, which are the bidirectional characters considered relevant, are: U+202A, U+202B, U+202C, U+202D, U+202E, U+2066, U+2067, U+2068, U+2069.
2025-02-25 18:55:32 -06:00 · 2021-11-22 10:43:03 +10:00
parent 10a57825c8
commit fa6b87a1bf
4 changed files with 130 additions and 0 deletions
--- a/lib/pretty_text.rb
+++ b/lib/pretty_text.rb
@@ -5,6 +5,19 @@ require 'nokogiri'
 require 'erb'

 module PrettyText
+  # Invisible Unicode bidirectional control characters that get replaced with
+  # a visible placeholder when they appear inside rendered code blocks.
+  DANGEROUS_BIDI_CHARACTERS = [
+    "\u202A", "\u202B", "\u202C", "\u202D", "\u202E", # embeddings, overrides and their terminator
+    "\u2066", "\u2067", "\u2068", "\u2069", # isolates and their terminator
+  ].freeze
+
+  # Matches any single character listed in DANGEROUS_BIDI_CHARACTERS.
+  DANGEROUS_BIDI_REGEXP = Regexp.union(DANGEROUS_BIDI_CHARACTERS).freeze
+
  @mutex = Mutex.new
  @ctx_init = Mutex.new

@@ -278,6 +291,7 @@ module PrettyText

    add_nofollow = !options[:omit_nofollow] && SiteSetting.add_rel_nofollow_to_user_content
    add_rel_attributes_to_user_content(doc, add_nofollow)
+    strip_hidden_unicode_bidirectional_characters(doc)

    if SiteSetting.enable_mentions
      add_mentions(doc, user_id: opts[:user_id])
@@ -290,6 +304,24 @@ module PrettyText
    loofah_fragment.scrub!(scrubber).to_html
  end

+  def self.strip_hidden_unicode_bidirectional_characters(doc)
+    return if !DANGEROUS_BIDI_REGEXP.match?(doc.content)
+
+    doc.css("code,pre").each do |code_tag|
+      next if !DANGEROUS_BIDI_REGEXP.match?(code_tag.content)
+
+      DANGEROUS_BIDI_CHARACTERS.each do |bidi|
+        next if !code_tag.content.include?(bidi)
+
+        formatted = "&lt;U+#{bidi.ord.to_s(16).upcase}&gt;"
+        code_tag.inner_html = code_tag.inner_html.gsub(
+          bidi,
+          "<span class=\"bidi-warning\" title=\"#{I18n.t("post.hidden_bidi_character")}\">#{formatted}</span>"
+        )
+      end
+    end
+  end
+
  def self.add_rel_attributes_to_user_content(doc, add_nofollow)
    allowlist = []