FIX: Handle all UTF-8 characters (#21344)

Watched words were converted to regular expressions containing \W, which handled only ASCII characters. Using [^[:word:]] instead ensures that non-ASCII (UTF-8) characters are also handled correctly.
2025-02-25 18:55:32 -06:00 · 2023-05-15 11:45:04 +02:00
parent 23a146a7c6
commit 9a2780397f
7 changed files with 49 additions and 33 deletions
--- a/lib/pretty_text.rb
+++ b/lib/pretty_text.rb
@@ -204,9 +204,9 @@ module PrettyText
        __optInput.emojiUnicodeReplacer = __emojiUnicodeReplacer;
        __optInput.emojiDenyList = #{Emoji.denied.to_json};
        __optInput.lookupUploadUrls = __lookupUploadUrls;
-        __optInput.censoredRegexp = #{WordWatcher.serializable_word_matcher_regexp(:censor).to_json};
-        __optInput.watchedWordsReplace = #{WordWatcher.word_matcher_regexps(:replace).to_json};
-        __optInput.watchedWordsLink = #{WordWatcher.word_matcher_regexps(:link).to_json};
+        __optInput.censoredRegexp = #{WordWatcher.serializable_word_matcher_regexp(:censor, engine: :js).to_json};
+        __optInput.watchedWordsReplace = #{WordWatcher.word_matcher_regexps(:replace, engine: :js).to_json};
+        __optInput.watchedWordsLink = #{WordWatcher.word_matcher_regexps(:link, engine: :js).to_json};
        __optInput.additionalOptions = #{Site.markdown_additional_options.to_json};
      JS