FIX: Handle all UTF-8 characters (#21344)

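Watched words were converted to regular expressions containing \W, which treats only ASCII letters, digits and underscore as word characters, so non-ASCII characters were wrongly seen as word boundaries. Using [^[:word:]] instead ensures that all UTF-8 characters are handled correctly.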
2025-02-25 18:55:32 -06:00 · 2023-05-15 11:45:04 +02:00
parent 23a146a7c6
commit 9a2780397f
7 changed files with 49 additions and 33 deletions
--- a/app/serializers/site_serializer.rb
+++ b/app/serializers/site_serializer.rb
@@ -205,7 +205,7 @@ class SiteSerializer < ApplicationSerializer
  end

  def censored_regexp
-    WordWatcher.serializable_word_matcher_regexp(:censor)
+    WordWatcher.serializable_word_matcher_regexp(:censor, engine: :js)
  end

  def custom_emoji_translation
@@ -221,11 +221,11 @@ class SiteSerializer < ApplicationSerializer
  end

  def watched_words_replace
-    WordWatcher.word_matcher_regexps(:replace)
+    WordWatcher.word_matcher_regexps(:replace, engine: :js)
  end

  def watched_words_link
-    WordWatcher.word_matcher_regexps(:link)
+    WordWatcher.word_matcher_regexps(:link, engine: :js)
  end

  def categories
--- a/app/serializers/watched_word_serializer.rb
+++ b/app/serializers/watched_word_serializer.rb
@@ -4,7 +4,7 @@ class WatchedWordSerializer < ApplicationSerializer
  attributes :id, :word, :regexp, :replacement, :action, :case_sensitive

  def regexp
-    WordWatcher.word_to_regexp(word, whole: true)
+    WordWatcher.word_to_regexp(word)
  end

  def action