FIX: Add word boundaries to replace and tag watched words (#13405)

The generated regular expressions did not contain word boundaries (\b), so they matched any text that contained the watched word, even when it appeared only as a substring of a longer word. For example, if "art" was a watched word, a post containing the word "artist" matched.
2025-02-25 18:55:32 -06:00 · 2021-06-18 18:54:06 +03:00
parent 4afd8f9bdf
commit 74f7295631
8 changed files with 38 additions and 18 deletions
--- a/app/services/word_watcher.rb
+++ b/app/services/word_watcher.rb
@@ -54,17 +54,26 @@ class WordWatcher

  def self.word_matcher_regexps(action)
    if words = get_cached_words(action)
-      words.map { |w, r| [word_to_regexp(w), r] }.to_h
+      words.map { |w, r| [word_to_regexp(w, whole: true), r] }.to_h
    end
  end

-  def self.word_to_regexp(word)
+  def self.word_to_regexp(word, whole: false)
    if SiteSetting.watched_words_regular_expressions?
      # Strip ruby regexp format if present, we're going to make the whole thing
      # case insensitive anyway
-      return word.start_with?("(?-mix:") ? word[7..-2] : word
+      regexp = word.start_with?("(?-mix:") ? word[7..-2] : word
+      regexp = "(#{regexp})" if whole
+      return regexp
    end
-    Regexp.escape(word).gsub("\\*", '\S*')
+
+    regexp = Regexp.escape(word).gsub("\\*", '\S*')
+
+    if whole && !SiteSetting.watched_words_regular_expressions?
+      regexp = "(?:\\W|^)(#{regexp})(?=\\W|$)"
+    end
+
+    regexp
  end

  def self.word_matcher_regexp_key(action)
@@ -144,6 +153,6 @@ class WordWatcher
  end

  def word_matches?(word)
-    Regexp.new(WordWatcher.word_to_regexp(word), Regexp::IGNORECASE).match?(@raw)
+    Regexp.new(WordWatcher.word_to_regexp(word, whole: true), Regexp::IGNORECASE).match?(@raw)
  end
 end