FIX: Add word boundaries to replace and tag watched words (#13405)

The generated regular expressions did not contain word-boundary anchors
such as \b, so they matched any text that contained the watched word,
even when it was only a substring of a longer word.

For example, if "art" was a watched word, a post containing the word
"artist" would also match.
This commit is contained in:
Bianca Nenciu
2021-06-18 18:54:06 +03:00
committed by GitHub
parent 4afd8f9bdf
commit 74f7295631
8 changed files with 38 additions and 18 deletions

View File

@@ -54,17 +54,26 @@ class WordWatcher
# Builds a hash keyed by the regexp source string generated for each cached
# watched word of the given action; the value is the second element of the
# cached pair (presumably the replacement/tag text — confirm against
# get_cached_words). Evaluates to nil when get_cached_words returns nothing.
#
# NOTE(review): this is a diff hunk rendered without +/- markers. The first
# `words.map` line looks like the pre-change version and the second one
# (passing `whole: true`) its replacement.
def self.word_matcher_regexps(action)
if words = get_cached_words(action)
words.map { |w, r| [word_to_regexp(w), r] }.to_h
# `whole: true` asks word_to_regexp for a pattern with a capture group
# around the word.
words.map { |w, r| [word_to_regexp(w, whole: true), r] }.to_h
end
end
# Converts a watched word into a regexp source string (not a Regexp object).
#
# word  - the watched word, or a regexp source when the
#         watched_words_regular_expressions site setting is enabled.
# whole - when true, the word is wrapped in a capture group; in plain-word
#         mode it is additionally surrounded by non-word/line-edge checks so
#         it does not match inside a longer word.
#
# NOTE(review): this is a diff hunk rendered without +/- markers, so the old
# and new signature and the old and new body lines appear side by side.
def self.word_to_regexp(word)
def self.word_to_regexp(word, whole: false)
if SiteSetting.watched_words_regular_expressions?
# Strip ruby regexp format if present, we're going to make the whole thing
# case insensitive anyway
return word.start_with?("(?-mix:") ? word[7..-2] : word
# "(?-mix:" is 7 characters long; [7..-2] drops that prefix and the final
# character (the closing parenthesis of the wrapper).
regexp = word.start_with?("(?-mix:") ? word[7..-2] : word
regexp = "(#{regexp})" if whole
return regexp
end
Regexp.escape(word).gsub("\\*", '\S*')
# Escape regexp metacharacters, then turn an escaped "*" wildcard into
# "any run of non-whitespace characters".
regexp = Regexp.escape(word).gsub("\\*", '\S*')
# NOTE(review): the regular-expression branch above always returns, so the
# second half of this condition appears redundant.
if whole && !SiteSetting.watched_words_regular_expressions?
# Leading side: a non-word character or start of line (consumed by the match).
# Trailing side: a lookahead for a non-word character or end of line.
regexp = "(?:\\W|^)(#{regexp})(?=\\W|$)"
end
regexp
end
def self.word_matcher_regexp_key(action)
@@ -144,6 +153,6 @@ class WordWatcher
end
# Returns true when the case-insensitive pattern built from `word` matches
# anywhere in @raw.
#
# NOTE(review): this is a diff hunk rendered without +/- markers. The first
# line of the body looks like the pre-change version and the second one
# (passing `whole: true`) its replacement.
def word_matches?(word)
Regexp.new(WordWatcher.word_to_regexp(word), Regexp::IGNORECASE).match?(@raw)
Regexp.new(WordWatcher.word_to_regexp(word, whole: true), Regexp::IGNORECASE).match?(@raw)
end
end