mirror of
https://github.com/discourse/discourse.git
synced 2025-02-25 18:55:32 -06:00
FIX: Add word boundaries to replace and tag watched words (#13405)
The generated regular expressions did not contain word boundaries (\b), so they matched any text that contained the word, even when it was only a substring of a longer word. For example, if "art" was a watched word, a post containing the word "artist" matched.
This commit is contained in:
parent
4afd8f9bdf
commit
74f7295631
@ -118,7 +118,6 @@ acceptance("Admin - Watched Words - Bad regular expressions", function (needs) {
|
|||||||
action: "block",
|
action: "block",
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
regular_expressions: true,
|
|
||||||
compiled_regular_expressions: {
|
compiled_regular_expressions: {
|
||||||
block: null,
|
block: null,
|
||||||
censor: null,
|
censor: null,
|
||||||
|
@ -11,14 +11,14 @@ export default {
|
|||||||
{
|
{
|
||||||
id: 7,
|
id: 7,
|
||||||
word: "hi",
|
word: "hi",
|
||||||
regexp: "hi",
|
regexp: "(hi)",
|
||||||
replacement: "hello",
|
replacement: "hello",
|
||||||
action: "replace",
|
action: "replace",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
id: 8,
|
id: 8,
|
||||||
word: "hello",
|
word: "hello",
|
||||||
regexp: "hello",
|
regexp: "(hello)",
|
||||||
replacement: "greeting",
|
replacement: "greeting",
|
||||||
action: "tag",
|
action: "tag",
|
||||||
},
|
},
|
||||||
|
@ -1675,21 +1675,21 @@ var bar = 'bar';
|
|||||||
|
|
||||||
test("watched words replace", function (assert) {
|
test("watched words replace", function (assert) {
|
||||||
const opts = {
|
const opts = {
|
||||||
watchedWordsReplace: { fun: "times" },
|
watchedWordsReplace: { "(?:\\W|^)(fun)(?=\\W|$)": "times" },
|
||||||
};
|
};
|
||||||
|
|
||||||
assert.cookedOptions("test fun", opts, "<p>test times</p>");
|
assert.cookedOptions("test fun funny", opts, "<p>test times funny</p>");
|
||||||
});
|
});
|
||||||
|
|
||||||
test("watched words link", function (assert) {
|
test("watched words link", function (assert) {
|
||||||
const opts = {
|
const opts = {
|
||||||
watchedWordsLink: { fun: "https://discourse.org" },
|
watchedWordsLink: { "(?:\\W|^)(fun)(?=\\W|$)": "https://discourse.org" },
|
||||||
};
|
};
|
||||||
|
|
||||||
assert.cookedOptions(
|
assert.cookedOptions(
|
||||||
"test fun",
|
"test fun funny",
|
||||||
opts,
|
opts,
|
||||||
'<p>test <a href="https://discourse.org">fun</a></p>'
|
'<p>test <a href="https://discourse.org">fun</a> funny</p>'
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -1697,7 +1697,7 @@ var bar = 'bar';
|
|||||||
const maxMatches = 100; // same limit as MD watched-words-replace plugin
|
const maxMatches = 100; // same limit as MD watched-words-replace plugin
|
||||||
const opts = {
|
const opts = {
|
||||||
siteSettings: { watched_words_regular_expressions: true },
|
siteSettings: { watched_words_regular_expressions: true },
|
||||||
watchedWordsReplace: { "\\bu?\\b": "you" },
|
watchedWordsReplace: { "(\\bu?\\b)": "you" },
|
||||||
};
|
};
|
||||||
|
|
||||||
assert.cookedOptions(
|
assert.cookedOptions(
|
||||||
|
@ -20,8 +20,8 @@ function findAllMatches(text, matchers) {
|
|||||||
count++ < MAX_MATCHES
|
count++ < MAX_MATCHES
|
||||||
) {
|
) {
|
||||||
matches.push({
|
matches.push({
|
||||||
index: match.index,
|
index: match.index + match[0].indexOf(match[1]),
|
||||||
text: match[0],
|
text: match[1],
|
||||||
replacement: matcher.replacement,
|
replacement: matcher.replacement,
|
||||||
link: matcher.link,
|
link: matcher.link,
|
||||||
});
|
});
|
||||||
|
@ -4,7 +4,7 @@ class WatchedWordSerializer < ApplicationSerializer
|
|||||||
attributes :id, :word, :regexp, :replacement, :action
|
attributes :id, :word, :regexp, :replacement, :action
|
||||||
|
|
||||||
def regexp
|
def regexp
|
||||||
WordWatcher.word_to_regexp(word)
|
WordWatcher.word_to_regexp(word, whole: true)
|
||||||
end
|
end
|
||||||
|
|
||||||
def action
|
def action
|
||||||
|
@ -54,17 +54,26 @@ class WordWatcher
|
|||||||
|
|
||||||
def self.word_matcher_regexps(action)
|
def self.word_matcher_regexps(action)
|
||||||
if words = get_cached_words(action)
|
if words = get_cached_words(action)
|
||||||
words.map { |w, r| [word_to_regexp(w), r] }.to_h
|
words.map { |w, r| [word_to_regexp(w, whole: true), r] }.to_h
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.word_to_regexp(word)
|
def self.word_to_regexp(word, whole: false)
|
||||||
if SiteSetting.watched_words_regular_expressions?
|
if SiteSetting.watched_words_regular_expressions?
|
||||||
# Strip ruby regexp format if present, we're going to make the whole thing
|
# Strip ruby regexp format if present, we're going to make the whole thing
|
||||||
# case insensitive anyway
|
# case insensitive anyway
|
||||||
return word.start_with?("(?-mix:") ? word[7..-2] : word
|
regexp = word.start_with?("(?-mix:") ? word[7..-2] : word
|
||||||
|
regexp = "(#{regexp})" if whole
|
||||||
|
return regexp
|
||||||
end
|
end
|
||||||
Regexp.escape(word).gsub("\\*", '\S*')
|
|
||||||
|
regexp = Regexp.escape(word).gsub("\\*", '\S*')
|
||||||
|
|
||||||
|
if whole && !SiteSetting.watched_words_regular_expressions?
|
||||||
|
regexp = "(?:\\W|^)(#{regexp})(?=\\W|$)"
|
||||||
|
end
|
||||||
|
|
||||||
|
regexp
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.word_matcher_regexp_key(action)
|
def self.word_matcher_regexp_key(action)
|
||||||
@ -144,6 +153,6 @@ class WordWatcher
|
|||||||
end
|
end
|
||||||
|
|
||||||
def word_matches?(word)
|
def word_matches?(word)
|
||||||
Regexp.new(WordWatcher.word_to_regexp(word), Regexp::IGNORECASE).match?(@raw)
|
Regexp.new(WordWatcher.word_to_regexp(word, whole: true), Regexp::IGNORECASE).match?(@raw)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -502,13 +502,21 @@ describe PostCreator do
|
|||||||
end
|
end
|
||||||
|
|
||||||
context "without regular expressions" do
|
context "without regular expressions" do
|
||||||
it "works" do
|
it "works with many tags" do
|
||||||
Fabricate(:watched_word, action: WatchedWord.actions[:tag], word: "HELLO", replacement: "greetings , hey")
|
Fabricate(:watched_word, action: WatchedWord.actions[:tag], word: "HELLO", replacement: "greetings , hey")
|
||||||
|
|
||||||
@post = creator.create
|
@post = creator.create
|
||||||
expect(@post.topic.tags.map(&:name)).to match_array(['greetings', 'hey'])
|
expect(@post.topic.tags.map(&:name)).to match_array(['greetings', 'hey'])
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "works with overlapping words" do
|
||||||
|
Fabricate(:watched_word, action: WatchedWord.actions[:tag], word: "art", replacement: "about-art")
|
||||||
|
Fabricate(:watched_word, action: WatchedWord.actions[:tag], word: "artist*", replacement: "about-artists")
|
||||||
|
|
||||||
|
post = PostCreator.new(user, title: "hello world topic", raw: "this is topic abour artists", archetype_id: 1).create
|
||||||
|
expect(post.topic.tags.map(&:name)).to match_array(['about-artists'])
|
||||||
|
end
|
||||||
|
|
||||||
it "does not treat as regular expressions" do
|
it "does not treat as regular expressions" do
|
||||||
Fabricate(:watched_word, action: WatchedWord.actions[:tag], word: "he(llo|y)", replacement: "greetings , hey")
|
Fabricate(:watched_word, action: WatchedWord.actions[:tag], word: "he(llo|y)", replacement: "greetings , hey")
|
||||||
|
|
||||||
|
@ -1420,6 +1420,10 @@ HTML
|
|||||||
expect(PrettyText.cook("Lorem ipsum dolor sittt amet")).to match_html(<<~HTML)
|
expect(PrettyText.cook("Lorem ipsum dolor sittt amet")).to match_html(<<~HTML)
|
||||||
<p>Lorem ipsum something else amet</p>
|
<p>Lorem ipsum something else amet</p>
|
||||||
HTML
|
HTML
|
||||||
|
|
||||||
|
expect(PrettyText.cook("Lorem ipsum xdolor sit amet")).to match_html(<<~HTML)
|
||||||
|
<p>Lorem ipsum xdolor sit amet</p>
|
||||||
|
HTML
|
||||||
end
|
end
|
||||||
|
|
||||||
it "replaces words with links" do
|
it "replaces words with links" do
|
||||||
|
Loading…
Reference in New Issue
Block a user