PLT-1626/PLT-1424/PLT-1473/PLT-1483 Improved search highlighting (#3171)

* PLT-1626 Stopped breaking up hyphenated hashtags when highlighting search terms
* Made hashtag search highlighting case independent
* PLT-1424 Improved search highlighting when searching for CJK characters
* PLT-1473 Added search term highlighting to single-line code blocks
* PLT-1473 Added search term highlighting to code blocks that don't use syntax highlighting
* PLT-1483 Added proper highlighting of at mentions
* Fixing formatting
2025-02-25 18:55:24 -06:00 · 2016-05-31 10:37:59 -04:00
parent 397e0a3f68
commit 4a326dd6ce
3 changed files with 54 additions and 15 deletions
--- a/webapp/utils/markdown.jsx
+++ b/webapp/utils/markdown.jsx
@@ -43,11 +43,25 @@ class MattermostMarkdownRenderer extends marked.Renderer {
            usedLanguage = 'xml';
        }

-        return syntaxHightlighting.formatCode(usedLanguage, code);
+        return syntaxHightlighting.formatCode(usedLanguage, code, null, this.formattingOptions.searchTerm);
    }

    codespan(text) {
-        return '<span class="codespan__pre-wrap">' + super.codespan(text) + '</span>';
+        let output = text;
+
+        if (this.formattingOptions.searchTerm) {
+            const tokens = new Map();
+            output = TextFormatting.highlightSearchTerms(output, tokens, this.formattingOptions.searchTerm);
+            output = TextFormatting.replaceTokens(output, tokens);
+        }
+
+        return (
+            '<span class="codespan__pre-wrap">' +
+                '<code>' +
+                    output +
+                '</code>' +
+            '</span>'
+        );
    }

    br() {
--- a/webapp/utils/syntax_hightlighting.jsx
+++ b/webapp/utils/syntax_hightlighting.jsx
@@ -123,10 +123,12 @@ hlJS.registerLanguage('yaml', hljsYaml);

 const HighlightedLanguages = Constants.HighlightedLanguages;

-export function formatCode(lang, data, filename) {
+export function formatCode(lang, data, filename, searchTerm) {
    const language = lang.toLowerCase() || '';
+
    let contents;
    let header = '';
+    let className = 'post-code';

    if (HighlightedLanguages[language]) {
        let name = HighlightedLanguages[language].name;
@@ -147,10 +149,13 @@ export function formatCode(lang, data, filename) {
        contents = TextFormatting.sanitizeHtml(data);
    }

-    let className = 'post-code';
    if (!language) {
        // wrap when no language is specified
        className += ' post-code--wrap';
+
+        const tokens = new Map();
+        contents = TextFormatting.highlightSearchTerms(contents, tokens, searchTerm);
+        contents = TextFormatting.replaceTokens(contents, tokens);
    }

    if (filename) {
--- a/webapp/utils/text_formatting.jsx
+++ b/webapp/utils/text_formatting.jsx
@@ -11,6 +11,10 @@ import UserStore from 'stores/user_store.jsx';
 import twemoji from 'twemoji';
 import * as Utils from './utils.jsx';

+// pattern to detect the existence of a Chinese, Japanese, or Korean character in a string
+// http://stackoverflow.com/questions/15033196/using-javascript-to-check-whether-a-string-contains-japanese-characters-includi
+const cjkPattern = /[\u3000-\u303f\u3040-\u309f\u30a0-\u30ff\uff00-\uff9f\u4e00-\u9faf\u3400-\u4dbf]/;
+
 // Performs formatting of user posts including highlighting mentions and search terms and converting urls, hashtags, and
 // @mentions to links by taking a user's message and returning a string of formatted html. Also takes a number of options
 // as part of the second parameter:
@@ -61,7 +65,7 @@ export function doFormatText(text, options) {
    }

    if (options.searchTerm) {
-        output = highlightSearchTerm(output, tokens, options.searchTerm);
+        output = highlightSearchTerms(output, tokens, options.searchTerm);
    }

    if (!('mentionHighlight' in options) || options.mentionHighlight) {
@@ -323,18 +327,27 @@ function parseSearchTerms(searchTerm) {
            continue;
        }

+        // capture at mentions differently from the server so we can highlight them with the preceding at sign
+        captured = (/^@\w+\b/).exec(termString);
+        if (captured) {
+            termString = termString.substring(captured[0].length);
+
+            terms.push(captured[0]);
+            continue;
+        }
+
        // capture any plain text up until the next quote or search flag
        captured = (/^.+?(?=\bin|\bfrom|\bchannel|"|$)/).exec(termString);
        if (captured) {
            termString = termString.substring(captured[0].length);

            // break the text up into words based on how the server splits them in SqlPostStore.SearchPosts and then discard empty terms
-            terms.push(...captured[0].split(/[ <>+\-\(\)~@]/).filter((term) => !!term));
+            terms.push(...captured[0].split(/[ <>+\(\)~@]/).filter((term) => !!term));
            continue;
        }

        // we should never reach this point since at least one of the regexes should match something in the remaining text
-        throw new Error('Infinite loop in search term parsing: ' + termString);
+        throw new Error('Infinite loop in search term parsing: "' + termString + '"');
    }

    // remove punctuation from each term
@@ -345,16 +358,23 @@ function parseSearchTerms(searchTerm) {

 function convertSearchTermToRegex(term) {
    let pattern;
-    if (term.endsWith('*')) {
-        pattern = '\\b' + escapeRegex(term.substring(0, term.length - 1));
+
+    if (cjkPattern.test(term)) {
+        // term contains Chinese, Japanese, or Korean characters so don't mark word boundaries
+        pattern = '()(' + escapeRegex(term.replace(/\*/g, '')) + ')';
+    } else if (term.endsWith('*')) {
+        pattern = '\\b()(' + escapeRegex(term.substring(0, term.length - 1)) + ')';
+    } else if (term.startsWith('@')) {
+        // needs special handling of the first boundary because a word boundary doesn't work before an @ sign
+        pattern = '(\\W|^)(' + escapeRegex(term) + ')\\b';
    } else {
-        pattern = '\\b' + escapeRegex(term) + '\\b';
+        pattern = '\\b()(' + escapeRegex(term) + ')\\b';
    }

    return new RegExp(pattern, 'gi');
 }

-function highlightSearchTerm(text, tokens, searchTerm) {
+export function highlightSearchTerms(text, tokens, searchTerm) {
    const terms = parseSearchTerms(searchTerm);

    if (terms.length === 0) {
@@ -363,7 +383,7 @@ function highlightSearchTerm(text, tokens, searchTerm) {

    let output = text;

-    function replaceSearchTermWithToken(word) {
+    function replaceSearchTermWithToken(match, prefix, word) {
        const index = tokens.size;
        const alias = `MM_SEARCHTERM${index}`;

@@ -372,14 +392,14 @@ function highlightSearchTerm(text, tokens, searchTerm) {
            originalText: word
        });

-        return alias;
+        return prefix + alias;
    }

    for (const term of terms) {
        // highlight existing tokens matching search terms
        var newTokens = new Map();
        for (const [alias, token] of tokens) {
-            if (token.originalText === term.replace(/\*$/, '')) {
+            if (token.originalText.toLowerCase() === term.replace(/\*$/, '').toLowerCase()) {
                const index = tokens.size + newTokens.size;
                const newAlias = `MM_SEARCHTERM${index}`;

@@ -403,7 +423,7 @@ function highlightSearchTerm(text, tokens, searchTerm) {
    return output;
 }

-function replaceTokens(text, tokens) {
+export function replaceTokens(text, tokens) {
    let output = text;

    // iterate backwards through the map so that we do replacement in the opposite order that we added tokens