mirror of
https://github.com/mattermost/mattermost.git
synced 2025-02-25 18:55:24 -06:00
MM-11451 Added autolinking to Markdown parser (#9151)
* MM-11451 Added autolinking to Markdown parser * Added missing headers * Added mailto and tel links
This commit is contained in:
committed by
Christopher Speller
parent
bfb2640451
commit
c8d3e42139
253
utils/markdown/autolink.go
Normal file
253
utils/markdown/autolink.go
Normal file
@@ -0,0 +1,253 @@
|
||||
// Copyright (c) 2017-present Mattermost, Inc. All Rights Reserved.
|
||||
// See License.txt for license information.
|
||||
|
||||
package markdown
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Based off of extensions/autolink.c from https://github.com/github/cmark
|
||||
|
||||
// DefaultUrlSchemes lists the URL schemes that isSchemeAllowed accepts, and therefore the only schemes
// that are turned into autolinks. Scheme names are compared case-insensitively.
var DefaultUrlSchemes = []string{"http", "https", "ftp", "mailto", "tel"}
|
||||
|
||||
// Given a string with a w at the given position, tries to parse and return a link starting with "www."
|
||||
// if one exists. If the text at the given position isn't a link, returns an empty string. Equivalent to
|
||||
// www_match from the reference code.
|
||||
func parseWWWAutolink(data string, position int) string {
|
||||
// Check that this isn't part of another word
|
||||
if position > 1 {
|
||||
prevChar := data[position-1]
|
||||
|
||||
if !isWhitespaceByte(prevChar) && !isAllowedBeforeWWWLink(prevChar) {
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
// Check that this starts with www
|
||||
if len(data)-position < 4 || !regexp.MustCompile(`^www\d{0,3}\.`).MatchString(data[position:]) {
|
||||
return ""
|
||||
}
|
||||
|
||||
end := checkDomain(data[position:], false)
|
||||
if end == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
end += position
|
||||
|
||||
// Grab all text until the end of the string or the next whitespace character
|
||||
for end < len(data) && !isWhitespaceByte(data[end]) {
|
||||
end += 1
|
||||
}
|
||||
|
||||
// Trim trailing punctuation
|
||||
link := trimTrailingCharactersFromLink(data[position:end])
|
||||
if link == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
return link
|
||||
}
|
||||
|
||||
func isAllowedBeforeWWWLink(c byte) bool {
|
||||
switch c {
|
||||
case '*', '_', '~', ')':
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Given a string with a : at the given position, tried to parse and return a link starting with a URL scheme
|
||||
// if one exists. If the text around the given position isn't a link, returns an empty string. Equivalent to
|
||||
// url_match from the reference code.
|
||||
func parseURLAutolink(data string, position int) string {
|
||||
// Check that a :// exists. This doesn't match the clients that treat the slashes as optional.
|
||||
if len(data)-position < 4 || data[position+1] != '/' || data[position+2] != '/' {
|
||||
return ""
|
||||
}
|
||||
|
||||
start := position - 1
|
||||
for start > 0 && isAlphanumericByte(data[start-1]) {
|
||||
start -= 1
|
||||
}
|
||||
|
||||
// Ensure that the URL scheme is allowed and that at least one character after the scheme is valid.
|
||||
scheme := data[start:position]
|
||||
if !isSchemeAllowed(scheme) || !isValidHostCharacter(data[position+3:]) {
|
||||
return ""
|
||||
}
|
||||
|
||||
end := checkDomain(data[position+3:], true)
|
||||
if end == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
end += position
|
||||
|
||||
// Grab all text until the end of the string or the next whitespace character
|
||||
for end < len(data) && !isWhitespaceByte(data[end]) {
|
||||
end += 1
|
||||
}
|
||||
|
||||
// Trim trailing punctuation
|
||||
link := trimTrailingCharactersFromLink(data[start:end])
|
||||
if link == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
return link
|
||||
}
|
||||
|
||||
func isSchemeAllowed(scheme string) bool {
|
||||
// Note that this doesn't support the custom URL schemes implemented by the client
|
||||
for _, allowed := range DefaultUrlSchemes {
|
||||
if strings.EqualFold(allowed, scheme) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// Given a string starting with a URL, returns the number of valid characters that make up the URL's domain.
|
||||
// Returns 0 if the string doesn't start with a domain name. allowShort determines whether or not the domain
|
||||
// needs to contain a period to be considered valid. Equivalent to check_domain from the reference code.
|
||||
func checkDomain(data string, allowShort bool) int {
|
||||
foundUnderscore := false
|
||||
foundPeriod := false
|
||||
|
||||
i := 1
|
||||
for ; i < len(data)-1; i++ {
|
||||
if data[i] == '_' {
|
||||
foundUnderscore = true
|
||||
break
|
||||
} else if data[i] == '.' {
|
||||
foundPeriod = true
|
||||
} else if !isValidHostCharacter(data[i:]) && data[i] != '-' {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if foundUnderscore {
|
||||
return 0
|
||||
}
|
||||
|
||||
if allowShort {
|
||||
// If allowShort is set, accept any string of valid domain characters
|
||||
return i
|
||||
}
|
||||
|
||||
// If allowShort isn't set, a valid domain just requires at least a single period. Note that this
|
||||
// logic isn't entirely necessary because we already know the string starts with "www." when
|
||||
// this is called from parseWWWAutolink
|
||||
if foundPeriod {
|
||||
return i
|
||||
} else {
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
// isValidHostCharacter reports whether the first character of link may appear in a domain name, which
// means it is neither whitespace nor punctuation. An empty string or one that starts with invalid
// UTF-8 is never valid. Equivalent to is_valid_hostchar from the reference code.
func isValidHostCharacter(link string) bool {
	switch first, _ := utf8.DecodeRuneInString(link); {
	case first == utf8.RuneError:
		// Nothing to decode, or the bytes aren't valid UTF-8
		return false
	case unicode.IsSpace(first), unicode.IsPunct(first):
		return false
	default:
		return true
	}
}
|
||||
|
||||
// Removes any trailing characters such as punctuation or stray brackets that shouldn't be part of the link.
|
||||
// Equivalent to autolink_delim from the reference code.
|
||||
func trimTrailingCharactersFromLink(link string) string {
|
||||
runes := []rune(link)
|
||||
linkEnd := len(runes)
|
||||
|
||||
// Cut off the link before an open angle bracket if it contains one
|
||||
for i, c := range runes {
|
||||
if c == '<' {
|
||||
linkEnd = i
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
for linkEnd > 0 {
|
||||
c := runes[linkEnd-1]
|
||||
|
||||
if !canEndAutolink(c) {
|
||||
// Trim trailing quotes, periods, etc
|
||||
linkEnd = linkEnd - 1
|
||||
} else if c == ';' {
|
||||
// Trim a trailing HTML entity
|
||||
newEnd := linkEnd - 2
|
||||
|
||||
for newEnd > 0 && ((runes[newEnd] >= 'a' && runes[newEnd] <= 'z') || (runes[newEnd] >= 'A' && runes[newEnd] <= 'Z')) {
|
||||
newEnd -= 1
|
||||
}
|
||||
|
||||
if newEnd < linkEnd-2 && runes[newEnd] == '&' {
|
||||
linkEnd = newEnd
|
||||
} else {
|
||||
// This isn't actually an HTML entity, so just trim the semicolon
|
||||
linkEnd = linkEnd - 1
|
||||
}
|
||||
} else if c == ')' {
|
||||
// Only allow an autolink ending with a bracket if that bracket is part of a matching pair of brackets.
|
||||
// If there are more closing brackets than opening ones, remove the extra bracket
|
||||
|
||||
numClosing := 0
|
||||
numOpening := 0
|
||||
|
||||
// Examples (input text => output linked portion):
|
||||
//
|
||||
// http://www.pokemon.com/Pikachu_(Electric)
|
||||
// => http://www.pokemon.com/Pikachu_(Electric)
|
||||
//
|
||||
// http://www.pokemon.com/Pikachu_((Electric)
|
||||
// => http://www.pokemon.com/Pikachu_((Electric)
|
||||
//
|
||||
// http://www.pokemon.com/Pikachu_(Electric))
|
||||
// => http://www.pokemon.com/Pikachu_(Electric)
|
||||
//
|
||||
// http://www.pokemon.com/Pikachu_((Electric))
|
||||
// => http://www.pokemon.com/Pikachu_((Electric))
|
||||
|
||||
for i := 0; i < linkEnd; i++ {
|
||||
if runes[i] == '(' {
|
||||
numOpening += 1
|
||||
} else if runes[i] == ')' {
|
||||
numClosing += 1
|
||||
}
|
||||
}
|
||||
|
||||
if numClosing <= numOpening {
|
||||
// There's fewer or equal closing brackets, so we've found the end of the link
|
||||
break
|
||||
}
|
||||
|
||||
linkEnd -= 1
|
||||
} else {
|
||||
// There's no special characters at the end of the link, so we're at the end
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return string(runes[:linkEnd])
|
||||
}
|
||||
|
||||
// canEndAutolink reports whether c may be the last character of an autolink. Sentence punctuation,
// emphasis and strikethrough delimiters, and quotes may not.
func canEndAutolink(c rune) bool {
	const notAllowedAtEnd = "?!.,:*_~'\""

	return !strings.ContainsRune(notAllowedAtEnd, c)
}
|
||||
617
utils/markdown/autolink_test.go
Normal file
617
utils/markdown/autolink_test.go
Normal file
@@ -0,0 +1,617 @@
|
||||
// Copyright (c) 2017-present Mattermost, Inc. All Rights Reserved.
|
||||
// See License.txt for license information.
|
||||
|
||||
package markdown
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestParseURLAutolink(t *testing.T) {
|
||||
testCases := []struct {
|
||||
Description string
|
||||
Input string
|
||||
Position int
|
||||
Expected string
|
||||
}{
|
||||
{
|
||||
Description: "no link",
|
||||
Input: "This is an :emoji:",
|
||||
Position: 11,
|
||||
Expected: "",
|
||||
},
|
||||
{
|
||||
Description: "no link 2",
|
||||
Input: "These are two things: apple and orange",
|
||||
Position: 20,
|
||||
Expected: "",
|
||||
},
|
||||
{
|
||||
Description: "link with http",
|
||||
Input: "http://example.com and some text",
|
||||
Position: 4,
|
||||
Expected: "http://example.com",
|
||||
},
|
||||
{
|
||||
Description: "link with https",
|
||||
Input: "https://example.com and some text",
|
||||
Position: 5,
|
||||
Expected: "https://example.com",
|
||||
},
|
||||
{
|
||||
Description: "link with ftp",
|
||||
Input: "ftp://example.com and some text",
|
||||
Position: 3,
|
||||
Expected: "ftp://example.com",
|
||||
},
|
||||
{
|
||||
Description: "link with a path",
|
||||
Input: "https://example.com/abcd and some text",
|
||||
Position: 5,
|
||||
Expected: "https://example.com/abcd",
|
||||
},
|
||||
{
|
||||
Description: "link with parameters",
|
||||
Input: "ftp://example.com/abcd?foo=bar and some text",
|
||||
Position: 3,
|
||||
Expected: "ftp://example.com/abcd?foo=bar",
|
||||
},
|
||||
{
|
||||
Description: "link, not at start",
|
||||
Input: "This is https://example.com and some text",
|
||||
Position: 13,
|
||||
Expected: "https://example.com",
|
||||
},
|
||||
{
|
||||
Description: "link with a path, not at start",
|
||||
Input: "This is also http://www.example.com/abcd and some text",
|
||||
Position: 17,
|
||||
Expected: "http://www.example.com/abcd",
|
||||
},
|
||||
{
|
||||
Description: "link with parameters, not at start",
|
||||
Input: "These are https://www.example.com/abcd?foo=bar and some text",
|
||||
Position: 15,
|
||||
Expected: "https://www.example.com/abcd?foo=bar",
|
||||
},
|
||||
{
|
||||
Description: "link with trailing characters",
|
||||
Input: "This is ftp://www.example.com??",
|
||||
Position: 11,
|
||||
Expected: "ftp://www.example.com",
|
||||
},
|
||||
{
|
||||
Description: "multiple links",
|
||||
Input: "This is https://example.com/abcd and ftp://www.example.com/1234",
|
||||
Position: 13,
|
||||
Expected: "https://example.com/abcd",
|
||||
},
|
||||
{
|
||||
Description: "second of multiple links",
|
||||
Input: "This is https://example.com/abcd and ftp://www.example.com/1234",
|
||||
Position: 40,
|
||||
Expected: "ftp://www.example.com/1234",
|
||||
},
|
||||
{
|
||||
Description: "link with brackets",
|
||||
Input: "Go to ftp://www.example.com/my/page_(disambiguation) and some text",
|
||||
Position: 9,
|
||||
Expected: "ftp://www.example.com/my/page_(disambiguation)",
|
||||
},
|
||||
{
|
||||
Description: "link in brackets",
|
||||
Input: "(https://www.example.com/foo/bar)",
|
||||
Position: 6,
|
||||
Expected: "https://www.example.com/foo/bar",
|
||||
},
|
||||
{
|
||||
Description: "link in underscores",
|
||||
Input: "_http://www.example.com_",
|
||||
Position: 5,
|
||||
Expected: "http://www.example.com",
|
||||
},
|
||||
{
|
||||
Description: "link in asterisks",
|
||||
Input: "This is **ftp://example.com**",
|
||||
Position: 13,
|
||||
Expected: "ftp://example.com",
|
||||
},
|
||||
{
|
||||
Description: "link in strikethrough",
|
||||
Input: "Those were ~~https://example.com~~",
|
||||
Position: 18,
|
||||
Expected: "https://example.com",
|
||||
},
|
||||
{
|
||||
Description: "link with angle brackets",
|
||||
Input: "<b>We use http://example.com</b>",
|
||||
Position: 14,
|
||||
Expected: "http://example.com",
|
||||
},
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.Description, func(t *testing.T) {
|
||||
assert.Equal(t, testCase.Expected, parseURLAutolink(testCase.Input, testCase.Position))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseWWWAutolink(t *testing.T) {
|
||||
testCases := []struct {
|
||||
Description string
|
||||
Input string
|
||||
Position int
|
||||
Expected string
|
||||
}{
|
||||
{
|
||||
Description: "no link",
|
||||
Input: "This is some text",
|
||||
Position: 0,
|
||||
Expected: "",
|
||||
},
|
||||
{
|
||||
Description: "link",
|
||||
Input: "www.example.com and some text",
|
||||
Position: 0,
|
||||
Expected: "www.example.com",
|
||||
},
|
||||
{
|
||||
Description: "link with a path",
|
||||
Input: "www.example.com/abcd and some text",
|
||||
Position: 0,
|
||||
Expected: "www.example.com/abcd",
|
||||
},
|
||||
{
|
||||
Description: "link with parameters",
|
||||
Input: "www.example.com/abcd?foo=bar and some text",
|
||||
Position: 0,
|
||||
Expected: "www.example.com/abcd?foo=bar",
|
||||
},
|
||||
{
|
||||
Description: "link, not at start",
|
||||
Input: "This is www.example.com and some text",
|
||||
Position: 8,
|
||||
Expected: "www.example.com",
|
||||
},
|
||||
{
|
||||
Description: "link with a path, not at start",
|
||||
Input: "This is also www.example.com/abcd and some text",
|
||||
Position: 13,
|
||||
Expected: "www.example.com/abcd",
|
||||
},
|
||||
{
|
||||
Description: "link with parameters, not at start",
|
||||
Input: "These are www.example.com/abcd?foo=bar and some text",
|
||||
Position: 10,
|
||||
Expected: "www.example.com/abcd?foo=bar",
|
||||
},
|
||||
{
|
||||
Description: "link with trailing characters",
|
||||
Input: "This is www.example.com??",
|
||||
Position: 8,
|
||||
Expected: "www.example.com",
|
||||
},
|
||||
{
|
||||
Description: "link after current position",
|
||||
Input: "This is some text and www.example.com",
|
||||
Position: 0,
|
||||
Expected: "",
|
||||
},
|
||||
{
|
||||
Description: "multiple links",
|
||||
Input: "This is www.example.com/abcd and www.example.com/1234",
|
||||
Position: 8,
|
||||
Expected: "www.example.com/abcd",
|
||||
},
|
||||
{
|
||||
Description: "multiple links 2",
|
||||
Input: "This is www.example.com/abcd and www.example.com/1234",
|
||||
Position: 33,
|
||||
Expected: "www.example.com/1234",
|
||||
},
|
||||
{
|
||||
Description: "link with brackets",
|
||||
Input: "Go to www.example.com/my/page_(disambiguation) and some text",
|
||||
Position: 6,
|
||||
Expected: "www.example.com/my/page_(disambiguation)",
|
||||
},
|
||||
{
|
||||
Description: "link following other letters",
|
||||
Input: "aaawww.example.com and some text",
|
||||
Position: 3,
|
||||
Expected: "",
|
||||
},
|
||||
{
|
||||
Description: "link in brackets",
|
||||
Input: "(www.example.com)",
|
||||
Position: 1,
|
||||
Expected: "www.example.com",
|
||||
},
|
||||
{
|
||||
Description: "link in underscores",
|
||||
Input: "_www.example.com_",
|
||||
Position: 1,
|
||||
Expected: "www.example.com",
|
||||
},
|
||||
{
|
||||
Description: "link in asterisks",
|
||||
Input: "This is **www.example.com**",
|
||||
Position: 10,
|
||||
Expected: "www.example.com",
|
||||
},
|
||||
{
|
||||
Description: "link in strikethrough",
|
||||
Input: "Those were ~~www.example.com~~",
|
||||
Position: 13,
|
||||
Expected: "www.example.com",
|
||||
},
|
||||
{
|
||||
Description: "using www1",
|
||||
Input: "Our backup site is at www1.example.com/foo",
|
||||
Position: 22,
|
||||
Expected: "www1.example.com/foo",
|
||||
},
|
||||
{
|
||||
Description: "link with angle brackets",
|
||||
Input: "<b>We use www2.example.com</b>",
|
||||
Position: 10,
|
||||
Expected: "www2.example.com",
|
||||
},
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.Description, func(t *testing.T) {
|
||||
assert.Equal(t, testCase.Expected, parseWWWAutolink(testCase.Input, testCase.Position))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestTrimTrailingCharactersFromLink(t *testing.T) {
|
||||
testCases := []struct {
|
||||
Input string
|
||||
Expected string
|
||||
}{
|
||||
{
|
||||
Input: "http://www.example.com",
|
||||
Expected: "http://www.example.com",
|
||||
},
|
||||
{
|
||||
Input: "http://www.example.com/abcd",
|
||||
Expected: "http://www.example.com/abcd",
|
||||
},
|
||||
{
|
||||
Input: "http://www.example.com/abcd/",
|
||||
Expected: "http://www.example.com/abcd/",
|
||||
},
|
||||
{
|
||||
Input: "http://www.example.com/1234",
|
||||
Expected: "http://www.example.com/1234",
|
||||
},
|
||||
{
|
||||
Input: "http://www.example.com/abcd?foo=bar",
|
||||
Expected: "http://www.example.com/abcd?foo=bar",
|
||||
},
|
||||
{
|
||||
Input: "http://www.example.com/abcd#heading",
|
||||
Expected: "http://www.example.com/abcd#heading",
|
||||
},
|
||||
{
|
||||
Input: "http://www.example.com.",
|
||||
Expected: "http://www.example.com",
|
||||
},
|
||||
{
|
||||
Input: "http://www.example.com,",
|
||||
Expected: "http://www.example.com",
|
||||
},
|
||||
{
|
||||
Input: "http://www.example.com?",
|
||||
Expected: "http://www.example.com",
|
||||
},
|
||||
{
|
||||
Input: "http://www.example.com)",
|
||||
Expected: "http://www.example.com",
|
||||
},
|
||||
{
|
||||
Input: "http://www.example.com",
|
||||
Expected: "http://www.example.com",
|
||||
},
|
||||
{
|
||||
Input: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation)",
|
||||
Expected: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation)",
|
||||
},
|
||||
{
|
||||
Input: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation",
|
||||
Expected: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation",
|
||||
},
|
||||
{
|
||||
Input: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation))",
|
||||
Expected: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation)",
|
||||
},
|
||||
{
|
||||
Input: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation)_(disambiguation)",
|
||||
Expected: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation)_(disambiguation)",
|
||||
},
|
||||
{
|
||||
Input: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation_(disambiguation))",
|
||||
Expected: "https://en.wikipedia.org/wiki/Dolphin_(disambiguation_(disambiguation))",
|
||||
},
|
||||
{
|
||||
Input: "http://www.example.com"",
|
||||
Expected: "http://www.example.com",
|
||||
},
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.Input, func(t *testing.T) {
|
||||
assert.Equal(t, testCase.Expected, trimTrailingCharactersFromLink(testCase.Input))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestAutolinking(t *testing.T) {
|
||||
// These tests are adapted from https://github.com/mattermost/commonmark.js/test/mattermost.txt.
|
||||
// It is missing tests for:
|
||||
// 1. Links surrounded by emphasis (emphasis not implemented on the server)
|
||||
// 2. IPv6 addresses (not implemented on the server or by GitHub)
|
||||
// 3. Custom URL schemes (not implemented)
|
||||
|
||||
for name, tc := range map[string]struct {
|
||||
Markdown string
|
||||
ExpectedHTML string
|
||||
}{
|
||||
"valid-link-1": {
|
||||
Markdown: `http://example.com`,
|
||||
ExpectedHTML: `<p><a href="http://example.com">http://example.com</a></p>`,
|
||||
},
|
||||
"valid-link-2": {
|
||||
Markdown: `https://example.com`,
|
||||
ExpectedHTML: `<p><a href="https://example.com">https://example.com</a></p>`,
|
||||
},
|
||||
"valid-link-3": {
|
||||
Markdown: `ftp://example.com`,
|
||||
ExpectedHTML: `<p><a href="ftp://example.com">ftp://example.com</a></p>`,
|
||||
},
|
||||
// "valid-link-4": {
|
||||
// Markdown: `ts3server://example.com?port=9001`,
|
||||
// ExpectedHTML: `<p><a href="ts3server://example.com?port=9001">ts3server://example.com?port=9001</a></p>`,
|
||||
// },
|
||||
"valid-link-5": {
|
||||
Markdown: `www.example.com`,
|
||||
ExpectedHTML: `<p><a href="http://www.example.com">www.example.com</a></p>`,
|
||||
},
|
||||
"valid-link-6": {
|
||||
Markdown: `www.example.com/index`,
|
||||
ExpectedHTML: `<p><a href="http://www.example.com/index">www.example.com/index</a></p>`,
|
||||
},
|
||||
"valid-link-7": {
|
||||
Markdown: `www.example.com/index.html`,
|
||||
ExpectedHTML: `<p><a href="http://www.example.com/index.html">www.example.com/index.html</a></p>`,
|
||||
},
|
||||
"valid-link-8": {
|
||||
Markdown: `http://example.com/index/sub`,
|
||||
ExpectedHTML: `<p><a href="http://example.com/index/sub">http://example.com/index/sub</a></p>`,
|
||||
},
|
||||
"valid-link-9": {
|
||||
Markdown: `www1.example.com`,
|
||||
ExpectedHTML: `<p><a href="http://www1.example.com">www1.example.com</a></p>`,
|
||||
},
|
||||
"valid-link-10": {
|
||||
Markdown: `https://en.wikipedia.org/wiki/URLs#Syntax`,
|
||||
ExpectedHTML: `<p><a href="https://en.wikipedia.org/wiki/URLs#Syntax">https://en.wikipedia.org/wiki/URLs#Syntax</a></p>`,
|
||||
},
|
||||
"valid-link-11": {
|
||||
Markdown: `https://groups.google.com/forum/#!msg`,
|
||||
ExpectedHTML: `<p><a href="https://groups.google.com/forum/#!msg">https://groups.google.com/forum/#!msg</a></p>`,
|
||||
},
|
||||
"valid-link-12": {
|
||||
Markdown: `www.example.com/index?params=1`,
|
||||
ExpectedHTML: `<p><a href="http://www.example.com/index?params=1">www.example.com/index?params=1</a></p>`,
|
||||
},
|
||||
"valid-link-13": {
|
||||
Markdown: `www.example.com/index?params=1&other=2`,
|
||||
ExpectedHTML: `<p><a href="http://www.example.com/index?params=1&other=2">www.example.com/index?params=1&other=2</a></p>`,
|
||||
},
|
||||
"valid-link-14": {
|
||||
Markdown: `www.example.com/index?params=1;other=2`,
|
||||
ExpectedHTML: `<p><a href="http://www.example.com/index?params=1;other=2">www.example.com/index?params=1;other=2</a></p>`,
|
||||
},
|
||||
"valid-link-15": {
|
||||
Markdown: `http://www.example.com/_/page`,
|
||||
ExpectedHTML: `<p><a href="http://www.example.com/_/page">http://www.example.com/_/page</a></p>`,
|
||||
},
|
||||
"valid-link-16": {
|
||||
Markdown: `https://en.wikipedia.org/wiki/🐬`,
|
||||
ExpectedHTML: `<p><a href="https://en.wikipedia.org/wiki/%F0%9F%90%AC">https://en.wikipedia.org/wiki/🐬</a></p>`,
|
||||
},
|
||||
"valid-link-17": {
|
||||
Markdown: `http://✪df.ws/1234`,
|
||||
ExpectedHTML: `<p><a href="http://%E2%9C%AAdf.ws/1234">http://✪df.ws/1234</a></p>`,
|
||||
},
|
||||
"valid-link-18": {
|
||||
Markdown: `https://groups.google.com/forum/#!msg`,
|
||||
ExpectedHTML: `<p><a href="https://groups.google.com/forum/#!msg">https://groups.google.com/forum/#!msg</a></p>`,
|
||||
},
|
||||
"valid-link-19": {
|
||||
Markdown: `https://пример.срб/пример-26/`,
|
||||
ExpectedHTML: `<p><a href="https://%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80.%D1%81%D1%80%D0%B1/%D0%BF%D1%80%D0%B8%D0%BC%D0%B5%D1%80-26/">https://пример.срб/пример-26/</a></p>`,
|
||||
},
|
||||
"valid-link-20": {
|
||||
Markdown: `mailto://test@example.com`,
|
||||
ExpectedHTML: `<p><a href="mailto://test@example.com">mailto://test@example.com</a></p>`,
|
||||
},
|
||||
"valid-link-21": {
|
||||
Markdown: `tel://555-123-4567`,
|
||||
ExpectedHTML: `<p><a href="tel://555-123-4567">tel://555-123-4567</a></p>`,
|
||||
},
|
||||
|
||||
"ip-address-1": {
|
||||
Markdown: `http://127.0.0.1`,
|
||||
ExpectedHTML: `<p><a href="http://127.0.0.1">http://127.0.0.1</a></p>`,
|
||||
},
|
||||
"ip-address-2": {
|
||||
Markdown: `http://192.168.1.1:4040`,
|
||||
ExpectedHTML: `<p><a href="http://192.168.1.1:4040">http://192.168.1.1:4040</a></p>`,
|
||||
},
|
||||
"ip-address-3": {
|
||||
Markdown: `http://username:password@127.0.0.1`,
|
||||
ExpectedHTML: `<p><a href="http://username:password@127.0.0.1">http://username:password@127.0.0.1</a></p>`,
|
||||
},
|
||||
"ip-address-4": {
|
||||
Markdown: `http://username:password@[2001:0:5ef5:79fb:303a:62d5:3312:ff42]:80`,
|
||||
ExpectedHTML: `<p><a href="http://username:password@%5B2001:0:5ef5:79fb:303a:62d5:3312:ff42%5D:80">http://username:password@[2001:0:5ef5:79fb:303a:62d5:3312:ff42]:80</a></p>`,
|
||||
},
|
||||
|
||||
"link-with-brackets-1": {
|
||||
Markdown: `https://en.wikipedia.org/wiki/Rendering_(computer_graphics)`,
|
||||
ExpectedHTML: `<p><a href="https://en.wikipedia.org/wiki/Rendering_(computer_graphics)">https://en.wikipedia.org/wiki/Rendering_(computer_graphics)</a></p>`,
|
||||
},
|
||||
"link-with-brackets-2": {
|
||||
Markdown: `http://example.com/more_(than)_one_(parens)`,
|
||||
ExpectedHTML: `<p><a href="http://example.com/more_(than)_one_(parens)">http://example.com/more_(than)_one_(parens)</a></p>`,
|
||||
},
|
||||
"link-with-brackets-3": {
|
||||
Markdown: `http://example.com/(something)?after=parens`,
|
||||
ExpectedHTML: `<p><a href="http://example.com/(something)?after=parens">http://example.com/(something)?after=parens</a></p>`,
|
||||
},
|
||||
"link-with-brackets-4": {
|
||||
Markdown: `http://foo.com/unicode_(✪)_in_parens`,
|
||||
ExpectedHTML: `<p><a href="http://foo.com/unicode_(%E2%9C%AA)_in_parens">http://foo.com/unicode_(✪)_in_parens</a></p>`,
|
||||
},
|
||||
|
||||
"inside-another-link-1": {
|
||||
Markdown: `[www.example.com](https://example.com)`,
|
||||
ExpectedHTML: `<p><a href="https://example.com">www.example.com</a></p>`,
|
||||
},
|
||||
"inside-another-link-2": {
|
||||
Markdown: `[http://www.example.com](https://example.com)`,
|
||||
ExpectedHTML: `<p><a href="https://example.com">http://www.example.com</a></p>`,
|
||||
},
|
||||
|
||||
"link-in-sentence-1": {
|
||||
Markdown: `(http://example.com)`,
|
||||
ExpectedHTML: `<p>(<a href="http://example.com">http://example.com</a>)</p>`,
|
||||
},
|
||||
"link-in-sentence-2": {
|
||||
Markdown: `(see http://example.com)`,
|
||||
ExpectedHTML: `<p>(see <a href="http://example.com">http://example.com</a>)</p>`,
|
||||
},
|
||||
"link-in-sentence-3": {
|
||||
Markdown: `(http://example.com watch this)`,
|
||||
ExpectedHTML: `<p>(<a href="http://example.com">http://example.com</a> watch this)</p>`,
|
||||
},
|
||||
"link-in-sentence-4": {
|
||||
Markdown: `This is a sentence with a http://example.com in it.`,
|
||||
ExpectedHTML: `<p>This is a sentence with a <a href="http://example.com">http://example.com</a> in it.</p>`,
|
||||
},
|
||||
"link-in-sentence-5": {
|
||||
Markdown: `This is a sentence with a [link](http://example.com) in it.`,
|
||||
ExpectedHTML: `<p>This is a sentence with a <a href="http://example.com">link</a> in it.</p>`,
|
||||
},
|
||||
"link-in-sentence-6": {
|
||||
Markdown: `This is a sentence with a http://example.com/_/underscore in it.`,
|
||||
ExpectedHTML: `<p>This is a sentence with a <a href="http://example.com/_/underscore">http://example.com/_/underscore</a> in it.</p>`,
|
||||
},
|
||||
"link-in-sentence-7": {
|
||||
Markdown: `This is a sentence with a link (http://example.com) in it.`,
|
||||
ExpectedHTML: `<p>This is a sentence with a link (<a href="http://example.com">http://example.com</a>) in it.</p>`,
|
||||
},
|
||||
"link-in-sentence-8": {
|
||||
Markdown: `This is a sentence with a (https://en.wikipedia.org/wiki/Rendering_(computer_graphics)) in it.`,
|
||||
ExpectedHTML: `<p>This is a sentence with a (<a href="https://en.wikipedia.org/wiki/Rendering_(computer_graphics)">https://en.wikipedia.org/wiki/Rendering_(computer_graphics)</a>) in it.</p>`,
|
||||
},
|
||||
"link-in-sentence-9": {
|
||||
Markdown: `This is a sentence with a http://192.168.1.1:4040 in it.`,
|
||||
ExpectedHTML: `<p>This is a sentence with a <a href="http://192.168.1.1:4040">http://192.168.1.1:4040</a> in it.</p>`,
|
||||
},
|
||||
"link-in-sentence-10": {
|
||||
Markdown: `This is a link to http://example.com.`,
|
||||
ExpectedHTML: `<p>This is a link to <a href="http://example.com">http://example.com</a>.</p>`,
|
||||
},
|
||||
"link-in-sentence-11": {
|
||||
Markdown: `This is a link to http://example.com*`,
|
||||
ExpectedHTML: `<p>This is a link to <a href="http://example.com">http://example.com</a>*</p>`,
|
||||
},
|
||||
"link-in-sentence-12": {
|
||||
Markdown: `This is a link to http://example.com_`,
|
||||
ExpectedHTML: `<p>This is a link to <a href="http://example.com">http://example.com</a>_</p>`,
|
||||
},
|
||||
"link-in-sentence-13": {
|
||||
Markdown: `This is a link containing http://example.com/something?with,commas,in,url, but not at the end`,
|
||||
ExpectedHTML: `<p>This is a link containing <a href="http://example.com/something?with,commas,in,url">http://example.com/something?with,commas,in,url</a>, but not at the end</p>`,
|
||||
},
|
||||
"link-in-sentence-14": {
|
||||
Markdown: `This is a question about a link http://example.com?`,
|
||||
ExpectedHTML: `<p>This is a question about a link <a href="http://example.com">http://example.com</a>?</p>`,
|
||||
},
|
||||
|
||||
"plt-7250-link-with-trailing-periods-1": {
|
||||
Markdown: `http://example.com.`,
|
||||
ExpectedHTML: `<p><a href="http://example.com">http://example.com</a>.</p>`,
|
||||
},
|
||||
"plt-7250-link-with-trailing-periods-2": {
|
||||
Markdown: `http://example.com...`,
|
||||
ExpectedHTML: `<p><a href="http://example.com">http://example.com</a>...</p>`,
|
||||
},
|
||||
"plt-7250-link-with-trailing-periods-3": {
|
||||
Markdown: `http://example.com/foo.`,
|
||||
ExpectedHTML: `<p><a href="http://example.com/foo">http://example.com/foo</a>.</p>`,
|
||||
},
|
||||
"plt-7250-link-with-trailing-periods-4": {
|
||||
Markdown: `http://example.com/foo...`,
|
||||
ExpectedHTML: `<p><a href="http://example.com/foo">http://example.com/foo</a>...</p>`,
|
||||
},
|
||||
"plt-7250-link-with-trailing-periods-5": {
|
||||
Markdown: `http://example.com/foo.bar`,
|
||||
ExpectedHTML: `<p><a href="http://example.com/foo.bar">http://example.com/foo.bar</a></p>`,
|
||||
},
|
||||
"plt-7250-link-with-trailing-periods-6": {
|
||||
Markdown: `http://example.com/foo...bar`,
|
||||
ExpectedHTML: `<p><a href="http://example.com/foo...bar">http://example.com/foo...bar</a></p>`,
|
||||
},
|
||||
|
||||
"rn-319-www-link-as-part-of-word-1": {
|
||||
Markdown: `testwww.example.com`,
|
||||
ExpectedHTML: `<p>testwww.example.com</p>`,
|
||||
},
|
||||
|
||||
"mm-10180-link-containing-period-followed-by-non-letter-1": {
|
||||
Markdown: `https://example.com/123.+Pagetitle`,
|
||||
ExpectedHTML: `<p><a href="https://example.com/123.+Pagetitle">https://example.com/123.+Pagetitle</a></p>`,
|
||||
},
|
||||
"mm-10180-link-containing-period-followed-by-non-letter-2": {
|
||||
Markdown: `https://example.com/123.?Pagetitle`,
|
||||
ExpectedHTML: `<p><a href="https://example.com/123.?Pagetitle">https://example.com/123.?Pagetitle</a></p>`,
|
||||
},
|
||||
"mm-10180-link-containing-period-followed-by-non-letter-3": {
|
||||
Markdown: `https://example.com/123.-Pagetitle`,
|
||||
ExpectedHTML: `<p><a href="https://example.com/123.-Pagetitle">https://example.com/123.-Pagetitle</a></p>`,
|
||||
},
|
||||
"mm-10180-link-containing-period-followed-by-non-letter-4": {
|
||||
Markdown: `https://example.com/123._Pagetitle`,
|
||||
ExpectedHTML: `<p><a href="https://example.com/123._Pagetitle">https://example.com/123._Pagetitle</a></p>`,
|
||||
},
|
||||
"mm-10180-link-containing-period-followed-by-non-letter-5": {
|
||||
Markdown: `https://example.com/123.+`,
|
||||
ExpectedHTML: `<p><a href="https://example.com/123.+">https://example.com/123.+</a></p>`,
|
||||
},
|
||||
"mm-10180-link-containing-period-followed-by-non-letter-6": {
|
||||
Markdown: `https://example.com/123.?`,
|
||||
ExpectedHTML: `<p><a href="https://example.com/123">https://example.com/123</a>.?</p>`,
|
||||
},
|
||||
"mm-10180-link-containing-period-followed-by-non-letter-7": {
|
||||
Markdown: `https://example.com/123.-`,
|
||||
ExpectedHTML: `<p><a href="https://example.com/123.-">https://example.com/123.-</a></p>`,
|
||||
},
|
||||
"mm-10180-link-containing-period-followed-by-non-letter-8": {
|
||||
Markdown: `https://example.com/123._`,
|
||||
ExpectedHTML: `<p><a href="https://example.com/123">https://example.com/123</a>._</p>`,
|
||||
},
|
||||
} {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
assert.Equal(t, tc.ExpectedHTML, RenderHTML(tc.Markdown))
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -999,3 +999,46 @@ func TestCommonMarkReferenceStrings(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCommonMarkRefernceAutolinks(t *testing.T) {
|
||||
// These tests are adapted from the GitHub-flavoured CommonMark extension tests located at
|
||||
// https://github.com/github/cmark/blob/master/test/extensions.txt
|
||||
for name, tc := range map[string]struct {
|
||||
Markdown string
|
||||
ExpectedHTML string
|
||||
}{
|
||||
"autolinks-1": {
|
||||
Markdown: `: http://google.com https://google.com
|
||||
|
||||
http://google.com/å
|
||||
|
||||
www.github.com www.github.com/á
|
||||
|
||||
www.google.com/a_b
|
||||
|
||||

|
||||
|
||||
Full stop outside parens shouldn't be included http://google.com/ok.
|
||||
|
||||
(Full stop inside parens shouldn't be included http://google.com/ok.)
|
||||
|
||||
"http://google.com"
|
||||
|
||||
'http://google.com'
|
||||
|
||||
http://🍄.ga/ http://x🍄.ga/`,
|
||||
ExpectedHTML: `<p>: <a href="http://google.com">http://google.com</a> <a href="https://google.com">https://google.com</a></p><p><a href="http://google.com/%C3%A5">http://google.com/å</a></p><p><a href="http://www.github.com">www.github.com</a> <a href="http://www.github.com/%C3%A1">www.github.com/á</a></p><p><a href="http://www.google.com/a_b">www.google.com/a_b</a></p><p><img src="http://inline.com/image" alt="http://inline.com/image" /></p><p>Full stop outside parens shouldn't be included <a href="http://google.com/ok">http://google.com/ok</a>.</p><p>(Full stop inside parens shouldn't be included <a href="http://google.com/ok">http://google.com/ok</a>.)</p><p>"<a href="http://google.com">http://google.com</a>"</p><p>'<a href="http://google.com">http://google.com</a>'</p><p><a href="http://%F0%9F%8D%84.ga/">http://🍄.ga/</a> <a href="http://x%F0%9F%8D%84.ga/">http://x🍄.ga/</a></p>`,
|
||||
},
|
||||
"autolinks-2": {
|
||||
Markdown: `These should not link:
|
||||
|
||||
* @a.b.c@. x
|
||||
* n@. b`,
|
||||
ExpectedHTML: `<p>These should not link:</p><ul><li>@a.b.c@. x</li><li>n@. b</li></ul>`,
|
||||
},
|
||||
} {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
assert.Equal(t, tc.ExpectedHTML, RenderHTML(tc.Markdown))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -156,6 +156,12 @@ func RenderInlineHTML(inline Inline) (result string) {
|
||||
result += RenderInlineHTML(inline)
|
||||
}
|
||||
result += "</a>"
|
||||
case *Autolink:
|
||||
result += `<a href="` + htmlEscaper.Replace(escapeURL(v.Link)) + `">`
|
||||
for _, inline := range v.Children {
|
||||
result += RenderInlineHTML(inline)
|
||||
}
|
||||
result += "</a>"
|
||||
default:
|
||||
panic(fmt.Sprintf("missing case for type %T", v))
|
||||
}
|
||||
|
||||
@@ -81,6 +81,14 @@ type ReferenceImage struct {
|
||||
ReferenceLinkOrImage
|
||||
}
|
||||
|
||||
type Autolink struct {
|
||||
inlineBase
|
||||
|
||||
Children []Inline
|
||||
|
||||
Link string
|
||||
}
|
||||
|
||||
type delimiterType int
|
||||
|
||||
const (
|
||||
@@ -182,7 +190,7 @@ func (p *inlineParser) parseEscapeCharacter() {
|
||||
}
|
||||
|
||||
func (p *inlineParser) parseText() {
|
||||
if next := strings.IndexAny(p.raw[p.position:], "\r\n\\`&![]"); next == -1 {
|
||||
if next := strings.IndexAny(p.raw[p.position:], "\r\n\\`&![]wW:"); next == -1 {
|
||||
absPos := relativeToAbsolutePosition(p.ranges, p.position)
|
||||
p.inlines = append(p.inlines, &Text{
|
||||
Text: strings.TrimRightFunc(p.raw[p.position:], isWhitespace),
|
||||
@@ -198,6 +206,12 @@ func (p *inlineParser) parseText() {
|
||||
Range: Range{absPos, absPos + len(s)},
|
||||
})
|
||||
} else {
|
||||
if next == 0 {
|
||||
// Always read at least one character since 'w', 'W', and ':' may not actually match another
|
||||
// type of node
|
||||
next = 1
|
||||
}
|
||||
|
||||
p.inlines = append(p.inlines, &Text{
|
||||
Text: p.raw[p.position : p.position+next],
|
||||
Range: Range{absPos, absPos + next},
|
||||
@@ -443,6 +457,60 @@ func (p *inlineParser) parseCharacterReference() {
|
||||
}
|
||||
}
|
||||
|
||||
func (p *inlineParser) parseAutolink(c rune) bool {
|
||||
for element := p.delimiterStack.Back(); element != nil; element = element.Prev() {
|
||||
d := element.Value.(*delimiter)
|
||||
if !d.IsInactive {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
link := ""
|
||||
text := ""
|
||||
if c == ':' {
|
||||
text = parseURLAutolink(p.raw, p.position)
|
||||
link = text
|
||||
|
||||
// Since the current position is at the colon, we have to rewind the parsing slightly so that
|
||||
// we don't duplicate the URL scheme
|
||||
rewind := strings.Index(text, ":")
|
||||
if rewind != -1 {
|
||||
lastInline := p.inlines[len(p.inlines)-1]
|
||||
lastText, ok := lastInline.(*Text)
|
||||
|
||||
if !ok {
|
||||
// This should never occur since parseURLAutolink will only return a non-empty value
|
||||
// when the previous text ends in a valid URL protocol which would mean that the previous
|
||||
// node is a Text node
|
||||
return false
|
||||
}
|
||||
|
||||
p.inlines = p.inlines[0 : len(p.inlines)-1]
|
||||
p.inlines = append(p.inlines, &Text{
|
||||
Text: lastText.Text[:len(lastText.Text)-rewind],
|
||||
Range: Range{lastText.Range.Position, lastText.Range.End - rewind},
|
||||
})
|
||||
p.position -= rewind
|
||||
|
||||
}
|
||||
} else if c == 'w' {
|
||||
text = parseWWWAutolink(p.raw, p.position)
|
||||
link = "http://" + text
|
||||
}
|
||||
|
||||
if text == "" {
|
||||
return false
|
||||
}
|
||||
|
||||
p.inlines = append(p.inlines, &Autolink{
|
||||
Link: link,
|
||||
Children: []Inline{&Text{Text: text}},
|
||||
})
|
||||
p.position += len(text)
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func (p *inlineParser) Parse() []Inline {
|
||||
for _, r := range p.ranges {
|
||||
p.raw += p.markdown[r.Position:r.End]
|
||||
@@ -464,6 +532,12 @@ func (p *inlineParser) Parse() []Inline {
|
||||
p.parseLinkOrImageDelimiter()
|
||||
case ']':
|
||||
p.lookForLinkOrImage()
|
||||
case 'w', 'W', ':':
|
||||
matched := p.parseAutolink(c)
|
||||
|
||||
if !matched {
|
||||
p.parseText()
|
||||
}
|
||||
default:
|
||||
p.parseText()
|
||||
}
|
||||
|
||||
@@ -40,6 +40,14 @@ func isHexByte(c byte) bool {
|
||||
return isHex(rune(c))
|
||||
}
|
||||
|
||||
// isAlphanumeric reports whether c is an ASCII digit or an ASCII letter of either case.
func isAlphanumeric(c rune) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	default:
		return false
	}
}
|
||||
|
||||
func isAlphanumericByte(c byte) bool {
|
||||
return isAlphanumeric(rune(c))
|
||||
}
|
||||
|
||||
func nextNonWhitespace(markdown string, position int) int {
|
||||
for offset, c := range []byte(markdown[position:]) {
|
||||
if !isWhitespaceByte(c) {
|
||||
|
||||
Reference in New Issue
Block a user