Updated server-side hashtag regex to ignore more punctuation around words

This commit is contained in:
hmhealey
2016-01-25 16:47:49 -05:00
parent ab5425f9b9
commit 97ede2a428
4 changed files with 54 additions and 25 deletions

View File

@@ -20,12 +20,7 @@ func splitWordsNoQuotes(text string) []string {
words := []string{}
for _, word := range strings.Fields(text) {
word = puncStart.ReplaceAllString(word, "")
word = puncEnd.ReplaceAllString(word, "")
if len(word) != 0 {
words = append(words, word)
}
words = append(words, word)
}
return words
@@ -94,7 +89,16 @@ func parseSearchFlags(input []string) ([]string, [][2]string) {
}
if !isFlag {
words = append(words, word)
// trim off surrounding punctuation
word = puncStart.ReplaceAllString(word, "")
word = puncEnd.ReplaceAllString(word, "")
// and remove extra pound #s
word = hashtagStart.ReplaceAllString(word, "#")
if len(word) != 0 {
words = append(words, word)
}
}
}

View File

@@ -118,19 +118,19 @@ func TestParseSearchFlags(t *testing.T) {
t.Fatalf("got incorrect flags %v", flags)
}
if words, flags := parseSearchFlags(splitWords("fruit: cherry")); len(words) != 2 || words[0] != "fruit:" || words[1] != "cherry" {
if words, flags := parseSearchFlags(splitWords("fruit: cherry")); len(words) != 2 || words[0] != "fruit" || words[1] != "cherry" {
t.Fatalf("got incorrect words %v", words)
} else if len(flags) != 0 {
t.Fatalf("got incorrect flags %v", flags)
}
if words, flags := parseSearchFlags(splitWords("channel:")); len(words) != 1 || words[0] != "channel:" {
if words, flags := parseSearchFlags(splitWords("channel:")); len(words) != 1 || words[0] != "channel" {
t.Fatalf("got incorrect words %v", words)
} else if len(flags) != 0 {
t.Fatalf("got incorrect flags %v", flags)
}
if words, flags := parseSearchFlags(splitWords("channel: first in: second from:")); len(words) != 1 || words[0] != "from:" {
if words, flags := parseSearchFlags(splitWords("channel: first in: second from:")); len(words) != 1 || words[0] != "from" {
t.Fatalf("got incorrect words %v", words)
} else if len(flags) != 2 || flags[0][0] != "channel" || flags[0][1] != "first" || flags[1][0] != "in" || flags[1][1] != "second" {
t.Fatalf("got incorrect flags %v", flags)
@@ -212,4 +212,8 @@ func TestParseSearchParams(t *testing.T) {
if sp := ParseSearchParams("testing in:channel from:someone"); len(sp) != 1 || sp[0].Terms != "testing" || len(sp[0].InChannels) != 1 || sp[0].InChannels[0] != "channel" || len(sp[0].FromUsers) != 1 || sp[0].FromUsers[0] != "someone" {
t.Fatalf("Incorrect output from parse search params: %v", sp[0])
}
if sp := ParseSearchParams("##hashtag +#plus+"); len(sp) != 1 || sp[0].Terms != "#hashtag #plus" || sp[0].IsHashtag != true || len(sp[0].InChannels) != 0 || len(sp[0].FromUsers) != 0 {
t.Fatalf("Incorrect output from parse search params: %v", sp[0])
}
}

View File

@@ -298,8 +298,9 @@ func Etag(parts ...interface{}) string {
}
// validHashtag matches a whole word that is a well-formed hashtag: '#', then
// one or more letters (ASCII or äöüÄÖÜß), then any run of letters, digits,
// '_' or '-', then a final letter or digit. Consequently a tag must have at
// least two characters after '#', cannot start with a digit, and cannot end
// in '_' or '-'.
var validHashtag = regexp.MustCompile(`^(#[A-Za-zäöüÄÖÜß]+[A-Za-z0-9äöüÄÖÜß_\-]*[A-Za-z0-9äöüÄÖÜß])$`)
// NOTE: this is a diff view. The next two declarations are the versions
// removed by this commit; the three after them are the replacements.
//
// puncStart (removed): a leading run of . , ( ) & $ ! ? [ ] { } ' : ; \
var puncStart = regexp.MustCompile(`^[.,()&$!\?\[\]{}':;\\]+`)
// puncEnd (removed): a trailing run of the same characters plus '#', but
// without ':' — so a trailing colon was left on the word.
var puncEnd = regexp.MustCompile(`[.,()&$#!\?\[\]{}';\\]+$`)
// puncStart (added): the old leading set extended with < > - + = % ^ * |
// '#' is deliberately absent so the hashtag marker itself is not stripped.
var puncStart = regexp.MustCompile(`^[.,()&$!\?\[\]{}':;\\<>\-+=%^*|]+`)
// hashtagStart (added): two or more leading '#'; callers in this diff replace
// the match with a single "#".
var hashtagStart = regexp.MustCompile(`^#{2,}`)
// puncEnd (added): the old trailing set extended with ':' and < > - + = % ^ * |
// Trailing '#' is still stripped.
var puncEnd = regexp.MustCompile(`[.,()&$#!\?\[\]{}':;\\<>\-+=%^*|]+$`)
func ParseHashtags(text string) (string, string) {
words := strings.Fields(text)
@@ -307,8 +308,13 @@ func ParseHashtags(text string) (string, string) {
hashtagString := ""
plainString := ""
for _, word := range words {
// trim off surrounding punctuation
word = puncStart.ReplaceAllString(word, "")
word = puncEnd.ReplaceAllString(word, "")
// and remove extra pound #s
word = hashtagStart.ReplaceAllString(word, "#")
if validHashtag.MatchString(word) {
hashtagString += " " + word
} else {

View File

@@ -83,19 +83,34 @@ func TestEtag(t *testing.T) {
}
// hashtags is a table of input text -> expected hashtag string (hashtags
// separated by single spaces, "" when the input has no valid hashtag).
// Presumably consumed by TestParseHashtags below — its body is not visible
// in this hunk.
//
// NOTE: this is a diff view, so the table shows both sides of the change:
// the first group is the set of entries removed by the commit, the second is
// the same entries re-added (realigned by gofmt in the real file), and the
// third is the set of new cases.
var hashtags map[string]string = map[string]string{
// --- removed by this commit ---
"#test": "#test",
"test": "",
"#test123": "#test123",
"#123test123": "",
"#test-test": "#test-test",
"#test?": "#test",
"hi #there": "#there",
"#bug #idea": "#bug #idea",
"#bug or #gif!": "#bug #gif",
"#hüllo": "#hüllo",
"#?test": "",
"#-test": "",
"#yo_yo": "#yo_yo",
// --- re-added unchanged ---
"#test": "#test",
"test": "",
"#test123": "#test123",
// a tag may not begin with a digit
"#123test123": "",
"#test-test": "#test-test",
"#test?": "#test",
"hi #there": "#there",
"#bug #idea": "#bug #idea",
"#bug or #gif!": "#bug #gif",
"#hüllo": "#hüllo",
// punctuation directly after '#' is not stripped, so these stay invalid
"#?test": "",
"#-test": "",
"#yo_yo": "#yo_yo",
// --- new cases: surrounding punctuation is trimmed from both ends ---
"(#brakets)": "#brakets",
")#stekarb(": "#stekarb",
"<#less_than<": "#less_than",
">#greater_than>": "#greater_than",
"-#minus-": "#minus",
"+#plus+": "#plus",
"=#equals=": "#equals",
"%#pct%": "#pct",
"&#and&": "#and",
"^#hat^": "#hat",
// repeated leading '#' collapses to one; trailing '#' is dropped
"##brown#": "#brown",
"*#star*": "#star",
"|#pipe|": "#pipe",
":#colon:": "#colon",
";#semi;": "#semi",
}
func TestParseHashtags(t *testing.T) {