Files
mattermost/services/docextractor/plain.go
Jesús Espino df695115be Removing FilesSearch feature flag (#17548)
* Removing FilesSearch feature flag

* Fixing tests

* Adding an improvement on plain text extraction

* Adding tests for plain text extraction

* Removed unneeded conversion

* Adding missed license

* Remove the feature flag from the migration

* Fixing some tests

* Updating i18n/en.json file
2021-04-30 23:21:26 +02:00

53 lines
1.0 KiB
Go

// Copyright (c) 2015-present Mattermost, Inc. All Rights Reserved.
// See LICENSE.txt for license information.
package docextractor
import (
"io"
"io/ioutil"
"unicode"
"unicode/utf8"
)
// plainExtractor extracts the content of files that look like plain text.
type plainExtractor struct{}

// Match reports whether this extractor accepts filename. It accepts every
// filename; whether the content is really text is decided by Extract.
func (pe *plainExtractor) Match(filename string) bool {
	return true
}

// Extract returns the full content of r as a string when the beginning of the
// stream looks like text, and an empty string otherwise.
//
// Only the first 1024 bytes are inspected: every rune decoded from them must
// be a graphic character or whitespace. filename is not used.
//
// A non-nil error is returned only when reading from r fails; content that is
// empty or does not look like text yields ("", nil).
func (pe *plainExtractor) Extract(filename string, r io.ReadSeeker) (string, error) {
	const sniffLen = 1024

	// Any visible character plus any whitespace counts as text. The ranges
	// are copied into a fresh slice so that append can never write into the
	// backing array of the shared unicode.GraphicRanges slice.
	validRanges := make([]*unicode.RangeTable, 0, len(unicode.GraphicRanges)+1)
	validRanges = append(validRanges, unicode.GraphicRanges...)
	validRanges = append(validRanges, unicode.White_Space)

	// io.ReadFull keeps reading until the buffer is full or the stream ends,
	// so a reader that hands out short reads is still sampled properly.
	head := make([]byte, sniffLen)
	total, err := io.ReadFull(r, head)
	atEOF := err == io.EOF || err == io.ErrUnexpectedEOF
	if err != nil && !atEOF {
		return "", err
	}
	if total == 0 {
		return "", nil
	}
	head = head[:total]

	// When the sample is only a prefix of the stream, its last rune may have
	// been split at the cut, so runes starting in the final UTFMax-1 bytes
	// are not inspected. When the whole stream fits in the sample there is
	// nothing to split and every byte is checked.
	limit := total
	if !atEOF {
		limit = total - utf8.UTFMax + 1
	}
	for pos := 0; pos < limit; {
		// NOTE: invalid UTF-8 decodes to utf8.RuneError (U+FFFD), which is a
		// graphic symbol, so malformed sequences are not rejected here.
		c, size := utf8.DecodeRune(head[pos:])
		if !unicode.In(c, validRanges...) {
			return "", nil
		}
		pos += size
	}

	if atEOF {
		return string(head), nil
	}

	rest, err := ioutil.ReadAll(r)
	if err != nil {
		// Do not hand back silently truncated text.
		return "", err
	}
	return string(head) + string(rest), nil
}