FIX: strip invalid byte sequences

This commit is contained in:
Maja Komel
2019-02-26 00:12:34 +01:00
parent ed0120171c
commit 3d9981ac5c
2 changed files with 10 additions and 0 deletions

View File

@@ -27,6 +27,8 @@ class TextCleaner
end
def self.clean(text, opts = {})
# Remove invalid byte sequences
text.scrub!("")
# Replace !!!!! with a single !
text.gsub!(/!+/, '!') if opts[:deduplicate_exclamation_marks]
# Replace ????? with a single ?