FIX: stop stripping zero-width whitespace (U+200B, U+FEFF)

The zero-width space character (U+200B) is used for formatting Khmer words.
This commit is contained in:
Sam
2015-03-27 13:01:31 +11:00
parent d5eed540ea
commit 58c95f64d2
2 changed files with 9 additions and 2 deletions

View File

@@ -44,7 +44,7 @@ class TextCleaner
text
end
@@whitespaces_regexp = Regexp.new("(\u00A0|\u1680|\u180E|[\u2000-\u200B]|\u2028|\u2029|\u202F|\u205F|\u3000|\uFEFF)", "u").freeze
@@whitespaces_regexp = Regexp.new("(\u00A0|\u1680|\u180E|[\u2000-\u200A]|\u2028|\u2029|\u202F|\u205F|\u3000)", "u").freeze
def self.normalize_whitespaces(text)
text.gsub(@@whitespaces_regexp, ' ')