2019-05-02 17:17:27 -05:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2023-01-09 06:10:19 -06:00
|
|
|
require "rchardet"
|
2018-07-27 12:41:53 -05:00
|
|
|
|
|
|
|
# Helpers for turning text of unknown or mislabelled encoding into UTF-8.
#
# Encoding detection is delegated to CharDet (loaded via the "rchardet"
# require at the top of this file); everything else uses core String methods.
module Encodings
  # Converts +string+ to UTF-8.
  #
  # The encoding reported by CharDet.detect is tried first. When detection
  # yields no encoding, or a strict conversion from the detected encoding
  # fails, the string is converted leniently with {force_utf8}.
  #
  # @param string [String] text in an unknown encoding
  # @return [String] UTF-8 text with any leading byte order mark removed
  def self.to_utf8(string)
    detected = CharDet.detect(string)
    source_encoding = detected && detected["encoding"]

    converted = try_utf8(string, source_encoding) if source_encoding
    # try_utf8 only ever returns a String or nil, so `||` is a safe fallback.
    converted || force_utf8(string)
  end

  # Strictly transcodes +string+ from +source_encoding+ to UTF-8.
  #
  # @param string [String] text to convert; it is not modified
  # @param source_encoding [String, Encoding] encoding the bytes are assumed
  #   to be in
  # @return [String, nil] the converted text without a leading byte order
  #   mark, or nil when the bytes cannot be converted cleanly
  def self.try_utf8(string, source_encoding)
    encoded = string.encode(Encoding::UTF_8, source_encoding)

    # String#encode does not validate anything when the source and target
    # encodings are the same, so the result has to be checked explicitly.
    encoded.valid_encoding? ? delete_bom!(encoded) : nil
  rescue Encoding::InvalidByteSequenceError,
         Encoding::UndefinedConversionError,
         Encoding::ConverterNotFoundError
    nil
  end

  # Leniently converts +string+ to UTF-8 from whatever encoding it is
  # currently labelled with, dropping every byte sequence that is invalid or
  # has no UTF-8 equivalent.
  #
  # Note that for a string labelled as binary (ASCII-8BIT) this discards
  # every byte >= 0x80, because those bytes have no defined conversion.
  #
  # @param string [String] text to convert; it is not modified
  # @return [String] UTF-8 text without a leading byte order mark
  def self.force_utf8(string)
    encoded_string = string.encode(Encoding::UTF_8, undef: :replace, invalid: :replace, replace: "")
    delete_bom!(encoded_string)
  end

  # Removes a leading UTF-8 byte order mark (bytes EF BB BF) from +string+
  # in place.
  #
  # @param string [String, nil] text to clean; nil and empty strings are
  #   returned untouched
  # @return [String, nil] the same object that was passed in
  def self.delete_bom!(string)
    return string if string.nil? || string.empty?

    string.sub!(/\A\xEF\xBB\xBF/, "")
    string
  end
end
|