mirror of
https://github.com/discourse/discourse.git
synced 2025-02-25 18:55:32 -06:00
Use rchardet instead of charlock_holmes gem
This commit is contained in:
parent
5d421fb946
commit
a115aae45f
2
Gemfile
2
Gemfile
@ -180,7 +180,7 @@ gem 'rqrcode'
|
|||||||
|
|
||||||
gem 'sshkey', require: false
|
gem 'sshkey', require: false
|
||||||
|
|
||||||
gem 'charlock_holmes', require: false
|
gem 'rchardet', require: false
|
||||||
|
|
||||||
if ENV["IMPORT"] == "1"
|
if ENV["IMPORT"] == "1"
|
||||||
gem 'mysql2'
|
gem 'mysql2'
|
||||||
|
@ -75,7 +75,6 @@ GEM
|
|||||||
uniform_notifier (~> 1.11.0)
|
uniform_notifier (~> 1.11.0)
|
||||||
byebug (10.0.2)
|
byebug (10.0.2)
|
||||||
certified (1.0.0)
|
certified (1.0.0)
|
||||||
charlock_holmes (0.7.6)
|
|
||||||
chunky_png (1.3.10)
|
chunky_png (1.3.10)
|
||||||
claide (1.0.2)
|
claide (1.0.2)
|
||||||
claide-plugins (0.9.2)
|
claide-plugins (0.9.2)
|
||||||
@ -321,6 +320,7 @@ GEM
|
|||||||
ffi (>= 1.0.6)
|
ffi (>= 1.0.6)
|
||||||
msgpack (>= 0.4.3)
|
msgpack (>= 0.4.3)
|
||||||
trollop (>= 1.16.2)
|
trollop (>= 1.16.2)
|
||||||
|
rchardet (1.8.0)
|
||||||
redis (4.0.1)
|
redis (4.0.1)
|
||||||
redis-namespace (1.6.0)
|
redis-namespace (1.6.0)
|
||||||
redis (>= 3.0.4)
|
redis (>= 3.0.4)
|
||||||
@ -457,7 +457,6 @@ DEPENDENCIES
|
|||||||
bullet
|
bullet
|
||||||
byebug
|
byebug
|
||||||
certified
|
certified
|
||||||
charlock_holmes
|
|
||||||
cppjieba_rb
|
cppjieba_rb
|
||||||
danger
|
danger
|
||||||
discourse_image_optim
|
discourse_image_optim
|
||||||
@ -523,6 +522,7 @@ DEPENDENCIES
|
|||||||
rb-fsevent
|
rb-fsevent
|
||||||
rb-inotify (~> 0.9)
|
rb-inotify (~> 0.9)
|
||||||
rbtrace
|
rbtrace
|
||||||
|
rchardet
|
||||||
redis
|
redis
|
||||||
redis-namespace
|
redis-namespace
|
||||||
rinku
|
rinku
|
||||||
|
@ -90,7 +90,7 @@ module Jobs
|
|||||||
def parsed_feed
|
def parsed_feed
|
||||||
raw_feed, encoding = fetch_rss
|
raw_feed, encoding = fetch_rss
|
||||||
encoded_feed = Encodings.try_utf8(raw_feed, encoding) if encoding
|
encoded_feed = Encodings.try_utf8(raw_feed, encoding) if encoding
|
||||||
encoded_feed = Encodings.to_utf8(raw_feed, encoding_hint: encoding) unless encoded_feed
|
encoded_feed = Encodings.to_utf8(raw_feed) unless encoded_feed
|
||||||
|
|
||||||
return nil if encoded_feed.blank?
|
return nil if encoded_feed.blank?
|
||||||
|
|
||||||
|
@ -1,20 +1,12 @@
|
|||||||
require 'charlock_holmes'
|
require 'rchardet'
|
||||||
|
|
||||||
module Encodings
|
module Encodings
|
||||||
BINARY_SCAN_LENGTH = 0
|
def self.to_utf8(string)
|
||||||
|
result = CharDet.detect(string)
|
||||||
|
|
||||||
def self.to_utf8(string, encoding_hint: nil, delete_bom: true)
|
encoded_string = try_utf8(string, result['encoding']) if result && result['encoding']
|
||||||
detector = CharlockHolmes::EncodingDetector.new(BINARY_SCAN_LENGTH)
|
encoded_string = force_utf8(string) if encoded_string.nil?
|
||||||
result = detector.detect(string, encoding_hint&.to_s)
|
encoded_string
|
||||||
|
|
||||||
if result && result[:encoding]
|
|
||||||
string = CharlockHolmes::Converter.convert(string, result[:encoding], Encoding::UTF_8.name)
|
|
||||||
else
|
|
||||||
string = string.encode(Encoding::UTF_8, undef: :replace, invalid: :replace, replace: '')
|
|
||||||
end
|
|
||||||
|
|
||||||
delete_bom!(string) if delete_bom
|
|
||||||
string
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.try_utf8(string, source_encoding)
|
def self.try_utf8(string, source_encoding)
|
||||||
@ -26,6 +18,14 @@ module Encodings
|
|||||||
nil
|
nil
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def self.force_utf8(string)
|
||||||
|
encoded_string = string.encode(Encoding::UTF_8,
|
||||||
|
undef: :replace,
|
||||||
|
invalid: :replace,
|
||||||
|
replace: '')
|
||||||
|
delete_bom!(encoded_string)
|
||||||
|
end
|
||||||
|
|
||||||
def self.delete_bom!(string)
|
def self.delete_bom!(string)
|
||||||
string.sub!(/\A\xEF\xBB\xBF/, '') unless string.blank?
|
string.sub!(/\A\xEF\xBB\xBF/, '') unless string.blank?
|
||||||
string
|
string
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
require 'rails_helper'
|
require 'rails_helper'
|
||||||
|
|
||||||
describe Encodings do
|
describe Encodings do
|
||||||
def to_utf8(filename, encoding_hint = nil)
|
def to_utf8(filename)
|
||||||
string = File.read("#{Rails.root}/spec/fixtures/encodings/#{filename}").chomp
|
string = File.read("#{Rails.root}/spec/fixtures/encodings/#{filename}").chomp
|
||||||
Encodings.to_utf8(string, encoding_hint: encoding_hint)
|
Encodings.to_utf8(string)
|
||||||
end
|
end
|
||||||
|
|
||||||
context "unicode" do
|
context "unicode" do
|
||||||
|
Loading…
Reference in New Issue
Block a user