FIX: Support IRIs (unicode URIs) when pulling hotlinked images (#9928)

This commit is contained in:
David Taylor
2020-05-29 17:47:05 +01:00
committed by GitHub
parent 3fe5eacd1b
commit ecfce93f28
2 changed files with 23 additions and 7 deletions

View File

@@ -59,7 +59,7 @@ module Jobs
if should_download_image?(src, post)
begin
# have we already downloaded that file?
schemeless_src = remove_scheme(original_src)
schemeless_src = normalize_src(original_src)
unless downloaded_images.include?(schemeless_src) || large_images.include?(schemeless_src) || broken_images.include?(schemeless_src)
@@ -75,17 +75,17 @@ module Jobs
if upload.persisted?
downloaded_urls[src] = upload.url
downloaded_images[remove_scheme(src)] = upload.id
downloaded_images[normalize_src(src)] = upload.id
has_downloaded_image = true
else
log(:info, "Failed to pull hotlinked image for post: #{post_id}: #{src} - #{upload.errors.full_messages.join("\n")}")
end
else
large_images << remove_scheme(original_src)
large_images << normalize_src(original_src)
has_new_large_image = true
end
else
broken_images << remove_scheme(original_src)
broken_images << normalize_src(original_src)
has_new_broken_image = true
end
end
@@ -95,8 +95,8 @@ module Jobs
escaped_src = Regexp.escape(original_src)
replace_raw = ->(match, match_src, replacement, _index) {
if remove_scheme(src) == remove_scheme(match_src)
if normalize_src(src) == normalize_src(match_src)
replacement =
if replacement.include?(InlineUploads::PLACEHOLDER)
replacement.sub(InlineUploads::PLACEHOLDER, upload.short_url)
@@ -215,8 +215,13 @@ module Jobs
private
def remove_scheme(src)
src.sub(/^https?:/i, "")
# Canonicalizes an image source URL so that equivalent URLs compare equal
# when used as lookup keys or in equality checks.
#
# The URL is parsed with Addressable::URI.heuristic_parse, normalized in
# place, and its scheme is cleared so that http/https variants of the same
# address produce the same string.
#
# @param src [String] the image URL to canonicalize
# @return [String] the normalized, scheme-less URL; +src+ itself, unchanged,
#   when the URL cannot be parsed or normalized
def normalize_src(src)
  uri = Addressable::URI.heuristic_parse(src)
  uri.normalize!
  uri.scheme = nil
  uri.to_s
rescue URI::Error, Addressable::URI::InvalidURIError
  # Addressable signals malformed input with its own InvalidURIError, which
  # does not inherit from the stdlib URI::Error, so it is rescued explicitly
  # to keep the "fall back to the raw src" behavior.
  src
end
end