mirror of
https://github.com/discourse/discourse.git
synced 2025-02-25 18:55:32 -06:00
FIX: Improvements and fixes to the image downsizing script (#9950)
Fixed bugs, added specs, extracted the upload downsizing code to a class, added support for non-S3 setups, changed it so that images aren't downloaded twice. This code has been tested on production and successfully resized ~180k uploads. Includes: * DEV: Extract upload downsizing logic * DEV: Add support for non-S3 uploads * DEV: Process only images uploaded by users * FIX: Incorrect usage of `count` and `exist?` typo * DEV: Spec S3 image downsizing * DEV: Avoid downloading images twice * DEV: Update filesizes earlier in the process * DEV: Return false on invalid upload * FIX: Download images that currently above the limit (If the image size limit is decreased, then there was no way to resize those images that now fall outside the allowed size range) * Update script/downsize_uploads.rb (Co-authored-by: Régis Hanol <regis@hanol.fr>)
This commit is contained in:
@@ -29,225 +29,13 @@ def log(*args)
|
||||
puts(*args) if ENV["VERBOSE"]
|
||||
end
|
||||
|
||||
def transform_post(post, upload_before, upload_after)
|
||||
post.raw.gsub!(/upload:\/\/#{upload_before.base62_sha1}(\.#{upload_before.extension})?/i, upload_after.short_url)
|
||||
post.raw.gsub!(Discourse.store.cdn_url(upload_before.url), Discourse.store.cdn_url(upload_after.url))
|
||||
post.raw.gsub!(Discourse.store.url_for(upload_before), Discourse.store.url_for(upload_after))
|
||||
post.raw.gsub!("#{Discourse.base_url}#{upload_before.short_path}", "#{Discourse.base_url}#{upload_after.short_path}")
|
||||
|
||||
path = SiteSetting.Upload.s3_upload_bucket.split("/", 2)[1]
|
||||
post.raw.gsub!(/<img src=\"https:\/\/.+?\/#{path}\/uploads\/default\/optimized\/.+?\/#{upload_before.sha1}_\d_(?<width>\d+)x(?<height>\d+).*?\" alt=\"(?<alt>.*?)\"\/?>/i) do
|
||||
"![#{$~[:alt]}|#{$~[:width]}x#{$~[:height]}](#{upload_after.short_url})"
|
||||
end
|
||||
|
||||
post.raw.gsub!(/!\[(.*?)\]\(\/uploads\/.+?\/#{upload_before.sha1}(\.#{upload_before.extension})?\)/i, "")
|
||||
end
|
||||
|
||||
def downsize_upload(upload, path)
|
||||
# Make sure the filesize is up to date
|
||||
upload.filesize = File.size(path)
|
||||
|
||||
OptimizedImage.downsize(path, path, "#{MAX_IMAGE_PIXELS}@", filename: upload.original_filename)
|
||||
sha1 = Upload.generate_digest(path)
|
||||
|
||||
if sha1 == upload.sha1
|
||||
log "No sha1 change"
|
||||
return
|
||||
end
|
||||
|
||||
w, h = FastImage.size(path, timeout: 15, raise_on_failure: true)
|
||||
|
||||
if !w || !h
|
||||
log "Invalid image dimensions after resizing"
|
||||
return
|
||||
end
|
||||
|
||||
# Neither #dup or #clone provide a complete copy
|
||||
original_upload = Upload.find(upload.id)
|
||||
ww, hh = ImageSizer.resize(w, h)
|
||||
|
||||
# A different upload record that matches the sha1 of the downsized image
|
||||
existing_upload = Upload.find_by(sha1: sha1)
|
||||
upload = existing_upload if existing_upload
|
||||
|
||||
upload.attributes = {
|
||||
sha1: sha1,
|
||||
width: w,
|
||||
height: h,
|
||||
thumbnail_width: ww,
|
||||
thumbnail_height: hh,
|
||||
filesize: File.size(path)
|
||||
}
|
||||
|
||||
if upload.filesize > upload.filesize_was
|
||||
log "No filesize reduction"
|
||||
return
|
||||
end
|
||||
|
||||
unless existing_upload
|
||||
url = Discourse.store.store_upload(File.new(path), upload)
|
||||
|
||||
unless url
|
||||
log "Couldn't store the upload"
|
||||
return
|
||||
end
|
||||
|
||||
upload.url = url
|
||||
end
|
||||
|
||||
log "base62: #{original_upload.base62_sha1} -> #{Upload.base62_sha1(sha1)}"
|
||||
log "sha: #{original_upload.sha1} -> #{sha1}"
|
||||
log "(an exisiting upload)" if existing_upload
|
||||
|
||||
success = true
|
||||
posts = Post.unscoped.joins(:post_uploads).where(post_uploads: { upload_id: original_upload.id }).uniq.sort_by(&:created_at)
|
||||
|
||||
posts.each do |post|
|
||||
transform_post(post, original_upload, upload)
|
||||
|
||||
if post.custom_fields[Post::DOWNLOADED_IMAGES].present?
|
||||
downloaded_images = JSON.parse(post.custom_fields[Post::DOWNLOADED_IMAGES])
|
||||
end
|
||||
|
||||
if post.raw_changed?
|
||||
log "Updating post"
|
||||
elsif downloaded_images&.has_value?(original_upload.id)
|
||||
log "A hotlinked, unreferenced image"
|
||||
elsif post.raw.include?(upload.short_url)
|
||||
log "Already processed"
|
||||
elsif post.trashed?
|
||||
log "A deleted post"
|
||||
elsif !post.topic || post.topic.trashed?
|
||||
log "A deleted topic"
|
||||
elsif post.cooked.include?(original_upload.sha1)
|
||||
if post.raw.include?("#{Discourse.base_url.sub(/^https?:\/\//i, "")}/t/")
|
||||
log "Updating a topic onebox"
|
||||
else
|
||||
log "Updating an external onebox"
|
||||
end
|
||||
else
|
||||
log "Could not find the upload URL"
|
||||
success = false
|
||||
end
|
||||
|
||||
log "#{Discourse.base_url}/p/#{post.id}"
|
||||
end
|
||||
|
||||
if posts.empty?
|
||||
log "Upload not used in any posts"
|
||||
|
||||
if User.where(uploaded_avatar_id: original_upload.id).count
|
||||
log "Used as a User avatar"
|
||||
elsif UserAvatar.where(gravatar_upload_id: original_upload.id).count
|
||||
log "Used as a UserAvatar gravatar"
|
||||
elsif UserAvatar.where(custom_upload_id: original_upload.id).count
|
||||
log "Used as a UserAvatar custom upload"
|
||||
elsif UserProfile.where(profile_background_upload_id: original_upload.id).count
|
||||
log "Used as a UserProfile profile background"
|
||||
elsif UserProfile.where(card_background_upload_id: original_upload.id).count
|
||||
log "Used as a UserProfile card background"
|
||||
elsif Category.where(uploaded_logo_id: original_upload.id).count
|
||||
log "Used as a Category logo"
|
||||
elsif Category.where(uploaded_background_id: original_upload.id).count
|
||||
log "Used as a Category background"
|
||||
elsif CustomEmoji.where(upload_id: original_upload.id).count
|
||||
log "Used as a CustomEmoji"
|
||||
elsif ThemeField.where(upload_id: original_upload.id).count
|
||||
log "Used as a ThemeField"
|
||||
else
|
||||
success = false
|
||||
end
|
||||
end
|
||||
|
||||
unless success
|
||||
if ENV["INTERACTIVE"]
|
||||
print "Press any key to continue with the upload"
|
||||
STDIN.beep
|
||||
STDIN.getch
|
||||
puts " k"
|
||||
elsif !existing_upload && !Upload.where(url: upload.url).exist?
|
||||
# We're bailing, so clean up the just uploaded file
|
||||
Discourse.store.remove_upload(upload)
|
||||
|
||||
log "⏩ Skipping"
|
||||
return
|
||||
end
|
||||
end
|
||||
|
||||
upload.save!
|
||||
|
||||
if existing_upload
|
||||
begin
|
||||
PostUpload.where(upload_id: original_upload.id).update_all(upload_id: upload.id)
|
||||
rescue ActiveRecord::RecordNotUnique, PG::UniqueViolation
|
||||
end
|
||||
|
||||
User.where(uploaded_avatar_id: original_upload.id).update_all(uploaded_avatar_id: upload.id)
|
||||
UserAvatar.where(gravatar_upload_id: original_upload.id).update_all(gravatar_upload_id: upload.id)
|
||||
UserAvatar.where(custom_upload_id: original_upload.id).update_all(custom_upload_id: upload.id)
|
||||
UserProfile.where(profile_background_upload_id: original_upload.id).update_all(profile_background_upload_id: upload.id)
|
||||
UserProfile.where(card_background_upload_id: original_upload.id).update_all(card_background_upload_id: upload.id)
|
||||
Category.where(uploaded_logo_id: original_upload.id).update_all(uploaded_logo_id: upload.id)
|
||||
Category.where(uploaded_background_id: original_upload.id).update_all(uploaded_background_id: upload.id)
|
||||
CustomEmoji.where(upload_id: original_upload.id).update_all(upload_id: upload.id)
|
||||
ThemeField.where(upload_id: original_upload.id).update_all(upload_id: upload.id)
|
||||
else
|
||||
upload.optimized_images.each(&:destroy!)
|
||||
end
|
||||
|
||||
posts.each do |post|
|
||||
DistributedMutex.synchronize("process_post_#{post.id}") do
|
||||
current_post = Post.unscoped.find(post.id)
|
||||
|
||||
# If the post became outdated, reapply changes
|
||||
if current_post.updated_at != post.updated_at
|
||||
transform_post(current_post, original_upload, upload)
|
||||
post = current_post
|
||||
end
|
||||
|
||||
if post.raw_changed?
|
||||
post.update_columns(
|
||||
raw: post.raw,
|
||||
updated_at: Time.zone.now
|
||||
)
|
||||
end
|
||||
|
||||
if existing_upload && post.custom_fields[Post::DOWNLOADED_IMAGES].present?
|
||||
downloaded_images = JSON.parse(post.custom_fields[Post::DOWNLOADED_IMAGES])
|
||||
|
||||
downloaded_images.transform_values! do |upload_id|
|
||||
upload_id == original_upload.id ? upload.id : upload_id
|
||||
end
|
||||
|
||||
post.custom_fields[Post::DOWNLOADED_IMAGES] = downloaded_images.to_json if downloaded_images.present?
|
||||
post.save_custom_fields
|
||||
end
|
||||
|
||||
post.rebake!
|
||||
end
|
||||
end
|
||||
|
||||
if existing_upload
|
||||
original_upload.reload.destroy!
|
||||
else
|
||||
Discourse.store.remove_upload(original_upload)
|
||||
end
|
||||
|
||||
true
|
||||
end
|
||||
|
||||
def process_uploads
|
||||
unless SiteSetting.Upload.enable_s3_uploads
|
||||
puts "This script supports only S3 uploads"
|
||||
return
|
||||
end
|
||||
|
||||
puts "", "Downsizing images to no more than #{MAX_IMAGE_PIXELS} pixels"
|
||||
|
||||
dimensions_count = 0
|
||||
downsized_count = 0
|
||||
|
||||
scope = Upload.where("LOWER(extension) IN ('jpg', 'jpeg', 'gif', 'png')")
|
||||
scope = Upload.by_users.where("LOWER(extension) IN ('jpg', 'jpeg', 'gif', 'png')")
|
||||
scope = scope.where(<<-SQL, MAX_IMAGE_PIXELS)
|
||||
COALESCE(width, 0) = 0 OR
|
||||
COALESCE(height, 0) = 0 OR
|
||||
@@ -271,19 +59,20 @@ def process_uploads
|
||||
print "\r#{progress}% Fixed dimensions: #{dimensions_count} Downsized: #{downsized_count} Skipped: #{skipped} (upload id: #{upload.id})"
|
||||
log "\n"
|
||||
|
||||
source = upload.local? ? Discourse.store.path_for(upload) : "https:#{upload.url}"
|
||||
path = if upload.local?
|
||||
Discourse.store.path_for(upload)
|
||||
else
|
||||
(Discourse.store.download(upload, max_file_size_kb: 100.megabytes) rescue nil)&.path
|
||||
end
|
||||
|
||||
unless source
|
||||
log "No path or URL"
|
||||
unless path
|
||||
log "No image path"
|
||||
skipped += 1
|
||||
next
|
||||
end
|
||||
|
||||
begin
|
||||
w, h = FastImage.size(source, timeout: 15, raise_on_failure: true)
|
||||
rescue FastImage::ImageFetchFailure
|
||||
log "Retrying image resizing"
|
||||
w, h = FastImage.size(source, timeout: 15)
|
||||
w, h = FastImage.size(path, raise_on_failure: true)
|
||||
rescue FastImage::UnknownImageType
|
||||
log "Unknown image type"
|
||||
skipped += 1
|
||||
@@ -312,13 +101,14 @@ def process_uploads
|
||||
width: w,
|
||||
height: h,
|
||||
thumbnail_width: ww,
|
||||
thumbnail_height: hh
|
||||
thumbnail_height: hh,
|
||||
filesize: File.size(path)
|
||||
}
|
||||
|
||||
if upload.changed?
|
||||
log "Correcting the upload dimensions"
|
||||
log "Before: #{upload.width_was}x#{upload.height_was} #{upload.thumbnail_width_was}x#{upload.thumbnail_height_was}"
|
||||
log "After: #{w}x#{h} #{ww}x#{hh}"
|
||||
log "Before: #{upload.width_was}x#{upload.height_was} #{upload.thumbnail_width_was}x#{upload.thumbnail_height_was} (#{upload.filesize_was})"
|
||||
log "After: #{w}x#{h} #{ww}x#{hh} (#{upload.filesize})"
|
||||
|
||||
dimensions_count += 1
|
||||
upload.save!
|
||||
@@ -330,15 +120,15 @@ def process_uploads
|
||||
next
|
||||
end
|
||||
|
||||
path = upload.local? ? source : (Discourse.store.download(upload) rescue nil)&.path
|
||||
result = ShrinkUploadedImage.new(
|
||||
upload: upload,
|
||||
path: path,
|
||||
max_pixels: MAX_IMAGE_PIXELS,
|
||||
verbose: ENV["VERBOSE"],
|
||||
interactive: ENV["INTERACTIVE"]
|
||||
).perform
|
||||
|
||||
unless path
|
||||
log "No image path"
|
||||
skipped += 1
|
||||
next
|
||||
end
|
||||
|
||||
if downsize_upload(upload, path)
|
||||
if result
|
||||
downsized_count += 1
|
||||
else
|
||||
skipped += 1
|
||||
|
||||
Reference in New Issue
Block a user