From bd07658a375e66f43604ff5215a4afaa4226b02c Mon Sep 17 00:00:00 2001 From: Guo Xiang Tan Date: Fri, 1 Jul 2016 15:22:30 +0800 Subject: [PATCH] PERF: Split queries when cleaning uploads. This reduces the number of scans that the db has to do in the query to fetch orphan uploads. Futheremore, we were not batching our records which bloats memory. --- app/jobs/scheduled/clean_up_uploads.rb | 36 +++++---- spec/components/cooked_post_processor_spec.rb | 18 ++--- .../components/file_store/local_store_spec.rb | 4 +- spec/components/file_store/s3_store_spec.rb | 4 +- spec/fabricators/upload_fabricator.rb | 4 +- spec/fabricators/user_avatar_fabricator.rb | 3 + spec/jobs/clean_up_uploads_spec.rb | 73 ++++++++++++++++++- 7 files changed, 111 insertions(+), 31 deletions(-) create mode 100644 spec/fabricators/user_avatar_fabricator.rb diff --git a/app/jobs/scheduled/clean_up_uploads.rb b/app/jobs/scheduled/clean_up_uploads.rb index 523a017c466..0ec3c96b54c 100644 --- a/app/jobs/scheduled/clean_up_uploads.rb +++ b/app/jobs/scheduled/clean_up_uploads.rb @@ -1,25 +1,35 @@ module Jobs - class CleanUpUploads < Jobs::Scheduled every 1.hour def execute(args) return unless SiteSetting.clean_up_uploads? + ignore_urls = [] + ignore_urls |= UserProfile.uniq.select(:profile_background).where("profile_background IS NOT NULL AND profile_background != ''").pluck(:profile_background) + ignore_urls |= UserProfile.uniq.select(:card_background).where("card_background IS NOT NULL AND card_background != ''").pluck(:card_background) + ignore_urls |= Category.uniq.select(:logo_url).where("logo_url IS NOT NULL AND logo_url != ''").pluck(:logo_url) + ignore_urls |= Category.uniq.select(:background_url).where("background_url IS NOT NULL AND background_url != ''").pluck(:background_url) + + ids = [] + ids |= PostUpload.uniq.select(:upload_id).pluck(:upload_id) + ids |= User.uniq.select(:uploaded_avatar_id).where("uploaded_avatar_id IS NOT NULL").pluck(:uploaded_avatar_id) + ids |= UserAvatar.uniq.select(:gravatar_upload_id).where("gravatar_upload_id IS NOT NULL").pluck(:gravatar_upload_id) + grace_period = [SiteSetting.clean_orphan_uploads_grace_period_hours, 1].max - Upload.where("created_at < ?", grace_period.hour.ago) - .where("retain_hours IS NULL OR created_at < current_timestamp - interval '1 hour' * retain_hours") - .where("id NOT IN (SELECT upload_id FROM post_uploads WHERE upload_id IS NOT NULL)") - .where("id NOT IN (SELECT uploaded_avatar_id FROM users WHERE uploaded_avatar_id IS NOT NULL)") - .where("id NOT IN (SELECT gravatar_upload_id FROM user_avatars WHERE gravatar_upload_id IS NOT NULL)") - .where("url NOT IN (SELECT profile_background FROM user_profiles WHERE LENGTH(COALESCE(profile_background, '')) > 0)") - .where("url NOT IN (SELECT card_background FROM user_profiles WHERE LENGTH(COALESCE(card_background, '')) > 0)") - .where("url NOT IN (SELECT logo_url FROM categories WHERE LENGTH(COALESCE(logo_url, '')) > 0)") - .where("url NOT IN (SELECT background_url FROM categories WHERE LENGTH(COALESCE(background_url, '')) > 0)") - .destroy_all + result = Upload.where("created_at < ?", grace_period.hour.ago) + .where("retain_hours IS NULL OR created_at < current_timestamp - interval '1 hour' * retain_hours") + + if !ids.empty? + result = result.where("id NOT IN (?)", ids) + end + + if !ignore_urls.empty? + result = result.where("url NOT IN (?)", ignore_urls) + end + + result.find_each { |upload| upload.destroy } end - end - end diff --git a/spec/components/cooked_post_processor_spec.rb b/spec/components/cooked_post_processor_spec.rb index 02e36bc8a58..fef5750de72 100644 --- a/spec/components/cooked_post_processor_spec.rb +++ b/spec/components/cooked_post_processor_spec.rb @@ -120,9 +120,9 @@ describe CookedPostProcessor do it "generates overlay information" do cpp.post_process_images - expect(cpp.html).to match_html '

' + expect(cpp.html).to match_html "

+logo.png1750x2000 1.21 KB +

" expect(cpp).to be_dirty end @@ -153,9 +153,9 @@ describe CookedPostProcessor do it "generates overlay information" do cpp.post_process_images - expect(cpp.html).to match_html '

' + expect(cpp.html).to match_html "

+logo.png1750x2000 1.21 KB +

" expect(cpp).to be_dirty end @@ -181,9 +181,9 @@ describe CookedPostProcessor do it "generates overlay information" do cpp.post_process_images - expect(cpp.html).to match_html '

' + expect(cpp.html).to match_html "

+ WAT1750x2000 1.21 KB +

" expect(cpp).to be_dirty end diff --git a/spec/components/file_store/local_store_spec.rb b/spec/components/file_store/local_store_spec.rb index 68724c57770..142c83d854a 100644 --- a/spec/components/file_store/local_store_spec.rb +++ b/spec/components/file_store/local_store_spec.rb @@ -14,7 +14,7 @@ describe FileStore::LocalStore do it "returns a relative url" do store.expects(:copy_file) - expect(store.store_upload(uploaded_file, upload)).to match(/\/uploads\/default\/original\/.+e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98\.png/) + expect(store.store_upload(uploaded_file, upload)).to match(/\/uploads\/default\/original\/.+#{upload.sha1}\.png/) end end @@ -23,7 +23,7 @@ describe FileStore::LocalStore do it "returns a relative url" do store.expects(:copy_file) - expect(store.store_optimized_image({}, optimized_image)).to match(/\/uploads\/default\/optimized\/.+e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98_#{OptimizedImage::VERSION}_100x200\.png/) + expect(store.store_optimized_image({}, optimized_image)).to match(/\/uploads\/default\/optimized\/.+#{optimized_image.upload.sha1}_#{OptimizedImage::VERSION}_100x200\.png/) end end diff --git a/spec/components/file_store/s3_store_spec.rb b/spec/components/file_store/s3_store_spec.rb index 8b9a6309f56..8b504a72b52 100644 --- a/spec/components/file_store/s3_store_spec.rb +++ b/spec/components/file_store/s3_store_spec.rb @@ -23,7 +23,7 @@ describe FileStore::S3Store do it "returns an absolute schemaless url" do s3_helper.expects(:upload) - expect(store.store_upload(uploaded_file, upload)).to match(/\/\/s3_upload_bucket\.s3\.amazonaws\.com\/original\/.+e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98\.png/) + expect(store.store_upload(uploaded_file, upload)).to match(/\/\/s3_upload_bucket\.s3\.amazonaws\.com\/original\/.+#{upload.sha1}\.png/) end end @@ -32,7 +32,7 @@ describe FileStore::S3Store do it "returns an absolute schemaless url" do s3_helper.expects(:upload) - expect(store.store_optimized_image(optimized_image_file, optimized_image)).to match(/\/\/s3_upload_bucket\.s3\.amazonaws\.com\/optimized\/.+e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98_#{OptimizedImage::VERSION}_100x200\.png/) + expect(store.store_optimized_image(optimized_image_file, optimized_image)).to match(/\/\/s3_upload_bucket\.s3\.amazonaws\.com\/optimized\/.+#{optimized_image.upload.sha1}_#{OptimizedImage::VERSION}_100x200\.png/) end end diff --git a/spec/fabricators/upload_fabricator.rb b/spec/fabricators/upload_fabricator.rb index a64d7def4d7..96abf714055 100644 --- a/spec/fabricators/upload_fabricator.rb +++ b/spec/fabricators/upload_fabricator.rb @@ -1,11 +1,11 @@ Fabricator(:upload) do user - sha1 "e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98" + sha1 { sequence(:sha1) { |n| Digest::SHA1.hexdigest(n.to_s) } } original_filename "logo.png" filesize 1234 width 100 height 200 - url "/uploads/default/1/1234567890123456.png" + url { sequence(:url) { |n| "/uploads/default/#{n}/1234567890123456.png" } } end Fabricator(:attachment, from: :upload) do diff --git a/spec/fabricators/user_avatar_fabricator.rb b/spec/fabricators/user_avatar_fabricator.rb new file mode 100644 index 00000000000..3cbd17cddc5 --- /dev/null +++ b/spec/fabricators/user_avatar_fabricator.rb @@ -0,0 +1,3 @@ +Fabricator(:user_avatar) do + user +end diff --git a/spec/jobs/clean_up_uploads_spec.rb b/spec/jobs/clean_up_uploads_spec.rb index 6035f17fc22..ca0fd7390d8 100644 --- a/spec/jobs/clean_up_uploads_spec.rb +++ b/spec/jobs/clean_up_uploads_spec.rb @@ -3,16 +3,14 @@ require 'rails_helper' require_dependency 'jobs/scheduled/clean_up_uploads' describe Jobs::CleanUpUploads do - before do Upload.destroy_all SiteSetting.clean_up_uploads = true SiteSetting.clean_orphan_uploads_grace_period_hours = 1 + @upload = Fabricate(:upload, created_at: 2.hours.ago) end it "deletes orphan uploads" do - Fabricate(:upload, created_at: 2.hours.ago) - expect(Upload.count).to be(1) Jobs::CleanUpUploads.new.execute(nil) @@ -20,4 +18,73 @@ describe Jobs::CleanUpUploads do expect(Upload.count).to be(0) end + it "does not delete profile background uploads" do + profile_background_upload = Fabricate(:upload, created_at: 2.hours.ago) + UserProfile.last.update_attributes!(profile_background: profile_background_upload.url) + + Jobs::CleanUpUploads.new.execute(nil) + + expect(Upload.find_by(id: @upload.id)).to eq(nil) + expect(Upload.find_by(id: profile_background_upload.id)).to eq(profile_background_upload) + end + + it "does not delete card background uploads" do + card_background_upload = Fabricate(:upload, created_at: 2.hours.ago) + UserProfile.last.update_attributes!(card_background: card_background_upload.url) + + Jobs::CleanUpUploads.new.execute(nil) + + expect(Upload.find_by(id: @upload.id)).to eq(nil) + expect(Upload.find_by(id: card_background_upload.id)).to eq(card_background_upload) + end + + it "does not delete category logo uploads" do + category_logo_upload = Fabricate(:upload, created_at: 2.hours.ago) + category = Fabricate(:category, logo_url: category_logo_upload.url) + + Jobs::CleanUpUploads.new.execute(nil) + + expect(Upload.find_by(id: @upload.id)).to eq(nil) + expect(Upload.find_by(id: category_logo_upload.id)).to eq(category_logo_upload) + end + + it "does not delete category background url uploads" do + category_background_url = Fabricate(:upload, created_at: 2.hours.ago) + category = Fabricate(:category, background_url: category_background_url.url) + + Jobs::CleanUpUploads.new.execute(nil) + + expect(Upload.find_by(id: @upload.id)).to eq(nil) + expect(Upload.find_by(id: category_background_url.id)).to eq(category_background_url) + end + + it "does not delete post uploads" do + upload = Fabricate(:upload, created_at: 2.hours.ago) + post = Fabricate(:post, uploads: [upload]) + + Jobs::CleanUpUploads.new.execute(nil) + + expect(Upload.find_by(id: @upload.id)).to eq(nil) + expect(Upload.find_by(id: upload.id)).to eq(upload) + end + + it "does not delete user uploaded avatar" do + upload = Fabricate(:upload, created_at: 2.hours.ago) + user = Fabricate(:user, uploaded_avatar: upload) + + Jobs::CleanUpUploads.new.execute(nil) + + expect(Upload.find_by(id: @upload.id)).to eq(nil) + expect(Upload.find_by(id: upload.id)).to eq(upload) + end + + it "does not delete user gravatar" do + upload = Fabricate(:upload, created_at: 2.hours.ago) + user = Fabricate(:user, user_avatar: Fabricate(:user_avatar, gravatar_upload: upload)) + + Jobs::CleanUpUploads.new.execute(nil) + + expect(Upload.find_by(id: @upload.id)).to eq(nil) + expect(Upload.find_by(id: upload.id)).to eq(upload) + end end