From bd07658a375e66f43604ff5215a4afaa4226b02c Mon Sep 17 00:00:00 2001
From: Guo Xiang Tan
Date: Fri, 1 Jul 2016 15:22:30 +0800
Subject: [PATCH] PERF: Split queries when cleaning uploads.
This reduces the number of scans that the db has to do in the query
to fetch orphan uploads. Futheremore, we were not batching our
records which bloats memory.
---
app/jobs/scheduled/clean_up_uploads.rb | 36 +++++----
spec/components/cooked_post_processor_spec.rb | 18 ++---
.../components/file_store/local_store_spec.rb | 4 +-
spec/components/file_store/s3_store_spec.rb | 4 +-
spec/fabricators/upload_fabricator.rb | 4 +-
spec/fabricators/user_avatar_fabricator.rb | 3 +
spec/jobs/clean_up_uploads_spec.rb | 73 ++++++++++++++++++-
7 files changed, 111 insertions(+), 31 deletions(-)
create mode 100644 spec/fabricators/user_avatar_fabricator.rb
diff --git a/app/jobs/scheduled/clean_up_uploads.rb b/app/jobs/scheduled/clean_up_uploads.rb
index 523a017c466..0ec3c96b54c 100644
--- a/app/jobs/scheduled/clean_up_uploads.rb
+++ b/app/jobs/scheduled/clean_up_uploads.rb
@@ -1,25 +1,35 @@
module Jobs
-
class CleanUpUploads < Jobs::Scheduled
every 1.hour
def execute(args)
return unless SiteSetting.clean_up_uploads?
+ ignore_urls = []
+ ignore_urls |= UserProfile.uniq.select(:profile_background).where("profile_background IS NOT NULL AND profile_background != ''").pluck(:profile_background)
+ ignore_urls |= UserProfile.uniq.select(:card_background).where("card_background IS NOT NULL AND card_background != ''").pluck(:card_background)
+ ignore_urls |= Category.uniq.select(:logo_url).where("logo_url IS NOT NULL AND logo_url != ''").pluck(:logo_url)
+ ignore_urls |= Category.uniq.select(:background_url).where("background_url IS NOT NULL AND background_url != ''").pluck(:background_url)
+
+ ids = []
+ ids |= PostUpload.uniq.select(:upload_id).pluck(:upload_id)
+ ids |= User.uniq.select(:uploaded_avatar_id).where("uploaded_avatar_id IS NOT NULL").pluck(:uploaded_avatar_id)
+ ids |= UserAvatar.uniq.select(:gravatar_upload_id).where("gravatar_upload_id IS NOT NULL").pluck(:gravatar_upload_id)
+
grace_period = [SiteSetting.clean_orphan_uploads_grace_period_hours, 1].max
- Upload.where("created_at < ?", grace_period.hour.ago)
- .where("retain_hours IS NULL OR created_at < current_timestamp - interval '1 hour' * retain_hours")
- .where("id NOT IN (SELECT upload_id FROM post_uploads WHERE upload_id IS NOT NULL)")
- .where("id NOT IN (SELECT uploaded_avatar_id FROM users WHERE uploaded_avatar_id IS NOT NULL)")
- .where("id NOT IN (SELECT gravatar_upload_id FROM user_avatars WHERE gravatar_upload_id IS NOT NULL)")
- .where("url NOT IN (SELECT profile_background FROM user_profiles WHERE LENGTH(COALESCE(profile_background, '')) > 0)")
- .where("url NOT IN (SELECT card_background FROM user_profiles WHERE LENGTH(COALESCE(card_background, '')) > 0)")
- .where("url NOT IN (SELECT logo_url FROM categories WHERE LENGTH(COALESCE(logo_url, '')) > 0)")
- .where("url NOT IN (SELECT background_url FROM categories WHERE LENGTH(COALESCE(background_url, '')) > 0)")
- .destroy_all
+ result = Upload.where("created_at < ?", grace_period.hour.ago)
+ .where("retain_hours IS NULL OR created_at < current_timestamp - interval '1 hour' * retain_hours")
+
+ if !ids.empty?
+ result = result.where("id NOT IN (?)", ids)
+ end
+
+ if !ignore_urls.empty?
+ result = result.where("url NOT IN (?)", ignore_urls)
+ end
+
+ result.find_each { |upload| upload.destroy }
end
-
end
-
end
diff --git a/spec/components/cooked_post_processor_spec.rb b/spec/components/cooked_post_processor_spec.rb
index 02e36bc8a58..fef5750de72 100644
--- a/spec/components/cooked_post_processor_spec.rb
+++ b/spec/components/cooked_post_processor_spec.rb
@@ -120,9 +120,9 @@ describe CookedPostProcessor do
it "generates overlay information" do
cpp.post_process_images
- expect(cpp.html).to match_html '
'
+ expect(cpp.html).to match_html ""
expect(cpp).to be_dirty
end
@@ -153,9 +153,9 @@ describe CookedPostProcessor do
it "generates overlay information" do
cpp.post_process_images
- expect(cpp.html).to match_html ''
+ expect(cpp.html).to match_html ""
expect(cpp).to be_dirty
end
@@ -181,9 +181,9 @@ describe CookedPostProcessor do
it "generates overlay information" do
cpp.post_process_images
- expect(cpp.html).to match_html ''
+ expect(cpp.html).to match_html ""
expect(cpp).to be_dirty
end
diff --git a/spec/components/file_store/local_store_spec.rb b/spec/components/file_store/local_store_spec.rb
index 68724c57770..142c83d854a 100644
--- a/spec/components/file_store/local_store_spec.rb
+++ b/spec/components/file_store/local_store_spec.rb
@@ -14,7 +14,7 @@ describe FileStore::LocalStore do
it "returns a relative url" do
store.expects(:copy_file)
- expect(store.store_upload(uploaded_file, upload)).to match(/\/uploads\/default\/original\/.+e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98\.png/)
+ expect(store.store_upload(uploaded_file, upload)).to match(/\/uploads\/default\/original\/.+#{upload.sha1}\.png/)
end
end
@@ -23,7 +23,7 @@ describe FileStore::LocalStore do
it "returns a relative url" do
store.expects(:copy_file)
- expect(store.store_optimized_image({}, optimized_image)).to match(/\/uploads\/default\/optimized\/.+e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98_#{OptimizedImage::VERSION}_100x200\.png/)
+ expect(store.store_optimized_image({}, optimized_image)).to match(/\/uploads\/default\/optimized\/.+#{optimized_image.upload.sha1}_#{OptimizedImage::VERSION}_100x200\.png/)
end
end
diff --git a/spec/components/file_store/s3_store_spec.rb b/spec/components/file_store/s3_store_spec.rb
index 8b9a6309f56..8b504a72b52 100644
--- a/spec/components/file_store/s3_store_spec.rb
+++ b/spec/components/file_store/s3_store_spec.rb
@@ -23,7 +23,7 @@ describe FileStore::S3Store do
it "returns an absolute schemaless url" do
s3_helper.expects(:upload)
- expect(store.store_upload(uploaded_file, upload)).to match(/\/\/s3_upload_bucket\.s3\.amazonaws\.com\/original\/.+e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98\.png/)
+ expect(store.store_upload(uploaded_file, upload)).to match(/\/\/s3_upload_bucket\.s3\.amazonaws\.com\/original\/.+#{upload.sha1}\.png/)
end
end
@@ -32,7 +32,7 @@ describe FileStore::S3Store do
it "returns an absolute schemaless url" do
s3_helper.expects(:upload)
- expect(store.store_optimized_image(optimized_image_file, optimized_image)).to match(/\/\/s3_upload_bucket\.s3\.amazonaws\.com\/optimized\/.+e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98_#{OptimizedImage::VERSION}_100x200\.png/)
+ expect(store.store_optimized_image(optimized_image_file, optimized_image)).to match(/\/\/s3_upload_bucket\.s3\.amazonaws\.com\/optimized\/.+#{optimized_image.upload.sha1}_#{OptimizedImage::VERSION}_100x200\.png/)
end
end
diff --git a/spec/fabricators/upload_fabricator.rb b/spec/fabricators/upload_fabricator.rb
index a64d7def4d7..96abf714055 100644
--- a/spec/fabricators/upload_fabricator.rb
+++ b/spec/fabricators/upload_fabricator.rb
@@ -1,11 +1,11 @@
Fabricator(:upload) do
user
- sha1 "e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98"
+ sha1 { sequence(:sha1) { |n| Digest::SHA1.hexdigest(n.to_s) } }
original_filename "logo.png"
filesize 1234
width 100
height 200
- url "/uploads/default/1/1234567890123456.png"
+ url { sequence(:url) { |n| "/uploads/default/#{n}/1234567890123456.png" } }
end
Fabricator(:attachment, from: :upload) do
diff --git a/spec/fabricators/user_avatar_fabricator.rb b/spec/fabricators/user_avatar_fabricator.rb
new file mode 100644
index 00000000000..3cbd17cddc5
--- /dev/null
+++ b/spec/fabricators/user_avatar_fabricator.rb
@@ -0,0 +1,3 @@
+Fabricator(:user_avatar) do
+ user
+end
diff --git a/spec/jobs/clean_up_uploads_spec.rb b/spec/jobs/clean_up_uploads_spec.rb
index 6035f17fc22..ca0fd7390d8 100644
--- a/spec/jobs/clean_up_uploads_spec.rb
+++ b/spec/jobs/clean_up_uploads_spec.rb
@@ -3,16 +3,14 @@ require 'rails_helper'
require_dependency 'jobs/scheduled/clean_up_uploads'
describe Jobs::CleanUpUploads do
-
before do
Upload.destroy_all
SiteSetting.clean_up_uploads = true
SiteSetting.clean_orphan_uploads_grace_period_hours = 1
+ @upload = Fabricate(:upload, created_at: 2.hours.ago)
end
it "deletes orphan uploads" do
- Fabricate(:upload, created_at: 2.hours.ago)
-
expect(Upload.count).to be(1)
Jobs::CleanUpUploads.new.execute(nil)
@@ -20,4 +18,73 @@ describe Jobs::CleanUpUploads do
expect(Upload.count).to be(0)
end
+ it "does not delete profile background uploads" do
+ profile_background_upload = Fabricate(:upload, created_at: 2.hours.ago)
+ UserProfile.last.update_attributes!(profile_background: profile_background_upload.url)
+
+ Jobs::CleanUpUploads.new.execute(nil)
+
+ expect(Upload.find_by(id: @upload.id)).to eq(nil)
+ expect(Upload.find_by(id: profile_background_upload.id)).to eq(profile_background_upload)
+ end
+
+ it "does not delete card background uploads" do
+ card_background_upload = Fabricate(:upload, created_at: 2.hours.ago)
+ UserProfile.last.update_attributes!(card_background: card_background_upload.url)
+
+ Jobs::CleanUpUploads.new.execute(nil)
+
+ expect(Upload.find_by(id: @upload.id)).to eq(nil)
+ expect(Upload.find_by(id: card_background_upload.id)).to eq(card_background_upload)
+ end
+
+ it "does not delete category logo uploads" do
+ category_logo_upload = Fabricate(:upload, created_at: 2.hours.ago)
+ category = Fabricate(:category, logo_url: category_logo_upload.url)
+
+ Jobs::CleanUpUploads.new.execute(nil)
+
+ expect(Upload.find_by(id: @upload.id)).to eq(nil)
+ expect(Upload.find_by(id: category_logo_upload.id)).to eq(category_logo_upload)
+ end
+
+ it "does not delete category background url uploads" do
+ category_background_url = Fabricate(:upload, created_at: 2.hours.ago)
+ category = Fabricate(:category, background_url: category_background_url.url)
+
+ Jobs::CleanUpUploads.new.execute(nil)
+
+ expect(Upload.find_by(id: @upload.id)).to eq(nil)
+ expect(Upload.find_by(id: category_background_url.id)).to eq(category_background_url)
+ end
+
+ it "does not delete post uploads" do
+ upload = Fabricate(:upload, created_at: 2.hours.ago)
+ post = Fabricate(:post, uploads: [upload])
+
+ Jobs::CleanUpUploads.new.execute(nil)
+
+ expect(Upload.find_by(id: @upload.id)).to eq(nil)
+ expect(Upload.find_by(id: upload.id)).to eq(upload)
+ end
+
+ it "does not delete user uploaded avatar" do
+ upload = Fabricate(:upload, created_at: 2.hours.ago)
+ user = Fabricate(:user, uploaded_avatar: upload)
+
+ Jobs::CleanUpUploads.new.execute(nil)
+
+ expect(Upload.find_by(id: @upload.id)).to eq(nil)
+ expect(Upload.find_by(id: upload.id)).to eq(upload)
+ end
+
+ it "does not delete user gravatar" do
+ upload = Fabricate(:upload, created_at: 2.hours.ago)
+ user = Fabricate(:user, user_avatar: Fabricate(:user_avatar, gravatar_upload: upload))
+
+ Jobs::CleanUpUploads.new.execute(nil)
+
+ expect(Upload.find_by(id: @upload.id)).to eq(nil)
+ expect(Upload.find_by(id: upload.id)).to eq(upload)
+ end
end