From 186e415e383dfb8007bed9dc285bc2339e27dccc Mon Sep 17 00:00:00 2001 From: Martin Brennan Date: Mon, 20 Nov 2023 09:50:09 +1000 Subject: [PATCH] DEV: Housekeeping for CleanUpUploads job (#24361) Followup to 9db8f00b3dd6f2881adf1b786e29426889225e7a, we don't need this dead code any more. Also made some minor improvements and comments. --- app/jobs/scheduled/clean_up_uploads.rb | 64 +++++------------------ app/models/reviewable_queued_post.rb | 2 +- spec/fabricators/reviewable_fabricator.rb | 1 + spec/jobs/clean_up_uploads_spec.rb | 3 +- 4 files changed, 17 insertions(+), 53 deletions(-) diff --git a/app/jobs/scheduled/clean_up_uploads.rb b/app/jobs/scheduled/clean_up_uploads.rb index 4f8c89c11eb..71320a2db10 100644 --- a/app/jobs/scheduled/clean_up_uploads.rb +++ b/app/jobs/scheduled/clean_up_uploads.rb @@ -7,7 +7,7 @@ module Jobs def execute(args) grace_period = [SiteSetting.clean_orphan_uploads_grace_period_hours, 1].max - # always remove invalid upload records + # Always remove invalid upload records regardless of clean_up_uploads setting. Upload .by_users .where( @@ -19,21 +19,13 @@ module Jobs return unless SiteSetting.clean_up_uploads? - if c = last_cleanup - return if (Time.zone.now.to_i - c) < (grace_period / 2).hours + # Do nothing if the last cleanup was run too recently. + last_cleanup_timestamp = last_cleanup + if last_cleanup_timestamp.present? && + (Time.zone.now.to_i - last_cleanup_timestamp) < (grace_period / 2).hours + return end - base_url = - ( - if Discourse.store.internal? - Discourse.store.relative_base_url - else - Discourse.store.absolute_base_url - end - ) - s3_hostname = URI.parse(base_url).hostname - s3_cdn_hostname = URI.parse(SiteSetting.Upload.s3_cdn_url || "").hostname - result = Upload.by_users Upload.unused_callbacks&.each { |handler| result = handler.call(result) } result = @@ -42,46 +34,16 @@ module Jobs "uploads.retain_hours IS NULL OR uploads.created_at < current_timestamp - interval '1 hour' * uploads.retain_hours", ) .where("uploads.created_at < ?", grace_period.hour.ago) + # Don't remove any secure uploads. .where("uploads.access_control_post_id IS NULL") .joins("LEFT JOIN upload_references ON upload_references.upload_id = uploads.id") + # Don't remove any uploads linked to an UploadReference. .where("upload_references.upload_id IS NULL") .with_no_non_post_relations result.find_each do |upload| next if Upload.in_use_callbacks&.any? { |callback| callback.call(upload) } - - if upload.sha1.present? - # TODO: Remove this check after UploadReferences records were created - encoded_sha = Base62.encode(upload.sha1.hex) - if ReviewableQueuedPost - .pending - .where( - "payload->>'raw' LIKE ? OR payload->>'raw' LIKE ?", - "%#{upload.sha1}%", - "%#{encoded_sha}%", - ) - .exists? - next - end - if Draft.where( - "data LIKE ? OR data LIKE ?", - "%#{upload.sha1}%", - "%#{encoded_sha}%", - ).exists? - next - end - if UserProfile.where( - "bio_raw LIKE ? OR bio_raw LIKE ?", - "%#{upload.sha1}%", - "%#{encoded_sha}%", - ).exists? - next - end - - upload.destroy - else - upload.delete - end + upload.sha1.present? ? upload.destroy : upload.delete end ExternalUploadStub.cleanup! @@ -89,13 +51,13 @@ module Jobs self.last_cleanup = Time.zone.now.to_i end - def last_cleanup=(v) - Discourse.redis.setex(last_cleanup_key, 7.days.to_i, v.to_s) + def last_cleanup=(timestamp) + Discourse.redis.setex(last_cleanup_key, 7.days.to_i, timestamp.to_s) end def last_cleanup - v = Discourse.redis.get(last_cleanup_key) - v ? v.to_i : v + timestamp = Discourse.redis.get(last_cleanup_key) + timestamp ? timestamp.to_i : timestamp end def reset_last_cleanup! diff --git a/app/models/reviewable_queued_post.rb b/app/models/reviewable_queued_post.rb index bccce824bc2..7132282fe5a 100644 --- a/app/models/reviewable_queued_post.rb +++ b/app/models/reviewable_queued_post.rb @@ -7,7 +7,7 @@ class ReviewableQueuedPost < Reviewable end after_save do - if saved_change_to_payload? && self.status == Reviewable.statuses[:pending] && + if saved_change_to_payload? && self.status.to_sym == :pending && self.payload&.[]("raw").present? upload_ids = Upload.extract_upload_ids(self.payload["raw"]) UploadReference.ensure_exist!(upload_ids: upload_ids, target: self) diff --git a/spec/fabricators/reviewable_fabricator.rb b/spec/fabricators/reviewable_fabricator.rb index e9d4e288e2f..25d7c7eb5db 100644 --- a/spec/fabricators/reviewable_fabricator.rb +++ b/spec/fabricators/reviewable_fabricator.rb @@ -12,6 +12,7 @@ Fabricator(:reviewable) do category score 1.23 payload { { list: [1, 2, 3], name: "bandersnatch" } } + status { :pending } end Fabricator(:reviewable_queued_post_topic, class_name: :reviewable_queued_post) do diff --git a/spec/jobs/clean_up_uploads_spec.rb b/spec/jobs/clean_up_uploads_spec.rb index 837ae807fdd..654309b4841 100644 --- a/spec/jobs/clean_up_uploads_spec.rb +++ b/spec/jobs/clean_up_uploads_spec.rb @@ -289,6 +289,7 @@ RSpec.describe Jobs::CleanUpUploads do payload: { raw: "#{upload.short_url}\n#{upload2.short_url}", }, + status: :pending, ) Fabricate( @@ -296,7 +297,7 @@ RSpec.describe Jobs::CleanUpUploads do payload: { raw: "#{upload3.short_url}", }, - status: Reviewable.statuses[:rejected], + status: :rejected, ) Jobs::CleanUpUploads.new.execute(nil)