DEV: Introduce hidden s3_inventory_bucket site setting (#27304)

This commit introduces a hidden `s3_inventory_bucket` site setting which
replaces the `enable_s3_inventory` and `s3_configure_inventory_policy`
site setting.

The reason `enable_s3_inventory` and `s3_configure_inventory_policy`
site settings are removed is because this feature has technically been
broken since it was introduced. When the `enable_s3_inventory` feature
is turned on, the app will because configure a daily inventory policy for the
`s3_upload_bucket` bucket and store the inventories under a prefix in
the bucket. The problem here is that once the inventories are created,
there is nothing cleaning up all these inventories so whoever that has
enabled this feature would have been paying the cost of storing a whole
bunch of inventory files which are never used. Given that we have not
received any complains about inventory files inflating S3 storage costs,
we think that it is very likely that this feature is no longer being
used and we are looking to drop support for this feature in the not too
distance future.

For now, we will still support a hidden `s3_inventory_bucket` site
setting which site administrators can configure via the
`DISCOURSE_S3_INVENTORY_BUCKET` env.
This commit is contained in:
Alan Guo Xiang Tan 2024-06-10 13:16:00 +08:00 committed by GitHub
parent 3bff633ce0
commit 8cf4ed5f88
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 62 additions and 260 deletions

View File

@ -1,21 +0,0 @@
# frozen_string_literal: true
require "s3_inventory"
module Jobs
# if upload bucket changes or inventory bucket changes we want to update s3 bucket policy and inventory configuration
class UpdateS3Inventory < ::Jobs::Base
def execute(args)
unless SiteSetting.enable_s3_inventory? && SiteSetting.Upload.enable_s3_uploads &&
SiteSetting.s3_configure_inventory_policy
return
end
%i[upload optimized].each do |type|
s3_inventory = S3Inventory.new(Discourse.store.s3_helper, type)
s3_inventory.update_bucket_policy if type == :upload
s3_inventory.update_bucket_inventory_configuration
end
end
end
end

View File

@ -15,10 +15,6 @@ module Jobs
end
end
def s3_helper
Discourse.store.s3_helper
end
def prepare_for_all_sites
inventory = S3Inventory.new(s3_helper, :upload)
@db_inventories = inventory.prepare_for_all_sites
@ -26,8 +22,7 @@ module Jobs
end
def execute(args)
return if !SiteSetting.enable_s3_inventory
require "s3_inventory"
return if (s3_inventory_bucket = SiteSetting.s3_inventory_bucket).blank?
if !@db_inventories && Rails.configuration.multisite && GlobalSetting.use_s3?
prepare_for_all_sites
@ -37,13 +32,13 @@ module Jobs
preloaded_inventory_file =
@db_inventories[RailsMultisite::ConnectionManagement.current_db]
S3Inventory.new(
s3_helper,
:upload,
s3_inventory_bucket:,
preloaded_inventory_file: preloaded_inventory_file,
preloaded_inventory_date: @inventory_date,
).backfill_etags_and_list_missing
else
S3Inventory.new(s3_helper, :upload).backfill_etags_and_list_missing
S3Inventory.new(:upload, s3_inventory_bucket:).backfill_etags_and_list_missing
end
end
end

View File

@ -53,8 +53,6 @@ DiscourseEvent.on(:site_setting_changed) do |name, old_value, new_value|
Scheduler::Defer.later("Null topic slug") { Topic.update_all(slug: nil) }
end
Jobs.enqueue(:update_s3_inventory) if %i[enable_s3_inventory s3_upload_bucket].include?(name)
SvgSprite.expire_cache if name.to_s.include?("_icon")
SiteIconManager.ensure_optimized! if SiteIconManager::WATCHED_SETTINGS.include?(name)

View File

@ -1973,7 +1973,6 @@ en:
s3_endpoint: "The endpoint can be modified to backup to an S3 compatible service like DigitalOcean Spaces or Minio. WARNING: Leave blank if using AWS S3."
s3_configure_tombstone_policy: "Enable automatic deletion policy for tombstone uploads. IMPORTANT: If disabled, no space will be reclaimed after uploads are deleted."
s3_disable_cleanup: "Prevent removal of old backups from S3 when there are more backups than the maximum allowed."
enable_s3_inventory: "Generate reports and verify uploads using Amazon S3 inventory. IMPORTANT: requires valid S3 credentials (both access key id & secret access key)."
s3_use_acls: "AWS recommends not using ACLs on S3 buckets; if you are following this advice, uncheck this option. This must be enabled if you are using secure uploads."
backup_time_of_day: "Time of day UTC when the backup should occur."
backup_with_uploads: "Include uploads in scheduled backups. Disabling this will only backup the database."

View File

@ -1581,11 +1581,6 @@ files:
default: true
s3_use_acls:
default: true
enable_s3_inventory:
default: false
s3_configure_inventory_policy:
default: true
hidden: true
s3_presigned_get_url_expires_after_seconds:
default: 300
hidden: true
@ -2487,6 +2482,10 @@ backups:
s3_backup_bucket:
default: ""
regex: '^[a-z0-9\-\/]+$' # can't use '.' when using HTTPS
s3_inventory_bucket:
hidden: true
default: ""
regex: '^[a-z0-9\-\/_]+$' # can't use '.' when using HTTPS
s3_disable_cleanup:
default: false
backup_time_of_day:

View File

@ -297,10 +297,12 @@ module FileStore
end
def list_missing_uploads(skip_optimized: false)
if SiteSetting.enable_s3_inventory
require "s3_inventory"
S3Inventory.new(s3_helper, :upload).backfill_etags_and_list_missing
S3Inventory.new(s3_helper, :optimized).backfill_etags_and_list_missing unless skip_optimized
if s3_inventory_bucket = SiteSetting.s3_inventory_bucket
S3Inventory.new(:upload, s3_inventory_bucket:).backfill_etags_and_list_missing
unless skip_optimized
S3Inventory.new(:optimized, s3_inventory_bucket:).backfill_etags_and_list_missing
end
else
list_missing(Upload.by_users, "original/")
list_missing(OptimizedImage, "optimized/") unless skip_optimized

View File

@ -282,7 +282,12 @@ class S3Helper
end
def s3_client
@s3_client ||= Aws::S3::Client.new(@s3_options)
@s3_client ||= init_aws_s3_client
end
def stub_client_responses!
raise "This method is only allowed to be used in the testing environment" if !Rails.env.test?
@s3_client = init_aws_s3_client(stub_responses: true)
end
def s3_inventory_path(path = "inventory")
@ -381,6 +386,12 @@ class S3Helper
private
def init_aws_s3_client(stub_responses: false)
options = @s3_options
options = options.merge(stub_responses: true) if stub_responses
Aws::S3::Client.new(options)
end
def fetch_bucket_cors_rules
begin
s3_resource.client.get_bucket_cors(bucket: @s3_bucket_name).cors_rules&.map(&:to_h) || []

View File

@ -4,17 +4,21 @@ require "aws-sdk-s3"
require "csv"
class S3Inventory
attr_reader :type, :inventory_date
attr_reader :type, :inventory_date, :s3_helper
CSV_KEY_INDEX = 1
CSV_ETAG_INDEX = 2
INVENTORY_PREFIX = "inventory"
INVENTORY_VERSION = "1"
INVENTORY_LAG = 2.days
WAIT_AFTER_RESTORE_DAYS = 2
def initialize(s3_helper, type, preloaded_inventory_file: nil, preloaded_inventory_date: nil)
@s3_helper = s3_helper
def initialize(
type,
s3_inventory_bucket:,
preloaded_inventory_file: nil,
preloaded_inventory_date: nil
)
@s3_helper = S3Helper.new(s3_inventory_bucket)
if preloaded_inventory_file && preloaded_inventory_date
# Data preloaded, so we don't need to fetch it again
@ -189,43 +193,6 @@ class S3Inventory
)
end
def update_bucket_policy
@s3_helper.s3_client.put_bucket_policy(
bucket: bucket_name,
policy: {
Version: "2012-10-17",
Statement: [
{
Sid: "InventoryAndAnalyticsPolicy",
Effect: "Allow",
Principal: {
Service: "s3.amazonaws.com",
},
Action: ["s3:PutObject"],
Resource: ["#{inventory_path_arn}/*"],
Condition: {
ArnLike: {
"aws:SourceArn": bucket_arn,
},
StringEquals: {
"s3:x-amz-acl": "bucket-owner-full-control",
},
},
},
],
}.to_json,
)
end
def update_bucket_inventory_configuration
@s3_helper.s3_client.put_bucket_inventory_configuration(
bucket: bucket_name,
id: inventory_id,
inventory_configuration: inventory_configuration,
use_accelerate_endpoint: false,
)
end
def prepare_for_all_sites
db_names = RailsMultisite::ConnectionManagement.all_dbs
db_files = {}
@ -248,6 +215,10 @@ class S3Inventory
cleanup!
end
def s3_client
@s3_helper.s3_client
end
private
def cleanup!
@ -318,31 +289,6 @@ class S3Inventory
end
end
def inventory_configuration
filter_prefix = type
filter_prefix = bucket_folder_path if bucket_folder_path.present?
{
destination: {
s3_bucket_destination: {
bucket: bucket_arn,
prefix: inventory_path,
format: "CSV",
},
},
filter: {
prefix: filter_prefix,
},
is_enabled: SiteSetting.enable_s3_inventory,
id: inventory_id,
included_object_versions: "Current",
optional_fields: ["ETag"],
schedule: {
frequency: "Daily",
},
}
end
def bucket_name
@s3_helper.s3_bucket_name
end
@ -353,8 +299,7 @@ class S3Inventory
def unsorted_files
objects = []
hive_path = File.join(inventory_path, bucket_name, inventory_id, "hive")
hive_path = File.join(bucket_folder_path, "hive")
@s3_helper.list(hive_path).each { |obj| objects << obj if obj.key.match?(/symlink\.txt\z/i) }
objects
@ -363,28 +308,6 @@ class S3Inventory
[]
end
def inventory_id
@inventory_id ||=
begin
id = Rails.configuration.multisite ? "original" : type # TODO: rename multisite path to "uploads"
bucket_folder_path.present? ? "#{bucket_folder_path}-#{id}" : id
end
end
def inventory_path_arn
File.join(bucket_arn, inventory_path)
end
def inventory_path
path = File.join(INVENTORY_PREFIX, INVENTORY_VERSION)
path = File.join(bucket_folder_path, path) if bucket_folder_path.present?
path
end
def bucket_arn
"arn:aws:s3:::#{bucket_name}"
end
def log(message, ex = nil)
puts(message)
Rails.logger.error("#{ex}\n" + (ex.backtrace || []).join("\n")) if ex

View File

@ -187,12 +187,6 @@ module SiteSettings::Validations
end
end
def validate_enable_s3_inventory(new_val)
if new_val == "t" && !SiteSetting.Upload.enable_s3_uploads
validate_error :enable_s3_uploads_is_required
end
end
def validate_backup_location(new_val)
return unless new_val == BackupLocationSiteSetting::S3
if SiteSetting.s3_backup_bucket.blank?

View File

@ -3,11 +3,8 @@
RSpec.describe Jobs::EnsureS3UploadsExistence do
subject(:job) { described_class.new }
context "with S3 inventory enabled" do
before do
setup_s3
SiteSetting.enable_s3_inventory = true
end
context "when `s3_inventory_bucket` has been set" do
before { SiteSetting.s3_inventory_bucket = "some-bucket-name" }
it "works" do
S3Inventory.any_instance.expects(:backfill_etags_and_list_missing).once
@ -15,8 +12,8 @@ RSpec.describe Jobs::EnsureS3UploadsExistence do
end
end
context "with S3 inventory disabled" do
before { SiteSetting.enable_s3_inventory = false }
context "when `s3_inventory_bucket` has not been set" do
before { SiteSetting.s3_inventory_bucket = nil }
it "doesn't execute" do
S3Inventory.any_instance.expects(:backfill_etags_and_list_missing).never

View File

@ -1,63 +0,0 @@
# frozen_string_literal: true
require "file_store/s3_store"
RSpec.describe Jobs::UpdateS3Inventory do
before do
setup_s3
SiteSetting.s3_upload_bucket = "special-bucket"
SiteSetting.enable_s3_inventory = true
store = FileStore::S3Store.new
@client = Aws::S3::Client.new(stub_responses: true)
store.s3_helper.stubs(:s3_client).returns(@client)
Discourse.stubs(:store).returns(store)
end
it "updates the bucket policy and inventory configuration in S3" do
id = "original"
path = File.join(S3Inventory::INVENTORY_PREFIX, S3Inventory::INVENTORY_VERSION)
@client.expects(:put_bucket_policy).with(
bucket: "special-bucket",
policy:
%Q|{"Version":"2012-10-17","Statement":[{"Sid":"InventoryAndAnalyticsPolicy","Effect":"Allow","Principal":{"Service":"s3.amazonaws.com"},"Action":["s3:PutObject"],"Resource":["arn:aws:s3:::special-bucket/#{path}/*"],"Condition":{"ArnLike":{"aws:SourceArn":"arn:aws:s3:::special-bucket"},"StringEquals":{"s3:x-amz-acl":"bucket-owner-full-control"}}}]}|,
)
@client.expects(:put_bucket_inventory_configuration)
@client.expects(:put_bucket_inventory_configuration).with(
bucket: "special-bucket",
id: id,
inventory_configuration: {
destination: {
s3_bucket_destination: {
bucket: "arn:aws:s3:::special-bucket",
prefix: path,
format: "CSV",
},
},
filter: {
prefix: id,
},
is_enabled: true,
id: id,
included_object_versions: "Current",
optional_fields: ["ETag"],
schedule: {
frequency: "Daily",
},
},
use_accelerate_endpoint: false,
)
described_class.new.execute(nil)
end
it "doesn't update the policy with s3_configure_inventory_policy disabled" do
SiteSetting.s3_configure_inventory_policy = false
@client.expects(:put_bucket_policy).never
@client.expects(:put_bucket_inventory_configuration).never
described_class.new.execute(nil)
end
end

View File

@ -5,15 +5,16 @@ require "s3_inventory"
require "file_store/s3_store"
RSpec.describe "S3Inventory", type: :multisite do
let(:client) { Aws::S3::Client.new(stub_responses: true) }
let(:helper) { S3Helper.new(SiteSetting.Upload.s3_upload_bucket.downcase, "", client: client) }
let(:inventory) { S3Inventory.new(helper, :upload) }
let(:inventory) do
S3Inventory.new(:upload, s3_inventory_bucket: "some-inventory-bucket/some/prefix")
end
let(:csv_filename) { "#{Rails.root}/spec/fixtures/csv/s3_inventory.csv" }
it "can create per-site files" do
freeze_time
setup_s3
SiteSetting.enable_s3_inventory = true
inventory.stubs(:files).returns([{ key: "Key", filename: "#{csv_filename}.gz" }])
inventory.stubs(:cleanup!)
@ -23,6 +24,7 @@ RSpec.describe "S3Inventory", type: :multisite do
expect(db1.lines.count).to eq(3)
expect(db2.lines.count).to eq(1)
files.values.each do |f|
f.close
f.unlink

View File

@ -1,25 +1,19 @@
# frozen_string_literal: true
require "s3_helper"
require "s3_inventory"
require "file_store/s3_store"
RSpec.describe S3Inventory do
let(:client) { Aws::S3::Client.new(stub_responses: true) }
let(:resource) { Aws::S3::Resource.new(client: client) }
let(:bucket) { resource.bucket(SiteSetting.Upload.s3_upload_bucket.downcase) }
let(:helper) { S3Helper.new(bucket.name, "", client: client, bucket: bucket) }
let(:inventory) { S3Inventory.new(helper, :upload) }
let(:inventory) do
S3Inventory.new(:upload, s3_inventory_bucket: "some-inventory-bucket/inventoried-bucket/prefix")
end
let(:csv_filename) { "#{Rails.root}/spec/fixtures/csv/s3_inventory.csv" }
before do
setup_s3
SiteSetting.enable_s3_inventory = true
inventory.s3_helper.stub_client_responses!
inventory.stubs(:cleanup!)
end
it "should raise error if an inventory file is not found" do
client.stub_responses(:list_objects, contents: [])
inventory.s3_client.stub_responses(:list_objects, contents: [])
output = capture_stdout { inventory.backfill_etags_and_list_missing }
expect(output).to eq("Failed to list inventory from S3\n")
end
@ -141,7 +135,7 @@ RSpec.describe S3Inventory do
end
it "should run if inventory files are at least #{described_class::WAIT_AFTER_RESTORE_DAYS.days} days older than the last restore date" do
client.stub_responses(
inventory.s3_client.stub_responses(
:list_objects_v2,
{
contents: [
@ -155,16 +149,13 @@ RSpec.describe S3Inventory do
},
)
client.expects(:get_object).once
inventory.s3_client.expects(:get_object).once
capture_stdout do
inventory = described_class.new(helper, :upload)
inventory.backfill_etags_and_list_missing
end
capture_stdout { inventory.backfill_etags_and_list_missing }
end
it "should not run if inventory files are not at least #{described_class::WAIT_AFTER_RESTORE_DAYS.days} days older than the last restore date" do
client.stub_responses(
inventory.s3_client.stub_responses(
:list_objects_v2,
{
contents: [
@ -177,12 +168,9 @@ RSpec.describe S3Inventory do
},
)
client.expects(:get_object).never
inventory.s3_client.expects(:get_object).never
capture_stdout do
inventory = described_class.new(helper, :upload)
inventory.backfill_etags_and_list_missing
end
capture_stdout { inventory.backfill_etags_and_list_missing }
end
end
@ -203,11 +191,12 @@ RSpec.describe S3Inventory do
File.open(csv_filename) do |f|
preloaded_inventory =
S3Inventory.new(
helper,
:upload,
s3_inventory_bucket: "some-inventory-bucket",
preloaded_inventory_file: f,
preloaded_inventory_date: Time.now,
)
preloaded_inventory.backfill_etags_and_list_missing
end
end
@ -215,27 +204,4 @@ RSpec.describe S3Inventory do
expect(output).to eq("#{upload.url}\n#{no_etag.url}\n2 of 5 uploads are missing\n")
expect(Discourse.stats.get("missing_s3_uploads")).to eq(2)
end
describe "s3 inventory configuration" do
let(:bucket_name) { "s3-upload-bucket" }
let(:subfolder_path) { "subfolder" }
before { SiteSetting.s3_upload_bucket = "#{bucket_name}/#{subfolder_path}" }
it "is formatted correctly for subfolders" do
s3_helper = S3Helper.new(SiteSetting.Upload.s3_upload_bucket.downcase, "", client: client)
config = S3Inventory.new(s3_helper, :upload).send(:inventory_configuration)
expect(config[:destination][:s3_bucket_destination][:bucket]).to eq(
"arn:aws:s3:::#{bucket_name}",
)
expect(config[:destination][:s3_bucket_destination][:prefix]).to eq(
"#{subfolder_path}/inventory/1",
)
expect(config[:id]).to eq("#{subfolder_path}-original")
expect(config[:schedule][:frequency]).to eq("Daily")
expect(config[:included_object_versions]).to eq("Current")
expect(config[:optional_fields]).to eq(["ETag"])
expect(config[:filter][:prefix]).to eq(subfolder_path)
end
end
end