Mirror of https://github.com/discourse/discourse.git
(synced 2025-02-25 18:55:32 -06:00)
Commit: "REFACTOR: Restoring of backups and migration of uploads to S3"
This commit is contained in:
@@ -224,59 +224,19 @@ def migrate_to_s3_all_sites
|
||||
end
|
||||
end
|
||||
|
||||
# Verifies that the uploads-to-S3 migration for +db+ finished cleanly.
#
# Four checks are performed, in order:
#   1. every user upload URL points at the S3 base URL
#   2. no cooked post still references the old local CDN path
#   3. `rake posts:missing_uploads` reports no issues
#   4. no posts are awaiting a rebake (informational; still fails the check)
#
# db           - database/site name, used in messages and URL prefixes
# should_raise - when true, raise (RuntimeError) on the first failing check
#                instead of merely printing and returning false
#
# Returns true when all checks pass, false otherwise.
def migration_successful?(db, should_raise = false)
  ok = true
  failure_message = "S3 migration failed for db '#{db}'."

  # Shared handling for a failing count: optionally raise, downgrade `ok`,
  # and print the message — in that order, matching the original flow.
  check = lambda do |count, error_message|
    raise error_message if count > 0 && should_raise
    ok &&= count == 0
    puts error_message if count > 0
  end

  prefix = ENV["MIGRATE_TO_MULTISITE"] ? "uploads/#{db}/original/" : "original/"
  base_url = File.join(SiteSetting.Upload.s3_base_url, prefix)

  # 1) uploads whose URL does not start with the S3 base URL
  not_migrated = Upload.by_users.where("url NOT LIKE '#{base_url}%'").count
  check.call(not_migrated, "#{not_migrated} of #{Upload.count} uploads are not migrated to S3. #{failure_message}")

  # 2) cooked posts still pointing at the protocol-relative local CDN path
  cdn_path = SiteSetting.cdn_path("/uploads/#{db}/original").sub(/https?:/, "")
  not_remapped = Post.where("cooked LIKE '%#{cdn_path}%'").count
  check.call(not_remapped, "#{not_remapped} posts are not remapped to new S3 upload URL. #{failure_message}")

  # 3) run the missing-uploads audit and count what it flagged
  Rake::Task['posts:missing_uploads'].invoke('single_site')
  missing = PostCustomField.where(name: Post::MISSING_UPLOADS).count
  check.call(missing, "rake posts:missing_uploads identified #{missing} issues. #{failure_message}")

  # 4) rebake backlog: never raises, but leaves the migration marked unfinished
  stale = Post.where('baked_version <> ? OR baked_version IS NULL', Post::BAKED_VERSION).count
  if stale > 0
    puts "#{stale} posts still require rebaking and will be rebaked during regular job"
    puts "To speed up migrations of posts we recommend you run 'rake posts:rebake_uncooked_posts'" if stale > 100
    ok = false
  else
    puts "No posts require rebaking"
  end

  ok
end
|
||||
# Runs the uploads-to-S3 migration for the current site via
# FileStore::ToS3Migration. All behavior toggles come from the environment:
# DRY_RUN, MIGRATE_TO_MULTISITE and SKIP_ETAG_VERIFY (mere presence of the
# variable enables the flag); S3 credentials are read by
# FileStore::ToS3Migration.s3_options_from_env.
def migrate_to_s3
  settings = {
    s3_options: FileStore::ToS3Migration.s3_options_from_env,
    dry_run: !!ENV["DRY_RUN"],
    migrate_to_multisite: !!ENV["MIGRATE_TO_MULTISITE"],
    skip_etag_verify: !!ENV["SKIP_ETAG_VERIFY"]
  }
  migration = FileStore::ToS3Migration.new(**settings)
  migration.migrate
end
|
||||
|
||||
task "uploads:s3_migration_status" => :environment do
|
||||
success = true
|
||||
RailsMultisite::ConnectionManagement.each_connection do
|
||||
db = RailsMultisite::ConnectionManagement.current_db
|
||||
success &&= migration_successful?(db)
|
||||
success &&= FileStore::ToS3Migration.new.migration_successful?
|
||||
end
|
||||
|
||||
queued_jobs = Sidekiq::Stats.new.queues.sum { |_ , x| x }
|
||||
@@ -293,266 +253,6 @@ task "uploads:s3_migration_status" => :environment do
|
||||
puts "All sites appear to have uploads in order!"
|
||||
end
|
||||
|
||||
# Legacy (removed in this commit) implementation of the uploads-to-S3
# migration for the current multisite connection. Performs, in order:
# preflight checks (scheme migration, env credentials, CDN URL), a file
# sync from public/uploads to the S3 bucket, DB URL remapping, legacy
# inline-image rewriting, optimized-image cleanup, and a final
# migration_successful? assertion. Exits the process (codes 1-3) on
# preflight failure; raises on upload failures.
def migrate_to_s3

  # We don't want a half-migrated state: run all enqueued jobs inline
  # instead of deferring them to Sidekiq.
  Jobs.run_immediately!

  db = RailsMultisite::ConnectionManagement.current_db

  # Presence of DRY_RUN switches every destructive step below to printing.
  dry_run = !!ENV["DRY_RUN"]

  puts "Checking if #{db} already migrated..."
  return puts "Already migrated #{db}!" if migration_successful?(db)

  puts "*" * 30 + " DRY RUN " + "*" * 30 if dry_run
  puts "Migrating uploads to S3 for '#{db}'..."

  # All uploads must already be on the "new scheme" path layout
  # (original/_X/... — `_` is a single-char SQL wildcard, so this matches
  # e.g. "1X", "2X" scheme directories) before we can sync to S3.
  if Upload.by_users.where("url NOT LIKE '//%' AND url NOT LIKE '#{GlobalSetting.relative_url_root}/uploads/#{db}/original/_X/%'").exists?
    puts <<~TEXT
      Some uploads were not migrated to the new scheme. Please run these commands in the rails console

      SiteSetting.migrate_to_new_scheme = true
      Jobs::MigrateUploadScheme.new.execute(nil)
    TEXT
    exit 1
  end

  # Require bucket + region, plus either static keys or an IAM profile.
  unless ENV["DISCOURSE_S3_BUCKET"].present? &&
      ENV["DISCOURSE_S3_REGION"].present? &&
      (
        (
          ENV["DISCOURSE_S3_ACCESS_KEY_ID"].present? &&
          ENV["DISCOURSE_S3_SECRET_ACCESS_KEY"].present?
        ) ||
        ENV["DISCOURSE_S3_USE_IAM_PROFILE"].present?
      )

    puts <<~TEXT
      Please provide the following environment variables
        - DISCOURSE_S3_BUCKET
        - DISCOURSE_S3_REGION
      and either
        - DISCOURSE_S3_ACCESS_KEY_ID
        - DISCOURSE_S3_SECRET_ACCESS_KEY
      or
        - DISCOURSE_S3_USE_IAM_PROFILE
    TEXT
    exit 2
  end

  if SiteSetting.Upload.s3_cdn_url.blank?
    puts "Please provide the 'DISCOURSE_S3_CDN_URL' environment variable"
    exit 3
  end

  # A "/" in the bucket setting means bucket-name/folder-prefix form.
  bucket_has_folder_path = true if ENV["DISCOURSE_S3_BUCKET"].include? "/"
  public_directory = Rails.root.join("public").to_s

  opts = {
    region: ENV["DISCOURSE_S3_REGION"],
    access_key_id: ENV["DISCOURSE_S3_ACCESS_KEY_ID"],
    secret_access_key: ENV["DISCOURSE_S3_SECRET_ACCESS_KEY"]
  }

  # S3::Client ignores the `region` option when an `endpoint` is provided.
  # Without `region`, non-default region bucket creation will break for S3, so we can only
  # define endpoint when not using S3 i.e. when SiteSetting.s3_endpoint is provided.
  opts[:endpoint] = SiteSetting.s3_endpoint if SiteSetting.s3_endpoint.present?
  s3 = Aws::S3::Client.new(opts)

  if bucket_has_folder_path
    bucket, folder = S3Helper.get_bucket_and_folder_path(ENV["DISCOURSE_S3_BUCKET"])
    folder = File.join(folder, "/")
  else
    bucket, folder = ENV["DISCOURSE_S3_BUCKET"], ""
  end

  puts "Uploading files to S3..."
  print " - Listing local files"

  # Enumerate every original upload on disk; one dot per 1000 files.
  local_files = []
  IO.popen("cd #{public_directory} && find uploads/#{db}/original -type f").each do |file|
    local_files << file.chomp
    putc "." if local_files.size % 1000 == 0
  end

  puts " => #{local_files.size} files"
  print " - Listing S3 files"

  s3_objects = []
  prefix = ENV["MIGRATE_TO_MULTISITE"] ? "uploads/#{db}/original/" : "original/"

  options = { bucket: bucket, prefix: folder + prefix }

  # Page through list_objects_v2 until no continuation token remains.
  loop do
    response = s3.list_objects_v2(options)
    s3_objects.concat(response.contents)
    putc "."
    break if response.next_continuation_token.blank?
    options[:continuation_token] = response.next_continuation_token
  end

  puts " => #{s3_objects.size} files"
  puts " - Syncing files to S3"

  synced = 0
  failed = []

  # SKIP_ETAG_VERIFY avoids hashing every file up front; a hash is still
  # computed lazily below before any actual upload.
  skip_etag_verify = ENV["SKIP_ETAG_VERIFY"].present?
  local_files.each do |file|
    path = File.join(public_directory, file)
    name = File.basename(path)
    etag = Digest::MD5.file(path).hexdigest unless skip_etag_verify
    key = file[file.index(prefix)..-1]
    key.prepend(folder) if bucket_has_folder_path
    original_path = file.sub("uploads/#{db}", "")

    # Skip files already present on S3 with matching size (and, unless
    # skipped, matching MD5 — the substring test `etag[...]` tolerates the
    # quotes S3 wraps around its ETag values).
    if s3_object = s3_objects.find { |obj| obj.key.ends_with?(original_path) }
      next if File.size(path) == s3_object.size && (skip_etag_verify || s3_object.etag[etag])
    end

    options = {
      acl: "public-read",
      body: File.open(path, "rb"),
      bucket: bucket,
      content_type: MiniMime.lookup_by_filename(name)&.content_type,
      key: key,
    }

    # Non-image attachments: force download with the original filename, and
    # keep secure uploads private.
    if !FileHelper.is_supported_image?(name)
      upload = Upload.find_by(url: "/#{file}")

      if upload&.original_filename
        options[:content_disposition] =
          %Q{attachment; filename="#{upload.original_filename}"}
      end

      if upload&.secure
        options[:acl] = "private"
      end
    end

    etag ||= Digest::MD5.file(path).hexdigest

    if dry_run
      puts "#{file} => #{options[:key]}"
      synced += 1
    elsif s3.put_object(options).etag[etag]
      putc "."
      synced += 1
    else
      putc "X"
      failed << path
    end
  end

  puts

  failure_message = "S3 migration failed for db '#{db}'."

  if failed.size > 0
    puts "Failed to upload #{failed.size} files"
    puts failed.join("\n")
    raise failure_message
  elsif s3_objects.size + synced >= local_files.size
    puts "Updating the URLs in the database..."

    from = "/uploads/#{db}/original/"
    to = "#{SiteSetting.Upload.s3_base_url}/#{prefix}"

    if dry_run
      puts "REPLACING '#{from}' WITH '#{to}'"
    else
      DbHelper.remap(from, to, anchor_left: true)
    end

    # Rewrite the remaining absolute-path references (src/href attributes in
    # both quote styles, plus [img] BBCode) to the S3 base URL.
    [
      [
        "src=\"/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)",
        "src=\"#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1"
      ],
      [
        "src='/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)",
        "src='#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1"
      ],
      [
        "href=\"/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)",
        "href=\"#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1"
      ],
      [
        "href='/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)",
        "href='#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1"
      ],
      [
        "\\[img\\]/uploads/#{db}/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9\\.]*)\\[/img\\]",
        "[img]#{SiteSetting.Upload.s3_base_url}/#{prefix}\\1[/img]"
      ]
    ].each do |from_url, to_url|
      if dry_run
        puts "REPLACING '#{from_url}' WITH '#{to_url}'"
      else
        DbHelper.regexp_replace(from_url, to_url)
      end
    end

    unless dry_run
      # Legacy inline image format
      # NOTE(review): the LIKE pattern ('%%') below and the gsub("", "")
      # inside the loop appear truncated in this copy — the original
      # markdown-image search/replace literals (the ![](...) forms) seem to
      # have been lost in transit. Restore them before reusing this code.
      Post.where("raw LIKE '%%'").each do |post|
        regexp = /!\[\](\/uploads\/#{db}\/original\/(\dX\/(?:[a-f0-9]\/)*[a-f0-9]{40}[a-z0-9\.]*))/

        post.raw.scan(regexp).each do |upload_url, _|
          upload = Upload.get_from_url(upload_url)
          post.raw = post.raw.gsub("", "")
        end

        post.save!(validate: false)
      end
    end

    if Discourse.asset_host.present?
      # Uploads that were on local CDN will now be on S3 CDN
      from = "#{Discourse.asset_host}/uploads/#{db}/original/"
      to = "#{SiteSetting.Upload.s3_cdn_url}/#{prefix}"

      if dry_run
        puts "REMAPPING '#{from}' TO '#{to}'"
      else
        DbHelper.remap(from, to)
      end
    end

    # Uploads that were on base hostname will now be on S3 CDN
    from = "#{Discourse.base_url}/uploads/#{db}/original/"
    to = "#{SiteSetting.Upload.s3_cdn_url}/#{prefix}"

    if dry_run
      puts "REMAPPING '#{from}' TO '#{to}'"
    else
      DbHelper.remap(from, to)
    end

    unless dry_run
      puts "Removing old optimized images..."

      # Optimized images still on local paths (url NOT LIKE '//...') whose
      # parent upload has moved to S3 are stale; drop them so they regenerate.
      OptimizedImage
        .joins("LEFT JOIN uploads u ON optimized_images.upload_id = u.id")
        .where("u.id IS NOT NULL AND u.url LIKE '//%' AND optimized_images.url NOT LIKE '//%'")
        .delete_all

      puts "Flagging all posts containing lightboxes for rebake..."

      count = Post.where("cooked LIKE '%class=\"lightbox\"%'").update_all(baked_version: nil)
      puts "#{count} posts were flagged for a rebake"
    end
  end

  # Re-run the verification; raises if anything is still unmigrated.
  migration_successful?(db, true)

  puts "Done!"
end
|
||||
|
||||
################################################################################
|
||||
# clean_up #
|
||||
################################################################################
|
||||
|
||||
Reference in New Issue
Block a user