mirror of
https://github.com/discourse/discourse.git
synced 2025-02-25 18:55:32 -06:00
List and restore missing post uploads from S3 inventory.
This commit is contained in:
parent
bfcbfd7864
commit
e8fafbc123
@ -890,12 +890,12 @@ class Post < ActiveRecord::Base
|
|||||||
|
|
||||||
def link_post_uploads(fragments: nil)
|
def link_post_uploads(fragments: nil)
|
||||||
upload_ids = []
|
upload_ids = []
|
||||||
fragments ||= Nokogiri::HTML::fragment(self.cooked)
|
|
||||||
|
|
||||||
fragments.css("a/@href", "img/@src").each do |media|
|
each_upload_url(fragments: fragments) do |src, _, sha1|
|
||||||
if upload = Upload.get_from_url(media.value)
|
upload = nil
|
||||||
upload_ids << upload.id
|
upload = Upload.find_by(sha1: sha1) if sha1.present?
|
||||||
end
|
upload ||= Upload.get_from_url(src)
|
||||||
|
upload_ids << upload.id if upload.present?
|
||||||
end
|
end
|
||||||
|
|
||||||
upload_ids |= Upload.where(id: downloaded_images.values).pluck(:id)
|
upload_ids |= Upload.where(id: downloaded_images.values).pluck(:id)
|
||||||
@ -916,6 +916,84 @@ class Post < ActiveRecord::Base
|
|||||||
{}
|
{}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Yields each upload URL referenced by this post's cooked HTML.
#
# The unique `href` values of anchors and `src` values of images are taken
# from `fragments` (parsed from `self.cooked` when not supplied). A URL is
# skipped unless it matches one of the upload path patterns, and unless the
# store reports it as uploaded or (when `include_local_upload` is true) it is
# a site-relative path. URLs whose path cannot be parsed are skipped as well.
#
# @param fragments [Nokogiri fragment, nil] pre-parsed cooked HTML, optional
# @param include_local_upload [Boolean] also accept paths starting with a single "/"
# @yield [src, path, sha1] the (possibly scheme-prefixed) URL, its unescaped
#   path, and whatever OptimizedImage/Upload.extract_sha1 returns for that path
def each_upload_url(fragments: nil, include_local_upload: true)
  current_db = RailsMultisite::ConnectionManagement.current_db
  upload_patterns = [
    /\/uploads\/#{current_db}\//,
    /\/original\//,
    /\/optimized\//
  ]

  fragments ||= Nokogiri::HTML::fragment(self.cooked)

  fragments.css("a/@href", "img/@src").map(&:value).uniq.each do |src|
    next if src.blank?
    next unless upload_patterns.any? { |pattern| pattern.match?(src) }

    # Protocol-relative URLs get an explicit scheme before the store check.
    if src.start_with?("//")
      scheme = SiteSetting.force_https ? "https" : "http"
      src = "#{scheme}:#{src}"
    end

    uploaded = Discourse.store.has_been_uploaded?(src)
    next unless uploaded || (include_local_upload && src.match?(/\A\/[^\/]/i))

    # An unparsable URL yields a nil path and is skipped below.
    path =
      begin
        URI(URI.unescape(src))&.path
      rescue URI::Error
        nil
      end

    next if path.blank?

    extractor = path.include?("optimized") ? OptimizedImage : Upload
    sha1 = extractor.extract_sha1(path)

    yield(src, path, sha1)
  end
end
|
||||||
|
|
||||||
|
# Rebuilds the Post::MISSING_UPLOADS custom fields by scanning the cooked HTML
# of every post in `Post.have_uploads` for upload URLs that have no upload
# linked to the post.
#
# All existing Post::MISSING_UPLOADS custom fields are deleted first. Then, for
# each URL yielded by Post#each_upload_url whose sha1 is not already among the
# uploads linked to posts within the current batch's id range:
#   * an Upload with the same sha1 is looked up; failing that,
#   * the optional block is called and may return the id of a recovered upload.
# When an upload id is obtained, a PostUpload row is created if it does not
# exist yet; otherwise the URL is recorded as missing for that post.
#
# @param include_local_upload [Boolean] forwarded to Post#each_upload_url
# @yield [post, src, path, sha1] called for URLs that could not be resolved by sha1
# @yieldreturn [Integer, nil] id of a recovered upload, or nil when unrecoverable
# @return [Hash] `uploads:` unique missing URLs, `post_uploads:` missing URLs
#   keyed by post id, `count:` total number of missing post uploads
def self.find_missing_uploads(include_local_upload: true)
  PostCustomField.where(name: Post::MISSING_UPLOADS).delete_all
  missing_uploads = []
  missing_post_uploads = {}

  Post.have_uploads.select(:id, :cooked).find_in_batches do |posts|
    ids = posts.pluck(:id)
    sha1s = Upload.joins(:post_uploads).where("post_uploads.post_id >= ? AND post_uploads.post_id <= ?", ids.min, ids.max).pluck(:sha1)

    posts.each do |post|
      # The keyword must be forwarded, otherwise callers asking to ignore
      # local uploads would still get them.
      post.each_upload_url(include_local_upload: include_local_upload) do |src, path, sha1|
        next if sha1.present? && sha1s.include?(sha1)

        missing_post_uploads[post.id] ||= []

        # Already known to be missing: record it for this post without
        # repeating the lookup / recovery attempt.
        if missing_uploads.include?(src)
          missing_post_uploads[post.id] << src
          next
        end

        upload_id = nil
        upload_id = Upload.where(sha1: sha1).pluck(:id).first if sha1.present?
        # Recovery is optional; without a block the URL is simply reported missing.
        upload_id ||= yield(post, src, path, sha1) if block_given?

        if upload_id.present?
          attributes = { post_id: post.id, upload_id: upload_id }
          PostUpload.create!(attributes) unless PostUpload.exists?(attributes)
        else
          missing_uploads << src
          missing_post_uploads[post.id] << src
        end
      end
    end
  end

  # Posts whose every URL was resolved end up with an empty list; drop them.
  missing_post_uploads = missing_post_uploads.reject { |_, uploads| uploads.empty? }

  count = 0
  missing_post_uploads.each do |post_id, uploads|
    PostCustomField.create!(post_id: post_id, name: Post::MISSING_UPLOADS, value: uploads.to_json)
    count += uploads.count
  end

  { uploads: missing_uploads, post_uploads: missing_post_uploads, count: count }
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
def parse_quote_into_arguments(quote)
|
def parse_quote_into_arguments(quote)
|
||||||
|
@ -36,8 +36,6 @@ class S3Inventory
|
|||||||
|
|
||||||
ActiveRecord::Base.transaction do
|
ActiveRecord::Base.transaction do
|
||||||
begin
|
begin
|
||||||
table_name = "#{type}_inventory"
|
|
||||||
connection = ActiveRecord::Base.connection.raw_connection
|
|
||||||
connection.exec("CREATE TEMP TABLE #{table_name}(key text UNIQUE, etag text, PRIMARY KEY(etag, key))")
|
connection.exec("CREATE TEMP TABLE #{table_name}(key text UNIQUE, etag text, PRIMARY KEY(etag, key))")
|
||||||
connection.copy_data("COPY #{table_name} FROM STDIN CSV") do
|
connection.copy_data("COPY #{table_name} FROM STDIN CSV") do
|
||||||
files.each do |file|
|
files.each do |file|
|
||||||
@ -54,6 +52,8 @@ class S3Inventory
|
|||||||
WHERE #{model.table_name}.etag IS NULL
|
WHERE #{model.table_name}.etag IS NULL
|
||||||
AND url ILIKE '%' || #{table_name}.key")
|
AND url ILIKE '%' || #{table_name}.key")
|
||||||
|
|
||||||
|
list_missing_post_uploads if type == "original"
|
||||||
|
|
||||||
uploads = (model == Upload) ? model.by_users.where("created_at < ?", inventory_date) : model
|
uploads = (model == Upload) ? model.by_users.where("created_at < ?", inventory_date) : model
|
||||||
missing_uploads = uploads.joins("LEFT JOIN #{table_name} ON #{table_name}.etag = #{model.table_name}.etag").where("#{table_name}.etag is NULL")
|
missing_uploads = uploads.joins("LEFT JOIN #{table_name} ON #{table_name}.etag = #{model.table_name}.etag").where("#{table_name}.etag is NULL")
|
||||||
|
|
||||||
@ -73,6 +73,35 @@ class S3Inventory
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Lists post uploads that are missing and tries to restore them from the
# inventory table.
#
# Delegates the scan to Post.find_missing_uploads. For every unresolved URL
# that has a sha1, the inventory table is searched for an "original" key
# containing that sha1; when a row is found, an Upload record owned by the
# system user is created from the S3 object's metadata and its id is returned
# to the scan. The resulting count is stored in the "missing_post_uploads"
# stat and logged.
def list_missing_post_uploads
  log "Listing missing post uploads..."

  missing = Post.find_missing_uploads(include_local_upload: false) do |_, _, _, sha1|
    next if sha1.blank?

    # NOTE(review): sha1 is interpolated straight into the SQL text; presumably
    # it is always a hex digest produced by extract_sha1 - confirm, or switch
    # to a parameterized query.
    result = connection.exec("SELECT * FROM #{table_name} WHERE key LIKE '%original/%/#{sha1}%'")

    # No inventory entry for this sha1: nothing to restore. The previous
    # `>= 0` check was always true and went on to read a non-existent first row.
    next if result.count.zero?

    row = result[0]
    key = row["key"]
    data = s3_helper.object(key).data

    Upload.create!(
      user_id: Discourse.system_user.id,
      original_filename: "",
      filesize: data.content_length,
      url: File.join(Discourse.store.absolute_base_url, key),
      sha1: sha1,
      etag: row["etag"]
    ).id
  end

  Discourse.stats.set("missing_post_uploads", missing[:count])
  log "#{missing[:count]} post uploads are missing."
end
|
||||||
|
|
||||||
def download_inventory_files_to_tmp_directory
|
def download_inventory_files_to_tmp_directory
|
||||||
files.each do |file|
|
files.each do |file|
|
||||||
log "Downloading inventory file '#{file[:key]}' to tmp directory..."
|
log "Downloading inventory file '#{file[:key]}' to tmp directory..."
|
||||||
@ -128,6 +157,14 @@ class S3Inventory
|
|||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
|
# Returns the raw connection obtained from ActiveRecord, fetched once and then
# reused for subsequent calls on this instance.
def connection
  return @connection if @connection

  @connection = ActiveRecord::Base.connection.raw_connection
end
|
||||||
|
|
||||||
|
# Name of the inventory table for this inventory's type: the type followed by
# the "_inventory" suffix.
def table_name
  type.to_s + "_inventory"
end
|
||||||
|
|
||||||
def files
|
def files
|
||||||
@files ||= begin
|
@files ||= begin
|
||||||
symlink_file = unsorted_files.sort_by { |file| -file.last_modified.to_i }.first
|
symlink_file = unsorted_files.sort_by { |file| -file.last_modified.to_i }.first
|
||||||
|
@ -390,56 +390,15 @@ task 'posts:reorder_posts', [:topic_id] => [:environment] do |_, args|
|
|||||||
puts "", "Done.", ""
|
puts "", "Done.", ""
|
||||||
end
|
end
|
||||||
|
|
||||||
def get_missing_uploads
|
|
||||||
PostCustomField.where(name: Post::MISSING_UPLOADS)
|
|
||||||
end
|
|
||||||
|
|
||||||
desc 'Finds missing post upload records from cooked HTML content'
|
desc 'Finds missing post upload records from cooked HTML content'
|
||||||
task 'posts:missing_uploads' => :environment do
|
task 'posts:missing_uploads' => :environment do
|
||||||
get_missing_uploads.delete_all
|
|
||||||
|
|
||||||
upload_patterns = [
|
|
||||||
/\/uploads\/#{RailsMultisite::ConnectionManagement.current_db}\//,
|
|
||||||
/\/original\//,
|
|
||||||
/\/optimized\//
|
|
||||||
]
|
|
||||||
missing_uploads = []
|
|
||||||
old_scheme_upload_count = 0
|
old_scheme_upload_count = 0
|
||||||
count = 0
|
|
||||||
|
|
||||||
Post.have_uploads.select(:id, :cooked).find_in_batches do |posts|
|
missing = Post.find_missing_uploads do |post, src, path, sha1|
|
||||||
ids = posts.pluck(:id)
|
next if sha1.present?
|
||||||
sha1s = Upload.joins(:post_uploads).where("post_uploads.post_id >= ? AND post_uploads.post_id <= ?", ids.min, ids.max).pluck(:sha1)
|
|
||||||
|
|
||||||
posts.each do |post|
|
|
||||||
missing_post_uploads = []
|
|
||||||
links = Nokogiri::HTML::fragment(post.cooked).css("a/@href", "img/@src").map { |media| media.value }.uniq
|
|
||||||
|
|
||||||
links.each do |src|
|
|
||||||
next if src.blank? || upload_patterns.none? { |pattern| src =~ pattern }
|
|
||||||
|
|
||||||
src = "#{SiteSetting.force_https ? "https" : "http"}:#{src}" if src.start_with?("//")
|
|
||||||
next unless Discourse.store.has_been_uploaded?(src) || src =~ /\A\/[^\/]/i
|
|
||||||
|
|
||||||
path = begin
|
|
||||||
URI(URI.unescape(src))&.path
|
|
||||||
rescue URI::Error
|
|
||||||
end
|
|
||||||
|
|
||||||
next if path.blank?
|
|
||||||
|
|
||||||
sha1 =
|
|
||||||
if path.include? "optimized"
|
|
||||||
OptimizedImage.extract_sha1(path)
|
|
||||||
else
|
|
||||||
Upload.extract_sha1(path)
|
|
||||||
end
|
|
||||||
|
|
||||||
if sha1.blank? || sha1s.exclude?(sha1)
|
|
||||||
upload_id = nil
|
upload_id = nil
|
||||||
|
|
||||||
if missing_uploads.exclude?(src)
|
|
||||||
if sha1.blank?
|
|
||||||
# recovering old scheme upload.
|
# recovering old scheme upload.
|
||||||
local_store = FileStore::LocalStore.new
|
local_store = FileStore::LocalStore.new
|
||||||
public_path = "#{local_store.public_dir}#{path}"
|
public_path = "#{local_store.public_dir}#{path}"
|
||||||
@ -476,37 +435,15 @@ task 'posts:missing_uploads' => :environment do
|
|||||||
else
|
else
|
||||||
old_scheme_upload_count += 1
|
old_scheme_upload_count += 1
|
||||||
end
|
end
|
||||||
else
|
|
||||||
upload_id = Upload.where(sha1: sha1).pluck(:id).first
|
upload_id
|
||||||
end
|
end
|
||||||
|
|
||||||
if upload_id.present?
|
puts "", "#{missing[:count]} post uploads are missing.", ""
|
||||||
attributes = { post_id: post.id, upload_id: upload_id }
|
|
||||||
PostUpload.create!(attributes) unless PostUpload.exists?(attributes)
|
|
||||||
else
|
|
||||||
missing_uploads << src
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
missing_post_uploads << src if upload_id.blank?
|
if missing[:count] > 0
|
||||||
end
|
puts "#{missing[:uploads].count} uploads are missing."
|
||||||
end
|
puts "#{old_scheme_upload_count} of #{missing[:uploads].count} are old scheme uploads." if old_scheme_upload_count > 0
|
||||||
|
puts "#{missing[:post_uploads].count} of #{Post.count} posts are affected.", ""
|
||||||
if missing_post_uploads.present?
|
|
||||||
PostCustomField.create!(post_id: post.id, name: Post::MISSING_UPLOADS, value: missing_post_uploads.to_json)
|
|
||||||
count += missing_post_uploads.count
|
|
||||||
putc "x"
|
|
||||||
else
|
|
||||||
putc "."
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
puts "", "#{count} post uploads are missing.", ""
|
|
||||||
|
|
||||||
if count > 0
|
|
||||||
puts "#{missing_uploads.count} uploads are missing."
|
|
||||||
puts "#{old_scheme_upload_count} of #{missing_uploads.count} are old scheme uploads." if old_scheme_upload_count > 0
|
|
||||||
puts "#{get_missing_uploads.count} of #{Post.count} posts are affected.", ""
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -75,7 +75,7 @@ describe "S3Inventory" do
|
|||||||
inventory.backfill_etags_and_list_missing
|
inventory.backfill_etags_and_list_missing
|
||||||
end
|
end
|
||||||
|
|
||||||
expect(output).to eq("#{upload.url}\n1 of 4 uploads are missing\n")
|
expect(output).to eq("Listing missing post uploads...\n0 post uploads are missing.\n#{upload.url}\n1 of 4 uploads are missing\n")
|
||||||
expect(Discourse.stats.get("missing_s3_uploads")).to eq(1)
|
expect(Discourse.stats.get("missing_s3_uploads")).to eq(1)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -1261,19 +1261,23 @@ describe Post do
|
|||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
let(:base_url) { "#{Discourse.base_url_no_prefix}#{Discourse.base_uri}" }
|
||||||
|
let(:video_url) { "#{base_url}#{video_upload.url}" }
|
||||||
|
let(:audio_url) { "#{base_url}#{audio_upload.url}" }
|
||||||
|
|
||||||
let(:raw) do
|
let(:raw) do
|
||||||
<<~RAW
|
<<~RAW
|
||||||
<a href="#{attachment_upload.url}">Link</a>
|
<a href="#{attachment_upload.url}">Link</a>
|
||||||
<img src="#{image_upload.url}">
|
<img src="#{image_upload.url}">
|
||||||
|
|
||||||
<video width="100%" height="100%" controls>
|
<video width="100%" height="100%" controls>
|
||||||
<source src="http://myforum.com#{video_upload.url}">
|
<source src="#{video_url}">
|
||||||
<a href="http://myforum.com#{video_upload.url}">http://myforum.com#{video_upload.url}</a>
|
<a href="#{video_url}">#{video_url}</a>
|
||||||
</video>
|
</video>
|
||||||
|
|
||||||
<audio controls>
|
<audio controls>
|
||||||
<source src="http://myforum.com#{audio_upload.url}">
|
<source src="#{audio_url}">
|
||||||
<a href="http://myforum.com#{audio_upload.url}">http://myforum.com#{audio_upload.url}</a>
|
<a href="#{audio_url}">#{audio_url}</a>
|
||||||
</audio>
|
</audio>
|
||||||
RAW
|
RAW
|
||||||
end
|
end
|
||||||
|
Loading…
Reference in New Issue
Block a user