From 76f7e90e2353edf2619846c4a21e6f794de26189 Mon Sep 17 00:00:00 2001 From: Sam Saffron Date: Fri, 24 May 2019 12:38:38 +1000 Subject: [PATCH] FEATURE: rake posts:recover_uploads_from_index improvements We now recover more smartly based on failed db remaps as well. --- lib/tasks/posts.rake | 45 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/lib/tasks/posts.rake b/lib/tasks/posts.rake index c8c9e09cc8c..982909c4ac4 100644 --- a/lib/tasks/posts.rake +++ b/lib/tasks/posts.rake @@ -527,8 +527,49 @@ task 'posts:destroy_old_data_exports' => :environment do end def recover_uploads_from_index(path) + lookup = [] + + db = RailsMultisite::ConnectionManagement.current_db + cdn_path = SiteSetting.cdn_path("/uploads/#{db}").sub(/https?:/, "") + Post.where("cooked LIKE '%#{cdn_path}%'").each do |post| + regex = Regexp.new("((https?)?#{Regexp.escape(cdn_path)}[^,;\t\n\s)\"\']+)") + uploads = [] + post.raw.scan(regex).each do |match| + uploads << match[0] + end + + if uploads.length > 0 + lookup << [post.id, uploads] + end + end + PostCustomField.where(name: Post::MISSING_UPLOADS).pluck(:post_id, :value).each do |post_id, uploads| uploads = JSON.parse(uploads) + raw = Post.where(id: post_id).pluck(:raw).first + uploads.map! do |upload| + orig = upload + if raw.scan(upload).length == 0 + upload = upload.sub(SiteSetting.Upload.s3_cdn_url, SiteSetting.Upload.s3_base_url) + end + if raw.scan(upload).length == 0 + upload = upload.sub(SiteSetting.Upload.s3_base_url, Discourse.base_url) + end + if raw.scan(upload).length == 0 + upload = upload.sub(Discourse.base_url + "/", "/") + end + if raw.scan(upload).length == 0 + puts "can not find #{orig} in\n\n#{raw}" + upload = nil + end + upload + end + uploads.compact! + if uploads.length > 0 + lookup << [post_id, uploads] + end + end + + lookup.each do |post_id, uploads| post = Post.find(post_id) changed = false @@ -538,8 +579,8 @@ def recover_uploads_from_index(path) next end - puts "Searching for #{url} in index" - name = File.basename(url) + name = File.basename(url).split("_")[0] + puts "Searching for #{url} (#{name}) in index" if name.length < 40 puts "Skipping #{url} in #{post.full_url} cause it appears to have a short file name" next