mirror of
				https://github.com/discourse/discourse.git
				synced 2025-02-25 18:55:32 -06:00 
			
		
		
		
	
		
			
				
	
	
		
			192 lines
		
	
	
		
			4.9 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
			
		
		
	
	
			192 lines
		
	
	
		
			4.9 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
| # frozen_string_literal: true
 | |
| 
 | |
| class UploadRecovery
 | |
|   def initialize(dry_run: false, stop_on_error: false)
 | |
|     @dry_run = dry_run
 | |
|     @stop_on_error = stop_on_error
 | |
|   end
 | |
| 
 | |
|   def recover(posts = Post)
 | |
|     posts.have_uploads.find_each { |post| recover_post post }
 | |
|   end
 | |
| 
 | |
|   def recover_post(post)
 | |
|     begin
 | |
|       analyzer = PostAnalyzer.new(post.raw, post.topic_id)
 | |
| 
 | |
|       analyzer.cooked_stripped.css("img", "a").each do |media|
 | |
|         if media.name == "img" && orig_src = media["data-orig-src"]
 | |
|           if dom_class = media["class"]
 | |
|             if (Post.allowed_image_classes & dom_class.split).count > 0
 | |
|               next
 | |
|             end
 | |
|           end
 | |
| 
 | |
|           if @dry_run
 | |
|             puts "#{post.full_url} #{orig_src}"
 | |
|           else
 | |
|             recover_post_upload(post, Upload.sha1_from_short_url(orig_src))
 | |
|           end
 | |
|         elsif url = (media["href"] || media["src"])
 | |
|           data = Upload.extract_url(url)
 | |
|           next unless data
 | |
| 
 | |
|           upload = Upload.get_from_url(url)
 | |
| 
 | |
|           if !upload || upload.verification_status == Upload.verification_statuses[:invalid_etag]
 | |
|             if @dry_run
 | |
|               puts "#{post.full_url} #{url}"
 | |
|             else
 | |
|               sha1 = data[2]
 | |
|               recover_post_upload(post, sha1)
 | |
|             end
 | |
|           end
 | |
|         end
 | |
|       end
 | |
|     rescue => e
 | |
|       raise e if @stop_on_error
 | |
|       puts "#{post.full_url} #{e.class}: #{e.message}"
 | |
|     end
 | |
|   end
 | |
| 
 | |
|   private
 | |
| 
 | |
|   def recover_post_upload(post, sha1)
 | |
|     return unless valid_sha1?(sha1)
 | |
| 
 | |
|     attributes = {
 | |
|       post: post,
 | |
|       sha1: sha1
 | |
|     }
 | |
| 
 | |
|     if Discourse.store.external?
 | |
|       recover_post_upload_from_s3(**attributes)
 | |
|     else
 | |
|       recover_post_upload_from_local(**attributes)
 | |
|     end
 | |
|   end
 | |
| 
 | |
|   def ensure_upload!(post:, sha1:, upload:)
 | |
|     return if !upload.persisted?
 | |
| 
 | |
|     if upload.sha1 != sha1
 | |
|       STDERR.puts "Warning #{post.url} had an incorrect #{sha1} should be #{upload.sha1} storing in custom field 'rake uploads:fix_relative_upload_links' can fix this"
 | |
| 
 | |
|       sha_map = post.custom_fields["UPLOAD_SHA1_MAP"] || "{}"
 | |
|       sha_map = JSON.parse(sha_map)
 | |
|       sha_map[sha1] = upload.sha1
 | |
| 
 | |
|       post.custom_fields["UPLOAD_SHA1_MAP"] = sha_map.to_json
 | |
|       post.save_custom_fields
 | |
|     end
 | |
| 
 | |
|     post.rebake!
 | |
|   end
 | |
| 
 | |
|   def recover_post_upload_from_local(post:, sha1:)
 | |
|     recover_from_local(sha1: sha1, user_id: post.user_id) do |upload|
 | |
|       ensure_upload!(post: post, sha1: sha1, upload: upload)
 | |
|     end
 | |
|   end
 | |
| 
 | |
|   def recover_post_upload_from_s3(post:, sha1:)
 | |
|     recover_from_s3(sha1: sha1, user_id: post.user_id) do |upload|
 | |
|       ensure_upload!(post: post, sha1: sha1, upload: upload)
 | |
|     end
 | |
|   end
 | |
| 
 | |
|   def recover_from_local(sha1:, user_id:)
 | |
|     @paths ||= begin
 | |
|       Dir.glob(File.join(
 | |
|         Discourse.store.tombstone_dir,
 | |
|         'original',
 | |
|         '**',
 | |
|         '*.*'
 | |
|       )).concat(Dir.glob(File.join(
 | |
|         Discourse.store.upload_path,
 | |
|         'original',
 | |
|         '**',
 | |
|         '*.*'
 | |
|       )))
 | |
|     end
 | |
| 
 | |
|     @paths.each do |path|
 | |
|       if path =~ /#{sha1}/
 | |
|         begin
 | |
|           tmp = Tempfile.new
 | |
|           tmp.write(File.read(path))
 | |
|           tmp.rewind
 | |
| 
 | |
|           upload = create_upload(tmp, File.basename(path), user_id)
 | |
|           yield upload if block_given?
 | |
|         ensure
 | |
|           tmp&.close
 | |
|         end
 | |
|       end
 | |
|     end
 | |
|   end
 | |
| 
 | |
|   def recover_from_s3(sha1:, user_id:)
 | |
|     @object_keys ||= begin
 | |
|       s3_helper = Discourse.store.s3_helper
 | |
| 
 | |
|       if Rails.configuration.multisite
 | |
|         current_db = RailsMultisite::ConnectionManagement.current_db
 | |
|         s3_helper.list("uploads/#{current_db}/original").map(&:key).concat(
 | |
|           s3_helper.list("uploads/#{FileStore::S3Store::TOMBSTONE_PREFIX}#{current_db}/original").map(&:key)
 | |
|         )
 | |
|       else
 | |
|         s3_helper.list("original").map(&:key).concat(
 | |
|           s3_helper.list("#{FileStore::S3Store::TOMBSTONE_PREFIX}original").map(&:key)
 | |
|         )
 | |
|       end
 | |
|     end
 | |
| 
 | |
|     upload_exists = Upload.exists?(sha1: sha1)
 | |
| 
 | |
|     @object_keys.each do |key|
 | |
|       if key =~ /#{sha1}/
 | |
|         tombstone_prefix = FileStore::S3Store::TOMBSTONE_PREFIX
 | |
| 
 | |
|         if key.include?(tombstone_prefix)
 | |
|           old_key = key
 | |
|           key = key.sub(tombstone_prefix, "")
 | |
| 
 | |
|           Discourse.store.s3_helper.copy(
 | |
|             old_key,
 | |
|             key,
 | |
|             options: { acl: "public-read" }
 | |
|           )
 | |
|         end
 | |
| 
 | |
|         next if upload_exists
 | |
| 
 | |
|         url = "https:#{SiteSetting.Upload.absolute_base_url}/#{key}"
 | |
| 
 | |
|         begin
 | |
|           tmp = FileHelper.download(
 | |
|             url,
 | |
|             max_file_size: SiteSetting.max_image_size_kb.kilobytes,
 | |
|             tmp_file_name: "recover_from_s3"
 | |
|           )
 | |
| 
 | |
|           if tmp
 | |
|             upload = create_upload(tmp, File.basename(key), user_id)
 | |
|             yield upload if block_given?
 | |
|           end
 | |
|         ensure
 | |
|           tmp&.close
 | |
|         end
 | |
|       end
 | |
|     end
 | |
|   end
 | |
| 
 | |
|   def create_upload(file, filename, user_id)
 | |
|     UploadCreator.new(file, filename).create_for(user_id)
 | |
|   end
 | |
| 
 | |
|   def valid_sha1?(sha1)
 | |
|     sha1.present? && sha1.length == Upload::SHA1_LENGTH
 | |
|   end
 | |
| end
 |