diff --git a/Gemfile b/Gemfile index 1caacdadfea..d0878a8873e 100644 --- a/Gemfile +++ b/Gemfile @@ -151,7 +151,6 @@ group :development do gem 'bullet', require: !!ENV['BULLET'] gem 'better_errors' gem 'binding_of_caller' - gem 'diffy' # waiting on 2.7.5 per: https://github.com/ctran/annotate_models/pull/595 if rails_master? diff --git a/Gemfile.lock b/Gemfile.lock index 0fa35801f0d..db8907d12f5 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -90,7 +90,6 @@ GEM crass (1.0.4) debug_inspector (0.0.3) diff-lcs (1.3) - diffy (3.3.0) discourse-ember-source (3.8.0.1) discourse_image_optim (0.26.2) exifr (~> 1.2, >= 1.2.2) @@ -436,7 +435,6 @@ DEPENDENCIES certified colored2 cppjieba_rb - diffy discourse-ember-source (~> 3.8.0) discourse_image_optim email_reply_trimmer (~> 0.1) diff --git a/app/assets/javascripts/pretty-text/engines/discourse-markdown/emoji.js.es6 b/app/assets/javascripts/pretty-text/engines/discourse-markdown/emoji.js.es6 index f1fc15e7aab..ba2c971541b 100644 --- a/app/assets/javascripts/pretty-text/engines/discourse-markdown/emoji.js.es6 +++ b/app/assets/javascripts/pretty-text/engines/discourse-markdown/emoji.js.es6 @@ -236,7 +236,7 @@ function applyEmoji( export function setup(helper) { helper.registerOptions((opts, siteSettings, state) => { - opts.features.emoji = !!siteSettings.enable_emoji; + opts.features.emoji = !state.disableEmojis && !!siteSettings.enable_emoji; opts.features.emojiShortcuts = !!siteSettings.enable_emoji_shortcuts; opts.features.inlineEmoji = !!siteSettings.enable_inline_emoji_translation; opts.emojiSet = siteSettings.emoji_set || ""; diff --git a/app/assets/javascripts/pretty-text/pretty-text.js.es6 b/app/assets/javascripts/pretty-text/pretty-text.js.es6 index 33836cddd3f..e656c9a7d75 100644 --- a/app/assets/javascripts/pretty-text/pretty-text.js.es6 +++ b/app/assets/javascripts/pretty-text/pretty-text.js.es6 @@ -29,7 +29,8 @@ export function buildOptions(state) { lookupUploadUrls, previewing, linkify, - censoredWords + censoredWords, + disableEmojis } = state; let features = { @@ -76,7 +77,8 @@ export function buildOptions(state) { markdownIt: true, injectLineNumbersToPreview: siteSettings.enable_advanced_editor_preview_sync, - previewing + previewing, + disableEmojis }; // note, this will mutate options due to the way the API is designed diff --git a/app/services/inline_uploads.rb b/app/services/inline_uploads.rb index 4e3cece2c8d..a8ce2db5a7b 100644 --- a/app/services/inline_uploads.rb +++ b/app/services/inline_uploads.rb @@ -3,98 +3,147 @@ require_dependency "pretty_text" class InlineUploads + PLACEHOLDER = "__replace__" + private_constant :PLACEHOLDER + + UPLOAD_REGEXP_PATTERN = "/original/(\\dX/(?:[a-f0-9]/)*[a-f0-9]{40}[a-z0-9.]*)" + private_constant :UPLOAD_REGEXP_PATTERN + def self.process(markdown, on_missing: nil) markdown = markdown.dup - cooked_fragment = Nokogiri::HTML::fragment(PrettyText.cook(markdown)) + cooked_fragment = Nokogiri::HTML::fragment(PrettyText.cook(markdown, disable_emojis: true)) link_occurences = [] cooked_fragment.traverse do |node| if node.name == "img" # Do nothing - elsif !(node.children.count == 1 && (node.children[0].name != "img" && node.children[0].children.blank?)) + elsif !(node.children.count == 1 && (node.children[0].name != "img" && node.children[0].children.blank?)) || + !node.ancestors.all? { |parent| !parent.attributes["class"]&.value&.include?("quote") } next end if seen_link = matched_uploads(node).first - if actual_link = (node.attributes["href"]&.value || node.attributes["src"]&.value) - link_occurences << [actual_link, true] - else - link_occurences << [seen_link, false] + link_occurences << + if (actual_link = (node.attributes["href"]&.value || node.attributes["src"]&.value)) + { link: actual_link, is_valid: true } + else + { link: seen_link, is_valid: false } + end + end + end + + raw_matches = [] + + markdown.scan(/(\[img\]\s?(.+)\s?\[\/img\])/) do |match| + raw_matches << [match[0], match[1], +"![](#{PLACEHOLDER})", $~.offset(0)[0]] + end + + markdown.scan(/(!?\[([^\[\]]+)\]\(([a-zA-z0-9\.\/:-]+)\))/) do |match| + if matched_uploads(match[2]).present? + raw_matches << [ + match[0], + match[2], + +"#{match[0].start_with?("!") ? "!" : ""}[#{match[1]}](#{PLACEHOLDER})", + $~.offset(0)[0] + ] + end + end + + markdown.scan(/(<(?!img)[^<>]+\/?>)?(\n*)(([ ]*)]+)>([ ]*))(\n*)/) do |match| + node = Nokogiri::HTML::fragment(match[2].strip).children[0] + src = node.attributes["src"].value + + if matched_uploads(src).present? + text = node.attributes["alt"]&.value + width = node.attributes["width"]&.value + height = node.attributes["height"]&.value + text = "#{text}|#{width}x#{height}" if width && height + after_html_tag = match[0].present? + + spaces_before = + if after_html_tag && !match[0].end_with?("/>") + (match[3].present? ? match[3] : " ") + else + "" + end + + replacement = +"#{spaces_before}![#{text}](#{PLACEHOLDER})" + + if after_html_tag && (num_newlines = match[1].length) <= 1 + replacement.prepend("\n" * (num_newlines == 0 ? 2 : 1)) + end + + if after_html_tag && !match[0].end_with?("/>") && (num_newlines = match[6].length) <= 1 + replacement += ("\n" * (num_newlines == 0 ? 2 : 1)) + end + + match[2].strip! if !after_html_tag + + raw_matches << [ + match[2], + src, + replacement, + $~.offset(0)[0] + ] + end + end + + markdown.scan(/(()([^<\a>]*?)<\/a>)/) do |match| + node = Nokogiri::HTML::fragment(match[0]).children[0] + href = node.attributes["href"]&.value + + if href && matched_uploads(href).present? + has_attachment = node.attributes["class"]&.value + index = $~.offset(0)[0] + text = match[2].strip.gsub("\n", "").gsub(/ +/, " ") + text = "#{text}|attachment" if has_attachment + raw_matches << [match[0], href, +"[#{text}](#{PLACEHOLDER})", index] + end + end + + db = RailsMultisite::ConnectionManagement.current_db + + regexps = [ + /(^|\s)?(https?:\/\/[a-zA-Z0-9\.\/-]+\/uploads\/#{db}#{UPLOAD_REGEXP_PATTERN})($|\s)/, + ] + + if Discourse.store.external? + regexps << /(^|\s)?(https?:#{SiteSetting.Upload.s3_base_url}#{UPLOAD_REGEXP_PATTERN})($|\s)?/ + regexps << /(^|\s)?(#{SiteSetting.Upload.s3_cdn_url}#{UPLOAD_REGEXP_PATTERN})($|\s)?/ + end + + regexps.each do |regexp| + markdown.scan(regexp) do |match| + if matched_uploads(match[1]).present? + raw_matches << [match[1], match[1], +"![](#{PLACEHOLDER})", $~.offset(0)[0]] end end end - raw_fragment = Nokogiri::HTML::fragment(markdown) + raw_matches + .sort { |a, b| a[3] <=> b[3] } + .each do |match, link, replace_with, _index| - raw_fragment.traverse do |node| - if node.name == "img" - # Do nothing - elsif !(node.children.count == 0 || (node.children.count == 1 && node.children[0].children.blank?)) - next - end + node_info = link_occurences.shift + next unless node_info&.dig(:is_valid) - matches = matched_uploads(node) - next if matches.blank? - links = extract_links(node) + if link.include?(node_info[:link]) + begin + uri = URI(link) + rescue URI::Error + end - matches.zip(links).each do |_match, link| - seen_link, is_valid = link_occurences.shift - next unless (link && is_valid) + if !Discourse.store.external? + next if uri&.host && uri.host != Discourse.current_hostname + end - if link.include?(seen_link) - begin - uri = URI(link) - rescue URI::Error - end + upload = Upload.get_from_url(link) - if !Discourse.store.external? - next if uri&.host && uri.host != Discourse.current_hostname - end - - upload = Upload.get_from_url(link) - - if upload - new_node = - case node.name - when 'a' - attachment_postfix = - if node.attributes["class"]&.value&.split(" ")&.include?("attachment") - "|attachment" - else - "" - end - - text = node.children.text.strip.gsub("\n", "").gsub(/ +/, " ") - - markdown.sub!( - node.to_s, - "[#{text}#{attachment_postfix}](#{upload.short_url})" - ) - when "img" - text = node.attributes["alt"]&.value - width = node.attributes["width"]&.value - height = node.attributes["height"]&.value - text = "#{text}|#{width}x#{height}" if width && height - markdown.sub!(node.to_s, "![#{text}](#{upload.short_url})") - else - if markdown =~ /\[img\]\s?#{link}\s?\[\/img\]/ - capture = Regexp.last_match[0] - - if capture - markdown.sub!(capture, "![](#{upload.short_url})") - end - elsif markdown =~ /(!?\[([a-z0-9|]+)\]\([a-zA-z0-9\.\/]+\))/ - capture = Regexp.last_match[0] - - if capture - markdown.sub!(capture, "![#{Regexp.last_match[2]}](#{upload.short_url})") - end - end - end - - else - on_missing.call(link) if on_missing - end + if upload + replacement = replace_with.sub!(PLACEHOLDER, upload.short_url) + markdown.sub!(match, replacement) + else + on_missing.call(link) if on_missing end end end @@ -114,21 +163,21 @@ class InlineUploads if Discourse.store.external? if Rails.configuration.multisite - regexps << /(#{SiteSetting.Upload.s3_base_url}\/uploads\/#{db}\/original\/(\dX\/(?:[a-f0-9]\/)*[a-f0-9]{40}[a-z0-9\.]*))/ - regexps << /(#{SiteSetting.Upload.s3_cdn_url}\/uploads\/#{db}\/original\/(\dX\/(?:[a-f0-9]\/)*[a-f0-9]{40}[a-z0-9\.]*))/ + regexps << /(#{SiteSetting.Upload.s3_base_url}\/uploads\/#{db}#{UPLOAD_REGEXP_PATTERN})/ + regexps << /(#{SiteSetting.Upload.s3_cdn_url}\/uploads\/#{db}#{UPLOAD_REGEXP_PATTERN})/ else - regexps << /(#{SiteSetting.Upload.s3_base_url}\/original\/(\dX\/(?:[a-f0-9]\/)*[a-f0-9]{40}[a-z0-9\.]*))/ - regexps << /(#{SiteSetting.Upload.s3_cdn_url}\/original\/(\dX\/(?:[a-f0-9]\/)*[a-f0-9]{40}[a-z0-9\.]*))/ - regexps << /(\/uploads\/#{db}\/original\/(\dX\/(?:[a-f0-9]\/)*[a-f0-9]{40}[a-z0-9\.]*))/ + regexps << /(#{SiteSetting.Upload.s3_base_url}#{UPLOAD_REGEXP_PATTERN})/ + regexps << /(#{SiteSetting.Upload.s3_cdn_url}#{UPLOAD_REGEXP_PATTERN})/ + regexps << /(\/uploads\/#{db}#{UPLOAD_REGEXP_PATTERN})/ end else - regexps << /(\/uploads\/#{db}\/original\/(\dX\/(?:[a-f0-9]\/)*[a-f0-9]{40}[a-z0-9\.]*))/ + regexps << /(\/uploads\/#{db}#{UPLOAD_REGEXP_PATTERN})/ end node = node.to_s regexps.each do |regexp| - node.scan(regexp).each do |matched| + node.scan(regexp) do |matched| matches << matched[0] end end @@ -136,16 +185,4 @@ class InlineUploads matches end private_class_method :matched_uploads - - def self.extract_links(node) - links = [] - links << node.attributes["href"]&.value - links << node.attributes["src"]&.value - links = links.concat(node.to_s.scan(/\[img\]\s?(.+)\s?\[\/img\]/)) - links = links.concat(node.to_s.scan(/!?\[[a-z0-9|]+\]\(([a-zA-z0-9\.\/]+)\)/)) - links.flatten! - links.compact! - links - end - private_class_method :extract_links end diff --git a/lib/pretty_text.rb b/lib/pretty_text.rb index 123bb916474..44f535bb8b4 100644 --- a/lib/pretty_text.rb +++ b/lib/pretty_text.rb @@ -149,6 +149,7 @@ module PrettyText buffer = +<<~JS __optInput = {}; __optInput.siteSettings = #{SiteSetting.client_settings_json}; + #{"__optInput.disableEmojis = true" if opts[:disable_emojis]} __paths = #{paths_json}; __optInput.getURL = __getURL; __optInput.getCurrentUser = __getCurrentUser; diff --git a/lib/tasks/posts.rake b/lib/tasks/posts.rake index 7da89fe4a96..439c39512d9 100644 --- a/lib/tasks/posts.rake +++ b/lib/tasks/posts.rake @@ -652,7 +652,7 @@ end desc "Coverts full upload URLs in `Post#raw` to short upload url" task 'posts:inline_uploads' => :environment do |_, args| - dry_run = ENV["DRY_RUN"] || true + dry_run = (ENV["DRY_RUN"].nil? ? true : ENV["DRY_RUN"] != "false") scope = Post.joins(:post_uploads) .distinct("posts.id") @@ -661,32 +661,54 @@ task 'posts:inline_uploads' => :environment do |_, args| affected_posts_count = scope.count fixed_count = 0 not_corrected_post_ids = [] + failed_to_correct_post_ids = [] scope.find_each do |post| - new_raw = InlineUploads.process(post.raw) + if post.raw !~ Upload::URL_REGEX + affected_posts_count -= 1 + next + end - if post.raw != new_raw - if dry_run - puts "Post id #{post.id} raw changed!" - Diffy::Diff.default_format = :color - puts Diffy::Diff.new(post.raw, new_raw, context: 1) + begin + new_raw = InlineUploads.process(post.raw) + + if post.raw != new_raw + if dry_run + putc "🏃‍" + else + post.revise!(Discourse.system_user, + { + raw: new_raw + }, + skip_validations: true, + force_new_version: true + ) + + putc "." + end + + fixed_count += 1 else - putc "." + not_corrected_post_ids << post.id end - - fixed_count += 1 - else - not_corrected_post_ids << post.id + rescue => e + putc "X" + failed_to_correct_post_ids << post.id end end + puts puts "#{fixed_count} out of #{affected_posts_count} affected posts corrected" - if fixed_count != affected_posts_count - puts "Ids of posts that were not correct: #{not_corrected_post_ids}" + if not_corrected_post_ids.present? + puts "Ids of posts that were not corrected: #{not_corrected_post_ids}" + end + + if failed_to_correct_post_ids.present? + puts "Ids of posts that encountered failures: #{failed_to_correct_post_ids}" end if dry_run - + puts "Task was ran in dry run mode. Set `DRY_RUN=false` to revise affected posts" end end diff --git a/spec/services/inline_uploads_spec.rb b/spec/services/inline_uploads_spec.rb index e6f02602940..a43c8acd96a 100644 --- a/spec/services/inline_uploads_spec.rb +++ b/spec/services/inline_uploads_spec.rb @@ -54,6 +54,21 @@ RSpec.describe InlineUploads do expect(InlineUploads.process(md)).to eq(md) end + it "should not correct links in quotes" do + post = Fabricate(:post) + user = Fabricate(:user) + + md = <<~MD + [quote="#{user.username}, post:#{post.post_number}, topic:#{post.topic.id}"] + some quote + + #{Discourse.base_url}#{upload3.url} + [/quote] + MD + + expect(InlineUploads.process(md)).to eq(md) + end + it "should correct bbcode img URLs to the short version" do md = <<~MD [img]#{upload.url}[/img] @@ -70,15 +85,29 @@ RSpec.describe InlineUploads do MD end + it "should correct raw image URLs to the short version" do + md = <<~MD + #{Discourse.base_url}#{upload3.url} #{Discourse.base_url}#{upload3.url} + MD + + expect(InlineUploads.process(md)).to eq(<<~MD) + ![](#{upload3.short_url}) ![](#{upload3.short_url}) + MD + end + it "should correct image URLs to the short version" do md = <<~MD ![image|690x290](#{upload.short_url}) - ![image](#{upload.url}) - ![image|100x100](#{upload.url}) + ![IMAge|690x190,60%](#{upload.short_url}) - some image - some imagesome image + ![image](#{upload2.url}) + ![image|100x100](#{upload3.url}) + + some image + some imagesome image + + #{Discourse.base_url}#{upload3.url} #{Discourse.base_url}#{upload3.url} MD @@ -86,25 +115,165 @@ RSpec.describe InlineUploads do expect(InlineUploads.process(md)).to eq(<<~MD) ![image|690x290](#{upload.short_url}) - ![image](#{upload.short_url}) - ![image|100x100](#{upload.short_url}) + ![IMAge|690x190,60%](#{upload.short_url}) + + ![image](#{upload2.short_url}) + ![image|100x100](#{upload3.short_url}) ![some image](#{upload.short_url}) - ![some image](#{upload.short_url})![some image](#{upload.short_url}) + ![some image](#{upload2.short_url})![some image](#{upload3.short_url}) + + ![](#{upload3.short_url}) ![](#{upload3.short_url}) ![|5x4](#{upload.short_url}) MD end - it "should correct attachment URLS with an upload before" do + it "should not be affected by an emoji" do + CustomEmoji.create!(name: 'test', upload: upload3) + Emoji.clear_cache + md = <<~MD - ![image](#{upload.short_url}) + :test: + + ![image|690x290](#{upload.url}) + MD + + expect(InlineUploads.process(md)).to eq(<<~MD) + :test: + + ![image|690x290](#{upload.short_url}) + MD + end + + it "should correctly update image sources within anchor or paragraph tags" do + md = <<~MD + + test + + +

+ test +

+ + test + + test + + + + + test + + + +

Test test

+ +
+ test + MD + + expect(InlineUploads.process(md)).to eq(<<~MD) + + + ![test|500x500](#{upload.short_url}) + + + +

+ + ![test](#{upload2.short_url}) + +

+ + + + ![test|500x500](#{upload3.short_url}) + + + + + + ![test|500x500](#{upload.short_url}) + + + + + + + ![test|500x500](#{upload.short_url}) + + + +

Test ![test|500x500](#{upload2.short_url})

+ +
+ + ![test|500x500](#{upload2.short_url}) + MD + end + + it "should not be affected by fake HTML tags" do + md = <<~MD + ``` + This is some test + + > some quote test2 MD expect(InlineUploads.process(md)).to eq(<<~MD) - ![image](#{upload.short_url}) + ``` + This is some some quote + + [test2|attachment](#{upload2.short_url}) + MD + end + + it "should not be affected by an external or invalid links" do + md = <<~MD + invalid + + [test]("https://this.is.some.external.link") + + test + + test2 + MD + + expect(InlineUploads.process(md)).to eq(<<~MD) + invalid + + [test]("https://this.is.some.external.link") + + test + + [test2|attachment](#{upload2.short_url}) + MD + end + + it "should correct attachment URLS to the short version when raw contains inline image" do + md = <<~MD + ![image](#{upload.short_url}) ![image](#{upload.short_url}) + + [some complicated.doc %50](#{upload3.url}) + + test2 + MD + + expect(InlineUploads.process(md)).to eq(<<~MD) + ![image](#{upload.short_url}) ![image](#{upload.short_url}) + + [some complicated.doc %50](#{upload3.short_url}) [test2|attachment](#{upload2.short_url}) MD @@ -120,23 +289,23 @@ RSpec.describe InlineUploads do - - test2 + - test2 - test2 - - test2 + - test2 - test3 - test3test3 + test3 + test3test3 MD expect(InlineUploads.process(md)).to eq(<<~MD) [this is some attachment|attachment](#{upload.short_url}) - - [test2|attachment](#{upload2.short_url}) + - [test2|attachment](#{upload.short_url}) - [test2|attachment](#{upload2.short_url}) - - [test2|attachment](#{upload2.short_url}) + - [test2|attachment](#{upload3.short_url}) - [test3|attachment](#{upload3.short_url}) - [test3|attachment](#{upload3.short_url})[test3|attachment](#{upload3.short_url}) + [test3|attachment](#{upload.short_url}) + [test3|attachment](#{upload2.short_url})[test3|attachment](#{upload3.short_url}) MD end @@ -200,6 +369,7 @@ RSpec.describe InlineUploads do describe "s3 uploads" do let(:upload) { Fabricate(:upload_s3) } + let(:upload2) { Fabricate(:upload_s3) } before do SiteSetting.enable_s3_uploads = true @@ -212,12 +382,12 @@ RSpec.describe InlineUploads do it "should correct image URLs to the short version" do md = <<~MD some image - some image + some image MD expect(InlineUploads.process(md)).to eq(<<~MD) ![some image](#{upload.short_url}) - ![some image](#{upload.short_url}) + ![some image](#{upload2.short_url}) MD end @@ -226,13 +396,19 @@ RSpec.describe InlineUploads do Rails.configuration.multisite = true md = <<~MD + https:#{upload2.url} https:#{upload2.url} + #{URI.join(SiteSetting.s3_cdn_url, URI.parse(upload2.url).path).to_s} + some image - some image + some image MD expect(InlineUploads.process(md)).to eq(<<~MD) + ![](#{upload2.short_url}) ![](#{upload2.short_url}) + ![](#{upload2.short_url}) + ![some image](#{upload.short_url}) - ![some image](#{upload.short_url}) + ![some image](#{upload2.short_url}) MD ensure Rails.configuration.multisite = false