From 5238f6788cd42f3ad13c34cb1a04c5073184766b Mon Sep 17 00:00:00 2001 From: David Taylor Date: Tue, 7 Jun 2022 15:23:04 +0100 Subject: [PATCH] FEATURE: Allow hotlinked media to be blocked (#16940) This commit introduces a new site setting: `block_hotlinked_media`. When enabled, all attempts to hotlink media (images, videos, and audio) will fail, and be replaced with a linked placeholder. Exceptions to the rule can be added via `block_hotlinked_media_exceptions`. `download_remote_image_to_local` can be used alongside this feature. In that case, hotlinked images will be blocked immediately when the post is created, but will then be replaced with the downloaded version a few seconds later. This implementation is purely server-side, and does not impact the composer preview. Technically, there are two stages to this feature: 1. `PrettyText.sanitize_hotlinked_media` is called during `PrettyText.cook`, and whenever new images are introduced by Onebox. It will iterate over all src/srcset attributes in the post HTML and check if they're allowed. If not, the attributes will be removed and replaced with a `data-blocked-hotlinked-src(set)` attribute 2. In the `CookedPostProcessor`, we iterate over all `data-blocked-hotlinked-src(set)` attributes and check whether we have a downloaded version of the media. If yes, we update the src to use the downloaded version. If not, the entire media element is replaced with a placeholder. The placeholder is labelled 'external media', and is a link to the offsite media. --- .../stylesheets/common/base/topic-post.scss | 17 +- app/jobs/regular/pull_hotlinked_images.rb | 4 +- app/models/post_analyzer.rb | 6 +- config/locales/server.en.yml | 9 +- config/site_settings.yml | 6 + lib/cooked_post_processor.rb | 31 +++- lib/cooked_processor_mixin.rb | 27 +++ lib/pretty_text.rb | 44 +++++ .../blocked_hotlinked_media_spec.rb | 156 ++++++++++++++++++ 9 files changed, 291 insertions(+), 9 deletions(-) create mode 100644 spec/integration/blocked_hotlinked_media_spec.rb diff --git a/app/assets/stylesheets/common/base/topic-post.scss b/app/assets/stylesheets/common/base/topic-post.scss index fa0828fcfa9..9d2c4c48146 100644 --- a/app/assets/stylesheets/common/base/topic-post.scss +++ b/app/assets/stylesheets/common/base/topic-post.scss @@ -1309,14 +1309,25 @@ a.mention-group { } } -.broken-image { +.broken-image, +.blocked-hotlinked-placeholder { + &:not(a) { + color: var(--primary-low-mid-or-secondary-high); + } + display: inline-flex; - color: var(--primary-low-mid-or-secondary-high); + flex-direction: column; border: 1px solid var(--primary-low); - font-size: $font-up-5; + align-items: center; + justify-content: center; .d-icon { margin: 16px; + font-size: var(--font-up-5); + } + + .notice { + margin: 0 0.5em 0.5em 0.5em; } } diff --git a/app/jobs/regular/pull_hotlinked_images.rb b/app/jobs/regular/pull_hotlinked_images.rb index 6b5d6d2ddbd..f29874e05af 100644 --- a/app/jobs/regular/pull_hotlinked_images.rb +++ b/app/jobs/regular/pull_hotlinked_images.rb @@ -23,7 +23,7 @@ module Jobs changed_hotlink_records = false extract_images_from(post.cooked).each do |node| - download_src = original_src = node['src'] || node['href'] + download_src = original_src = node['src'] || node[PrettyText::BLOCKED_HOTLINKED_SRC_ATTR] || node['href'] download_src = "#{SiteSetting.force_https ? "https" : "http"}:#{original_src}" if original_src.start_with?("//") normalized_src = normalize_src(download_src) @@ -130,7 +130,7 @@ module Jobs def extract_images_from(html) doc = Nokogiri::HTML5::fragment(html) - doc.css("img[src], a.lightbox[href]") - + doc.css("img[src], [#{PrettyText::BLOCKED_HOTLINKED_SRC_ATTR}], a.lightbox[href]") - doc.css("img.avatar") - doc.css(".lightbox img[src]") end diff --git a/app/models/post_analyzer.rb b/app/models/post_analyzer.rb index 9d5c81451c3..c049f80c394 100644 --- a/app/models/post_analyzer.rb +++ b/app/models/post_analyzer.rb @@ -46,7 +46,11 @@ class PostAnalyzer onebox end - cooked = result.to_html if result.changed? + if result.changed? + PrettyText.sanitize_hotlinked_media(result.doc) + cooked = result.to_html + end + cooked end diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index 24ea1a07c38..703bce9c813 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -727,6 +727,11 @@ en: post: image_placeholder: broken: "This image is broken" + blocked_hotlinked_title: "Image hosted on another site. Click to open in a new tab." + blocked_hotlinked: "External Image" + media_placeholder: + blocked_hotlinked_title: "Media hosted on another site. Click to open in a new tab." + blocked_hotlinked: "External Media" hidden_bidi_character: "Bidirectional characters can change the order that text is rendered. This could be used to obscure malicious code." has_likes: one: "%{count} Like" @@ -1514,9 +1519,11 @@ en: contact_email: "Email address of key contact responsible for this site. Used for critical notifications, and also displayed on /about for urgent matters." contact_url: "Contact URL for this site. Displayed on the /about page for urgent matters." crawl_images: "Retrieve images from remote URLs to insert the correct width and height dimensions." - download_remote_images_to_local: "Convert remote images to local images by downloading them; this prevents broken images." + download_remote_images_to_local: "Convert remote (hotlinked) images to local images by downloading them; This preserves content even if the images are removed from the remote site in future." download_remote_images_threshold: "Minimum disk space necessary to download remote images locally (in percent)" disabled_image_download_domains: "Remote images will never be downloaded from these domains. Pipe-delimited list." + block_hotlinked_media: "Prevent users from introducing remote (hotlinked) media in their posts. Remote media which is not downloaded via 'download_remote_images_to_local' will be replaced with a placeholder link." + block_hotlinked_media_exceptions: "A list of base URLs which are exempt from the block_hotlinked_media setting. Include the protocol (e.g. https://example.com)." editing_grace_period: "For (n) seconds after posting, editing will not create a new version in the post history." editing_grace_period_max_diff: "Maximum number of character changes allowed in editing grace period, if more changed store another post revision (trust level 0 and 1)" editing_grace_period_max_diff_high_trust: "Maximum number of character changes allowed in editing grace period, if more changed store another post revision (trust level 2 and up)" diff --git a/config/site_settings.yml b/config/site_settings.yml index eca17bcab96..32285973e94 100644 --- a/config/site_settings.yml +++ b/config/site_settings.yml @@ -1326,6 +1326,12 @@ files: type: list list_type: simple default: "" + block_hotlinked_media: + default: false + block_hotlinked_media_exceptions: + default: "" + type: list + regex: '\A((https?:\/\/.+)(\|https?:\/\/.+[|$])*)?\z' create_thumbnails: true clean_up_uploads: true clean_orphan_uploads_grace_period_hours: 48 diff --git a/lib/cooked_post_processor.rb b/lib/cooked_post_processor.rb index 189c048dcf3..340559a8392 100644 --- a/lib/cooked_post_processor.rb +++ b/lib/cooked_post_processor.rb @@ -42,6 +42,7 @@ class CookedPostProcessor remove_full_quote_on_direct_reply if new_post post_process_oneboxes post_process_images + add_blocked_hotlinked_media_placeholders post_process_quotes optimize_urls remove_user_ids @@ -120,7 +121,7 @@ class CookedPostProcessor def extract_images # all images with a src attribute - @doc.css("img[src]") - + @doc.css("img[src], img[#{PrettyText::BLOCKED_HOTLINKED_SRC_ATTR}]") - # minus data images @doc.css("img[src^='data']") - # minus emojis @@ -371,7 +372,7 @@ class CookedPostProcessor def process_hotlinked_image(img) @hotlinked_map ||= @post.post_hotlinked_media.preload(:upload).map { |r| [r.url, r] }.to_h - normalized_src = PostHotlinkedMedia.normalize_src(img["src"]) + normalized_src = PostHotlinkedMedia.normalize_src(img["src"] || img[PrettyText::BLOCKED_HOTLINKED_SRC_ATTR]) info = @hotlinked_map[normalized_src] still_an_image = true @@ -384,11 +385,37 @@ class CookedPostProcessor still_an_image = false elsif info&.downloaded? && upload = info&.upload img["src"] = UrlHelper.cook_url(upload.url, secure: @with_secure_media) + img.delete(PrettyText::BLOCKED_HOTLINKED_SRC_ATTR) end still_an_image end + def add_blocked_hotlinked_media_placeholders + @doc.css([ + "[#{PrettyText::BLOCKED_HOTLINKED_SRC_ATTR}]", + "[#{PrettyText::BLOCKED_HOTLINKED_SRCSET_ATTR}]", + ].join(',')).each do |el| + src = el[PrettyText::BLOCKED_HOTLINKED_SRC_ATTR] || + el[PrettyText::BLOCKED_HOTLINKED_SRCSET_ATTR]&.split(',')&.first&.split(' ')&.first + + if el.name == "img" + add_blocked_hotlinked_image_placeholder!(el) + next + end + + if ["video", "audio"].include?(el.parent.name) + el = el.parent + end + + if el.parent.classes.include?("video-container") + el = el.parent + end + + add_blocked_hotlinked_media_placeholder!(el, src) + end + end + def is_svg?(img) path = begin diff --git a/lib/cooked_processor_mixin.rb b/lib/cooked_processor_mixin.rb index 8d49865c3c0..975f765b8fa 100644 --- a/lib/cooked_processor_mixin.rb +++ b/lib/cooked_processor_mixin.rb @@ -40,6 +40,8 @@ module CookedProcessorMixin end end + PrettyText.sanitize_hotlinked_media(@doc) + oneboxed_images.each do |img| next if img["src"].blank? @@ -251,6 +253,31 @@ module CookedProcessorMixin true end + def add_blocked_hotlinked_image_placeholder!(el) + el.name = "a" + el.set_attribute("href", el[PrettyText::BLOCKED_HOTLINKED_SRC_ATTR]) + el.set_attribute("class", "blocked-hotlinked-placeholder") + el.set_attribute("title", I18n.t("post.image_placeholder.blocked_hotlinked_title")) + el << "" + el << "#{CGI.escapeHTML(I18n.t("post.image_placeholder.blocked_hotlinked"))}" + + true + end + + def add_blocked_hotlinked_media_placeholder!(el, src) + placeholder = Nokogiri::XML::Node.new("a", el.document) + placeholder.name = "a" + placeholder.set_attribute("href", src) + placeholder.set_attribute("class", "blocked-hotlinked-placeholder") + placeholder.set_attribute("title", I18n.t("post.media_placeholder.blocked_hotlinked_title")) + placeholder << "" + placeholder << "#{CGI.escapeHTML(I18n.t("post.media_placeholder.blocked_hotlinked"))}" + + el.replace(placeholder) + + true + end + def oneboxed_images @doc.css(".onebox-body img, .onebox img, img.onebox") end diff --git a/lib/pretty_text.rb b/lib/pretty_text.rb index 1724700858f..df2f2a96b1b 100644 --- a/lib/pretty_text.rb +++ b/lib/pretty_text.rb @@ -18,6 +18,9 @@ module PrettyText ].freeze DANGEROUS_BIDI_REGEXP = Regexp.new(DANGEROUS_BIDI_CHARACTERS.join("|")).freeze + BLOCKED_HOTLINKED_SRC_ATTR = "data-blocked-hotlinked-src" + BLOCKED_HOTLINKED_SRCSET_ATTR = "data-blocked-hotlinked-srcset" + @mutex = Mutex.new @ctx_init = Mutex.new @@ -318,6 +321,7 @@ module PrettyText add_nofollow = !options[:omit_nofollow] && SiteSetting.add_rel_nofollow_to_user_content add_rel_attributes_to_user_content(doc, add_nofollow) strip_hidden_unicode_bidirectional_characters(doc) + sanitize_hotlinked_media(doc) if SiteSetting.enable_mentions add_mentions(doc, user_id: opts[:user_id]) @@ -348,6 +352,25 @@ module PrettyText end end + def self.sanitize_hotlinked_media(doc) + return if !SiteSetting.block_hotlinked_media + + allowed_pattern = allowed_src_pattern + + doc.css("img[src], source[src], source[srcset], track[src]").each do |el| + if el["src"] && !el["src"].match?(allowed_pattern) + el[PrettyText::BLOCKED_HOTLINKED_SRC_ATTR] = el.delete("src") + end + + if el["srcset"] + srcs = el["srcset"].split(',').map { |e| e.split(' ', 2)[0].presence } + if srcs.any? { |src| !src.match?(allowed_pattern) } + el[PrettyText::BLOCKED_HOTLINKED_SRCSET_ATTR] = el.delete("srcset") + end + end + end + end + def self.add_rel_attributes_to_user_content(doc, add_nofollow) allowlist = [] @@ -673,4 +696,25 @@ module PrettyText mentions end + def self.allowed_src_pattern + allowed_src_prefixes = [ + Discourse.base_path, + Discourse.base_url, + GlobalSetting.s3_cdn_url, + GlobalSetting.cdn_url, + SiteSetting.external_emoji_url.presence, + *SiteSetting.block_hotlinked_media_exceptions.split("|") + ] + + patterns = allowed_src_prefixes.compact.map do |url| + pattern = Regexp.escape(url) + + # If 'https://example.com' is allowed, ensure 'https://example.com.blah.com' is not + pattern += '(?:/|\z)' if !pattern.ends_with?("\/") + + pattern + end + + /\A(data:|#{patterns.join("|")})/ + end end diff --git a/spec/integration/blocked_hotlinked_media_spec.rb b/spec/integration/blocked_hotlinked_media_spec.rb new file mode 100644 index 00000000000..5ea328c4f5c --- /dev/null +++ b/spec/integration/blocked_hotlinked_media_spec.rb @@ -0,0 +1,156 @@ +# frozen_string_literal: true + +describe "hotlinked media blocking" do + let(:hotlinked_url) { "http://example.com/images/2/2e/Longcat1.png" } + let(:onebox_url) { "http://example.com/onebox" } + let(:png) { Base64.decode64("R0lGODlhAQABALMAAAAAAIAAAACAAICAAAAAgIAAgACAgMDAwICAgP8AAAD/AP//AAAA//8A/wD//wBiZCH5BAEAAA8ALAAAAAABAAEAAAQC8EUAOw==") } + + before do + SiteSetting.download_remote_images_to_local = false + stub_request(:get, hotlinked_url).to_return(body: png, headers: { "Content-Type" => "image/png" }) + stub_image_size + end + + it "normally allows hotlinked images" do + post = Fabricate(:post, raw: "") + expect(post.cooked).to have_tag("img", with: { "src" => hotlinked_url }) + end + + context "with hotlinked media blocked, before post-processing" do + before do + SiteSetting.block_hotlinked_media = true + Oneboxer.stubs(:cached_onebox).returns("") + end + + it "blocks hotlinked images" do + post = Fabricate(:post, raw: "") + expect(post.cooked).not_to have_tag("img[src]") + expect(post.cooked).to have_tag("img", with: { PrettyText::BLOCKED_HOTLINKED_SRC_ATTR => hotlinked_url }) + end + + it "blocks hotlinked videos with src" do + post = Fabricate(:post, raw: "![alt text|video](#{hotlinked_url})") + expect(post.cooked).not_to have_tag("video source[src]") + expect(post.cooked).to have_tag("video source", with: { PrettyText::BLOCKED_HOTLINKED_SRC_ATTR => hotlinked_url }) + end + + it "blocks hotlinked videos with srcset" do + srcset = "#{hotlinked_url} 1x,https://example.com 2x" + post = Fabricate(:post, raw: "") + expect(post.cooked).not_to have_tag("video source[srcset]") + expect(post.cooked).to have_tag("video source", with: { PrettyText::BLOCKED_HOTLINKED_SRCSET_ATTR => srcset }) + end + + it "blocks hotlinked audio" do + post = Fabricate(:post, raw: "![alt text|audio](#{hotlinked_url})") + expect(post.cooked).not_to have_tag("audio source[src]") + expect(post.cooked).to have_tag("audio source", with: { PrettyText::BLOCKED_HOTLINKED_SRC_ATTR => hotlinked_url }) + end + + it "blocks hotlinked onebox content when cached (post_analyzer)" do + post = Fabricate(:post, raw: "#{onebox_url}") + expect(post.cooked).not_to have_tag("img[src]") + expect(post.cooked).to have_tag("img", with: { PrettyText::BLOCKED_HOTLINKED_SRC_ATTR => hotlinked_url }) + end + + it "allows relative URLs" do + src = "/assets/images/blah.png" + post = Fabricate(:post, raw: "![](#{src})") + expect(post.cooked).to have_tag("img", with: { src: src }) + end + + it "allows data URIs" do + src = "" + post = Fabricate(:post, raw: "![](#{src})") + expect(post.cooked).to have_tag("img", with: { src: src }) + end + + it "allows an exception" do + post = Fabricate :post, raw: <<~RAW + ![](https://example.com) + ![](https://example.com/myimage.png) + ![](https://example.com.malicious.com/myimage.png) + ![](https://malicious.invalid/https://example.com) + RAW + + expect(post.cooked).not_to have_tag("img[src]") + + SiteSetting.block_hotlinked_media_exceptions = "https://example.com" + + post.rebake! + post.reload + expect(post.cooked).to have_tag("img", with: { "src" => "https://example.com" }) + expect(post.cooked).to have_tag("img", with: { "src" => "https://example.com/myimage.png" }) + expect(post.cooked).to have_tag("img", with: { PrettyText::BLOCKED_HOTLINKED_SRC_ATTR => "https://example.com.malicious.com/myimage.png" }) + expect(post.cooked).to have_tag("img", with: { PrettyText::BLOCKED_HOTLINKED_SRC_ATTR => "https://malicious.invalid/https://example.com" }) + end + + it "allows multiple exceptions" do + post = Fabricate :post, raw: <<~RAW + ![](https://example.com) + ![](https://exampleb.com/myimage.png) + RAW + + expect(post.cooked).not_to have_tag("img[src]") + + SiteSetting.block_hotlinked_media_exceptions = "https://example.com|https://exampleb.com" + + post.rebake! + post.reload + expect(post.cooked).to have_tag("img", with: { "src" => "https://example.com" }) + expect(post.cooked).to have_tag("img", with: { "src" => "https://exampleb.com/myimage.png" }) + end + end + + context "with hotlinked media blocked, with post-processing" do + before do + SiteSetting.block_hotlinked_media = true + Jobs.run_immediately! + Oneboxer.stubs(:onebox).returns("") + end + + it "renders placeholders for all media types (CookedPostProcessor)" do + post = Fabricate :post, raw: <<~RAW + + + ![alt text|video](#{hotlinked_url}) + + ![alt text|audio](#{hotlinked_url}) + + #{onebox_url} + RAW + post.rebake! + post.reload + expect(post.cooked).not_to have_tag("img") + expect(post.cooked).not_to have_tag("video") + expect(post.cooked).not_to have_tag("audio") + expect(post.cooked).to have_tag( + "a.blocked-hotlinked-placeholder[href^='http://example.com'][rel='noopener nofollow ugc']", + count: 4 + ) + end + end + + context "with hotlinked media blocked, and download_remote_images_to_local enabled" do + before do + SiteSetting.block_hotlinked_media = true + SiteSetting.download_remote_images_to_local = true + Oneboxer.stubs(:onebox).returns("") + Jobs.run_immediately! + end + + it "can still download remote images after they're blocked" do + post = Fabricate :post, raw: <<~RAW + + + #{onebox_url} + RAW + post.rebake! + post.reload + expect(post.uploads.count).to eq(1) + upload = post.uploads.first + expect(post.cooked).to have_tag("img", count: 2) + expect(post.cooked).to have_tag("img[src$=\"#{upload.url}\"]", count: 2) + end + end +end