From 9207dee69a2c6723ac1479d31bddbc72f06edd46 Mon Sep 17 00:00:00 2001 From: Gerhard Schlager Date: Wed, 15 Nov 2017 16:39:29 +0100 Subject: [PATCH] FEATURE: escape HTML when cooking plaintext emails --- app/models/post_analyzer.rb | 2 +- lib/email/receiver.rb | 9 +- lib/email_cook.rb | 59 +++++++--- spec/components/email_cook_spec.rb | 169 ++++++++++++++++++++++++----- spec/models/post_analyzer_spec.rb | 9 +- 5 files changed, 201 insertions(+), 47 deletions(-) diff --git a/app/models/post_analyzer.rb b/app/models/post_analyzer.rb index 2d12b99e776..0d6c2120aef 100644 --- a/app/models/post_analyzer.rb +++ b/app/models/post_analyzer.rb @@ -19,7 +19,7 @@ class PostAnalyzer return raw if cook_method == Post.cook_methods[:raw_html] if cook_method == Post.cook_methods[:email] - cooked = EmailCook.new(raw).cook + cooked = EmailCook.new(raw).cook(opts) else cooked = PrettyText.cook(raw, opts) end diff --git a/lib/email/receiver.rb b/lib/email/receiver.rb index fbf4e7f84df..fb3eacabb52 100644 --- a/lib/email/receiver.rb +++ b/lib/email/receiver.rb @@ -38,6 +38,11 @@ module Email attr_reader :mail attr_reader :message_id + def self.formats + @formats ||= Enum.new(plaintext: 1, + markdown: 2) + end + def initialize(mail_string) raise EmptyEmailError if mail_string.blank? @staged_users = [] @@ -236,9 +241,9 @@ module Email end if text.blank? || (SiteSetting.incoming_email_prefer_html && markdown.present?) - return [markdown, elided_markdown] + return [markdown, elided_markdown, Receiver::formats[:markdown]] else - return [text, elided_text] + return [text, elided_text, Receiver::formats[:plaintext]] end end diff --git a/lib/email_cook.rb b/lib/email_cook.rb index 162643da663..209d940ca3d 100644 --- a/lib/email_cook.rb +++ b/lib/email_cook.rb @@ -1,13 +1,19 @@ -# A very simple formatter for imported emails +require_dependency 'pretty_text' +# A very simple formatter for imported emails class EmailCook def self.url_regexp - /((?:https?:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.])(?:[^\s()<>]+|\([^\s()<>]+\))+(?:\([^\s()<>]+\)|[^`!()\[\]{};:'".,<>?«»“”‘’\s]))/ + @url_regexp ||= /((?:https?:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.])(?:[^\s()<>]+|\([^\s()<>]+\))+(?:\([^\s()<>]+\)|[^`!()\[\]{};:'".,<>?«»“”‘’\s]))/ + end + + def self.raw_regexp + @raw_regexp ||= /^\[plaintext\]$\n(.*)\n^\[\/plaintext\]$(?:\s^\[attachments\]$\n(.*)\n^\[\/attachments\]$)?(?:\s^\[elided\]$\n(.*)\n^\[\/elided\]$)?/m end def initialize(raw) @raw = raw + @body, @attachment_html, @elided = @raw.scan(EmailCook.raw_regexp).first end def add_quote(result, buffer) @@ -17,53 +23,62 @@ class EmailCook end end - def link_string!(str) - str.scan(EmailCook.url_regexp).each do |m| + def link_string!(line, unescaped_line) + unescaped_line = unescaped_line.strip + unescaped_line.scan(EmailCook.url_regexp).each do |m| url = m[0] - if str.strip == url + if unescaped_line == url # this could be oneboxed val = %|#{url}| else val = %|#{url}| end - str.gsub!(url, val) + line.gsub!(url, val) end end - def cook + def htmlify(text) result = "" in_text = false in_quote = false quote_buffer = "" - @raw.each_line do |l| + text.each_line do |line| - if l =~ /^\s*>/ + if line =~ /^\s*>/ in_quote = true - link_string!(l) - quote_buffer << l.sub(/^[\s>]*/, '') << "
" + line.sub!(/^[\s>]*/, '') + + unescaped_line = line + line = CGI.escapeHTML(line) + link_string!(line, unescaped_line) + + quote_buffer << line << "
" elsif in_quote add_quote(result, quote_buffer) quote_buffer = "" in_quote = false else - sz = l.size + sz = line.size - link_string!(l) - - result << l + unescaped_line = line + line = CGI.escapeHTML(line) + link_string!(line, unescaped_line) if sz < 60 - result << "
" - if in_text + if in_text && line == "\n" result << "
" end + result << line + result << "
" + in_text = false else + result << line in_text = true end end @@ -77,4 +92,14 @@ class EmailCook result end + def cook(opts = {}) + # fallback to PrettyText if we failed to detect a body + return PrettyText.cook(@raw, opts) if @body.nil? + + result = htmlify(@body) + result << "\n
" << @attachment_html if @attachment_html.present? + result << "\n

" << Email::Receiver.elided_html(htmlify(@elided)) if @elided.present? + result + end + end diff --git a/spec/components/email_cook_spec.rb b/spec/components/email_cook_spec.rb index 22181c29d9a..a29259c6111 100644 --- a/spec/components/email_cook_spec.rb +++ b/spec/components/email_cook_spec.rb @@ -1,45 +1,164 @@ require 'rails_helper' require 'email_cook' +require 'pretty_text' describe EmailCook do + it "uses to PrettyText when there is no [plaintext] in raw" do + raw = "**Hello world!**" + expect(cook(raw)).to eq(PrettyText.cook(raw)) + end - it 'adds linebreaks to short lines' do - expect(EmailCook.new("hello\nworld\n").cook).to eq("hello\n
world\n
") + it "adds linebreaks to short lines" do + raw = plaintext("hello\nworld\n") + expect(cook(raw)).to eq("hello\n
world\n
") end it "doesn't add linebreaks to long lines" do - long = < -
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc convallis volutpat -risus. Nulla ac faucibus quam, quis cursus lorem. Sed rutrum eget nunc sed accumsan. -Vestibulum feugiat mi vitae turpis tempor dignissim. -

-LONG_COOKED - expect(EmailCook.new(long).cook).to eq(long_cooked.strip) + long_cooked = <<~LONG_COOKED.strip! + Hello, +
+
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc convallis volutpat + risus. Nulla ac faucibus quam, quis cursus lorem. Sed rutrum eget nunc sed accumsan. + Vestibulum feugiat mi vitae turpis tempor dignissim. +
+ LONG_COOKED + + expect(cook(long)).to eq(long_cooked) end - it 'creates oneboxed link when the line contains only a link' do - expect(EmailCook.new("https://www.eviltrout.com").cook).to eq('https://www.eviltrout.com
') + it "replaces a blank line with 2 linebreaks" do + long = plaintext(<<~LONG_EMAIL) + Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc convallis volutpat + risus. + Nulla ac faucibus quam, quis cursus lorem. Sed rutrum eget nunc sed accumsan. + + Vestibulum feugiat mi vitae turpis tempor dignissim. + + Stet clita kasd gubergren. + LONG_EMAIL + + long_cooked = <<~LONG_COOKED.strip! + Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc convallis volutpat + risus. +
Nulla ac faucibus quam, quis cursus lorem. Sed rutrum eget nunc sed accumsan. +
+
Vestibulum feugiat mi vitae turpis tempor dignissim. +
+
Stet clita kasd gubergren. +
+ LONG_COOKED + + expect(cook(long)).to eq(long_cooked) end - it 'autolinks without the beginning of a line' do - expect(EmailCook.new("my site: https://www.eviltrout.com").cook).to eq('my site: https://www.eviltrout.com
') + it "escapes HTML" do + long = plaintext(<<~LONG_EMAIL) + Lorem ipsum dolor sit amet, consectetur adipiscing elit. + +
+ +
+ + Nunc convallis volutpat risus. + LONG_EMAIL + + long_cooked = <<~LONG_COOKED.strip! + Lorem ipsum dolor sit amet, consectetur adipiscing elit. +
+
<form name="f1" method="post" action="test.html" onsubmit="javascript:showAlert()"> + <input type="submit" name="submit" value="Click this button" /> + </form> +
+
Nunc convallis volutpat risus. +
+ LONG_COOKED + + expect(cook(long)).to eq(long_cooked) end - it 'autolinks without the end of a line' do - expect(EmailCook.new("https://www.eviltrout.com is my site").cook).to eq('https://www.eviltrout.com is my site
') + it "creates oneboxed link when the line contains only a link" do + raw = plaintext("https://www.eviltrout.com") + expect(cook(raw)).to eq('https://www.eviltrout.com
') end - it 'links even within a quote' do - expect(EmailCook.new("> https://www.eviltrout.com").cook).to eq('
https://www.eviltrout.com
') + it "autolinks without the beginning of a line" do + raw = plaintext("my site: https://www.eviltrout.com") + expect(cook(raw)).to eq('my site: https://www.eviltrout.com
') + end + + it "autolinks without the end of a line" do + raw = plaintext("https://www.eviltrout.com is my site") + expect(cook(raw)).to eq('https://www.eviltrout.com is my site
') + end + + it "links even within a quote" do + raw = plaintext("> https://www.eviltrout.com is my site") + expect(cook(raw)).to eq('
https://www.eviltrout.com is my site
') + end + + it "it works and does not interpret Markdown in plaintext and elided" do + long = <<~LONG_EMAIL + [plaintext] + *Lorem ipsum* dolor sit amet, consectetur adipiscing elit. + [/plaintext] + [attachments] + + [/attachments] + [elided] + At vero eos *et accusam* et justo duo dolores et ea rebum. + [/elided] + LONG_EMAIL + + long_cooked = <<~LONG_COOKED + *Lorem ipsum* dolor sit amet, consectetur adipiscing elit.
+
+

+ +
+ ··· + At vero eos *et accusam* et justo duo dolores et ea rebum.
+
+ LONG_COOKED + + expect(cook(long)).to eq(long_cooked) + end + + it "works without attachments" do + long = <<~LONG_EMAIL + [plaintext] + *Lorem ipsum* dolor sit amet, consectetur adipiscing elit. + [/plaintext] + [elided] + At vero eos *et accusam* et justo duo dolores et ea rebum. + [/elided] + LONG_EMAIL + + long_cooked = <<~LONG_COOKED + *Lorem ipsum* dolor sit amet, consectetur adipiscing elit.
+

+ +
+ ··· + At vero eos *et accusam* et justo duo dolores et ea rebum.
+
+ LONG_COOKED + + expect(cook(long)).to eq(long_cooked) + end + + def cook(raw) + EmailCook.new(raw).cook + end + + def plaintext(text) + "[plaintext]\n#{text}\n[/plaintext]" end end diff --git a/spec/models/post_analyzer_spec.rb b/spec/models/post_analyzer_spec.rb index 2ae66cef1c0..937b47687cf 100644 --- a/spec/models/post_analyzer_spec.rb +++ b/spec/models/post_analyzer_spec.rb @@ -38,11 +38,16 @@ describe PostAnalyzer do expect(cooked).to eq('Hello
world') end - it "does not interpret Markdown when cook_method is 'email'" do - cooked = post_analyzer.cook('*this is not italic* and here is a link: https://www.example.com', cook_method: Post.cook_methods[:email]) + it "does not interpret Markdown when cook_method is 'email' and raw contains plaintext" do + cooked = post_analyzer.cook("[plaintext]\n*this is not italic* and here is a link: https://www.example.com\n[/plaintext]", cook_method: Post.cook_methods[:email]) expect(cooked).to eq('*this is not italic* and here is a link: https://www.example.com') end + it "does interpret Markdown when cook_method is 'email' and raw does not contain plaintext" do + cooked = post_analyzer.cook('*this is italic*', cook_method: Post.cook_methods[:email]) + expect(cooked).to eq('

this is italic

') + end + it "does interpret Markdown when cook_method is 'regular'" do cooked = post_analyzer.cook('*this is italic*', cook_method: Post.cook_methods[:regular]) expect(cooked).to eq('

this is italic

')