FEATURE: escape HTML when cooking plaintext emails

This commit is contained in:
Gerhard Schlager 2017-11-15 16:39:29 +01:00
parent cef64e8f03
commit 9207dee69a
5 changed files with 201 additions and 47 deletions

View File

@ -19,7 +19,7 @@ class PostAnalyzer
return raw if cook_method == Post.cook_methods[:raw_html] return raw if cook_method == Post.cook_methods[:raw_html]
if cook_method == Post.cook_methods[:email] if cook_method == Post.cook_methods[:email]
cooked = EmailCook.new(raw).cook cooked = EmailCook.new(raw).cook(opts)
else else
cooked = PrettyText.cook(raw, opts) cooked = PrettyText.cook(raw, opts)
end end

View File

@ -38,6 +38,11 @@ module Email
attr_reader :mail attr_reader :mail
attr_reader :message_id attr_reader :message_id
def self.formats
@formats ||= Enum.new(plaintext: 1,
markdown: 2)
end
def initialize(mail_string) def initialize(mail_string)
raise EmptyEmailError if mail_string.blank? raise EmptyEmailError if mail_string.blank?
@staged_users = [] @staged_users = []
@ -236,9 +241,9 @@ module Email
end end
if text.blank? || (SiteSetting.incoming_email_prefer_html && markdown.present?) if text.blank? || (SiteSetting.incoming_email_prefer_html && markdown.present?)
return [markdown, elided_markdown] return [markdown, elided_markdown, Receiver::formats[:markdown]]
else else
return [text, elided_text] return [text, elided_text, Receiver::formats[:plaintext]]
end end
end end

View File

@ -1,13 +1,19 @@
# A very simple formatter for imported emails require_dependency 'pretty_text'
# A very simple formatter for imported emails
class EmailCook class EmailCook
def self.url_regexp def self.url_regexp
/((?:https?:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.])(?:[^\s()<>]+|\([^\s()<>]+\))+(?:\([^\s()<>]+\)|[^`!()\[\]{};:'".,<>?«»“”‘’\s]))/ @url_regexp ||= /((?:https?:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.])(?:[^\s()<>]+|\([^\s()<>]+\))+(?:\([^\s()<>]+\)|[^`!()\[\]{};:'".,<>?«»“”‘’\s]))/
end
def self.raw_regexp
@raw_regexp ||= /^\[plaintext\]$\n(.*)\n^\[\/plaintext\]$(?:\s^\[attachments\]$\n(.*)\n^\[\/attachments\]$)?(?:\s^\[elided\]$\n(.*)\n^\[\/elided\]$)?/m
end end
def initialize(raw) def initialize(raw)
@raw = raw @raw = raw
@body, @attachment_html, @elided = @raw.scan(EmailCook.raw_regexp).first
end end
def add_quote(result, buffer) def add_quote(result, buffer)
@ -17,53 +23,62 @@ class EmailCook
end end
end end
def link_string!(str) def link_string!(line, unescaped_line)
str.scan(EmailCook.url_regexp).each do |m| unescaped_line = unescaped_line.strip
unescaped_line.scan(EmailCook.url_regexp).each do |m|
url = m[0] url = m[0]
if str.strip == url if unescaped_line == url
# this could be oneboxed # this could be oneboxed
val = %|<a href="#{url}" class="onebox" target="_blank">#{url}</a>| val = %|<a href="#{url}" class="onebox" target="_blank">#{url}</a>|
else else
val = %|<a href="#{url}">#{url}</a>| val = %|<a href="#{url}">#{url}</a>|
end end
str.gsub!(url, val) line.gsub!(url, val)
end end
end end
def cook def htmlify(text)
result = "" result = ""
in_text = false in_text = false
in_quote = false in_quote = false
quote_buffer = "" quote_buffer = ""
@raw.each_line do |l| text.each_line do |line|
if l =~ /^\s*>/ if line =~ /^\s*>/
in_quote = true in_quote = true
link_string!(l) line.sub!(/^[\s>]*/, '')
quote_buffer << l.sub(/^[\s>]*/, '') << "<br>"
unescaped_line = line
line = CGI.escapeHTML(line)
link_string!(line, unescaped_line)
quote_buffer << line << "<br>"
elsif in_quote elsif in_quote
add_quote(result, quote_buffer) add_quote(result, quote_buffer)
quote_buffer = "" quote_buffer = ""
in_quote = false in_quote = false
else else
sz = l.size sz = line.size
link_string!(l) unescaped_line = line
line = CGI.escapeHTML(line)
result << l link_string!(line, unescaped_line)
if sz < 60 if sz < 60
result << "<br>" if in_text && line == "\n"
if in_text
result << "<br>" result << "<br>"
end end
result << line
result << "<br>"
in_text = false in_text = false
else else
result << line
in_text = true in_text = true
end end
end end
@ -77,4 +92,14 @@ class EmailCook
result result
end end
def cook(opts = {})
# fallback to PrettyText if we failed to detect a body
return PrettyText.cook(@raw, opts) if @body.nil?
result = htmlify(@body)
result << "\n<br>" << @attachment_html if @attachment_html.present?
result << "\n<br><br>" << Email::Receiver.elided_html(htmlify(@elided)) if @elided.present?
result
end
end end

View File

@ -1,14 +1,20 @@
require 'rails_helper' require 'rails_helper'
require 'email_cook' require 'email_cook'
require 'pretty_text'
describe EmailCook do describe EmailCook do
it "uses to PrettyText when there is no [plaintext] in raw" do
raw = "**Hello world!**"
expect(cook(raw)).to eq(PrettyText.cook(raw))
end
it 'adds linebreaks to short lines' do it "adds linebreaks to short lines" do
expect(EmailCook.new("hello\nworld\n").cook).to eq("hello\n<br>world\n<br>") raw = plaintext("hello\nworld\n")
expect(cook(raw)).to eq("hello\n<br>world\n<br>")
end end
it "doesn't add linebreaks to long lines" do it "doesn't add linebreaks to long lines" do
long = <<LONG_EMAIL long = plaintext(<<~LONG_EMAIL)
Hello, Hello,
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc convallis volutpat Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc convallis volutpat
@ -16,30 +22,143 @@ risus. Nulla ac faucibus quam, quis cursus lorem. Sed rutrum eget nunc sed accum
Vestibulum feugiat mi vitae turpis tempor dignissim. Vestibulum feugiat mi vitae turpis tempor dignissim.
LONG_EMAIL LONG_EMAIL
long_cooked = <<LONG_COOKED long_cooked = <<~LONG_COOKED.strip!
Hello, Hello,
<br> <br>
<br>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc convallis volutpat <br>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc convallis volutpat
risus. Nulla ac faucibus quam, quis cursus lorem. Sed rutrum eget nunc sed accumsan. risus. Nulla ac faucibus quam, quis cursus lorem. Sed rutrum eget nunc sed accumsan.
Vestibulum feugiat mi vitae turpis tempor dignissim. Vestibulum feugiat mi vitae turpis tempor dignissim.
<br><br> <br>
LONG_COOKED LONG_COOKED
expect(EmailCook.new(long).cook).to eq(long_cooked.strip)
expect(cook(long)).to eq(long_cooked)
end end
it 'creates oneboxed link when the line contains only a link' do it "replaces a blank line with 2 linebreaks" do
expect(EmailCook.new("https://www.eviltrout.com").cook).to eq('<a href="https://www.eviltrout.com" class="onebox" target="_blank">https://www.eviltrout.com</a><br>') long = plaintext(<<~LONG_EMAIL)
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc convallis volutpat
risus.
Nulla ac faucibus quam, quis cursus lorem. Sed rutrum eget nunc sed accumsan.
Vestibulum feugiat mi vitae turpis tempor dignissim.
Stet clita kasd gubergren.
LONG_EMAIL
long_cooked = <<~LONG_COOKED.strip!
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc convallis volutpat
risus.
<br>Nulla ac faucibus quam, quis cursus lorem. Sed rutrum eget nunc sed accumsan.
<br>
<br>Vestibulum feugiat mi vitae turpis tempor dignissim.
<br>
<br>Stet clita kasd gubergren.
<br>
LONG_COOKED
expect(cook(long)).to eq(long_cooked)
end end
it 'autolinks without the beginning of a line' do it "escapes HTML" do
expect(EmailCook.new("my site: https://www.eviltrout.com").cook).to eq('my site: <a href="https://www.eviltrout.com">https://www.eviltrout.com</a><br>') long = plaintext(<<~LONG_EMAIL)
Lorem ipsum dolor sit amet, consectetur adipiscing elit.
<form name="f1" method="post" action="test.html" onsubmit="javascript:showAlert()">
<input type="submit" name="submit" value="Click this button" />
</form>
Nunc convallis volutpat risus.
LONG_EMAIL
long_cooked = <<~LONG_COOKED.strip!
Lorem ipsum dolor sit amet, consectetur adipiscing elit.
<br>
<br>&lt;form name=&quot;f1&quot; method=&quot;post&quot; action=&quot;test.html&quot; onsubmit=&quot;javascript:showAlert()&quot;&gt;
&lt;input type=&quot;submit&quot; name=&quot;submit&quot; value=&quot;Click this button&quot; /&gt;
&lt;/form&gt;
<br>
<br>Nunc convallis volutpat risus.
<br>
LONG_COOKED
expect(cook(long)).to eq(long_cooked)
end end
it 'autolinks without the end of a line' do it "creates oneboxed link when the line contains only a link" do
expect(EmailCook.new("https://www.eviltrout.com is my site").cook).to eq('<a href="https://www.eviltrout.com">https://www.eviltrout.com</a> is my site<br>') raw = plaintext("https://www.eviltrout.com")
expect(cook(raw)).to eq('<a href="https://www.eviltrout.com" class="onebox" target="_blank">https://www.eviltrout.com</a><br>')
end end
it 'links even within a quote' do it "autolinks without the beginning of a line" do
expect(EmailCook.new("> https://www.eviltrout.com").cook).to eq('<blockquote><a href="https://www.eviltrout.com">https://www.eviltrout.com</a><br></blockquote>') raw = plaintext("my site: https://www.eviltrout.com")
expect(cook(raw)).to eq('my site: <a href="https://www.eviltrout.com">https://www.eviltrout.com</a><br>')
end
it "autolinks without the end of a line" do
raw = plaintext("https://www.eviltrout.com is my site")
expect(cook(raw)).to eq('<a href="https://www.eviltrout.com">https://www.eviltrout.com</a> is my site<br>')
end
it "links even within a quote" do
raw = plaintext("> https://www.eviltrout.com is my site")
expect(cook(raw)).to eq('<blockquote><a href="https://www.eviltrout.com">https://www.eviltrout.com</a> is my site<br></blockquote>')
end
it "it works and does not interpret Markdown in plaintext and elided" do
long = <<~LONG_EMAIL
[plaintext]
*Lorem ipsum* dolor sit amet, consectetur adipiscing elit.
[/plaintext]
[attachments]
<img src='some_image.png' width='100' height='100'>
[/attachments]
[elided]
At vero eos *et accusam* et justo duo dolores et ea rebum.
[/elided]
LONG_EMAIL
long_cooked = <<~LONG_COOKED
*Lorem ipsum* dolor sit amet, consectetur adipiscing elit.<br>
<br><img src='some_image.png' width='100' height='100'>
<br><br>
<details class='elided'>
<summary title='Show trimmed content'>&#183;&#183;&#183;</summary>
At vero eos *et accusam* et justo duo dolores et ea rebum.<br>
</details>
LONG_COOKED
expect(cook(long)).to eq(long_cooked)
end
it "works without attachments" do
long = <<~LONG_EMAIL
[plaintext]
*Lorem ipsum* dolor sit amet, consectetur adipiscing elit.
[/plaintext]
[elided]
At vero eos *et accusam* et justo duo dolores et ea rebum.
[/elided]
LONG_EMAIL
long_cooked = <<~LONG_COOKED
*Lorem ipsum* dolor sit amet, consectetur adipiscing elit.<br>
<br><br>
<details class='elided'>
<summary title='Show trimmed content'>&#183;&#183;&#183;</summary>
At vero eos *et accusam* et justo duo dolores et ea rebum.<br>
</details>
LONG_COOKED
expect(cook(long)).to eq(long_cooked)
end
def cook(raw)
EmailCook.new(raw).cook
end
def plaintext(text)
"[plaintext]\n#{text}\n[/plaintext]"
end end
end end

View File

@ -38,11 +38,16 @@ describe PostAnalyzer do
expect(cooked).to eq('Hello <div/> world') expect(cooked).to eq('Hello <div/> world')
end end
it "does not interpret Markdown when cook_method is 'email'" do it "does not interpret Markdown when cook_method is 'email' and raw contains plaintext" do
cooked = post_analyzer.cook('*this is not italic* and here is a link: https://www.example.com', cook_method: Post.cook_methods[:email]) cooked = post_analyzer.cook("[plaintext]\n*this is not italic* and here is a link: https://www.example.com\n[/plaintext]", cook_method: Post.cook_methods[:email])
expect(cooked).to eq('*this is not italic* and here is a link: <a href="https://www.example.com">https://www.example.com</a>') expect(cooked).to eq('*this is not italic* and here is a link: <a href="https://www.example.com">https://www.example.com</a>')
end end
it "does interpret Markdown when cook_method is 'email' and raw does not contain plaintext" do
cooked = post_analyzer.cook('*this is italic*', cook_method: Post.cook_methods[:email])
expect(cooked).to eq('<p><em>this is italic</em></p>')
end
it "does interpret Markdown when cook_method is 'regular'" do it "does interpret Markdown when cook_method is 'regular'" do
cooked = post_analyzer.cook('*this is italic*', cook_method: Post.cook_methods[:regular]) cooked = post_analyzer.cook('*this is italic*', cook_method: Post.cook_methods[:regular])
expect(cooked).to eq('<p><em>this is italic</em></p>') expect(cooked).to eq('<p><em>this is italic</em></p>')