diff --git a/lib/html_to_markdown.rb b/lib/html_to_markdown.rb index a17a78eb2b0..a3eb5e8aaeb 100644 --- a/lib/html_to_markdown.rb +++ b/lib/html_to_markdown.rb @@ -31,7 +31,7 @@ class HtmlToMarkdown def remove_whitespaces! @doc.traverse do |node| - if node.is_a? Nokogiri::XML::Text + if node.is_a?(Nokogiri::XML::Text) && node.parent.name != "pre" node.content = node.content.gsub(/\A[[:space:]]+/, "") if node.previous_element&.description&.block? node.content = node.content.gsub(/\A[[:space:]]+/, "") if node.previous_element.nil? && node.parent.description&.block? node.content = node.content.gsub(/[[:space:]]+\z/, "") if node.next_element&.description&.block? @@ -220,10 +220,21 @@ class HtmlToMarkdown end def visit_text(node) + top_block = @stack[-1] + + if top_block.name == "pre" + top_block.markdown << node.text + return + end + node.content = node.content.gsub(/\A[[:space:]]+/, "") if node.previous_element.nil? && EMPHASIS.include?(node.parent.name) - indent = node.text[/^\s+/] || "" + + if top_block.markdown.present? && indent = node.text[/^\s+/] + top_block.markdown << indent + end + text = node.text.gsub(/^\s+/, "").gsub(/\s{2,}/, " ") - @stack[-1].markdown << [indent, text].join("") + top_block.markdown << text end def format_block diff --git a/spec/components/html_to_markdown_spec.rb b/spec/components/html_to_markdown_spec.rb index aa89ac351c7..266e875aca6 100644 --- a/spec/components/html_to_markdown_spec.rb +++ b/spec/components/html_to_markdown_spec.rb @@ -204,6 +204,7 @@ describe HtmlToMarkdown do expect(html_to_markdown("
var foo = 'bar';
")).to eq("```\nvar foo = 'bar';\n```") expect(html_to_markdown("
var foo = 'bar';
")).to eq("```\nvar foo = 'bar';\n```") expect(html_to_markdown(%Q{
var foo = 'bar';
})).to eq("```javascript\nvar foo = 'bar';\n```") + expect(html_to_markdown("
    function f() {\n        console.log('Hello world!');\n    }
")).to eq("```\n function f() {\n console.log('Hello world!');\n }\n```") end it "supports
 inside 
" do @@ -220,6 +221,7 @@ describe HtmlToMarkdown do it "handles

" do expect(html_to_markdown("

1st paragraph

2nd paragraph

")).to eq("1st paragraph\n\n2nd paragraph") + expect(html_to_markdown("

1st paragraph

\n

2nd paragraph\n 2nd paragraph

\n

3rd paragraph

")).to eq("1st paragraph\n\n2nd paragraph\n2nd paragraph\n\n3rd paragraph") end it "handles
" do