diff --git a/lib/html_to_markdown.rb b/lib/html_to_markdown.rb index 9e18269c664..60f88d9095f 100644 --- a/lib/html_to_markdown.rb +++ b/lib/html_to_markdown.rb @@ -15,10 +15,10 @@ class HtmlToMarkdown def remove_whitespaces! @doc.traverse do |node| if node.is_a? Nokogiri::XML::Text - node.content = node.content.lstrip if node.previous_element&.description&.block? - node.content = node.content.lstrip if node.previous_element.nil? && node.parent.description&.block? - node.content = node.content.rstrip if node.next_element&.description&.block? - node.content = node.content.rstrip if node.next_element.nil? && node.parent.description&.block? + node.content = node.content.gsub(/\A[[:space:]]+/, "") if node.previous_element&.description&.block? + node.content = node.content.gsub(/\A[[:space:]]+/, "") if node.previous_element.nil? && node.parent.description&.block? + node.content = node.content.gsub(/[[:space:]]+\z/, "") if node.next_element&.description&.block? + node.content = node.content.gsub(/[[:space:]]+\z/, "") if node.next_element.nil? && node.parent.description&.block? node.remove if node.content.empty? end end diff --git a/spec/components/html_to_markdown_spec.rb b/spec/components/html_to_markdown_spec.rb index 43651536e31..e5a9c5b8e2c 100644 --- a/spec/components/html_to_markdown_spec.rb +++ b/spec/components/html_to_markdown_spec.rb @@ -7,6 +7,20 @@ describe HtmlToMarkdown do HtmlToMarkdown.new(html).to_markdown end + it "remove whitespaces" do + expect(html_to_markdown(<<-HTML +