diff --git a/app/assets/javascripts/discourse/lib/to-markdown.js.es6 b/app/assets/javascripts/discourse/lib/to-markdown.js.es6
index 3a4ff590dc6..692d4350ed2 100644
--- a/app/assets/javascripts/discourse/lib/to-markdown.js.es6
+++ b/app/assets/javascripts/discourse/lib/to-markdown.js.es6
@@ -3,6 +3,8 @@ import parseHTML from 'discourse/helpers/parse-html';
 const trimLeft = text => text.replace(/^\s+/,"");
 const trimRight = text => text.replace(/\s+$/,"");
 const countPipes = text => (text.replace(/\\\|/,"").match(/\|/g) || []).length;
+// CSS classes MS Word assigns to the <p> elements it uses to represent list items.
+const msoListClasses = ["MsoListParagraphCxSpFirst", "MsoListParagraphCxSpMiddle", "MsoListParagraphCxSpLast"];
 
 class Tag {
   constructor(name, prefix = "", suffix = "", inline = false) {
@@ -207,7 +209,25 @@ class Tag {
   static li() {
     return class extends Tag.slice("li", "\n") {
       decorate(text) {
-        const indent = this.element.filterParentNames(["ol", "ul"]).slice(1).map(() => "\t").join("");
+        let indent = this.element.filterParentNames(["ol", "ul"]).slice(1).map(() => "\t").join("");
+        const attrs = this.element.attributes;
+
+        if (msoListClasses.includes(attrs.class)) {
+          // Word stores the nesting depth in the inline style ("mso-list:l0 level2 lfo1").
+          // The style may be missing or carry no level, so guard explicitly instead of
+          // letting a TypeError escape and abort the whole conversion.
+          const levelMatch = (attrs.style || "").match(/level(\d)/);
+          if (levelMatch) {
+            indent = Array(parseInt(levelMatch[1], 10)).join("\t") + indent;
+          }
+
+          if (attrs.class === "MsoListParagraphCxSpFirst") {
+            indent = `\n\n${indent}`;
+          } else if (attrs.class === "MsoListParagraphCxSpLast") {
+            text = `${text}\n`;
+          }
+        }
+
         return super.decorate(`${indent}* ${trimLeft(text)}`);
       }
     };
@@ -356,6 +376,14 @@ class Element {
     this.parentNames = this.parentNames || [];
     this.previous = previous;
     this.next = next;
+
+    // MS Word list paragraphs are handled as <li> items of an implicit <ul>.
+    if (this.name === "p") {
+      if (msoListClasses.includes(this.attributes.class)) {
+        this.name = "li";
+        this.parentNames.push("ul");
+      }
+    }
   }
 
   tag() {
@@ -433,7 +461,7 @@ class Element {
   }
 }
 
-function trimUnwantedSpaces(html) {
+function trimUnwanted(html) {
   const body = html.match(/<body[^>]*>([\s\S]*?)<\/body>/);
   html = body ? body[1] : html;
   html = html.replace(/\r|\n| /g, " ");
@@ -443,6 +471,8 @@ function trimUnwantedSpaces(html) {
     html = html.replace(match[0], match[0].replace(/>\s{2,}</, "> <"));
   }
 
+  html = html.replace(/<!\[if !?\S*]>[^!]*<!\[endif]>/g, ""); // to support ms word list tags
+
   return html;
 }
 
@@ -461,7 +491,7 @@ function putPlaceholders(html) {
     match = codeRegEx.exec(origHtml);
   }
 
-  const elements = parseHTML(trimUnwantedSpaces(html));
+  const elements = parseHTML(trimUnwanted(html));
   return { elements, placeholders };
 }
 
diff --git a/test/javascripts/lib/to-markdown-test.js.es6 b/test/javascripts/lib/to-markdown-test.js.es6
index 12e136ec264..4a99ab8138e 100644
--- a/test/javascripts/lib/to-markdown-test.js.es6
+++ b/test/javascripts/lib/to-markdown-test.js.es6
@@ -239,3 +239,50 @@ QUnit.test("converts ol list tag", assert => {
   const markdown = `Testing\n\n1. Item 1\n2. Item 2\n 100. Sub Item 1\n 101. Sub Item 2\n3. Item 3`;
   assert.equal(toMarkdown(html), markdown);
 });
+
+QUnit.test("converts list tag from word", assert => {
+  const html = `Sample<!--StartFragment-->
+  <p class=MsoListParagraphCxSpFirst style='text-indent:-.25in;mso-list:l0 level1 lfo1'>
+    <![if !supportLists]>
+      <span style='font-family:Symbol;mso-fareast-font-family:Symbol;mso-bidi-font-family: Symbol;mso-bidi-font-weight:bold'>
+        <span style='mso-list:Ignore'>·
+          <span style='font:7.0pt "Times New Roman"'> </span>
+        </span>
+      </span>
+    <![endif]>
+    <b>Item 1
+      <o:p></o:p>
+    </b>
+  </p>
+  <p class=MsoListParagraphCxSpMiddle style='text-indent:-.25in;mso-list:l0 level2 lfo1'>
+    <![if !supportLists]>
+      <span style='font-family:Symbol;mso-fareast-font-family:Symbol;mso-bidi-font-family: Symbol;mso-bidi-font-style:italic'>
+        <span style='mso-list:Ignore'>·
+          <span style='font:7.0pt "Times New Roman"'> </span>
+        </span>
+      </span>
+    <![endif]>
+    <i>Item 2
+      <o:p></o:p>
+    </i>
+  </p>
+  <p class=MsoListParagraphCxSpMiddle style='text-indent:-.25in;mso-list:l0 level3 lfo1'>
+    <![if !supportLists]>
+      <span style='font-family:Symbol;mso-fareast-font-family:Symbol;mso-bidi-font-family: Symbol'>
+        <span style='mso-list:Ignore'>·
+          <span style='font:7.0pt "Times New Roman"'> </span>
+        </span>
+      </span>
+    <![endif]>Item 3 </p>
+  <p class=MsoListParagraphCxSpLast style='text-indent:-.25in;mso-list:l0 level1 lfo1'>
+    <![if !supportLists]>
+      <span style='font-family:Symbol;mso-fareast-font-family:Symbol;mso-bidi-font-family: Symbol'>
+        <span style='mso-list:Ignore'>·
+          <span style='font:7.0pt "Times New Roman"'> </span>
+        </span>
+      </span>
+    <![endif]>Item 4</p>
+  <!--EndFragment-->List`;
+  const markdown = `Sample\n\n* **Item 1**\n\t* *Item 2*\n\t\t* Item 3\n* Item 4\n\nList`;
+  assert.equal(toMarkdown(html), markdown);
+});