diff --git a/app/assets/javascripts/application.js b/app/assets/javascripts/application.js index 6cf6807169d..81e09b2adfd 100644 --- a/app/assets/javascripts/application.js +++ b/app/assets/javascripts/application.js @@ -10,7 +10,6 @@ //= require ./deprecated // Stuff we need to load first -//= require ./discourse/helpers/parse-html //= require ./discourse/lib/to-markdown //= require ./discourse/lib/utilities //= require ./discourse/lib/page-visible diff --git a/app/assets/javascripts/discourse/helpers/parse-html.js.es6 b/app/assets/javascripts/discourse/helpers/parse-html.js.es6 deleted file mode 100644 index c9469fa6b9f..00000000000 --- a/app/assets/javascripts/discourse/helpers/parse-html.js.es6 +++ /dev/null @@ -1,8 +0,0 @@ -/* global Tautologistics */ -export default function parseHTML(rawHtml) { - const builder = new Tautologistics.NodeHtmlParser.HtmlBuilder(); - const parser = new Tautologistics.NodeHtmlParser.Parser(builder); - - parser.parseComplete(rawHtml); - return builder.dom; -} diff --git a/app/assets/javascripts/discourse/lib/to-markdown.js.es6 b/app/assets/javascripts/discourse/lib/to-markdown.js.es6 index ee3bd1e0e3e..8e4eaa48832 100644 --- a/app/assets/javascripts/discourse/lib/to-markdown.js.es6 +++ b/app/assets/javascripts/discourse/lib/to-markdown.js.es6 @@ -1,5 +1,3 @@ -import parseHTML from "discourse/helpers/parse-html"; - const trimLeft = text => text.replace(/^\s+/, ""); const trimRight = text => text.replace(/\s+$/, ""); const countPipes = text => (text.replace(/\\\|/, "").match(/\|/g) || []).length; @@ -495,10 +493,9 @@ function tags() { class Element { constructor(element, parent, previous, next) { this.name = element.name; - this.type = element.type; this.data = element.data; this.children = element.children; - this.attributes = element.attributes || {}; + this.attributes = element.attributes; if (parent) { this.parent = parent; @@ -554,14 +551,7 @@ class Element { } toMarkdown() { - switch (this.type) { - case "text": - return this.text(); - break; - case "tag": - return this.tag().toMarkdown(); - break; - } + return this.name === "#text" ? this.text() : this.tag().toMarkdown(); } filterParentNames(names) { @@ -628,7 +618,42 @@ function putPlaceholders(html) { match = codeRegEx.exec(origHtml); } - const elements = parseHTML(trimUnwanted(html)); + const transformNode = node => { + if (node.nodeName !== "#text" && node.length !== undefined) { + const ret = []; + for (let i = 0; i < node.length; ++i) { + if (node[i].nodeName !== "#comment") { + ret.push(transformNode(node[i])); + } + } + return ret; + } + + const ret = { + name: node.nodeName.toLowerCase(), + data: node.data, + children: [], + attributes: {} + }; + + if (node.nodeName === "#text") { + return ret; + } + + for (let i = 0; i < node.childNodes.length; ++i) { + if (node.childNodes[i].nodeName !== "#comment") { + ret.children.push(transformNode(node.childNodes[i])); + } + } + + for (let i = 0; i < node.attributes.length; ++i) { + ret.attributes[node.attributes[i].name] = node.attributes[i].value; + } + + return ret; + }; + + const elements = transformNode($.parseHTML(trimUnwanted(html))); return { elements, placeholders }; } diff --git a/app/assets/javascripts/vendor.js b/app/assets/javascripts/vendor.js index 0503964494e..6304b9816ac 100644 --- a/app/assets/javascripts/vendor.js +++ b/app/assets/javascripts/vendor.js @@ -30,5 +30,4 @@ //= require virtual-dom //= require virtual-dom-amd //= require highlight.js -//= require htmlparser.js //= require intersection-observer diff --git a/lib/pretty_text.rb b/lib/pretty_text.rb index 1943417ef25..c32f6034298 100644 --- a/lib/pretty_text.rb +++ b/lib/pretty_text.rb @@ -82,7 +82,6 @@ module PrettyText ctx_load_manifest(ctx, "markdown-it-bundle.js") root_path = "#{Rails.root}/app/assets/javascripts/" - apply_es6_file(ctx, root_path, "discourse/helpers/parse-html") apply_es6_file(ctx, root_path, "discourse/lib/to-markdown") apply_es6_file(ctx, root_path, "discourse/lib/utilities") diff --git a/lib/tasks/javascript.rake b/lib/tasks/javascript.rake index cf5f2e10b71..004fad219cb 100644 --- a/lib/tasks/javascript.rake +++ b/lib/tasks/javascript.rake @@ -50,8 +50,6 @@ task 'javascript:update' do }, { source: 'highlight.js/build/.', destination: 'highlightjs' - }, { - source: 'htmlparser/lib/htmlparser.js' }, { source: 'jquery-resize/jquery.ba-resize.js' }, { diff --git a/test/javascripts/lib/category-badge-test.js.es6 b/test/javascripts/lib/category-badge-test.js.es6 index 0a9f3432ff9..9d0c02e0c52 100644 --- a/test/javascripts/lib/category-badge-test.js.es6 +++ b/test/javascripts/lib/category-badge-test.js.es6 @@ -2,7 +2,6 @@ import createStore from "helpers/create-store"; QUnit.module("lib:category-link"); -import parseHTML from "discourse/helpers/parse-html"; import { categoryBadgeHTML } from "discourse/helpers/category-link"; QUnit.test("categoryBadge without a category", assert => { @@ -18,24 +17,19 @@ QUnit.test("Regular categoryBadge", assert => { color: "ff0", text_color: "f00" }); - const tag = parseHTML(categoryBadgeHTML(category))[0]; + const tag = $.parseHTML(categoryBadgeHTML(category))[0]; - assert.equal(tag.name, "a", "it creates a `a` wrapper tag"); + assert.equal(tag.tagName, "A", "it creates a `a` wrapper tag"); assert.equal( - tag.attributes["class"].trim(), + tag.className.trim(), "badge-wrapper", "it has the correct class" ); const label = tag.children[1]; + assert.equal(label.title, "cool description", "it has the correct title"); assert.equal( - label.attributes.title, - "cool description", - "it has the correct title" - ); - - assert.equal( - label.children[0].children[0].data, + label.children[0].innerText, "hello", "it has the category name" ); @@ -44,10 +38,10 @@ QUnit.test("Regular categoryBadge", assert => { QUnit.test("undefined color", assert => { const store = createStore(); const noColor = store.createRecord("category", { name: "hello", id: 123 }); - const tag = parseHTML(categoryBadgeHTML(noColor))[0]; + const tag = $.parseHTML(categoryBadgeHTML(noColor))[0]; assert.blank( - tag.attributes.style, + tag.attributes["style"], "it has no color style because there are no colors" ); }); @@ -89,11 +83,11 @@ QUnit.test("category names are wrapped in dir-spans", assert => { id: 234 }); - let tag = parseHTML(categoryBadgeHTML(rtlCategory))[0]; + let tag = $.parseHTML(categoryBadgeHTML(rtlCategory))[0]; let dirSpan = tag.children[1].children[0]; - assert.equal(dirSpan.attributes.dir, "rtl"); + assert.equal(dirSpan.dir, "rtl"); - tag = parseHTML(categoryBadgeHTML(ltrCategory))[0]; + tag = $.parseHTML(categoryBadgeHTML(ltrCategory))[0]; dirSpan = tag.children[1].children[0]; - assert.equal(dirSpan.attributes.dir, "ltr"); + assert.equal(dirSpan.dir, "ltr"); }); diff --git a/test/javascripts/test_helper.js b/test/javascripts/test_helper.js index 4559fb92205..de9f4a2d4c4 100644 --- a/test/javascripts/test_helper.js +++ b/test/javascripts/test_helper.js @@ -25,7 +25,6 @@ //= require markdown-it-bundle //= require application //= require plugin -//= require htmlparser.js //= require admin //= require sinon/pkg/sinon diff --git a/vendor/assets/javascripts/htmlparser.js b/vendor/assets/javascripts/htmlparser.js deleted file mode 100644 index 04668e06545..00000000000 --- a/vendor/assets/javascripts/htmlparser.js +++ /dev/null @@ -1,993 +0,0 @@ -/*********************************************** -Copyright 2010 - 2012 Chris Winberry . All rights reserved. -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to -deal in the Software without restriction, including without limitation the -rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -sell copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -IN THE SOFTWARE. -***********************************************/ -/* v2.0.0 */ - -(function () { - -var exports; -if (typeof(module) !== 'undefined' && typeof(module.exports) !== 'undefined') { - exports = module.exports; -} else { - exports = {}; - if (!this.Tautologistics) { - this.Tautologistics = {}; - } - if (this.Tautologistics.NodeHtmlParser) { - return; - } - this.Tautologistics.NodeHtmlParser = exports; -} - -function inherits (ctor, superCtor) { - var tempCtor = function(){}; - tempCtor.prototype = superCtor.prototype; - ctor.super_ = superCtor; - ctor.prototype = new tempCtor(); - ctor.prototype.constructor = ctor; -} - -var Mode = { - Text: 'text', - Tag: 'tag', - Attr: 'attr', - CData: 'cdata', - Doctype: 'doctype', - Comment: 'comment' -}; - -function Parser (builder, options) { - this._options = options ? options : { }; - // if (this._options.includeLocation === undefined) { - // this._options.includeLocation = false; //Include position of element (row, col) on nodes - // } - this._validateBuilder(builder); - var self = this; - this._builder = builder; - this.reset(); -} - -if (typeof(module) !== 'undefined' && typeof(module.exports) !== 'undefined') { - - var Stream = require('stream'); - inherits(Parser, Stream); - - Parser.prototype.writable = true; - Parser.prototype.write = function(data) { - if(data instanceof Buffer) { - data = data.toString(); - } - this.parseChunk(data); - }; - - Parser.prototype.end = function(data) { - if (arguments.length) { - this.write(data); - } - this.writable = false; - this.done(); - }; - - Parser.prototype.destroy = function() { - this.writable = false; - }; - -} - - //**Public**// - Parser.prototype.reset = function Parser$reset () { - this._state = { - mode: Mode.Text, - pos: 0, - data: null, - pendingText: null, - pendingWrite: null, - lastTag: null, - isScript: false, - needData: false, - output: [], - done: false//, - // line: 1, - // col: 1 - }; - this._builder.reset(); - }; - - Parser.prototype.parseChunk = function Parser$parseChunk (chunk) { - this._state.needData = false; - this._state.data = (this._state.data !== null) ? - this._state.data.substr(this.pos) + chunk - : - chunk - ; - while (this._state.pos < this._state.data.length && !this._state.needData) { - this._parse(this._state); - } - }; - - Parser.prototype.parseComplete = function Parser$parseComplete (data) { - this.reset(); - this.parseChunk(data); - this.done(); - }; - - Parser.prototype.done = function Parser$done () { - this._state.done = true; - this._parse(this._state); - this._flushWrite(); - this._builder.done(); - }; - - //**Private**// - Parser.prototype._validateBuilder = function Parser$_validateBuilder (builder) { - if ((typeof builder) != "object") { - throw new Error("Builder is not an object"); - } - if ((typeof builder.reset) != "function") { - throw new Error("Builder method 'reset' is invalid"); - } - if ((typeof builder.done) != "function") { - throw new Error("Builder method 'done' is invalid"); - } - if ((typeof builder.write) != "function") { - throw new Error("Builder method 'write' is invalid"); - } - if ((typeof builder.error) != "function") { - throw new Error("Builder method 'error' is invalid"); - } - }; - - Parser.prototype._parse = function Parser$_parse () { - switch (this._state.mode) { - case Mode.Text: - return this._parseText(this._state); - case Mode.Tag: - return this._parseTag(this._state); - case Mode.Attr: - return this._parseAttr(this._state); - case Mode.CData: - return this._parseCData(this._state); - case Mode.Doctype: - return this._parseDoctype(this._state); - case Mode.Comment: - return this._parseComment(this._state); - } - }; - - Parser.prototype._writePending = function Parser$_writePending (node) { - if (!this._state.pendingWrite) { - this._state.pendingWrite = []; - } - this._state.pendingWrite.push(node); - }; - - Parser.prototype._flushWrite = function Parser$_flushWrite () { - if (this._state.pendingWrite) { - for (var i = 0, len = this._state.pendingWrite.length; i < len; i++) { - var node = this._state.pendingWrite[i]; - this._builder.write(node); - } - this._state.pendingWrite = null; - } - }; - - Parser.prototype._write = function Parser$_write (node) { - this._flushWrite(); - this._builder.write(node); - }; - - Parser._re_parseText_scriptClose = /<\s*\/\s*script/ig; - Parser.prototype._parseText = function Parser$_parseText () { - var state = this._state; - var foundPos; - if (state.isScript) { - Parser._re_parseText_scriptClose.lastIndex = state.pos; - foundPos = Parser._re_parseText_scriptClose.exec(state.data); - foundPos = (foundPos) ? - foundPos.index - : - -1 - ; - } else { - foundPos = state.data.indexOf('<', state.pos); - } - var text = (foundPos === -1) ? state.data.substring(state.pos, state.data.length) : state.data.substring(state.pos, foundPos); - if (foundPos < 0 && state.done) { - foundPos = state.data.length; - } - if (foundPos < 0) { - if (state.isScript) { - state.needData = true; - return; - } - if (!state.pendingText) { - state.pendingText = []; - } - state.pendingText.push(state.data.substring(state.pos, state.data.length)); - state.pos = state.data.length; - } else { - if (state.pendingText) { - state.pendingText.push(state.data.substring(state.pos, foundPos)); - text = state.pendingText.join(''); - state.pendingText = null; - } else { - text = state.data.substring(state.pos, foundPos); - } - if (text !== '') { - this._write({ type: Mode.Text, data: text }); - } - state.pos = foundPos + 1; - state.mode = Mode.Tag; - } - }; - - Parser.re_parseTag = /\s*(\/?)\s*([^\s>\/]+)(\s*)\??(>?)/g; - Parser.prototype._parseTag = function Parser$_parseTag () { - var state = this._state; - Parser.re_parseTag.lastIndex = state.pos; - var match = Parser.re_parseTag.exec(state.data); - if (match) { - if (!match[1] && match[2].substr(0, 3) === '!--') { - state.mode = Mode.Comment; - state.pos += 3; - return; - } - if (!match[1] && match[2].substr(0, 8) === '![CDATA[') { - state.mode = Mode.CData; - state.pos += 8; - return; - } - if (!match[1] && match[2].substr(0, 8) === '!DOCTYPE') { - state.mode = Mode.Doctype; - state.pos += 8; - return; - } - if (!state.done && (state.pos + match[0].length) === state.data.length) { - //We're at the and of the data, might be incomplete - state.needData = true; - return; - } - var raw; - if (match[4] === '>') { - state.mode = Mode.Text; - raw = match[0].substr(0, match[0].length - 1); - } else { - state.mode = Mode.Attr; - raw = match[0]; - } - state.pos += match[0].length; - var tag = { type: Mode.Tag, name: match[1] + match[2], raw: raw }; - if (state.mode === Mode.Attr) { - state.lastTag = tag; - } - if (tag.name.toLowerCase() === 'script') { - state.isScript = true; - } else if (tag.name.toLowerCase() === '/script') { - state.isScript = false; - } - if (state.mode === Mode.Attr) { - this._writePending(tag); - } else { - this._write(tag); - } - } else { - //TODO: end of tag? - //TODO: push to pending? - state.needData = true; - } - }; - - Parser.re_parseAttr_findName = /\s*([^=<>\s'"\/]+)\s*/g; - Parser.prototype._parseAttr_findName = function Parser$_parseAttr_findName () { - Parser.re_parseAttr_findName.lastIndex = this._state.pos; - var match = Parser.re_parseAttr_findName.exec(this._state.data); - if (!match) { - return null; - } - if (this._state.pos + match[0].length !== Parser.re_parseAttr_findName.lastIndex) { - return null; - } - return { - match: match[0] - , name: match[1] - }; - }; - Parser.re_parseAttr_findValue = /\s*=\s*(?:'([^']*)'|"([^"]*)"|([^'"\s\/>]+))\s*/g; - Parser.re_parseAttr_findValue_last = /\s*=\s*['"]?(.*)$/g; - Parser.prototype._parseAttr_findValue = function Parser$_parseAttr_findValue () { - var state = this._state; - Parser.re_parseAttr_findValue.lastIndex = state.pos; - var match = Parser.re_parseAttr_findValue.exec(state.data); - if (!match) { - if (!state.done) { - return null; - } - Parser.re_parseAttr_findValue_last.lastIndex = state.pos; - match = Parser.re_parseAttr_findValue_last.exec(state.data); - if (!match) { - return null; - } - return { - match: match[0] - , value: (match[1] !== '') ? match[1] : null - }; - } - if (state.pos + match[0].length !== Parser.re_parseAttr_findValue.lastIndex) { - return null; - } - return { - match: match[0] - , value: match[1] || match[2] || match[3] - }; - }; - Parser.re_parseAttr_splitValue = /\s*=\s*['"]?/g; - Parser.re_parseAttr_selfClose = /(\s*\/\s*)(>?)/g; - Parser.prototype._parseAttr = function Parser$_parseAttr () { - var state = this._state; - var name_data = this._parseAttr_findName(state); - if (!name_data || name_data.name === '?') { - Parser.re_parseAttr_selfClose.lastIndex = state.pos; - var matchTrailingSlash = Parser.re_parseAttr_selfClose.exec(state.data); - if (matchTrailingSlash && matchTrailingSlash.index === state.pos) { - if (!state.done && !matchTrailingSlash[2] && state.pos + matchTrailingSlash[0].length === state.data.length) { - state.needData = true; - return; - } - state.lastTag.raw += matchTrailingSlash[1]; - // state.output.push({ type: Mode.Tag, name: '/' + state.lastTag.name, raw: null }); - this._write({ type: Mode.Tag, name: '/' + state.lastTag.name, raw: null }); - state.pos += matchTrailingSlash[1].length; - } - var foundPos = state.data.indexOf('>', state.pos); - if (foundPos < 0) { - if (state.done) { //TODO: is this needed? - state.lastTag.raw += state.data.substr(state.pos); - state.pos = state.data.length; - return; - } - state.needData = true; - } else { - // state.lastTag = null; - state.pos = foundPos + 1; - state.mode = Mode.Text; - } - return; - } - if (!state.done && state.pos + name_data.match.length === state.data.length) { - state.needData = true; - return null; - } - state.pos += name_data.match.length; - var value_data = this._parseAttr_findValue(state); - if (value_data) { - if (!state.done && state.pos + value_data.match.length === state.data.length) { - state.needData = true; - state.pos -= name_data.match.length; - return; - } - state.pos += value_data.match.length; - } else { - Parser.re_parseAttr_splitValue.lastIndex = state.pos; - if (Parser.re_parseAttr_splitValue.exec(state.data)) { - state.needData = true; - state.pos -= name_data.match.length; - return; - } - value_data = { - match: '' - , value: null - }; - } - state.lastTag.raw += name_data.match + value_data.match; - - this._writePending({ type: Mode.Attr, name: name_data.name, data: value_data.value }); - }; - - Parser.re_parseCData_findEnding = /\]{1,2}$/; - Parser.prototype._parseCData = function Parser$_parseCData () { - var state = this._state; - var foundPos = state.data.indexOf(']]>', state.pos); - if (foundPos < 0 && state.done) { - foundPos = state.data.length; - } - if (foundPos < 0) { - Parser.re_parseCData_findEnding.lastIndex = state.pos; - var matchPartialCDataEnd = Parser.re_parseCData_findEnding.exec(state.data); - if (matchPartialCDataEnd) { - state.needData = true; - return; - } - if (!state.pendingText) { - state.pendingText = []; - } - state.pendingText.push(state.data.substr(state.pos, state.data.length)); - state.pos = state.data.length; - state.needData = true; - } else { - var text; - if (state.pendingText) { - state.pendingText.push(state.data.substring(state.pos, foundPos)); - text = state.pendingText.join(''); - state.pendingText = null; - } else { - text = state.data.substring(state.pos, foundPos); - } - this._write({ type: Mode.CData, data: text }); - state.mode = Mode.Text; - state.pos = foundPos + 3; - } - }; - - Parser.prototype._parseDoctype = function Parser$_parseDoctype () { - var state = this._state; - var foundPos = state.data.indexOf('>', state.pos); - if (foundPos < 0 && state.done) { - foundPos = state.data.length; - } - if (foundPos < 0) { - Parser.re_parseCData_findEnding.lastIndex = state.pos; - if (!state.pendingText) { - state.pendingText = []; - } - state.pendingText.push(state.data.substr(state.pos, state.data.length)); - state.pos = state.data.length; - state.needData = true; - } else { - var text; - if (state.pendingText) { - state.pendingText.push(state.data.substring(state.pos, foundPos)); - text = state.pendingText.join(''); - state.pendingText = null; - } else { - text = state.data.substring(state.pos, foundPos); - } - this._write({ type: Mode.Doctype, data: text }); - state.mode = Mode.Text; - state.pos = foundPos + 1; - } - }; - - Parser.re_parseComment_findEnding = /\-{1,2}$/; - Parser.prototype._parseComment = function Parser$_parseComment () { - var state = this._state; - var foundPos = state.data.indexOf('-->', state.pos); - if (foundPos < 0 && state.done) { - foundPos = state.data.length; - } - if (foundPos < 0) { - Parser.re_parseComment_findEnding.lastIndex = state.pos; - var matchPartialCommentEnd = Parser.re_parseComment_findEnding.exec(state.data); - if (matchPartialCommentEnd) { - state.needData = true; - return; - } - if (!state.pendingText) { - state.pendingText = []; - } - state.pendingText.push(state.data.substr(state.pos, state.data.length)); - state.pos = state.data.length; - state.needData = true; - } else { - var text; - if (state.pendingText) { - state.pendingText.push(state.data.substring(state.pos, foundPos)); - text = state.pendingText.join(''); - state.pendingText = null; - } else { - text = state.data.substring(state.pos, foundPos); - } - // state.output.push({ type: Mode.Comment, data: text }); - this._write({ type: Mode.Comment, data: text }); - state.mode = Mode.Text; - state.pos = foundPos + 3; - } - }; - - -function HtmlBuilder (callback, options) { - this.reset(); - this._options = options ? options : { }; - if (this._options.ignoreWhitespace === undefined) { - this._options.ignoreWhitespace = false; //Keep whitespace-only text nodes - } - if (this._options.includeLocation === undefined) { - this._options.includeLocation = false; //Include position of element (row, col) on nodes - } - if (this._options.verbose === undefined) { - this._options.verbose = true; //Keep data property for tags and raw property for all - } - if (this._options.enforceEmptyTags === undefined) { - this._options.enforceEmptyTags = true; //Don't allow children for HTML tags defined as empty in spec - } - if (this._options.caseSensitiveTags === undefined) { - this._options.caseSensitiveTags = false; //Lowercase all tag names - } - if (this._options.caseSensitiveAttr === undefined) { - this._options.caseSensitiveAttr = false; //Lowercase all attribute names - } - if ((typeof callback) == "function") { - this._callback = callback; - } -} - - //**"Static"**// - //HTML Tags that shouldn't contain child nodes - HtmlBuilder._emptyTags = { - area: 1 - , base: 1 - , basefont: 1 - , br: 1 - , col: 1 - , frame: 1 - , hr: 1 - , img: 1 - , input: 1 - , isindex: 1 - , link: 1 - , meta: 1 - , param: 1 - , embed: 1 - , '?xml': 1 - }; - //Regex to detect whitespace only text nodes - HtmlBuilder.reWhitespace = /^\s*$/; - - //**Public**// - //Properties// - HtmlBuilder.prototype.dom = null; //The hierarchical object containing the parsed HTML - //Methods// - //Resets the builder back to starting state - HtmlBuilder.prototype.reset = function HtmlBuilder$reset() { - this.dom = []; - // this._raw = []; - this._done = false; - this._tagStack = []; - this._lastTag = null; - this._tagStack.last = function HtmlBuilder$_tagStack$last () { - return(this.length ? this[this.length - 1] : null); - }; - this._line = 1; - this._col = 1; - }; - //Signals the builder that parsing is done - HtmlBuilder.prototype.done = function HtmlBuilder$done () { - this._done = true; - this.handleCallback(null); - }; - - HtmlBuilder.prototype.error = function HtmlBuilder$error (error) { - this.handleCallback(error); - }; - - HtmlBuilder.prototype.handleCallback = function HtmlBuilder$handleCallback (error) { - if ((typeof this._callback) != "function") { - if (error) { - throw error; - } else { - return; - } - } - this._callback(error, this.dom); - }; - - HtmlBuilder.prototype.isEmptyTag = function HtmlBuilder$isEmptyTag (element) { - var name = element.name.toLowerCase(); - if (name.charAt(0) == '?') { - return true; - } - if (name.charAt(0) == '/') { - name = name.substring(1); - } - return this._options.enforceEmptyTags && !!HtmlBuilder._emptyTags[name]; - }; - - HtmlBuilder.prototype._getLocation = function HtmlBuilder$_getLocation () { - return { line: this._line, col: this._col }; - }; - - // HtmlBuilder.reLineSplit = /(\r\n|\r|\n)/g; - HtmlBuilder.prototype._updateLocation = function HtmlBuilder$_updateLocation (node) { - var positionData = (node.type === Mode.Tag) ? node.raw : node.data; - if (positionData === null) { - return; - } - // var lines = positionData.split(HtmlBuilder.reLineSplit); - var lines = positionData.split("\n"); - this._line += lines.length - 1; - if (lines.length > 1) { - this._col = 1; - } - this._col += lines[lines.length - 1].length; - if (node.type === Mode.Tag) { - this._col += 2; - } else if (node.type === Mode.Comment) { - this._col += 7; - } else if (node.type === Mode.CData) { - this._col += 12; - } - }; - - HtmlBuilder.prototype._copyElement = function HtmlBuilder$_copyElement (element) { - var newElement = { type: element.type }; - - if (this._options.verbose && element['raw'] !== undefined) { - newElement.raw = element.raw; - } - if (element['name'] !== undefined) { - switch (element.type) { - - case Mode.Tag: - newElement.name = this._options.caseSensitiveTags ? - element.name - : - element.name.toLowerCase() - ; - break; - - case Mode.Attr: - newElement.name = this._options.caseSensitiveAttr ? - element.name - : - element.name.toLowerCase() - ; - break; - - default: - newElement.name = this._options.caseSensitiveTags ? - element.name - : - element.name.toLowerCase() - ; - break; - - } - } - if (element['data'] !== undefined) { - newElement.data = element.data; - } - if (element.location) { - newElement.location = { line: element.location.line, col: element.location.col }; - } - - return newElement; - }; - - HtmlBuilder.prototype.write = function HtmlBuilder$write (element) { - // this._raw.push(element); - if (this._done) { - this.handleCallback(new Error("Writing to the builder after done() called is not allowed without a reset()")); - } - if (this._options.includeLocation) { - if (element.type !== Mode.Attr) { - element.location = this._getLocation(); - this._updateLocation(element); - } - } - if (element.type === Mode.Text && this._options.ignoreWhitespace) { - if (HtmlBuilder.reWhitespace.test(element.data)) { - return; - } - } - var parent; - var node; - if (!this._tagStack.last()) { //There are no parent elements - //If the element can be a container, add it to the tag stack and the top level list - if (element.type === Mode.Tag) { - if (element.name.charAt(0) != "/") { //Ignore closing tags that obviously don't have an opening tag - node = this._copyElement(element); - this.dom.push(node); - if (!this.isEmptyTag(node)) { //Don't add tags to the tag stack that can't have children - this._tagStack.push(node); - } - this._lastTag = node; - } - } else if (element.type === Mode.Attr && this._lastTag) { - if (!this._lastTag.attributes) { - this._lastTag.attributes = {}; - } - this._lastTag.attributes[this._options.caseSensitiveAttr ? element.name : element.name.toLowerCase()] = - element.data; - } else { //Otherwise just add to the top level list - this.dom.push(this._copyElement(element)); - } - } else { //There are parent elements - //If the element can be a container, add it as a child of the element - //on top of the tag stack and then add it to the tag stack - if (element.type === Mode.Tag) { - if (element.name.charAt(0) == "/") { - //This is a closing tag, scan the tagStack to find the matching opening tag - //and pop the stack up to the opening tag's parent - var baseName = this._options.caseSensitiveTags ? - element.name.substring(1) - : - element.name.substring(1).toLowerCase() - ; - if (!this.isEmptyTag(element)) { - var pos = this._tagStack.length - 1; - while (pos > -1 && this._tagStack[pos--].name != baseName) { } - if (pos > -1 || this._tagStack[0].name == baseName) { - while (pos < this._tagStack.length - 1) { - this._tagStack.pop(); - } - } - } - } - else { //This is not a closing tag - parent = this._tagStack.last(); - if (element.type === Mode.Attr) { - if (!parent.attributes) { - parent.attributes = {}; - } - parent.attributes[this._options.caseSensitiveAttr ? element.name : element.name.toLowerCase()] = - element.data; - } else { - node = this._copyElement(element); - if (!parent.children) { - parent.children = []; - } - parent.children.push(node); - if (!this.isEmptyTag(node)) { //Don't add tags to the tag stack that can't have children - this._tagStack.push(node); - } - if (element.type === Mode.Tag) { - this._lastTag = node; - } - } - } - } - else { //This is not a container element - parent = this._tagStack.last(); - if (element.type === Mode.Attr) { - if (!parent.attributes) { - parent.attributes = {}; - } - parent.attributes[this._options.caseSensitiveAttr ? element.name : element.name.toLowerCase()] = - element.data; - } else { - if (!parent.children) { - parent.children = []; - } - parent.children.push(this._copyElement(element)); - } - } - } - }; - - - //**Private**// - //Properties// - HtmlBuilder.prototype._options = null; //Builder options for how to behave - HtmlBuilder.prototype._callback = null; //Callback to respond to when parsing done - HtmlBuilder.prototype._done = false; //Flag indicating whether builder has been notified of parsing completed - HtmlBuilder.prototype._tagStack = null; //List of parents to the currently element being processed - //Methods// - - -function RssBuilder (callback) { - RssBuilder.super_.call(this, callback, { ignoreWhitespace: true, verbose: false, enforceEmptyTags: false, caseSensitiveTags: true }); -} -inherits(RssBuilder, HtmlBuilder); - - RssBuilder.prototype.done = function RssBuilder$done () { - var feed = {}; - var feedRoot; - - var found = DomUtils.getElementsByTagName(function (value) { return(value == "rss" || value == "feed"); }, this.dom, false); - if (found.length) { - feedRoot = found[0]; - } - if (feedRoot) { - if (feedRoot.name == "rss") { - feed.type = "rss"; - feedRoot = feedRoot.children[0]; // - feed.id = ""; - try { - feed.title = DomUtils.getElementsByTagName("title", feedRoot.children, false)[0].children[0].data; - } catch (ex) { } - try { - feed.link = DomUtils.getElementsByTagName("link", feedRoot.children, false)[0].children[0].data; - } catch (ex) { } - try { - feed.description = DomUtils.getElementsByTagName("description", feedRoot.children, false)[0].children[0].data; - } catch (ex) { } - try { - feed.updated = new Date(DomUtils.getElementsByTagName("lastBuildDate", feedRoot.children, false)[0].children[0].data); - } catch (ex) { } - try { - feed.author = DomUtils.getElementsByTagName("managingEditor", feedRoot.children, false)[0].children[0].data; - } catch (ex) { } - feed.items = []; - DomUtils.getElementsByTagName("item", feedRoot.children).forEach(function (item, index, list) { - var entry = {}; - try { - entry.id = DomUtils.getElementsByTagName("guid", item.children, false)[0].children[0].data; - } catch (ex) { } - try { - entry.title = DomUtils.getElementsByTagName("title", item.children, false)[0].children[0].data; - } catch (ex) { } - try { - entry.link = DomUtils.getElementsByTagName("link", item.children, false)[0].children[0].data; - } catch (ex) { } - try { - entry.description = DomUtils.getElementsByTagName("description", item.children, false)[0].children[0].data; - } catch (ex) { } - try { - entry.pubDate = new Date(DomUtils.getElementsByTagName("pubDate", item.children, false)[0].children[0].data); - } catch (ex) { } - feed.items.push(entry); - }); - } else { - feed.type = "atom"; - try { - feed.id = DomUtils.getElementsByTagName("id", feedRoot.children, false)[0].children[0].data; - } catch (ex) { } - try { - feed.title = DomUtils.getElementsByTagName("title", feedRoot.children, false)[0].children[0].data; - } catch (ex) { } - try { - feed.link = DomUtils.getElementsByTagName("link", feedRoot.children, false)[0].attributes.href; - } catch (ex) { } - try { - feed.description = DomUtils.getElementsByTagName("subtitle", feedRoot.children, false)[0].children[0].data; - } catch (ex) { } - try { - feed.updated = new Date(DomUtils.getElementsByTagName("updated", feedRoot.children, false)[0].children[0].data); - } catch (ex) { } - try { - feed.author = DomUtils.getElementsByTagName("email", feedRoot.children, true)[0].children[0].data; - } catch (ex) { } - feed.items = []; - DomUtils.getElementsByTagName("entry", feedRoot.children).forEach(function (item, index, list) { - var entry = {}; - try { - entry.id = DomUtils.getElementsByTagName("id", item.children, false)[0].children[0].data; - } catch (ex) { } - try { - entry.title = DomUtils.getElementsByTagName("title", item.children, false)[0].children[0].data; - } catch (ex) { } - try { - entry.link = DomUtils.getElementsByTagName("link", item.children, false)[0].attributes.href; - } catch (ex) { } - try { - entry.description = DomUtils.getElementsByTagName("summary", item.children, false)[0].children[0].data; - } catch (ex) { } - try { - entry.pubDate = new Date(DomUtils.getElementsByTagName("updated", item.children, false)[0].children[0].data); - } catch (ex) { } - feed.items.push(entry); - }); - } - - this.dom = feed; - } - RssBuilder.super_.prototype.done.call(this); - }; - - var DomUtils = { - testElement: function DomUtils$testElement (options, element) { - if (!element) { - return false; - } - - for (var key in options) { - if (!options.hasOwnProperty(key)) { - continue; - } - if (key == "tag_name") { - if (element.type !== Mode.Tag) { - return false; - } - if (!options["tag_name"](element.name)) { - return false; - } - } else if (key == "tag_type") { - if (!options["tag_type"](element.type)) { - return false; - } - } else if (key == "tag_contains") { - if (element.type !== Mode.Text && element.type !== Mode.Comment && element.type !== Mode.CData) { - return false; - } - if (!options["tag_contains"](element.data)) { - return false; - } - } else { - if (!element.attributes || !options[key](element.attributes[key])) { - return false; - } - } - } - - return true; - } - - , getElements: function DomUtils$getElements (options, currentElement, recurse, limit) { - recurse = (recurse === undefined || recurse === null) || !!recurse; - limit = isNaN(parseInt(limit)) ? -1 : parseInt(limit); - - if (!currentElement) { - return([]); - } - - var found = []; - var elementList; - - function getTest (checkVal) { - return function (value) { - return(value == checkVal); - }; - } - for (var key in options) { - if ((typeof options[key]) != "function") { - options[key] = getTest(options[key]); - } - } - - if (DomUtils.testElement(options, currentElement)) { - found.push(currentElement); - } - - if (limit >= 0 && found.length >= limit) { - return(found); - } - - if (recurse && currentElement.children) { - elementList = currentElement.children; - } else if (currentElement instanceof Array) { - elementList = currentElement; - } else { - return(found); - } - - for (var i = 0; i < elementList.length; i++) { - found = found.concat(DomUtils.getElements(options, elementList[i], recurse, limit)); - if (limit >= 0 && found.length >= limit) { - break; - } - } - - return(found); - } - - , getElementById: function DomUtils$getElementById (id, currentElement, recurse) { - var result = DomUtils.getElements({ id: id }, currentElement, recurse, 1); - return(result.length ? result[0] : null); - } - - , getElementsByTagName: function DomUtils$getElementsByTagName (name, currentElement, recurse, limit) { - return(DomUtils.getElements({ tag_name: name }, currentElement, recurse, limit)); - } - - , getElementsByTagType: function DomUtils$getElementsByTagType (type, currentElement, recurse, limit) { - return(DomUtils.getElements({ tag_type: type }, currentElement, recurse, limit)); - } - }; - -exports.Parser = Parser; - -exports.HtmlBuilder = HtmlBuilder; - -exports.RssBuilder = RssBuilder; - -exports.ElementType = Mode; - -exports.DomUtils = DomUtils; - -})();