mirror of
https://github.com/discourse/discourse.git
synced 2025-02-25 18:55:32 -06:00
Replace Hpricot with Nokogiri
This commit is contained in:
@@ -9,7 +9,7 @@ class CookedPostProcessor
|
||||
@dirty = false
|
||||
@opts = opts
|
||||
@post = post
|
||||
@doc = Hpricot(post.cooked)
|
||||
@doc = Nokogiri::HTML(post.cooked)
|
||||
end
|
||||
|
||||
def dirty?
|
||||
|
||||
@@ -34,7 +34,7 @@ module Oneboxer
|
||||
if Whitelist.allowed?(url)
|
||||
page_html = open(url).read
|
||||
if page_html.present?
|
||||
doc = Hpricot(page_html)
|
||||
doc = Nokogiri::HTML(page_html)
|
||||
|
||||
# See if it has an oembed thing we can use
|
||||
(doc/"link[@type='application/json+oembed']").each do |oembed|
|
||||
@@ -56,7 +56,7 @@ module Oneboxer
|
||||
# Parse URLs out of HTML, returning the document when finished.
|
||||
def self.each_onebox_link(string_or_doc)
|
||||
doc = string_or_doc
|
||||
doc = Hpricot(doc) if doc.is_a?(String)
|
||||
doc = Nokogiri::HTML(doc) if doc.is_a?(String)
|
||||
|
||||
onebox_links = doc.search("a.onebox")
|
||||
if onebox_links.present?
|
||||
|
||||
@@ -22,19 +22,19 @@ module Oneboxer
|
||||
end
|
||||
|
||||
def parse(data)
|
||||
hp = Hpricot(data)
|
||||
html_doc = Nokogiri::HTML(data)
|
||||
|
||||
result = {}
|
||||
result[:title] = hp.at("h1")
|
||||
result[:title] = html_doc.at("h1")
|
||||
result[:title] = result[:title].inner_html if result[:title].present?
|
||||
|
||||
image = hp.at(".main-image img")
|
||||
image = html_doc.at(".main-image img")
|
||||
result[:image] = image['src'] if image
|
||||
|
||||
result[:by_info] = hp.at("#by-line")
|
||||
result[:by_info] = html_doc.at("#by-line")
|
||||
result[:by_info] = BaseOnebox.remove_whitespace(result[:by_info].inner_html) if result[:by_info].present?
|
||||
|
||||
summary = hp.at("#description-and-details-content")
|
||||
summary = html_doc.at("#description-and-details-content")
|
||||
result[:text] = summary.inner_html if summary.present?
|
||||
|
||||
result
|
||||
|
||||
@@ -12,20 +12,20 @@ module Oneboxer
|
||||
|
||||
def parse(data)
|
||||
|
||||
hp = Hpricot(data)
|
||||
html_doc = Nokogiri::HTML(data)
|
||||
|
||||
result = {}
|
||||
|
||||
m = hp.at("h1.doc-banner-title")
|
||||
m = html_doc.at("h1.doc-banner-title")
|
||||
result[:title] = m.inner_text if m
|
||||
|
||||
m = hp.at("div#doc-original-text")
|
||||
m = html_doc.at("div#doc-original-text")
|
||||
if m
|
||||
result[:text] = BaseOnebox.replace_tags_with_spaces(m.inner_html)
|
||||
result[:text] = result[:text][0..MAX_TEXT]
|
||||
end
|
||||
|
||||
m = hp.at("div.doc-banner-icon img")
|
||||
m = html_doc.at("div.doc-banner-icon img")
|
||||
result[:image] = m['src'] if m
|
||||
|
||||
result
|
||||
|
||||
@@ -17,17 +17,17 @@ module Oneboxer
|
||||
|
||||
def parse(data)
|
||||
|
||||
hp = Hpricot(data)
|
||||
html_doc = Nokogiri::HTML(data)
|
||||
|
||||
result = {}
|
||||
|
||||
m = hp.at("h1")
|
||||
m = html_doc.at("h1")
|
||||
result[:title] = m.inner_text if m
|
||||
|
||||
m = hp.at("h4 ~ p")
|
||||
m = html_doc.at("h4 ~ p")
|
||||
result[:text] = m.inner_text[0..MAX_TEXT] if m
|
||||
|
||||
m = hp.at(".product img.artwork")
|
||||
m = html_doc.at(".product img.artwork")
|
||||
result[:image] = m['src'] if m
|
||||
|
||||
result
|
||||
|
||||
@@ -9,7 +9,7 @@ module Oneboxer
|
||||
|
||||
page_html = open(@url).read
|
||||
return nil if page_html.blank?
|
||||
doc = Hpricot(page_html)
|
||||
doc = Nokogiri::HTML(page_html)
|
||||
|
||||
# Flickr's oembed just stopped returning images for no reason. Let's use opengraph instead.
|
||||
open_graph = Oneboxer.parse_open_graph(doc)
|
||||
|
||||
@@ -20,23 +20,23 @@ module Oneboxer
|
||||
|
||||
def parse(data)
|
||||
|
||||
hp = Hpricot(data)
|
||||
html_doc = Nokogiri::HTML(data)
|
||||
|
||||
result = {}
|
||||
|
||||
title = hp.at('title').inner_html
|
||||
title = html_doc.at('title').inner_html
|
||||
result[:title] = title.gsub!(/ - Wikipedia, the free encyclopedia/, '') if title.present?
|
||||
|
||||
# get the first image more than 150 pixels high
|
||||
images = hp.search("img").select { |img| img['height'].to_i > 150 }
|
||||
images = html_doc.search("img").select { |img| img['height'].to_i > 150 }
|
||||
|
||||
result[:image] = "http:#{images[0]["src"]}" unless images.empty?
|
||||
|
||||
# remove the table from mobile layout, as it can contain paras in some rare cases
|
||||
hp.search("table").remove
|
||||
html_doc.search("table").remove
|
||||
|
||||
# get all the paras
|
||||
paras = hp.search("p")
|
||||
paras = html_doc.search("p")
|
||||
text = ""
|
||||
|
||||
unless paras.empty?
|
||||
|
||||
Reference in New Issue
Block a user