From 621995f35a5315271ec8e7b0447587113859e1f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Hanol?= Date: Thu, 7 Mar 2013 03:30:40 +0100 Subject: [PATCH] globalized wikipedia onebox --- lib/oneboxer/wikipedia_onebox.rb | 11 ++++--- .../oneboxer/wikipedia_onebox_spec.rb | 30 +++++++++++++++---- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/lib/oneboxer/wikipedia_onebox.rb b/lib/oneboxer/wikipedia_onebox.rb index 54457a5bb67..51cc54b942c 100644 --- a/lib/oneboxer/wikipedia_onebox.rb +++ b/lib/oneboxer/wikipedia_onebox.rb @@ -3,7 +3,7 @@ require_dependency 'oneboxer/handlebars_onebox' module Oneboxer class WikipediaOnebox < HandlebarsOnebox - matcher /^https?:\/\/.*wikipedia.(com|org)\/.*$/ + matcher /^https?:\/\/.*wikipedia\.(com|org)\/.*$/ favicon 'wikipedia.png' def template @@ -11,11 +11,10 @@ module Oneboxer end def translate_url - m = @url.match(/wiki\/(?<identifier>[^#\/]+)/mi) - + m = @url.match(/^https?:\/\/((?<subdomain>.+)\.)?wikipedia\.(com|org)\/wiki\/(?<identifier>[^#\/]+)/mi) + subdomain = m[:subdomain] || "en" article_id = CGI::unescape(m[:identifier]) - return "http://en.m.wikipedia.org/w/index.php?title=#{URI::encode(article_id)}" - @url + "http://#{subdomain}.m.wikipedia.org/w/index.php?title=#{URI::encode(article_id)}" end def parse(data) @@ -25,7 +24,7 @@ module Oneboxer result = {} title = html_doc.at('title').inner_html - result[:title] = title.gsub!(/ - Wikipedia, the free encyclopedia/, '') if title.present? + result[:title] = title.gsub!(/ - Wikipedia.*$/, '') if title.present? 
# get the first image > 150 pix high images = html_doc.search("img").select { |img| img['height'].to_i > 150 } diff --git a/spec/components/oneboxer/wikipedia_onebox_spec.rb b/spec/components/oneboxer/wikipedia_onebox_spec.rb index b27aa7b718c..43d2e8055a9 100644 --- a/spec/components/oneboxer/wikipedia_onebox_spec.rb +++ b/spec/components/oneboxer/wikipedia_onebox_spec.rb @@ -5,14 +5,32 @@ require 'oneboxer' require 'oneboxer/wikipedia_onebox' describe Oneboxer::WikipediaOnebox do - before(:each) do - @o = Oneboxer::WikipediaOnebox.new("http://en.wikipedia.org/wiki/Ruby") - FakeWeb.register_uri(:get, @o.translate_url, :response => fixture_file('oneboxer/wikipedia.response')) - FakeWeb.register_uri(:get, 'http://en.m.wikipedia.org/wiki/Ruby', :response => fixture_file('oneboxer/wikipedia_redirected.response')) - end it "generates the expected onebox for Wikipedia" do - @o.onebox.should == expected_wikipedia_result + o = Oneboxer::WikipediaOnebox.new('http://en.wikipedia.org/wiki/Ruby') + FakeWeb.register_uri(:get, o.translate_url, :response => fixture_file('oneboxer/wikipedia.response')) + FakeWeb.register_uri(:get, 'http://en.m.wikipedia.org/wiki/Ruby', :response => fixture_file('oneboxer/wikipedia_redirected.response')) + o.onebox.should == expected_wikipedia_result + end + + it "accepts .com extention" do + o = Oneboxer::WikipediaOnebox.new('http://en.wikipedia.com/wiki/Postgres') + o.translate_url.should == 'http://en.m.wikipedia.org/w/index.php?title=Postgres' + end + + it "encodes identifier" do + o = Oneboxer::WikipediaOnebox.new('http://en.wikipedia.com/wiki/Café') + o.translate_url.should == 'http://en.m.wikipedia.org/w/index.php?title=Caf%C3%A9' + end + + it "defaults to en locale" do + o = Oneboxer::WikipediaOnebox.new('http://wikipedia.org/wiki/Ruby_on_rails') + o.translate_url.should == 'http://en.m.wikipedia.org/w/index.php?title=Ruby_on_rails' + end + + it "generates localized url" do + o = 
Oneboxer::WikipediaOnebox.new('http://fr.wikipedia.org/wiki/Ruby') + o.translate_url.should == 'http://fr.m.wikipedia.org/w/index.php?title=Ruby' end private