# frozen_string_literal: true

# Provenance: contents of script/import_scripts/yahoogroup.rb as introduced by
# the patch "New yahoo groups importer"
# (commit 5d8508c523b4109afc4e29e6d222b802f247f2ff, Jon Bartlett,
# Fri, 24 Mar 2017 13:49:32 +1100).

require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'mongo'

# Import YahooGroups data as exported into MongoDB by:
#   https://github.com/jonbartlett/yahoo-groups-export
#
# Optionally paste these lines into your shell before running this:
#
# =begin
# export CATEGORY_ID=
# export MONGODB_HOST=
# export MONGODB_DB=
# =end

# Importer that reads Yahoo Groups messages from the `posts` MongoDB
# collection and recreates them as users, topics and replies.
#
# Every document is expected to carry a "ygData" sub-document with the keys
# read below: profile, authorName, from, msgId, topicId, subject, messageBody
# and postDate.
#
# `create_users`, `create_post`, `user_id_from_imported_user_id` and `perform`
# are inherited from ImportScripts::Base (base.rb, not part of this file).
class ImportScripts::YahooGroup < ImportScripts::Base

  # Connection settings. The defaults are the values originally hard-coded in
  # this script; set MONGODB_HOST / MONGODB_DB to point somewhere else.
  MONGODB_HOST = ENV.fetch('MONGODB_HOST', '192.168.10.1:27017')
  MONGODB_DB = ENV.fetch('MONGODB_DB', 'syncro')

  # Builds the MongoDB client and prepares the profile lookup table.
  def initialize
    super

    # Lower the driver's log level before the client exists so that anything
    # logged while the client is being set up is silenced as well.
    Mongo::Logger.logger.level = Logger::FATAL

    client = Mongo::Client.new([MONGODB_HOST], database: MONGODB_DB)
    puts "connected to db...."

    @collection = client[:posts]

    # Yahoo profile name => import id generated in #import_users.
    @user_profile_map = {}
  end

  # Entry point of the import: users first, so that posts can be attributed.
  def execute
    puts "", "Importing from Mongodb...."

    import_users
    import_discussions

    puts "", "Done"
  end

  # Creates one user per distinct Yahoo profile found in the collection.
  #
  # Yahoo's own "userId" sequence appears to have changed mid forum life, so
  # a sequence number is generated here and used as the import id.
  def import_users
    puts '', "Importing users"

    # Distinct list of Yahoo "profile" names. $group does not guarantee any
    # output order, so sort to keep the generated ids reproducible for an
    # unchanged data set.
    profiles = @collection.aggregate(
      [
        { "$group": { "_id": { profile: "$ygData.profile" } } },
        { "$sort": { "_id.profile": 1 } }
      ]
    ).to_a

    user_id = 0

    create_users(profiles) do |u|
      user_id += 1

      # Use the newest message of the profile to pick up the latest user
      # info, as this may have changed over time.
      info = latest_message_for(u["_id"]["profile"])["ygData"]

      # Remember which import id belongs to which profile.
      @user_profile_map[info["profile"]] = user_id

      puts "User created: #{info["profile"]}"

      {
        id: user_id,
        username: info["profile"],
        name: info["authorName"],
        email: info["from"], # mandatory
        created_at: Time.now
      }
    end

    puts "#{user_id} users created"
  end

  # Creates one topic per distinct topicId and then its replies.
  #
  # Topics whose opening message (msgId == topicId) is missing from the
  # collection are skipped together with their replies.
  def import_discussions
    puts "", "Importing discussions"

    topics_count = 0
    posts_count = 0

    topics = @collection.aggregate(
      [
        { "$group": { "_id": { topicId: "$ygData.topicId" } } }
      ]
    ).to_a
    total_topics = topics.size

    topics.each_with_index do |t, tidx|
      # The opening message of a topic is the one whose msgId is the topicId.
      topic_post = @collection.find("ygData.msgId": t["_id"]["topicId"]).first
      next if topic_post.nil?

      data = topic_post["ygData"]
      position = tidx + 1
      percent = sprintf('%.2f', (position.to_f / total_topics) * 100)
      puts "Topic: #{position} / #{total_topics} (#{percent}%) Subject: #{data["subject"]}"

      # The Yahoo msgId is used as import id: unlike a per-loop counter it is
      # unique across topics and replies.
      parent_post = create_post(topic_attributes(data), data["msgId"])

      # NOTE(review): create_post is assumed to return the created post on
      # success and something else (e.g. error messages) on failure - confirm
      # against base.rb.
      unless parent_post.respond_to?(:topic_id)
        puts " Topic #{data["msgId"]} could not be created: #{Array(parent_post).join(', ')}"
        next
      end

      topics_count += 1
      posts_count += import_replies(data, parent_post.topic_id)
    end

    puts "", "Imported #{topics_count} topics with #{topics_count + posts_count} posts."
  end

  private

  # Returns the message document with the highest msgId for +profile+.
  def latest_message_for(profile)
    @collection.find("ygData.profile": profile).sort("ygData.msgId": -1).limit(1).first
  end

  # Translates a Yahoo profile name into the id of the imported user.
  #
  # @user_profile_map only holds the import id handed to create_users, so it
  # has to be resolved to the real user id. Returns -1 when the profile is
  # unknown or the user was not imported.
  def discourse_user_id(profile)
    import_id = @user_profile_map[profile]
    (import_id && user_id_from_imported_user_id(import_id)) || -1
  end

  # Creates every reply of a topic in msgId order.
  #
  # topic_data - "ygData" of the topic's opening message
  # topic_id   - topic_id of the post created for that opening message
  #
  # Returns the number of replies that were created.
  def import_replies(topic_data, topic_id)
    replies = @collection
      .find("ygData.topicId": topic_data["topicId"])
      .sort("ygData.msgId": 1)
      .to_a
    total = replies.size
    created = 0

    replies.each_with_index do |reply, ridx|
      data = reply["ygData"]

      # The opening message has already been created as the topic itself.
      next if data["msgId"] == topic_data["topicId"]

      puts " Post: #{ridx + 1} / #{total}"

      result = create_post(reply_attributes(data, topic_id), data["msgId"])
      created += 1 if result.respond_to?(:topic_id)
    end

    created
  end

  # Builds the create_post options for a topic's opening message.
  def topic_attributes(data)
    {
      id: data["msgId"],
      user_id: discourse_user_id(data["profile"]),
      raw: data["messageBody"],
      created_at: Time.at(data["postDate"].to_i),
      # NOTE(review): raw_html presumably stores the body as-is; confirm that
      # imported HTML is sanitised somewhere before it is rendered.
      cook_method: Post.cook_methods[:raw_html],
      title: data["subject"].to_s.empty? ? "No Subject" : data["subject"],
      category: ENV['CATEGORY_ID'],
      custom_fields: { import_id: data["msgId"] }
    }
  end

  # Builds the create_post options for a reply inside +topic_id+.
  def reply_attributes(data, topic_id)
    {
      id: data["msgId"],
      topic_id: topic_id,
      user_id: discourse_user_id(data["profile"]),
      raw: data["messageBody"],
      created_at: Time.at(data["postDate"].to_i),
      cook_method: Post.cook_methods[:raw_html],
      custom_fields: { import_id: data["msgId"] }
    }
  end

end

ImportScripts::YahooGroup.new.perform