diff --git a/script/import_scripts/jive.rb b/script/import_scripts/jive.rb new file mode 100644 index 00000000000..bb5cedd8546 --- /dev/null +++ b/script/import_scripts/jive.rb @@ -0,0 +1,346 @@ +# Jive importer +require 'nokogiri' +require 'csv' +require File.expand_path(File.dirname(__FILE__) + "/base.rb") + +class ImportScripts::Jive < ImportScripts::Base + + BATCH_SIZE = 1000 + CATEGORY_IDS = [2023,2003,2004,2042,2036,2029] # categories that should be skipped + + def initialize(path) + @path = path + super() + @bbcode_to_md = true + + puts "loading post mappings..." + @post_number_map = {} + Post.pluck(:id, :post_number).each do |post_id, post_number| + @post_number_map[post_id] = post_number + end + end + + def created_post(post) + @post_number_map[post.id] = post.post_number + super + end + + def execute + import_users + import_groups + import_group_members + import_categories + import_posts + + # Topic.update_all(closed: true) + end + + class RowResolver + def load(row) + @row = row + end + + def self.create(cols) + Class.new(RowResolver).new(cols) + end + + def initialize(cols) + cols.each_with_index do |col,idx| + self.class.send(:define_method, col) do + @row[idx] + end + end + end + end + + def load_user_batch!(users, offset, total) + if users.length > 0 + create_users(users, offset: offset, total: total) do |user| + user + end + users.clear + end + end + + def csv_parse(name) + filename = "#{@path}/#{name}.csv" + first = true + row = nil + + current_row = ""; + double_quote_count = 0 + + File.open(filename).each_line do |line| + + line.gsub!(/\\(.{1})/){|m| m[-1] == '"'? '""': m[-1]} + line.strip! + + current_row << "\n" unless current_row.empty? + current_row << line + + double_quote_count += line.scan('"').count + + if double_quote_count % 2 == 1 + next + end + + raw = begin + CSV.parse(current_row) + rescue CSV::MalformedCSVError => e + puts e.message + puts "*" * 100 + puts "Bad row skipped, line is: #{line}" + puts + puts current_row + puts + puts "double quote count is : #{double_quote_count}" + puts "*" * 100 + + current_row = "" + double_quote_count = 0 + next + end[0] + + if first + row = RowResolver.create(raw) + + current_row = "" + double_quote_count = 0 + first = false + next + end + + row.load(raw) + + yield row + + current_row = "" + double_quote_count = 0 + end + end + + def total_rows(table) + File.foreach("#{@path}/#{table}.csv").inject(0) {|c, line| c+1} - 1 + end + + def import_groups + puts "", "importing groups..." + + rows = [] + csv_parse("group") do |row| + rows << {id: row.groupid, name: row.name} + end + + create_groups(rows) do |row| + row + end + end + + def import_users + puts "", "creating users" + + count = 0 + users = [] + + total = total_rows("user") + + csv_parse("user") do |row| + + id = row.userid + + # append "-x" at the end of each email + email = "#{row.email}-x" + + # fake it + if row.email.blank? || row.email !~ /@/ + email = SecureRandom.hex << "@domain.com" + end + + name = "#{row.firstname} #{row.lastname}" + username = row.username + created_at = DateTime.parse(row.creationdate) + last_seen_at = DateTime.parse(row.lastloggedin) + is_activated = row.userenabled + + username = name if username == "NULL" + username = email.split("@")[0] if username.blank? + name = email.split("@")[0] if name.blank? + + users << { + id: id, + email: email, + name: name, + username: username, + created_at: created_at, + last_seen_at: last_seen_at, + active: is_activated.to_i == 1, + approved: is_activated.to_i == 1 + } + + count += 1 + if count % BATCH_SIZE == 0 + load_user_batch! users, count - users.length, total + end + + end + + load_user_batch! users, count, total + end + + def import_group_members + puts "", "importing group members..." + + csv_parse("group_members") do |row| + user_id = user_id_from_imported_user_id(row.userid) + group_id = group_id_from_imported_group_id(row.groupid) + + if user_id && group_id + GroupUser.find_or_create_by(user_id: user_id, group_id: group_id) + end + end + end + + def import_categories + rows = [] + + csv_parse("community") do |row| + next unless CATEGORY_IDS.include?(row.communityid.to_i) + rows << {id: row.communityid, name: "#{row.name} (#{row.communityid})"} + end + + create_categories(rows) do |row| + row + end + end + + def normalize_raw!(raw) + raw = raw.dup + raw = raw[5..-6] + + doc = Nokogiri::HTML.fragment(raw) + doc.css('img').each do |img| + img.remove if img['class'] == "jive-image" + end + + raw = doc.to_html + raw = raw[4..-1] + + raw + end + + def import_post_batch!(posts, topics, offset, total) + create_posts(posts, total: total, offset: offset) do |post| + + mapped = {} + + mapped[:id] = post[:id] + mapped[:user_id] = user_id_from_imported_user_id(post[:user_id]) || -1 + mapped[:raw] = post[:body] + mapped[:created_at] = post[:created_at] + + topic = topics[post[:topic_id]] + + unless topic + p "MISSING TOPIC #{post[:topic_id]}" + p post + next + end + + unless topic[:post_id] + mapped[:category] = category_id_from_imported_category_id(topic[:category_id]) + mapped[:title] = post[:title] + topic[:post_id] = post[:id] + else + parent = topic_lookup_from_imported_post_id(topic[:post_id]) + next unless parent + + mapped[:topic_id] = parent[:topic_id] + + reply_to_post_id = post_id_from_imported_post_id(post[:reply_id]) + if reply_to_post_id + reply_to_post_number = @post_number_map[reply_to_post_id] + if reply_to_post_number && reply_to_post_number > 1 + mapped[:reply_to_post_number] = reply_to_post_number + end + end + end + + next if topic[:deleted] or post[:deleted] + + mapped + end + + posts.clear + end + + def import_posts + puts "", "creating topics and posts" + + topic_map = {} + thread_map = {} + + csv_parse("message") do |thread| + + next unless CATEGORY_IDS.include?(thread.containerid.to_i) + + if !thread.parentmessageid + # topic + + thread_map[thread.threadid] = thread.messageid + + topic_map[thread.messageid] = { + id: thread.messageid, + topic_id: thread.messageid, + category_id: thread.containerid, + user_id: thread.userid, + title: thread.subject, + body: normalize_raw!(thread.body || thread.subject || ""), + created_at: DateTime.parse(thread.creationdate), + } + + end + end + + total = total_rows("message") + posts = [] + count = 0 + + topic_map.each do |_, topic| + posts << topic if topic[:body] + count+=1 + end + + csv_parse("message") do |thread| + # post + + next unless CATEGORY_IDS.include?(thread.containerid.to_i) + + if thread.parentmessageid + row = { + id: thread.messageid, + topic_id: thread_map["#{thread.threadid}"], + user_id: thread.userid, + title: thread.subject, + body: normalize_raw!(thread.body), + created_at: DateTime.parse(thread.creationdate) + } + posts << row + count+=1 + + if posts.length > 0 && posts.length % BATCH_SIZE == 0 + import_post_batch!(posts, topic_map, count - posts.length, total) + end + end + end + + import_post_batch!(posts, topic_map, count - posts.length, total) if posts.length > 0 + end + +end + +unless ARGV[0] && Dir.exist?(ARGV[0]) + puts "", "Usage:", "", "bundle exec ruby script/import_scripts/jive.rb DIRNAME", "" + exit 1 +end + +ImportScripts::Jive.new(ARGV[0]).perform