# Provenance: extracted from git patch
#   From 9cbb90c5ed1bf61ef55c02bcb3cd0ca81f22fb58 Mon Sep 17 00:00:00 2001
#   From: Jay Pfaffman
#   Date: Mon, 30 Oct 2017 16:50:34 -0700
#   Subject: [PATCH] add importer for modx forum (#5239)
# The patch creates script/import_scripts/modx.rb (569 insertions). The diff
# framing has been dropped here; this is the file the patch adds.
#
# Importer that copies users, categories, topics and posts from a MODX
# Discuss forum database into Discourse. Methods prefixed with `not_` are
# disabled leftovers from the vBulletin importer template this was based on.

require 'mysql2'
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'htmlentities'

class ImportScripts::Modx < ImportScripts::Base
  BATCH_SIZE = 1000

  # CHANGE THESE BEFORE RUNNING THE IMPORTER (or set the environment variables)

  DB_HOST ||= ENV['DB_HOST'] || "localhost"
  DB_NAME ||= ENV['DB_NAME'] || "modx"
  DB_PW ||= ENV['DB_PW'] || "modex"
  DB_USER ||= ENV['DB_USER'] || "modx"
  TIMEZONE ||= ENV['TIMEZONE'] || "America/Los_Angeles"
  TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "modx_"
  ATTACHMENT_DIR ||= ENV['ATTACHMENT_DIR'] || '/path/to/your/attachment/folder'
  RANDOM_CATEGORY_COLOR ||= !ENV['RANDOM_CATEGORY_COLOR'].nil?
  # FIX: was `!ENV['SUSPEND_ALL_USERS']`, which made the flag TRUE whenever
  # the env var was *absent* -- inverted relative to RANDOM_CATEGORY_COLOR.
  # Follow the same convention: truthy only when the env var is set.
  SUSPEND_ALL_USERS ||= !ENV['SUSPEND_ALL_USERS'].nil?

  # TODO: replace modx_ with #{TABLE_PREFIX}

  puts "#{DB_USER}:#{DB_PW}@#{DB_HOST} wants #{DB_NAME}"

  def initialize
    super

    SiteSetting.disable_emails = true

    @old_username_to_new_usernames = {}

    @tz = TZInfo::Timezone.get(TIMEZONE)

    @htmlentities = HTMLEntities.new

    @client = Mysql2::Client.new(
      host: DB_HOST,
      username: DB_USER,
      password: DB_PW,
      database: DB_NAME
    )
  rescue Exception => e
    # Deliberate catch-all: any failure during setup should print help and
    # quit rather than crash with a backtrace.
    puts '=' * 50
    puts e.message
    # NOTE(review): the heredoc below was truncated when this patch was
    # flattened; reconstructed from the connection settings above -- verify
    # against the original commit.
    puts <<~TEXT
      Cannot connect to database.

        Hostname: #{DB_HOST}
        Username: #{DB_USER}
        Password: #{DB_PW}
        Database: #{DB_NAME}

      Edit the script or set these environment variables:

        export DB_HOST="localhost"
        export DB_NAME="modx"
        export DB_PW="modx"
        export DB_USER="modx"
        export TABLE_PREFIX="modx_"

      Exiting.
    TEXT
    exit
  end

  # NOTE(review): the body of `execute` and the SELECT column list in
  # `import_users` fell inside a stretch that was lost when the patch was
  # flattened. Both are reconstructed -- `execute` from the enabled (non
  # `not_`-prefixed) methods, the SELECT from the fields the create_users
  # block consumes. Verify both against the original commit before running.
  def execute
    import_users
    import_categories
    import_topics_and_posts
    suspend_users if SUSPEND_ALL_USERS
  end

  def import_users
    puts '', "creating users"

    user_count = mysql_query("SELECT COUNT(id) count FROM #{TABLE_PREFIX}discuss_users;").first["count"]

    last_user_id = -1

    batches(BATCH_SIZE) do |offset|
      users = mysql_query(<<-SQL
          SELECT id userid,
                 username,
                 concat(name_first, ' ', name_last) name,
                 email,
                 website,
                 createdon created_at,
                 last_active last_seen_at,
                 birthdate date_of_birth,
                 password,
                 salt
          FROM #{TABLE_PREFIX}discuss_users
          WHERE id > #{last_user_id}
          ORDER BY id
          LIMIT #{BATCH_SIZE};
      SQL
      ).to_a

      break if users.empty?

      last_user_id = users[-1]["userid"]
      users.reject! { |u| @lookup.user_already_imported?(u["userid"].to_i) }

      create_users(users, total: user_count, offset: offset) do |user|
        {
          id: user["userid"],
          name: user['name'],
          username: user['username'],
          email: user['email'],
          website: user['website'],
          created_at: parse_timestamp(user["created_at"]),
          last_seen_at: parse_timestamp(user["last_seen_at"]),
          date_of_birth: user['date_of_birth'],
          password: "#{user['password']}:#{user['salt']}" # not tested
        }
      end
    end
  end

  # import modx_discuss_categories as categories
  # import modx_discuss_boards as subcategories
  def import_categories
    puts "", "importing categories..."

    categories = mysql_query("select id, name, description from modx_discuss_categories").to_a

    create_categories(categories) do |category|
      puts "Creating #{category['name']}"
      puts category
      {
        # "cat" prefix keeps category import ids from colliding with board ids
        id: "cat#{category['id']}",
        name: category["name"],
        color: RANDOM_CATEGORY_COLOR ? random_category_color : nil,
        description: category["description"]
      }
    end

    puts "", "importing boards as subcategories..."

    boards = mysql_query("select id, category, name, description from modx_discuss_boards;").to_a
    create_categories(boards) do |category|
      puts category
      parent_category_id = category_id_from_imported_category_id("cat#{category['category']}")
      {
        id: category["id"],
        parent_category_id: parent_category_id.to_s,
        name: category["name"],
        color: RANDOM_CATEGORY_COLOR ? random_category_color : nil,
        description: category["description"]
      }
    end
  end

  # Returns a random six-hex-digit category color, biased toward dark values.
  # FIX: the original inline expression used "%0x" (no field width), which
  # produced 3-6 digit strings like "5a1" -- not a valid rrggbb color.
  # "%02x" zero-pads each of the three components to two digits.
  def random_category_color
    (0..2).map { "%02x" % (rand * 0x80) }.join
  end

  def import_topics_and_posts
    puts "", "creating topics and posts"

    total_count = mysql_query("SELECT count(id) count from #{TABLE_PREFIX}discuss_posts").first["count"]

    # imported topic id => imported id of its first post (parent == 0 row)
    topic_first_post_id = {}

    batches(BATCH_SIZE) do |offset|
      results = mysql_query("
        SELECT id,
               thread topic_id,
               board category_id,
               title,
               message raw,
               parent,
               author user_id,
               createdon created_at
        from modx_discuss_posts
        ORDER BY createdon
        LIMIT #{BATCH_SIZE}
        OFFSET #{offset};
      ")

      break if results.size < 1

      next if all_records_exist? :posts, results.map { |m| m['id'].to_i }

      create_posts(results, total: total_count, offset: offset) do |m|
        skip = false
        mapped = {}

        mapped[:id] = m['id']
        mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1
        mapped[:raw] = post_process_raw(m['raw'])
        mapped[:created_at] = Time.zone.at(m['created_at'])

        if m['parent'] == 0
          # thread starter: becomes a new topic
          mapped[:category] = category_id_from_imported_category_id(m['category_id'])
          mapped[:title] = m['title']
          topic_first_post_id[m['topic_id']] = m['id']
        else
          first_post_id = topic_first_post_id[m['topic_id']]
          parent = topic_lookup_from_imported_post_id(first_post_id)
          if parent
            mapped[:topic_id] = parent[:topic_id]
          else
            # FIX: `first_post_id` was referenced here without ever being
            # assigned, raising NameError instead of printing the skip notice.
            puts "Parent post #{first_post_id} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
            skip = true
          end
        end

        skip ? nil : mapped
      end
    end
  end

  # Converts Discuss-style [quote author=...]...[/quote] BBCode into
  # Discourse quote markup.
  def post_process_raw(raw)
    raw.gsub(/\[quote.*?\](.+?)\[\/quote\]/im) { |quote|
      quote = quote.gsub(/\[quote author=(.*?) .+\]/i) { "\n[quote=\"#{$1}\"]\n" }
      # FIX: the original pattern /[^\n]\[\/quote\]/ consumed (and discarded)
      # the character immediately before [/quote]; capture and keep it.
      quote.gsub(/([^\n])\[\/quote\]/im) { "#{$1}\n[/quote]\n" }
    }
  end

  # Disabled (not_ prefix). Copies accepted-answer post custom fields into
  # topic_custom_fields for the solved plugin.
  def not_mark_topics_as_solved
    puts "", "Marking topics as solved..."

    PostAction.exec_sql <<-SQL
      INSERT INTO topic_custom_fields (name, value, topic_id, created_at, updated_at)
      SELECT 'accepted_answer_post_id', pcf.post_id, p.topic_id, p.created_at, p.created_at
        FROM post_custom_fields pcf
        JOIN posts p ON p.id = pcf.post_id
       WHERE pcf.name = 'is_accepted_answer'
    SQL
  end

  # Disabled (not_ prefix). Finds the uploaded file information from the db
  # for a vBulletin-style attachment row and creates a Discourse upload.
  def not_find_upload(post, attachment_id)
    sql = "SELECT a.attachmentid attachment_id, a.userid user_id, a.filedataid file_id, a.filename filename,
                  a.caption caption
             FROM #{TABLE_PREFIX}attachment a
            WHERE a.attachmentid = #{attachment_id}"
    results = mysql_query(sql)

    unless row = results.first
      puts "Couldn't find attachment record for post.id = #{post.id}, import_id = #{post.custom_fields['import_id']}"
      return
    end

    filename = File.join(ATTACHMENT_DIR, row['user_id'].to_s.split('').join('/'), "#{row['file_id']}.attach")
    # FIX: File.exists? is deprecated in favor of File.exist?
    unless File.exist?(filename)
      # FIX: message interpolation was mangled in the patch; restored.
      puts "Attachment file doesn't exist: #{filename}"
      return
    end

    real_filename = row['filename']
    real_filename.prepend SecureRandom.hex if real_filename[0] == '.'
    upload = create_upload(post.user.id, filename, real_filename)

    if upload.nil? || !upload.valid?
      puts "Upload not valid :("
      puts upload.errors.inspect if upload
      return
    end

    [upload, real_filename]
  rescue Mysql2::Error => e
    puts "SQL Error"
    puts e.message
    puts sql
  end

  # Disabled (not_ prefix). Imports vBulletin-style private messages as
  # Discourse private-message topics, threading "Re:" replies onto the
  # original PM when sender/recipients match.
  def not_import_private_messages
    puts "", "importing private messages..."

    topic_count = mysql_query("SELECT COUNT(pmtextid) count FROM #{TABLE_PREFIX}pmtext").first["count"]

    last_private_message_id = -1

    batches(BATCH_SIZE) do |offset|
      private_messages = mysql_query(<<-SQL
          SELECT pmtextid, fromuserid, title, message, touserarray, dateline
            FROM #{TABLE_PREFIX}pmtext
           WHERE pmtextid > #{last_private_message_id}
           ORDER BY pmtextid
           LIMIT #{BATCH_SIZE}
      SQL
      ).to_a

      break if private_messages.empty?

      last_private_message_id = private_messages[-1]["pmtextid"]
      private_messages.reject! { |pm| @lookup.post_already_imported?("pm-#{pm['pmtextid']}") }

      # [title, sorted participant ids] => pmtextid of the first post
      title_username_of_pm_first_post = {}

      create_posts(private_messages, total: topic_count, offset: offset) do |m|
        skip = false
        mapped = {}

        mapped[:id] = "pm-#{m['pmtextid']}"
        mapped[:user_id] = user_id_from_imported_user_id(m['fromuserid']) || Discourse::SYSTEM_USER_ID
        # NOTE(review): inline `rescue nil` hides every parse error here;
        # tolerated because the blank?-check below skips such messages.
        mapped[:raw] = preprocess_post_raw(m['message']) rescue nil
        mapped[:created_at] = Time.zone.at(m['dateline'])
        title = @htmlentities.decode(m['title']).strip[0...255]
        topic_id = nil

        next if mapped[:raw].blank?

        # users who are part of this private message.
        target_usernames = []
        target_userids = []
        begin
          to_user_array = PHP.unserialize(m['touserarray'])
        rescue
          puts "#{m['pmtextid']} -- #{m['touserarray']}"
          skip = true
        end

        begin
          to_user_array.each do |to_user|
            if to_user[0] == "cc" || to_user[0] == "bcc" # not sure if we should include bcc users
              to_user[1].each do |to_user_cc|
                user_id = user_id_from_imported_user_id(to_user_cc[0])
                username = User.find_by(id: user_id).try(:username)
                # FIX: `<<` binds tighter than `||`, so the original
                # `target_userids << user_id || SYSTEM` pushed nil and the
                # fallback was dead code; parenthesize the fallback.
                target_userids << (user_id || Discourse::SYSTEM_USER_ID)
                target_usernames << username if username
              end
            else
              user_id = user_id_from_imported_user_id(to_user[0])
              username = User.find_by(id: user_id).try(:username)
              target_userids << (user_id || Discourse::SYSTEM_USER_ID)
              target_usernames << username if username
            end
          end
        rescue
          puts "skipping pm-#{m['pmtextid']} `to_user_array` is not properly serialized -- #{to_user_array.inspect}"
          skip = true
        end

        participants = target_userids
        participants << mapped[:user_id]
        begin
          participants.sort!
        rescue
          puts "one of the participant's id is nil -- #{participants.inspect}"
        end

        if title =~ /^Re:/
          # Try stripping "Re:" plus up to five repeated prefix characters to
          # find the thread starter with the same participant set.
          parent_id = title_username_of_pm_first_post[[title[3..-1], participants]] ||
                      title_username_of_pm_first_post[[title[4..-1], participants]] ||
                      title_username_of_pm_first_post[[title[5..-1], participants]] ||
                      title_username_of_pm_first_post[[title[6..-1], participants]] ||
                      title_username_of_pm_first_post[[title[7..-1], participants]] ||
                      title_username_of_pm_first_post[[title[8..-1], participants]]

          if parent_id
            if t = topic_lookup_from_imported_post_id("pm-#{parent_id}")
              topic_id = t[:topic_id]
            end
          end
        else
          title_username_of_pm_first_post[[title, participants]] ||= m['pmtextid']
        end

        unless topic_id
          mapped[:title] = title
          mapped[:archetype] = Archetype.private_message
          mapped[:target_usernames] = target_usernames.join(',')

          if mapped[:target_usernames].size < 1 # pm with yourself?
            # skip = true
            mapped[:target_usernames] = "system"
            puts "pm-#{m['pmtextid']} has no target (#{m['touserarray']})"
          end
        else
          mapped[:topic_id] = topic_id
        end

        skip ? nil : mapped
      end
    end
  end

  # Disabled (not_ prefix). Rewrites [attach]id[/attach] BBCode in already
  # imported posts to uploaded-file markup.
  def not_import_attachments
    puts '', 'importing attachments...'

    current_count = 0

    total_count = mysql_query(<<-SQL
        SELECT COUNT(postid) count
          FROM #{TABLE_PREFIX}post p
          JOIN #{TABLE_PREFIX}thread t ON t.threadid = p.threadid
         WHERE t.firstpostid <> p.postid
    SQL
    ).first["count"]

    success_count = 0
    fail_count = 0

    attachment_regex = /\[attach[^\]]*\](\d+)\[\/attach\]/i

    Post.find_each do |post|
      current_count += 1
      print_status current_count, total_count

      new_raw = post.raw.dup
      new_raw.gsub!(attachment_regex) do |s|
        matches = attachment_regex.match(s)
        attachment_id = matches[1]

        upload, filename = find_upload(post, attachment_id)
        unless upload
          fail_count += 1
          next
        end

        html_for_upload(upload, filename)
      end

      if new_raw != post.raw
        PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: 'Import attachments from modx')
      end

      success_count += 1
    end
  end

  # Disabled (not_ prefix). Closes Discourse topics whose source threads were
  # closed (open == 0).
  def not_close_topics
    puts "", "Closing topics..."

    # keep track of closed topics
    closed_topic_ids = []

    topics = mysql_query <<-MYSQL
        SELECT t.threadid threadid, firstpostid, open
          FROM #{TABLE_PREFIX}thread t
          JOIN #{TABLE_PREFIX}post p ON p.postid = t.firstpostid
         ORDER BY t.threadid
    MYSQL
    topics.each do |topic|
      topic_id = "thread-#{topic["threadid"]}"
      closed_topic_ids << topic_id if topic["open"] == 0
    end

    sql = <<-SQL
      WITH closed_topic_ids AS (
        SELECT t.id AS topic_id
          FROM post_custom_fields pcf
          JOIN posts p ON p.id = pcf.post_id
          JOIN topics t ON t.id = p.topic_id
         WHERE pcf.name = 'import_id'
           AND pcf.value IN (?)
      )
      UPDATE topics
         SET closed = true
       WHERE id IN (SELECT topic_id FROM closed_topic_ids)
    SQL

    Topic.exec_sql(sql, closed_topic_ids)
  end

  # Disabled (not_ prefix). Re-runs raw postprocessing over every post.
  def not_post_process_posts
    puts "", "Postprocessing posts..."

    current = 0
    max = Post.count

    Post.find_each do |post|
      begin
        new_raw = postprocess_post_raw(post.raw)
        if new_raw != post.raw
          post.raw = new_raw
          post.save
        end
      rescue PrettyText::JavaScriptError
        nil
      ensure
        print_status(current += 1, max)
      end
    end
  end

  # Disabled (not_ prefix). Writes an old-id => new-topic-id CSV used to
  # build redirect permalinks.
  def not_create_permalink_file
    puts '', 'Creating Permalink File...', ''

    id_mapping = []

    Topic.listable_topics.find_each do |topic|
      pcf = topic.first_post.custom_fields
      if pcf && pcf["import_id"]
        id = pcf["import_id"].split('-').last
        id_mapping.push("XXX#{id}  YYY#{topic.id}")
      end
    end

    # Category.find_each do |cat|
    #   ccf = cat.custom_fields
    #   if ccf && ccf["import_id"]
    #     id = ccf["import_id"].to_i
    #     id_mapping.push("/forumdisplay.php?#{id}  http://forum.quartertothree.com#{cat.url}")
    #   end
    # end

    CSV.open(File.expand_path("../vb_map.csv", __FILE__), "w") do |csv|
      id_mapping.each do |value|
        csv << [value]
      end
    end
  end

  # FIX: the original set `active: true`, which *activated* every non-admin
  # user despite the method name. Deactivating forces imported users to
  # re-validate their email address before posting.
  def deactivate_all_users
    User.where("id > 0 and admin != true").update_all(active: false)
  end

  # Suspends every user the source forum had blocked, logging a staff action.
  def suspend_users
    puts '', "updating blocked users"

    banned = 0
    failed = 0
    total = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}user_attributes where blocked != 0").first['count']

    system_user = Discourse.system_user

    # FIX: restrict to blocked rows so the loop matches `total` above, and
    # read the block timestamps from the query row `b` -- the original read
    # user["blockedafter"] off the Discourse User record, which is always nil.
    mysql_query("SELECT id, blockedafter, blockeduntil FROM #{TABLE_PREFIX}user_attributes WHERE blocked != 0").each do |b|
      user = User.find_by_id(user_id_from_imported_user_id(b['id']))
      if user
        user.suspended_at = parse_timestamp(b['blockedafter'])
        user.suspended_till = parse_timestamp(b['blockeduntil'])
        if user.save
          StaffActionLogger.new(system_user).log_user_suspend(user, "banned during initial import")
          banned += 1
        else
          puts "Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}"
          failed += 1
        end
      else
        # FIX: the query selects `id`; there is no 'userid' column in the row.
        puts "Not found: #{b['id']}"
        failed += 1
      end

      print_status banned + failed, total
    end
  end

  # Converts a source-DB epoch timestamp (stored in TIMEZONE local time) to a
  # Rails Time in the site zone.
  def parse_timestamp(timestamp)
    Time.zone.at(@tz.utc_to_local(timestamp))
  end

  def mysql_query(sql)
    @client.query(sql, cache_rows: true)
  end

end

ImportScripts::Modx.new.perform