diff --git a/script/bulk_import/base.rb b/script/bulk_import/base.rb index a04c7deee12..3b0efbc1c91 100644 --- a/script/bulk_import/base.rb +++ b/script/bulk_import/base.rb @@ -153,6 +153,7 @@ class BulkImport::Base puts "Loading imported user ids..." @users, imported_user_ids = imported_ids("user") @last_imported_user_id = imported_user_ids.max || -1 + @pre_existing_user_ids = Set.new puts "Loading imported category ids..." @categories, imported_category_ids = imported_ids("category") @@ -197,7 +198,7 @@ class BulkImport::Base puts "Loading users indexes..." @last_user_id = last_id(User) @last_user_email_id = last_id(UserEmail) - @emails = User.unscoped.joins(:user_emails).pluck(:"user_emails.email").to_set + @emails = User.unscoped.joins(:user_emails).pluck(:"user_emails.email", :"user_emails.user_id").to_h @usernames_lower = User.unscoped.pluck(:username_lower).to_set @mapped_usernames = UserCustomField.joins(:user).where(name: "import_username").pluck("user_custom_fields.value", "users.username").to_h @@ -393,6 +394,17 @@ class BulkImport::Base end def process_user(user) + if user[:email].present? + user[:email].downcase! + + if existing_user_id = @emails[user[:email]] + @pre_existing_user_ids << existing_user_id + @users[user[:imported_id].to_i] = existing_user_id + user[:skip] = true + return user + end + end + @users[user[:imported_id].to_i] = user[:id] = @last_user_id += 1 imported_username = user[:username].dup @@ -412,11 +424,6 @@ class BulkImport::Base end user[:username_lower] = user[:username].downcase - user[:email] ||= random_email - user[:email].downcase! - - # unique email - user[:email] = random_email until user[:email] =~ EmailValidator.email_regex && @emails.add?(user[:email]) user[:trust_level] ||= TrustLevel[1] user[:active] = true unless user.has_key?(:active) user[:admin] ||= false @@ -428,18 +435,28 @@ class BulkImport::Base end def process_user_email(user_email) + user_id = @users[user_email[:imported_user_id].to_i] + return { skip: true } if @pre_existing_user_ids.include?(user_id) + user_email[:id] = @last_user_email_id += 1 - user_email[:user_id] = @users[user_email[:imported_user_id].to_i] + user_email[:user_id] = user_id user_email[:primary] = true user_email[:created_at] ||= NOW user_email[:updated_at] ||= user_email[:created_at] + user_email[:email] ||= random_email user_email[:email].downcase! + # unique email + user_email[:email] = random_email until user_email[:email] =~ EmailValidator.email_regex && !@emails.has_key?(user_email[:email]) + user_email end def process_user_stat(user_stat) - user_stat[:user_id] = @users[user_stat[:imported_user_id].to_i] + user_id = @users[user_stat[:imported_user_id].to_i] + return { skip: true } if @pre_existing_user_ids.include?(user_id) + + user_stat[:user_id] = user_id user_stat[:topics_entered] ||= 0 user_stat[:time_read] ||= 0 user_stat[:days_visited] ||= 0 @@ -455,6 +472,8 @@ class BulkImport::Base end def process_user_profile(user_profile) + return { skip: true } if @pre_existing_user_ids.include?(user_profile[:user_id]) + user_profile[:bio_raw] = (user_profile[:bio_raw].presence || "").scrub.strip.presence user_profile[:bio_cooked] = pre_cook(user_profile[:bio_raw]) if user_profile[:bio_raw].present? user_profile[:views] ||= 0 @@ -697,7 +716,7 @@ class BulkImport::Base processed = send(process_method_name, mapped) imported_ids << mapped[:imported_id] unless mapped[:imported_id].nil? imported_ids |= mapped[:imported_ids] unless mapped[:imported_ids].nil? - @raw_connection.put_copy_data columns.map { |c| processed[c] } + @raw_connection.put_copy_data columns.map { |c| processed[c] } unless processed[:skip] print "\r%7d - %6d/sec" % [imported_ids.size, imported_ids.size.to_f / (Time.now - start)] if imported_ids.size % 5000 == 0 rescue => e puts "\n" diff --git a/script/bulk_import/phpbb_postgresql.rb b/script/bulk_import/phpbb_postgresql.rb index d7d7205a07c..cd5fa626fd5 100644 --- a/script/bulk_import/phpbb_postgresql.rb +++ b/script/bulk_import/phpbb_postgresql.rb @@ -83,6 +83,7 @@ class BulkImport::PhpBB < BulkImport::Base u = { imported_id: row["user_id"], username: normalize_text(row["username"]), + email: row["user_email"], created_at: Time.zone.at(row["user_regdate"].to_i), last_seen_at: row["user_lastvisit"] == 0 ? Time.zone.at(row["user_regdate"].to_i) : Time.zone.at(row["user_lastvisit"].to_i), trust_level: row["user_posts"] == 0 ? TrustLevel[0] : TrustLevel[1], diff --git a/script/bulk_import/vbulletin.rb b/script/bulk_import/vbulletin.rb index c251ef07ab5..8a57522002c 100644 --- a/script/bulk_import/vbulletin.rb +++ b/script/bulk_import/vbulletin.rb @@ -118,6 +118,7 @@ class BulkImport::VBulletin < BulkImport::Base imported_id: row[0], username: normalize_text(row[1]), name: normalize_text(row[1]), + email: row[2], created_at: Time.zone.at(row[3]), date_of_birth: parse_birthday(row[4]), primary_group_id: group_id_from_imported_id(row[6]),