From fc973f9363828a16775ffde9fabe23ba783fed8b Mon Sep 17 00:00:00 2001 From: discoursehosting Date: Tue, 12 Jun 2018 20:41:21 +0200 Subject: [PATCH] Improve the VBulletin importer (#5922) --- script/import_scripts/vbulletin.rb | 112 +++++++++++++++++++---------- 1 file changed, 74 insertions(+), 38 deletions(-) diff --git a/script/import_scripts/vbulletin.rb b/script/import_scripts/vbulletin.rb index 048fea099b0..656c27e7b35 100644 --- a/script/import_scripts/vbulletin.rb +++ b/script/import_scripts/vbulletin.rb @@ -32,9 +32,11 @@ class ImportScripts::VBulletin < ImportScripts::Base puts "#{DB_USER}:#{DB_PW}@#{DB_HOST} wants #{DB_NAME}" def initialize + @bbcode_to_md = true + super - @old_username_to_new_usernames = {} + @usernames = {} @tz = TZInfo::Timezone.get(TIMEZONE) @@ -107,6 +109,14 @@ EOM end end + def get_username_for_old_username(old_username) + if @usernames.has_key?(old_username) + @usernames[old_username] + else + old_username + end + end + def import_users puts "", "importing users" @@ -116,7 +126,8 @@ EOM batches(BATCH_SIZE) do |offset| users = mysql_query(<<-SQL - SELECT userid, username, homepage, usertitle, usergroupid, joindate, email + SELECT userid, username, homepage, usertitle, usergroupid, joindate, email, + CONCAT(password, ':', salt) AS crypted_password FROM #{TABLE_PREFIX}user WHERE userid > #{last_user_id} ORDER BY userid @@ -140,6 +151,7 @@ EOM id: user["userid"], name: username, username: username, + password: user["crypted_password"], email: email, website: user["homepage"].strip, title: @htmlentities.decode(user["usertitle"]).strip, @@ -147,13 +159,15 @@ EOM created_at: parse_timestamp(user["joindate"]), last_seen_at: parse_timestamp(user["lastvisit"]), post_create_action: proc do |u| - @old_username_to_new_usernames[user["username"]] = u.username import_profile_picture(user, u) import_profile_background(user, u) end } end end + + @usernames = UserCustomField.joins(:user).where(name: 'import_username').pluck('user_custom_fields.value', 'users.username').to_h + end def create_groups_membership @@ -321,16 +335,17 @@ EOM t end - # uncomment below lines to create permalink - # topics.each do |thread| - # topic_id = "thread-#{thread["threadid"]}" - # topic = topic_lookup_from_imported_post_id(topic_id) - # if topic.present? - # title_slugified = thread["title"].gsub(" ","-").gsub(".","-") if thread["title"].present? - # url_slug = "threads/#{thread["threadid"]}-#{title_slugified}" if thread["title"].present? - # Permalink.create(url: url_slug, topic_id: topic[:topic_id].to_i) if url_slug.present? && topic[:topic_id].present? - # end - # end + # Add the following to permalink_normalizations for this to work: + # /forum\/.*?\/(\d*)\-.*/thread/\1 + + topics.each do |thread| + topic_id = "thread-#{thread["threadid"]}" + topic = topic_lookup_from_imported_post_id(topic_id) + if topic.present? + url_slug = "thread/#{thread["threadid"]}" if thread["title"].present? + Permalink.create(url: url_slug, topic_id: topic[:topic_id].to_i) if url_slug.present? && topic[:topic_id].present? + end + end end end @@ -388,8 +403,9 @@ EOM # find the uploaded file information from the db def find_upload(post, attachment_id) sql = "SELECT a.attachmentid attachment_id, a.userid user_id, a.filedataid file_id, a.filename filename, - a.caption caption + LENGTH(fd.filedata) AS dbsize, filedata, a.caption caption FROM #{TABLE_PREFIX}attachment a + LEFT JOIN #{TABLE_PREFIX}filedata fd ON fd.filedataid = a.filedataid WHERE a.attachmentid = #{attachment_id}" results = mysql_query(sql) @@ -399,13 +415,22 @@ EOM end filename = File.join(ATTACHMENT_DIR, row['user_id'].to_s.split('').join('/'), "#{row['file_id']}.attach") - unless File.exists?(filename) - puts "Attachment file doesn't exist: #{filename}" - return - end - real_filename = row['filename'] real_filename.prepend SecureRandom.hex if real_filename[0] == '.' + + unless File.exists?(filename) + if row['dbsize'].to_i == 0 + puts "Attachment file #{row['filedataid']} doesn't exist" + return nil + end + + tmpfile = 'attach_' + row['filedataid'].to_s + filename = File.join('/tmp/', tmpfile) + File.open(filename, 'wb') { |f| + f.write(row['filedata']) + } + end + upload = create_upload(post.user.id, filename, real_filename) if upload.nil? || !upload.valid? @@ -620,8 +645,9 @@ EOM Post.find_each do |post| begin + old_raw = post.raw.dup new_raw = postprocess_post_raw(post.raw) - if new_raw != post.raw + if new_raw != old_raw post.raw = new_raw post.save end @@ -685,11 +711,8 @@ EOM # [MENTION][/MENTION] raw.gsub!(/\[mention\](.+?)\[\/mention\]/i) do - old_username = $1 - if @old_username_to_new_usernames.has_key?(old_username) - old_username = @old_username_to_new_usernames[old_username] - end - "@#{old_username}" + new_username = get_username_for_old_username($1) + "@#{new_username}" end # [FONT=blah] and [COLOR=blah] @@ -698,6 +721,7 @@ EOM raw.gsub! /\[COLOR=#.*?\](.*?)\[\/COLOR\]/im, '\1' raw.gsub! /\[SIZE=.*?\](.*?)\[\/SIZE\]/im, '\1' + raw.gsub! /\[SUP\](.*?)\[\/SUP\]/im, '\1' raw.gsub! /\[h=.*?\](.*?)\[\/h\]/im, '\1' # [CENTER]...[/CENTER] @@ -705,10 +729,18 @@ EOM # [INDENT]...[/INDENT] raw.gsub! /\[INDENT\](.*?)\[\/INDENT\]/im, '\1' - raw.gsub! /\[TABLE\](.*?)\[\/TABLE\]/im, '\1' - raw.gsub! /\[TR\](.*?)\[\/TR\]/im, '\1' - raw.gsub! /\[TD\](.*?)\[\/TD\]/im, '\1' - raw.gsub! /\[TD="?.*?"?\](.*?)\[\/TD\]/im, '\1' + + # Tables to MD + raw.gsub!(/\[TABLE.*?\](.*?)\[\/TABLE\]/im) { |t| + rows = $1.gsub!(/\s*\[TR\](.*?)\[\/TR\]\s*/im) { |r| + cols = $1.gsub! /\s*\[TD.*?\](.*?)\[\/TD\]\s*/im, '|\1' + "#{cols}|\n" + } + header, rest = rows.split "\n", 2 + c = header.count "|" + sep = "|---" * (c - 1) + "#{header}\n#{sep}|\n#{rest}\n" + } # [QUOTE]...[/QUOTE] raw.gsub!(/\[quote\](.+?)\[\/quote\]/im) { |quote| @@ -719,10 +751,8 @@ EOM # [QUOTE=]...[/QUOTE] raw.gsub!(/\[quote=([^;\]]+)\](.+?)\[\/quote\]/im) do old_username, quote = $1, $2 - if @old_username_to_new_usernames.has_key?(old_username) - old_username = @old_username_to_new_usernames[old_username] - end - "\n[quote=\"#{old_username}\"]\n#{quote}\n[/quote]\n" + new_username = get_username_for_old_username(old_username) + "\n[quote=\"#{new_username}\"]\n#{quote}\n[/quote]\n" end # [YOUTUBE][/YOUTUBE] @@ -731,6 +761,9 @@ EOM # [VIDEO=youtube;]...[/VIDEO] raw.gsub!(/\[video=youtube;([^\]]+)\].*?\[\/video\]/i) { "\n//youtu.be/#{$1}\n" } + # Fix uppercase B U and I tags + raw.gsub!(/(\[\/?[BUI]\])/i) { $1.downcase } + # More Additions .... # [spoiler=Some hidden stuff]SPOILER HERE!![/spoiler] @@ -760,16 +793,19 @@ EOM raw.gsub!(/\[quote=([^;]+);(\d+)\](.+?)\[\/quote\]/im) do old_username, post_id, quote = $1, $2, $3 - if @old_username_to_new_usernames.has_key?(old_username) - old_username = @old_username_to_new_usernames[old_username] - end + new_username = get_username_for_old_username(old_username) + + # There is a bug here when the first post in a topic is quoted. + # The first post in a topic does not have an post_custom_field referring to the post number, + # but it refers to thread-XXX instead, so this lookup fails miserably then. + # Fixing this would imply rewriting that logic completely. if topic_lookup = topic_lookup_from_imported_post_id(post_id) post_number = topic_lookup[:post_number] topic_id = topic_lookup[:topic_id] - "\n[quote=\"#{old_username},post:#{post_number},topic:#{topic_id}\"]\n#{quote}\n[/quote]\n" + "\n[quote=\"#{new_username},post:#{post_number},topic:#{topic_id}\"]\n#{quote}\n[/quote]\n" else - "\n[quote=\"#{old_username}\"]\n#{quote}\n[/quote]\n" + "\n[quote=\"#{new_username}\"]\n#{quote}\n[/quote]\n" end end