DEV: Apply syntax_tree formatting to script/*

This commit is contained in:
David Taylor
2023-01-07 11:53:14 +00:00
parent ff508d1ae5
commit 436b3b392b
143 changed files with 8905 additions and 7353 deletions

View File

@@ -1,9 +1,9 @@
# frozen_string_literal: true
require 'csv'
require 'reverse_markdown'
require_relative 'base'
require_relative 'base/generic_database'
require "csv"
require "reverse_markdown"
require_relative "base"
require_relative "base/generic_database"
# Call it like this:
# RAILS_ENV=production bundle exec ruby script/import_scripts/answerbase.rb DIRNAME
@@ -15,8 +15,10 @@ class ImportScripts::Answerbase < ImportScripts::Base
ANSWER_IMAGE_DIRECTORY = "Answer Images"
QUESTION_ATTACHMENT_DIRECTORY = "Question Attachments"
QUESTION_IMAGE_DIRECTORY = "Question Images"
EMBEDDED_IMAGE_REGEX = /<a[^>]*href="[^"]*relativeUrl=(?<path>[^"\&]*)[^"]*"[^>]*>\s*<img[^>]*>\s*<\/a>/i
QUESTION_LINK_REGEX = /<a[^>]*?href="#{Regexp.escape(OLD_DOMAIN)}\/[^"]*?(?:q|questionid=)(?<id>\d+)[^"]*?"[^>]*>(?<text>.*?)<\/a>/i
EMBEDDED_IMAGE_REGEX =
%r{<a[^>]*href="[^"]*relativeUrl=(?<path>[^"\&]*)[^"]*"[^>]*>\s*<img[^>]*>\s*</a>}i
QUESTION_LINK_REGEX =
%r{<a[^>]*?href="#{Regexp.escape(OLD_DOMAIN)}/[^"]*?(?:q|questionid=)(?<id>\d+)[^"]*?"[^>]*>(?<text>.*?)</a>}i
TOPIC_LINK_NORMALIZATION = '/.*?-(q\d+).*/\1'
BATCH_SIZE = 1000
@@ -24,12 +26,13 @@ class ImportScripts::Answerbase < ImportScripts::Base
super()
@path = path
@db = ImportScripts::GenericDatabase.new(
@path,
batch_size: BATCH_SIZE,
recreate: true,
numeric_keys: true
)
@db =
ImportScripts::GenericDatabase.new(
@path,
batch_size: BATCH_SIZE,
recreate: true,
numeric_keys: true,
)
end
def execute
@@ -47,11 +50,7 @@ class ImportScripts::Answerbase < ImportScripts::Base
category_position = 0
csv_parse("categories") do |row|
@db.insert_category(
id: row[:id],
name: row[:name],
position: category_position += 1
)
@db.insert_category(id: row[:id], name: row[:name], position: category_position += 1)
end
csv_parse("users") do |row|
@@ -62,7 +61,7 @@ class ImportScripts::Answerbase < ImportScripts::Base
bio: row[:description],
avatar_path: row[:profile_image],
created_at: parse_date(row[:createtime]),
active: true
active: true,
)
end
@@ -74,8 +73,9 @@ class ImportScripts::Answerbase < ImportScripts::Base
begin
if row[:type] == "Question"
attachments = parse_filenames(row[:attachments], QUESTION_ATTACHMENT_DIRECTORY) +
parse_filenames(row[:images], QUESTION_IMAGE_DIRECTORY)
attachments =
parse_filenames(row[:attachments], QUESTION_ATTACHMENT_DIRECTORY) +
parse_filenames(row[:images], QUESTION_IMAGE_DIRECTORY)
@db.insert_topic(
id: row[:id],
@@ -84,12 +84,13 @@ class ImportScripts::Answerbase < ImportScripts::Base
category_id: row[:categorylist],
user_id: user_id,
created_at: created_at,
attachments: attachments
attachments: attachments,
)
last_topic_id = row[:id]
else
attachments = parse_filenames(row[:attachments], ANSWER_ATTACHMENT_DIRECTORY) +
parse_filenames(row[:images], ANSWER_IMAGE_DIRECTORY)
attachments =
parse_filenames(row[:attachments], ANSWER_ATTACHMENT_DIRECTORY) +
parse_filenames(row[:images], ANSWER_IMAGE_DIRECTORY)
@db.insert_post(
id: row[:id],
@@ -97,10 +98,10 @@ class ImportScripts::Answerbase < ImportScripts::Base
topic_id: last_topic_id,
user_id: user_id,
created_at: created_at,
attachments: attachments
attachments: attachments,
)
end
rescue
rescue StandardError
p row
raise
end
@@ -110,9 +111,7 @@ class ImportScripts::Answerbase < ImportScripts::Base
# Turns a ";"-separated list of file names into a list of paths.
# Each name is stripped of surrounding whitespace and joined onto @path and
# +directory+ with File.join. Returns an empty array when +text+ is blank.
# NOTE(review): the three-line call chain and the one-line chain below are the
# same expression in two formattings (single vs. double quotes).
def parse_filenames(text, directory)
return [] if text.blank?
text
.split(';')
.map { |filename| File.join(@path, directory, filename.strip) }
text.split(";").map { |filename| File.join(@path, directory, filename.strip) }
end
def parse_date(text)
@@ -132,10 +131,10 @@ class ImportScripts::Answerbase < ImportScripts::Base
create_categories(rows) do |row|
{
id: row['id'],
name: row['name'],
description: row['description'],
position: row['position']
id: row["id"],
name: row["name"],
description: row["description"],
position: row["position"],
}
end
end
@@ -153,19 +152,17 @@ class ImportScripts::Answerbase < ImportScripts::Base
rows, last_id = @db.fetch_users(last_id)
break if rows.empty?
next if all_records_exist?(:users, rows.map { |row| row['id'] })
next if all_records_exist?(:users, rows.map { |row| row["id"] })
create_users(rows, total: total_count, offset: offset) do |row|
{
id: row['id'],
email: row['email'],
username: row['username'],
bio_raw: row['bio'],
created_at: row['created_at'],
active: row['active'] == 1,
post_create_action: proc do |user|
create_avatar(user, row['avatar_path'])
end
id: row["id"],
email: row["email"],
username: row["username"],
bio_raw: row["bio"],
created_at: row["created_at"],
active: row["active"] == 1,
post_create_action: proc { |user| create_avatar(user, row["avatar_path"]) },
}
end
end
@@ -191,24 +188,25 @@ class ImportScripts::Answerbase < ImportScripts::Base
rows, last_id = @db.fetch_topics(last_id)
break if rows.empty?
next if all_records_exist?(:posts, rows.map { |row| row['id'] })
next if all_records_exist?(:posts, rows.map { |row| row["id"] })
create_posts(rows, total: total_count, offset: offset) do |row|
attachments = @db.fetch_topic_attachments(row['id']) if row['upload_count'] > 0
user_id = user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id
attachments = @db.fetch_topic_attachments(row["id"]) if row["upload_count"] > 0
user_id = user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id
{
id: row['id'],
title: row['title'],
raw: raw_with_attachments(row['raw'].presence || row['title'], attachments, user_id),
category: category_id_from_imported_category_id(row['category_id']),
id: row["id"],
title: row["title"],
raw: raw_with_attachments(row["raw"].presence || row["title"], attachments, user_id),
category: category_id_from_imported_category_id(row["category_id"]),
user_id: user_id,
created_at: row['created_at'],
closed: row['closed'] == 1,
post_create_action: proc do |post|
url = "q#{row['id']}"
Permalink.create(url: url, topic_id: post.topic.id) unless permalink_exists?(url)
end
created_at: row["created_at"],
closed: row["closed"] == 1,
post_create_action:
proc do |post|
url = "q#{row["id"]}"
Permalink.create(url: url, topic_id: post.topic.id) unless permalink_exists?(url)
end,
}
end
end
@@ -223,19 +221,19 @@ class ImportScripts::Answerbase < ImportScripts::Base
rows, last_row_id = @db.fetch_posts(last_row_id)
break if rows.empty?
next if all_records_exist?(:posts, rows.map { |row| row['id'] })
next if all_records_exist?(:posts, rows.map { |row| row["id"] })
create_posts(rows, total: total_count, offset: offset) do |row|
topic = topic_lookup_from_imported_post_id(row['topic_id'])
attachments = @db.fetch_post_attachments(row['id']) if row['upload_count'] > 0
user_id = user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id
topic = topic_lookup_from_imported_post_id(row["topic_id"])
attachments = @db.fetch_post_attachments(row["id"]) if row["upload_count"] > 0
user_id = user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id
{
id: row['id'],
raw: raw_with_attachments(row['raw'], attachments, user_id),
id: row["id"],
raw: raw_with_attachments(row["raw"], attachments, user_id),
user_id: user_id,
topic_id: topic[:topic_id],
created_at: row['created_at']
created_at: row["created_at"],
}
end
end
@@ -247,7 +245,7 @@ class ImportScripts::Answerbase < ImportScripts::Base
raw = ReverseMarkdown.convert(raw) || ""
attachments&.each do |attachment|
path = attachment['path']
path = attachment["path"]
next if embedded_paths.include?(path)
if File.exist?(path)
@@ -269,23 +267,24 @@ class ImportScripts::Answerbase < ImportScripts::Base
paths = []
upload_ids = []
raw = raw.gsub(EMBEDDED_IMAGE_REGEX) do
path = File.join(@path, Regexp.last_match['path'])
filename = File.basename(path)
path = find_image_path(filename)
raw =
raw.gsub(EMBEDDED_IMAGE_REGEX) do
path = File.join(@path, Regexp.last_match["path"])
filename = File.basename(path)
path = find_image_path(filename)
if path
upload = @uploader.create_upload(user_id, path, filename)
if path
upload = @uploader.create_upload(user_id, path, filename)
if upload.present? && upload.persisted?
paths << path
upload_ids << upload.id
@uploader.html_for_upload(upload, filename)
if upload.present? && upload.persisted?
paths << path
upload_ids << upload.id
@uploader.html_for_upload(upload, filename)
end
else
STDERR.puts "Could not find file: #{path}"
end
else
STDERR.puts "Could not find file: #{path}"
end
end
[raw, paths, upload_ids]
end
@@ -311,11 +310,11 @@ class ImportScripts::Answerbase < ImportScripts::Base
# Reads the permalink_normalizations site setting, splits it on "|" (an empty
# list when the setting is blank), hands that list together with
# TOPIC_LINK_NORMALIZATION to add_normalization, and stores the list back
# joined with "|".
# NOTE(review): the split and join lines each appear twice, differing only in
# quote style.
def add_permalink_normalizations
normalizations = SiteSetting.permalink_normalizations
normalizations = normalizations.blank? ? [] : normalizations.split('|')
normalizations = normalizations.blank? ? [] : normalizations.split("|")
add_normalization(normalizations, TOPIC_LINK_NORMALIZATION)
SiteSetting.permalink_normalizations = normalizations.join('|')
SiteSetting.permalink_normalizations = normalizations.join("|")
end
def add_normalization(normalizations, normalization)
@@ -327,11 +326,13 @@ class ImportScripts::Answerbase < ImportScripts::Base
end
# Yields each row of "<table_name>.csv" located in @path to the given block.
# The file is read with headers enabled and converted to symbols, blank lines
# skipped, and the "bom|utf-8" encoding (UTF-8 with an optional byte-order mark).
# NOTE(review): two CSV.foreach calls are present; they take the same arguments
# and differ only in layout and quote style.
def csv_parse(table_name)
CSV.foreach(File.join(@path, "#{table_name}.csv"),
headers: true,
header_converters: :symbol,
skip_blanks: true,
encoding: 'bom|utf-8') { |row| yield row }
CSV.foreach(
File.join(@path, "#{table_name}.csv"),
headers: true,
header_converters: :symbol,
skip_blanks: true,
encoding: "bom|utf-8",
) { |row| yield row }
end
end

View File

@@ -5,34 +5,29 @@
# Based on having access to a mysql dump.
# Pass in the ENV variables listed below before running the script.
require_relative 'base'
require 'mysql2'
require 'open-uri'
require_relative "base"
require "mysql2"
require "open-uri"
class ImportScripts::AnswerHub < ImportScripts::Base
DB_NAME ||= ENV['DB_NAME'] || "answerhub"
DB_PASS ||= ENV['DB_PASS'] || "answerhub"
DB_USER ||= ENV['DB_USER'] || "answerhub"
TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "network1"
BATCH_SIZE ||= ENV['BATCH_SIZE'].to_i || 1000
ATTACHMENT_DIR = ENV['ATTACHMENT_DIR'] || ''
PROCESS_UPLOADS = ENV['PROCESS_UPLOADS'].to_i || 0
ANSWERHUB_DOMAIN = ENV['ANSWERHUB_DOMAIN']
AVATAR_DIR = ENV['AVATAR_DIR'] || ""
SITE_ID = ENV['SITE_ID'].to_i || 0
CATEGORY_MAP_FROM = ENV['CATEGORY_MAP_FROM'].to_i || 0
CATEGORY_MAP_TO = ENV['CATEGORY_MAP_TO'].to_i || 0
SCRAPE_AVATARS = ENV['SCRAPE_AVATARS'].to_i || 0
# Importer configuration. Every value can be overridden with the environment
# variable of the same name.
#
# Integer settings: ENV values are strings and nil.to_i is 0, which is truthy,
# so a fallback written as `ENV[...].to_i || default` is never used. Any
# non-zero default therefore has to be chosen before the conversion.
DB_NAME ||= ENV["DB_NAME"] || "answerhub"
DB_PASS ||= ENV["DB_PASS"] || "answerhub"
DB_USER ||= ENV["DB_USER"] || "answerhub"
TABLE_PREFIX ||= ENV["TABLE_PREFIX"] || "network1"
# Rows fetched per query (used as the SQL LIMIT); 1000 when BATCH_SIZE is unset.
BATCH_SIZE ||= (ENV["BATCH_SIZE"] || 1000).to_i
ATTACHMENT_DIR = ENV["ATTACHMENT_DIR"] || ""
# The remaining integer settings default to 0 when unset (nil.to_i == 0).
PROCESS_UPLOADS = ENV["PROCESS_UPLOADS"].to_i
# No default: nil when ANSWERHUB_DOMAIN is unset.
ANSWERHUB_DOMAIN = ENV["ANSWERHUB_DOMAIN"]
AVATAR_DIR = ENV["AVATAR_DIR"] || ""
SITE_ID = ENV["SITE_ID"].to_i
CATEGORY_MAP_FROM = ENV["CATEGORY_MAP_FROM"].to_i
CATEGORY_MAP_TO = ENV["CATEGORY_MAP_TO"].to_i
SCRAPE_AVATARS = ENV["SCRAPE_AVATARS"].to_i
def initialize
super
@client = Mysql2::Client.new(
host: "localhost",
username: DB_USER,
password: DB_PASS,
database: DB_NAME
)
@client =
Mysql2::Client.new(host: "localhost", username: DB_USER, password: DB_PASS, database: DB_NAME)
@skip_updates = true
SiteSetting.tagging_enabled = true
SiteSetting.max_tags_per_topic = 10
@@ -56,7 +51,7 @@ class ImportScripts::AnswerHub < ImportScripts::Base
end
def import_users
puts '', "creating users"
puts "", "creating users"
query =
"SELECT count(*) count
@@ -64,12 +59,13 @@ class ImportScripts::AnswerHub < ImportScripts::Base
WHERE c_type = 'user'
AND c_active = 1
AND c_system <> 1;"
total_count = @client.query(query).first['count']
total_count = @client.query(query).first["count"]
puts "Total count: #{total_count}"
@last_user_id = -1
batches(BATCH_SIZE) do |offset|
query = "SELECT c_id, c_creation_date, c_name, c_primaryEmail, c_last_seen, c_description
query =
"SELECT c_id, c_creation_date, c_name, c_primaryEmail, c_last_seen, c_description
FROM #{TABLE_PREFIX}_authoritables
WHERE c_type = 'user'
AND c_active = 1
@@ -79,17 +75,18 @@ class ImportScripts::AnswerHub < ImportScripts::Base
results = @client.query(query)
break if results.size < 1
@last_user_id = results.to_a.last['c_id']
@last_user_id = results.to_a.last["c_id"]
create_users(results, total: total_count, offset: offset) do |user|
# puts user['c_id'].to_s + ' ' + user['c_name']
next if @lookup.user_id_from_imported_user_id(user['c_id'])
{ id: user['c_id'],
next if @lookup.user_id_from_imported_user_id(user["c_id"])
{
id: user["c_id"],
email: "#{SecureRandom.hex}@invalid.invalid",
username: user['c_name'],
created_at: user['c_creation_date'],
bio_raw: user['c_description'],
last_seen_at: user['c_last_seen'],
username: user["c_name"],
created_at: user["c_creation_date"],
bio_raw: user["c_description"],
last_seen_at: user["c_last_seen"],
}
end
end
@@ -99,7 +96,8 @@ class ImportScripts::AnswerHub < ImportScripts::Base
puts "", "importing categories..."
# Import parent categories first
query = "SELECT c_id, c_name, c_plug, c_parent
query =
"SELECT c_id, c_name, c_plug, c_parent
FROM containers
WHERE c_type = 'space'
AND c_active = 1
@@ -107,15 +105,12 @@ class ImportScripts::AnswerHub < ImportScripts::Base
results = @client.query(query)
create_categories(results) do |c|
{
id: c['c_id'],
name: c['c_name'],
parent_category_id: check_parent_id(c['c_parent']),
}
{ id: c["c_id"], name: c["c_name"], parent_category_id: check_parent_id(c["c_parent"]) }
end
# Import sub-categories
query = "SELECT c_id, c_name, c_plug, c_parent
query =
"SELECT c_id, c_name, c_plug, c_parent
FROM containers
WHERE c_type = 'space'
AND c_active = 1
@@ -125,9 +120,9 @@ class ImportScripts::AnswerHub < ImportScripts::Base
create_categories(results) do |c|
# puts c.inspect
{
id: c['c_id'],
name: c['c_name'],
parent_category_id: category_id_from_imported_category_id(check_parent_id(c['c_parent'])),
id: c["c_id"],
name: c["c_name"],
parent_category_id: category_id_from_imported_category_id(check_parent_id(c["c_parent"])),
}
end
end
@@ -141,7 +136,7 @@ class ImportScripts::AnswerHub < ImportScripts::Base
WHERE c_visibility <> 'deleted'
AND (c_type = 'question'
OR c_type = 'kbentry');"
total_count = @client.query(count_query).first['count']
total_count = @client.query(count_query).first["count"]
@last_topic_id = -1
@@ -159,26 +154,25 @@ class ImportScripts::AnswerHub < ImportScripts::Base
topics = @client.query(query)
break if topics.size < 1
@last_topic_id = topics.to_a.last['c_id']
@last_topic_id = topics.to_a.last["c_id"]
create_posts(topics, total: total_count, offset: offset) do |t|
user_id = user_id_from_imported_user_id(t['c_author']) || Discourse::SYSTEM_USER_ID
body = process_mentions(t['c_body'])
if PROCESS_UPLOADS == 1
body = process_uploads(body, user_id)
end
user_id = user_id_from_imported_user_id(t["c_author"]) || Discourse::SYSTEM_USER_ID
body = process_mentions(t["c_body"])
body = process_uploads(body, user_id) if PROCESS_UPLOADS == 1
markdown_body = HtmlToMarkdown.new(body).to_markdown
{
id: t['c_id'],
id: t["c_id"],
user_id: user_id,
title: t['c_title'],
category: category_id_from_imported_category_id(t['c_primaryContainer']),
title: t["c_title"],
category: category_id_from_imported_category_id(t["c_primaryContainer"]),
raw: markdown_body,
created_at: t['c_creation_date'],
post_create_action: proc do |post|
tag_names = t['c_topic_names'].split(',')
DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names)
end
created_at: t["c_creation_date"],
post_create_action:
proc do |post|
tag_names = t["c_topic_names"].split(",")
DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names)
end,
}
end
end
@@ -194,7 +188,7 @@ class ImportScripts::AnswerHub < ImportScripts::Base
AND (c_type = 'answer'
OR c_type = 'comment'
OR c_type = 'kbentry');"
total_count = @client.query(count_query).first['count']
total_count = @client.query(count_query).first["count"]
@last_post_id = -1
@@ -210,49 +204,49 @@ class ImportScripts::AnswerHub < ImportScripts::Base
ORDER BY c_id ASC
LIMIT #{BATCH_SIZE};"
posts = @client.query(query)
next if all_records_exist? :posts, posts.map { |p| p['c_id'] }
next if all_records_exist? :posts, posts.map { |p| p["c_id"] }
break if posts.size < 1
@last_post_id = posts.to_a.last['c_id']
@last_post_id = posts.to_a.last["c_id"]
create_posts(posts, total: total_count, offset: offset) do |p|
t = topic_lookup_from_imported_post_id(p['c_originalParent'])
t = topic_lookup_from_imported_post_id(p["c_originalParent"])
next unless t
reply_to_post_id = post_id_from_imported_post_id(p['c_parent'])
reply_to_post_id = post_id_from_imported_post_id(p["c_parent"])
reply_to_post = reply_to_post_id.present? ? Post.find(reply_to_post_id) : nil
reply_to_post_number = reply_to_post.present? ? reply_to_post.post_number : nil
user_id = user_id_from_imported_user_id(p['c_author']) || Discourse::SYSTEM_USER_ID
user_id = user_id_from_imported_user_id(p["c_author"]) || Discourse::SYSTEM_USER_ID
body = process_mentions(p['c_body'])
if PROCESS_UPLOADS == 1
body = process_uploads(body, user_id)
end
body = process_mentions(p["c_body"])
body = process_uploads(body, user_id) if PROCESS_UPLOADS == 1
markdown_body = HtmlToMarkdown.new(body).to_markdown
{
id: p['c_id'],
id: p["c_id"],
user_id: user_id,
topic_id: t[:topic_id],
reply_to_post_number: reply_to_post_number,
raw: markdown_body,
created_at: p['c_creation_date'],
post_create_action: proc do |post_info|
begin
if p['c_type'] == 'answer' && p['c_marked'] == 1
post = Post.find(post_info[:id])
if post
user_id = user_id_from_imported_user_id(p['c_author']) || Discourse::SYSTEM_USER_ID
current_user = User.find(user_id)
solved = DiscourseSolved.accept_answer!(post, current_user)
# puts "SOLVED: #{solved}"
created_at: p["c_creation_date"],
post_create_action:
proc do |post_info|
begin
if p["c_type"] == "answer" && p["c_marked"] == 1
post = Post.find(post_info[:id])
if post
user_id =
user_id_from_imported_user_id(p["c_author"]) || Discourse::SYSTEM_USER_ID
current_user = User.find(user_id)
solved = DiscourseSolved.accept_answer!(post, current_user)
# puts "SOLVED: #{solved}"
end
end
rescue ActiveRecord::RecordInvalid
puts "SOLVED: Skipped post_id: #{post.id} because invalid"
end
rescue ActiveRecord::RecordInvalid
puts "SOLVED: Skipped post_id: #{post.id} because invalid"
end
end
end,
}
end
end
@@ -269,11 +263,7 @@ class ImportScripts::AnswerHub < ImportScripts::Base
groups = @client.query(query)
create_groups(groups) do |group|
{
id: group["c_id"],
name: group["c_name"],
visibility_level: 1
}
{ id: group["c_id"], name: group["c_name"], visibility_level: 1 }
end
end
@@ -298,11 +288,16 @@ class ImportScripts::AnswerHub < ImportScripts::Base
group_members.map
groups.each do |group|
dgroup = find_group_by_import_id(group['c_id'])
dgroup = find_group_by_import_id(group["c_id"])
next if dgroup.custom_fields['import_users_added']
next if dgroup.custom_fields["import_users_added"]
group_member_ids = group_members.map { |m| user_id_from_imported_user_id(m["c_members"]) if m["c_groups"] == group['c_id'] }.compact
group_member_ids =
group_members
.map do |m|
user_id_from_imported_user_id(m["c_members"]) if m["c_groups"] == group["c_id"]
end
.compact
# add members
dgroup.bulk_add(group_member_ids)
@@ -310,7 +305,7 @@ class ImportScripts::AnswerHub < ImportScripts::Base
# reload group
dgroup.reload
dgroup.custom_fields['import_users_added'] = true
dgroup.custom_fields["import_users_added"] = true
dgroup.save
progress_count += 1
@@ -362,7 +357,7 @@ class ImportScripts::AnswerHub < ImportScripts::Base
avatars.each do |a|
begin
user_id = user_id_from_imported_user_id(a['c_user'])
user_id = user_id_from_imported_user_id(a["c_user"])
user = User.find(user_id)
if user
filename = "avatar-#{user_id}.png"
@@ -371,9 +366,11 @@ class ImportScripts::AnswerHub < ImportScripts::Base
# Scrape Avatars - Avatars are saved in the db, but it might be easier to just scrape them
if SCRAPE_AVATARS == 1
File.open(path, 'wb') { |f|
f << open("https://#{ANSWERHUB_DOMAIN}/forums/users/#{a['c_user']}/photo/view.html?s=240").read
}
File.open(path, "wb") do |f|
f << open(
"https://#{ANSWERHUB_DOMAIN}/forums/users/#{a["c_user"]}/photo/view.html?s=240",
).read
end
end
upload = @uploader.create_upload(user.id, path, filename)
@@ -389,7 +386,7 @@ class ImportScripts::AnswerHub < ImportScripts::Base
end
end
rescue ActiveRecord::RecordNotFound
puts "Could not find User for user_id: #{a['c_user']}"
puts "Could not find User for user_id: #{a["c_user"]}"
end
end
end
@@ -438,9 +435,10 @@ class ImportScripts::AnswerHub < ImportScripts::Base
raw = body.dup
# https://example.forum.com/forums/users/1469/XYZ_Rob.html
raw.gsub!(/(https:\/\/example.forum.com\/forums\/users\/\d+\/[\w_%-.]*.html)/) do
raw.gsub!(%r{(https://example.forum.com/forums/users/\d+/[\w_%-.]*.html)}) do
legacy_url = $1
import_user_id = legacy_url.match(/https:\/\/example.forum.com\/forums\/users\/(\d+)\/[\w_%-.]*.html/).captures
import_user_id =
legacy_url.match(%r{https://example.forum.com/forums/users/(\d+)/[\w_%-.]*.html}).captures
user = @lookup.find_user_by_import_id(import_user_id[0])
if user.present?
@@ -453,9 +451,9 @@ class ImportScripts::AnswerHub < ImportScripts::Base
end
# /forums/users/395/petrocket.html
raw.gsub!(/(\/forums\/users\/\d+\/[\w_%-.]*.html)/) do
raw.gsub!(%r{(/forums/users/\d+/[\w_%-.]*.html)}) do
legacy_url = $1
import_user_id = legacy_url.match(/\/forums\/users\/(\d+)\/[\w_%-.]*.html/).captures
import_user_id = legacy_url.match(%r{/forums/users/(\d+)/[\w_%-.]*.html}).captures
# puts raw
user = @lookup.find_user_by_import_id(import_user_id[0])
@@ -472,7 +470,7 @@ class ImportScripts::AnswerHub < ImportScripts::Base
end
def create_permalinks
puts '', 'Creating redirects...', ''
puts "", "Creating redirects...", ""
# https://example.forum.com/forums/questions/2005/missing-file.html
Topic.find_each do |topic|
@@ -480,8 +478,12 @@ class ImportScripts::AnswerHub < ImportScripts::Base
if pcf && pcf["import_id"]
id = pcf["import_id"]
slug = Slug.for(topic.title)
Permalink.create(url: "questions/#{id}/#{slug}.html", topic_id: topic.id) rescue nil
print '.'
begin
Permalink.create(url: "questions/#{id}/#{slug}.html", topic_id: topic.id)
rescue StandardError
nil
end
print "."
end
end
end
@@ -496,7 +498,6 @@ class ImportScripts::AnswerHub < ImportScripts::Base
return CATEGORY_MAP_TO if CATEGORY_MAP_FROM > 0 && id == CATEGORY_MAP_FROM
id
end
end
ImportScripts::AnswerHub.new.perform

View File

@@ -1,23 +1,23 @@
# frozen_string_literal: true
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'pg'
require "pg"
class ImportScripts::MyAskBot < ImportScripts::Base
# CHANGE THESE BEFORE RUNNING THE IMPORTER
BATCH_SIZE = 1000
OLD_SITE = "ask.cvxr.com"
DB_NAME = "cvxforum"
DB_USER = "cvxforum"
DB_PORT = 5432
DB_HOST = "ask.cvxr.com"
DB_PASS = 'yeah, right'
OLD_SITE = "ask.cvxr.com"
DB_NAME = "cvxforum"
DB_USER = "cvxforum"
DB_PORT = 5432
DB_HOST = "ask.cvxr.com"
DB_PASS = "yeah, right"
# A list of categories to create. Any post with one of these tags will be
# assigned to that category. Ties are broken by list order.
CATEGORIES = [ 'Nonconvex', 'TFOCS', 'MIDCP', 'FAQ' ]
CATEGORIES = %w[Nonconvex TFOCS MIDCP FAQ]
def initialize
super
@@ -25,13 +25,8 @@ class ImportScripts::MyAskBot < ImportScripts::Base
@thread_parents = {}
@tagmap = []
@td = PG::TextDecoder::TimestampWithTimeZone.new
@client = PG.connect(
dbname: DB_NAME,
host: DB_HOST,
port: DB_PORT,
user: DB_USER,
password: DB_PASS
)
@client =
PG.connect(dbname: DB_NAME, host: DB_HOST, port: DB_PORT, user: DB_USER, password: DB_PASS)
end
def execute
@@ -55,18 +50,17 @@ class ImportScripts::MyAskBot < ImportScripts::Base
def read_tags
puts "", "reading thread tags..."
tag_count = @client.exec(<<-SQL
tag_count = @client.exec(<<-SQL)[0]["count"]
SELECT COUNT(A.id)
FROM askbot_thread_tags A
JOIN tag B
ON A.tag_id = B.id
WHERE A.tag_id > 0
SQL
)[0]["count"]
tags_done = 0
batches(BATCH_SIZE) do |offset|
tags = @client.exec(<<-SQL
tags = @client.exec(<<-SQL)
SELECT A.thread_id, B.name
FROM askbot_thread_tags A
JOIN tag B
@@ -75,7 +69,6 @@ class ImportScripts::MyAskBot < ImportScripts::Base
LIMIT #{BATCH_SIZE}
OFFSET #{offset}
SQL
)
break if tags.ntuples() < 1
tags.each do |tag|
tid = tag["thread_id"].to_i
@@ -83,7 +76,7 @@ class ImportScripts::MyAskBot < ImportScripts::Base
if @tagmap[tid]
@tagmap[tid].push(tnm)
else
@tagmap[tid] = [ tnm ]
@tagmap[tid] = [tnm]
end
tags_done += 1
print_status tags_done, tag_count
@@ -94,21 +87,19 @@ class ImportScripts::MyAskBot < ImportScripts::Base
def import_users
puts "", "importing users"
total_count = @client.exec(<<-SQL
total_count = @client.exec(<<-SQL)[0]["count"]
SELECT COUNT(id)
FROM auth_user
SQL
)[0]["count"]
batches(BATCH_SIZE) do |offset|
users = @client.query(<<-SQL
users = @client.query(<<-SQL)
SELECT id, username, email, is_staff, date_joined, last_seen, real_name, website, location, about
FROM auth_user
ORDER BY date_joined
LIMIT #{BATCH_SIZE}
OFFSET #{offset}
SQL
)
break if users.ntuples() < 1
@@ -133,17 +124,16 @@ class ImportScripts::MyAskBot < ImportScripts::Base
def import_posts
puts "", "importing questions..."
post_count = @client.exec(<<-SQL
post_count = @client.exec(<<-SQL)[0]["count"]
SELECT COUNT(A.id)
FROM askbot_post A
JOIN askbot_thread B
ON A.thread_id = B.id
WHERE NOT B.closed AND A.post_type='question'
SQL
)[0]["count"]
batches(BATCH_SIZE) do |offset|
posts = @client.exec(<<-SQL
posts = @client.exec(<<-SQL)
SELECT A.id, A.author_id, A.added_at, A.text, A.thread_id, B.title
FROM askbot_post A
JOIN askbot_thread B
@@ -153,7 +143,6 @@ class ImportScripts::MyAskBot < ImportScripts::Base
LIMIT #{BATCH_SIZE}
OFFSET #{offset}
SQL
)
break if posts.ntuples() < 1
@@ -176,7 +165,11 @@ class ImportScripts::MyAskBot < ImportScripts::Base
id: pid,
title: post["title"],
category: cat,
custom_fields: { import_id: pid, import_thread_id: tid, import_tags: tags },
custom_fields: {
import_id: pid,
import_thread_id: tid,
import_tags: tags,
},
user_id: user_id_from_imported_user_id(post["author_id"]) || Discourse::SYSTEM_USER_ID,
created_at: Time.zone.at(@td.decode(post["added_at"])),
raw: post["text"],
@@ -188,17 +181,16 @@ class ImportScripts::MyAskBot < ImportScripts::Base
def import_replies
puts "", "importing answers and comments..."
post_count = @client.exec(<<-SQL
post_count = @client.exec(<<-SQL)[0]["count"]
SELECT COUNT(A.id)
FROM askbot_post A
JOIN askbot_thread B
ON A.thread_id = B.id
WHERE NOT B.closed AND A.post_type<>'question'
SQL
)[0]["count"]
batches(BATCH_SIZE) do |offset|
posts = @client.exec(<<-SQL
posts = @client.exec(<<-SQL)
SELECT A.id, A.author_id, A.added_at, A.text, A.thread_id, B.title
FROM askbot_post A
JOIN askbot_thread B
@@ -208,7 +200,6 @@ class ImportScripts::MyAskBot < ImportScripts::Base
LIMIT #{BATCH_SIZE}
OFFSET #{offset}
SQL
)
break if posts.ntuples() < 1
@@ -222,10 +213,12 @@ class ImportScripts::MyAskBot < ImportScripts::Base
{
id: pid,
topic_id: parent[:topic_id],
custom_fields: { import_id: pid },
custom_fields: {
import_id: pid,
},
user_id: user_id_from_imported_user_id(post["author_id"]) || Discourse::SYSTEM_USER_ID,
created_at: Time.zone.at(@td.decode(post["added_at"])),
raw: post["text"]
raw: post["text"],
}
end
end
@@ -240,32 +233,37 @@ class ImportScripts::MyAskBot < ImportScripts::Base
# I am sure this is incomplete, but we didn't make heavy use of internal
# links on our site.
tmp = Regexp.quote("http://#{OLD_SITE}")
r1 = /"(#{tmp})?\/question\/(\d+)\/[a-zA-Z-]*\/?"/
r2 = /\((#{tmp})?\/question\/(\d+)\/[a-zA-Z-]*\/?\)/
r3 = /<?#tmp\/question\/(\d+)\/[a-zA-Z-]*\/?>?/
# Patterns for links that point at questions on the old site. `tmp` holds the
# regexp-quoted "http://<old site>" prefix. The question id is capture 2 in r1
# and r2 (capture 1 is the optional host prefix) and capture 1 in r3.
# r1: URL, absolute or site-relative, wrapped in double quotes.
r1 = %r{"(#{tmp})?/question/(\d+)/[a-zA-Z-]*/?"}
# r2: URL, absolute or site-relative, wrapped in parentheses.
r2 = %r{\((#{tmp})?/question/(\d+)/[a-zA-Z-]*/?\)}
# r3: absolute URL, optionally wrapped in angle brackets. The host prefix must
# be interpolated as #{tmp}; a bare "#tmp" is literal text and would never
# match a real link.
r3 = %r{<?#{tmp}/question/(\d+)/[a-zA-Z-]*/?>?}
Post.find_each do |post|
raw = post.raw.gsub(r1) do
if topic = topic_lookup_from_imported_post_id($2)
"\"#{topic[:url]}\""
else
$&
raw =
post
.raw
.gsub(r1) do
if topic = topic_lookup_from_imported_post_id($2)
"\"#{topic[:url]}\""
else
$&
end
end
raw =
raw.gsub(r2) do
if topic = topic_lookup_from_imported_post_id($2)
"(#{topic[:url]})"
else
$&
end
end
end
raw = raw.gsub(r2) do
if topic = topic_lookup_from_imported_post_id($2)
"(#{topic[:url]})"
else
$&
raw =
raw.gsub(r3) do
if topic = topic_lookup_from_imported_post_id($1)
trec = Topic.find_by(id: topic[:topic_id])
"[#{trec.title}](#{topic[:url]})"
else
$&
end
end
end
raw = raw.gsub(r3) do
if topic = topic_lookup_from_imported_post_id($1)
trec = Topic.find_by(id: topic[:topic_id])
"[#{trec.title}](#{topic[:url]})"
else
$&
end
end
if raw != post.raw
post.raw = raw

View File

@@ -1,6 +1,6 @@
# frozen_string_literal: true
if ARGV.include?('bbcode-to-md')
if ARGV.include?("bbcode-to-md")
# Replace (most) bbcode with markdown before creating posts.
# This will dramatically clean up the final posts in Discourse.
#
@@ -10,17 +10,17 @@ if ARGV.include?('bbcode-to-md')
# cd ruby-bbcode-to-md
# gem build ruby-bbcode-to-md.gemspec
# gem install ruby-bbcode-to-md-*.gem
require 'ruby-bbcode-to-md'
require "ruby-bbcode-to-md"
end
require_relative '../../config/environment'
require_relative 'base/lookup_container'
require_relative 'base/uploader'
require_relative "../../config/environment"
require_relative "base/lookup_container"
require_relative "base/uploader"
module ImportScripts; end
module ImportScripts
end
class ImportScripts::Base
def initialize
preload_i18n
@@ -62,15 +62,14 @@ class ImportScripts::Base
end
elapsed = Time.now - @start_times[:import]
puts '', '', 'Done (%02dh %02dmin %02dsec)' % [elapsed / 3600, elapsed / 60 % 60, elapsed % 60]
puts "", "", "Done (%02dh %02dmin %02dsec)" % [elapsed / 3600, elapsed / 60 % 60, elapsed % 60]
ensure
reset_site_settings
end
def get_site_settings_for_import
{
blocked_email_domains: '',
blocked_email_domains: "",
min_topic_title_length: 1,
min_post_length: 1,
min_first_post_length: 1,
@@ -78,21 +77,23 @@ class ImportScripts::Base
min_personal_message_title_length: 1,
allow_duplicate_topic_titles: true,
allow_duplicate_topic_titles_category: false,
disable_emails: 'yes',
max_attachment_size_kb: 102400,
max_image_size_kb: 102400,
authorized_extensions: '*',
disable_emails: "yes",
max_attachment_size_kb: 102_400,
max_image_size_kb: 102_400,
authorized_extensions: "*",
clean_up_inactive_users_after_days: 0,
clean_up_unused_staged_users_after_days: 0,
clean_up_uploads: false,
clean_orphan_uploads_grace_period_hours: 1800
clean_orphan_uploads_grace_period_hours: 1800,
}
end
def change_site_settings
if SiteSetting.bootstrap_mode_enabled
SiteSetting.default_trust_level = TrustLevel[0] if SiteSetting.default_trust_level == TrustLevel[1]
SiteSetting.default_email_digest_frequency = 10080 if SiteSetting.default_email_digest_frequency == 1440
SiteSetting.default_trust_level = TrustLevel[0] if SiteSetting.default_trust_level ==
TrustLevel[1]
SiteSetting.default_email_digest_frequency =
10_080 if SiteSetting.default_email_digest_frequency == 1440
SiteSetting.bootstrap_mode_enabled = false
end
@@ -131,7 +132,7 @@ class ImportScripts::Base
raise NotImplementedError
end
%i{
%i[
add_category
add_group
add_post
@@ -146,9 +147,7 @@ class ImportScripts::Base
topic_lookup_from_imported_post_id
user_already_imported?
user_id_from_imported_user_id
}.each do |method_name|
delegate method_name, to: :@lookup
end
].each { |method_name| delegate method_name, to: :@lookup }
def create_admin(opts = {})
admin = User.new
@@ -196,7 +195,11 @@ class ImportScripts::Base
end
end
print_status(created + skipped + failed + (opts[:offset] || 0), total, get_start_time("groups"))
print_status(
created + skipped + failed + (opts[:offset] || 0),
total,
get_start_time("groups"),
)
end
[created, skipped]
@@ -224,23 +227,22 @@ class ImportScripts::Base
ActiveRecord::Base.transaction do
begin
connection = ActiveRecord::Base.connection.raw_connection
connection.exec('CREATE TEMP TABLE import_ids(val text PRIMARY KEY)')
connection.exec("CREATE TEMP TABLE import_ids(val text PRIMARY KEY)")
import_id_clause = import_ids.map { |id| "('#{PG::Connection.escape_string(id.to_s)}')" }.join(",")
import_id_clause =
import_ids.map { |id| "('#{PG::Connection.escape_string(id.to_s)}')" }.join(",")
connection.exec("INSERT INTO import_ids VALUES #{import_id_clause}")
existing = "#{type.to_s.classify}CustomField".constantize
existing = existing.where(name: 'import_id')
.joins('JOIN import_ids ON val = value')
.count
existing = existing.where(name: "import_id").joins("JOIN import_ids ON val = value").count
if existing == import_ids.length
puts "Skipping #{import_ids.length} already imported #{type}"
true
end
ensure
connection.exec('DROP TABLE import_ids') unless connection.nil?
connection.exec("DROP TABLE import_ids") unless connection.nil?
end
end
end
@@ -292,7 +294,11 @@ class ImportScripts::Base
end
end
print_status(created + skipped + failed + (opts[:offset] || 0), total, get_start_time("users"))
print_status(
created + skipped + failed + (opts[:offset] || 0),
total,
get_start_time("users"),
)
end
[created, skipped]
@@ -305,7 +311,9 @@ class ImportScripts::Base
post_create_action = opts.delete(:post_create_action)
existing = find_existing_user(opts[:email], opts[:username])
return existing if existing && (merge || existing.custom_fields["import_id"].to_s == import_id.to_s)
if existing && (merge || existing.custom_fields["import_id"].to_s == import_id.to_s)
return existing
end
bio_raw = opts.delete(:bio_raw)
website = opts.delete(:website)
@@ -316,8 +324,11 @@ class ImportScripts::Base
original_name = opts[:name]
original_email = opts[:email] = opts[:email].downcase
if !UsernameValidator.new(opts[:username]).valid_format? || !User.username_available?(opts[:username])
opts[:username] = UserNameSuggester.suggest(opts[:username].presence || opts[:name].presence || opts[:email])
if !UsernameValidator.new(opts[:username]).valid_format? ||
!User.username_available?(opts[:username])
opts[:username] = UserNameSuggester.suggest(
opts[:username].presence || opts[:name].presence || opts[:email],
)
end
if !EmailAddressValidator.valid_value?(opts[:email])
@@ -339,7 +350,8 @@ class ImportScripts::Base
u = User.new(opts)
(opts[:custom_fields] || {}).each { |k, v| u.custom_fields[k] = v }
u.custom_fields["import_id"] = import_id
u.custom_fields["import_username"] = original_username if original_username.present? && original_username != opts[:username]
u.custom_fields["import_username"] = original_username if original_username.present? &&
original_username != opts[:username]
u.custom_fields["import_avatar_url"] = avatar_url if avatar_url.present?
u.custom_fields["import_pass"] = opts[:password] if opts[:password].present?
u.custom_fields["import_email"] = original_email if original_email != opts[:email]
@@ -359,9 +371,7 @@ class ImportScripts::Base
end
end
if opts[:active] && opts[:password].present?
u.activate
end
u.activate if opts[:active] && opts[:password].present?
rescue => e
# try based on email
if e.try(:record).try(:errors).try(:messages).try(:[], :primary_email).present?
@@ -377,7 +387,7 @@ class ImportScripts::Base
end
end
if u.custom_fields['import_email']
if u.custom_fields["import_email"]
u.suspended_at = Time.zone.at(Time.now)
u.suspended_till = 200.years.from_now
u.save!
@@ -388,11 +398,15 @@ class ImportScripts::Base
user_option.email_messages_level = UserOption.email_level_types[:never]
user_option.save!
if u.save
StaffActionLogger.new(Discourse.system_user).log_user_suspend(u, 'Invalid email address on import')
StaffActionLogger.new(Discourse.system_user).log_user_suspend(
u,
"Invalid email address on import",
)
else
Rails.logger.error("Failed to suspend user #{u.username}. #{u.errors.try(:full_messages).try(:inspect)}")
Rails.logger.error(
"Failed to suspend user #{u.username}. #{u.errors.try(:full_messages).try(:inspect)}",
)
end
end
post_create_action.try(:call, u) if u.persisted?
@@ -402,7 +416,8 @@ class ImportScripts::Base
def find_existing_user(email, username)
# Force the use of the index on the 'user_emails' table
UserEmail.where("lower(email) = ?", email.downcase).first&.user || User.where(username: username).first
UserEmail.where("lower(email) = ?", email.downcase).first&.user ||
User.where(username: username).first
end
def created_category(category)
@@ -435,7 +450,8 @@ class ImportScripts::Base
# make sure categories don't go more than 2 levels deep
if params[:parent_category_id]
top = Category.find_by_id(params[:parent_category_id])
top = top.parent_category while (top&.height_of_ancestors || -1) + 1 >= SiteSetting.max_category_nesting
top = top.parent_category while (top&.height_of_ancestors || -1) + 1 >=
SiteSetting.max_category_nesting
params[:parent_category_id] = top.id if top
end
@@ -471,15 +487,16 @@ class ImportScripts::Base
post_create_action = opts.delete(:post_create_action)
new_category = Category.new(
name: opts[:name],
user_id: opts[:user_id] || opts[:user].try(:id) || Discourse::SYSTEM_USER_ID,
position: opts[:position],
parent_category_id: opts[:parent_category_id],
color: opts[:color] || category_color(opts[:parent_category_id]),
text_color: opts[:text_color] || "FFF",
read_restricted: opts[:read_restricted] || false,
)
new_category =
Category.new(
name: opts[:name],
user_id: opts[:user_id] || opts[:user].try(:id) || Discourse::SYSTEM_USER_ID,
position: opts[:position],
parent_category_id: opts[:parent_category_id],
color: opts[:color] || category_color(opts[:parent_category_id]),
text_color: opts[:text_color] || "FFF",
read_restricted: opts[:read_restricted] || false,
)
new_category.custom_fields["import_id"] = import_id if import_id
new_category.save!
@@ -498,10 +515,16 @@ class ImportScripts::Base
end
def category_color(parent_category_id)
@category_colors ||= SiteSetting.category_colors.split('|')
@category_colors ||= SiteSetting.category_colors.split("|")
index = @next_category_color_index[parent_category_id].presence || 0
@next_category_color_index[parent_category_id] = index + 1 >= @category_colors.count ? 0 : index + 1
@next_category_color_index[parent_category_id] = (
if index + 1 >= @category_colors.count
0
else
index + 1
end
)
@category_colors[index]
end
@@ -571,7 +594,7 @@ class ImportScripts::Base
opts = opts.merge(skip_validations: true)
opts[:import_mode] = true
opts[:custom_fields] ||= {}
opts[:custom_fields]['import_id'] = import_id
opts[:custom_fields]["import_id"] = import_id
unless opts[:topic_id]
opts[:meta_data] = meta_data = {}
@@ -582,7 +605,11 @@ class ImportScripts::Base
opts[:guardian] = STAFF_GUARDIAN
if @bbcode_to_md
opts[:raw] = opts[:raw].bbcode_to_md(false, {}, :disable, :quote) rescue opts[:raw]
opts[:raw] = begin
opts[:raw].bbcode_to_md(false, {}, :disable, :quote)
rescue StandardError
opts[:raw]
end
end
post_creator = PostCreator.new(user, opts)
@@ -628,7 +655,7 @@ class ImportScripts::Base
created += 1 if manager.errors.none?
skipped += 1 if manager.errors.any?
rescue
rescue StandardError
skipped += 1
end
end
@@ -671,14 +698,14 @@ class ImportScripts::Base
def close_inactive_topics(opts = {})
num_days = opts[:days] || 30
puts '', "Closing topics that have been inactive for more than #{num_days} days."
puts "", "Closing topics that have been inactive for more than #{num_days} days."
query = Topic.where('last_posted_at < ?', num_days.days.ago).where(closed: false)
query = Topic.where("last_posted_at < ?", num_days.days.ago).where(closed: false)
total_count = query.count
closed_count = 0
query.find_each do |topic|
topic.update_status('closed', true, Discourse.system_user)
topic.update_status("closed", true, Discourse.system_user)
closed_count += 1
print_status(closed_count, total_count, get_start_time("close_inactive_topics"))
end
@@ -790,7 +817,9 @@ class ImportScripts::Base
puts "", "Updating user digest_attempted_at..."
DB.exec("UPDATE user_stats SET digest_attempted_at = now() - random() * interval '1 week' WHERE digest_attempted_at IS NULL")
DB.exec(
"UPDATE user_stats SET digest_attempted_at = now() - random() * interval '1 week' WHERE digest_attempted_at IS NULL",
)
end
# scripts that are able to import last_seen_at from the source data should override this method
@@ -854,13 +883,15 @@ class ImportScripts::Base
count = 0
total = User.count
User.includes(:user_stat).find_each do |user|
begin
user.update_columns(trust_level: 0) if user.trust_level > 0 && user.post_count == 0
rescue Discourse::InvalidAccess
User
.includes(:user_stat)
.find_each do |user|
begin
user.update_columns(trust_level: 0) if user.trust_level > 0 && user.post_count == 0
rescue Discourse::InvalidAccess
end
print_status(count += 1, total, get_start_time("update_tl0"))
end
print_status(count += 1, total, get_start_time("update_tl0"))
end
end
def update_user_signup_date_based_on_first_post
@@ -870,7 +901,7 @@ class ImportScripts::Base
total = User.count
User.find_each do |user|
if first = user.posts.order('created_at ASC').first
if first = user.posts.order("created_at ASC").first
user.created_at = first.created_at
user.save!
end
@@ -893,16 +924,16 @@ class ImportScripts::Base
def print_status(current, max, start_time = nil)
if start_time.present?
elapsed_seconds = Time.now - start_time
elements_per_minute = '[%.0f items/min] ' % [current / elapsed_seconds.to_f * 60]
elements_per_minute = "[%.0f items/min] " % [current / elapsed_seconds.to_f * 60]
else
elements_per_minute = ''
elements_per_minute = ""
end
print "\r%9d / %d (%5.1f%%) %s" % [current, max, current / max.to_f * 100, elements_per_minute]
end
def print_spinner
@spinner_chars ||= %w{ | / - \\ }
@spinner_chars ||= %w[| / - \\]
@spinner_chars.push @spinner_chars.shift
print "\b#{@spinner_chars[0]}"
end

View File

@@ -13,65 +13,69 @@ module ImportScripts
def initialize(cols)
cols.each_with_index do |col, idx|
self.class.public_send(:define_method, col.downcase.gsub(/[\W]/, '_').squeeze('_')) do
@row[idx]
end
self
.class
.public_send(:define_method, col.downcase.gsub(/[\W]/, "_").squeeze("_")) { @row[idx] }
end
end
end
def csv_parse(filename, col_sep = ',')
def csv_parse(filename, col_sep = ",")
first = true
row = nil
current_row = +""
double_quote_count = 0
File.open(filename).each_line do |line|
File
.open(filename)
.each_line do |line|
line.strip!
line.strip!
current_row << "\n" unless current_row.empty?
current_row << line
current_row << "\n" unless current_row.empty?
current_row << line
double_quote_count += line.scan('"').count
double_quote_count += line.scan('"').count
next if double_quote_count % 2 == 1 # this row continues on a new line. don't parse until we have the whole row.
next if double_quote_count % 2 == 1 # this row continues on a new line. don't parse until we have the whole row.
raw =
begin
CSV.parse(current_row, col_sep: col_sep)
rescue CSV::MalformedCSVError => e
puts e.message
puts "*" * 100
puts "Bad row skipped, line is: #{line}"
puts
puts current_row
puts
puts "double quote count is : #{double_quote_count}"
puts "*" * 100
raw = begin
CSV.parse(current_row, col_sep: col_sep)
rescue CSV::MalformedCSVError => e
puts e.message
puts "*" * 100
puts "Bad row skipped, line is: #{line}"
puts
puts current_row
puts
puts "double quote count is : #{double_quote_count}"
puts "*" * 100
current_row = ""
double_quote_count = 0
current_row = ""
double_quote_count = 0
next
end[
0
]
next
end[0]
if first
row = RowResolver.create(raw)
if first
row = RowResolver.create(raw)
current_row = ""
double_quote_count = 0
first = false
next
end
row.load(raw)
yield row
current_row = ""
double_quote_count = 0
first = false
next
end
row.load(raw)
yield row
current_row = ""
double_quote_count = 0
end
end
end
end

View File

@@ -1,6 +1,6 @@
# frozen_string_literal: true
require 'sqlite3'
require "sqlite3"
module ImportScripts
class GenericDatabase
@@ -80,24 +80,20 @@ module ImportScripts
VALUES (:id, :raw, :topic_id, :user_id, :created_at, :reply_to_post_id, :url, :upload_count)
SQL
attachments&.each do |attachment|
@db.execute(<<-SQL, post_id: post[:id], path: attachment)
attachments&.each { |attachment| @db.execute(<<-SQL, post_id: post[:id], path: attachment) }
INSERT OR REPLACE INTO post_upload (post_id, path)
VALUES (:post_id, :path)
SQL
end
like_user_ids&.each do |user_id|
@db.execute(<<-SQL, post_id: post[:id], user_id: user_id)
like_user_ids&.each { |user_id| @db.execute(<<-SQL, post_id: post[:id], user_id: user_id) }
INSERT OR REPLACE INTO like (post_id, user_id)
VALUES (:post_id, :user_id)
SQL
end
end
end
def sort_posts_by_created_at
@db.execute 'DELETE FROM post_order'
@db.execute "DELETE FROM post_order"
@db.execute <<-SQL
INSERT INTO post_order (post_id)
@@ -146,7 +142,7 @@ module ImportScripts
LIMIT #{@batch_size}
SQL
add_last_column_value(rows, 'id')
add_last_column_value(rows, "id")
end
def get_user_id(username)
@@ -173,7 +169,7 @@ module ImportScripts
LIMIT #{@batch_size}
SQL
add_last_column_value(rows, 'id')
add_last_column_value(rows, "id")
end
def fetch_topic_attachments(topic_id)
@@ -200,7 +196,7 @@ module ImportScripts
LIMIT #{@batch_size}
SQL
add_last_column_value(rows, 'rowid')
add_last_column_value(rows, "rowid")
end
def fetch_sorted_posts(last_row_id)
@@ -213,7 +209,7 @@ module ImportScripts
LIMIT #{@batch_size}
SQL
add_last_column_value(rows, 'rowid')
add_last_column_value(rows, "rowid")
end
def fetch_post_attachments(post_id)
@@ -240,7 +236,7 @@ module ImportScripts
LIMIT #{@batch_size}
SQL
add_last_column_value(rows, 'rowid')
add_last_column_value(rows, "rowid")
end
def execute_sql(sql)
@@ -254,12 +250,12 @@ module ImportScripts
private
def configure_database
@db.execute 'PRAGMA journal_mode = OFF'
@db.execute 'PRAGMA locking_mode = EXCLUSIVE'
@db.execute "PRAGMA journal_mode = OFF"
@db.execute "PRAGMA locking_mode = EXCLUSIVE"
end
def key_data_type
@numeric_keys ? 'INTEGER' : 'TEXT'
@numeric_keys ? "INTEGER" : "TEXT"
end
def create_category_table
@@ -299,7 +295,7 @@ module ImportScripts
)
SQL
@db.execute 'CREATE INDEX IF NOT EXISTS user_by_username ON user (username)'
@db.execute "CREATE INDEX IF NOT EXISTS user_by_username ON user (username)"
end
def create_topic_table
@@ -317,7 +313,7 @@ module ImportScripts
)
SQL
@db.execute 'CREATE INDEX IF NOT EXISTS topic_by_user_id ON topic (user_id)'
@db.execute "CREATE INDEX IF NOT EXISTS topic_by_user_id ON topic (user_id)"
@db.execute <<-SQL
CREATE TABLE IF NOT EXISTS topic_upload (
@@ -326,7 +322,7 @@ module ImportScripts
)
SQL
@db.execute 'CREATE UNIQUE INDEX IF NOT EXISTS topic_upload_unique ON topic_upload(topic_id, path)'
@db.execute "CREATE UNIQUE INDEX IF NOT EXISTS topic_upload_unique ON topic_upload(topic_id, path)"
end
def create_post_table
@@ -343,7 +339,7 @@ module ImportScripts
)
SQL
@db.execute 'CREATE INDEX IF NOT EXISTS post_by_user_id ON post (user_id)'
@db.execute "CREATE INDEX IF NOT EXISTS post_by_user_id ON post (user_id)"
@db.execute <<-SQL
CREATE TABLE IF NOT EXISTS post_order (
@@ -358,7 +354,7 @@ module ImportScripts
)
SQL
@db.execute 'CREATE UNIQUE INDEX IF NOT EXISTS post_upload_unique ON post_upload(post_id, path)'
@db.execute "CREATE UNIQUE INDEX IF NOT EXISTS post_upload_unique ON post_upload(post_id, path)"
end
def prepare(hash)

View File

@@ -3,27 +3,26 @@
module ImportScripts
class LookupContainer
def initialize
puts 'Loading existing groups...'
@groups = GroupCustomField.where(name: 'import_id').pluck(:value, :group_id).to_h
puts "Loading existing groups..."
@groups = GroupCustomField.where(name: "import_id").pluck(:value, :group_id).to_h
puts 'Loading existing users...'
@users = UserCustomField.where(name: 'import_id').pluck(:value, :user_id).to_h
puts "Loading existing users..."
@users = UserCustomField.where(name: "import_id").pluck(:value, :user_id).to_h
puts 'Loading existing categories...'
@categories = CategoryCustomField.where(name: 'import_id').pluck(:value, :category_id).to_h
puts "Loading existing categories..."
@categories = CategoryCustomField.where(name: "import_id").pluck(:value, :category_id).to_h
puts 'Loading existing posts...'
@posts = PostCustomField.where(name: 'import_id').pluck(:value, :post_id).to_h
puts "Loading existing posts..."
@posts = PostCustomField.where(name: "import_id").pluck(:value, :post_id).to_h
puts 'Loading existing topics...'
puts "Loading existing topics..."
@topics = {}
Post.joins(:topic).pluck('posts.id, posts.topic_id, posts.post_number, topics.slug').each do |p|
@topics[p[0]] = {
topic_id: p[1],
post_number: p[2],
url: Post.url(p[3], p[1], p[2])
}
end
Post
.joins(:topic)
.pluck("posts.id, posts.topic_id, posts.post_number, topics.slug")
.each do |p|
@topics[p[0]] = { topic_id: p[1], post_number: p[2], url: Post.url(p[3], p[1], p[2]) }
end
end
# Get the Discourse Post id based on the id of the source record
@@ -44,7 +43,7 @@ module ImportScripts
# Get the Discourse Group based on the id of the source group
def find_group_by_import_id(import_id)
GroupCustomField.where(name: 'import_id', value: import_id.to_s).first.try(:group)
GroupCustomField.where(name: "import_id", value: import_id.to_s).first.try(:group)
end
# Get the Discourse User id based on the id of the source user
@@ -54,7 +53,7 @@ module ImportScripts
# Get the Discourse User based on the id of the source user
def find_user_by_import_id(import_id)
UserCustomField.where(name: 'import_id', value: import_id.to_s).first.try(:user)
UserCustomField.where(name: "import_id", value: import_id.to_s).first.try(:user)
end
def find_username_by_import_id(import_id)
@@ -84,11 +83,7 @@ module ImportScripts
end
def add_topic(post)
@topics[post.id] = {
post_number: post.post_number,
topic_id: post.topic_id,
url: post.url,
}
@topics[post.id] = { post_number: post.post_number, topic_id: post.topic_id, url: post.url }
end
def user_already_imported?(import_id)
@@ -98,6 +93,5 @@ module ImportScripts
def post_already_imported?(import_id)
@posts.has_key?(import_id) || @posts.has_key?(import_id.to_s)
end
end
end

View File

@@ -13,8 +13,16 @@ module ImportScripts
STDERR.puts "Failed to create upload: #{e}"
nil
ensure
tmp.close rescue nil
tmp.unlink rescue nil
begin
tmp.close
rescue StandardError
nil
end
begin
tmp.unlink
rescue StandardError
nil
end
end
def create_avatar(user, avatar_path)
@@ -30,7 +38,7 @@ module ImportScripts
STDERR.puts "Failed to upload avatar for user #{user.username}: #{avatar_path}"
STDERR.puts upload.errors.inspect if upload
end
rescue
rescue StandardError
STDERR.puts "Failed to create avatar for user #{user.username}: #{avatar_path}"
ensure
tempfile.close! if tempfile
@@ -52,11 +60,9 @@ module ImportScripts
def copy_to_tempfile(source_path)
extension = File.extname(source_path)
tmp = Tempfile.new(['discourse-upload', extension])
tmp = Tempfile.new(["discourse-upload", extension])
File.open(source_path) do |source_stream|
IO.copy_stream(source_stream, tmp)
end
File.open(source_path) { |source_stream| IO.copy_stream(source_stream, tmp) }
tmp.rewind
tmp

View File

@@ -1,29 +1,29 @@
# frozen_string_literal: true
require 'mysql2'
require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::Bbpress < ImportScripts::Base
BB_PRESS_HOST ||= ENV['BBPRESS_HOST'] || "localhost"
BB_PRESS_DB ||= ENV['BBPRESS_DB'] || "bbpress"
BATCH_SIZE ||= 1000
BB_PRESS_PW ||= ENV['BBPRESS_PW'] || ""
BB_PRESS_USER ||= ENV['BBPRESS_USER'] || "root"
BB_PRESS_PREFIX ||= ENV['BBPRESS_PREFIX'] || "wp_"
BB_PRESS_ATTACHMENTS_DIR ||= ENV['BBPRESS_ATTACHMENTS_DIR'] || "/path/to/attachments"
BB_PRESS_HOST ||= ENV["BBPRESS_HOST"] || "localhost"
BB_PRESS_DB ||= ENV["BBPRESS_DB"] || "bbpress"
BATCH_SIZE ||= 1000
BB_PRESS_PW ||= ENV["BBPRESS_PW"] || ""
BB_PRESS_USER ||= ENV["BBPRESS_USER"] || "root"
BB_PRESS_PREFIX ||= ENV["BBPRESS_PREFIX"] || "wp_"
BB_PRESS_ATTACHMENTS_DIR ||= ENV["BBPRESS_ATTACHMENTS_DIR"] || "/path/to/attachments"
def initialize
super
@he = HTMLEntities.new
@client = Mysql2::Client.new(
host: BB_PRESS_HOST,
username: BB_PRESS_USER,
database: BB_PRESS_DB,
password: BB_PRESS_PW,
)
@client =
Mysql2::Client.new(
host: BB_PRESS_HOST,
username: BB_PRESS_USER,
database: BB_PRESS_DB,
password: BB_PRESS_PW,
)
end
def execute
@@ -40,17 +40,16 @@ class ImportScripts::Bbpress < ImportScripts::Base
puts "", "importing users..."
last_user_id = -1
total_users = bbpress_query(<<-SQL
total_users = bbpress_query(<<-SQL).first["cnt"]
SELECT COUNT(DISTINCT(u.id)) AS cnt
FROM #{BB_PRESS_PREFIX}users u
LEFT JOIN #{BB_PRESS_PREFIX}posts p ON p.post_author = u.id
WHERE p.post_type IN ('forum', 'reply', 'topic')
AND user_email LIKE '%@%'
SQL
).first["cnt"]
batches(BATCH_SIZE) do |offset|
users = bbpress_query(<<-SQL
users = bbpress_query(<<-SQL).to_a
SELECT u.id, user_nicename, display_name, user_email, user_registered, user_url, user_pass
FROM #{BB_PRESS_PREFIX}users u
LEFT JOIN #{BB_PRESS_PREFIX}posts p ON p.post_author = u.id
@@ -61,7 +60,6 @@ class ImportScripts::Bbpress < ImportScripts::Base
ORDER BY u.id
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if users.empty?
@@ -73,22 +71,20 @@ class ImportScripts::Bbpress < ImportScripts::Base
user_ids_sql = user_ids.join(",")
users_description = {}
bbpress_query(<<-SQL
bbpress_query(<<-SQL).each { |um| users_description[um["user_id"]] = um["description"] }
SELECT user_id, meta_value description
FROM #{BB_PRESS_PREFIX}usermeta
WHERE user_id IN (#{user_ids_sql})
AND meta_key = 'description'
SQL
).each { |um| users_description[um["user_id"]] = um["description"] }
users_last_activity = {}
bbpress_query(<<-SQL
bbpress_query(<<-SQL).each { |um| users_last_activity[um["user_id"]] = um["last_activity"] }
SELECT user_id, meta_value last_activity
FROM #{BB_PRESS_PREFIX}usermeta
WHERE user_id IN (#{user_ids_sql})
AND meta_key = 'last_activity'
SQL
).each { |um| users_last_activity[um["user_id"]] = um["last_activity"] }
create_users(users, total: total_users, offset: offset) do |u|
{
@@ -96,7 +92,7 @@ class ImportScripts::Bbpress < ImportScripts::Base
username: u["user_nicename"],
password: u["user_pass"],
email: u["user_email"].downcase,
name: u["display_name"].presence || u['user_nicename'],
name: u["display_name"].presence || u["user_nicename"],
created_at: u["user_registered"],
website: u["user_url"],
bio_raw: users_description[u["id"]],
@@ -114,67 +110,60 @@ class ImportScripts::Bbpress < ImportScripts::Base
emails = Array.new
# gather anonymous users via postmeta table
bbpress_query(<<-SQL
bbpress_query(<<-SQL).each do |pm|
SELECT post_id, meta_key, meta_value
FROM #{BB_PRESS_PREFIX}postmeta
WHERE meta_key LIKE '_bbp_anonymous%'
SQL
).each do |pm|
anon_posts[pm['post_id']] = Hash.new if not anon_posts[pm['post_id']]
anon_posts[pm["post_id"]] = Hash.new if not anon_posts[pm["post_id"]]
if pm['meta_key'] == '_bbp_anonymous_email'
anon_posts[pm['post_id']]['email'] = pm['meta_value']
if pm["meta_key"] == "_bbp_anonymous_email"
anon_posts[pm["post_id"]]["email"] = pm["meta_value"]
end
if pm['meta_key'] == '_bbp_anonymous_name'
anon_posts[pm['post_id']]['name'] = pm['meta_value']
if pm["meta_key"] == "_bbp_anonymous_name"
anon_posts[pm["post_id"]]["name"] = pm["meta_value"]
end
if pm['meta_key'] == '_bbp_anonymous_website'
anon_posts[pm['post_id']]['website'] = pm['meta_value']
if pm["meta_key"] == "_bbp_anonymous_website"
anon_posts[pm["post_id"]]["website"] = pm["meta_value"]
end
end
# gather every existent username
anon_posts.each do |id, post|
anon_names[post['name']] = Hash.new if not anon_names[post['name']]
anon_names[post["name"]] = Hash.new if not anon_names[post["name"]]
# overwriting email address, one user can only use one email address
anon_names[post['name']]['email'] = post['email']
anon_names[post['name']]['website'] = post['website'] if post['website'] != ''
anon_names[post["name"]]["email"] = post["email"]
anon_names[post["name"]]["website"] = post["website"] if post["website"] != ""
end
# make sure every user name has a unique email address
anon_names.each do |k, name|
if not emails.include? name['email']
emails.push ( name['email'])
if not emails.include? name["email"]
emails.push (name["email"])
else
name['email'] = "anonymous_#{SecureRandom.hex}@no-email.invalid"
name["email"] = "anonymous_#{SecureRandom.hex}@no-email.invalid"
end
end
create_users(anon_names) do |k, n|
{
id: k,
email: n["email"].downcase,
name: k,
website: n["website"]
}
{ id: k, email: n["email"].downcase, name: k, website: n["website"] }
end
end
def import_categories
puts "", "importing categories..."
categories = bbpress_query(<<-SQL
categories = bbpress_query(<<-SQL)
SELECT id, post_name, post_parent
FROM #{BB_PRESS_PREFIX}posts
WHERE post_type = 'forum'
AND LENGTH(COALESCE(post_name, '')) > 0
ORDER BY post_parent, id
SQL
)
create_categories(categories) do |c|
category = { id: c['id'], name: c['post_name'] }
if (parent_id = c['post_parent'].to_i) > 0
category = { id: c["id"], name: c["post_name"] }
if (parent_id = c["post_parent"].to_i) > 0
category[:parent_category_id] = category_id_from_imported_category_id(parent_id)
end
category
@@ -185,16 +174,15 @@ class ImportScripts::Bbpress < ImportScripts::Base
puts "", "importing topics and posts..."
last_post_id = -1
total_posts = bbpress_query(<<-SQL
total_posts = bbpress_query(<<-SQL).first["count"]
SELECT COUNT(*) count
FROM #{BB_PRESS_PREFIX}posts
WHERE post_status <> 'spam'
AND post_type IN ('topic', 'reply')
SQL
).first["count"]
batches(BATCH_SIZE) do |offset|
posts = bbpress_query(<<-SQL
posts = bbpress_query(<<-SQL).to_a
SELECT id,
post_author,
post_date,
@@ -209,7 +197,6 @@ class ImportScripts::Bbpress < ImportScripts::Base
ORDER BY id
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if posts.empty?
@@ -221,31 +208,29 @@ class ImportScripts::Bbpress < ImportScripts::Base
post_ids_sql = post_ids.join(",")
posts_likes = {}
bbpress_query(<<-SQL
bbpress_query(<<-SQL).each { |pm| posts_likes[pm["post_id"]] = pm["likes"].to_i }
SELECT post_id, meta_value likes
FROM #{BB_PRESS_PREFIX}postmeta
WHERE post_id IN (#{post_ids_sql})
AND meta_key = 'Likes'
SQL
).each { |pm| posts_likes[pm["post_id"]] = pm["likes"].to_i }
anon_names = {}
bbpress_query(<<-SQL
bbpress_query(<<-SQL).each { |pm| anon_names[pm["post_id"]] = pm["meta_value"] }
SELECT post_id, meta_value
FROM #{BB_PRESS_PREFIX}postmeta
WHERE post_id IN (#{post_ids_sql})
AND meta_key = '_bbp_anonymous_name'
SQL
).each { |pm| anon_names[pm["post_id"]] = pm["meta_value"] }
create_posts(posts, total: total_posts, offset: offset) do |p|
skip = false
user_id = user_id_from_imported_user_id(p["post_author"]) ||
find_user_by_import_id(p["post_author"]).try(:id) ||
user_id_from_imported_user_id(anon_names[p['id']]) ||
find_user_by_import_id(anon_names[p['id']]).try(:id) ||
-1
user_id =
user_id_from_imported_user_id(p["post_author"]) ||
find_user_by_import_id(p["post_author"]).try(:id) ||
user_id_from_imported_user_id(anon_names[p["id"]]) ||
find_user_by_import_id(anon_names[p["id"]]).try(:id) || -1
post = {
id: p["id"],
@@ -256,7 +241,9 @@ class ImportScripts::Bbpress < ImportScripts::Base
}
if post[:raw].present?
post[:raw].gsub!(/\<pre\>\<code(=[a-z]*)?\>(.*?)\<\/code\>\<\/pre\>/im) { "```\n#{@he.decode($2)}\n```" }
post[:raw].gsub!(%r{\<pre\>\<code(=[a-z]*)?\>(.*?)\</code\>\</pre\>}im) do
"```\n#{@he.decode($2)}\n```"
end
end
if p["post_type"] == "topic"
@@ -288,17 +275,16 @@ class ImportScripts::Bbpress < ImportScripts::Base
count = 0
last_attachment_id = -1
total_attachments = bbpress_query(<<-SQL
total_attachments = bbpress_query(<<-SQL).first["count"]
SELECT COUNT(*) count
FROM #{BB_PRESS_PREFIX}postmeta pm
JOIN #{BB_PRESS_PREFIX}posts p ON p.id = pm.post_id
WHERE pm.meta_key = '_wp_attached_file'
AND p.post_parent > 0
SQL
).first["count"]
batches(BATCH_SIZE) do |offset|
attachments = bbpress_query(<<-SQL
attachments = bbpress_query(<<-SQL).to_a
SELECT pm.meta_id id, pm.meta_value, p.post_parent post_id
FROM #{BB_PRESS_PREFIX}postmeta pm
JOIN #{BB_PRESS_PREFIX}posts p ON p.id = pm.post_id
@@ -308,7 +294,6 @@ class ImportScripts::Bbpress < ImportScripts::Base
ORDER BY pm.meta_id
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if attachments.empty?
last_attachment_id = attachments[-1]["id"].to_i
@@ -325,7 +310,9 @@ class ImportScripts::Bbpress < ImportScripts::Base
if !post.raw[html]
post.raw << "\n\n" << html
post.save!
PostUpload.create!(post: post, upload: upload) unless PostUpload.where(post: post, upload: upload).exists?
unless PostUpload.where(post: post, upload: upload).exists?
PostUpload.create!(post: post, upload: upload)
end
end
end
end
@@ -340,15 +327,14 @@ class ImportScripts::Bbpress < ImportScripts::Base
count = 0
last_attachment_id = -1
total_attachments = bbpress_query(<<-SQL
total_attachments = bbpress_query(<<-SQL).first["count"]
SELECT COUNT(*) count
FROM #{BB_PRESS_PREFIX}bb_attachments
WHERE post_id IN (SELECT id FROM #{BB_PRESS_PREFIX}posts WHERE post_status <> 'spam' AND post_type IN ('topic', 'reply'))
SQL
).first["count"]
batches(BATCH_SIZE) do |offset|
attachments = bbpress_query(<<-SQL
attachments = bbpress_query(<<-SQL).to_a
SELECT id, filename, post_id
FROM #{BB_PRESS_PREFIX}bb_attachments
WHERE post_id IN (SELECT id FROM #{BB_PRESS_PREFIX}posts WHERE post_status <> 'spam' AND post_type IN ('topic', 'reply'))
@@ -356,13 +342,16 @@ class ImportScripts::Bbpress < ImportScripts::Base
ORDER BY id
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if attachments.empty?
last_attachment_id = attachments[-1]["id"].to_i
attachments.each do |a|
print_status(count += 1, total_attachments, get_start_time("attachments_from_bb_attachments"))
print_status(
count += 1,
total_attachments,
get_start_time("attachments_from_bb_attachments"),
)
if path = find_attachment(a["filename"], a["id"])
if post = Post.find_by(id: post_id_from_imported_post_id(a["post_id"]))
upload = create_upload(post.user.id, path, a["filename"])
@@ -371,7 +360,9 @@ class ImportScripts::Bbpress < ImportScripts::Base
if !post.raw[html]
post.raw << "\n\n" << html
post.save!
PostUpload.create!(post: post, upload: upload) unless PostUpload.where(post: post, upload: upload).exists?
unless PostUpload.where(post: post, upload: upload).exists?
PostUpload.create!(post: post, upload: upload)
end
end
end
end
@@ -391,7 +382,7 @@ class ImportScripts::Bbpress < ImportScripts::Base
last_topic_id = -1
batches(BATCH_SIZE) do |offset|
topics = bbpress_query(<<-SQL
topics = bbpress_query(<<-SQL).to_a
SELECT id,
guid
FROM #{BB_PRESS_PREFIX}posts
@@ -401,14 +392,17 @@ class ImportScripts::Bbpress < ImportScripts::Base
ORDER BY id
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if topics.empty?
last_topic_id = topics[-1]["id"].to_i
topics.each do |t|
topic = topic_lookup_from_imported_post_id(t['id'])
Permalink.create(url: URI.parse(t['guid']).path.chomp('/'), topic_id: topic[:topic_id]) rescue nil
topic = topic_lookup_from_imported_post_id(t["id"])
begin
Permalink.create(url: URI.parse(t["guid"]).path.chomp("/"), topic_id: topic[:topic_id])
rescue StandardError
nil
end
end
end
end
@@ -417,42 +411,44 @@ class ImportScripts::Bbpress < ImportScripts::Base
puts "", "importing private messages..."
last_post_id = -1
total_posts = bbpress_query("SELECT COUNT(*) count FROM #{BB_PRESS_PREFIX}bp_messages_messages").first["count"]
total_posts =
bbpress_query("SELECT COUNT(*) count FROM #{BB_PRESS_PREFIX}bp_messages_messages").first[
"count"
]
threads = {}
total_count = bbpress_query("SELECT COUNT(*) count FROM #{BB_PRESS_PREFIX}bp_messages_recipients").first["count"]
total_count =
bbpress_query("SELECT COUNT(*) count FROM #{BB_PRESS_PREFIX}bp_messages_recipients").first[
"count"
]
current_count = 0
batches(BATCH_SIZE) do |offset|
rows = bbpress_query(<<-SQL
rows = bbpress_query(<<-SQL).to_a
SELECT thread_id, user_id
FROM #{BB_PRESS_PREFIX}bp_messages_recipients
ORDER BY id
LIMIT #{BATCH_SIZE}
OFFSET #{offset}
SQL
).to_a
break if rows.empty?
rows.each do |row|
current_count += 1
print_status(current_count, total_count, get_start_time('private_messages'))
print_status(current_count, total_count, get_start_time("private_messages"))
threads[row['thread_id']] ||= {
target_user_ids: [],
imported_topic_id: nil
}
user_id = user_id_from_imported_user_id(row['user_id'])
if user_id && !threads[row['thread_id']][:target_user_ids].include?(user_id)
threads[row['thread_id']][:target_user_ids] << user_id
threads[row["thread_id"]] ||= { target_user_ids: [], imported_topic_id: nil }
user_id = user_id_from_imported_user_id(row["user_id"])
if user_id && !threads[row["thread_id"]][:target_user_ids].include?(user_id)
threads[row["thread_id"]][:target_user_ids] << user_id
end
end
end
batches(BATCH_SIZE) do |offset|
posts = bbpress_query(<<-SQL
posts = bbpress_query(<<-SQL).to_a
SELECT id,
thread_id,
date_sent,
@@ -464,39 +460,48 @@ class ImportScripts::Bbpress < ImportScripts::Base
ORDER BY thread_id, date_sent
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if posts.empty?
last_post_id = posts[-1]["id"].to_i
create_posts(posts, total: total_posts, offset: offset) do |post|
if tcf = TopicCustomField.where(name: 'bb_thread_id', value: post['thread_id']).first
if tcf = TopicCustomField.where(name: "bb_thread_id", value: post["thread_id"]).first
{
id: "pm#{post['id']}",
topic_id: threads[post['thread_id']][:imported_topic_id],
user_id: user_id_from_imported_user_id(post['sender_id']) || find_user_by_import_id(post['sender_id'])&.id || -1,
raw: post['message'],
created_at: post['date_sent'],
id: "pm#{post["id"]}",
topic_id: threads[post["thread_id"]][:imported_topic_id],
user_id:
user_id_from_imported_user_id(post["sender_id"]) ||
find_user_by_import_id(post["sender_id"])&.id || -1,
raw: post["message"],
created_at: post["date_sent"],
}
else
# First post of the thread
{
id: "pm#{post['id']}",
id: "pm#{post["id"]}",
archetype: Archetype.private_message,
user_id: user_id_from_imported_user_id(post['sender_id']) || find_user_by_import_id(post['sender_id'])&.id || -1,
title: post['subject'],
raw: post['message'],
created_at: post['date_sent'],
target_usernames: User.where(id: threads[post['thread_id']][:target_user_ids]).pluck(:username),
post_create_action: proc do |new_post|
if topic = new_post.topic
threads[post['thread_id']][:imported_topic_id] = topic.id
TopicCustomField.create(topic_id: topic.id, name: 'bb_thread_id', value: post['thread_id'])
else
puts "Error in post_create_action! Can't find topic!"
end
end
user_id:
user_id_from_imported_user_id(post["sender_id"]) ||
find_user_by_import_id(post["sender_id"])&.id || -1,
title: post["subject"],
raw: post["message"],
created_at: post["date_sent"],
target_usernames:
User.where(id: threads[post["thread_id"]][:target_user_ids]).pluck(:username),
post_create_action:
proc do |new_post|
if topic = new_post.topic
threads[post["thread_id"]][:imported_topic_id] = topic.id
TopicCustomField.create(
topic_id: topic.id,
name: "bb_thread_id",
value: post["thread_id"],
)
else
puts "Error in post_create_action! Can't find topic!"
end
end,
}
end
end
@@ -506,7 +511,6 @@ class ImportScripts::Bbpress < ImportScripts::Base
def bbpress_query(sql)
@client.query(sql, cache_rows: false)
end
end
ImportScripts::Bbpress.new.perform

View File

@@ -2,13 +2,12 @@
# bespoke importer for a customer, feel free to borrow ideas
require 'csv'
require "csv"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
# Call it like this:
# RAILS_ENV=production bundle exec ruby script/import_scripts/bespoke_1.rb
class ImportScripts::Bespoke < ImportScripts::Base
BATCH_SIZE = 1000
def initialize(path)
@@ -18,9 +17,9 @@ class ImportScripts::Bespoke < ImportScripts::Base
puts "loading post mappings..."
@post_number_map = {}
Post.pluck(:id, :post_number).each do |post_id, post_number|
@post_number_map[post_id] = post_number
end
Post
.pluck(:id, :post_number)
.each { |post_id, post_number| @post_number_map[post_id] = post_number }
end
def created_post(post)
@@ -32,7 +31,6 @@ class ImportScripts::Bespoke < ImportScripts::Base
import_users
import_categories
import_posts
end
class RowResolver
@@ -45,19 +43,13 @@ class ImportScripts::Bespoke < ImportScripts::Base
end
def initialize(cols)
cols.each_with_index do |col, idx|
self.class.public_send(:define_method, col) do
@row[idx]
end
end
cols.each_with_index { |col, idx| self.class.public_send(:define_method, col) { @row[idx] } }
end
end
def load_user_batch!(users, offset, total)
if users.length > 0
create_users(users, offset: offset, total: total) do |user|
user
end
create_users(users, offset: offset, total: total) { |user| user }
users.clear
end
end
@@ -70,54 +62,56 @@ class ImportScripts::Bespoke < ImportScripts::Base
current_row = +""
double_quote_count = 0
File.open(filename).each_line do |line|
File
.open(filename)
.each_line do |line|
# escaping is mental here
line.gsub!(/\\(.{1})/) { |m| m[-1] == '"' ? '""' : m[-1] }
line.strip!
# escaping is mental here
line.gsub!(/\\(.{1})/) { |m| m[-1] == '"' ? '""' : m[-1] }
line.strip!
current_row << "\n" unless current_row.empty?
current_row << line
current_row << "\n" unless current_row.empty?
current_row << line
double_quote_count += line.scan('"').count
double_quote_count += line.scan('"').count
next if double_quote_count % 2 == 1
if double_quote_count % 2 == 1
next
end
raw =
begin
CSV.parse(current_row)
rescue CSV::MalformedCSVError => e
puts e.message
puts "*" * 100
puts "Bad row skipped, line is: #{line}"
puts
puts current_row
puts
puts "double quote count is : #{double_quote_count}"
puts "*" * 100
raw = begin
CSV.parse(current_row)
rescue CSV::MalformedCSVError => e
puts e.message
puts "*" * 100
puts "Bad row skipped, line is: #{line}"
puts
puts current_row
puts
puts "double quote count is : #{double_quote_count}"
puts "*" * 100
current_row = ""
double_quote_count = 0
next
end[
0
]
current_row = ""
double_quote_count = 0
next
end[0]
if first
row = RowResolver.create(raw)
if first
row = RowResolver.create(raw)
current_row = ""
double_quote_count = 0
first = false
next
end
row.load(raw)
yield row
current_row = ""
double_quote_count = 0
first = false
next
end
row.load(raw)
yield row
current_row = ""
double_quote_count = 0
end
end
def total_rows(table)
@@ -133,14 +127,11 @@ class ImportScripts::Bespoke < ImportScripts::Base
total = total_rows("users")
csv_parse("users") do |row|
id = row.id
email = row.email
# fake it
if row.email.blank? || row.email !~ /@/
email = fake_email
end
email = fake_email if row.email.blank? || row.email !~ /@/
name = row.display_name
username = row.key_custom
@@ -150,19 +141,10 @@ class ImportScripts::Bespoke < ImportScripts::Base
username = email.split("@")[0] if username.blank?
name = email.split("@")[0] if name.blank?
users << {
id: id,
email: email,
name: name,
username: username,
created_at: created_at
}
users << { id: id, email: email, name: name, username: username, created_at: created_at }
count += 1
if count % BATCH_SIZE == 0
load_user_batch! users, count - users.length, total
end
load_user_batch! users, count - users.length, total if count % BATCH_SIZE == 0
end
load_user_batch! users, count, total
@@ -174,22 +156,19 @@ class ImportScripts::Bespoke < ImportScripts::Base
rows << { id: row.id, name: row.name, description: row.description }
end
create_categories(rows) do |row|
row
end
create_categories(rows) { |row| row }
end
def normalize_raw!(raw)
# purple and #1223f3
raw.gsub!(/\[color=[#a-z0-9]+\]/i, "")
raw.gsub!(/\[\/color\]/i, "")
raw.gsub!(/\[signature\].+\[\/signature\]/im, "")
raw.gsub!(%r{\[/color\]}i, "")
raw.gsub!(%r{\[signature\].+\[/signature\]}im, "")
raw
end
def import_post_batch!(posts, topics, offset, total)
create_posts(posts, total: total, offset: offset) do |post|
mapped = {}
mapped[:id] = post[:id]
@@ -223,7 +202,7 @@ class ImportScripts::Bespoke < ImportScripts::Base
mapped
end
posts.clear
posts.clear
end
def import_posts
@@ -237,7 +216,7 @@ class ImportScripts::Bespoke < ImportScripts::Base
category_id: topic.forum_category_id,
deleted: topic.is_deleted.to_i == 1,
locked: topic.is_locked.to_i == 1,
pinned: topic.is_pinned.to_i == 1
pinned: topic.is_pinned.to_i == 1,
}
end
@@ -246,7 +225,6 @@ class ImportScripts::Bespoke < ImportScripts::Base
posts = []
count = 0
csv_parse("posts") do |row|
unless row.dcreate
puts "NO CREATION DATE FOR POST"
p row
@@ -261,7 +239,7 @@ class ImportScripts::Bespoke < ImportScripts::Base
title: row.title,
body: normalize_raw!(row.body),
deleted: row.is_deleted.to_i == 1,
created_at: DateTime.parse(row.dcreate)
created_at: DateTime.parse(row.dcreate),
}
posts << row
count += 1
@@ -275,7 +253,6 @@ class ImportScripts::Bespoke < ImportScripts::Base
exit
end
end
unless ARGV[0] && Dir.exist?(ARGV[0])

View File

@@ -7,18 +7,18 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb")
# Make sure to follow the right format in your CSV files.
class ImportScripts::CsvImporter < ImportScripts::Base
CSV_FILE_PATH = ENV['CSV_USER_FILE'] || '/var/www/discourse/tmp/users.csv'
CSV_CUSTOM_FIELDS = ENV['CSV_CUSTOM_FIELDS'] || '/var/www/discourse/tmp/custom_fields.csv'
CSV_EMAILS = ENV['CSV_EMAILS'] || '/var/www/discourse/tmp/emails.csv'
CSV_CATEGORIES = ENV['CSV_CATEGORIES'] || '/var/www/discourse/tmp/categories.csv'
CSV_TOPICS = ENV['CSV_TOPICS'] || '/var/www/discourse/tmp/topics_new_users.csv'
CSV_TOPICS_EXISTING_USERS = ENV['CSV_TOPICS'] || '/var/www/discourse/tmp/topics_existing_users.csv'
IMPORT_PREFIX = ENV['IMPORT_PREFIX'] || '2022-08-11'
IMPORT_USER_ID_PREFIX = 'csv-user-import-' + IMPORT_PREFIX + '-'
IMPORT_CATEGORY_ID_PREFIX = 'csv-category-import-' + IMPORT_PREFIX + '-'
IMPORT_TOPIC_ID_PREFIX = 'csv-topic-import-' + IMPORT_PREFIX + '-'
IMPORT_TOPIC_ID_EXISITNG_PREFIX = 'csv-topic_existing-import-' + IMPORT_PREFIX + '-'
CSV_FILE_PATH = ENV["CSV_USER_FILE"] || "/var/www/discourse/tmp/users.csv"
CSV_CUSTOM_FIELDS = ENV["CSV_CUSTOM_FIELDS"] || "/var/www/discourse/tmp/custom_fields.csv"
CSV_EMAILS = ENV["CSV_EMAILS"] || "/var/www/discourse/tmp/emails.csv"
CSV_CATEGORIES = ENV["CSV_CATEGORIES"] || "/var/www/discourse/tmp/categories.csv"
CSV_TOPICS = ENV["CSV_TOPICS"] || "/var/www/discourse/tmp/topics_new_users.csv"
CSV_TOPICS_EXISTING_USERS =
ENV["CSV_TOPICS"] || "/var/www/discourse/tmp/topics_existing_users.csv"
IMPORT_PREFIX = ENV["IMPORT_PREFIX"] || "2022-08-11"
IMPORT_USER_ID_PREFIX = "csv-user-import-" + IMPORT_PREFIX + "-"
IMPORT_CATEGORY_ID_PREFIX = "csv-category-import-" + IMPORT_PREFIX + "-"
IMPORT_TOPIC_ID_PREFIX = "csv-topic-import-" + IMPORT_PREFIX + "-"
IMPORT_TOPIC_ID_EXISITNG_PREFIX = "csv-topic_existing-import-" + IMPORT_PREFIX + "-"
def initialize
super
@@ -49,25 +49,19 @@ class ImportScripts::CsvImporter < ImportScripts::Base
return nil
end
CSV.parse(File.read(path, encoding: 'bom|utf-8'), headers: true)
CSV.parse(File.read(path, encoding: "bom|utf-8"), headers: true)
end
def username_for(name)
result = name.downcase.gsub(/[^a-z0-9\-\_]/, '')
if result.blank?
result = Digest::SHA1.hexdigest(name)[0...10]
end
result = name.downcase.gsub(/[^a-z0-9\-\_]/, "")
result = Digest::SHA1.hexdigest(name)[0...10] if result.blank?
result
end
def get_email(id)
email = nil
@imported_emails.each do |e|
if e["user_id"] == id
email = e["email"]
end
end
@imported_emails.each { |e| email = e["email"] if e["user_id"] == id }
email
end
@@ -76,9 +70,7 @@ class ImportScripts::CsvImporter < ImportScripts::Base
custom_fields = {}
@imported_custom_fields.each do |cf|
if cf["user_id"] == id
@imported_custom_fields_names.each do |name|
custom_fields[name] = cf[name]
end
@imported_custom_fields_names.each { |name| custom_fields[name] = cf[name] }
end
end
@@ -86,98 +78,95 @@ class ImportScripts::CsvImporter < ImportScripts::Base
end
def import_users
puts '', "Importing users"
puts "", "Importing users"
users = []
@imported_users.each do |u|
email = get_email(u['id'])
custom_fields = get_custom_fields(u['id'])
u['email'] = email
u['custom_fields'] = custom_fields
u['id'] = IMPORT_USER_ID_PREFIX + u['id']
email = get_email(u["id"])
custom_fields = get_custom_fields(u["id"])
u["email"] = email
u["custom_fields"] = custom_fields
u["id"] = IMPORT_USER_ID_PREFIX + u["id"]
users << u
end
users.uniq!
create_users(users) do |u|
{
id: u['id'],
username: u['username'],
email: u['email'],
created_at: u['created_at'],
custom_fields: u['custom_fields'],
id: u["id"],
username: u["username"],
email: u["email"],
created_at: u["created_at"],
custom_fields: u["custom_fields"],
}
end
end
def import_categories
puts '', "Importing categories"
puts "", "Importing categories"
categories = []
@imported_categories.each do |c|
c['user_id'] = user_id_from_imported_user_id(IMPORT_USER_ID_PREFIX + c['user_id']) || Discourse::SYSTEM_USER_ID
c['id'] = IMPORT_CATEGORY_ID_PREFIX + c['id']
c["user_id"] = user_id_from_imported_user_id(IMPORT_USER_ID_PREFIX + c["user_id"]) ||
Discourse::SYSTEM_USER_ID
c["id"] = IMPORT_CATEGORY_ID_PREFIX + c["id"]
categories << c
end
categories.uniq!
create_categories(categories) do |c|
{
id: c['id'],
user_id: c['user_id'],
name: c['name'],
description: c['description']
}
{ id: c["id"], user_id: c["user_id"], name: c["name"], description: c["description"] }
end
end
def import_topics
puts '', "Importing topics"
puts "", "Importing topics"
topics = []
@imported_topics.each do |t|
t['user_id'] = user_id_from_imported_user_id(IMPORT_USER_ID_PREFIX + t['user_id']) || Discourse::SYSTEM_USER_ID
t['category_id'] = category_id_from_imported_category_id(IMPORT_CATEGORY_ID_PREFIX + t['category_id'])
t['id'] = IMPORT_TOPIC_ID_PREFIX + t['id']
t["user_id"] = user_id_from_imported_user_id(IMPORT_USER_ID_PREFIX + t["user_id"]) ||
Discourse::SYSTEM_USER_ID
t["category_id"] = category_id_from_imported_category_id(
IMPORT_CATEGORY_ID_PREFIX + t["category_id"],
)
t["id"] = IMPORT_TOPIC_ID_PREFIX + t["id"]
topics << t
end
create_posts(topics) do |t|
{
id: t['id'],
user_id: t['user_id'],
title: t['title'],
category: t['category_id'],
raw: t['raw']
id: t["id"],
user_id: t["user_id"],
title: t["title"],
category: t["category_id"],
raw: t["raw"],
}
end
end
def import_topics_existing_users
# Import topics for users that already existed in the DB, not imported during this migration
puts '', "Importing topics for existing users"
puts "", "Importing topics for existing users"
topics = []
@imported_topics_existing_users.each do |t|
t['id'] = IMPORT_TOPIC_ID_EXISITNG_PREFIX + t['id']
t["id"] = IMPORT_TOPIC_ID_EXISITNG_PREFIX + t["id"]
topics << t
end
create_posts(topics) do |t|
{
id: t['id'],
user_id: t['user_id'], # This is a Discourse user ID
title: t['title'],
category: t['category_id'], # This is a Discourse category ID
raw: t['raw']
id: t["id"],
user_id: t["user_id"], # This is a Discourse user ID
title: t["title"],
category: t["category_id"], # This is a Discourse category ID
raw: t["raw"],
}
end
end
end
if __FILE__ == $0
ImportScripts::CsvImporter.new.perform
end
ImportScripts::CsvImporter.new.perform if __FILE__ == $0
# == CSV files format
#

View File

@@ -6,10 +6,9 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb")
# Edit the constants and initialize method for your import data.
class ImportScripts::CsvRestoreStagedUsers < ImportScripts::Base
CSV_FILE_PATH = ENV['CSV_USER_FILE']
CSV_CUSTOM_FIELDS = ENV['CSV_CUSTOM_FIELDS']
CSV_EMAILS = ENV['CSV_EMAILS']
CSV_FILE_PATH = ENV["CSV_USER_FILE"]
CSV_CUSTOM_FIELDS = ENV["CSV_CUSTOM_FIELDS"]
CSV_EMAILS = ENV["CSV_EMAILS"]
BATCH_SIZE ||= 1000
@@ -35,62 +34,51 @@ class ImportScripts::CsvRestoreStagedUsers < ImportScripts::Base
end
def username_for(name)
result = name.downcase.gsub(/[^a-z0-9\-\_]/, '')
result = name.downcase.gsub(/[^a-z0-9\-\_]/, "")
if result.blank?
result = Digest::SHA1.hexdigest(name)[0...10]
end
result = Digest::SHA1.hexdigest(name)[0...10] if result.blank?
result
end
def get_email(id)
email = nil
@imported_emails.each do |e|
if e["user_id"] == id
email = e["email"]
end
end
@imported_emails.each { |e| email = e["email"] if e["user_id"] == id }
email
end
def get_custom_fields(id)
custom_fields = {}
@imported_custom_fields.each do |cf|
if cf["user_id"] == id
custom_fields[cf["name"]] = cf["value"]
end
custom_fields[cf["name"]] = cf["value"] if cf["user_id"] == id
end
custom_fields
end
def import_users
puts '', "Importing users"
puts "", "Importing users"
users = []
@imported_users.each do |u|
email = get_email(u['id'])
custom_fields = get_custom_fields(u['id'])
u['email'] = email
u['custom_fields'] = custom_fields
email = get_email(u["id"])
custom_fields = get_custom_fields(u["id"])
u["email"] = email
u["custom_fields"] = custom_fields
users << u
end
users.uniq!
create_users(users) do |u|
{
id: u['id'],
username: u['username'],
email: u['email'],
created_at: u['created_at'],
staged: u['staged'],
custom_fields: u['custom_fields'],
id: u["id"],
username: u["username"],
email: u["email"],
created_at: u["created_at"],
staged: u["staged"],
custom_fields: u["custom_fields"],
}
end
end
end
if __FILE__ == $0
ImportScripts::CsvRestoreStagedUsers.new.perform
end
ImportScripts::CsvRestoreStagedUsers.new.perform if __FILE__ == $0

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,7 @@
# frozen_string_literal: true
require 'nokogiri'
require 'optparse'
require "nokogiri"
require "optparse"
require File.expand_path(File.dirname(__FILE__) + "/base")
class ImportScripts::Disqus < ImportScripts::Base
@@ -35,7 +35,7 @@ class ImportScripts::Disqus < ImportScripts::Base
by_email = {}
@parser.posts.each do |id, p|
next if p[:is_spam] == 'true' || p[:is_deleted] == 'true'
next if p[:is_spam] == "true" || p[:is_deleted] == "true"
by_email[p[:author_email]] = { name: p[:author_name], username: p[:author_username] }
end
@@ -45,13 +45,7 @@ class ImportScripts::Disqus < ImportScripts::Base
create_users(by_email.keys) do |email|
user = by_email[email]
{
id: email,
email: email,
username: user[:username],
name: user[:name],
merge: true
}
{ id: email, email: email, username: user[:username], name: user[:name], merge: true }
end
end
@@ -59,7 +53,6 @@ class ImportScripts::Disqus < ImportScripts::Base
puts "", "importing topics..."
@parser.threads.each do |id, t|
title = t[:title]
title.gsub!(/&#8220;/, '"')
title.gsub!(/&#8221;/, '"')
@@ -79,7 +72,7 @@ class ImportScripts::Disqus < ImportScripts::Base
if post.present? && post.topic.posts_count <= 1
(t[:posts] || []).each do |p|
post_user = find_existing_user(p[:author_email] || '', p[:author_username])
post_user = find_existing_user(p[:author_email] || "", p[:author_username])
next unless post_user.present?
attrs = {
@@ -87,7 +80,7 @@ class ImportScripts::Disqus < ImportScripts::Base
topic_id: post.topic_id,
raw: p[:cooked],
cooked: p[:cooked],
created_at: Date.parse(p[:created_at])
created_at: Date.parse(p[:created_at]),
}
if p[:parent_id]
@@ -125,23 +118,22 @@ class DisqusSAX < Nokogiri::XML::SAX::Document
end
def start_element(name, attrs = [])
hashed = Hash[attrs]
case name
when 'post'
when "post"
@post = {}
@post[:id] = hashed['dsq:id'] if @post
when 'thread'
id = hashed['dsq:id']
@post[:id] = hashed["dsq:id"] if @post
when "thread"
id = hashed["dsq:id"]
if @post
thread = @threads[id]
thread[:posts] << @post
else
@thread = { id: id, posts: [] }
end
when 'parent'
when "parent"
if @post
id = hashed['dsq:id']
id = hashed["dsq:id"]
@post[:parent_id] = id
end
end
@@ -151,10 +143,10 @@ class DisqusSAX < Nokogiri::XML::SAX::Document
def end_element(name)
case name
when 'post'
when "post"
@posts[@post[:id]] = @post
@post = nil
when 'thread'
when "thread"
if @post.nil?
@threads[@thread[:id]] = @thread
@thread = nil
@@ -165,25 +157,25 @@ class DisqusSAX < Nokogiri::XML::SAX::Document
end
def characters(str)
record(@post, :author_email, str, 'author', 'email')
record(@post, :author_name, str, 'author', 'name')
record(@post, :author_username, str, 'author', 'username')
record(@post, :author_anonymous, str, 'author', 'isAnonymous')
record(@post, :created_at, str, 'createdAt')
record(@post, :is_deleted, str, 'isDeleted')
record(@post, :is_spam, str, 'isSpam')
record(@post, :author_email, str, "author", "email")
record(@post, :author_name, str, "author", "name")
record(@post, :author_username, str, "author", "username")
record(@post, :author_anonymous, str, "author", "isAnonymous")
record(@post, :created_at, str, "createdAt")
record(@post, :is_deleted, str, "isDeleted")
record(@post, :is_spam, str, "isSpam")
record(@thread, :link, str, 'link')
record(@thread, :title, str, 'title')
record(@thread, :created_at, str, 'createdAt')
record(@thread, :author_email, str, 'author', 'email')
record(@thread, :author_name, str, 'author', 'name')
record(@thread, :author_username, str, 'author', 'username')
record(@thread, :author_anonymous, str, 'author', 'isAnonymous')
record(@thread, :link, str, "link")
record(@thread, :title, str, "title")
record(@thread, :created_at, str, "createdAt")
record(@thread, :author_email, str, "author", "email")
record(@thread, :author_name, str, "author", "name")
record(@thread, :author_username, str, "author", "username")
record(@thread, :author_anonymous, str, "author", "isAnonymous")
end
def cdata_block(str)
record(@post, :cooked, str, 'message')
record(@post, :cooked, str, "message")
end
def record(target, sym, str, *params)
@@ -205,7 +197,7 @@ class DisqusSAX < Nokogiri::XML::SAX::Document
# Remove any threads that have no posts
@threads.delete(id)
else
t[:posts].delete_if { |p| p[:is_spam] == 'true' || p[:is_deleted] == 'true' }
t[:posts].delete_if { |p| p[:is_spam] == "true" || p[:is_deleted] == "true" }
end
end

View File

@@ -4,19 +4,19 @@ require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::Drupal < ImportScripts::Base
DRUPAL_DB = ENV['DRUPAL_DB'] || "newsite3"
VID = ENV['DRUPAL_VID'] || 1
DRUPAL_DB = ENV["DRUPAL_DB"] || "newsite3"
VID = ENV["DRUPAL_VID"] || 1
def initialize
super
@client = Mysql2::Client.new(
host: "localhost",
username: "root",
#password: "password",
database: DRUPAL_DB
)
@client =
Mysql2::Client.new(
host: "localhost",
username: "root",
#password: "password",
database: DRUPAL_DB,
)
end
def categories_query
@@ -25,7 +25,12 @@ class ImportScripts::Drupal < ImportScripts::Base
def execute
create_users(@client.query("SELECT uid id, name, mail email, created FROM users;")) do |row|
{ id: row['id'], username: row['name'], email: row['email'], created_at: Time.zone.at(row['created']) }
{
id: row["id"],
username: row["name"],
email: row["email"],
created_at: Time.zone.at(row["created"]),
}
end
# You'll need to edit the following query for your Drupal install:
@@ -34,38 +39,36 @@ class ImportScripts::Drupal < ImportScripts::Base
# * Table name may be term_data.
# * May need to select a vid other than 1.
create_categories(categories_query) do |c|
{ id: c['tid'], name: c['name'], description: c['description'] }
{ id: c["tid"], name: c["name"], description: c["description"] }
end
# "Nodes" in Drupal are divided into types. Here we import two types,
# and will later import all the comments/replies for each node.
# You will need to figure out what the type names are on your install and edit the queries to match.
if ENV['DRUPAL_IMPORT_BLOG']
create_blog_topics
end
create_blog_topics if ENV["DRUPAL_IMPORT_BLOG"]
create_forum_topics
create_replies
begin
create_admin(email: 'neil.lalonde@discourse.org', username: UserNameSuggester.suggest('neil'))
create_admin(email: "neil.lalonde@discourse.org", username: UserNameSuggester.suggest("neil"))
rescue => e
puts '', "Failed to create admin user"
puts "", "Failed to create admin user"
puts e.message
end
end
def create_blog_topics
puts '', "creating blog topics"
puts "", "creating blog topics"
create_category({
name: 'Blog',
user_id: -1,
description: "Articles from the blog"
}, nil) unless Category.find_by_name('Blog')
unless Category.find_by_name("Blog")
create_category({ name: "Blog", user_id: -1, description: "Articles from the blog" }, nil)
end
results = @client.query("
results =
@client.query(
"
SELECT n.nid nid,
n.title title,
n.uid uid,
@@ -76,37 +79,48 @@ class ImportScripts::Drupal < ImportScripts::Base
LEFT JOIN node_revisions nr ON nr.vid=n.vid
WHERE n.type = 'blog'
AND n.status = 1
", cache_rows: false)
",
cache_rows: false,
)
create_posts(results) do |row|
{
id: "nid:#{row['nid']}",
user_id: user_id_from_imported_user_id(row['uid']) || -1,
category: 'Blog',
raw: row['body'],
created_at: Time.zone.at(row['created']),
pinned_at: row['sticky'].to_i == 1 ? Time.zone.at(row['created']) : nil,
title: row['title'].try(:strip),
custom_fields: { import_id: "nid:#{row['nid']}" }
id: "nid:#{row["nid"]}",
user_id: user_id_from_imported_user_id(row["uid"]) || -1,
category: "Blog",
raw: row["body"],
created_at: Time.zone.at(row["created"]),
pinned_at: row["sticky"].to_i == 1 ? Time.zone.at(row["created"]) : nil,
title: row["title"].try(:strip),
custom_fields: {
import_id: "nid:#{row["nid"]}",
},
}
end
end
def create_forum_topics
puts '', "creating forum topics"
puts "", "creating forum topics"
total_count = @client.query("
total_count =
@client.query(
"
SELECT COUNT(*) count
FROM node n
LEFT JOIN forum f ON f.vid=n.vid
WHERE n.type = 'forum'
AND n.status = 1
").first['count']
",
).first[
"count"
]
batch_size = 1000
batches(batch_size) do |offset|
results = @client.query("
results =
@client.query(
"
SELECT n.nid nid,
n.title title,
f.tid tid,
@@ -121,48 +135,57 @@ class ImportScripts::Drupal < ImportScripts::Base
AND n.status = 1
LIMIT #{batch_size}
OFFSET #{offset};
", cache_rows: false)
",
cache_rows: false,
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| "nid:#{p['nid']}" }
next if all_records_exist? :posts, results.map { |p| "nid:#{p["nid"]}" }
create_posts(results, total: total_count, offset: offset) do |row|
{
id: "nid:#{row['nid']}",
user_id: user_id_from_imported_user_id(row['uid']) || -1,
category: category_id_from_imported_category_id(row['tid']),
raw: row['body'],
created_at: Time.zone.at(row['created']),
pinned_at: row['sticky'].to_i == 1 ? Time.zone.at(row['created']) : nil,
title: row['title'].try(:strip)
id: "nid:#{row["nid"]}",
user_id: user_id_from_imported_user_id(row["uid"]) || -1,
category: category_id_from_imported_category_id(row["tid"]),
raw: row["body"],
created_at: Time.zone.at(row["created"]),
pinned_at: row["sticky"].to_i == 1 ? Time.zone.at(row["created"]) : nil,
title: row["title"].try(:strip),
}
end
end
end
def create_replies
puts '', "creating replies in topics"
puts "", "creating replies in topics"
if ENV['DRUPAL_IMPORT_BLOG']
if ENV["DRUPAL_IMPORT_BLOG"]
node_types = "('forum','blog')"
else
node_types = "('forum')"
end
total_count = @client.query("
total_count =
@client.query(
"
SELECT COUNT(*) count
FROM comments c
LEFT JOIN node n ON n.nid=c.nid
WHERE n.type IN #{node_types}
AND n.status = 1
AND c.status=0;
").first['count']
",
).first[
"count"
]
batch_size = 1000
batches(batch_size) do |offset|
results = @client.query("
results =
@client.query(
"
SELECT c.cid,
c.pid,
c.nid,
@@ -176,37 +199,36 @@ class ImportScripts::Drupal < ImportScripts::Base
AND c.status=0
LIMIT #{batch_size}
OFFSET #{offset};
", cache_rows: false)
",
cache_rows: false,
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| "cid:#{p['cid']}" }
next if all_records_exist? :posts, results.map { |p| "cid:#{p["cid"]}" }
create_posts(results, total: total_count, offset: offset) do |row|
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}")
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row["nid"]}")
if topic_mapping && topic_id = topic_mapping[:topic_id]
h = {
id: "cid:#{row['cid']}",
id: "cid:#{row["cid"]}",
topic_id: topic_id,
user_id: user_id_from_imported_user_id(row['uid']) || -1,
raw: row['body'],
created_at: Time.zone.at(row['timestamp']),
user_id: user_id_from_imported_user_id(row["uid"]) || -1,
raw: row["body"],
created_at: Time.zone.at(row["timestamp"]),
}
if row['pid']
parent = topic_lookup_from_imported_post_id("cid:#{row['pid']}")
if row["pid"]
parent = topic_lookup_from_imported_post_id("cid:#{row["pid"]}")
h[:reply_to_post_number] = parent[:post_number] if parent && parent[:post_number] > (1)
end
h
else
puts "No topic found for comment #{row['cid']}"
puts "No topic found for comment #{row["cid"]}"
nil
end
end
end
end
end
if __FILE__ == $0
ImportScripts::Drupal.new.perform
end
ImportScripts::Drupal.new.perform if __FILE__ == $0

View File

@@ -5,9 +5,8 @@ require "htmlentities"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::Drupal < ImportScripts::Base
DRUPAL_DB = ENV['DRUPAL_DB'] || "drupal"
VID = ENV['DRUPAL_VID'] || 1
DRUPAL_DB = ENV["DRUPAL_DB"] || "drupal"
VID = ENV["DRUPAL_VID"] || 1
BATCH_SIZE = 1000
ATTACHMENT_DIR = "/root/files/upload"
@@ -16,25 +15,23 @@ class ImportScripts::Drupal < ImportScripts::Base
@htmlentities = HTMLEntities.new
@client = Mysql2::Client.new(
host: "localhost",
username: "root",
#password: "password",
database: DRUPAL_DB
)
@client =
Mysql2::Client.new(
host: "localhost",
username: "root",
#password: "password",
database: DRUPAL_DB,
)
end
def execute
import_users
import_categories
# "Nodes" in Drupal are divided into types. Here we import two types,
# and will later import all the comments/replies for each node.
# You will need to figure out what the type names are on your install and edit the queries to match.
if ENV['DRUPAL_IMPORT_BLOG']
import_blog_topics
end
import_blog_topics if ENV["DRUPAL_IMPORT_BLOG"]
import_forum_topics
@@ -56,7 +53,7 @@ class ImportScripts::Drupal < ImportScripts::Base
last_user_id = -1
batches(BATCH_SIZE) do |offset|
users = mysql_query(<<-SQL
users = mysql_query(<<-SQL).to_a
SELECT uid,
name username,
mail email,
@@ -66,7 +63,6 @@ class ImportScripts::Drupal < ImportScripts::Base
ORDER BY uid
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if users.empty?
@@ -80,12 +76,7 @@ class ImportScripts::Drupal < ImportScripts::Base
username = @htmlentities.decode(user["username"]).strip
{
id: user["uid"],
name: username,
email: email,
created_at: Time.zone.at(user["created"])
}
{ id: user["uid"], name: username, email: email, created_at: Time.zone.at(user["created"]) }
end
end
end
@@ -99,35 +90,31 @@ class ImportScripts::Drupal < ImportScripts::Base
puts "", "importing categories"
categories = mysql_query(<<-SQL
categories = mysql_query(<<-SQL).to_a
SELECT tid,
name,
description
FROM taxonomy_term_data
WHERE vid = #{VID}
SQL
).to_a
create_categories(categories) do |category|
{
id: category['tid'],
name: @htmlentities.decode(category['name']).strip,
description: @htmlentities.decode(category['description']).strip
id: category["tid"],
name: @htmlentities.decode(category["name"]).strip,
description: @htmlentities.decode(category["description"]).strip,
}
end
end
def import_blog_topics
puts '', "importing blog topics"
puts "", "importing blog topics"
create_category(
{
name: 'Blog',
description: "Articles from the blog"
},
nil) unless Category.find_by_name('Blog')
unless Category.find_by_name("Blog")
create_category({ name: "Blog", description: "Articles from the blog" }, nil)
end
blogs = mysql_query(<<-SQL
blogs = mysql_query(<<-SQL).to_a
SELECT n.nid nid, n.title title, n.uid uid, n.created created, n.sticky sticky,
f.body_value body
FROM node n,
@@ -136,38 +123,38 @@ class ImportScripts::Drupal < ImportScripts::Base
AND n.nid = f.entity_id
AND n.status = 1
SQL
).to_a
category_id = Category.find_by_name('Blog').id
category_id = Category.find_by_name("Blog").id
create_posts(blogs) do |topic|
{
id: "nid:#{topic['nid']}",
user_id: user_id_from_imported_user_id(topic['uid']) || -1,
id: "nid:#{topic["nid"]}",
user_id: user_id_from_imported_user_id(topic["uid"]) || -1,
category: category_id,
raw: topic['body'],
created_at: Time.zone.at(topic['created']),
pinned_at: topic['sticky'].to_i == 1 ? Time.zone.at(topic['created']) : nil,
title: topic['title'].try(:strip),
custom_fields: { import_id: "nid:#{topic['nid']}" }
raw: topic["body"],
created_at: Time.zone.at(topic["created"]),
pinned_at: topic["sticky"].to_i == 1 ? Time.zone.at(topic["created"]) : nil,
title: topic["title"].try(:strip),
custom_fields: {
import_id: "nid:#{topic["nid"]}",
},
}
end
end
def import_forum_topics
puts '', "importing forum topics"
puts "", "importing forum topics"
total_count = mysql_query(<<-SQL
total_count = mysql_query(<<-SQL).first["count"]
SELECT COUNT(*) count
FROM forum_index fi, node n
WHERE n.type = 'forum'
AND fi.nid = n.nid
AND n.status = 1
SQL
).first['count']
batches(BATCH_SIZE) do |offset|
results = mysql_query(<<-SQL
results = mysql_query(<<-SQL).to_a
SELECT fi.nid nid,
fi.title title,
fi.tid tid,
@@ -188,34 +175,33 @@ class ImportScripts::Drupal < ImportScripts::Base
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
SQL
).to_a
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| "nid:#{p['nid']}" }
next if all_records_exist? :posts, results.map { |p| "nid:#{p["nid"]}" }
create_posts(results, total: total_count, offset: offset) do |row|
raw = preprocess_raw(row['body'])
raw = preprocess_raw(row["body"])
topic = {
id: "nid:#{row['nid']}",
user_id: user_id_from_imported_user_id(row['uid']) || -1,
category: category_id_from_imported_category_id(row['tid']),
id: "nid:#{row["nid"]}",
user_id: user_id_from_imported_user_id(row["uid"]) || -1,
category: category_id_from_imported_category_id(row["tid"]),
raw: raw,
created_at: Time.zone.at(row['created']),
pinned_at: row['sticky'].to_i == 1 ? Time.zone.at(row['created']) : nil,
title: row['title'].try(:strip),
views: row['views']
created_at: Time.zone.at(row["created"]),
pinned_at: row["sticky"].to_i == 1 ? Time.zone.at(row["created"]) : nil,
title: row["title"].try(:strip),
views: row["views"],
}
topic[:custom_fields] = { import_solved: true } if row['solved'].present?
topic[:custom_fields] = { import_solved: true } if row["solved"].present?
topic
end
end
end
def import_replies
puts '', "creating replies in topics"
puts "", "creating replies in topics"
total_count = mysql_query(<<-SQL
total_count = mysql_query(<<-SQL).first["count"]
SELECT COUNT(*) count
FROM comment c,
node n
@@ -224,10 +210,9 @@ class ImportScripts::Drupal < ImportScripts::Base
AND n.type IN ('article', 'forum')
AND n.status = 1
SQL
).first['count']
batches(BATCH_SIZE) do |offset|
results = mysql_query(<<-SQL
results = mysql_query(<<-SQL).to_a
SELECT c.cid, c.pid, c.nid, c.uid, c.created,
f.comment_body_value body
FROM comment c,
@@ -241,30 +226,29 @@ class ImportScripts::Drupal < ImportScripts::Base
LIMIT #{BATCH_SIZE}
OFFSET #{offset}
SQL
).to_a
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| "cid:#{p['cid']}" }
next if all_records_exist? :posts, results.map { |p| "cid:#{p["cid"]}" }
create_posts(results, total: total_count, offset: offset) do |row|
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}")
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row["nid"]}")
if topic_mapping && topic_id = topic_mapping[:topic_id]
raw = preprocess_raw(row['body'])
raw = preprocess_raw(row["body"])
h = {
id: "cid:#{row['cid']}",
id: "cid:#{row["cid"]}",
topic_id: topic_id,
user_id: user_id_from_imported_user_id(row['uid']) || -1,
user_id: user_id_from_imported_user_id(row["uid"]) || -1,
raw: raw,
created_at: Time.zone.at(row['created']),
created_at: Time.zone.at(row["created"]),
}
if row['pid']
parent = topic_lookup_from_imported_post_id("cid:#{row['pid']}")
if row["pid"]
parent = topic_lookup_from_imported_post_id("cid:#{row["pid"]}")
h[:reply_to_post_number] = parent[:post_number] if parent && parent[:post_number] > (1)
end
h
else
puts "No topic found for comment #{row['cid']}"
puts "No topic found for comment #{row["cid"]}"
nil
end
end
@@ -275,7 +259,7 @@ class ImportScripts::Drupal < ImportScripts::Base
puts "", "importing post likes"
batches(BATCH_SIZE) do |offset|
likes = mysql_query(<<-SQL
likes = mysql_query(<<-SQL).to_a
SELECT flagging_id,
fid,
entity_id,
@@ -286,17 +270,20 @@ class ImportScripts::Drupal < ImportScripts::Base
LIMIT #{BATCH_SIZE}
OFFSET #{offset}
SQL
).to_a
break if likes.empty?
likes.each do |l|
identifier = l['fid'] == 5 ? 'nid' : 'cid'
next unless user_id = user_id_from_imported_user_id(l['uid'])
next unless post_id = post_id_from_imported_post_id("#{identifier}:#{l['entity_id']}")
identifier = l["fid"] == 5 ? "nid" : "cid"
next unless user_id = user_id_from_imported_user_id(l["uid"])
next unless post_id = post_id_from_imported_post_id("#{identifier}:#{l["entity_id"]}")
next unless user = User.find_by(id: user_id)
next unless post = Post.find_by(id: post_id)
PostActionCreator.like(user, post) rescue nil
begin
PostActionCreator.like(user, post)
rescue StandardError
nil
end
end
end
end
@@ -304,7 +291,8 @@ class ImportScripts::Drupal < ImportScripts::Base
def mark_topics_as_solved
puts "", "marking topics as solved"
solved_topics = TopicCustomField.where(name: "import_solved").where(value: true).pluck(:topic_id)
solved_topics =
TopicCustomField.where(name: "import_solved").where(value: true).pluck(:topic_id)
solved_topics.each do |topic_id|
next unless topic = Topic.find(topic_id)
@@ -336,8 +324,13 @@ class ImportScripts::Drupal < ImportScripts::Base
begin
current_count += 1
print_status(current_count, total_count, start_time)
SingleSignOnRecord.create!(user_id: user.id, external_id: external_id, external_email: user.email, last_payload: '')
rescue
SingleSignOnRecord.create!(
user_id: user.id,
external_id: external_id,
external_email: user.email,
last_payload: "",
)
rescue StandardError
next
end
end
@@ -350,14 +343,13 @@ class ImportScripts::Drupal < ImportScripts::Base
success_count = 0
fail_count = 0
total_count = mysql_query(<<-SQL
total_count = mysql_query(<<-SQL).first["count"]
SELECT count(field_post_attachment_fid) count
FROM field_data_field_post_attachment
SQL
).first["count"]
batches(BATCH_SIZE) do |offset|
attachments = mysql_query(<<-SQL
attachments = mysql_query(<<-SQL).to_a
SELECT *
FROM field_data_field_post_attachment fp
LEFT JOIN file_managed fm
@@ -365,7 +357,6 @@ class ImportScripts::Drupal < ImportScripts::Base
LIMIT #{BATCH_SIZE}
OFFSET #{offset}
SQL
).to_a
break if attachments.size < 1
@@ -373,9 +364,11 @@ class ImportScripts::Drupal < ImportScripts::Base
current_count += 1
print_status current_count, total_count
identifier = attachment['entity_type'] == "comment" ? "cid" : "nid"
next unless user_id = user_id_from_imported_user_id(attachment['uid'])
next unless post_id = post_id_from_imported_post_id("#{identifier}:#{attachment['entity_id']}")
identifier = attachment["entity_type"] == "comment" ? "cid" : "nid"
next unless user_id = user_id_from_imported_user_id(attachment["uid"])
unless post_id = post_id_from_imported_post_id("#{identifier}:#{attachment["entity_id"]}")
next
end
next unless user = User.find(user_id)
next unless post = Post.find(post_id)
@@ -392,9 +385,14 @@ class ImportScripts::Drupal < ImportScripts::Base
new_raw = "#{new_raw}\n\n#{upload_html}" unless new_raw.include?(upload_html)
if new_raw != post.raw
PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: "Import attachment from Drupal")
PostRevisor.new(post).revise!(
post.user,
{ raw: new_raw },
bypass_bump: true,
edit_reason: "Import attachment from Drupal",
)
else
puts '', 'Skipped upload: already imported'
puts "", "Skipped upload: already imported"
end
success_count += 1
@@ -406,13 +404,13 @@ class ImportScripts::Drupal < ImportScripts::Base
end
def create_permalinks
puts '', 'creating permalinks...'
puts "", "creating permalinks..."
Topic.listable_topics.find_each do |topic|
begin
tcf = topic.custom_fields
if tcf && tcf['import_id']
node_id = tcf['import_id'][/nid:(\d+)/, 1]
if tcf && tcf["import_id"]
node_id = tcf["import_id"][/nid:(\d+)/, 1]
slug = "/node/#{node_id}"
Permalink.create(url: slug, topic_id: topic.id)
end
@@ -424,18 +422,16 @@ class ImportScripts::Drupal < ImportScripts::Base
end
def find_upload(post, attachment)
uri = attachment['uri'][/public:\/\/upload\/(.+)/, 1]
uri = attachment["uri"][%r{public://upload/(.+)}, 1]
real_filename = CGI.unescapeHTML(uri)
file = File.join(ATTACHMENT_DIR, real_filename)
unless File.exist?(file)
puts "Attachment file #{attachment['filename']} doesn't exist"
puts "Attachment file #{attachment["filename"]} doesn't exist"
tmpfile = "attachments_failed.txt"
filename = File.join('/tmp/', tmpfile)
File.open(filename, 'a') { |f|
f.puts attachment['filename']
}
filename = File.join("/tmp/", tmpfile)
File.open(filename, "a") { |f| f.puts attachment["filename"] }
end
upload = create_upload(post.user.id || -1, file, real_filename)
@@ -452,13 +448,13 @@ class ImportScripts::Drupal < ImportScripts::Base
def preprocess_raw(raw)
return if raw.blank?
# quotes on new lines
raw.gsub!(/\[quote\](.+?)\[\/quote\]/im) { |quote|
quote.gsub!(/\[quote\](.+?)\[\/quote\]/im) { "\n#{$1}\n" }
raw.gsub!(%r{\[quote\](.+?)\[/quote\]}im) do |quote|
quote.gsub!(%r{\[quote\](.+?)\[/quote\]}im) { "\n#{$1}\n" }
quote.gsub!(/\n(.+?)/) { "\n> #{$1}" }
}
end
# [QUOTE=<username>]...[/QUOTE]
raw.gsub!(/\[quote=([^;\]]+)\](.+?)\[\/quote\]/im) do
raw.gsub!(%r{\[quote=([^;\]]+)\](.+?)\[/quote\]}im) do
username, quote = $1, $2
"\n[quote=\"#{username}\"]\n#{quote}\n[/quote]\n"
end
@@ -468,7 +464,7 @@ class ImportScripts::Drupal < ImportScripts::Base
end
def postprocess_posts
puts '', 'postprocessing posts'
puts "", "postprocessing posts"
current = 0
max = Post.count
@@ -479,7 +475,7 @@ class ImportScripts::Drupal < ImportScripts::Base
new_raw = raw.dup
# replace old topic to new topic links
new_raw.gsub!(/https:\/\/site.com\/forum\/topic\/(\d+)/im) do
new_raw.gsub!(%r{https://site.com/forum/topic/(\d+)}im) do
post_id = post_id_from_imported_post_id("nid:#{$1}")
next unless post_id
topic = Post.find(post_id).topic
@@ -487,7 +483,7 @@ class ImportScripts::Drupal < ImportScripts::Base
end
# replace old comment to reply links
new_raw.gsub!(/https:\/\/site.com\/comment\/(\d+)#comment-\d+/im) do
new_raw.gsub!(%r{https://site.com/comment/(\d+)#comment-\d+}im) do
post_id = post_id_from_imported_post_id("cid:#{$1}")
next unless post_id
post_ref = Post.find(post_id)
@@ -498,8 +494,8 @@ class ImportScripts::Drupal < ImportScripts::Base
post.raw = new_raw
post.save
end
rescue
puts '', "Failed rewrite on post: #{post.id}"
rescue StandardError
puts "", "Failed rewrite on post: #{post.id}"
ensure
print_status(current += 1, max)
end
@@ -507,15 +503,15 @@ class ImportScripts::Drupal < ImportScripts::Base
end
# Makes sure every user has an avatar record and refreshes it from Gravatar.
#
# A failure for one user is reported and skipped so that a single bad
# record does not abort the run; progress is printed after every user,
# whether or not the update succeeded.
def import_gravatars
  puts "", "importing gravatars"

  current = 0
  max = User.count

  User.find_each do |user|
    begin
      user.create_user_avatar(user_id: user.id) unless user.user_avatar
      user.user_avatar.update_gravatar!
    rescue StandardError
      # Must be double-quoted: the previous single-quoted literal printed
      # the text "#{user.id}" verbatim instead of the failing user's id.
      puts "", "Failed avatar update on user #{user.id}"
    ensure
      print_status(current += 1, max)
    end
  end
end
# Converts a Unix-epoch timestamp, given as a string of seconds, to a DateTime.
def parse_datetime(time)
  epoch_seconds_format = "%s"
  DateTime.strptime(time, epoch_seconds_format)
end
# Runs +sql+ on @client with the cache_rows option set to true and returns
# whatever the client returns.
def mysql_query(sql)
  query_options = { cache_rows: true }
  @client.query(sql, **query_options)
end
end
if __FILE__ == $0
ImportScripts::Drupal.new.perform
end
ImportScripts::Drupal.new.perform if __FILE__ == $0

View File

@@ -5,7 +5,6 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb")
# Edit the constants and initialize method for your import data.
class ImportScripts::DrupalJson < ImportScripts::Base
JSON_FILES_DIR = "/Users/techapj/Documents"
def initialize
@@ -28,20 +27,18 @@ class ImportScripts::DrupalJson < ImportScripts::Base
end
# Creates one user per entry of @users_json, then deletes all email tokens.
def import_users
  puts "", "Importing users"

  create_users(@users_json) do |json_user|
    joined_at = Time.zone.at(json_user["created"].to_i)

    {
      id: json_user["uid"],
      name: json_user["name"],
      email: json_user["mail"],
      created_at: joined_at,
    }
  end

  EmailToken.delete_all
end
end
if __FILE__ == $0
ImportScripts::DrupalJson.new.perform
end
ImportScripts::DrupalJson.new.perform if __FILE__ == $0

View File

@@ -5,41 +5,51 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require File.expand_path(File.dirname(__FILE__) + "/drupal.rb")
class ImportScripts::DrupalQA < ImportScripts::Drupal
# Returns the taxonomy terms that act as categories for imported questions.
#
# For each published question node, the first term id in its GROUP_CONCAT
# list is taken as the category; the tid, name and description of those
# terms are then fetched from taxonomy_term_data.
#
# Returns an empty array, without issuing the second query, when no
# question has a term: "WHERE tid IN ()" is a syntax error in MySQL.
def categories_query
  question_terms = @client.query(<<~SQL)
    SELECT n.nid, GROUP_CONCAT(ti.tid) AS tids
    FROM node AS n
    INNER JOIN taxonomy_index AS ti ON ti.nid = n.nid
    WHERE n.type = 'question'
    AND n.status = 1
    GROUP BY n.nid
  SQL

  # Only the first term of each question is used; ids are coerced with
  # to_i, so the interpolated list below contains integers only.
  category_ids =
    question_terms
      .map { |row| row["tids"] }
      .reject { |tids| tids.nil? || tids.strip.empty? }
      .map { |tids| tids.split(",")[0].to_i }
      .uniq

  return [] if category_ids.empty?

  @client.query(
    "SELECT tid, name, description FROM taxonomy_term_data WHERE tid IN (#{category_ids.join(",")})",
  )
end
def create_forum_topics
puts "", "creating forum topics"
puts '', "creating forum topics"
total_count = @client.query("
total_count =
@client.query(
"
SELECT COUNT(*) count
FROM node n
WHERE n.type = 'question'
AND n.status = 1;").first['count']
AND n.status = 1;",
).first[
"count"
]
batch_size = 1000
batches(batch_size) do |offset|
results = @client.query("
results =
@client.query(
"
SELECT n.nid,
n.title,
GROUP_CONCAT(t.tid) AS tid,
@@ -54,40 +64,48 @@ class ImportScripts::DrupalQA < ImportScripts::Drupal
GROUP BY n.nid, n.title, n.uid, n.created, f.body_value
LIMIT #{batch_size}
OFFSET #{offset}
", cache_rows: false)
",
cache_rows: false,
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| "nid:#{p['nid']}" }
next if all_records_exist? :posts, results.map { |p| "nid:#{p["nid"]}" }
create_posts(results, total: total_count, offset: offset) do |row|
{
id: "nid:#{row['nid']}",
user_id: user_id_from_imported_user_id(row['uid']) || -1,
category: category_id_from_imported_category_id((row['tid'] || '').split(',')[0]),
raw: row['body'],
created_at: Time.zone.at(row['created']),
id: "nid:#{row["nid"]}",
user_id: user_id_from_imported_user_id(row["uid"]) || -1,
category: category_id_from_imported_category_id((row["tid"] || "").split(",")[0]),
raw: row["body"],
created_at: Time.zone.at(row["created"]),
pinned_at: nil,
title: row['title'].try(:strip)
title: row["title"].try(:strip),
}
end
end
end
def create_direct_replies
puts '', "creating replies in topics"
puts "", "creating replies in topics"
total_count = @client.query("
total_count =
@client.query(
"
SELECT COUNT(*) count
FROM node n
WHERE n.type = 'answer'
AND n.status = 1;").first['count']
AND n.status = 1;",
).first[
"count"
]
batch_size = 1000
batches(batch_size) do |offset|
results = @client.query("
results =
@client.query(
"
SELECT n.nid AS cid,
q.field_answer_question_nid AS nid,
n.uid,
@@ -100,25 +118,27 @@ class ImportScripts::DrupalQA < ImportScripts::Drupal
AND n.type = 'answer'
LIMIT #{batch_size}
OFFSET #{offset}
", cache_rows: false)
",
cache_rows: false,
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| "cid:#{p['cid']}" }
next if all_records_exist? :posts, results.map { |p| "cid:#{p["cid"]}" }
create_posts(results, total: total_count, offset: offset) do |row|
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}")
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row["nid"]}")
if topic_mapping && topic_id = topic_mapping[:topic_id]
h = {
id: "cid:#{row['cid']}",
id: "cid:#{row["cid"]}",
topic_id: topic_id,
user_id: user_id_from_imported_user_id(row['uid']) || -1,
raw: row['body'],
created_at: Time.zone.at(row['created']),
user_id: user_id_from_imported_user_id(row["uid"]) || -1,
raw: row["body"],
created_at: Time.zone.at(row["created"]),
}
h
else
puts "No topic found for answer #{row['cid']}"
puts "No topic found for answer #{row["cid"]}"
nil
end
end
@@ -126,21 +146,27 @@ class ImportScripts::DrupalQA < ImportScripts::Drupal
end
def create_nested_replies
puts '', "creating nested replies to posts in topics"
puts "", "creating nested replies to posts in topics"
total_count = @client.query("
total_count =
@client.query(
"
SELECT COUNT(c.cid) count
FROM node n
INNER JOIN comment AS c ON n.nid = c.nid
WHERE n.type = 'question'
AND n.status = 1;").first['count']
AND n.status = 1;",
).first[
"count"
]
batch_size = 1000
batches(batch_size) do |offset|
# WARNING: If there are more than 1000000 this might have to be revisited
results = @client.query("
results =
@client.query(
"
SELECT (c.cid + 1000000) as cid,
c.nid,
c.uid,
@@ -153,45 +179,53 @@ class ImportScripts::DrupalQA < ImportScripts::Drupal
AND n.type = 'question'
LIMIT #{batch_size}
OFFSET #{offset}
", cache_rows: false)
",
cache_rows: false,
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| "cid:#{p['cid']}" }
next if all_records_exist? :posts, results.map { |p| "cid:#{p["cid"]}" }
create_posts(results, total: total_count, offset: offset) do |row|
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}")
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row["nid"]}")
if topic_mapping && topic_id = topic_mapping[:topic_id]
h = {
id: "cid:#{row['cid']}",
id: "cid:#{row["cid"]}",
topic_id: topic_id,
user_id: user_id_from_imported_user_id(row['uid']) || -1,
raw: row['body'],
created_at: Time.zone.at(row['created']),
user_id: user_id_from_imported_user_id(row["uid"]) || -1,
raw: row["body"],
created_at: Time.zone.at(row["created"]),
}
h
else
puts "No topic found for comment #{row['cid']}"
puts "No topic found for comment #{row["cid"]}"
nil
end
end
end
puts '', "creating nested replies to answers in topics"
puts "", "creating nested replies to answers in topics"
total_count = @client.query("
total_count =
@client.query(
"
SELECT COUNT(c.cid) count
FROM node n
INNER JOIN comment AS c ON n.nid = c.nid
WHERE n.type = 'answer'
AND n.status = 1;").first['count']
AND n.status = 1;",
).first[
"count"
]
batch_size = 1000
batches(batch_size) do |offset|
# WARNING: If there are more than 1000000 this might have to be revisited
results = @client.query("
results =
@client.query(
"
SELECT (c.cid + 1000000) as cid,
q.field_answer_question_nid AS nid,
c.uid,
@@ -205,25 +239,27 @@ class ImportScripts::DrupalQA < ImportScripts::Drupal
AND n.type = 'answer'
LIMIT #{batch_size}
OFFSET #{offset}
", cache_rows: false)
",
cache_rows: false,
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| "cid:#{p['cid']}" }
next if all_records_exist? :posts, results.map { |p| "cid:#{p["cid"]}" }
create_posts(results, total: total_count, offset: offset) do |row|
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}")
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row["nid"]}")
if topic_mapping && topic_id = topic_mapping[:topic_id]
h = {
id: "cid:#{row['cid']}",
id: "cid:#{row["cid"]}",
topic_id: topic_id,
user_id: user_id_from_imported_user_id(row['uid']) || -1,
raw: row['body'],
created_at: Time.zone.at(row['created']),
user_id: user_id_from_imported_user_id(row["uid"]) || -1,
raw: row["body"],
created_at: Time.zone.at(row["created"]),
}
h
else
puts "No topic found for comment #{row['cid']}"
puts "No topic found for comment #{row["cid"]}"
nil
end
end
@@ -234,9 +270,6 @@ class ImportScripts::DrupalQA < ImportScripts::Drupal
create_direct_replies
create_nested_replies
end
end
if __FILE__ == $0
ImportScripts::DrupalQA.new.perform
end
ImportScripts::DrupalQA.new.perform if __FILE__ == $0

View File

@@ -1,22 +1,16 @@
# frozen_string_literal: true
require 'mysql2'
require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::Elgg < ImportScripts::Base
BATCH_SIZE ||= 1000
# Opens the MySQL connection used by the Elgg import and sets the
# max_username_length site setting to 50.
def initialize
  super

  connection_settings = {
    host: "127.0.0.1",
    port: "3306",
    username: "",
    database: "",
    password: "",
  }
  @client = Mysql2::Client.new(**connection_settings)

  SiteSetting.max_username_length = 50
end
@@ -31,7 +25,7 @@ class ImportScripts::Elgg < ImportScripts::Base
def create_avatar(user, guid)
puts "#{@path}"
# Put your avatar at the root of discourse in this folder:
path_prefix = 'import/data/www/'
path_prefix = "import/data/www/"
# https://github.com/Elgg/Elgg/blob/2fc9c1910a9169bbe4010026c61d8e41a5b56239/engine/classes/ElggDiskFilestore.php#L24
# const BUCKET_SIZE = 5000;
bucket_size = 5000
@@ -40,13 +34,11 @@ class ImportScripts::Elgg < ImportScripts::Base
bucket_id = [guid / bucket_size * bucket_size, 1].max
avatar_path = File.join(path_prefix, bucket_id.to_s, "/#{guid}/profile/#{guid}master.jpg")
if File.exist?(avatar_path)
@uploader.create_avatar(user, avatar_path)
end
@uploader.create_avatar(user, avatar_path) if File.exist?(avatar_path)
end
def grant_admin(user, is_admin)
if is_admin == 'yes'
if is_admin == "yes"
puts "", "#{user.username} is granted admin!"
user.grant_admin!
end
@@ -56,10 +48,11 @@ class ImportScripts::Elgg < ImportScripts::Base
puts "", "importing users..."
last_user_id = -1
total_users = mysql_query("select count(*) from elgg_users_entity where banned='no'").first["count"]
total_users =
mysql_query("select count(*) from elgg_users_entity where banned='no'").first["count"]
batches(BATCH_SIZE) do |offset|
users = mysql_query(<<-SQL
users = mysql_query(<<-SQL).to_a
select eue.guid, eue.username, eue.name, eue.email, eue.admin,
max(case when ems1.string='cae_structure' then ems2.string end)cae_structure,
max(case when ems1.string='location' then ems2.string end)location,
@@ -76,7 +69,6 @@ class ImportScripts::Elgg < ImportScripts::Base
group by eue.guid
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if users.empty?
@@ -97,11 +89,12 @@ class ImportScripts::Elgg < ImportScripts::Base
name: u["name"],
website: u["website"],
bio_raw: u["briefdescription"].to_s + " " + u["cae_structure"].to_s,
post_create_action: proc do |user|
create_avatar(user, u["guid"])
#add_user_to_group(user, u["cae_structure"])
grant_admin(user, u["admin"])
end
post_create_action:
proc do |user|
create_avatar(user, u["guid"])
#add_user_to_group(user, u["cae_structure"])
grant_admin(user, u["admin"])
end,
}
end
end
@@ -115,9 +108,9 @@ class ImportScripts::Elgg < ImportScripts::Base
create_categories(categories) do |c|
{
id: c['guid'],
name: CGI.unescapeHTML(c['name']),
description: CGI.unescapeHTML(c['description'])
id: c["guid"],
name: CGI.unescapeHTML(c["name"]),
description: CGI.unescapeHTML(c["description"]),
}
end
end
@@ -125,10 +118,13 @@ class ImportScripts::Elgg < ImportScripts::Base
def import_topics
puts "", "creating topics"
total_count = mysql_query("select count(*) count from elgg_entities where subtype = 32;").first["count"]
total_count =
mysql_query("select count(*) count from elgg_entities where subtype = 32;").first["count"]
batches(BATCH_SIZE) do |offset|
results = mysql_query("
results =
mysql_query(
"
SELECT
ee.guid id,
owner_guid user_id,
@@ -143,30 +139,35 @@ class ImportScripts::Elgg < ImportScripts::Base
ORDER BY ee.guid
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
")
",
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |m| m['id'].to_i }
next if all_records_exist? :posts, results.map { |m| m["id"].to_i }
create_posts(results, total: total_count, offset: offset) do |m|
{
id: m['id'],
user_id: user_id_from_imported_user_id(m['user_id']) || -1,
raw: CGI.unescapeHTML(m['raw']),
created_at: Time.zone.at(m['created_at']),
category: category_id_from_imported_category_id(m['category_id']),
title: CGI.unescapeHTML(m['title']),
post_create_action: proc do |post|
tag_names = mysql_query("
id: m["id"],
user_id: user_id_from_imported_user_id(m["user_id"]) || -1,
raw: CGI.unescapeHTML(m["raw"]),
created_at: Time.zone.at(m["created_at"]),
category: category_id_from_imported_category_id(m["category_id"]),
title: CGI.unescapeHTML(m["title"]),
post_create_action:
proc do |post|
tag_names =
mysql_query(
"
select ms.string
from elgg_metadata md
join elgg_metastrings ms on md.value_id = ms.id
where name_id = 43
and entity_guid = #{m['id']};
").map { |tag| tag['string'] }
DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names)
end
and entity_guid = #{m["id"]};
",
).map { |tag| tag["string"] }
DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names)
end,
}
end
end
@@ -179,10 +180,13 @@ class ImportScripts::Elgg < ImportScripts::Base
def import_posts
puts "", "creating posts"
total_count = mysql_query("SELECT count(*) count FROM elgg_entities WHERE subtype = 42").first["count"]
total_count =
mysql_query("SELECT count(*) count FROM elgg_entities WHERE subtype = 42").first["count"]
batches(BATCH_SIZE) do |offset|
results = mysql_query("
results =
mysql_query(
"
SELECT
ee.guid id,
container_guid topic_id,
@@ -195,19 +199,20 @@ class ImportScripts::Elgg < ImportScripts::Base
ORDER BY ee.guid
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
")
",
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |m| m['id'].to_i }
next if all_records_exist? :posts, results.map { |m| m["id"].to_i }
create_posts(results, total: total_count, offset: offset) do |m|
{
id: m['id'],
user_id: user_id_from_imported_user_id(m['user_id']) || -1,
topic_id: topic_lookup_from_imported_post_id(m['topic_id'])[:topic_id],
raw: CGI.unescapeHTML(m['raw']),
created_at: Time.zone.at(m['created_at']),
id: m["id"],
user_id: user_id_from_imported_user_id(m["user_id"]) || -1,
topic_id: topic_lookup_from_imported_post_id(m["topic_id"])[:topic_id],
raw: CGI.unescapeHTML(m["raw"]),
created_at: Time.zone.at(m["created_at"]),
}
end
end
@@ -216,7 +221,6 @@ class ImportScripts::Elgg < ImportScripts::Base
# Runs +sql+ on @client with the cache_rows option set to false and returns
# whatever the client returns.
def mysql_query(sql)
  query_options = { cache_rows: false }
  @client.query(sql, **query_options)
end
end
# Run the import only when this file is executed directly, not when it is
# merely loaded by another script.
ImportScripts::Elgg.new.perform if __FILE__ == $0

View File

@@ -1,60 +1,62 @@
# frozen_string_literal: true
require "mysql2"
require 'time'
require 'date'
require "time"
require "date"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::FLARUM < ImportScripts::Base
#SET THE APPROPRIATE VALUES FOR YOUR MYSQL CONNECTION
FLARUM_HOST ||= ENV['FLARUM_HOST'] || "db_host"
FLARUM_DB ||= ENV['FLARUM_DB'] || "db_name"
FLARUM_HOST ||= ENV["FLARUM_HOST"] || "db_host"
FLARUM_DB ||= ENV["FLARUM_DB"] || "db_name"
BATCH_SIZE ||= 1000
FLARUM_USER ||= ENV['FLARUM_USER'] || "db_user"
FLARUM_PW ||= ENV['FLARUM_PW'] || "db_user_pass"
FLARUM_USER ||= ENV["FLARUM_USER"] || "db_user"
FLARUM_PW ||= ENV["FLARUM_PW"] || "db_user_pass"
# Opens the MySQL connection to the Flarum database using the FLARUM_*
# constants for host, credentials and database name.
def initialize
  super

  connection_settings = {
    host: FLARUM_HOST,
    username: FLARUM_USER,
    password: FLARUM_PW,
    database: FLARUM_DB,
  }
  @client = Mysql2::Client.new(**connection_settings)
end
# Runs the import phases in order (users, categories, posts) and returns
# the result of the last phase.
def execute
  phases = %i[import_users import_categories import_posts]
  phases.map { |phase| send(phase) }.last
end
def import_users
puts '', "creating users"
total_count = mysql_query("SELECT count(*) count FROM users;").first['count']
puts "", "creating users"
total_count = mysql_query("SELECT count(*) count FROM users;").first["count"]
batches(BATCH_SIZE) do |offset|
results = mysql_query(
"SELECT id, username, email, joined_at, last_seen_at
results =
mysql_query(
"SELECT id, username, email, joined_at, last_seen_at
FROM users
LIMIT #{BATCH_SIZE}
OFFSET #{offset};")
OFFSET #{offset};",
)
break if results.size < 1
next if all_records_exist? :users, results.map { |u| u["id"].to_i }
create_users(results, total: total_count, offset: offset) do |user|
{ id: user['id'],
email: user['email'],
username: user['username'],
name: user['username'],
created_at: user['joined_at'],
last_seen_at: user['last_seen_at']
{
id: user["id"],
email: user["email"],
username: user["username"],
name: user["username"],
created_at: user["joined_at"],
last_seen_at: user["last_seen_at"],
}
end
end
@@ -63,30 +65,31 @@ class ImportScripts::FLARUM < ImportScripts::Base
def import_categories
puts "", "importing top level categories..."
categories = mysql_query("
categories =
mysql_query(
"
SELECT id, name, description, position
FROM tags
ORDER BY position ASC
").to_a
",
).to_a
create_categories(categories) do |category|
{
id: category["id"],
name: category["name"]
}
end
create_categories(categories) { |category| { id: category["id"], name: category["name"] } }
puts "", "importing children categories..."
children_categories = mysql_query("
children_categories =
mysql_query(
"
SELECT id, name, description, position
FROM tags
ORDER BY position
").to_a
",
).to_a
create_categories(children_categories) do |category|
{
id: "child##{category['id']}",
id: "child##{category["id"]}",
name: category["name"],
description: category["description"],
}
@@ -99,7 +102,9 @@ class ImportScripts::FLARUM < ImportScripts::Base
total_count = mysql_query("SELECT count(*) count from posts").first["count"]
batches(BATCH_SIZE) do |offset|
results = mysql_query("
results =
mysql_query(
"
SELECT p.id id,
d.id topic_id,
d.title title,
@@ -116,29 +121,30 @@ class ImportScripts::FLARUM < ImportScripts::Base
ORDER BY p.created_at
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
").to_a
",
).to_a
break if results.size < 1
next if all_records_exist? :posts, results.map { |m| m['id'].to_i }
next if all_records_exist? :posts, results.map { |m| m["id"].to_i }
create_posts(results, total: total_count, offset: offset) do |m|
skip = false
mapped = {}
mapped[:id] = m['id']
mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1
mapped[:raw] = process_FLARUM_post(m['raw'], m['id'])
mapped[:created_at] = Time.zone.at(m['created_at'])
mapped[:id] = m["id"]
mapped[:user_id] = user_id_from_imported_user_id(m["user_id"]) || -1
mapped[:raw] = process_FLARUM_post(m["raw"], m["id"])
mapped[:created_at] = Time.zone.at(m["created_at"])
if m['id'] == m['first_post_id']
mapped[:category] = category_id_from_imported_category_id("child##{m['category_id']}")
mapped[:title] = CGI.unescapeHTML(m['title'])
if m["id"] == m["first_post_id"]
mapped[:category] = category_id_from_imported_category_id("child##{m["category_id"]}")
mapped[:title] = CGI.unescapeHTML(m["title"])
else
parent = topic_lookup_from_imported_post_id(m['first_post_id'])
parent = topic_lookup_from_imported_post_id(m["first_post_id"])
if parent
mapped[:topic_id] = parent[:topic_id]
else
puts "Parent post #{m['first_post_id']} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
puts "Parent post #{m["first_post_id"]} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
skip = true
end
end

View File

@@ -17,23 +17,23 @@ export FLUXBB_PREFIX=""
# Call it like this:
# RAILS_ENV=production bundle exec ruby script/import_scripts/fluxbb.rb
class ImportScripts::FluxBB < ImportScripts::Base
FLUXBB_HOST ||= ENV['FLUXBB_HOST'] || "localhost"
FLUXBB_DB ||= ENV['FLUXBB_DB'] || "fluxbb"
FLUXBB_HOST ||= ENV["FLUXBB_HOST"] || "localhost"
FLUXBB_DB ||= ENV["FLUXBB_DB"] || "fluxbb"
BATCH_SIZE ||= 1000
FLUXBB_USER ||= ENV['FLUXBB_USER'] || "root"
FLUXBB_PW ||= ENV['FLUXBB_PW'] || ""
FLUXBB_PREFIX ||= ENV['FLUXBB_PREFIX'] || ""
FLUXBB_USER ||= ENV["FLUXBB_USER"] || "root"
FLUXBB_PW ||= ENV["FLUXBB_PW"] || ""
FLUXBB_PREFIX ||= ENV["FLUXBB_PREFIX"] || ""
# Opens the MySQL connection to the FluxBB database using the FLUXBB_*
# constants for host, credentials and database name.
def initialize
  super

  connection_settings = {
    host: FLUXBB_HOST,
    username: FLUXBB_USER,
    password: FLUXBB_PW,
    database: FLUXBB_DB,
  }
  @client = Mysql2::Client.new(**connection_settings)
end
def execute
@@ -45,64 +45,67 @@ class ImportScripts::FluxBB < ImportScripts::Base
end
# Imports FluxBB groups whose id is greater than 2; groups with id 1 or 2
# are left out.
def import_groups
  puts "", "creating groups"

  group_sql = "SELECT g_id id, g_title name, g_user_title title\nFROM #{FLUXBB_PREFIX}groups"
  custom_groups = mysql_query(group_sql).select { |row| row["id"] > 2 }

  create_groups(custom_groups) do |row|
    { id: row["id"], name: row["name"], title: row["title"] }
  end
end
def import_users
puts '', "creating users"
puts "", "creating users"
total_count = mysql_query("SELECT count(*) count FROM #{FLUXBB_PREFIX}users;").first['count']
total_count = mysql_query("SELECT count(*) count FROM #{FLUXBB_PREFIX}users;").first["count"]
batches(BATCH_SIZE) do |offset|
results = mysql_query(
"SELECT id, username, realname name, url website, email email, registered created_at,
results =
mysql_query(
"SELECT id, username, realname name, url website, email email, registered created_at,
registration_ip registration_ip_address, last_visit last_visit_time,
last_email_sent last_emailed_at, location, group_id
FROM #{FLUXBB_PREFIX}users
LIMIT #{BATCH_SIZE}
OFFSET #{offset};")
OFFSET #{offset};",
)
break if results.size < 1
next if all_records_exist? :users, results.map { |u| u["id"].to_i }
create_users(results, total: total_count, offset: offset) do |user|
{ id: user['id'],
email: user['email'],
username: user['username'],
name: user['name'],
created_at: Time.zone.at(user['created_at']),
website: user['website'],
registration_ip_address: user['registration_ip_address'],
last_seen_at: Time.zone.at(user['last_visit_time']),
last_emailed_at: user['last_emailed_at'] == nil ? 0 : Time.zone.at(user['last_emailed_at']),
location: user['location'],
moderator: user['group_id'] == 2,
admin: user['group_id'] == 1 }
{
id: user["id"],
email: user["email"],
username: user["username"],
name: user["name"],
created_at: Time.zone.at(user["created_at"]),
website: user["website"],
registration_ip_address: user["registration_ip_address"],
last_seen_at: Time.zone.at(user["last_visit_time"]),
last_emailed_at:
user["last_emailed_at"] == nil ? 0 : Time.zone.at(user["last_emailed_at"]),
location: user["location"],
moderator: user["group_id"] == 2,
admin: user["group_id"] == 1,
}
end
groupusers = results.select { |user| user['group_id'] > 2 }
groupusers = results.select { |user| user["group_id"] > 2 }
groupusers.each do |user|
if user['group_id']
user_id = user_id_from_imported_user_id(user['id'])
group_id = group_id_from_imported_group_id(user['group_id'])
if user["group_id"]
user_id = user_id_from_imported_user_id(user["id"])
group_id = group_id_from_imported_group_id(user["group_id"])
if user_id && group_id
GroupUser.find_or_create_by(user_id: user_id, group_id: group_id)
end
GroupUser.find_or_create_by(user_id: user_id, group_id: group_id) if user_id && group_id
end
end
end
@@ -111,33 +114,34 @@ class ImportScripts::FluxBB < ImportScripts::Base
def import_categories
puts "", "importing top level categories..."
categories = mysql_query("
categories =
mysql_query(
"
SELECT id, cat_name name, disp_position position
FROM #{FLUXBB_PREFIX}categories
ORDER BY id ASC
").to_a
",
).to_a
create_categories(categories) do |category|
{
id: category["id"],
name: category["name"]
}
end
create_categories(categories) { |category| { id: category["id"], name: category["name"] } }
puts "", "importing children categories..."
children_categories = mysql_query("
children_categories =
mysql_query(
"
SELECT id, forum_name name, forum_desc description, disp_position position, cat_id parent_category_id
FROM #{FLUXBB_PREFIX}forums
ORDER BY id
").to_a
",
).to_a
create_categories(children_categories) do |category|
{
id: "child##{category['id']}",
id: "child##{category["id"]}",
name: category["name"],
description: category["description"],
parent_category_id: category_id_from_imported_category_id(category["parent_category_id"])
parent_category_id: category_id_from_imported_category_id(category["parent_category_id"]),
}
end
end
@@ -148,7 +152,9 @@ class ImportScripts::FluxBB < ImportScripts::Base
total_count = mysql_query("SELECT count(*) count from #{FLUXBB_PREFIX}posts").first["count"]
batches(BATCH_SIZE) do |offset|
results = mysql_query("
results =
mysql_query(
"
SELECT p.id id,
t.id topic_id,
t.forum_id category_id,
@@ -163,29 +169,30 @@ class ImportScripts::FluxBB < ImportScripts::Base
ORDER BY p.posted
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
").to_a
",
).to_a
break if results.size < 1
next if all_records_exist? :posts, results.map { |m| m['id'].to_i }
next if all_records_exist? :posts, results.map { |m| m["id"].to_i }
create_posts(results, total: total_count, offset: offset) do |m|
skip = false
mapped = {}
mapped[:id] = m['id']
mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1
mapped[:raw] = process_fluxbb_post(m['raw'], m['id'])
mapped[:created_at] = Time.zone.at(m['created_at'])
mapped[:id] = m["id"]
mapped[:user_id] = user_id_from_imported_user_id(m["user_id"]) || -1
mapped[:raw] = process_fluxbb_post(m["raw"], m["id"])
mapped[:created_at] = Time.zone.at(m["created_at"])
if m['id'] == m['first_post_id']
mapped[:category] = category_id_from_imported_category_id("child##{m['category_id']}")
mapped[:title] = CGI.unescapeHTML(m['title'])
if m["id"] == m["first_post_id"]
mapped[:category] = category_id_from_imported_category_id("child##{m["category_id"]}")
mapped[:title] = CGI.unescapeHTML(m["title"])
else
parent = topic_lookup_from_imported_post_id(m['first_post_id'])
parent = topic_lookup_from_imported_post_id(m["first_post_id"])
if parent
mapped[:topic_id] = parent[:topic_id]
else
puts "Parent post #{m['first_post_id']} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
puts "Parent post #{m["first_post_id"]} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
skip = true
end
end
@@ -196,16 +203,16 @@ class ImportScripts::FluxBB < ImportScripts::Base
end
def suspend_users
puts '', "updating banned users"
puts "", "updating banned users"
banned = 0
failed = 0
total = mysql_query("SELECT count(*) count FROM #{FLUXBB_PREFIX}bans").first['count']
total = mysql_query("SELECT count(*) count FROM #{FLUXBB_PREFIX}bans").first["count"]
system_user = Discourse.system_user
mysql_query("SELECT username, email FROM #{FLUXBB_PREFIX}bans").each do |b|
user = User.find_by_email(b['email'])
user = User.find_by_email(b["email"])
if user
user.suspended_at = Time.now
user.suspended_till = 200.years.from_now
@@ -218,7 +225,7 @@ class ImportScripts::FluxBB < ImportScripts::Base
failed += 1
end
else
puts "Not found: #{b['email']}"
puts "Not found: #{b["email"]}"
failed += 1
end
@@ -233,15 +240,15 @@ class ImportScripts::FluxBB < ImportScripts::Base
s.gsub!(/<!-- s(\S+) -->(?:.*)<!-- s(?:\S+) -->/, '\1')
# Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
s.gsub!(/<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/, '[\2](\1)')
s.gsub!(%r{<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)</a><!-- \w -->}, '[\2](\1)')
# Many bbcode tags have a hash attached to them. Examples:
# [url=https&#58;//google&#46;com:1qh1i7ky]click here[/url:1qh1i7ky]
# [quote=&quot;cybereality&quot;:b0wtlzex]Some text.[/quote:b0wtlzex]
s.gsub!(/:(?:\w{8})\]/, ']')
s.gsub!(/:(?:\w{8})\]/, "]")
# Remove video tags.
s.gsub!(/(^\[video=.*?\])|(\[\/video\]$)/, '')
s.gsub!(%r{(^\[video=.*?\])|(\[/video\]$)}, "")
s = CGI.unescapeHTML(s)
@@ -249,7 +256,7 @@ class ImportScripts::FluxBB < ImportScripts::Base
# [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
#
# Work around it for now:
s.gsub!(/\[http(s)?:\/\/(www\.)?/, '[')
s.gsub!(%r{\[http(s)?://(www\.)?}, "[")
s
end

View File

@@ -2,7 +2,7 @@
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'csv'
require "csv"
# Importer for Friends+Me Google+ Exporter (F+MG+E) output.
#
@@ -32,18 +32,18 @@ require 'csv'
# Edit values at the top of the script to fit your preferences
class ImportScripts::FMGP < ImportScripts::Base
def initialize
super
# Set this to the base URL for the site; required for importing videos
# typically just 'https:' in production
@site_base_url = 'http://localhost:3000'
@site_base_url = "http://localhost:3000"
@system_user = Discourse.system_user
SiteSetting.max_image_size_kb = 40960
SiteSetting.max_attachment_size_kb = 40960
SiteSetting.max_image_size_kb = 40_960
SiteSetting.max_attachment_size_kb = 40_960
# handle the same video extension as the rest of Discourse
SiteSetting.authorized_extensions = (SiteSetting.authorized_extensions.split("|") + ['mp4', 'mov', 'webm', 'ogv']).uniq.join("|")
SiteSetting.authorized_extensions =
(SiteSetting.authorized_extensions.split("|") + %w[mp4 mov webm ogv]).uniq.join("|")
@invalid_bounce_score = 5.0
@min_title_words = 3
@max_title_words = 14
@@ -76,7 +76,7 @@ class ImportScripts::FMGP < ImportScripts::Base
@allowlist = nil
# Tags to apply to every topic; empty Array to not have any tags applied everywhere
@globaltags = [ "gplus" ]
@globaltags = ["gplus"]
@imagefiles = nil
@@ -101,34 +101,30 @@ class ImportScripts::FMGP < ImportScripts::Base
@first_date = nil
# every argument is a filename, do the right thing based on the file name
ARGV.each do |arg|
if arg.end_with?('.csv')
if arg.end_with?(".csv")
# CSV files produced by F+MG+E have "URL";"IsDownloaded";"FileName";"FilePath";"FileSize"
CSV.foreach(arg, headers: true, col_sep: ';') do |row|
@images[row[0]] = {
filename: row[2],
filepath: row[3],
filesize: row[4]
}
CSV.foreach(arg, headers: true, col_sep: ";") do |row|
@images[row[0]] = { filename: row[2], filepath: row[3], filesize: row[4] }
end
elsif arg.end_with?("upload-paths.txt")
@imagefiles = File.open(arg, "w")
elsif arg.end_with?('categories.json')
elsif arg.end_with?("categories.json")
@categories_filename = arg
@categories = load_fmgp_json(arg)
elsif arg.end_with?("usermap.json")
@usermap = load_fmgp_json(arg)
elsif arg.end_with?('blocklist.json')
elsif arg.end_with?("blocklist.json")
@blocklist = load_fmgp_json(arg).map { |i| i.to_s }.to_set
elsif arg.end_with?('allowlist.json')
elsif arg.end_with?("allowlist.json")
@allowlist = load_fmgp_json(arg).map { |i| i.to_s }.to_set
elsif arg.end_with?('.json')
elsif arg.end_with?(".json")
@feeds << load_fmgp_json(arg)
elsif arg == '--dry-run'
elsif arg == "--dry-run"
@dryrun = true
elsif arg.start_with?("--last-date=")
@last_date = Time.zone.parse(arg.gsub(/.*=/, ''))
@last_date = Time.zone.parse(arg.gsub(/.*=/, ""))
elsif arg.start_with?("--first-date=")
@first_date = Time.zone.parse(arg.gsub(/.*=/, ''))
@first_date = Time.zone.parse(arg.gsub(/.*=/, ""))
else
raise RuntimeError.new("unknown argument #{arg}")
end
@@ -153,7 +149,6 @@ class ImportScripts::FMGP < ImportScripts::Base
@blocked_posts = 0
# count uploaded file size
@totalsize = 0
end
def execute
@@ -222,7 +217,9 @@ class ImportScripts::FMGP < ImportScripts::Base
categories_new = "#{@categories_filename}.new"
File.open(categories_new, "w") do |f|
f.write(@categories.to_json)
raise RuntimeError.new("Category file missing categories for #{incomplete_categories}, edit #{categories_new} and rename it to #{@category_filename} before running the same import")
raise RuntimeError.new(
"Category file missing categories for #{incomplete_categories}, edit #{categories_new} and rename it to #{@category_filename} before running the same import",
)
end
end
end
@@ -233,28 +230,32 @@ class ImportScripts::FMGP < ImportScripts::Base
@categories.each do |id, cat|
if cat["parent"].present? && !cat["parent"].empty?
# Two separate sub-categories can have the same name, so need to identify by parent
Category.where(name: cat["category"]).each do |category|
parent = Category.where(id: category.parent_category_id).first
@cats[id] = category if parent.name == cat["parent"]
end
Category
.where(name: cat["category"])
.each do |category|
parent = Category.where(id: category.parent_category_id).first
@cats[id] = category if parent.name == cat["parent"]
end
else
if category = Category.where(name: cat["category"]).first
@cats[id] = category
elsif @create_categories
params = {}
params[:name] = cat['category']
params[:name] = cat["category"]
params[:id] = id
puts "Creating #{cat['category']}"
puts "Creating #{cat["category"]}"
category = create_category(params, id)
@cats[id] = category
end
end
raise RuntimeError.new("Could not find category #{cat["category"]} for #{cat}") if @cats[id].nil?
if @cats[id].nil?
raise RuntimeError.new("Could not find category #{cat["category"]} for #{cat}")
end
end
end
def import_users
puts '', "Importing Google+ post and comment author users..."
puts "", "Importing Google+ post and comment author users..."
# collect authors of both posts and comments
@feeds.each do |feed|
@@ -263,14 +264,10 @@ class ImportScripts::FMGP < ImportScripts::Base
community["categories"].each do |category|
category["posts"].each do |post|
import_author_user(post["author"])
if post["message"].present?
import_message_users(post["message"])
end
import_message_users(post["message"]) if post["message"].present?
post["comments"].each do |comment|
import_author_user(comment["author"])
if comment["message"].present?
import_message_users(comment["message"])
end
import_message_users(comment["message"]) if comment["message"].present?
end
end
end
@@ -282,12 +279,7 @@ class ImportScripts::FMGP < ImportScripts::Base
# now create them all
create_users(@newusers) do |id, u|
{
id: id,
email: u[:email],
name: u[:name],
post_create_action: u[:post_create_action]
}
{ id: id, email: u[:email], name: u[:name], post_create_action: u[:post_create_action] }
end
end
@@ -308,7 +300,8 @@ class ImportScripts::FMGP < ImportScripts::Base
def import_google_user(id, name)
if !@emails[id].present?
google_user_info = UserAssociatedAccount.find_by(provider_name: 'google_oauth2', provider_uid: id.to_i)
google_user_info =
UserAssociatedAccount.find_by(provider_name: "google_oauth2", provider_uid: id.to_i)
if google_user_info.nil?
# create new google user on system; expect this user to merge
# when they later log in with google authentication
@@ -320,36 +313,39 @@ class ImportScripts::FMGP < ImportScripts::Base
@newusers[id] = {
email: email,
name: name,
post_create_action: proc do |newuser|
newuser.approved = true
newuser.approved_by_id = @system_user.id
newuser.approved_at = newuser.created_at
if @blocklist.include?(id.to_s)
now = DateTime.now
forever = 1000.years.from_now
# you can suspend as well if you want your blocklist to
# be hard to recover from
#newuser.suspended_at = now
#newuser.suspended_till = forever
newuser.silenced_till = forever
end
newuser.save
@users[id] = newuser
UserAssociatedAccount.create(provider_name: 'google_oauth2', user_id: newuser.id, provider_uid: id)
# Do not send email to the invalid email addresses
# this can be removed after merging with #7162
s = UserStat.where(user_id: newuser.id).first
s.bounce_score = @invalid_bounce_score
s.reset_bounce_score_after = 1000.years.from_now
s.save
end
post_create_action:
proc do |newuser|
newuser.approved = true
newuser.approved_by_id = @system_user.id
newuser.approved_at = newuser.created_at
if @blocklist.include?(id.to_s)
now = DateTime.now
forever = 1000.years.from_now
# you can suspend as well if you want your blocklist to
# be hard to recover from
#newuser.suspended_at = now
#newuser.suspended_till = forever
newuser.silenced_till = forever
end
newuser.save
@users[id] = newuser
UserAssociatedAccount.create(
provider_name: "google_oauth2",
user_id: newuser.id,
provider_uid: id,
)
# Do not send email to the invalid email addresses
# this can be removed after merging with #7162
s = UserStat.where(user_id: newuser.id).first
s.bounce_score = @invalid_bounce_score
s.reset_bounce_score_after = 1000.years.from_now
s.save
end,
}
else
# user already on system
u = User.find(google_user_info.user_id)
if u.silenced? || u.suspended?
@blocklist.add(id)
end
@blocklist.add(id) if u.silenced? || u.suspended?
@users[id] = u
email = u.email
end
@@ -362,7 +358,7 @@ class ImportScripts::FMGP < ImportScripts::Base
# - A google+ post is a discourse topic
# - A google+ comment is a discourse post
puts '', "Importing Google+ posts and comments..."
puts "", "Importing Google+ posts and comments..."
@feeds.each do |feed|
feed["accounts"].each do |account|
@@ -371,14 +367,16 @@ class ImportScripts::FMGP < ImportScripts::Base
category["posts"].each do |post|
# G+ post / Discourse topic
import_topic(post, category)
print("\r#{@topics_imported}/#{@posts_imported} topics/posts (skipped: #{@topics_skipped}/#{@posts_skipped} blocklisted: #{@blocked_topics}/#{@blocked_posts}) ")
print(
"\r#{@topics_imported}/#{@posts_imported} topics/posts (skipped: #{@topics_skipped}/#{@posts_skipped} blocklisted: #{@blocked_topics}/#{@blocked_posts}) ",
)
end
end
end
end
end
puts ''
puts ""
end
def import_topic(post, category)
@@ -431,9 +429,7 @@ class ImportScripts::FMGP < ImportScripts::Base
return nil if !@frst_date.nil? && created_at < @first_date
user_id = user_id_from_imported_user_id(post_author_id)
if user_id.nil?
user_id = @users[post["author"]["id"]].id
end
user_id = @users[post["author"]["id"]].id if user_id.nil?
mapped = {
id: post["id"],
@@ -472,7 +468,8 @@ class ImportScripts::FMGP < ImportScripts::Base
def title_text(post, created_at)
words = message_text(post["message"])
if words.empty? || words.join("").length < @min_title_characters || words.length < @min_title_words
if words.empty? || words.join("").length < @min_title_characters ||
words.length < @min_title_words
# database has minimum length
# short posts appear not to work well as titles most of the time (in practice)
return untitled(post["author"]["name"], created_at)
@@ -483,17 +480,13 @@ class ImportScripts::FMGP < ImportScripts::Base
(@min_title_words..(words.length - 1)).each do |i|
# prefer full stop
if words[i].end_with?(".")
lastword = i
end
lastword = i if words[i].end_with?(".")
end
if lastword.nil?
# fall back on other punctuation
(@min_title_words..(words.length - 1)).each do |i|
if words[i].end_with?(',', ';', ':', '?')
lastword = i
end
lastword = i if words[i].end_with?(",", ";", ":", "?")
end
end
@@ -516,9 +509,7 @@ class ImportScripts::FMGP < ImportScripts::Base
text_types = [0, 3]
message.each do |fragment|
if text_types.include?(fragment[0])
fragment[1].split().each do |word|
words << word
end
fragment[1].split().each { |word| words << word }
elsif fragment[0] == 2
# use the display text of a link
words << fragment[1]
@@ -543,14 +534,10 @@ class ImportScripts::FMGP < ImportScripts::Base
lines << "\n#{formatted_link(post["image"]["proxy"])}\n"
end
if post["images"].present?
post["images"].each do |image|
lines << "\n#{formatted_link(image["proxy"])}\n"
end
post["images"].each { |image| lines << "\n#{formatted_link(image["proxy"])}\n" }
end
if post["videos"].present?
post["videos"].each do |video|
lines << "\n#{formatted_link(video["proxy"])}\n"
end
post["videos"].each { |video| lines << "\n#{formatted_link(video["proxy"])}\n" }
end
if post["link"].present? && post["link"]["url"].present?
url = post["link"]["url"]
@@ -575,12 +562,8 @@ class ImportScripts::FMGP < ImportScripts::Base
if fragment[2].nil?
text
else
if fragment[2]["italic"].present?
text = "<i>#{text}</i>"
end
if fragment[2]["bold"].present?
text = "<b>#{text}</b>"
end
text = "<i>#{text}</i>" if fragment[2]["italic"].present?
text = "<b>#{text}</b>" if fragment[2]["bold"].present?
if fragment[2]["strikethrough"].present?
# s more likely than del to represent user intent?
text = "<s>#{text}</s>"
@@ -594,9 +577,7 @@ class ImportScripts::FMGP < ImportScripts::Base
formatted_link_text(fragment[2], fragment[1])
elsif fragment[0] == 3
# reference to a user
if @usermap.include?(fragment[2].to_s)
return "@#{@usermap[fragment[2].to_s]}"
end
return "@#{@usermap[fragment[2].to_s]}" if @usermap.include?(fragment[2].to_s)
if fragment[2].nil?
# deleted G+ users show up with a null ID
return "<b>+#{fragment[1]}</b>"
@@ -606,12 +587,18 @@ class ImportScripts::FMGP < ImportScripts::Base
# user was in this import's authors
"@#{user.username} "
else
if google_user_info = UserAssociatedAccount.find_by(provider_name: 'google_oauth2', provider_uid: fragment[2])
if google_user_info =
UserAssociatedAccount.find_by(
provider_name: "google_oauth2",
provider_uid: fragment[2],
)
# user was not in this import, but has logged in or been imported otherwise
user = User.find(google_user_info.user_id)
"@#{user.username} "
else
raise RuntimeError.new("Google user #{fragment[1]} (id #{fragment[2]}) not imported") if !@dryrun
if !@dryrun
raise RuntimeError.new("Google user #{fragment[1]} (id #{fragment[2]}) not imported")
end
# if you want to fall back to their G+ name, just erase the raise above,
# but this should not happen
"<b>+#{fragment[1]}</b>"
@@ -681,6 +668,4 @@ class ImportScripts::FMGP < ImportScripts::Base
end
end
if __FILE__ == $0
ImportScripts::FMGP.new.perform
end
ImportScripts::FMGP.new.perform if __FILE__ == $0

View File

@@ -22,15 +22,14 @@
# that correctly and will import the replies in the wrong order.
# You should run `rake posts:reorder_posts` after the import.
require 'csv'
require 'set'
require "csv"
require "set"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'reverse_markdown' # gem 'reverse_markdown'
require "reverse_markdown" # gem 'reverse_markdown'
# Call it like this:
# RAILS_ENV=production bundle exec ruby script/import_scripts/getsatisfaction.rb DIRNAME
class ImportScripts::GetSatisfaction < ImportScripts::Base
IMPORT_ARCHIVED_TOPICS = false
# The script classifies each topic as private when at least one associated category
@@ -85,22 +84,24 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
previous_line = nil
File.open(target_filename, "w") do |file|
File.open(source_filename).each_line do |line|
line.gsub!(/(?<![^\\]\\)\\"/, '""')
line.gsub!(/\\\\/, '\\')
File
.open(source_filename)
.each_line do |line|
line.gsub!(/(?<![^\\]\\)\\"/, '""')
line.gsub!(/\\\\/, '\\')
if previous_line
previous_line << "\n" unless line.starts_with?(",")
line = "#{previous_line}#{line}"
previous_line = nil
end
if previous_line
previous_line << "\n" unless line.starts_with?(",")
line = "#{previous_line}#{line}"
previous_line = nil
end
if line.gsub!(/,\+1\\\R$/m, ',"+1"').present?
previous_line = line
else
file.puts(line)
if line.gsub!(/,\+1\\\R$/m, ',"+1"').present?
previous_line = line
else
file.puts(line)
end
end
end
file.puts(previous_line) if previous_line
end
@@ -108,18 +109,18 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
end
def csv_parse(table_name)
CSV.foreach(csv_filename(table_name),
headers: true,
header_converters: :symbol,
skip_blanks: true,
encoding: 'bom|utf-8') { |row| yield row }
CSV.foreach(
csv_filename(table_name),
headers: true,
header_converters: :symbol,
skip_blanks: true,
encoding: "bom|utf-8",
) { |row| yield row }
end
def total_rows(table_name)
CSV.foreach(csv_filename(table_name),
headers: true,
skip_blanks: true,
encoding: 'bom|utf-8')
CSV
.foreach(csv_filename(table_name), headers: true, skip_blanks: true, encoding: "bom|utf-8")
.inject(0) { |c, _| c + 1 }
end
@@ -138,13 +139,11 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
name: row[:realname],
username: row[:nickname],
created_at: DateTime.parse(row[:joined_date]),
active: true
active: true,
}
count += 1
if count % BATCH_SIZE == 0
import_users_batch!(users, count - users.length, total)
end
import_users_batch!(users, count - users.length, total) if count % BATCH_SIZE == 0
end
import_users_batch!(users, count - users.length, total)
@@ -153,9 +152,7 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
def import_users_batch!(users, offset, total)
return if users.empty?
create_users(users, offset: offset, total: total) do |user|
user
end
create_users(users, offset: offset, total: total) { |user| user }
users.clear
end
@@ -168,13 +165,11 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
rows << {
id: row[:category_id],
name: row[:name],
description: row[:description].present? ? normalize_raw!(row[:description]) : nil
description: row[:description].present? ? normalize_raw!(row[:description]) : nil,
}
end
create_categories(rows) do |row|
row
end
create_categories(rows) { |row| row }
end
def import_topic_id(topic_id)
@@ -200,7 +195,13 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
else
topic = map_post(row)
topic[:id] = topic_id
topic[:title] = row[:subject].present? ? row[:subject].strip[0...255] : "Topic title missing"
topic[:title] = (
if row[:subject].present?
row[:subject].strip[0...255]
else
"Topic title missing"
end
)
topic[:category] = category_id(row)
topic[:archived] = row[:archived_at].present?
@@ -210,9 +211,7 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
topics << topic
count += 1
if count % BATCH_SIZE == 0
import_topics_batch!(topics, count - topics.length, total)
end
import_topics_batch!(topics, count - topics.length, total) if count % BATCH_SIZE == 0
end
import_topics_batch!(topics, count - topics.length, total)
@@ -290,9 +289,7 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
posts << post
count += 1
if count % BATCH_SIZE == 0
import_posts_batch!(posts, count - posts.length, total)
end
import_posts_batch!(posts, count - posts.length, total) if count % BATCH_SIZE == 0
end
import_posts_batch!(posts, count - posts.length, total)
@@ -324,7 +321,7 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
{
user_id: user_id_from_imported_user_id(row[:user_id]) || Discourse.system_user.id,
created_at: DateTime.parse(row[:created_at]),
raw: normalize_raw!(row[:formatted_content])
raw: normalize_raw!(row[:formatted_content]),
}
end
@@ -334,7 +331,7 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
# hoist code
hoisted = {}
raw.gsub!(/(<pre>\s*)?<code>(.*?)<\/code>(\s*<\/pre>)?/mi) do
raw.gsub!(%r{(<pre>\s*)?<code>(.*?)</code>(\s*</pre>)?}mi) do
code = $2
hoist = SecureRandom.hex
# tidy code, wow, this is impressively crazy
@@ -350,9 +347,7 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
# in this case double space works best ... so odd
raw.gsub!(" ", "\n\n")
hoisted.each do |hoist, code|
raw.gsub!(hoist, "\n```\n#{code}\n```\n")
end
hoisted.each { |hoist, code| raw.gsub!(hoist, "\n```\n#{code}\n```\n") }
raw = CGI.unescapeHTML(raw)
raw = ReverseMarkdown.convert(raw)
@@ -360,7 +355,7 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
end
def create_permalinks
puts '', 'Creating Permalinks...', ''
puts "", "Creating Permalinks...", ""
Topic.listable_topics.find_each do |topic|
tcf = topic.first_post.custom_fields
@@ -372,7 +367,6 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
end
end
end
end
unless ARGV[0] && Dir.exist?(ARGV[0])

View File

@@ -20,19 +20,18 @@ DEFAULT_COOKIES_TXT = "/shared/import/cookies.txt"
ABORT_AFTER_SKIPPED_TOPIC_COUNT = 10
def driver
@driver ||= begin
chrome_args = ["disable-gpu"]
chrome_args << "headless" unless ENV["NOT_HEADLESS"] == '1'
chrome_args << "no-sandbox" if inside_container?
options = Selenium::WebDriver::Chrome::Options.new(args: chrome_args)
Selenium::WebDriver.for(:chrome, options: options)
end
@driver ||=
begin
chrome_args = ["disable-gpu"]
chrome_args << "headless" unless ENV["NOT_HEADLESS"] == "1"
chrome_args << "no-sandbox" if inside_container?
options = Selenium::WebDriver::Chrome::Options.new(args: chrome_args)
Selenium::WebDriver.for(:chrome, options: options)
end
end
def inside_container?
File.foreach("/proc/1/cgroup") do |line|
return true if line.include?("docker")
end
File.foreach("/proc/1/cgroup") { |line| return true if line.include?("docker") }
false
end
@@ -79,35 +78,38 @@ def base_url
end
def crawl_topics
1.step(nil, 100).each do |start|
url = "#{base_url}/#{@groupname}[#{start}-#{start + 99}]"
get(url)
1
.step(nil, 100)
.each do |start|
url = "#{base_url}/#{@groupname}[#{start}-#{start + 99}]"
get(url)
begin
if start == 1 && find("h2").text == "Error 403"
exit_with_error(<<~TEXT.red.bold)
begin
exit_with_error(<<~TEXT.red.bold) if start == 1 && find("h2").text == "Error 403"
Unable to find topics. Try running the script with the "--domain example.com"
option if you are a G Suite user and your group's URL contains a path with
your domain that looks like "/a/example.com".
TEXT
rescue Selenium::WebDriver::Error::NoSuchElementError
# Ignore this error. It simply means there wasn't an error.
end
rescue Selenium::WebDriver::Error::NoSuchElementError
# Ignore this error. It simply means there wasn't an error.
end
topic_urls = extract(".subject a[href*='#{@groupname}']") { |a| a["href"].sub("/d/topic/", "/forum/?_escaped_fragment_=topic/") }
break if topic_urls.size == 0
topic_urls =
extract(".subject a[href*='#{@groupname}']") do |a|
a["href"].sub("/d/topic/", "/forum/?_escaped_fragment_=topic/")
end
break if topic_urls.size == 0
topic_urls.each do |topic_url|
crawl_topic(topic_url)
topic_urls.each do |topic_url|
crawl_topic(topic_url)
# abort if this in an incremental crawl and there were too many consecutive, skipped topics
if @finished && @skipped_topic_count > ABORT_AFTER_SKIPPED_TOPIC_COUNT
puts "Skipping all other topics, because this is an incremental crawl.".green
return
# abort if this in an incremental crawl and there were too many consecutive, skipped topics
if @finished && @skipped_topic_count > ABORT_AFTER_SKIPPED_TOPIC_COUNT
puts "Skipping all other topics, because this is an incremental crawl.".green
return
end
end
end
end
end
def crawl_topic(url)
@@ -126,17 +128,14 @@ def crawl_topic(url)
messages_crawled = false
extract(".subject a[href*='#{@groupname}']") do |a|
[
a["href"].sub("/d/msg/", "/forum/message/raw?msg="),
a["title"].empty?
]
[a["href"].sub("/d/msg/", "/forum/message/raw?msg="), a["title"].empty?]
end.each do |msg_url, might_be_deleted|
messages_crawled |= crawl_message(msg_url, might_be_deleted)
end
@skipped_topic_count = skippable && messages_crawled ? 0 : @skipped_topic_count + 1
@scraped_topic_urls << url
rescue
rescue StandardError
puts "Failed to scrape topic at #{url}".red
raise if @abort_on_error
end
@@ -144,18 +143,16 @@ end
def crawl_message(url, might_be_deleted)
get(url)
filename = File.join(@path, "#{url[/#{@groupname}\/(.+)/, 1].sub("/", "-")}.eml")
filename = File.join(@path, "#{url[%r{#{@groupname}/(.+)}, 1].sub("/", "-")}.eml")
content = find("pre")["innerText"]
if !@first_message_checked
@first_message_checked = true
if content.match?(/From:.*\.\.\.@.*/i) && !@force_import
exit_with_error(<<~TEXT.red.bold)
exit_with_error(<<~TEXT.red.bold) if content.match?(/From:.*\.\.\.@.*/i) && !@force_import
It looks like you do not have permissions to see email addresses. Aborting.
Use the --force option to import anyway.
TEXT
end
end
old_md5 = Digest::MD5.file(filename) if File.exist?(filename)
@@ -169,7 +166,7 @@ rescue Selenium::WebDriver::Error::NoSuchElementError
puts "Failed to scrape message at #{url}".red
raise if @abort_on_error
end
rescue
rescue StandardError
puts "Failed to scrape message at #{url}".red
raise if @abort_on_error
end
@@ -178,10 +175,7 @@ def login
puts "Logging in..."
get("https://google.com/404")
add_cookies(
"myaccount.google.com",
"google.com"
)
add_cookies("myaccount.google.com", "google.com")
get("https://myaccount.google.com/?utm_source=sign_in_no_continue")
@@ -193,20 +187,24 @@ def login
end
def add_cookies(*domains)
File.readlines(@cookies).each do |line|
parts = line.chomp.split("\t")
next if parts.size != 7 || !domains.any? { |domain| parts[0] =~ /^\.?#{Regexp.escape(domain)}$/ }
File
.readlines(@cookies)
.each do |line|
parts = line.chomp.split("\t")
if parts.size != 7 || !domains.any? { |domain| parts[0] =~ /^\.?#{Regexp.escape(domain)}$/ }
next
end
driver.manage.add_cookie(
domain: parts[0],
httpOnly: "true".casecmp?(parts[1]),
path: parts[2],
secure: "true".casecmp?(parts[3]),
expires: parts[4] == "0" ? nil : DateTime.strptime(parts[4], "%s"),
name: parts[5],
value: parts[6]
)
end
driver.manage.add_cookie(
domain: parts[0],
httpOnly: "true".casecmp?(parts[1]),
path: parts[2],
secure: "true".casecmp?(parts[3]),
expires: parts[4] == "0" ? nil : DateTime.strptime(parts[4], "%s"),
name: parts[5],
value: parts[6],
)
end
end
def wait_for_url
@@ -240,10 +238,7 @@ def crawl
crawl_topics
@finished = true
ensure
File.write(status_filename, {
finished: @finished,
urls: @scraped_topic_urls
}.to_yaml)
File.write(status_filename, { finished: @finished, urls: @scraped_topic_urls }.to_yaml)
end
elapsed = Time.now - start_time
@@ -258,20 +253,25 @@ def parse_arguments
@abort_on_error = false
@cookies = DEFAULT_COOKIES_TXT if File.exist?(DEFAULT_COOKIES_TXT)
parser = OptionParser.new do |opts|
opts.banner = "Usage: google_groups.rb [options]"
parser =
OptionParser.new do |opts|
opts.banner = "Usage: google_groups.rb [options]"
opts.on("-g", "--groupname GROUPNAME") { |v| @groupname = v }
opts.on("-d", "--domain DOMAIN") { |v| @domain = v }
opts.on("-c", "--cookies PATH", "path to cookies.txt") { |v| @cookies = v }
opts.on("--path PATH", "output path for emails") { |v| @path = v }
opts.on("-f", "--force", "force import when user isn't allowed to see email addresses") { @force_import = true }
opts.on("-a", "--abort-on-error", "abort crawl on error instead of skipping message") { @abort_on_error = true }
opts.on("-h", "--help") do
puts opts
exit
opts.on("-g", "--groupname GROUPNAME") { |v| @groupname = v }
opts.on("-d", "--domain DOMAIN") { |v| @domain = v }
opts.on("-c", "--cookies PATH", "path to cookies.txt") { |v| @cookies = v }
opts.on("--path PATH", "output path for emails") { |v| @path = v }
opts.on("-f", "--force", "force import when user isn't allowed to see email addresses") do
@force_import = true
end
opts.on("-a", "--abort-on-error", "abort crawl on error instead of skipping message") do
@abort_on_error = true
end
opts.on("-h", "--help") do
puts opts
exit
end
end
end
begin
parser.parse!
@@ -279,10 +279,12 @@ def parse_arguments
exit_with_error(e.message, "", parser)
end
mandatory = [:groupname, :cookies]
mandatory = %i[groupname cookies]
missing = mandatory.select { |name| instance_variable_get("@#{name}").nil? }
exit_with_error("Missing arguments: #{missing.join(', ')}".red.bold, "", parser, "") if missing.any?
if missing.any?
exit_with_error("Missing arguments: #{missing.join(", ")}".red.bold, "", parser, "")
end
exit_with_error("cookies.txt not found at #{@cookies}".red.bold, "") if !File.exist?(@cookies)
@path = File.join(DEFAULT_OUTPUT_PATH, @groupname) if @path.nil?

View File

@@ -4,7 +4,6 @@ require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::HigherLogic < ImportScripts::Base
HIGHERLOGIC_DB = "higherlogic"
BATCH_SIZE = 1000
ATTACHMENT_DIR = "/shared/import/data/attachments"
@@ -12,11 +11,7 @@ class ImportScripts::HigherLogic < ImportScripts::Base
# Runs the base importer setup, then opens a single MySQL connection
# (localhost, user "root", schema HIGHERLOGIC_DB) and keeps it in @client.
def initialize
  super

  @client = Mysql2::Client.new(host: "localhost", username: "root", database: HIGHERLOGIC_DB)
end
def execute
@@ -29,7 +24,7 @@ class ImportScripts::HigherLogic < ImportScripts::Base
end
def import_groups
puts '', 'importing groups'
puts "", "importing groups"
groups = mysql_query <<-SQL
SELECT CommunityKey, CommunityName
@@ -37,16 +32,11 @@ class ImportScripts::HigherLogic < ImportScripts::Base
ORDER BY CommunityName
SQL
create_groups(groups) do |group|
{
id: group['CommunityKey'],
name: group['CommunityName']
}
end
create_groups(groups) { |group| { id: group["CommunityKey"], name: group["CommunityName"] } }
end
def import_users
puts '', 'importing users'
puts "", "importing users"
total_count = mysql_query("SELECT count(*) FROM Contact").first["count"]
batches(BATCH_SIZE) do |offset|
@@ -59,43 +49,42 @@ class ImportScripts::HigherLogic < ImportScripts::Base
break if results.size < 1
next if all_records_exist? :users, results.map { |u| u['ContactKey'] }
next if all_records_exist? :users, results.map { |u| u["ContactKey"] }
create_users(results, total: total_count, offset: offset) do |user|
next if user['EmailAddress'].blank?
next if user["EmailAddress"].blank?
{
id: user['ContactKey'],
email: user['EmailAddress'],
name: "#{user['FirstName']} #{user['LastName']}",
created_at: user['CreatedOn'] == nil ? 0 : Time.zone.at(user['CreatedOn']),
bio_raw: user['Bio'],
active: user['UserStatus'] == "Active",
admin: user['HLAdminFlag'] == 1
id: user["ContactKey"],
email: user["EmailAddress"],
name: "#{user["FirstName"]} #{user["LastName"]}",
created_at: user["CreatedOn"] == nil ? 0 : Time.zone.at(user["CreatedOn"]),
bio_raw: user["Bio"],
active: user["UserStatus"] == "Active",
admin: user["HLAdminFlag"] == 1,
}
end
end
end
# Adds imported users to their imported groups.
#
# Reads every (CommunityKey, ContactKey) pair from CommunityMember and makes
# sure a GroupUser record exists for it. Rows whose user or group has no
# imported counterpart are skipped.
def import_group_users
  puts "", "importing group users"

  group_users = mysql_query(<<-SQL).to_a
    SELECT CommunityKey, ContactKey
    FROM CommunityMember
  SQL

  group_users.each do |row|
    user_id = user_id_from_imported_user_id(row["ContactKey"])
    next unless user_id

    # Only look the group up once the user is known to exist.
    group_id = group_id_from_imported_group_id(row["CommunityKey"])
    next unless group_id

    puts "", "."
    GroupUser.find_or_create_by(user_id: user_id, group_id: group_id)
  end
end
def import_categories
puts '', 'importing categories'
puts "", "importing categories"
categories = mysql_query <<-SQL
SELECT DiscussionKey, DiscussionName
@@ -103,15 +92,12 @@ class ImportScripts::HigherLogic < ImportScripts::Base
SQL
create_categories(categories) do |category|
{
id: category['DiscussionKey'],
name: category['DiscussionName']
}
{ id: category["DiscussionKey"], name: category["DiscussionName"] }
end
end
def import_posts
puts '', 'importing topics and posts'
puts "", "importing topics and posts"
total_count = mysql_query("SELECT count(*) FROM DiscussionPost").first["count"]
batches(BATCH_SIZE) do |offset|
@@ -131,28 +117,28 @@ class ImportScripts::HigherLogic < ImportScripts::Base
SQL
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| p['MessageKey'] }
next if all_records_exist? :posts, results.map { |p| p["MessageKey"] }
create_posts(results, total: total_count, offset: offset) do |post|
raw = preprocess_raw(post['Body'])
raw = preprocess_raw(post["Body"])
mapped = {
id: post['MessageKey'],
user_id: user_id_from_imported_user_id(post['ContactKey']),
id: post["MessageKey"],
user_id: user_id_from_imported_user_id(post["ContactKey"]),
raw: raw,
created_at: Time.zone.at(post['CreatedOn']),
created_at: Time.zone.at(post["CreatedOn"]),
}
if post['ParentMessageKey'].nil?
mapped[:category] = category_id_from_imported_category_id(post['DiscussionKey']).to_i
mapped[:title] = CGI.unescapeHTML(post['Subject'])
mapped[:pinned] = post['PinnedFlag'] == 1
if post["ParentMessageKey"].nil?
mapped[:category] = category_id_from_imported_category_id(post["DiscussionKey"]).to_i
mapped[:title] = CGI.unescapeHTML(post["Subject"])
mapped[:pinned] = post["PinnedFlag"] == 1
else
topic = topic_lookup_from_imported_post_id(post['ParentMessageKey'])
topic = topic_lookup_from_imported_post_id(post["ParentMessageKey"])
if topic.present?
mapped[:topic_id] = topic[:topic_id]
else
puts "Parent post #{post['ParentMessageKey']} doesn't exist. Skipping."
puts "Parent post #{post["ParentMessageKey"]} doesn't exist. Skipping."
next
end
end
@@ -163,20 +149,19 @@ class ImportScripts::HigherLogic < ImportScripts::Base
end
def import_attachments
puts '', 'importing attachments'
puts "", "importing attachments"
count = 0
total_attachments = mysql_query(<<-SQL
total_attachments = mysql_query(<<-SQL).first["count"]
SELECT COUNT(*) count
FROM LibraryEntryFile l
JOIN DiscussionPost p ON p.AttachmentDocumentKey = l.DocumentKey
WHERE p.CreatedOn > '2020-01-01 00:00:00'
SQL
).first['count']
batches(BATCH_SIZE) do |offset|
attachments = mysql_query(<<-SQL
attachments = mysql_query(<<-SQL).to_a
SELECT l.VersionName,
l.FileExtension,
p.MessageKey
@@ -186,17 +171,16 @@ class ImportScripts::HigherLogic < ImportScripts::Base
LIMIT #{BATCH_SIZE}
OFFSET #{offset}
SQL
).to_a
break if attachments.empty?
attachments.each do |a|
print_status(count += 1, total_attachments, get_start_time("attachments"))
original_filename = "#{a['VersionName']}.#{a['FileExtension']}"
original_filename = "#{a["VersionName"]}.#{a["FileExtension"]}"
path = File.join(ATTACHMENT_DIR, original_filename)
if File.exist?(path)
if post = Post.find(post_id_from_imported_post_id(a['MessageKey']))
if post = Post.find(post_id_from_imported_post_id(a["MessageKey"]))
filename = File.basename(original_filename)
upload = create_upload(post.user.id, path, filename)
@@ -205,7 +189,9 @@ class ImportScripts::HigherLogic < ImportScripts::Base
post.raw << "\n\n" << html
post.save!
PostUpload.create!(post: post, upload: upload) unless PostUpload.where(post: post, upload: upload).exists?
unless PostUpload.where(post: post, upload: upload).exists?
PostUpload.create!(post: post, upload: upload)
end
end
end
end
@@ -217,7 +203,7 @@ class ImportScripts::HigherLogic < ImportScripts::Base
raw = body.dup
# trim off any post text beyond ---- to remove email threading
raw = raw.slice(0..(raw.index('------'))) || raw
raw = raw.slice(0..(raw.index("------"))) || raw
raw = HtmlToMarkdown.new(raw).to_markdown
raw

File diff suppressed because it is too large Load Diff

View File

@@ -5,19 +5,19 @@ require "reverse_markdown"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::IPBoard3 < ImportScripts::Base
BATCH_SIZE ||= 5000
BATCH_SIZE ||= 5000
UPLOADS_DIR ||= "/path/to/uploads"
# Runs the base importer setup, then opens a single MySQL connection to the
# source database. Connection settings come from the DB_HOST, DB_USER, DB_PW
# and DB_NAME environment variables; host and user fall back to "localhost"
# and "root".
def initialize
  super

  @client =
    Mysql2::Client.new(
      host: ENV["DB_HOST"] || "localhost",
      username: ENV["DB_USER"] || "root",
      password: ENV["DB_PW"],
      database: ENV["DB_NAME"],
    )
  # Have the server hand back result strings without character-set conversion.
  @client.query("SET character_set_results = binary")
end
@@ -39,7 +39,7 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
total_users = mysql_query("SELECT COUNT(*) count FROM members").first["count"]
batches(BATCH_SIZE) do |offset|
users = mysql_query(<<~SQL
users = mysql_query(<<~SQL).to_a
SELECT member_id id
, name
, email
@@ -59,7 +59,6 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
ORDER BY member_id
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if users.empty?
@@ -67,7 +66,9 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
create_users(users, total: total_users, offset: offset) do |u|
next if user_id_from_imported_user_id(u["id"])
%W{name email title pp_about_me}.each { |k| u[k]&.encode!("utf-8", "utf-8", invalid: :replace, undef: :replace, replace: "") }
%W[name email title pp_about_me].each do |k|
u[k]&.encode!("utf-8", "utf-8", invalid: :replace, undef: :replace, replace: "")
end
next if u["name"].blank? && !Email.is_valid?(u["email"])
{
@@ -77,30 +78,38 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
created_at: Time.zone.at(u["joined"]),
registration_ip_address: u["ip_address"],
title: CGI.unescapeHTML(u["title"].presence || ""),
date_of_birth: (Date.parse(u["date_of_birth"]) rescue nil),
date_of_birth:
(
begin
Date.parse(u["date_of_birth"])
rescue StandardError
nil
end
),
last_seen_at: Time.zone.at(u["last_activity"]),
admin: !!(u["g_title"] =~ /admin/i),
moderator: !!(u["g_title"] =~ /moderator/i),
bio_raw: clean_up(u["pp_about_me"]),
post_create_action: proc do |new_user|
if u["member_banned"] == 1
new_user.update(suspended_at: DateTime.now, suspended_till: 100.years.from_now)
elsif u["pp_main_photo"].present?
path = File.join(UPLOADS_DIR, u["pp_main_photo"])
if File.exist?(path)
begin
upload = create_upload(new_user.id, path, File.basename(path))
if upload.persisted?
new_user.create_user_avatar
new_user.user_avatar.update(custom_upload_id: upload.id)
new_user.update(uploaded_avatar_id: upload.id)
post_create_action:
proc do |new_user|
if u["member_banned"] == 1
new_user.update(suspended_at: DateTime.now, suspended_till: 100.years.from_now)
elsif u["pp_main_photo"].present?
path = File.join(UPLOADS_DIR, u["pp_main_photo"])
if File.exist?(path)
begin
upload = create_upload(new_user.id, path, File.basename(path))
if upload.persisted?
new_user.create_user_avatar
new_user.user_avatar.update(custom_upload_id: upload.id)
new_user.update(uploaded_avatar_id: upload.id)
end
rescue StandardError
# don't care
end
rescue
# don't care
end
end
end
end
end,
}
end
end
@@ -109,10 +118,11 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
def import_categories
puts "", "importing categories..."
categories = mysql_query("SELECT id, parent_id, name, description, position FROM forums ORDER BY id").to_a
categories =
mysql_query("SELECT id, parent_id, name, description, position FROM forums ORDER BY id").to_a
parent_categories = categories.select { |c| c["parent_id"] == -1 }
child_categories = categories.select { |c| c["parent_id"] != -1 }
child_categories = categories.select { |c| c["parent_id"] != -1 }
create_categories(parent_categories) do |c|
next if category_id_from_imported_category_id(c["id"])
@@ -142,7 +152,7 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
@closed_topic_ids = []
last_topic_id = -1
total_topics = mysql_query(<<~SQL
total_topics = mysql_query(<<~SQL).first["count"]
SELECT COUNT(*) count
FROM topics
JOIN posts ON tid = topic_id
@@ -152,10 +162,9 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
AND approved = 1
AND queued = 0
SQL
).first["count"]
batches(BATCH_SIZE) do |offset|
topics = mysql_query(<<~SQL
topics = mysql_query(<<~SQL).to_a
SELECT tid id
, title
, state
@@ -176,7 +185,6 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
ORDER BY tid
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if topics.empty?
@@ -206,17 +214,16 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
puts "", "importing posts..."
last_post_id = -1
total_posts = mysql_query(<<~SQL
total_posts = mysql_query(<<~SQL).first["count"]
SELECT COUNT(*) count
FROM posts
WHERE new_topic = 0
AND pdelete_time = 0
AND queued = 0
SQL
).first["count"]
batches(BATCH_SIZE) do |offset|
posts = mysql_query(<<~SQL
posts = mysql_query(<<~SQL).to_a
SELECT pid id
, author_id
, post_date
@@ -230,7 +237,6 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
ORDER BY pid
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if posts.empty?
@@ -276,17 +282,16 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
puts "", "import personal topics..."
last_personal_topic_id = -1
total_personal_topics = mysql_query(<<~SQL
total_personal_topics = mysql_query(<<~SQL).first["count"]
SELECT COUNT(*) count
FROM message_topics
JOIN message_posts ON msg_topic_id = mt_id
WHERE mt_is_deleted = 0
AND msg_is_first_post = 1
SQL
).first["count"]
batches(BATCH_SIZE) do |offset|
personal_topics = mysql_query(<<~SQL
personal_topics = mysql_query(<<~SQL).to_a
SELECT mt_id id
, mt_date
, mt_title
@@ -302,7 +307,6 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
ORDER BY mt_id
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if personal_topics.empty?
@@ -312,7 +316,8 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
next if post_id_from_imported_post_id("pt-#{pt["id"]}")
user_id = user_id_from_imported_user_id(pt["mt_starter_id"]) || -1
user_ids = [pt["mt_to_member_id"]] + pt["mt_invited_members"].scan(/i:(\d+);/).flatten.map(&:to_i)
user_ids =
[pt["mt_to_member_id"]] + pt["mt_invited_members"].scan(/i:(\d+);/).flatten.map(&:to_i)
user_ids.map! { |id| user_id_from_imported_user_id(id) }
user_ids.compact!
user_ids.uniq!
@@ -334,10 +339,13 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
puts "", "importing personal posts..."
last_personal_post_id = -1
total_personal_posts = mysql_query("SELECT COUNT(*) count FROM message_posts WHERE msg_is_first_post = 0").first["count"]
total_personal_posts =
mysql_query("SELECT COUNT(*) count FROM message_posts WHERE msg_is_first_post = 0").first[
"count"
]
batches(BATCH_SIZE) do |offset|
personal_posts = mysql_query(<<~SQL
personal_posts = mysql_query(<<~SQL).to_a
SELECT msg_id id
, msg_topic_id
, msg_date
@@ -349,7 +357,6 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
ORDER BY msg_id
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if personal_posts.empty?
@@ -374,27 +381,32 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
def clean_up(raw, user_id = -1)
raw.encode!("utf-8", "utf-8", invalid: :replace, undef: :replace, replace: "")
raw.gsub!(/<(.+)>&nbsp;<\/\1>/, "\n\n")
raw.gsub!(%r{<(.+)>&nbsp;</\1>}, "\n\n")
doc = Nokogiri::HTML5.fragment(raw)
doc.css("blockquote.ipsBlockquote").each do |bq|
post_id = post_id_from_imported_post_id(bq["data-cid"])
if post = Post.find_by(id: post_id)
bq.replace %{<br>[quote="#{post.user.username},post:#{post.post_number},topic:#{post.topic_id}"]\n#{bq.inner_html}\n[/quote]<br>}
doc
.css("blockquote.ipsBlockquote")
.each do |bq|
post_id = post_id_from_imported_post_id(bq["data-cid"])
if post = Post.find_by(id: post_id)
bq.replace %{<br>[quote="#{post.user.username},post:#{post.post_number},topic:#{post.topic_id}"]\n#{bq.inner_html}\n[/quote]<br>}
end
end
end
markdown = ReverseMarkdown.convert(doc.to_html)
markdown.gsub!(/\[attachment=(\d+):.+\]/) do
if a = mysql_query("SELECT attach_file, attach_location FROM attachments WHERE attach_id = #{$1}").first
if a =
mysql_query(
"SELECT attach_file, attach_location FROM attachments WHERE attach_id = #{$1}",
).first
path = File.join(UPLOADS_DIR, a["attach_location"])
if File.exist?(path)
begin
upload = create_upload(user_id, path, a["attach_file"])
return html_for_upload(upload, a["attach_file"]) if upload.persisted?
rescue
rescue StandardError
end
end
end
@@ -406,7 +418,6 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
# Runs +sql+ on the connection held in @client and returns whatever the
# client's #query returns.
def mysql_query(sql)
  connection = @client
  connection.query(sql)
end
end
ImportScripts::IPBoard3.new.perform

View File

@@ -1,25 +1,26 @@
# frozen_string_literal: true
require "mysql2"
require_relative 'base'
require_relative "base"
class ImportScripts::JForum < ImportScripts::Base
BATCH_SIZE = 1000
REMOTE_AVATAR_REGEX ||= /\Ahttps?:\/\//i
REMOTE_AVATAR_REGEX ||= %r{\Ahttps?://}i
# Runs the base importer setup, loads the settings YAML named by the first
# command-line argument (keys symbolized) into @settings, and opens a single
# MySQL connection described by its :database section.
def initialize
  super

  @settings = YAML.safe_load(File.read(ARGV.first), symbolize_names: true)
  database = @settings[:database]

  @database_client =
    Mysql2::Client.new(
      host: database[:host],
      port: database[:port],
      username: database[:username],
      password: database[:password],
      database: database[:schema],
      reconnect: true,
    )
end
def execute
@@ -39,7 +40,7 @@ class ImportScripts::JForum < ImportScripts::Base
end
def import_users
puts '', 'creating users'
puts "", "creating users"
total_count = count("SELECT COUNT(1) AS count FROM jforum_users")
last_user_id = 0
@@ -69,9 +70,7 @@ class ImportScripts::JForum < ImportScripts::Base
active: row[:user_active] == 1,
location: row[:user_from],
custom_fields: user_custom_fields(row),
post_create_action: proc do |user|
import_avatar(user, row[:user_avatar])
end
post_create_action: proc { |user| import_avatar(user, row[:user_avatar]) },
}
end
end
@@ -84,13 +83,14 @@ class ImportScripts::JForum < ImportScripts::Base
@settings[:custom_fields].map do |field|
columns << (field[:alias] ? "#{field[:column]} AS #{field[:alias]}" : field[:column])
end
", #{columns.join(', ')}"
", #{columns.join(", ")}"
end
# Returns every UserField keyed by its name. The hash is built on first use
# and memoized in @user_fields.
def user_fields
  @user_fields ||= UserField.all.to_h { |field| [field.name, field] }
end
def user_custom_fields(row)
@@ -124,7 +124,11 @@ class ImportScripts::JForum < ImportScripts::Base
if File.file?(path)
@uploader.create_avatar(user, path)
elsif avatar_source.match?(REMOTE_AVATAR_REGEX)
UserAvatar.import_url_for_user(avatar_source, user) rescue nil
begin
UserAvatar.import_url_for_user(avatar_source, user)
rescue StandardError
nil
end
end
end
@@ -218,10 +222,11 @@ class ImportScripts::JForum < ImportScripts::Base
id: "C#{row[:categories_id]}",
name: row[:title],
position: row[:display_order],
post_create_action: proc do |category|
url = File.join(@settings[:permalink_prefix], "forums/list/#{row[:categories_id]}.page")
Permalink.create(url: url, category_id: category.id) unless Permalink.find_by(url: url)
end
post_create_action:
proc do |category|
url = File.join(@settings[:permalink_prefix], "forums/list/#{row[:categories_id]}.page")
Permalink.create(url: url, category_id: category.id) unless Permalink.find_by(url: url)
end,
}
end
@@ -237,17 +242,19 @@ class ImportScripts::JForum < ImportScripts::Base
name: row[:forum_name],
description: row[:forum_desc],
position: row[:forum_order],
parent_category_id: @lookup.category_id_from_imported_category_id("C#{row[:categories_id]}"),
post_create_action: proc do |category|
url = File.join(@settings[:permalink_prefix], "forums/show/#{row[:forum_id]}.page")
Permalink.create(url: url, category_id: category.id) unless Permalink.find_by(url: url)
end
parent_category_id:
@lookup.category_id_from_imported_category_id("C#{row[:categories_id]}"),
post_create_action:
proc do |category|
url = File.join(@settings[:permalink_prefix], "forums/show/#{row[:forum_id]}.page")
Permalink.create(url: url, category_id: category.id) unless Permalink.find_by(url: url)
end,
}
end
end
def import_posts
puts '', 'creating topics and posts'
puts "", "creating topics and posts"
total_count = count("SELECT COUNT(1) AS count FROM jforum_posts")
last_post_id = 0
@@ -286,7 +293,7 @@ class ImportScripts::JForum < ImportScripts::Base
user_id: user_id,
created_at: row[:post_time],
raw: post_text,
import_topic_id: row[:topic_id]
import_topic_id: row[:topic_id],
}
if row[:topic_acceptedanswer_post_id] == row[:post_id]
@@ -312,7 +319,9 @@ class ImportScripts::JForum < ImportScripts::Base
TopicViewItem.add(post.topic_id, row[:poster_ip], post.user_id, post.created_at, true)
end
mapped[:tags] = @tags_by_import_forum_id[row[:forum_id]] if @settings[:import_categories_as_tags]
mapped[:tags] = @tags_by_import_forum_id[row[:forum_id]] if @settings[
:import_categories_as_tags
]
mapped[:category] = @lookup.category_id_from_imported_category_id(row[:forum_id])
mapped
@@ -470,7 +479,11 @@ class ImportScripts::JForum < ImportScripts::Base
category_id = @lookup.category_id_from_imported_category_id(row[:forum_id])
if user && category_id
CategoryUser.set_notification_level_for_category(user, NotificationLevels.all[:watching], category_id)
CategoryUser.set_notification_level_for_category(
user,
NotificationLevels.all[:watching],
category_id,
)
end
end
@@ -511,7 +524,11 @@ class ImportScripts::JForum < ImportScripts::Base
topic = @lookup.topic_lookup_from_imported_post_id(row[:topic_first_post_id])
if user_id && topic
TopicUser.change(user_id, topic[:topic_id], notification_level: NotificationLevels.all[:watching])
TopicUser.change(
user_id,
topic[:topic_id],
notification_level: NotificationLevels.all[:watching],
)
end
current_index += 1
@@ -545,17 +562,17 @@ class ImportScripts::JForum < ImportScripts::Base
end
# Moves every [tag...] and [/tag] marker in +text+ onto a line of its own,
# mutating +text+ in place.
#
# +tag+ is interpolated into the patterns as-is (not regex-escaped). The
# return value is that of the last gsub! (nil when it changed nothing).
def fix_bbcode_tag!(tag:, text:)
  # Drop whitespace directly in front of a complete [tag]...[/tag] span.
  text.gsub!(%r{\s+(\[#{tag}\].*?\[/#{tag}\])}im, '\1')

  # Opening tag: add a newline after it unless it already ends a line, and
  # one before it unless it already starts a line.
  text.gsub!(/(\[#{tag}.*?\])(?!$)/i) { "#{$1}\n" }
  text.gsub!(/((?<!^)\[#{tag}.*?\])/i) { "\n#{$1}" }

  # Closing tag: same treatment.
  text.gsub!(%r{(\[/#{tag}\])(?!$)}i) { "#{$1}\n" }
  text.gsub!(%r{((?<!^)\[/#{tag}\])}i) { "\n#{$1}" }
end
def fix_inline_bbcode!(tag:, text:)
text.gsub!(/\[(#{tag}.*?)\](.*?)\[\/#{tag}\]/im) do
text.gsub!(%r{\[(#{tag}.*?)\](.*?)\[/#{tag}\]}im) do
beginning_tag = $1
content = $2.gsub(/(\n{2,})/) { "[/#{tag}]#{$1}[#{beginning_tag}]" }
"[#{beginning_tag}]#{content}[/#{tag}]"

View File

@@ -1,12 +1,11 @@
# frozen_string_literal: true
# Jive importer
require 'nokogiri'
require 'csv'
require "nokogiri"
require "csv"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::Jive < ImportScripts::Base
BATCH_SIZE = 1000
CATEGORY_IDS = [2023, 2003, 2004, 2042, 2036, 2029] # categories that should be imported
@@ -17,9 +16,9 @@ class ImportScripts::Jive < ImportScripts::Base
puts "loading post mappings..."
@post_number_map = {}
Post.pluck(:id, :post_number).each do |post_id, post_number|
@post_number_map[post_id] = post_number
end
Post
.pluck(:id, :post_number)
.each { |post_id, post_number| @post_number_map[post_id] = post_number }
end
def created_post(post)
@@ -47,19 +46,13 @@ class ImportScripts::Jive < ImportScripts::Base
end
# Defines one reader method per entry of +cols+ on the receiver's class; the
# reader for the column at position idx returns @row[idx].
def initialize(cols)
  cols.each_with_index { |col, idx| self.class.define_method(col) { @row[idx] } }
end
end
# Imports the accumulated +users+ batch exactly once via create_users, then
# empties the array so the caller can keep filling it. Does nothing (and
# returns nil) when the batch is empty.
def load_user_batch!(users, offset, total)
  return if users.empty?

  create_users(users, offset: offset, total: total) { |user| user }
  users.clear
end
@@ -72,53 +65,55 @@ class ImportScripts::Jive < ImportScripts::Base
current_row = +""
double_quote_count = 0
File.open(filename).each_line do |line|
File
.open(filename)
.each_line do |line|
line.gsub!(/\\(.{1})/) { |m| m[-1] == '"' ? '""' : m[-1] }
line.strip!
line.gsub!(/\\(.{1})/) { |m| m[-1] == '"' ? '""' : m[-1] }
line.strip!
current_row << "\n" unless current_row.empty?
current_row << line
current_row << "\n" unless current_row.empty?
current_row << line
double_quote_count += line.scan('"').count
double_quote_count += line.scan('"').count
next if double_quote_count % 2 == 1
if double_quote_count % 2 == 1
next
end
raw =
begin
CSV.parse(current_row)
rescue CSV::MalformedCSVError => e
puts e.message
puts "*" * 100
puts "Bad row skipped, line is: #{line}"
puts
puts current_row
puts
puts "double quote count is : #{double_quote_count}"
puts "*" * 100
raw = begin
CSV.parse(current_row)
rescue CSV::MalformedCSVError => e
puts e.message
puts "*" * 100
puts "Bad row skipped, line is: #{line}"
puts
puts current_row
puts
puts "double quote count is : #{double_quote_count}"
puts "*" * 100
current_row = ""
double_quote_count = 0
next
end[
0
]
current_row = ""
double_quote_count = 0
next
end[0]
if first
row = RowResolver.create(raw)
if first
row = RowResolver.create(raw)
current_row = ""
double_quote_count = 0
first = false
next
end
row.load(raw)
yield row
current_row = ""
double_quote_count = 0
first = false
next
end
row.load(raw)
yield row
current_row = ""
double_quote_count = 0
end
end
def total_rows(table)
@@ -129,13 +124,9 @@ class ImportScripts::Jive < ImportScripts::Base
puts "", "importing groups..."
rows = []
csv_parse("groups") do |row|
rows << { id: row.groupid, name: row.name }
end
csv_parse("groups") { |row| rows << { id: row.groupid, name: row.name } }
create_groups(rows) do |row|
row
end
create_groups(rows) { |row| row }
end
def import_users
@@ -147,15 +138,12 @@ class ImportScripts::Jive < ImportScripts::Base
total = total_rows("users")
csv_parse("users") do |row|
id = row.userid
email = "#{row.email}"
# fake it
if row.email.blank? || row.email !~ /@/
email = fake_email
end
email = fake_email if row.email.blank? || row.email !~ /@/
name = "#{row.firstname} #{row.lastname}"
username = row.username
@@ -175,14 +163,11 @@ class ImportScripts::Jive < ImportScripts::Base
created_at: created_at,
last_seen_at: last_seen_at,
active: is_activated.to_i == 1,
approved: true
approved: true,
}
count += 1
if count % BATCH_SIZE == 0
load_user_batch! users, count - users.length, total
end
load_user_batch! users, count - users.length, total if count % BATCH_SIZE == 0
end
load_user_batch! users, count, total
@@ -195,9 +180,7 @@ class ImportScripts::Jive < ImportScripts::Base
user_id = user_id_from_imported_user_id(row.userid)
group_id = group_id_from_imported_group_id(row.groupid)
if user_id && group_id
GroupUser.find_or_create_by(user_id: user_id, group_id: group_id)
end
GroupUser.find_or_create_by(user_id: user_id, group_id: group_id) if user_id && group_id
end
end
@@ -209,9 +192,7 @@ class ImportScripts::Jive < ImportScripts::Base
rows << { id: row.communityid, name: "#{row.name} (#{row.communityid})" }
end
create_categories(rows) do |row|
row
end
create_categories(rows) { |row| row }
end
def normalize_raw!(raw)
@@ -219,9 +200,7 @@ class ImportScripts::Jive < ImportScripts::Base
raw = raw[5..-6]
doc = Nokogiri::HTML5.fragment(raw)
doc.css('img').each do |img|
img.remove if img['class'] == "jive-image"
end
doc.css("img").each { |img| img.remove if img["class"] == "jive-image" }
raw = doc.to_html
raw = raw[4..-1]
@@ -231,7 +210,6 @@ class ImportScripts::Jive < ImportScripts::Base
def import_post_batch!(posts, topics, offset, total)
create_posts(posts, total: total, offset: offset) do |post|
mapped = {}
mapped[:id] = post[:id]
@@ -271,7 +249,7 @@ class ImportScripts::Jive < ImportScripts::Base
mapped
end
posts.clear
posts.clear
end
def import_posts
@@ -281,7 +259,6 @@ class ImportScripts::Jive < ImportScripts::Base
thread_map = {}
csv_parse("messages") do |thread|
next unless CATEGORY_IDS.include?(thread.containerid.to_i)
if !thread.parentmessageid
@@ -291,32 +268,38 @@ class ImportScripts::Jive < ImportScripts::Base
#IMAGE UPLOADER
if thread.imagecount
Dir.foreach("/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}") do |item|
next if item == ('.') || item == ('..') || item == ('.DS_Store')
photo_path = "/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}/#{item}"
Dir.foreach(
"/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}",
) do |item|
next if item == (".") || item == ("..") || item == (".DS_Store")
photo_path =
"/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}/#{item}"
upload = create_upload(thread.userid, photo_path, File.basename(photo_path))
if upload.persisted?
puts "Image upload is successful for #{photo_path}, new path is #{upload.url}!"
thread.body.gsub!(item, upload.url)
else
puts "Error: Image upload is not successful for #{photo_path}!"
end
if upload.persisted?
puts "Image upload is successful for #{photo_path}, new path is #{upload.url}!"
thread.body.gsub!(item, upload.url)
else
puts "Error: Image upload is not successful for #{photo_path}!"
end
end
end
#ATTACHMENT UPLOADER
if thread.attachmentcount
Dir.foreach("/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}") do |item|
next if item == ('.') || item == ('..') || item == ('.DS_Store')
attach_path = "/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}/#{item}"
Dir.foreach(
"/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}",
) do |item|
next if item == (".") || item == ("..") || item == (".DS_Store")
attach_path =
"/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}/#{item}"
upload = create_upload(thread.userid, attach_path, File.basename(attach_path))
if upload.persisted?
puts "Attachment upload is successful for #{attach_path}, new path is #{upload.url}!"
thread.body.gsub!(item, upload.url)
thread.body << "<br/><br/> #{attachment_html(upload, item)}"
else
puts "Error: Attachment upload is not successful for #{attach_path}!"
end
if upload.persisted?
puts "Attachment upload is successful for #{attach_path}, new path is #{upload.url}!"
thread.body.gsub!(item, upload.url)
thread.body << "<br/><br/> #{attachment_html(upload, item)}"
else
puts "Error: Attachment upload is not successful for #{attach_path}!"
end
end
end
@@ -329,7 +312,6 @@ class ImportScripts::Jive < ImportScripts::Base
body: normalize_raw!(thread.body || thread.subject || "<missing>"),
created_at: DateTime.parse(thread.creationdate),
}
end
end
@@ -348,35 +330,40 @@ class ImportScripts::Jive < ImportScripts::Base
next unless CATEGORY_IDS.include?(thread.containerid.to_i)
if thread.parentmessageid
#IMAGE UPLOADER
if thread.imagecount
Dir.foreach("/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}") do |item|
next if item == ('.') || item == ('..') || item == ('.DS_Store')
photo_path = "/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}/#{item}"
Dir.foreach(
"/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}",
) do |item|
next if item == (".") || item == ("..") || item == (".DS_Store")
photo_path =
"/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}/#{item}"
upload = create_upload(thread.userid, photo_path, File.basename(photo_path))
if upload.persisted?
puts "Image upload is successful for #{photo_path}, new path is #{upload.url}!"
thread.body.gsub!(item, upload.url)
else
puts "Error: Image upload is not successful for #{photo_path}!"
end
if upload.persisted?
puts "Image upload is successful for #{photo_path}, new path is #{upload.url}!"
thread.body.gsub!(item, upload.url)
else
puts "Error: Image upload is not successful for #{photo_path}!"
end
end
end
#ATTACHMENT UPLOADER
if thread.attachmentcount
Dir.foreach("/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}") do |item|
next if item == ('.') || item == ('..') || item == ('.DS_Store')
attach_path = "/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}/#{item}"
Dir.foreach(
"/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}",
) do |item|
next if item == (".") || item == ("..") || item == (".DS_Store")
attach_path =
"/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}/#{item}"
upload = create_upload(thread.userid, attach_path, File.basename(attach_path))
if upload.persisted?
puts "Attachment upload is successful for #{attach_path}, new path is #{upload.url}!"
thread.body.gsub!(item, upload.url)
thread.body << "<br/><br/> #{attachment_html(upload, item)}"
else
puts "Error: Attachment upload is not successful for #{attach_path}!"
end
if upload.persisted?
puts "Attachment upload is successful for #{attach_path}, new path is #{upload.url}!"
thread.body.gsub!(item, upload.url)
thread.body << "<br/><br/> #{attachment_html(upload, item)}"
else
puts "Error: Attachment upload is not successful for #{attach_path}!"
end
end
end
@@ -386,7 +373,7 @@ class ImportScripts::Jive < ImportScripts::Base
user_id: thread.userid,
title: thread.subject,
body: normalize_raw!(thread.body),
created_at: DateTime.parse(thread.creationdate)
created_at: DateTime.parse(thread.creationdate),
}
posts << row
count += 1
@@ -399,7 +386,6 @@ class ImportScripts::Jive < ImportScripts::Base
import_post_batch!(posts, topic_map, count - posts.length, total) if posts.length > 0
end
end
unless ARGV[0] && Dir.exist?(ARGV[0])

View File

@@ -7,7 +7,6 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb")
# https://developers.jivesoftware.com/api/v3/cloud/rest/index.html
class ImportScripts::JiveApi < ImportScripts::Base
USER_COUNT ||= 1000
POST_COUNT ||= 100
STAFF_GUARDIAN ||= Guardian.new(Discourse.system_user)
@@ -16,37 +15,141 @@ class ImportScripts::JiveApi < ImportScripts::Base
#############################
# WHOLE CATEGORY OF CONTENT #
#############################
# Announcement & News
{ jive_object: { type: 37, id: 1004 }, filters: { created_after: 1.year.ago, type: "post" }, category_id: 7 },
{
jive_object: {
type: 37,
id: 1004,
},
filters: {
created_after: 1.year.ago,
type: "post",
},
category_id: 7,
},
# Questions & Answers / General Discussions
{ jive_object: { type: 14, id: 2006 }, filters: { created_after: 6.months.ago, type: "discussion" }, category: Proc.new { |c| c["question"] ? 5 : 21 } },
{
jive_object: {
type: 14,
id: 2006,
},
filters: {
created_after: 6.months.ago,
type: "discussion",
},
category: Proc.new { |c| c["question"] ? 5 : 21 },
},
# Anywhere beta
{ jive_object: { type: 14, id: 2052 }, filters: { created_after: 6.months.ago, type: "discussion" }, category_id: 22 },
{
jive_object: {
type: 14,
id: 2052,
},
filters: {
created_after: 6.months.ago,
type: "discussion",
},
category_id: 22,
},
# Tips & Tricks
{ jive_object: { type: 37, id: 1284 }, filters: { type: "post" }, category_id: 6 },
{ jive_object: { type: 37, id: 1319 }, filters: { type: "post" }, category_id: 6 },
{ jive_object: { type: 37, id: 1177 }, filters: { type: "post" }, category_id: 6 },
{ jive_object: { type: 37, id: 1165 }, filters: { type: "post" }, category_id: 6 },
# Ambassadors
{ jive_object: { type: 700, id: 1001 }, filters: { type: "discussion" }, authenticated: true, category_id: 8 },
{
jive_object: {
type: 700,
id: 1001,
},
filters: {
type: "discussion",
},
authenticated: true,
category_id: 8,
},
# Experts
{ jive_object: { type: 700, id: 1034 }, filters: { type: "discussion" }, authenticated: true, category_id: 15 },
{
jive_object: {
type: 700,
id: 1034,
},
filters: {
type: "discussion",
},
authenticated: true,
category_id: 15,
},
# Feature Requests
{ jive_object: { type: 14, id: 2015 }, filters: { type: "idea" }, category_id: 31 },
####################
# SELECTED CONTENT #
####################
# Announcement & News
{ jive_object: { type: 37, id: 1004 }, filters: { entities: { 38 => [1345, 1381, 1845, 2046, 2060, 2061] } }, category_id: 7 },
{
jive_object: {
type: 37,
id: 1004,
},
filters: {
entities: {
38 => [1345, 1381, 1845, 2046, 2060, 2061],
},
},
category_id: 7,
},
# Problem Solving
{ jive_object: { type: 14, id: 2006 }, filters: { entities: { 2 => [116685, 160745, 177010, 223482, 225036, 233228, 257882, 285103, 292297, 345243, 363250, 434546] } }, category_id: 10 },
{
jive_object: {
type: 14,
id: 2006,
},
filters: {
entities: {
2 => [
116_685,
160_745,
177_010,
223_482,
225_036,
233_228,
257_882,
285_103,
292_297,
345_243,
363_250,
434_546,
],
},
},
category_id: 10,
},
# General Discussions
{ jive_object: { type: 14, id: 2006 }, filters: { entities: { 2 => [178203, 188350, 312734] } }, category_id: 21 },
{
jive_object: {
type: 14,
id: 2006,
},
filters: {
entities: {
2 => [178_203, 188_350, 312_734],
},
},
category_id: 21,
},
# Questions & Answers
{ jive_object: { type: 14, id: 2006 }, filters: { entities: { 2 => [418811] } }, category_id: 5 },
{
jive_object: {
type: 14,
id: 2006,
},
filters: {
entities: {
2 => [418_811],
},
},
category_id: 5,
},
]
def initialize
@@ -75,9 +178,7 @@ class ImportScripts::JiveApi < ImportScripts::Base
people = get("people/email/#{user.email}?fields=initialLogin,-resources", true)
if people && people["initialLogin"].present?
created_at = DateTime.parse(people["initialLogin"])
if user.created_at > created_at
user.update_columns(created_at: created_at)
end
user.update_columns(created_at: created_at) if user.created_at > created_at
end
end
end
@@ -89,7 +190,11 @@ class ImportScripts::JiveApi < ImportScripts::Base
start_index = [0, UserCustomField.where(name: "import_id").count - USER_COUNT].max
loop do
users = get("people/@all?fields=initialLogin,emails,displayName,mentionName,thumbnailUrl,-resources&count=#{USER_COUNT}&startIndex=#{start_index}", true)
users =
get(
"people/@all?fields=initialLogin,emails,displayName,mentionName,thumbnailUrl,-resources&count=#{USER_COUNT}&startIndex=#{start_index}",
true,
)
create_users(users["list"], offset: imported_users) do |user|
{
id: user["id"],
@@ -113,7 +218,11 @@ class ImportScripts::JiveApi < ImportScripts::Base
TO_IMPORT.each do |to_import|
puts Time.now
entity = to_import[:jive_object]
places = get("places?fields=placeID,name,-resources&filter=entityDescriptor(#{entity[:type]},#{entity[:id]})", to_import[:authenticated])
places =
get(
"places?fields=placeID,name,-resources&filter=entityDescriptor(#{entity[:type]},#{entity[:id]})",
to_import[:authenticated],
)
import_place_contents(places["list"][0], to_import) if places && places["list"].present?
end
end
@@ -125,19 +234,28 @@ class ImportScripts::JiveApi < ImportScripts::Base
if to_import.dig(:filters, :entities).present?
path = "contents"
entities = to_import[:filters][:entities].flat_map { |type, ids| ids.map { |id| "#{type},#{id}" } }
entities =
to_import[:filters][:entities].flat_map { |type, ids| ids.map { |id| "#{type},#{id}" } }
filters = "filter=entityDescriptor(#{entities.join(",")})"
else
path = "places/#{place["placeID"]}/contents"
filters = +"filter=status(published)"
if to_import[:filters]
filters << "&filter=type(#{to_import[:filters][:type]})" if to_import[:filters][:type].present?
filters << "&filter=creationDate(null,#{to_import[:filters][:created_after].strftime("%Y-%m-%dT%TZ")})" if to_import[:filters][:created_after].present?
if to_import[:filters][:type].present?
filters << "&filter=type(#{to_import[:filters][:type]})"
end
if to_import[:filters][:created_after].present?
filters << "&filter=creationDate(null,#{to_import[:filters][:created_after].strftime("%Y-%m-%dT%TZ")})"
end
end
end
loop do
contents = get("#{path}?#{filters}&sort=dateCreatedAsc&count=#{POST_COUNT}&startIndex=#{start_index}", to_import[:authenticated])
contents =
get(
"#{path}?#{filters}&sort=dateCreatedAsc&count=#{POST_COUNT}&startIndex=#{start_index}",
to_import[:authenticated],
)
contents["list"].each do |content|
content_id = content["contentID"].presence || "#{content["type"]}_#{content["id"]}"
@@ -149,7 +267,8 @@ class ImportScripts::JiveApi < ImportScripts::Base
created_at: content["published"],
title: @htmlentities.decode(content["subject"]),
raw: process_raw(content["content"]["text"]),
user_id: user_id_from_imported_user_id(content["author"]["id"]) || Discourse::SYSTEM_USER_ID,
user_id:
user_id_from_imported_user_id(content["author"]["id"]) || Discourse::SYSTEM_USER_ID,
views: content["viewCount"],
custom_fields: custom_fields,
}
@@ -165,10 +284,16 @@ class ImportScripts::JiveApi < ImportScripts::Base
if parent_post&.id && parent_post&.topic_id
resources = content["resources"]
import_likes(resources["likes"]["ref"], parent_post.id) if content["likeCount"].to_i > 0 && resources.dig("likes", "ref").present?
if content["likeCount"].to_i > 0 && resources.dig("likes", "ref").present?
import_likes(resources["likes"]["ref"], parent_post.id)
end
if content["replyCount"].to_i > 0
import_comments(resources["comments"]["ref"], parent_post.topic_id, to_import) if resources.dig("comments", "ref").present?
import_messages(resources["messages"]["ref"], parent_post.topic_id, to_import) if resources.dig("messages", "ref").present?
if resources.dig("comments", "ref").present?
import_comments(resources["comments"]["ref"], parent_post.topic_id, to_import)
end
if resources.dig("messages", "ref").present?
import_messages(resources["messages"]["ref"], parent_post.topic_id, to_import)
end
end
end
end
@@ -198,7 +323,11 @@ class ImportScripts::JiveApi < ImportScripts::Base
start_index = 0
loop do
comments = get("#{url}?hierarchical=false&count=#{POST_COUNT}&startIndex=#{start_index}", to_import[:authenticated])
comments =
get(
"#{url}?hierarchical=false&count=#{POST_COUNT}&startIndex=#{start_index}",
to_import[:authenticated],
)
break if comments["error"]
comments["list"].each do |comment|
next if post_id_from_imported_post_id(comment["id"])
@@ -207,9 +336,12 @@ class ImportScripts::JiveApi < ImportScripts::Base
id: comment["id"],
created_at: comment["published"],
topic_id: topic_id,
user_id: user_id_from_imported_user_id(comment["author"]["id"]) || Discourse::SYSTEM_USER_ID,
user_id:
user_id_from_imported_user_id(comment["author"]["id"]) || Discourse::SYSTEM_USER_ID,
raw: process_raw(comment["content"]["text"]),
custom_fields: { import_id: comment["id"] },
custom_fields: {
import_id: comment["id"],
},
}
if (parent_post_id = comment["parentID"]).to_i > 0
@@ -234,7 +366,11 @@ class ImportScripts::JiveApi < ImportScripts::Base
start_index = 0
loop do
messages = get("#{url}?hierarchical=false&count=#{POST_COUNT}&startIndex=#{start_index}", to_import[:authenticated])
messages =
get(
"#{url}?hierarchical=false&count=#{POST_COUNT}&startIndex=#{start_index}",
to_import[:authenticated],
)
break if messages["error"]
messages["list"].each do |message|
next if post_id_from_imported_post_id(message["id"])
@@ -243,9 +379,12 @@ class ImportScripts::JiveApi < ImportScripts::Base
id: message["id"],
created_at: message["published"],
topic_id: topic_id,
user_id: user_id_from_imported_user_id(message["author"]["id"]) || Discourse::SYSTEM_USER_ID,
user_id:
user_id_from_imported_user_id(message["author"]["id"]) || Discourse::SYSTEM_USER_ID,
raw: process_raw(message["content"]["text"]),
custom_fields: { import_id: message["id"] },
custom_fields: {
import_id: message["id"],
},
}
post[:custom_fields][:is_accepted_answer] = true if message["answer"]
@@ -280,20 +419,25 @@ class ImportScripts::JiveApi < ImportScripts::Base
puts "", "importing bookmarks..."
start_index = 0
fields = "fields=author.id,favoriteObject.id,-resources,-author.resources,-favoriteObject.resources"
fields =
"fields=author.id,favoriteObject.id,-resources,-author.resources,-favoriteObject.resources"
filter = "&filter=creationDate(null,2016-01-01T00:00:00Z)"
loop do
favorites = get("contents?#{fields}&filter=type(favorite)#{filter}&sort=dateCreatedAsc&count=#{POST_COUNT}&startIndex=#{start_index}")
bookmarks_to_create = favorites["list"].map do |favorite|
next unless user_id = user_id_from_imported_user_id(favorite["author"]["id"])
next unless post_id = post_id_from_imported_post_id(favorite["favoriteObject"]["id"])
{ user_id: user_id, post_id: post_id }
end.flatten
favorites =
get(
"contents?#{fields}&filter=type(favorite)#{filter}&sort=dateCreatedAsc&count=#{POST_COUNT}&startIndex=#{start_index}",
)
bookmarks_to_create =
favorites["list"]
.map do |favorite|
next unless user_id = user_id_from_imported_user_id(favorite["author"]["id"])
next unless post_id = post_id_from_imported_post_id(favorite["favoriteObject"]["id"])
{ user_id: user_id, post_id: post_id }
end
.flatten
create_bookmarks(bookmarks_to_create) do |row|
row
end
create_bookmarks(bookmarks_to_create) { |row| row }
break if favorites["list"].size < POST_COUNT || favorites.dig("links", "next").blank?
break unless start_index = favorites["links"]["next"][/startIndex=(\d+)/, 1]
@@ -304,22 +448,26 @@ class ImportScripts::JiveApi < ImportScripts::Base
doc = Nokogiri::HTML5.fragment(raw)
# convert emoticon
doc.css("span.emoticon-inline").each do |span|
name = span["class"][/emoticon_(\w+)/, 1]&.downcase
name && Emoji.exists?(name) ? span.replace(":#{name}:") : span.remove
end
doc
.css("span.emoticon-inline")
.each do |span|
name = span["class"][/emoticon_(\w+)/, 1]&.downcase
name && Emoji.exists?(name) ? span.replace(":#{name}:") : span.remove
end
# convert mentions
doc.css("a.jive-link-profile-small").each { |a| a.replace("@#{a.content}") }
# fix links
doc.css("a[href]").each do |a|
if a["href"]["#{@base_uri}/docs/DOC-"]
a["href"] = a["href"][/#{Regexp.escape(@base_uri)}\/docs\/DOC-\d+/]
elsif a["href"][@base_uri]
a.replace(a.inner_html)
doc
.css("a[href]")
.each do |a|
if a["href"]["#{@base_uri}/docs/DOC-"]
a["href"] = a["href"][%r{#{Regexp.escape(@base_uri)}/docs/DOC-\d+}]
elsif a["href"][@base_uri]
a.replace(a.inner_html)
end
end
end
html = doc.at(".jive-rendered-content").to_html
@@ -341,17 +489,22 @@ class ImportScripts::JiveApi < ImportScripts::Base
# Fetches a Jive REST resource with curl and returns the parsed JSON.
#
# url_or_path   - a value starting with "http" is requested as-is; anything
#                 else is appended to "#{@base_uri}/api/core/v3/".
# authenticated - when truthy, @username and @password are passed to curl
#                 through its --user option.
#
# Returns the parsed JSON document, or nil once the first attempt and three
# retries have all raised (curl not runnable, empty or non-JSON output, ...).
def get(url_or_path, authenticated = false)
  # `retry` re-runs the method body; `||=` keeps the countdown across attempts.
  tries ||= 3

  url = url_or_path.start_with?("http") ? url_or_path : "#{@base_uri}/api/core/v3/#{url_or_path}"

  # Build an argv array rather than a shell string: the password and the URL
  # (which may come straight from an API response) reach curl verbatim, with
  # no shell quoting or expansion involved.
  command = %w[curl --silent]
  command.push("--user", "#{@username}:#{@password}") if authenticated
  command << url

  puts command.join(" ") if ENV["VERBOSE"] == "1"

  JSON.parse(IO.popen(command, &:read))
rescue StandardError
  # Single handler: a preceding empty `rescue` clause would swallow the error
  # and skip the retry entirely.
  retry if (tries -= 1) >= 0
end
end
ImportScripts::JiveApi.new.perform

View File

@@ -6,8 +6,7 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb")
# Edit the constants and initialize method for your import data.
class ImportScripts::JsonGeneric < ImportScripts::Base
JSON_FILE_PATH = ENV['JSON_FILE']
JSON_FILE_PATH = ENV["JSON_FILE"]
BATCH_SIZE ||= 1000
def initialize
@@ -30,24 +29,18 @@ class ImportScripts::JsonGeneric < ImportScripts::Base
end
# Builds a username from an author name.
#
# The name is lower-cased and every character other than a-z, 0-9, "-" and
# "_" is dropped. When nothing is left, the first ten hex characters of the
# SHA1 digest of the original name are returned instead.
def username_for(name)
  sanitized = name.downcase.gsub(/[^a-z0-9\-\_]/, "")
  return sanitized unless sanitized.blank?

  Digest::SHA1.hexdigest(name)[0...10]
end
def import_users
puts '', "Importing users"
puts "", "Importing users"
users = []
@imported_json['topics'].each do |t|
t['posts'].each do |p|
users << p['author'].scrub
end
end
@imported_json["topics"].each { |t| t["posts"].each { |p| users << p["author"].scrub } }
users.uniq!
create_users(users) do |u|
@@ -56,7 +49,7 @@ class ImportScripts::JsonGeneric < ImportScripts::Base
username: username_for(u),
name: u,
email: "#{username_for(u)}@example.com",
created_at: Time.now
created_at: Time.now,
}
end
end
@@ -67,8 +60,8 @@ class ImportScripts::JsonGeneric < ImportScripts::Base
topics = 0
posts = 0
@imported_json['topics'].each do |t|
first_post = t['posts'][0]
@imported_json["topics"].each do |t|
first_post = t["posts"][0]
next unless first_post
topic = {
@@ -77,25 +70,32 @@ class ImportScripts::JsonGeneric < ImportScripts::Base
raw: first_post["body"],
created_at: Time.zone.parse(first_post["date"]),
cook_method: Post.cook_methods[:raw_html],
title: t['title'],
category: ENV['CATEGORY_ID'],
custom_fields: { import_id: "pid:#{first_post['id']}" }
title: t["title"],
category: ENV["CATEGORY_ID"],
custom_fields: {
import_id: "pid:#{first_post["id"]}",
},
}
topic[:pinned_at] = Time.zone.parse(first_post["date"]) if t['pinned']
topic[:pinned_at] = Time.zone.parse(first_post["date"]) if t["pinned"]
topics += 1
parent_post = create_post(topic, topic[:id])
t['posts'][1..-1].each do |p|
create_post({
id: p["id"],
topic_id: parent_post.topic_id,
user_id: user_id_from_imported_user_id(username_for(p["author"])) || -1,
raw: p["body"],
created_at: Time.zone.parse(p["date"]),
cook_method: Post.cook_methods[:raw_html],
custom_fields: { import_id: "pid:#{p['id']}" }
}, p['id'])
t["posts"][1..-1].each do |p|
create_post(
{
id: p["id"],
topic_id: parent_post.topic_id,
user_id: user_id_from_imported_user_id(username_for(p["author"])) || -1,
raw: p["body"],
created_at: Time.zone.parse(p["date"]),
cook_method: Post.cook_methods[:raw_html],
custom_fields: {
import_id: "pid:#{p["id"]}",
},
},
p["id"],
)
posts += 1
end
end
@@ -104,6 +104,4 @@ class ImportScripts::JsonGeneric < ImportScripts::Base
end
end
# Run the import only when this file is executed directly (not when it is
# required). Kept as a single guard so the import is performed exactly once.
ImportScripts::JsonGeneric.new.perform if __FILE__ == $0

View File

@@ -4,7 +4,6 @@ require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::Kunena < ImportScripts::Base
KUNENA_DB = "kunena"
def initialize
@@ -12,12 +11,13 @@ class ImportScripts::Kunena < ImportScripts::Base
@users = {}
@client = Mysql2::Client.new(
host: "localhost",
username: "root",
#password: "password",
database: KUNENA_DB
)
@client =
Mysql2::Client.new(
host: "localhost",
username: "root",
#password: "password",
database: KUNENA_DB,
)
end
def execute
@@ -26,7 +26,8 @@ class ImportScripts::Kunena < ImportScripts::Base
puts "creating users"
create_users(@users) do |id, user|
{ id: id,
{
id: id,
email: user[:email],
username: user[:username],
created_at: user[:created_at],
@@ -34,15 +35,25 @@ class ImportScripts::Kunena < ImportScripts::Base
moderator: user[:moderator] ? true : false,
admin: user[:admin] ? true : false,
suspended_at: user[:suspended] ? Time.zone.now : nil,
suspended_till: user[:suspended] ? 100.years.from_now : nil }
suspended_till: user[:suspended] ? 100.years.from_now : nil,
}
end
@users = nil
create_categories(@client.query("SELECT id, parent, name, description, ordering FROM jos_kunena_categories ORDER BY parent, id;")) do |c|
h = { id: c['id'], name: c['name'], description: c['description'], position: c['ordering'].to_i }
if c['parent'].to_i > 0
h[:parent_category_id] = category_id_from_imported_category_id(c['parent'])
create_categories(
@client.query(
"SELECT id, parent, name, description, ordering FROM jos_kunena_categories ORDER BY parent, id;",
),
) do |c|
h = {
id: c["id"],
name: c["name"],
description: c["description"],
position: c["ordering"].to_i,
}
if c["parent"].to_i > 0
h[:parent_category_id] = category_id_from_imported_category_id(c["parent"])
end
h
end
@@ -50,9 +61,9 @@ class ImportScripts::Kunena < ImportScripts::Base
import_posts
begin
create_admin(email: 'neil.lalonde@discourse.org', username: UserNameSuggester.suggest('neil'))
create_admin(email: "neil.lalonde@discourse.org", username: UserNameSuggester.suggest("neil"))
rescue => e
puts '', "Failed to create admin user"
puts "", "Failed to create admin user"
puts e.message
end
end
@@ -61,38 +72,50 @@ class ImportScripts::Kunena < ImportScripts::Base
# Need to merge data from joomla with kunena
puts "fetching Joomla users data from mysql"
results = @client.query("SELECT id, username, email, registerDate FROM jos_users;", cache_rows: false)
results =
@client.query("SELECT id, username, email, registerDate FROM jos_users;", cache_rows: false)
results.each do |u|
next unless u['id'].to_i > (0) && u['username'].present? && u['email'].present?
username = u['username'].gsub(' ', '_').gsub(/[^A-Za-z0-9_]/, '')[0, User.username_length.end]
next unless u["id"].to_i > (0) && u["username"].present? && u["email"].present?
username = u["username"].gsub(" ", "_").gsub(/[^A-Za-z0-9_]/, "")[0, User.username_length.end]
if username.length < User.username_length.first
username = username * User.username_length.first
end
@users[u['id'].to_i] = { id: u['id'].to_i, username: username, email: u['email'], created_at: u['registerDate'] }
@users[u["id"].to_i] = {
id: u["id"].to_i,
username: username,
email: u["email"],
created_at: u["registerDate"],
}
end
puts "fetching Kunena user data from mysql"
results = @client.query("SELECT userid, signature, moderator, banned FROM jos_kunena_users;", cache_rows: false)
results =
@client.query(
"SELECT userid, signature, moderator, banned FROM jos_kunena_users;",
cache_rows: false,
)
results.each do |u|
next unless u['userid'].to_i > 0
user = @users[u['userid'].to_i]
next unless u["userid"].to_i > 0
user = @users[u["userid"].to_i]
if user
user[:bio] = u['signature']
user[:moderator] = (u['moderator'].to_i == 1)
user[:suspended] = u['banned'].present?
user[:bio] = u["signature"]
user[:moderator] = (u["moderator"].to_i == 1)
user[:suspended] = u["banned"].present?
end
end
end
def import_posts
puts '', "creating topics and posts"
puts "", "creating topics and posts"
total_count = @client.query("SELECT COUNT(*) count FROM jos_kunena_messages m;").first['count']
total_count = @client.query("SELECT COUNT(*) count FROM jos_kunena_messages m;").first["count"]
batch_size = 1000
batches(batch_size) do |offset|
results = @client.query("
results =
@client.query(
"
SELECT m.id id,
m.thread thread,
m.parent parent,
@@ -107,31 +130,33 @@ class ImportScripts::Kunena < ImportScripts::Base
ORDER BY m.id
LIMIT #{batch_size}
OFFSET #{offset};
", cache_rows: false)
",
cache_rows: false,
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| p['id'].to_i }
next if all_records_exist? :posts, results.map { |p| p["id"].to_i }
create_posts(results, total: total_count, offset: offset) do |m|
skip = false
mapped = {}
mapped[:id] = m['id']
mapped[:user_id] = user_id_from_imported_user_id(m['userid']) || -1
mapped[:id] = m["id"]
mapped[:user_id] = user_id_from_imported_user_id(m["userid"]) || -1
mapped[:raw] = m["message"]
mapped[:created_at] = Time.zone.at(m['time'])
mapped[:created_at] = Time.zone.at(m["time"])
if m['id'] == m['thread']
mapped[:category] = category_id_from_imported_category_id(m['catid'])
mapped[:title] = m['subject']
if m["id"] == m["thread"]
mapped[:category] = category_id_from_imported_category_id(m["catid"])
mapped[:title] = m["subject"]
else
parent = topic_lookup_from_imported_post_id(m['parent'])
parent = topic_lookup_from_imported_post_id(m["parent"])
if parent
mapped[:topic_id] = parent[:topic_id]
mapped[:reply_to_post_number] = parent[:post_number] if parent[:post_number] > 1
else
puts "Parent post #{m['parent']} doesn't exist. Skipping #{m["id"]}: #{m["subject"][0..40]}"
puts "Parent post #{m["parent"]} doesn't exist. Skipping #{m["id"]}: #{m["subject"][0..40]}"
skip = true
end
end

View File

@@ -19,27 +19,21 @@ export PARENT_FIELD="parent_id" # "parent" in some versions
=end
class ImportScripts::Kunena < ImportScripts::Base
DB_HOST ||= ENV['DB_HOST'] || "localhost"
DB_NAME ||= ENV['DB_NAME'] || "kunena"
DB_USER ||= ENV['DB_USER'] || "kunena"
DB_PW ||= ENV['DB_PW'] || "kunena"
KUNENA_PREFIX ||= ENV['KUNENA_PREFIX'] || "jos_" # "iff_" sometimes
IMAGE_PREFIX ||= ENV['IMAGE_PREFIX'] || "http://EXAMPLE.com/media/kunena/attachments"
PARENT_FIELD ||= ENV['PARENT_FIELD'] || "parent_id" # "parent" in some versions
DB_HOST ||= ENV["DB_HOST"] || "localhost"
DB_NAME ||= ENV["DB_NAME"] || "kunena"
DB_USER ||= ENV["DB_USER"] || "kunena"
DB_PW ||= ENV["DB_PW"] || "kunena"
KUNENA_PREFIX ||= ENV["KUNENA_PREFIX"] || "jos_" # "iff_" sometimes
IMAGE_PREFIX ||= ENV["IMAGE_PREFIX"] || "http://EXAMPLE.com/media/kunena/attachments"
PARENT_FIELD ||= ENV["PARENT_FIELD"] || "parent_id" # "parent" in some versions
# Prepares the importer: an empty @users hash and one MySQL connection
# configured from the DB_HOST / DB_USER / DB_PW / DB_NAME constants.
def initialize
  super

  # Keyed by the source user id; filled in while the user tables are read.
  @users = {}

  # Open exactly one connection; assigning @client a second client would
  # leave the first one open with nothing referencing it.
  @client =
    Mysql2::Client.new(host: DB_HOST, username: DB_USER, password: DB_PW, database: DB_NAME)
end
def execute
@@ -48,7 +42,8 @@ class ImportScripts::Kunena < ImportScripts::Base
puts "creating users"
create_users(@users) do |id, user|
{ id: id,
{
id: id,
email: user[:email],
username: user[:username],
created_at: user[:created_at],
@@ -56,15 +51,25 @@ class ImportScripts::Kunena < ImportScripts::Base
moderator: user[:moderator] ? true : false,
admin: user[:admin] ? true : false,
suspended_at: user[:suspended] ? Time.zone.now : nil,
suspended_till: user[:suspended] ? 100.years.from_now : nil }
suspended_till: user[:suspended] ? 100.years.from_now : nil,
}
end
@users = nil
create_categories(@client.query("SELECT id, #{PARENT_FIELD} as parent_id, name, description, ordering FROM #{KUNENA_PREFIX}kunena_categories ORDER BY #{PARENT_FIELD}, id;")) do |c|
h = { id: c['id'], name: c['name'], description: c['description'], position: c['ordering'].to_i }
if c['parent_id'].to_i > 0
h[:parent_category_id] = category_id_from_imported_category_id(c['parent_id'])
create_categories(
@client.query(
"SELECT id, #{PARENT_FIELD} as parent_id, name, description, ordering FROM #{KUNENA_PREFIX}kunena_categories ORDER BY #{PARENT_FIELD}, id;",
),
) do |c|
h = {
id: c["id"],
name: c["name"],
description: c["description"],
position: c["ordering"].to_i,
}
if c["parent_id"].to_i > 0
h[:parent_category_id] = category_id_from_imported_category_id(c["parent_id"])
end
h
end
@@ -72,9 +77,9 @@ class ImportScripts::Kunena < ImportScripts::Base
import_posts
begin
create_admin(email: 'CHANGE@ME.COM', username: UserNameSuggester.suggest('CHAMGEME'))
create_admin(email: "CHANGE@ME.COM", username: UserNameSuggester.suggest("CHAMGEME"))
rescue => e
puts '', "Failed to create admin user"
puts "", "Failed to create admin user"
puts e.message
end
end
@@ -83,38 +88,54 @@ class ImportScripts::Kunena < ImportScripts::Base
# Need to merge data from joomla with kunena
puts "fetching Joomla users data from mysql"
results = @client.query("SELECT id, username, email, registerDate FROM #{KUNENA_PREFIX}users;", cache_rows: false)
results =
@client.query(
"SELECT id, username, email, registerDate FROM #{KUNENA_PREFIX}users;",
cache_rows: false,
)
results.each do |u|
next unless u['id'].to_i > (0) && u['username'].present? && u['email'].present?
username = u['username'].gsub(' ', '_').gsub(/[^A-Za-z0-9_]/, '')[0, User.username_length.end]
next unless u["id"].to_i > (0) && u["username"].present? && u["email"].present?
username = u["username"].gsub(" ", "_").gsub(/[^A-Za-z0-9_]/, "")[0, User.username_length.end]
if username.length < User.username_length.first
username = username * User.username_length.first
end
@users[u['id'].to_i] = { id: u['id'].to_i, username: username, email: u['email'], created_at: u['registerDate'] }
@users[u["id"].to_i] = {
id: u["id"].to_i,
username: username,
email: u["email"],
created_at: u["registerDate"],
}
end
puts "fetching Kunena user data from mysql"
results = @client.query("SELECT userid, signature, moderator, banned FROM #{KUNENA_PREFIX}kunena_users;", cache_rows: false)
results =
@client.query(
"SELECT userid, signature, moderator, banned FROM #{KUNENA_PREFIX}kunena_users;",
cache_rows: false,
)
results.each do |u|
next unless u['userid'].to_i > 0
user = @users[u['userid'].to_i]
next unless u["userid"].to_i > 0
user = @users[u["userid"].to_i]
if user
user[:bio] = u['signature']
user[:moderator] = (u['moderator'].to_i == 1)
user[:suspended] = u['banned'].present?
user[:bio] = u["signature"]
user[:moderator] = (u["moderator"].to_i == 1)
user[:suspended] = u["banned"].present?
end
end
end
def import_posts
puts '', "creating topics and posts"
puts "", "creating topics and posts"
total_count = @client.query("SELECT COUNT(*) count FROM #{KUNENA_PREFIX}kunena_messages m;").first['count']
total_count =
@client.query("SELECT COUNT(*) count FROM #{KUNENA_PREFIX}kunena_messages m;").first["count"]
batch_size = 1000
batches(batch_size) do |offset|
results = @client.query("
results =
@client.query(
"
SELECT m.id id,
m.thread thread,
m.parent parent,
@@ -129,33 +150,38 @@ class ImportScripts::Kunena < ImportScripts::Base
ORDER BY m.id
LIMIT #{batch_size}
OFFSET #{offset};
", cache_rows: false)
",
cache_rows: false,
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| p['id'].to_i }
next if all_records_exist? :posts, results.map { |p| p["id"].to_i }
create_posts(results, total: total_count, offset: offset) do |m|
skip = false
mapped = {}
mapped[:id] = m['id']
mapped[:user_id] = user_id_from_imported_user_id(m['userid']) || -1
mapped[:id] = m["id"]
mapped[:user_id] = user_id_from_imported_user_id(m["userid"]) || -1
id = m['userid']
mapped[:raw] = m["message"].gsub(/\[attachment=[0-9]+\](.+?)\[\/attachment\]/, "\n#{IMAGE_PREFIX}/#{id}/\\1")
mapped[:created_at] = Time.zone.at(m['time'])
id = m["userid"]
mapped[:raw] = m["message"].gsub(
%r{\[attachment=[0-9]+\](.+?)\[/attachment\]},
"\n#{IMAGE_PREFIX}/#{id}/\\1",
)
mapped[:created_at] = Time.zone.at(m["time"])
if m['parent'] == 0
mapped[:category] = category_id_from_imported_category_id(m['catid'])
mapped[:title] = m['subject']
if m["parent"] == 0
mapped[:category] = category_id_from_imported_category_id(m["catid"])
mapped[:title] = m["subject"]
else
parent = topic_lookup_from_imported_post_id(m['parent'])
parent = topic_lookup_from_imported_post_id(m["parent"])
if parent
mapped[:topic_id] = parent[:topic_id]
mapped[:reply_to_post_number] = parent[:post_number] if parent[:post_number] > 1
else
puts "Parent post #{m['parent']} doesn't exist. Skipping #{m["id"]}: #{m["subject"][0..40]}"
puts "Parent post #{m["parent"]} doesn't exist. Skipping #{m["id"]}: #{m["subject"][0..40]}"
skip = true
end
end

View File

@@ -12,16 +12,14 @@
# that was done using import_scripts/support/convert_mysql_xml_to_mysql.rb
#
require 'mysql2'
require 'csv'
require 'reverse_markdown'
require "mysql2"
require "csv"
require "reverse_markdown"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'htmlentities'
require "htmlentities"
# remove table conversion: unregister ReverseMarkdown's converter for each
# table-related tag (one pass over the tag list is enough)
%i[table td tr th thead tbody].each { |tag| ReverseMarkdown::Converters.unregister(tag) }
class ImportScripts::Lithium < ImportScripts::Base
BATCH_SIZE = 1000
@@ -29,11 +27,11 @@ class ImportScripts::Lithium < ImportScripts::Base
# CHANGE THESE BEFORE RUNNING THE IMPORTER
DATABASE = "wd"
PASSWORD = "password"
AVATAR_DIR = '/tmp/avatars'
ATTACHMENT_DIR = '/tmp/attachments'
UPLOAD_DIR = '/tmp/uploads'
AVATAR_DIR = "/tmp/avatars"
ATTACHMENT_DIR = "/tmp/attachments"
UPLOAD_DIR = "/tmp/uploads"
OLD_DOMAIN = 'community.wd.com'
OLD_DOMAIN = "community.wd.com"
TEMP = ""
@@ -44,11 +42,10 @@ class ImportScripts::Lithium < ImportScripts::Base
{ name: "user_field_3", profile: "industry" },
]
LITHIUM_PROFILE_FIELDS = "'profile.jobtitle', 'profile.company', 'profile.industry', 'profile.location'"
LITHIUM_PROFILE_FIELDS =
"'profile.jobtitle', 'profile.company', 'profile.industry', 'profile.location'"
USERNAME_MAPPINGS = {
"admins": "admin_user"
}.with_indifferent_access
USERNAME_MAPPINGS = { admins: "admin_user" }.with_indifferent_access
def initialize
super
@@ -57,16 +54,16 @@ class ImportScripts::Lithium < ImportScripts::Base
@htmlentities = HTMLEntities.new
@client = Mysql2::Client.new(
host: "localhost",
username: "root",
password: PASSWORD,
database: DATABASE
)
@client =
Mysql2::Client.new(
host: "localhost",
username: "root",
password: PASSWORD,
database: DATABASE,
)
end
def execute
@max_start_id = Post.maximum(:id)
import_groups
@@ -94,10 +91,7 @@ class ImportScripts::Lithium < ImportScripts::Base
SQL
create_groups(groups) do |group|
{
id: group["name"],
name: @htmlentities.decode(group["name"]).strip
}
{ id: group["name"], name: @htmlentities.decode(group["name"]).strip }
end
end
@@ -106,7 +100,10 @@ class ImportScripts::Lithium < ImportScripts::Base
user_count = mysql_query("SELECT COUNT(*) count FROM users").first["count"]
avatar_files = Dir.entries(AVATAR_DIR)
duplicate_emails = mysql_query("SELECT email_lower FROM users GROUP BY email_lower HAVING COUNT(email_lower) > 1").map { |e| [e["email_lower"], 0] }.to_h
duplicate_emails =
mysql_query(
"SELECT email_lower FROM users GROUP BY email_lower HAVING COUNT(email_lower) > 1",
).map { |e| [e["email_lower"], 0] }.to_h
batches(BATCH_SIZE) do |offset|
users = mysql_query <<-SQL
@@ -134,8 +131,8 @@ class ImportScripts::Lithium < ImportScripts::Base
create_users(users, total: user_count, offset: offset) do |user|
user_id = user["id"]
profile = profiles.select { |p| p["user_id"] == user_id }
result = profile.select { |p| p["param"] == "profile.location" }
profile = profiles.select { |p| p["user_id"] == user_id }
result = profile.select { |p| p["param"] == "profile.location" }
location = result.count > 0 ? result.first["nvalue"] : nil
username = user["login_canon"]
username = USERNAME_MAPPINGS[username] if USERNAME_MAPPINGS[username].present?
@@ -158,31 +155,32 @@ class ImportScripts::Lithium < ImportScripts::Base
# title: @htmlentities.decode(user["usertitle"]).strip,
# primary_group_id: group_id_from_imported_group_id(user["usergroupid"]),
created_at: unix_time(user["registration_time"]),
post_create_action: proc do |u|
@old_username_to_new_usernames[user["login_canon"]] = u.username
post_create_action:
proc do |u|
@old_username_to_new_usernames[user["login_canon"]] = u.username
# import user avatar
sso_id = u.custom_fields["sso_id"]
if sso_id.present?
prefix = "#{AVATAR_DIR}/#{sso_id}_"
file = get_file(prefix + "actual.jpeg")
file ||= get_file(prefix + "profile.jpeg")
# import user avatar
sso_id = u.custom_fields["sso_id"]
if sso_id.present?
prefix = "#{AVATAR_DIR}/#{sso_id}_"
file = get_file(prefix + "actual.jpeg")
file ||= get_file(prefix + "profile.jpeg")
if file.present?
upload = UploadCreator.new(file, file.path, type: "avatar").create_for(u.id)
u.create_user_avatar unless u.user_avatar
if file.present?
upload = UploadCreator.new(file, file.path, type: "avatar").create_for(u.id)
u.create_user_avatar unless u.user_avatar
if !u.user_avatar.contains_upload?(upload.id)
u.user_avatar.update_columns(custom_upload_id: upload.id)
if !u.user_avatar.contains_upload?(upload.id)
u.user_avatar.update_columns(custom_upload_id: upload.id)
if u.uploaded_avatar_id.nil? ||
!u.user_avatar.contains_upload?(u.uploaded_avatar_id)
u.update_columns(uploaded_avatar_id: upload.id)
if u.uploaded_avatar_id.nil? ||
!u.user_avatar.contains_upload?(u.uploaded_avatar_id)
u.update_columns(uploaded_avatar_id: upload.id)
end
end
end
end
end
end
end,
}
end
end
@@ -226,7 +224,7 @@ class ImportScripts::Lithium < ImportScripts::Base
if attr[:user].present?
fields[name] = user[attr[:user]]
elsif attr[:profile].present? && profile.count > 0
result = profile.select { |p| p["param"] == "profile.#{attr[:profile]}" }
result = profile.select { |p| p["param"] == "profile.#{attr[:profile]}" }
fields[name] = result.first["nvalue"] if result.count > 0
end
end
@@ -268,8 +266,16 @@ class ImportScripts::Lithium < ImportScripts::Base
imported_user.user_avatar.update(custom_upload_id: upload.id)
imported_user.update(uploaded_avatar_id: upload.id)
ensure
file.close rescue nil
file.unlind rescue nil
begin
file.close
rescue StandardError
nil
end
begin
  # Was `file.unlind` (typo): that raised NoMethodError, which the rescue below
  # swallowed, so the file was never removed here. `unlink` matches the cleanup
  # used in import_profile_background. Presumably `file` is a Tempfile — confirm.
  file.unlink
rescue StandardError
  nil
end
end
def import_profile_background(old_user, imported_user)
@@ -295,8 +301,16 @@ class ImportScripts::Lithium < ImportScripts::Base
imported_user.user_profile.upload_profile_background(upload)
ensure
file.close rescue nil
file.unlink rescue nil
begin
file.close
rescue StandardError
nil
end
begin
file.unlink
rescue StandardError
nil
end
end
def import_categories
@@ -310,14 +324,16 @@ class ImportScripts::Lithium < ImportScripts::Base
ORDER BY n.type_id DESC, n.node_id ASC
SQL
categories = categories.map { |c| (c["name"] = c["c_title"] || c["b_title"] || c["display_id"]) && c }
categories =
categories.map { |c| (c["name"] = c["c_title"] || c["b_title"] || c["display_id"]) && c }
# To prevent duplicate category names
categories = categories.map do |category|
count = categories.to_a.count { |c| c["name"].present? && c["name"] == category["name"] }
category["name"] << " (#{category["node_id"]})" if count > 1
category
end
categories =
categories.map do |category|
count = categories.to_a.count { |c| c["name"].present? && c["name"] == category["name"] }
category["name"] << " (#{category["node_id"]})" if count > 1
category
end
parent_categories = categories.select { |c| c["parent_node_id"] <= 2 }
@@ -326,9 +342,7 @@ class ImportScripts::Lithium < ImportScripts::Base
id: category["node_id"],
name: category["name"],
position: category["position"],
post_create_action: lambda do |record|
after_category_create(record, category)
end
post_create_action: lambda { |record| after_category_create(record, category) },
}
end
@@ -342,9 +356,7 @@ class ImportScripts::Lithium < ImportScripts::Base
name: category["name"],
position: category["position"],
parent_category_id: category_id_from_imported_category_id(category["parent_node_id"]),
post_create_action: lambda do |record|
after_category_create(record, category)
end
post_create_action: lambda { |record| after_category_create(record, category) },
}
end
end
@@ -371,7 +383,6 @@ class ImportScripts::Lithium < ImportScripts::Base
end
end
end
end
def staff_guardian
@@ -386,8 +397,12 @@ class ImportScripts::Lithium < ImportScripts::Base
SiteSetting.max_tags_per_topic = 10
SiteSetting.max_tag_length = 100
topic_count = mysql_query("SELECT COUNT(*) count FROM message2 where id = root_id").first["count"]
topic_tags = mysql_query("SELECT e.target_id, GROUP_CONCAT(l.tag_text SEPARATOR ',') tags FROM tag_events_label_message e LEFT JOIN tags_label l ON e.tag_id = l.tag_id GROUP BY e.target_id")
topic_count =
mysql_query("SELECT COUNT(*) count FROM message2 where id = root_id").first["count"]
topic_tags =
mysql_query(
"SELECT e.target_id, GROUP_CONCAT(l.tag_text SEPARATOR ',') tags FROM tag_events_label_message e LEFT JOIN tags_label l ON e.tag_id = l.tag_id GROUP BY e.target_id",
)
batches(BATCH_SIZE) do |offset|
topics = mysql_query <<-SQL
@@ -405,7 +420,6 @@ class ImportScripts::Lithium < ImportScripts::Base
next if all_records_exist? :posts, topics.map { |topic| "#{topic["node_id"]} #{topic["id"]}" }
create_posts(topics, total: topic_count, offset: offset) do |topic|
category_id = category_id_from_imported_category_id(topic["node_id"])
deleted_at = topic["deleted"] == 1 ? topic["row_version"] : nil
raw = topic["body"]
@@ -420,24 +434,31 @@ class ImportScripts::Lithium < ImportScripts::Base
created_at: unix_time(topic["post_date"]),
deleted_at: deleted_at,
views: topic["views"],
custom_fields: { import_unique_id: topic["unique_id"] },
custom_fields: {
import_unique_id: topic["unique_id"],
},
import_mode: true,
post_create_action: proc do |post|
result = topic_tags.select { |t| t["target_id"] == topic["unique_id"] }
if result.count > 0
tag_names = result.first["tags"].split(",")
DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names)
end
end
post_create_action:
proc do |post|
result = topic_tags.select { |t| t["target_id"] == topic["unique_id"] }
if result.count > 0
tag_names = result.first["tags"].split(",")
DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names)
end
end,
}
else
message = "Unknown"
message = "Category '#{category_id}' not exist" if category_id.blank?
message = "Topic 'body' is empty" if raw.blank?
PluginStoreRow.find_or_create_by(plugin_name: "topic_import_log", key: topic["unique_id"].to_s, value: message, type_name: 'String')
PluginStoreRow.find_or_create_by(
plugin_name: "topic_import_log",
key: topic["unique_id"].to_s,
value: message,
type_name: "String",
)
nil
end
end
end
@@ -446,9 +467,13 @@ class ImportScripts::Lithium < ImportScripts::Base
end
def import_posts
post_count = mysql_query("SELECT COUNT(*) count FROM message2
WHERE id <> root_id").first["count"]
post_count =
mysql_query(
"SELECT COUNT(*) count FROM message2
WHERE id <> root_id",
).first[
"count"
]
puts "", "importing posts... (#{post_count})"
@@ -465,11 +490,18 @@ class ImportScripts::Lithium < ImportScripts::Base
break if posts.size < 1
next if all_records_exist? :posts, posts.map { |post| "#{post["node_id"]} #{post["root_id"]} #{post["id"]}" }
if all_records_exist? :posts,
posts.map { |post|
"#{post["node_id"]} #{post["root_id"]} #{post["id"]}"
}
next
end
create_posts(posts, total: post_count, offset: offset) do |post|
raw = post["raw"]
next unless topic = topic_lookup_from_imported_post_id("#{post["node_id"]} #{post["root_id"]}")
unless topic = topic_lookup_from_imported_post_id("#{post["node_id"]} #{post["root_id"]}")
next
end
deleted_at = topic["deleted"] == 1 ? topic["row_version"] : nil
raw = post["body"]
@@ -482,17 +514,27 @@ class ImportScripts::Lithium < ImportScripts::Base
raw: raw,
created_at: unix_time(post["post_date"]),
deleted_at: deleted_at,
custom_fields: { import_unique_id: post["unique_id"] },
import_mode: true
custom_fields: {
import_unique_id: post["unique_id"],
},
import_mode: true,
}
if parent = topic_lookup_from_imported_post_id("#{post["node_id"]} #{post["root_id"]} #{post["parent_id"]}")
if parent =
topic_lookup_from_imported_post_id(
"#{post["node_id"]} #{post["root_id"]} #{post["parent_id"]}",
)
new_post[:reply_to_post_number] = parent[:post_number]
end
new_post
else
PluginStoreRow.find_or_create_by(plugin_name: "post_import_log", key: post["unique_id"].to_s, value: "Post 'body' is empty", type_name: 'String')
PluginStoreRow.find_or_create_by(
plugin_name: "post_import_log",
key: post["unique_id"].to_s,
value: "Post 'body' is empty",
type_name: "String",
)
nil
end
end
@@ -521,37 +563,40 @@ class ImportScripts::Lithium < ImportScripts::Base
"catwink" => "wink",
"catfrustrated" => "grumpycat",
"catembarrassed" => "kissing_cat",
"catlol" => "joy_cat"
"catlol" => "joy_cat",
}
def import_likes
puts "\nimporting likes..."
sql = "select source_id user_id, target_id post_id, row_version created_at from tag_events_score_message"
sql =
"select source_id user_id, target_id post_id, row_version created_at from tag_events_score_message"
results = mysql_query(sql)
puts "loading unique id map"
existing_map = {}
PostCustomField.where(name: 'import_unique_id').pluck(:post_id, :value).each do |post_id, import_id|
existing_map[import_id] = post_id
end
PostCustomField
.where(name: "import_unique_id")
.pluck(:post_id, :value)
.each { |post_id, import_id| existing_map[import_id] = post_id }
puts "loading data into temp table"
DB.exec("create temp table like_data(user_id int, post_id int, created_at timestamp without time zone)")
DB.exec(
"create temp table like_data(user_id int, post_id int, created_at timestamp without time zone)",
)
PostAction.transaction do
results.each do |result|
result["user_id"] = user_id_from_imported_user_id(result["user_id"].to_s)
result["post_id"] = existing_map[result["post_id"].to_s]
next unless result["user_id"] && result["post_id"]
DB.exec("INSERT INTO like_data VALUES (:user_id,:post_id,:created_at)",
DB.exec(
"INSERT INTO like_data VALUES (:user_id,:post_id,:created_at)",
user_id: result["user_id"],
post_id: result["post_id"],
created_at: result["created_at"]
created_at: result["created_at"],
)
end
end
@@ -616,31 +661,28 @@ class ImportScripts::Lithium < ImportScripts::Base
end
def import_accepted_answers
puts "\nimporting accepted answers..."
sql = "select unique_id post_id from message2 where (attributes & 0x4000 ) != 0 and deleted = 0;"
sql =
"select unique_id post_id from message2 where (attributes & 0x4000 ) != 0 and deleted = 0;"
results = mysql_query(sql)
puts "loading unique id map"
existing_map = {}
PostCustomField.where(name: 'import_unique_id').pluck(:post_id, :value).each do |post_id, import_id|
existing_map[import_id] = post_id
end
PostCustomField
.where(name: "import_unique_id")
.pluck(:post_id, :value)
.each { |post_id, import_id| existing_map[import_id] = post_id }
puts "loading data into temp table"
DB.exec("create temp table accepted_data(post_id int primary key)")
PostAction.transaction do
results.each do |result|
result["post_id"] = existing_map[result["post_id"].to_s]
next unless result["post_id"]
DB.exec("INSERT INTO accepted_data VALUES (:post_id)",
post_id: result["post_id"]
)
DB.exec("INSERT INTO accepted_data VALUES (:post_id)", post_id: result["post_id"])
end
end
@@ -679,7 +721,6 @@ class ImportScripts::Lithium < ImportScripts::Base
end
def import_pms
puts "", "importing pms..."
puts "determining participation records"
@@ -702,24 +743,20 @@ class ImportScripts::Lithium < ImportScripts::Base
note_to_subject = {}
subject_to_first_note = {}
mysql_query("SELECT note_id, subject, sender_user_id FROM tblia_notes_content order by note_id").each do |row|
mysql_query(
"SELECT note_id, subject, sender_user_id FROM tblia_notes_content order by note_id",
).each do |row|
user_id = user_id_from_imported_user_id(row["sender_user_id"])
ary = (users[row["note_id"]] ||= Set.new)
if user_id
ary << user_id
end
note_to_subject[row["note_id"]] = row["subject"]
ary = (users[row["note_id"]] ||= Set.new)
ary << user_id if user_id
note_to_subject[row["note_id"]] = row["subject"]
if row["subject"] !~ /^Re: /
subject_to_first_note[[row["subject"], ary]] ||= row["note_id"]
end
subject_to_first_note[[row["subject"], ary]] ||= row["note_id"] if row["subject"] !~ /^Re: /
end
puts "Loading user_id to username map"
user_map = {}
User.pluck(:id, :username).each do |id, username|
user_map[id] = username
end
User.pluck(:id, :username).each { |id, username| user_map[id] = username }
topic_count = mysql_query("SELECT COUNT(*) count FROM tblia_notes_content").first["count"]
@@ -737,8 +774,8 @@ class ImportScripts::Lithium < ImportScripts::Base
next if all_records_exist? :posts, topics.map { |topic| "pm_#{topic["note_id"]}" }
create_posts(topics, total: topic_count, offset: offset) do |topic|
user_id = user_id_from_imported_user_id(topic["sender_user_id"]) || Discourse::SYSTEM_USER_ID
user_id =
user_id_from_imported_user_id(topic["sender_user_id"]) || Discourse::SYSTEM_USER_ID
participants = users[topic["note_id"]]
usernames = (participants - [user_id]).map { |id| user_map[id] }
@@ -763,48 +800,54 @@ class ImportScripts::Lithium < ImportScripts::Base
user_id: user_id,
raw: raw,
created_at: unix_time(topic["sent_time"]),
import_mode: true
import_mode: true,
}
unless topic_id
msg[:title] = @htmlentities.decode(topic["subject"]).strip[0...255]
msg[:archetype] = Archetype.private_message
msg[:target_usernames] = usernames.join(',')
msg[:target_usernames] = usernames.join(",")
else
msg[:topic_id] = topic_id
end
msg
else
PluginStoreRow.find_or_create_by(plugin_name: "pm_import_log", key: topic["note_id"].to_s, value: "PM 'body' is empty", type_name: 'String')
PluginStoreRow.find_or_create_by(
plugin_name: "pm_import_log",
key: topic["note_id"].to_s,
value: "PM 'body' is empty",
type_name: "String",
)
nil
end
end
end
end
# Closes the Discourse topics whose Lithium root message is flagged in the source data.
# Selects root messages (root_id = id) with bit 0x0002 set in `attributes`
# (presumably the "closed" flag, going by the progress message), maps each
# unique_id to a Discourse post id through the import_unique_id post custom
# field, and sets topics.closed = true for the owning topics in slices of 500.
def close_topics
puts "\nclosing closed topics..."
sql = "select unique_id post_id from message2 where root_id = id AND (attributes & 0x0002 ) != 0;"
sql =
"select unique_id post_id from message2 where root_id = id AND (attributes & 0x0002 ) != 0;"
results = mysql_query(sql)
# loading post map
existing_map = {}
PostCustomField.where(name: 'import_unique_id').pluck(:post_id, :value).each do |post_id, import_id|
existing_map[import_id.to_i] = post_id.to_i
end
PostCustomField
.where(name: "import_unique_id")
.pluck(:post_id, :value)
.each { |post_id, import_id| existing_map[import_id.to_i] = post_id.to_i }
results.map { |r| r["post_id"] }.each_slice(500) do |ids|
mapped = ids.map { |id| existing_map[id] }.compact
DB.exec(<<~SQL, ids: mapped) if mapped.present?
results
.map { |r| r["post_id"] }
.each_slice(500) do |ids|
mapped = ids.map { |id| existing_map[id] }.compact
DB.exec(<<~SQL, ids: mapped) if mapped.present?
UPDATE topics SET closed = true
WHERE id IN (SELECT topic_id FROM posts where id in (:ids))
SQL
end
end
end
def create_permalinks
@@ -835,7 +878,6 @@ SQL
r = DB.exec sql
puts "#{r} permalinks to posts added!"
end
def find_upload(user_id, attachment_id, real_filename)
@@ -846,7 +888,7 @@ SQL
puts "Attachment file doesn't exist: #{filename}"
return nil
end
real_filename.prepend SecureRandom.hex if real_filename[0] == '.'
real_filename.prepend SecureRandom.hex if real_filename[0] == "."
upload = create_upload(user_id, filename, real_filename)
if upload.nil? || !upload.valid?
@@ -864,48 +906,57 @@ SQL
default_extensions = SiteSetting.authorized_extensions
default_max_att_size = SiteSetting.max_attachment_size_kb
SiteSetting.authorized_extensions = "*"
SiteSetting.max_attachment_size_kb = 307200
SiteSetting.max_attachment_size_kb = 307_200
current = 0
max = Post.count
mysql_query("create index idxUniqueId on message2(unique_id)") rescue nil
attachments = mysql_query("SELECT a.attachment_id, a.file_name, m.message_uid FROM tblia_attachment a INNER JOIN tblia_message_attachments m ON a.attachment_id = m.attachment_id")
Post.where('id > ?', @max_start_id).find_each do |post|
begin
id = post.custom_fields["import_unique_id"]
next unless id
raw = mysql_query("select body from message2 where unique_id = '#{id}'").first['body']
unless raw
puts "Missing raw for post: #{post.id}"
next
end
new_raw = postprocess_post_raw(raw, post.user_id)
files = attachments.select { |a| a["message_uid"].to_s == id }
new_raw << html_for_attachments(post.user_id, files)
unless post.raw == new_raw
post.raw = new_raw
post.cooked = post.cook(new_raw)
cpp = CookedPostProcessor.new(post)
cpp.link_post_uploads
post.custom_fields["import_post_process"] = true
post.save
end
rescue PrettyText::JavaScriptError
puts "GOT A JS error on post: #{post.id}"
nil
ensure
print_status(current += 1, max)
end
begin
mysql_query("create index idxUniqueId on message2(unique_id)")
rescue StandardError
nil
end
attachments =
mysql_query(
"SELECT a.attachment_id, a.file_name, m.message_uid FROM tblia_attachment a INNER JOIN tblia_message_attachments m ON a.attachment_id = m.attachment_id",
)
Post
.where("id > ?", @max_start_id)
.find_each do |post|
begin
id = post.custom_fields["import_unique_id"]
next unless id
raw = mysql_query("select body from message2 where unique_id = '#{id}'").first["body"]
unless raw
puts "Missing raw for post: #{post.id}"
next
end
new_raw = postprocess_post_raw(raw, post.user_id)
files = attachments.select { |a| a["message_uid"].to_s == id }
new_raw << html_for_attachments(post.user_id, files)
unless post.raw == new_raw
post.raw = new_raw
post.cooked = post.cook(new_raw)
cpp = CookedPostProcessor.new(post)
cpp.link_post_uploads
post.custom_fields["import_post_process"] = true
post.save
end
rescue PrettyText::JavaScriptError
puts "GOT A JS error on post: #{post.id}"
nil
ensure
print_status(current += 1, max)
end
end
SiteSetting.authorized_extensions = default_extensions
SiteSetting.max_attachment_size_kb = default_max_att_size
end
def postprocess_post_raw(raw, user_id)
matches = raw.match(/<messagetemplate.*<\/messagetemplate>/m) || []
matches = raw.match(%r{<messagetemplate.*</messagetemplate>}m) || []
matches.each do |match|
hash = Hash.from_xml(match)
template = hash["messagetemplate"]["zone"]["item"]
@@ -915,106 +966,123 @@ SQL
doc = Nokogiri::HTML5.fragment(raw)
doc.css("a,img,li-image").each do |l|
upload_name, image, linked_upload = [nil] * 3
doc
.css("a,img,li-image")
.each do |l|
upload_name, image, linked_upload = [nil] * 3
if l.name == "li-image" && l["id"]
upload_name = l["id"]
else
uri = URI.parse(l["href"] || l["src"]) rescue nil
uri.hostname = nil if uri && uri.hostname == OLD_DOMAIN
if uri && !uri.hostname
if l["href"]
l["href"] = uri.path
# we have an internal link, lets see if we can remap it?
permalink = Permalink.find_by_url(uri.path) rescue nil
if l["href"]
if permalink && permalink.target_url
l["href"] = permalink.target_url
elsif l["href"] =~ /^\/gartner\/attachments\/gartner\/([^.]*).(\w*)/
linked_upload = "#{$1}.#{$2}"
end
end
elsif l["src"]
# we need an upload here
upload_name = $1 if uri.path =~ /image-id\/([^\/]+)/
end
end
end
if upload_name
png = UPLOAD_DIR + "/" + upload_name + ".png"
jpg = UPLOAD_DIR + "/" + upload_name + ".jpg"
gif = UPLOAD_DIR + "/" + upload_name + ".gif"
# check to see if we have it
if File.exist?(png)
image = png
elsif File.exist?(jpg)
image = jpg
elsif File.exist?(gif)
image = gif
end
if image
File.open(image) do |file|
upload = UploadCreator.new(file, "image." + (image.ends_with?(".png") ? "png" : "jpg")).create_for(user_id)
l.name = "img" if l.name == "li-image"
l["src"] = upload.url
end
if l.name == "li-image" && l["id"]
upload_name = l["id"]
else
puts "image was missing #{l["src"]}"
uri =
begin
URI.parse(l["href"] || l["src"])
rescue StandardError
nil
end
uri.hostname = nil if uri && uri.hostname == OLD_DOMAIN
if uri && !uri.hostname
if l["href"]
l["href"] = uri.path
# we have an internal link, lets see if we can remap it?
permalink =
begin
Permalink.find_by_url(uri.path)
rescue StandardError
nil
end
if l["href"]
if permalink && permalink.target_url
l["href"] = permalink.target_url
elsif l["href"] =~ %r{^/gartner/attachments/gartner/([^.]*).(\w*)}
linked_upload = "#{$1}.#{$2}"
end
end
elsif l["src"]
# we need an upload here
upload_name = $1 if uri.path =~ %r{image-id/([^/]+)}
end
end
end
elsif linked_upload
segments = linked_upload.match(/\/(\d*)\/(\d)\/([^.]*).(\w*)$/)
if segments.present?
lithium_post_id = segments[1]
attachment_number = segments[2]
if upload_name
png = UPLOAD_DIR + "/" + upload_name + ".png"
jpg = UPLOAD_DIR + "/" + upload_name + ".jpg"
gif = UPLOAD_DIR + "/" + upload_name + ".gif"
result = mysql_query("select a.attachment_id, f.file_name from tblia_message_attachments a
# check to see if we have it
if File.exist?(png)
image = png
elsif File.exist?(jpg)
image = jpg
elsif File.exist?(gif)
image = gif
end
if image
File.open(image) do |file|
upload =
UploadCreator.new(
file,
"image." + (image.ends_with?(".png") ? "png" : "jpg"),
).create_for(user_id)
l.name = "img" if l.name == "li-image"
l["src"] = upload.url
end
else
puts "image was missing #{l["src"]}"
end
elsif linked_upload
segments = linked_upload.match(%r{/(\d*)/(\d)/([^.]*).(\w*)$})
if segments.present?
lithium_post_id = segments[1]
attachment_number = segments[2]
result =
mysql_query(
"select a.attachment_id, f.file_name from tblia_message_attachments a
INNER JOIN message2 m ON a.message_uid = m.unique_id
INNER JOIN tblia_attachment f ON a.attachment_id = f.attachment_id
where m.id = #{lithium_post_id} AND a.attach_num = #{attachment_number} limit 0, 1")
where m.id = #{lithium_post_id} AND a.attach_num = #{attachment_number} limit 0, 1",
)
result.each do |row|
upload, filename = find_upload(user_id, row["attachment_id"], row["file_name"])
if upload.present?
l["href"] = upload.url
else
puts "attachment was missing #{l["href"]}"
result.each do |row|
upload, filename = find_upload(user_id, row["attachment_id"], row["file_name"])
if upload.present?
l["href"] = upload.url
else
puts "attachment was missing #{l["href"]}"
end
end
end
end
end
end
# for user mentions
doc.css("li-user").each do |l|
uid = l["uid"]
doc
.css("li-user")
.each do |l|
uid = l["uid"]
if uid.present?
user = UserCustomField.find_by(name: 'import_id', value: uid).try(:user)
if user.present?
username = user.username
span = l.document.create_element "span"
span.inner_html = "@#{username}"
l.replace span
if uid.present?
user = UserCustomField.find_by(name: "import_id", value: uid).try(:user)
if user.present?
username = user.username
span = l.document.create_element "span"
span.inner_html = "@#{username}"
l.replace span
end
end
end
end
raw = ReverseMarkdown.convert(doc.to_s)
raw.gsub!(/^\s*&nbsp;\s*$/, "")
# ugly quotes
raw.gsub!(/^>[\s\*]*$/, "")
raw.gsub!(/:([a-z]+):/) do |match|
":#{SMILEY_SUBS[$1] || $1}:"
end
raw.gsub!(/:([a-z]+):/) { |match| ":#{SMILEY_SUBS[$1] || $1}:" }
# nbsp central
raw.gsub!(/([a-zA-Z0-9])&nbsp;([a-zA-Z0-9])/, "\\1 \\2")
raw
@@ -1037,7 +1105,6 @@ SQL
# Runs +sql+ through the MySQL client held in @client and returns whatever the
# client returns. The query is always issued with the cache_rows option enabled.
def mysql_query(sql)
  query_options = { cache_rows: true }
  @client.query(sql, **query_options)
end
end
ImportScripts::Lithium.new.perform

View File

@@ -1,14 +1,15 @@
# frozen_string_literal: true
# Entry point of the mbox importer. Expects exactly one command-line argument:
# the path to an existing settings file. Otherwise prints usage help to STDERR
# (pointing at mbox/settings.yml next to this script as an example) and exits
# with status 1.
if ARGV.length != 1 || !File.exist?(ARGV[0])
STDERR.puts '', 'Usage of mbox importer:', 'bundle exec ruby mbox.rb <path/to/settings.yml>'
STDERR.puts '', "Use the settings file from #{File.expand_path('mbox/settings.yml', File.dirname(__FILE__))} as an example."
STDERR.puts "", "Usage of mbox importer:", "bundle exec ruby mbox.rb <path/to/settings.yml>"
STDERR.puts "",
"Use the settings file from #{File.expand_path("mbox/settings.yml", File.dirname(__FILE__))} as an example."
exit 1
end
# The importer is required inside the ImportScripts::Mbox namespace and then run
# immediately with the settings path given on the command line.
module ImportScripts
module Mbox
require_relative 'mbox/importer'
require_relative "mbox/importer"
Importer.new(ARGV[0]).perform
end
end

View File

@@ -1,9 +1,9 @@
# frozen_string_literal: true
require_relative '../base'
require_relative 'support/database'
require_relative 'support/indexer'
require_relative 'support/settings'
require_relative "../base"
require_relative "support/database"
require_relative "support/indexer"
require_relative "support/settings"
module ImportScripts::Mbox
class Importer < ImportScripts::Base
@@ -38,44 +38,44 @@ module ImportScripts::Mbox
end
# Announces the indexing step, then builds an Indexer from @database and
# @settings and runs it.
def index_messages
puts '', 'creating index'
puts "", "creating index"
indexer = Indexer.new(@database, @settings)
indexer.execute
end
# Creates one category per row returned by @database.fetch_categories.
# The category name is used both as the import id and as the display name.
# parent_category_id is passed through `presence`, so blank values become nil.
def import_categories
puts '', 'creating categories'
puts "", "creating categories"
rows = @database.fetch_categories
create_categories(rows) do |row|
{
id: row['name'],
name: row['name'],
parent_category_id: row['parent_category_id'].presence,
id: row["name"],
name: row["name"],
parent_category_id: row["parent_category_id"].presence,
}
end
end
def import_users
puts '', 'creating users'
puts "", "creating users"
total_count = @database.count_users
last_email = ''
last_email = ""
batches do |offset|
rows, last_email = @database.fetch_users(last_email)
break if rows.empty?
next if all_records_exist?(:users, rows.map { |row| row['email'] })
next if all_records_exist?(:users, rows.map { |row| row["email"] })
create_users(rows, total: total_count, offset: offset) do |row|
{
id: row['email'],
email: row['email'],
name: row['name'],
id: row["email"],
email: row["email"],
name: row["name"],
trust_level: @settings.trust_level,
staged: @settings.staged,
active: !@settings.staged,
created_at: to_time(row['date_of_first_message'])
created_at: to_time(row["date_of_first_message"]),
}
end
end
@@ -86,7 +86,7 @@ module ImportScripts::Mbox
end
def import_posts
puts '', 'creating topics and posts'
puts "", "creating topics and posts"
total_count = @database.count_messages
last_row_id = 0
@@ -94,47 +94,45 @@ module ImportScripts::Mbox
rows, last_row_id = @database.fetch_messages(last_row_id)
break if rows.empty?
next if all_records_exist?(:posts, rows.map { |row| row['msg_id'] })
next if all_records_exist?(:posts, rows.map { |row| row["msg_id"] })
create_posts(rows, total: total_count, offset: offset) do |row|
begin
if row['email_date'].blank?
puts "Date is missing. Skipping #{row['msg_id']}"
if row["email_date"].blank?
puts "Date is missing. Skipping #{row["msg_id"]}"
nil
elsif row['in_reply_to'].blank?
elsif row["in_reply_to"].blank?
map_first_post(row)
else
map_reply(row)
end
rescue => e
puts "Failed to map post for #{row['msg_id']}", e, e.backtrace.join("\n")
puts "Failed to map post for #{row["msg_id"]}", e, e.backtrace.join("\n")
end
end
end
end
# Builds the base attribute hash for one message row (map_first_post extends it).
# The author is looked up by the sender's email address and falls back to the
# system user when no imported user matches. The raw email is kept alongside the
# formatted body, and post_create_action records the message through
# create_incoming_email once the post exists.
def map_post(row)
user_id = user_id_from_imported_user_id(row['from_email']) || Discourse::SYSTEM_USER_ID
user_id = user_id_from_imported_user_id(row["from_email"]) || Discourse::SYSTEM_USER_ID
{
id: row['msg_id'],
id: row["msg_id"],
user_id: user_id,
created_at: to_time(row['email_date']),
created_at: to_time(row["email_date"]),
raw: format_raw(row, user_id),
raw_email: row['raw_message'],
raw_email: row["raw_message"],
via_email: true,
post_create_action: proc do |post|
create_incoming_email(post, row)
end
post_create_action: proc { |post| create_incoming_email(post, row) },
}
end
def format_raw(row, user_id)
body = row['body'] || ''
elided = row['elided']
body = row["body"] || ""
elided = row["elided"]
if row['attachment_count'].positive?
receiver = Email::Receiver.new(row['raw_message'])
if row["attachment_count"].positive?
receiver = Email::Receiver.new(row["raw_message"])
user = User.find(user_id)
body = receiver.add_attachments(body, user)
end
@@ -147,21 +145,21 @@ module ImportScripts::Mbox
end
# Maps a message that starts a new topic. remove_tags! is called on the subject
# before the title is derived from it; the title is the stripped subject limited
# to its first 255 characters. Category, title and (when any were found) tags
# are added to the hash produced by map_post, which is returned.
def map_first_post(row)
subject = row['subject']
subject = row["subject"]
tags = remove_tags!(subject)
mapped = map_post(row)
mapped[:category] = category_id_from_imported_category_id(row['category'])
mapped[:category] = category_id_from_imported_category_id(row["category"])
mapped[:title] = subject.strip[0...255]
mapped[:tags] = tags if tags.present?
mapped
end
def map_reply(row)
parent = @lookup.topic_lookup_from_imported_post_id(row['in_reply_to'])
parent = @lookup.topic_lookup_from_imported_post_id(row["in_reply_to"])
if parent.blank?
puts "Parent message #{row['in_reply_to']} doesn't exist. Skipping #{row['msg_id']}: #{row['subject'][0..40]}"
puts "Parent message #{row["in_reply_to"]} doesn't exist. Skipping #{row["msg_id"]}: #{row["subject"][0..40]}"
return nil
end
@@ -178,9 +176,7 @@ module ImportScripts::Mbox
old_length = subject.length
@settings.tags.each do |tag|
if subject.sub!(tag[:regex], "") && tag[:name].present?
tag_names << tag[:name]
end
tag_names << tag[:name] if subject.sub!(tag[:regex], "") && tag[:name].present?
end
remove_prefixes!(subject) if subject.length != old_length
@@ -203,13 +199,13 @@ module ImportScripts::Mbox
# Stores the original message as an IncomingEmail record tied to the created
# post: message id, raw source, subject and sender come from the row, while the
# user, topic and post ids come from the post.
def create_incoming_email(post, row)
IncomingEmail.create(
message_id: row['msg_id'],
raw: row['raw_message'],
subject: row['subject'],
from_address: row['from_email'],
message_id: row["msg_id"],
raw: row["raw_message"],
subject: row["subject"],
from_address: row["from_email"],
user_id: post.user_id,
topic_id: post.topic_id,
post_id: post.id
post_id: post.id,
)
end

View File

@@ -1,6 +1,6 @@
# frozen_string_literal: true
require 'sqlite3'
require "sqlite3"
module ImportScripts::Mbox
class Database
@@ -23,8 +23,7 @@ module ImportScripts::Mbox
@db.transaction
yield self
@db.commit
rescue
rescue StandardError
@db.rollback
end
@@ -60,9 +59,7 @@ module ImportScripts::Mbox
SQL
@db.prepare(sql) do |stmt|
reply_message_ids.each do |in_reply_to|
stmt.execute(msg_id, in_reply_to)
end
reply_message_ids.each { |in_reply_to| stmt.execute(msg_id, in_reply_to) }
end
end
@@ -95,7 +92,7 @@ module ImportScripts::Mbox
end
def sort_emails_by_date_and_reply_level
@db.execute 'DELETE FROM email_order'
@db.execute "DELETE FROM email_order"
@db.execute <<-SQL
WITH RECURSIVE
@@ -117,7 +114,7 @@ module ImportScripts::Mbox
end
def sort_emails_by_subject
@db.execute 'DELETE FROM email_order'
@db.execute "DELETE FROM email_order"
@db.execute <<-SQL
INSERT INTO email_order (msg_id)
@@ -128,7 +125,7 @@ module ImportScripts::Mbox
end
def fill_users_from_emails
@db.execute 'DELETE FROM user'
@db.execute "DELETE FROM user"
@db.execute <<-SQL
INSERT INTO user (email, name, date_of_first_message)
@@ -172,7 +169,7 @@ module ImportScripts::Mbox
LIMIT #{@batch_size}
SQL
add_last_column_value(rows, 'email')
add_last_column_value(rows, "email")
end
def count_messages
@@ -193,14 +190,14 @@ module ImportScripts::Mbox
LIMIT #{@batch_size}
SQL
add_last_column_value(rows, 'rowid')
add_last_column_value(rows, "rowid")
end
private
# Applies the connection-level SQLite pragmas used by the importer database.
#
# - journal_mode = OFF turns the rollback journal off.
# - locking_mode = EXCLUSIVE asks SQLite to keep its file lock for the
#   lifetime of the connection.
#
# NOTE(review): the SQLite documentation states that ROLLBACK is not reliable
# once the journal is off; any code in this class that calls @db.rollback
# should be checked against that.
def configure_database
  @db.execute "PRAGMA journal_mode = OFF"
  @db.execute "PRAGMA locking_mode = EXCLUSIVE"
end
def upgrade_schema_version
@@ -260,10 +257,10 @@ module ImportScripts::Mbox
)
SQL
@db.execute 'CREATE INDEX IF NOT EXISTS email_by_from ON email (from_email)'
@db.execute 'CREATE INDEX IF NOT EXISTS email_by_subject ON email (subject)'
@db.execute 'CREATE INDEX IF NOT EXISTS email_by_in_reply_to ON email (in_reply_to)'
@db.execute 'CREATE INDEX IF NOT EXISTS email_by_date ON email (email_date)'
@db.execute "CREATE INDEX IF NOT EXISTS email_by_from ON email (from_email)"
@db.execute "CREATE INDEX IF NOT EXISTS email_by_subject ON email (subject)"
@db.execute "CREATE INDEX IF NOT EXISTS email_by_in_reply_to ON email (in_reply_to)"
@db.execute "CREATE INDEX IF NOT EXISTS email_by_date ON email (email_date)"
@db.execute <<-SQL
CREATE TABLE IF NOT EXISTS email_order (
@@ -282,7 +279,7 @@ module ImportScripts::Mbox
)
SQL
@db.execute 'CREATE INDEX IF NOT EXISTS reply_by_in_reply_to ON reply (in_reply_to)'
@db.execute "CREATE INDEX IF NOT EXISTS reply_by_in_reply_to ON reply (in_reply_to)"
end
def create_table_for_users

View File

@@ -1,8 +1,8 @@
# frozen_string_literal: true
require_relative 'database'
require 'json'
require 'yaml'
require_relative "database"
require "json"
require "yaml"
module ImportScripts::Mbox
class Indexer
@@ -15,7 +15,7 @@ module ImportScripts::Mbox
end
def execute
directories = Dir.glob(File.join(@settings.data_dir, '*'))
directories = Dir.glob(File.join(@settings.data_dir, "*"))
directories.select! { |f| File.directory?(f) }
directories.sort!
@@ -25,7 +25,7 @@ module ImportScripts::Mbox
index_emails(directory, category[:name])
end
puts '', 'indexing replies and users'
puts "", "indexing replies and users"
if @settings.group_messages_by_subject
@database.sort_emails_by_subject
@database.update_in_reply_to_by_email_subject
@@ -39,24 +39,24 @@ module ImportScripts::Mbox
private
METADATA_FILENAME = 'metadata.yml'
IGNORED_FILE_EXTENSIONS = ['.dbindex', '.dbnames', '.digest', '.subjects', '.yml']
METADATA_FILENAME = "metadata.yml"
IGNORED_FILE_EXTENSIONS = %w[.dbindex .dbnames .digest .subjects .yml]
def index_category(directory)
metadata_file = File.join(directory, METADATA_FILENAME)
if File.exist?(metadata_file)
# workaround for YML files that contain classname in file header
yaml = File.read(metadata_file).sub(/^--- !.*$/, '---')
yaml = File.read(metadata_file).sub(/^--- !.*$/, "---")
metadata = YAML.safe_load(yaml)
else
metadata = {}
end
category = {
name: metadata['name'].presence || File.basename(directory),
description: metadata['description'],
parent_category_id: metadata['parent_category_id'].presence,
name: metadata["name"].presence || File.basename(directory),
description: metadata["description"],
parent_category_id: metadata["parent_category_id"].presence,
}
@database.insert_category(category)
@@ -75,7 +75,7 @@ module ImportScripts::Mbox
# Detect cases like this and attempt to get actual sender from other headers:
# From: Jane Smith via ListName <ListName@lists.example.com>
if receiver.mail['X-Mailman-Version'] && from_display_name =~ /\bvia \S+$/i
if receiver.mail["X-Mailman-Version"] && from_display_name =~ /\bvia \S+$/i
email_from_from_line = opts[:from_line].scan(/From (\S+)/).flatten.first
a = Mail::Address.new(email_from_from_line)
from_email = a.address
@@ -88,7 +88,7 @@ module ImportScripts::Mbox
end
end
from_email = from_email.sub(/^(.*)=/, '') if @settings.elide_equals_in_addresses
from_email = from_email.sub(/^(.*)=/, "") if @settings.elide_equals_in_addresses
body, elided, format = receiver.select_body
reply_message_ids = extract_reply_message_ids(parsed_email)
@@ -109,7 +109,7 @@ module ImportScripts::Mbox
filename: File.basename(filename),
first_line_number: opts[:first_line_number],
last_line_number: opts[:last_line_number],
index_duration: (monotonic_time - opts[:start_time]).round(4)
index_duration: (monotonic_time - opts[:start_time]).round(4),
}
@database.transaction do |db|
@@ -132,8 +132,8 @@ module ImportScripts::Mbox
# Returns a Hash mapping file basename => stored checksum for the files that
# @database.fetch_imported_files reports for +category_name+.
#
# Only the basename of each row's "filename" is used as the key, so rows whose
# filenames differ only by directory collapse onto the same key (last one wins).
def imported_file_checksums(category_name)
  rows = @database.fetch_imported_files(category_name)

  rows.each_with_object({}) do |row, checksums|
    checksums[File.basename(row["filename"])] = row["checksum"]
  end
end
@@ -171,14 +171,14 @@ module ImportScripts::Mbox
imported_file = {
category: category_name,
filename: File.basename(filename),
checksum: calc_checksum(filename)
checksum: calc_checksum(filename),
}
@database.insert_imported_file(imported_file)
end
def each_mail(filename)
raw_message = +''
raw_message = +""
first_line_number = 1
last_line_number = 0
@@ -188,7 +188,7 @@ module ImportScripts::Mbox
if line.scrub =~ @split_regex
if last_line_number > 0
yield raw_message, first_line_number, last_line_number, from_line
raw_message = +''
raw_message = +""
first_line_number = last_line_number + 1
end
@@ -204,12 +204,10 @@ module ImportScripts::Mbox
end
# Yields every line of +filename+ to the given block.
#
# Files whose name ends in ".gz" are read through Zlib::GzipReader; all other
# files are read directly. The underlying File handle is opened exactly once
# and is always closed in the ensure clause, including when the block raises.
def each_line(filename)
  raw_file = File.open(filename, "r")
  text_file = filename.end_with?(".gz") ? Zlib::GzipReader.new(raw_file) : raw_file

  text_file.each_line { |line| yield line }
ensure
  # raw_file is nil when File.open itself raised.
  raw_file.close if raw_file
end
@@ -220,7 +218,9 @@ module ImportScripts::Mbox
end
# Wraps a raw message string in an Email::Receiver.
#
# Returns nil when +raw_message+ is blank; otherwise returns a single receiver
# built with convert_plaintext: true and skip_trimming: false.
def read_mail_from_string(raw_message)
  return if raw_message.blank?

  Email::Receiver.new(raw_message, convert_plaintext: true, skip_trimming: false)
end
def extract_reply_message_ids(mail)
@@ -229,14 +229,13 @@ module ImportScripts::Mbox
# Returns the receiver's subject with surrounding whitespace removed and every
# run of tab characters replaced by a single space, or nil when the subject is
# blank.
#
# +list_name+ is accepted for interface compatibility but is not used here.
def extract_subject(receiver, list_name)
  subject = receiver.subject
  subject.blank? ? nil : subject.strip.gsub(/\t+/, " ")
end
# Decides whether the file at +path+ should be skipped.
#
# A file is ignored when any of the following holds (checked in this order):
# its basename starts with ".", it is the category metadata file, its
# extension is listed in IGNORED_FILE_EXTENSIONS, or
# fully_indexed?(path, filename, checksums) returns a truthy value.
def ignored_file?(path, checksums)
  filename = File.basename(path)

  filename.start_with?(".") || filename == METADATA_FILENAME ||
    IGNORED_FILE_EXTENSIONS.include?(File.extname(filename)) ||
    fully_indexed?(path, filename, checksums)
end

View File

@@ -1,6 +1,6 @@
# frozen_string_literal: true
require 'yaml'
require "yaml"
module ImportScripts::Mbox
class Settings
@@ -25,32 +25,32 @@ module ImportScripts::Mbox
attr_reader :elide_equals_in_addresses
def initialize(yaml)
@data_dir = yaml['data_dir']
@split_regex = Regexp.new(yaml['split_regex']) unless yaml['split_regex'].empty?
@data_dir = yaml["data_dir"]
@split_regex = Regexp.new(yaml["split_regex"]) unless yaml["split_regex"].empty?
@batch_size = 1000 # no need to make this actually configurable at the moment
@trust_level = yaml['default_trust_level']
@prefer_html = yaml['prefer_html']
@staged = yaml['staged']
@index_only = yaml['index_only']
@group_messages_by_subject = yaml['group_messages_by_subject']
@trust_level = yaml["default_trust_level"]
@prefer_html = yaml["prefer_html"]
@staged = yaml["staged"]
@index_only = yaml["index_only"]
@group_messages_by_subject = yaml["group_messages_by_subject"]
if yaml['remove_subject_prefixes'].present?
prefix_regexes = yaml['remove_subject_prefixes'].map { |p| Regexp.new(p) }
if yaml["remove_subject_prefixes"].present?
prefix_regexes = yaml["remove_subject_prefixes"].map { |p| Regexp.new(p) }
@subject_prefix_regex = /^#{Regexp.union(prefix_regexes).source}/i
end
@automatically_remove_list_name_prefix = yaml['automatically_remove_list_name_prefix']
@show_trimmed_content = yaml['show_trimmed_content']
@fix_mailman_via_addresses = yaml['fix_mailman_via_addresses']
@elide_equals_in_addresses = yaml['elide_equals_in_addresses']
@automatically_remove_list_name_prefix = yaml["automatically_remove_list_name_prefix"]
@show_trimmed_content = yaml["show_trimmed_content"]
@fix_mailman_via_addresses = yaml["fix_mailman_via_addresses"]
@elide_equals_in_addresses = yaml["elide_equals_in_addresses"]
@tags = []
if yaml['tags'].present?
yaml['tags'].each do |tag_name, value|
if yaml["tags"].present?
yaml["tags"].each do |tag_name, value|
prefixes = Regexp.union(value).source
@tags << {
regex: /^(?:(?:\[(?:#{prefixes})\])|(?:\((?:#{prefixes})\)))\s*/i,
name: tag_name
name: tag_name,
}
end
end

View File

@@ -1,23 +1,23 @@
# frozen_string_literal: true
require 'mysql2'
require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'htmlentities'
require "htmlentities"
class ImportScripts::Modx < ImportScripts::Base
BATCH_SIZE = 1000
# CHANGE THESE BEFORE RUNNING THE IMPORTER
DB_HOST ||= ENV['DB_HOST'] || "localhost"
DB_NAME ||= ENV['DB_NAME'] || "modx"
DB_PW ||= ENV['DB_PW'] || "modex"
DB_USER ||= ENV['DB_USER'] || "modx"
TIMEZONE ||= ENV['TIMEZONE'] || "America/Los_Angeles"
TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "modx_"
ATTACHMENT_DIR ||= ENV['ATTACHMENT_DIR'] || '/path/to/your/attachment/folder'
RANDOM_CATEGORY_COLOR ||= !ENV['RANDOM_CATEGORY_COLOR'].nil?
SUSPEND_ALL_USERS ||= !ENV['SUSPEND_ALL_USERS']
DB_HOST ||= ENV["DB_HOST"] || "localhost"
DB_NAME ||= ENV["DB_NAME"] || "modx"
DB_PW ||= ENV["DB_PW"] || "modex"
DB_USER ||= ENV["DB_USER"] || "modx"
TIMEZONE ||= ENV["TIMEZONE"] || "America/Los_Angeles"
TABLE_PREFIX ||= ENV["TABLE_PREFIX"] || "modx_"
ATTACHMENT_DIR ||= ENV["ATTACHMENT_DIR"] || "/path/to/your/attachment/folder"
RANDOM_CATEGORY_COLOR ||= !ENV["RANDOM_CATEGORY_COLOR"].nil?
SUSPEND_ALL_USERS ||= !ENV["SUSPEND_ALL_USERS"]
# TODO: replace modx_ with #{TABLE_PREFIX}
@@ -34,14 +34,10 @@ class ImportScripts::Modx < ImportScripts::Base
@htmlentities = HTMLEntities.new
@client = Mysql2::Client.new(
host: DB_HOST,
username: DB_USER,
password: DB_PW,
database: DB_NAME
)
@client =
Mysql2::Client.new(host: DB_HOST, username: DB_USER, password: DB_PW, database: DB_NAME)
rescue Exception => e
puts '=' * 50
puts "=" * 50
puts e.message
puts <<~TEXT
Cannot connect in to database.
@@ -83,22 +79,20 @@ class ImportScripts::Modx < ImportScripts::Base
SQL
create_groups(groups) do |group|
{
id: group["usergroupid"],
name: @htmlentities.decode(group["title"]).strip
}
{ id: group["usergroupid"], name: @htmlentities.decode(group["title"]).strip }
end
end
def import_users
puts "", "importing users"
user_count = mysql_query("SELECT COUNT(id) count FROM #{TABLE_PREFIX}discuss_users").first["count"]
user_count =
mysql_query("SELECT COUNT(id) count FROM #{TABLE_PREFIX}discuss_users").first["count"]
last_user_id = -1
batches(BATCH_SIZE) do |offset|
users = mysql_query(<<-SQL
users = mysql_query(<<-SQL).to_a
SELECT id as userid, email, concat (name_first, " ", name_last) as name, username,
location, website, status, last_active as last_seen_at,
createdon as created_at, birthdate as date_of_birth,
@@ -112,7 +106,6 @@ FROM #{TABLE_PREFIX}discuss_users
ORDER BY id
LIMIT #{BATCH_SIZE};
SQL
).to_a
break if users.empty?
@@ -123,14 +116,14 @@ FROM #{TABLE_PREFIX}discuss_users
create_users(users, total: user_count, offset: offset) do |user|
{
id: user["userid"],
name: user['name'],
username: user['username'],
email: user['email'],
website: user['website'],
name: user["name"],
username: user["username"],
email: user["email"],
website: user["website"],
created_at: parse_timestamp(user["created_at"]),
last_seen_at: parse_timestamp(user["last_seen_at"]),
date_of_birth: user['date_of_birth'],
password: "#{user['password']}:#{user['salt']}" # not tested
date_of_birth: user["date_of_birth"],
password: "#{user["password"]}:#{user["salt"]}", # not tested
}
end
end
@@ -144,13 +137,13 @@ FROM #{TABLE_PREFIX}discuss_users
categories = mysql_query("select id, name, description from modx_discuss_categories").to_a
create_categories(categories) do |category|
puts "Creating #{category['name']}"
puts "Creating #{category["name"]}"
puts category
{
id: "cat#{category['id']}",
id: "cat#{category["id"]}",
name: category["name"],
color: RANDOM_CATEGORY_COLOR ? (0..2).map { "%0x" % (rand * 0x80) }.join : nil,
description: category["description"]
description: category["description"],
}
end
@@ -159,13 +152,13 @@ FROM #{TABLE_PREFIX}discuss_users
boards = mysql_query("select id, category, name, description from modx_discuss_boards;").to_a
create_categories(boards) do |category|
puts category
parent_category_id = category_id_from_imported_category_id("cat#{category['category']}")
parent_category_id = category_id_from_imported_category_id("cat#{category["category"]}")
{
id: category["id"],
parent_category_id: parent_category_id.to_s,
name: category["name"],
color: RANDOM_CATEGORY_COLOR ? (0..2).map { "%0x" % (rand * 0x80) }.join : nil,
description: category["description"]
description: category["description"],
}
end
end
@@ -173,12 +166,15 @@ FROM #{TABLE_PREFIX}discuss_users
def import_topics_and_posts
puts "", "creating topics and posts"
total_count = mysql_query("SELECT count(id) count from #{TABLE_PREFIX}discuss_posts").first["count"]
total_count =
mysql_query("SELECT count(id) count from #{TABLE_PREFIX}discuss_posts").first["count"]
topic_first_post_id = {}
batches(BATCH_SIZE) do |offset|
results = mysql_query("
results =
mysql_query(
"
SELECT id,
thread topic_id,
board category_id,
@@ -191,27 +187,28 @@ FROM #{TABLE_PREFIX}discuss_users
ORDER BY createdon
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
")
",
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |m| m['id'].to_i }
next if all_records_exist? :posts, results.map { |m| m["id"].to_i }
create_posts(results, total: total_count, offset: offset) do |m|
skip = false
mapped = {}
mapped[:id] = m['id']
mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1
mapped[:raw] = post_process_raw(m['raw'])
mapped[:created_at] = Time.zone.at(m['created_at'])
mapped[:id] = m["id"]
mapped[:user_id] = user_id_from_imported_user_id(m["user_id"]) || -1
mapped[:raw] = post_process_raw(m["raw"])
mapped[:created_at] = Time.zone.at(m["created_at"])
if m['parent'] == 0
mapped[:category] = category_id_from_imported_category_id(m['category_id'])
mapped[:title] = m['title']
topic_first_post_id[m['topic_id']] = m['id']
if m["parent"] == 0
mapped[:category] = category_id_from_imported_category_id(m["category_id"])
mapped[:title] = m["title"]
topic_first_post_id[m["topic_id"]] = m["id"]
else
parent = topic_lookup_from_imported_post_id(topic_first_post_id[m['topic_id']])
parent = topic_lookup_from_imported_post_id(topic_first_post_id[m["topic_id"]])
if parent
mapped[:topic_id] = parent[:topic_id]
else
@@ -227,10 +224,11 @@ FROM #{TABLE_PREFIX}discuss_users
# Rewrites [quote]...[/quote] sections of a post body so that the opening and
# closing quote tags each sit on their own line.
#
# For every quote section:
# - "[quote author=NAME attr=...]" becomes "\n[quote=\"NAME\"]\n". The attribute
#   list is matched only up to the first "]" on the same line, so the quoted
#   text of a single-line quote is no longer swallowed by the match.
# - a "[/quote]" that directly follows a non-newline character gets a newline
#   in front of it and after it. The preceding character is kept (a lookbehind
#   is used), so the last character of the quoted text is no longer deleted.
#
# NAME is whatever follows "author=" up to the first space, so author names
# containing spaces are truncated at that space.
#
# Returns a new String; +raw+ itself is not modified.
def post_process_raw(raw)
  # [QUOTE]...[/QUOTE]
  raw.gsub(%r{\[quote.*?\](.+?)\[/quote\]}im) do |quote|
    quote
      .gsub(/\[quote author=(.*?) [^\]\n]+\]/i) { "\n[quote=\"#{Regexp.last_match(1)}\"]\n" }
      .gsub(%r{(?<=[^\n])\[/quote\]}i) { "\n[/quote]\n" }
  end
end
@@ -249,25 +247,27 @@ FROM #{TABLE_PREFIX}discuss_users
# find the uploaded file information from the db
def not_find_upload(post, attachment_id)
sql = "SELECT a.attachmentid attachment_id, a.userid user_id, a.filedataid file_id, a.filename filename,
sql =
"SELECT a.attachmentid attachment_id, a.userid user_id, a.filedataid file_id, a.filename filename,
a.caption caption
FROM #{TABLE_PREFIX}attachment a
WHERE a.attachmentid = #{attachment_id}"
results = mysql_query(sql)
unless row = results.first
puts "Couldn't find attachment record for post.id = #{post.id}, import_id = #{post.custom_fields['import_id']}"
puts "Couldn't find attachment record for post.id = #{post.id}, import_id = #{post.custom_fields["import_id"]}"
return
end
filename = File.join(ATTACHMENT_DIR, row['user_id'].to_s.split('').join('/'), "#{row['file_id']}.attach")
filename =
File.join(ATTACHMENT_DIR, row["user_id"].to_s.split("").join("/"), "#{row["file_id"]}.attach")
unless File.exist?(filename)
puts "Attachment file doesn't exist: #{filename}"
return
end
real_filename = row['filename']
real_filename.prepend SecureRandom.hex if real_filename[0] == '.'
real_filename = row["filename"]
real_filename.prepend SecureRandom.hex if real_filename[0] == "."
upload = create_upload(post.user.id, filename, real_filename)
if upload.nil? || !upload.valid?
@@ -286,24 +286,24 @@ FROM #{TABLE_PREFIX}discuss_users
def not_import_private_messages
puts "", "importing private messages..."
topic_count = mysql_query("SELECT COUNT(pmtextid) count FROM #{TABLE_PREFIX}pmtext").first["count"]
topic_count =
mysql_query("SELECT COUNT(pmtextid) count FROM #{TABLE_PREFIX}pmtext").first["count"]
last_private_message_id = -1
batches(BATCH_SIZE) do |offset|
private_messages = mysql_query(<<-SQL
private_messages = mysql_query(<<-SQL).to_a
SELECT pmtextid, fromuserid, title, message, touserarray, dateline
FROM #{TABLE_PREFIX}pmtext
WHERE pmtextid > #{last_private_message_id}
ORDER BY pmtextid
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if private_messages.empty?
last_private_message_id = private_messages[-1]["pmtextid"]
private_messages.reject! { |pm| @lookup.post_already_imported?("pm-#{pm['pmtextid']}") }
private_messages.reject! { |pm| @lookup.post_already_imported?("pm-#{pm["pmtextid"]}") }
title_username_of_pm_first_post = {}
@@ -311,11 +311,16 @@ FROM #{TABLE_PREFIX}discuss_users
skip = false
mapped = {}
mapped[:id] = "pm-#{m['pmtextid']}"
mapped[:user_id] = user_id_from_imported_user_id(m['fromuserid']) || Discourse::SYSTEM_USER_ID
mapped[:raw] = preprocess_post_raw(m['message']) rescue nil
mapped[:created_at] = Time.zone.at(m['dateline'])
title = @htmlentities.decode(m['title']).strip[0...255]
mapped[:id] = "pm-#{m["pmtextid"]}"
mapped[:user_id] = user_id_from_imported_user_id(m["fromuserid"]) ||
Discourse::SYSTEM_USER_ID
mapped[:raw] = begin
preprocess_post_raw(m["message"])
rescue StandardError
nil
end
mapped[:created_at] = Time.zone.at(m["dateline"])
title = @htmlentities.decode(m["title"]).strip[0...255]
topic_id = nil
next if mapped[:raw].blank?
@@ -324,9 +329,9 @@ FROM #{TABLE_PREFIX}discuss_users
target_usernames = []
target_userids = []
begin
to_user_array = PHP.unserialize(m['touserarray'])
rescue
puts "#{m['pmtextid']} -- #{m['touserarray']}"
to_user_array = PHP.unserialize(m["touserarray"])
rescue StandardError
puts "#{m["pmtextid"]} -- #{m["touserarray"]}"
skip = true
end
@@ -346,8 +351,8 @@ FROM #{TABLE_PREFIX}discuss_users
target_usernames << username if username
end
end
rescue
puts "skipping pm-#{m['pmtextid']} `to_user_array` is not properly serialized -- #{to_user_array.inspect}"
rescue StandardError
puts "skipping pm-#{m["pmtextid"]} `to_user_array` is not properly serialized -- #{to_user_array.inspect}"
skip = true
end
@@ -355,18 +360,18 @@ FROM #{TABLE_PREFIX}discuss_users
participants << mapped[:user_id]
begin
participants.sort!
rescue
rescue StandardError
puts "one of the participant's id is nil -- #{participants.inspect}"
end
if title =~ /^Re:/
parent_id = title_username_of_pm_first_post[[title[3..-1], participants]] ||
title_username_of_pm_first_post[[title[4..-1], participants]] ||
title_username_of_pm_first_post[[title[5..-1], participants]] ||
title_username_of_pm_first_post[[title[6..-1], participants]] ||
title_username_of_pm_first_post[[title[7..-1], participants]] ||
title_username_of_pm_first_post[[title[8..-1], participants]]
parent_id =
title_username_of_pm_first_post[[title[3..-1], participants]] ||
title_username_of_pm_first_post[[title[4..-1], participants]] ||
title_username_of_pm_first_post[[title[5..-1], participants]] ||
title_username_of_pm_first_post[[title[6..-1], participants]] ||
title_username_of_pm_first_post[[title[7..-1], participants]] ||
title_username_of_pm_first_post[[title[8..-1], participants]]
if parent_id
if t = topic_lookup_from_imported_post_id("pm-#{parent_id}")
@@ -374,18 +379,18 @@ FROM #{TABLE_PREFIX}discuss_users
end
end
else
title_username_of_pm_first_post[[title, participants]] ||= m['pmtextid']
title_username_of_pm_first_post[[title, participants]] ||= m["pmtextid"]
end
unless topic_id
mapped[:title] = title
mapped[:archetype] = Archetype.private_message
mapped[:target_usernames] = target_usernames.join(',')
mapped[:target_usernames] = target_usernames.join(",")
if mapped[:target_usernames].size < 1 # pm with yourself?
# skip = true
mapped[:target_usernames] = "system"
puts "pm-#{m['pmtextid']} has no target (#{m['touserarray']})"
puts "pm-#{m["pmtextid"]} has no target (#{m["touserarray"]})"
end
else
mapped[:topic_id] = topic_id
@@ -397,22 +402,21 @@ FROM #{TABLE_PREFIX}discuss_users
end
def not_import_attachments
puts '', 'importing attachments...'
puts "", "importing attachments..."
current_count = 0
total_count = mysql_query(<<-SQL
total_count = mysql_query(<<-SQL).first["count"]
SELECT COUNT(postid) count
FROM #{TABLE_PREFIX}post p
JOIN #{TABLE_PREFIX}thread t ON t.threadid = p.threadid
WHERE t.firstpostid <> p.postid
SQL
).first["count"]
success_count = 0
fail_count = 0
attachment_regex = /\[attach[^\]]*\](\d+)\[\/attach\]/i
attachment_regex = %r{\[attach[^\]]*\](\d+)\[/attach\]}i
Post.find_each do |post|
current_count += 1
@@ -433,7 +437,12 @@ FROM #{TABLE_PREFIX}discuss_users
end
if new_raw != post.raw
PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: 'Import attachments from modx')
PostRevisor.new(post).revise!(
post.user,
{ raw: new_raw },
bypass_bump: true,
edit_reason: "Import attachments from modx",
)
end
success_count += 1
@@ -496,14 +505,14 @@ FROM #{TABLE_PREFIX}discuss_users
end
def not_create_permalink_file
puts '', 'Creating Permalink File...', ''
puts "", "Creating Permalink File...", ""
id_mapping = []
Topic.listable_topics.find_each do |topic|
pcf = topic.first_post.custom_fields
if pcf && pcf["import_id"]
id = pcf["import_id"].split('-').last
id = pcf["import_id"].split("-").last
id_mapping.push("XXX#{id} YYY#{topic.id}")
end
end
@@ -517,11 +526,8 @@ FROM #{TABLE_PREFIX}discuss_users
# end
CSV.open(File.expand_path("../vb_map.csv", __FILE__), "w") do |csv|
id_mapping.each do |value|
csv << [value]
end
id_mapping.each { |value| csv << [value] }
end
end
def deactivate_all_users
@@ -529,16 +535,23 @@ FROM #{TABLE_PREFIX}discuss_users
end
def suspend_users
puts '', "updating blocked users"
puts "", "updating blocked users"
banned = 0
failed = 0
total = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}user_attributes where blocked != 0").first['count']
total =
mysql_query(
"SELECT count(*) count FROM #{TABLE_PREFIX}user_attributes where blocked != 0",
).first[
"count"
]
system_user = Discourse.system_user
mysql_query("SELECT id, blockedafter, blockeduntil FROM #{TABLE_PREFIX}user_attributes").each do |b|
user = User.find_by_id(user_id_from_imported_user_id(b['id']))
mysql_query(
"SELECT id, blockedafter, blockeduntil FROM #{TABLE_PREFIX}user_attributes",
).each do |b|
user = User.find_by_id(user_id_from_imported_user_id(b["id"]))
if user
user.suspended_at = parse_timestamp(user["blockedafter"])
user.suspended_till = parse_timestamp(user["blockeduntil"])
@@ -550,7 +563,7 @@ FROM #{TABLE_PREFIX}discuss_users
failed += 1
end
else
puts "Not found: #{b['userid']}"
puts "Not found: #{b["userid"]}"
failed += 1
end
@@ -565,7 +578,6 @@ FROM #{TABLE_PREFIX}discuss_users
# Sends +sql+ to the importer's MySQL client (@client) with row caching
# switched on and returns the client's result unchanged.
def mysql_query(sql)
  query_options = { cache_rows: true }
  @client.query(sql, **query_options)
end
end
ImportScripts::Modx.new.perform

View File

@@ -6,7 +6,6 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb")
# Edit the constants and initialize method for your import data.
class ImportScripts::Muut < ImportScripts::Base
JSON_FILE_PATH = "/path/to/json/file"
CSV_FILE_PATH = "/path/to/csv/file"
@@ -36,39 +35,33 @@ class ImportScripts::Muut < ImportScripts::Base
end
# Repairs known defects of the Muut JSON export so that it can be parsed.
#
# Each substitution is applied once, in this order. The string is modified in
# place (so it must not be frozen) and the same object is returned.
def repair_json(arg)
  arg.gsub!(/^\(/, "") # content of file is surrounded by ( )
  arg.gsub!(/\)$/, "")
  arg.gsub!(/\]\]$/, "]") # there can be an extra ] at the end
  arg.gsub!(/\}\{/, "},{") # missing commas sometimes!
  arg.gsub!("}]{", "},{") # surprise square brackets
  arg.gsub!("}[{", "},{") # :troll:
  arg
end
# Creates one user per entry of @imported_users.
#
# Each entry is read positionally: element 0 is used as the import id and
# element 1 as the email address. created_at is set to the time of the import
# (Time.now), not to a value from the source data.
def import_users
  puts "", "Importing users"

  create_users(@imported_users) { |u| { id: u[0], email: u[1], created_at: Time.now } }
end
# Creates one category per entry of @imported_json["categories"].
#
# The category path is used both as the import id and as the slug; the
# category title becomes the name.
def import_categories
  puts "", "Importing categories"

  create_categories(@imported_json["categories"]) do |category|
    {
      id: category["path"], # muut has no id for categories, so use the path
      name: category["title"],
      slug: category["path"],
    }
  end
end
@@ -79,23 +72,23 @@ class ImportScripts::Muut < ImportScripts::Base
topics = 0
posts = 0
@imported_json['categories'].each do |category|
@imported_json['threads'][category['path']].each do |thread|
@imported_json["categories"].each do |category|
@imported_json["threads"][category["path"]].each do |thread|
next if thread["seed"]["key"] == "skip-this-topic"
mapped = {}
mapped[:id] = "#{thread["seed"]["key"]}-#{thread["seed"]["date"]}"
if thread["seed"]["author"] && user_id_from_imported_user_id(thread["seed"]["author"]["path"]) != ""
if thread["seed"]["author"] &&
user_id_from_imported_user_id(thread["seed"]["author"]["path"]) != ""
mapped[:user_id] = user_id_from_imported_user_id(thread["seed"]["author"]["path"]) || -1
else
mapped[:user_id] = -1
end
# update user display name
if thread["seed"]["author"] && thread["seed"]["author"]["displayname"] != "" && mapped[:user_id] != -1
if thread["seed"]["author"] && thread["seed"]["author"]["displayname"] != "" &&
mapped[:user_id] != -1
user = User.find_by(id: mapped[:user_id])
if user
user.name = thread["seed"]["author"]["displayname"]
@@ -122,18 +115,21 @@ class ImportScripts::Muut < ImportScripts::Base
# create replies
if thread["replies"].present? && thread["replies"].count > 0
thread["replies"].reverse_each do |post|
if post_id_from_imported_post_id(post["id"])
next # already imported this post
end
new_post = create_post({
id: "#{post["key"]}-#{post["date"]}",
topic_id: parent_post.topic_id,
user_id: user_id_from_imported_user_id(post["author"]["path"]) || -1,
raw: process_muut_post_body(post["body"]),
created_at: Time.zone.at(post["date"])
}, post["id"])
new_post =
create_post(
{
id: "#{post["key"]}-#{post["date"]}",
topic_id: parent_post.topic_id,
user_id: user_id_from_imported_user_id(post["author"]["path"]) || -1,
raw: process_muut_post_body(post["body"]),
created_at: Time.zone.at(post["date"]),
},
post["id"],
)
if new_post.is_a?(Post)
posts += 1
@@ -141,9 +137,7 @@ class ImportScripts::Muut < ImportScripts::Base
puts "Error creating post #{post["id"]}. Skipping."
puts new_post.inspect
end
end
end
topics += 1
@@ -165,7 +159,7 @@ class ImportScripts::Muut < ImportScripts::Base
raw.gsub!("---", "```\n")
# tab
raw.gsub!(/\\t/, ' ')
raw.gsub!(/\\t/, " ")
# double quote
raw.gsub!(/\\\"/, '"')
@@ -177,9 +171,6 @@ class ImportScripts::Muut < ImportScripts::Base
# Resolves +relpath+ against JSON_FILES_DIR after dropping everything from the
# first "?" onwards (i.e. a trailing query string).
#
# NOTE(review): the constants visible near the top of this class are
# JSON_FILE_PATH and CSV_FILE_PATH; confirm that JSON_FILES_DIR is defined
# somewhere, otherwise calling this method raises NameError.
def file_full_path(relpath)
  base_dir = JSON_FILES_DIR
  path_without_query = relpath.split("?").first
  File.join(base_dir, path_without_query)
end
end
# Run the import once, and only when this file is executed directly rather
# than loaded by another script.
ImportScripts::Muut.new.perform if __FILE__ == $0

View File

@@ -18,13 +18,12 @@ export BASE="" #
# Call it like this:
# RAILS_ENV=production ruby script/import_scripts/mybb.rb
class ImportScripts::MyBB < ImportScripts::Base
DB_HOST ||= ENV['DB_HOST'] || "localhost"
DB_NAME ||= ENV['DB_NAME'] || "mybb"
DB_PW ||= ENV['DB_PW'] || ""
DB_USER ||= ENV['DB_USER'] || "root"
TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "mybb_"
UPLOADS_DIR ||= ENV['UPLOADS'] || '/data/limelightgaming/uploads'
DB_HOST ||= ENV["DB_HOST"] || "localhost"
DB_NAME ||= ENV["DB_NAME"] || "mybb"
DB_PW ||= ENV["DB_PW"] || ""
DB_USER ||= ENV["DB_USER"] || "root"
TABLE_PREFIX ||= ENV["TABLE_PREFIX"] || "mybb_"
UPLOADS_DIR ||= ENV["UPLOADS"] || "/data/limelightgaming/uploads"
BATCH_SIZE = 1000
BASE = ""
QUIET = true
@@ -33,12 +32,8 @@ class ImportScripts::MyBB < ImportScripts::Base
# Runs the base importer's initialization, then opens a single MySQL
# connection using the DB_HOST, DB_USER, DB_PW and DB_NAME constants of this
# class and keeps it in @client.
def initialize
  super

  @client =
    Mysql2::Client.new(host: DB_HOST, username: DB_USER, password: DB_PW, database: DB_NAME)
end
def execute
@@ -52,63 +47,79 @@ class ImportScripts::MyBB < ImportScripts::Base
end
# Imports every MyBB user whose usergroup title is not 'Banned'.
#
# Users are read in batches of BATCH_SIZE, ordered by uid, and handed to
# create_users. For each user the block returns the attribute hash:
# - moderator / admin are derived from the usergroup title
#   ('Super Moderators' / 'Administrators');
# - avatar_url is set only for avatars whose value starts with "http", with
#   any query string removed;
# - post_create_action imports the avatar for the created user: remote avatars
#   go through UserAvatar.import_url_for_user, local ones are looked up under
#   UPLOADS_DIR and passed to @uploader.create_avatar when the file exists.
#
# A missing (nil) avatar value is treated like an empty one.
def import_users
  puts "", "creating users"

  total_count =
    mysql_query(
      "SELECT count(*) count
         FROM #{TABLE_PREFIX}users u
         JOIN #{TABLE_PREFIX}usergroups g ON g.gid = u.usergroup
        WHERE g.title != 'Banned';",
    ).first["count"]

  batches(BATCH_SIZE) do |offset|
    results =
      mysql_query(
        "SELECT uid id, email email, username, regdate, g.title `group`, avatar
           FROM #{TABLE_PREFIX}users u
           JOIN #{TABLE_PREFIX}usergroups g ON g.gid = u.usergroup
          WHERE g.title != 'Banned'
          ORDER BY u.uid ASC
          LIMIT #{BATCH_SIZE}
         OFFSET #{offset};",
      )

    # An empty batch means the previous one was the last.
    break if results.size < 1

    # NOTE(review): presumably skips batches that an earlier run already
    # imported -- confirm against all_records_exist? in the base importer.
    next if all_records_exist? :users, results.map { |u| u["id"].to_i }

    create_users(results, total: total_count, offset: offset) do |user|
      raw_avatar = user["avatar"].to_s
      avatar_url = raw_avatar.match?(/^http/) ? raw_avatar.gsub(/\?.*/, "") : nil

      {
        id: user["id"],
        email: user["email"],
        username: user["username"],
        created_at: Time.zone.at(user["regdate"]),
        moderator: user["group"] == "Super Moderators",
        admin: user["group"] == "Administrators",
        avatar_url: avatar_url,
        post_create_action:
          proc do |newuser|
            next if raw_avatar.blank?

            avatar = raw_avatar.gsub(/\?.*/, "") # drop the query string
            if avatar.match?(/^http/)
              UserAvatar.import_url_for_user(avatar, newuser)
            else
              filename = File.join(UPLOADS_DIR, avatar)
              # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
              @uploader.create_avatar(newuser, filename) if File.exist?(filename)
            end
          end,
      }
    end
  end
end
def import_categories
results = mysql_query("
results =
mysql_query(
"
SELECT fid id, pid parent_id, left(name, 50) name, description
FROM #{TABLE_PREFIX}forums
ORDER BY pid ASC, fid ASC
")
",
)
create_categories(results) do |row|
h = { id: row['id'], name: CGI.unescapeHTML(row['name']), description: CGI.unescapeHTML(row['description']) }
if row['parent_id'].to_i > 0
h[:parent_category_id] = category_id_from_imported_category_id(row['parent_id'])
h = {
id: row["id"],
name: CGI.unescapeHTML(row["name"]),
description: CGI.unescapeHTML(row["description"]),
}
if row["parent_id"].to_i > 0
h[:parent_category_id] = category_id_from_imported_category_id(row["parent_id"])
end
h
end
@@ -120,7 +131,9 @@ class ImportScripts::MyBB < ImportScripts::Base
total_count = mysql_query("SELECT count(*) count from #{TABLE_PREFIX}posts").first["count"]
batches(BATCH_SIZE) do |offset|
results = mysql_query("
results =
mysql_query(
"
SELECT p.pid id,
p.tid topic_id,
t.fid category_id,
@@ -132,15 +145,16 @@ class ImportScripts::MyBB < ImportScripts::Base
FROM #{TABLE_PREFIX}posts p,
#{TABLE_PREFIX}threads t
WHERE p.tid = t.tid
#{'AND (p.visible = 1 AND t.visible = 1)' unless IMPORT_DELETED_POSTS}
#{"AND (p.visible = 1 AND t.visible = 1)" unless IMPORT_DELETED_POSTS}
ORDER BY p.dateline
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
")
",
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |m| m['id'].to_i }
next if all_records_exist? :posts, results.map { |m| m["id"].to_i }
create_posts(results, total: total_count, offset: offset) do |m|
skip = false
@@ -160,20 +174,20 @@ class ImportScripts::MyBB < ImportScripts::Base
# LIMIT 1
# ").first['id']
mapped[:id] = m['id']
mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1
mapped[:raw] = process_mybb_post(m['raw'], m['id'])
mapped[:created_at] = Time.zone.at(m['post_time'])
mapped[:id] = m["id"]
mapped[:user_id] = user_id_from_imported_user_id(m["user_id"]) || -1
mapped[:raw] = process_mybb_post(m["raw"], m["id"])
mapped[:created_at] = Time.zone.at(m["post_time"])
if m['id'] == m['first_post_id']
mapped[:category] = category_id_from_imported_category_id(m['category_id'])
mapped[:title] = CGI.unescapeHTML(m['title'])
if m["id"] == m["first_post_id"]
mapped[:category] = category_id_from_imported_category_id(m["category_id"])
mapped[:title] = CGI.unescapeHTML(m["title"])
else
parent = topic_lookup_from_imported_post_id(m['first_post_id'])
parent = topic_lookup_from_imported_post_id(m["first_post_id"])
if parent
mapped[:topic_id] = parent[:topic_id]
else
puts "Parent post #{m['first_post_id']} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
puts "Parent post #{m["first_post_id"]} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
skip = true
end
end
@@ -188,17 +202,18 @@ class ImportScripts::MyBB < ImportScripts::Base
end
# Placeholder import step: suspending banned users is not implemented, so this
# only prints a notice (preceded by a blank line) on stdout.
def suspend_users
  puts "", "banned users are not implemented"
end
# Discourse usernames don't allow spaces, so every run of whitespace in
# +username+ is replaced by a single underscore. The string is modified in
# place and is also the return value.
#
# MyBB has a bug that can place post text in the quote line
# (http://community.mybb.com/thread-180526.html). When more than five
# whitespace runs were replaced, a warning naming +post_id+ is printed so the
# quote can be checked by hand.
def convert_username(username, post_id)
  replaced = 0
  username.gsub!(/\s+/) do
    replaced += 1
    "_"
  end

  puts "Warning: probably incorrect quote in post #{post_id}" if replaced > 5
  username
end
@@ -209,7 +224,7 @@ class ImportScripts::MyBB < ImportScripts::Base
begin
post = Post.find(quoted_post_id_from_imported)
"post:#{post.post_number}, topic:#{post.topic_id}"
rescue
rescue StandardError
puts "Could not find migrated post #{quoted_post_id_from_imported} quoted by original post #{post_id} as #{quoted_post_id}"
""
end
@@ -223,23 +238,24 @@ class ImportScripts::MyBB < ImportScripts::Base
s = raw.dup
# convert the quote line
s.gsub!(/\[quote='([^']+)'.*?pid='(\d+).*?\]/) {
"[quote=\"#{convert_username($1, import_id)}, " + post_id_to_post_num_and_topic($2, import_id) + '"]'
}
s.gsub!(/\[quote='([^']+)'.*?pid='(\d+).*?\]/) do
"[quote=\"#{convert_username($1, import_id)}, " +
post_id_to_post_num_and_topic($2, import_id) + '"]'
end
# :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
s.gsub!(/<!-- s(\S+) -->(?:.*)<!-- s(?:\S+) -->/, '\1')
# Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
s.gsub!(/<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/, '[\2](\1)')
s.gsub!(%r{<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)</a><!-- \w -->}, '[\2](\1)')
# Many phpbb bbcode tags have a hash attached to them. Examples:
# [url=https&#58;//google&#46;com:1qh1i7ky]click here[/url:1qh1i7ky]
# [quote=&quot;cybereality&quot;:b0wtlzex]Some text.[/quote:b0wtlzex]
s.gsub!(/:(?:\w{8})\]/, ']')
s.gsub!(/:(?:\w{8})\]/, "]")
# Remove mybb video tags.
s.gsub!(/(^\[video=.*?\])|(\[\/video\]$)/, '')
s.gsub!(%r{(^\[video=.*?\])|(\[/video\]$)}, "")
s = CGI.unescapeHTML(s)
@@ -247,16 +263,16 @@ class ImportScripts::MyBB < ImportScripts::Base
# [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
#
# Work around it for now:
s.gsub!(/\[http(s)?:\/\/(www\.)?/, '[')
s.gsub!(%r{\[http(s)?://(www\.)?}, "[")
s
end
def create_permalinks
puts '', 'Creating redirects...', ''
puts "", "Creating redirects...", ""
SiteSetting.permalink_normalizations = '/(\\w+)-(\\d+)[-.].*/\\1-\\2.html'
puts '', 'Users...', ''
puts "", "Users...", ""
total_users = User.count
start_time = Time.now
count = 0
@@ -264,12 +280,19 @@ class ImportScripts::MyBB < ImportScripts::Base
ucf = u.custom_fields
count += 1
if ucf && ucf["import_id"] && ucf["import_username"]
Permalink.create(url: "#{BASE}/user-#{ucf['import_id']}.html", external_url: "/u/#{u.username}") rescue nil
begin
Permalink.create(
url: "#{BASE}/user-#{ucf["import_id"]}.html",
external_url: "/u/#{u.username}",
)
rescue StandardError
nil
end
end
print_status(count, total_users, start_time)
end
puts '', 'Categories...', ''
puts "", "Categories...", ""
total_categories = Category.count
start_time = Time.now
count = 0
@@ -277,20 +300,24 @@ class ImportScripts::MyBB < ImportScripts::Base
ccf = cat.custom_fields
count += 1
next unless id = ccf["import_id"]
unless QUIET
puts ("forum-#{id}.html --> /c/#{cat.id}")
puts ("forum-#{id}.html --> /c/#{cat.id}") unless QUIET
begin
Permalink.create(url: "#{BASE}/forum-#{id}.html", category_id: cat.id)
rescue StandardError
nil
end
Permalink.create(url: "#{BASE}/forum-#{id}.html", category_id: cat.id) rescue nil
print_status(count, total_categories, start_time)
end
puts '', 'Topics...', ''
puts "", "Topics...", ""
total_posts = Post.count
start_time = Time.now
count = 0
puts '', 'Posts...', ''
puts "", "Posts...", ""
batches(BATCH_SIZE) do |offset|
results = mysql_query("
results =
mysql_query(
"
SELECT p.pid id,
p.tid topic_id
FROM #{TABLE_PREFIX}posts p,
@@ -300,13 +327,18 @@ class ImportScripts::MyBB < ImportScripts::Base
ORDER BY p.dateline
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
")
",
)
break if results.size < 1
results.each do |post|
count += 1
if topic = topic_lookup_from_imported_post_id(post['id'])
id = post['topic_id']
Permalink.create(url: "#{BASE}/thread-#{id}.html", topic_id: topic[:topic_id]) rescue nil
if topic = topic_lookup_from_imported_post_id(post["id"])
id = post["topic_id"]
begin
Permalink.create(url: "#{BASE}/thread-#{id}.html", topic_id: topic[:topic_id])
rescue StandardError
nil
end
unless QUIET
puts ("#{BASE}/thread-#{id}.html --> http://localhost:3000/t/#{topic[:topic_id]}")
end

View File

@@ -9,9 +9,8 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb")
# If your forum has non-English usernames, don't forget to enable Unicode usernames in /admin/site_settings
class ImportScripts::MybbRu < ImportScripts::Base
JSON_TOPICS_FILE_PATH ||= ENV['JSON_TOPICS_FILE'] || 'mybbru_export/threads.json'
JSON_USERS_FILE_PATH ||= ENV['JSON_USERS_FILE'] || 'mybbru_export/users.json'
JSON_TOPICS_FILE_PATH ||= ENV["JSON_TOPICS_FILE"] || "mybbru_export/threads.json"
JSON_USERS_FILE_PATH ||= ENV["JSON_USERS_FILE"] || "mybbru_export/users.json"
def initialize
super
@@ -35,59 +34,49 @@ class ImportScripts::MybbRu < ImportScripts::Base
end
# Returns a copy of +name+ with every space character removed; +name+ itself
# is left untouched.
def clean_username(name)
  name.delete(" ")
end
# Prints a progress banner, then hands every record in @imported_users to
# create_users. The block maps a record to the attribute hash for the new
# user: the "id" and "email" keys are copied, the username is the "name" key
# passed through clean_username, and created_at is the time of the import run.
def import_users
  puts "", "Importing users"

  create_users(@imported_users) do |u|
    { id: u["id"], username: clean_username(u["name"]), email: u["email"], created_at: Time.now }
  end
end
# Collects the distinct "category" entries of @imported_topics and passes them
# to create_categories exactly once, mapping each entry to its id and name.
def import_categories
  puts "", "importing categories..."

  categories = @imported_topics.map { |t| t["category"] }.uniq

  create_categories(categories) { |c| { id: c["id"], name: c["name"] } }
end
def import_discussions
puts "", "Importing discussions"
@imported_topics.each do |t|
first_post = t['posts'][0]
first_post = t["posts"][0]
create_posts(t['posts']) do |p|
create_posts(t["posts"]) do |p|
result = {
id: p['id'],
user_id: user_id_from_imported_user_id(p['author']['id']),
id: p["id"],
user_id: user_id_from_imported_user_id(p["author"]["id"]),
raw: fix_post_content(p["source"]),
created_at: Time.at(p['createdAt']),
cook_method: Post.cook_methods[:regular]
created_at: Time.at(p["createdAt"]),
cook_method: Post.cook_methods[:regular],
}
if p['id'] == first_post['id']
result[:category] = category_id_from_imported_category_id(t['category']['id'])
result[:title] = t['title']
if p["id"] == first_post["id"]
result[:category] = category_id_from_imported_category_id(t["category"]["id"])
result[:title] = t["title"]
else
parent = topic_lookup_from_imported_post_id(first_post['id'])
parent = topic_lookup_from_imported_post_id(first_post["id"])
if parent
result[:topic_id] = parent[:topic_id]
else
puts "Parent post #{first_post['id']} doesn't exist. Skipping #{p["id"]}: #{t["title"][0..40]}"
puts "Parent post #{first_post["id"]} doesn't exist. Skipping #{p["id"]}: #{t["title"][0..40]}"
break
end
end
@@ -100,16 +89,15 @@ class ImportScripts::MybbRu < ImportScripts::Base
# Rewrites the BBCode in +text+ and returns the result as a new string:
#   * [code] / [/code] and [quote...] / [/quote] are each put on their own line
#   * [video] and [/video] tags are removed (their content is kept)
#   * [spoiler...] / [/spoiler] become [details...] / [/details], each on its
#     own line
#
# Every substitution runs exactly once; a second pass over [/code] or [/quote]
# would match the tag again and double the surrounding newlines.
def fix_post_content(text)
  text
    .gsub(/\[code\]/, "\n[code]\n")
    .gsub(%r{\[/code\]}, "\n[/code]\n")
    .gsub(/\[video\]/, "")
    .gsub(%r{\[/video\]}, "")
    .gsub(/\[quote.*?\]/, "\n" + '\0' + "\n")
    .gsub(%r{\[/quote\]}, "\n[/quote]\n")
    .gsub(/\[spoiler.*?\]/, "\n" + '\0' + "\n")
    .gsub(/\[spoiler/, "[details")
    .gsub(%r{\[/spoiler\]}, "\n[/details]\n")
end
end
# Run the import once, and only when this file is executed directly rather
# than loaded by another script.
ImportScripts::MybbRu.new.perform if __FILE__ == $0

View File

@@ -2,7 +2,7 @@
require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'htmlentities'
require "htmlentities"
# Before running this script, paste these lines into your shell,
# then use arrow keys to edit the values
@@ -18,45 +18,35 @@ export BASE="forum"
=end
class ImportScripts::MylittleforumSQL < ImportScripts::Base
DB_HOST ||= ENV['DB_HOST'] || "localhost"
DB_NAME ||= ENV['DB_NAME'] || "mylittleforum"
DB_PW ||= ENV['DB_PW'] || ""
DB_USER ||= ENV['DB_USER'] || "root"
TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "forum_"
IMPORT_AFTER ||= ENV['IMPORT_AFTER'] || "1970-01-01"
IMAGE_BASE ||= ENV['IMAGE_BASE'] || ""
BASE ||= ENV['BASE'] || "forum/"
DB_HOST ||= ENV["DB_HOST"] || "localhost"
DB_NAME ||= ENV["DB_NAME"] || "mylittleforum"
DB_PW ||= ENV["DB_PW"] || ""
DB_USER ||= ENV["DB_USER"] || "root"
TABLE_PREFIX ||= ENV["TABLE_PREFIX"] || "forum_"
IMPORT_AFTER ||= ENV["IMPORT_AFTER"] || "1970-01-01"
IMAGE_BASE ||= ENV["IMAGE_BASE"] || ""
BASE ||= ENV["BASE"] || "forum/"
BATCH_SIZE = 1000
CONVERT_HTML = true
QUIET = nil || ENV['VERBOSE'] == "TRUE"
FORCE_HOSTNAME = nil || ENV['FORCE_HOSTNAME']
QUIET = nil || ENV["VERBOSE"] == "TRUE"
FORCE_HOSTNAME = nil || ENV["FORCE_HOSTNAME"]
QUIET = true
# Site settings
SiteSetting.disable_emails = "non-staff"
if FORCE_HOSTNAME
SiteSetting.force_hostname = FORCE_HOSTNAME
end
SiteSetting.force_hostname = FORCE_HOSTNAME if FORCE_HOSTNAME
def initialize
if IMPORT_AFTER > "1970-01-01"
print_warning("Importing data after #{IMPORT_AFTER}")
end
print_warning("Importing data after #{IMPORT_AFTER}") if IMPORT_AFTER > "1970-01-01"
super
@htmlentities = HTMLEntities.new
begin
@client = Mysql2::Client.new(
host: DB_HOST,
username: DB_USER,
password: DB_PW,
database: DB_NAME
)
@client =
Mysql2::Client.new(host: DB_HOST, username: DB_USER, password: DB_PW, database: DB_NAME)
rescue Exception => e
puts '=' * 50
puts "=" * 50
puts e.message
puts <<~TEXT
Cannot log in to database.
@@ -95,12 +85,19 @@ class ImportScripts::MylittleforumSQL < ImportScripts::Base
end
def import_users
puts '', "creating users"
puts "", "creating users"
total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}userdata WHERE last_login > '#{IMPORT_AFTER}';").first['count']
total_count =
mysql_query(
"SELECT count(*) count FROM #{TABLE_PREFIX}userdata WHERE last_login > '#{IMPORT_AFTER}';",
).first[
"count"
]
batches(BATCH_SIZE) do |offset|
results = mysql_query("
results =
mysql_query(
"
SELECT user_id as UserID, user_name as username,
user_real_name as Name,
user_email as Email,
@@ -118,31 +115,33 @@ class ImportScripts::MylittleforumSQL < ImportScripts::Base
WHERE last_login > '#{IMPORT_AFTER}'
order by UserID ASC
LIMIT #{BATCH_SIZE}
OFFSET #{offset};")
OFFSET #{offset};",
)
break if results.size < 1
next if all_records_exist? :users, results.map { |u| u['UserID'].to_i }
next if all_records_exist? :users, results.map { |u| u["UserID"].to_i }
create_users(results, total: total_count, offset: offset) do |user|
next if user['Email'].blank?
next if @lookup.user_id_from_imported_user_id(user['UserID'])
next if user["Email"].blank?
next if @lookup.user_id_from_imported_user_id(user["UserID"])
# username = fix_username(user['username'])
{ id: user['UserID'],
email: user['Email'],
username: user['username'],
name: user['Name'],
created_at: user['DateInserted'] == nil ? 0 : Time.zone.at(user['DateInserted']),
bio_raw: user['bio_raw'],
registration_ip_address: user['InsertIPAddress'],
website: user['user_hp'],
password: user['password'],
last_seen_at: user['DateLastActive'] == nil ? 0 : Time.zone.at(user['DateLastActive']),
location: user['Location'],
admin: user['user_type'] == "admin",
moderator: user['user_type'] == "mod",
{
id: user["UserID"],
email: user["Email"],
username: user["username"],
name: user["Name"],
created_at: user["DateInserted"] == nil ? 0 : Time.zone.at(user["DateInserted"]),
bio_raw: user["bio_raw"],
registration_ip_address: user["InsertIPAddress"],
website: user["user_hp"],
password: user["password"],
last_seen_at: user["DateLastActive"] == nil ? 0 : Time.zone.at(user["DateLastActive"]),
location: user["Location"],
admin: user["user_type"] == "admin",
moderator: user["user_type"] == "mod",
}
end
end
@@ -151,7 +150,7 @@ class ImportScripts::MylittleforumSQL < ImportScripts::Base
def fix_username(username)
olduser = username.dup
username.gsub!(/Dr\. /, "Dr") # no &
username.gsub!(/[ +!\/,*()?]/, "_") # can't have these
username.gsub!(%r{[ +!/,*()?]}, "_") # can't have these
username.gsub!(/&/, "_and_") # no &
username.gsub!(/@/, "_at_") # no @
username.gsub!(/#/, "_hash_") # no &
@@ -159,28 +158,29 @@ class ImportScripts::MylittleforumSQL < ImportScripts::Base
username.gsub!(/[._]+/, "_") # can't have 2 special in a row
username.gsub!(/_+/, "_") # could result in dupes, but wtf?
username.gsub!(/_$/, "") # could result in dupes, but wtf?
if olduser != username
print_warning ("#{olduser} --> #{username}")
end
print_warning ("#{olduser} --> #{username}") if olduser != username
username
end
# Reads every row of the "#{TABLE_PREFIX}categories" table (ordered by id) and
# passes the rows to create_categories. Name and description are run through
# CGI.unescapeHTML before being handed over.
def import_categories
  puts "", "importing categories..."

  categories =
    mysql_query(
      "
      SELECT id as CategoryID,
        category as Name,
        description as Description
      FROM #{TABLE_PREFIX}categories
      ORDER BY CategoryID ASC
      ",
    ).to_a

  create_categories(categories) do |category|
    {
      id: category["CategoryID"],
      name: CGI.unescapeHTML(category["Name"]),
      description: CGI.unescapeHTML(category["Description"]),
    }
  end
end
@@ -188,13 +188,19 @@ class ImportScripts::MylittleforumSQL < ImportScripts::Base
def import_topics
puts "", "importing topics..."
total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}entries
total_count =
mysql_query(
"SELECT count(*) count FROM #{TABLE_PREFIX}entries
WHERE time > '#{IMPORT_AFTER}'
AND pid = 0;").first['count']
AND pid = 0;",
).first[
"count"
]
batches(BATCH_SIZE) do |offset|
discussions = mysql_query(
"SELECT id as DiscussionID,
discussions =
mysql_query(
"SELECT id as DiscussionID,
category as CategoryID,
subject as Name,
text as Body,
@@ -206,29 +212,32 @@ class ImportScripts::MylittleforumSQL < ImportScripts::Base
AND time > '#{IMPORT_AFTER}'
ORDER BY time ASC
LIMIT #{BATCH_SIZE}
OFFSET #{offset};")
OFFSET #{offset};",
)
break if discussions.size < 1
next if all_records_exist? :posts, discussions.map { |t| "discussion#" + t['DiscussionID'].to_s }
if all_records_exist? :posts, discussions.map { |t| "discussion#" + t["DiscussionID"].to_s }
next
end
create_posts(discussions, total: total_count, offset: offset) do |discussion|
raw = clean_up(discussion['Body'])
raw = clean_up(discussion["Body"])
youtube = nil
unless discussion['youtube'].blank?
youtube = clean_youtube(discussion['youtube'])
unless discussion["youtube"].blank?
youtube = clean_youtube(discussion["youtube"])
raw += "\n#{youtube}\n"
print_warning(raw)
end
{
id: "discussion#" + discussion['DiscussionID'].to_s,
user_id: user_id_from_imported_user_id(discussion['InsertUserID']) || Discourse::SYSTEM_USER_ID,
title: discussion['Name'].gsub('\\"', '"'),
category: category_id_from_imported_category_id(discussion['CategoryID']),
id: "discussion#" + discussion["DiscussionID"].to_s,
user_id:
user_id_from_imported_user_id(discussion["InsertUserID"]) || Discourse::SYSTEM_USER_ID,
title: discussion["Name"].gsub('\\"', '"'),
category: category_id_from_imported_category_id(discussion["CategoryID"]),
raw: raw,
created_at: Time.zone.at(discussion['DateInserted']),
created_at: Time.zone.at(discussion["DateInserted"]),
}
end
end
@@ -237,15 +246,20 @@ class ImportScripts::MylittleforumSQL < ImportScripts::Base
def import_posts
puts "", "importing posts..."
total_count = mysql_query(
"SELECT count(*) count
total_count =
mysql_query(
"SELECT count(*) count
FROM #{TABLE_PREFIX}entries
WHERE pid > 0
AND time > '#{IMPORT_AFTER}';").first['count']
AND time > '#{IMPORT_AFTER}';",
).first[
"count"
]
batches(BATCH_SIZE) do |offset|
comments = mysql_query(
"SELECT id as CommentID,
comments =
mysql_query(
"SELECT id as CommentID,
tid as DiscussionID,
text as Body,
time as DateInserted,
@@ -256,26 +270,33 @@ class ImportScripts::MylittleforumSQL < ImportScripts::Base
AND time > '#{IMPORT_AFTER}'
ORDER BY time ASC
LIMIT #{BATCH_SIZE}
OFFSET #{offset};")
OFFSET #{offset};",
)
break if comments.size < 1
next if all_records_exist? :posts, comments.map { |comment| "comment#" + comment['CommentID'].to_s }
if all_records_exist? :posts,
comments.map { |comment| "comment#" + comment["CommentID"].to_s }
next
end
create_posts(comments, total: total_count, offset: offset) do |comment|
next unless t = topic_lookup_from_imported_post_id("discussion#" + comment['DiscussionID'].to_s)
next if comment['Body'].blank?
raw = clean_up(comment['Body'])
unless t = topic_lookup_from_imported_post_id("discussion#" + comment["DiscussionID"].to_s)
next
end
next if comment["Body"].blank?
raw = clean_up(comment["Body"])
youtube = nil
unless comment['youtube'].blank?
youtube = clean_youtube(comment['youtube'])
unless comment["youtube"].blank?
youtube = clean_youtube(comment["youtube"])
raw += "\n#{youtube}\n"
end
{
id: "comment#" + comment['CommentID'].to_s,
user_id: user_id_from_imported_user_id(comment['InsertUserID']) || Discourse::SYSTEM_USER_ID,
id: "comment#" + comment["CommentID"].to_s,
user_id:
user_id_from_imported_user_id(comment["InsertUserID"]) || Discourse::SYSTEM_USER_ID,
topic_id: t[:topic_id],
raw: clean_up(raw),
created_at: Time.zone.at(comment['DateInserted'])
created_at: Time.zone.at(comment["DateInserted"]),
}
end
end
@@ -284,20 +305,20 @@ class ImportScripts::MylittleforumSQL < ImportScripts::Base
# Normalises the value of a "youtube" column into something that can be put on
# a line of its own in a post.
#
# The value is first passed through clean_up. Embedded <iframe>/<object>
# markup is reduced to its src URL on a separate line, and protocol-relative
# URLs ("//...") get an "https:" prefix. If the result contains no "http" at
# all it is treated as a bare video id: values shorter than 8 characters or
# containing <, > or = are discarded (empty string), anything else is turned
# into a https://www.youtube.com/watch?v=... URL.
#
# Returns the cleaned string; logs before/after through print_warning unless
# QUIET is set.
def clean_youtube(youtube_raw)
  youtube_cooked = clean_up(youtube_raw.dup.to_s)

  # get just src from <iframe> and put on a line by itself
  youtube_cooked.gsub!(%r{<iframe.+?src="(\S+?)".+?</iframe>}mix) { "\n#{Regexp.last_match(1)}\n" }
  youtube_cooked.gsub!(%r{<object.+?src="(\S+?)".+?</object>}mix) { "\n#{Regexp.last_match(1)}\n" }
  youtube_cooked.gsub!(%r{^//}, "https://") # make sure it has a protocol

  unless youtube_cooked.match?(/http/) # handle case of only youtube object number
    if youtube_cooked.length < 8 || youtube_cooked.match?(/[<>=]/)
      # probably not a youtube id
      youtube_cooked = ""
    else
      youtube_cooked = "https://www.youtube.com/watch?v=" + youtube_cooked
    end
  end

  print_warning("#{"-" * 40}\nBefore: #{youtube_raw}\nAfter: #{youtube_cooked}") unless QUIET
  youtube_cooked
end
@@ -313,73 +334,79 @@ class ImportScripts::MylittleforumSQL < ImportScripts::Base
raw = raw.gsub("\\'", "'")
raw = raw.gsub(/\[b\]/i, "<strong>")
raw = raw.gsub(/\[\/b\]/i, "</strong>")
raw = raw.gsub(%r{\[/b\]}i, "</strong>")
raw = raw.gsub(/\[i\]/i, "<em>")
raw = raw.gsub(/\[\/i\]/i, "</em>")
raw = raw.gsub(%r{\[/i\]}i, "</em>")
raw = raw.gsub(/\[u\]/i, "<em>")
raw = raw.gsub(/\[\/u\]/i, "</em>")
raw = raw.gsub(%r{\[/u\]}i, "</em>")
raw = raw.gsub(/\[url\](\S+)\[\/url\]/im) { "#{$1}" }
raw = raw.gsub(/\[link\](\S+)\[\/link\]/im) { "#{$1}" }
raw = raw.gsub(%r{\[url\](\S+)\[/url\]}im) { "#{$1}" }
raw = raw.gsub(%r{\[link\](\S+)\[/link\]}im) { "#{$1}" }
# URL & LINK with text
raw = raw.gsub(/\[url=(\S+?)\](.*?)\[\/url\]/im) { "<a href=\"#{$1}\">#{$2}</a>" }
raw = raw.gsub(/\[link=(\S+?)\](.*?)\[\/link\]/im) { "<a href=\"#{$1}\">#{$2}</a>" }
raw = raw.gsub(%r{\[url=(\S+?)\](.*?)\[/url\]}im) { "<a href=\"#{$1}\">#{$2}</a>" }
raw = raw.gsub(%r{\[link=(\S+?)\](.*?)\[/link\]}im) { "<a href=\"#{$1}\">#{$2}</a>" }
# remote images
raw = raw.gsub(/\[img\](https?:.+?)\[\/img\]/im) { "<img src=\"#{$1}\">" }
raw = raw.gsub(/\[img=(https?.+?)\](.+?)\[\/img\]/im) { "<img src=\"#{$1}\" alt=\"#{$2}\">" }
raw = raw.gsub(%r{\[img\](https?:.+?)\[/img\]}im) { "<img src=\"#{$1}\">" }
raw = raw.gsub(%r{\[img=(https?.+?)\](.+?)\[/img\]}im) { "<img src=\"#{$1}\" alt=\"#{$2}\">" }
# local images
raw = raw.gsub(/\[img\](.+?)\[\/img\]/i) { "<img src=\"#{IMAGE_BASE}/#{$1}\">" }
raw = raw.gsub(/\[img=(.+?)\](https?.+?)\[\/img\]/im) { "<img src=\"#{IMAGE_BASE}/#{$1}\" alt=\"#{$2}\">" }
raw = raw.gsub(%r{\[img\](.+?)\[/img\]}i) { "<img src=\"#{IMAGE_BASE}/#{$1}\">" }
raw =
raw.gsub(%r{\[img=(.+?)\](https?.+?)\[/img\]}im) do
"<img src=\"#{IMAGE_BASE}/#{$1}\" alt=\"#{$2}\">"
end
# Convert image bbcode
raw.gsub!(/\[img=(\d+),(\d+)\]([^\]]*)\[\/img\]/im, '<img width="\1" height="\2" src="\3">')
raw.gsub!(%r{\[img=(\d+),(\d+)\]([^\]]*)\[/img\]}im, '<img width="\1" height="\2" src="\3">')
# [div]s are really [quote]s
raw.gsub!(/\[div\]/mix, "[quote]")
raw.gsub!(/\[\/div\]/mix, "[/quote]")
raw.gsub!(%r{\[/div\]}mix, "[/quote]")
# [postedby] -> link to @user
raw.gsub(/\[postedby\](.+?)\[b\](.+?)\[\/b\]\[\/postedby\]/i) { "#{$1}@#{$2}" }
raw.gsub(%r{\[postedby\](.+?)\[b\](.+?)\[/b\]\[/postedby\]}i) { "#{$1}@#{$2}" }
# CODE (not tested)
raw = raw.gsub(/\[code\](\S+)\[\/code\]/im) { "```\n#{$1}\n```" }
raw = raw.gsub(/\[pre\](\S+)\[\/pre\]/im) { "```\n#{$1}\n```" }
raw = raw.gsub(%r{\[code\](\S+)\[/code\]}im) { "```\n#{$1}\n```" }
raw = raw.gsub(%r{\[pre\](\S+)\[/pre\]}im) { "```\n#{$1}\n```" }
raw = raw.gsub(/(https:\/\/youtu\S+)/i) { "\n#{$1}\n" } #youtube links on line by themselves
raw = raw.gsub(%r{(https://youtu\S+)}i) { "\n#{$1}\n" } #youtube links on line by themselves
# no center
raw = raw.gsub(/\[\/?center\]/i, "")
raw = raw.gsub(%r{\[/?center\]}i, "")
# no size
raw = raw.gsub(/\[\/?size.*?\]/i, "")
raw = raw.gsub(%r{\[/?size.*?\]}i, "")
### FROM VANILLA:
# fix whitespaces
raw = raw.gsub(/(\\r)?\\n/, "\n")
.gsub("\\t", "\t")
raw = raw.gsub(/(\\r)?\\n/, "\n").gsub("\\t", "\t")
unless CONVERT_HTML
# replace all chevrons with HTML entities
# NOTE: must be done
# - AFTER all the "code" processing
# - BEFORE the "quote" processing
raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub("<", "\u2603") + "`" }
.gsub("<", "&lt;")
.gsub("\u2603", "<")
raw =
raw
.gsub(/`([^`]+)`/im) { "`" + $1.gsub("<", "\u2603") + "`" }
.gsub("<", "&lt;")
.gsub("\u2603", "<")
raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub(">", "\u2603") + "`" }
.gsub(">", "&gt;")
.gsub("\u2603", ">")
raw =
raw
.gsub(/`([^`]+)`/im) { "`" + $1.gsub(">", "\u2603") + "`" }
.gsub(">", "&gt;")
.gsub("\u2603", ">")
end
# Remove the color tag
raw.gsub!(/\[color=[#a-z0-9]+\]/i, "")
raw.gsub!(/\[\/color\]/i, "")
raw.gsub!(%r{\[/color\]}i, "")
### END VANILLA:
raw
@@ -395,54 +422,72 @@ class ImportScripts::MylittleforumSQL < ImportScripts::Base
end
# Creates Permalink records for the old forum URLs:
#   * "#{BASE}/user-id-<import_id>.html"  -> external_url "/u/<username>"
#   * "#{BASE}/forum_entry-id-<id>.html"  -> the topic (for a first post) or
#                                            the post itself
#   * "#{BASE}/forum-category-<id>.html"  -> the category
#
# Only records whose custom fields carry an "import_id" are considered (users
# additionally need "import_username"). Creation is best-effort: a
# StandardError raised by Permalink.create is deliberately swallowed so that a
# single bad record does not abort the run. One "." is printed per processed
# record, and each mapping is echoed through print_warning unless QUIET is set.
def create_permalinks
  puts "", "Creating redirects...", ""

  puts "", "Users...", ""
  User.find_each do |u|
    ucf = u.custom_fields
    next unless ucf && ucf["import_id"] && ucf["import_username"]

    begin
      Permalink.create(
        url: "#{BASE}/user-id-#{ucf["import_id"]}.html",
        external_url: "/u/#{u.username}",
      )
    rescue StandardError
      nil
    end
    print "."
  end

  puts "", "Posts...", ""
  Post.find_each do |post|
    pcf = post.custom_fields
    next unless pcf && pcf["import_id"]

    topic = post.topic
    # import ids look like "discussion#12" / "comment#34"; keep the numeric part
    id = pcf["import_id"].split("#").last
    if post.post_number == 1
      begin
        Permalink.create(url: "#{BASE}/forum_entry-id-#{id}.html", topic_id: topic.id)
      rescue StandardError
        nil
      end
      unless QUIET
        print_warning("forum_entry-id-#{id}.html --> http://localhost:3000/t/#{topic.id}")
      end
    else
      begin
        Permalink.create(url: "#{BASE}/forum_entry-id-#{id}.html", post_id: post.id)
      rescue StandardError
        nil
      end
      unless QUIET
        print_warning(
          "forum_entry-id-#{id}.html --> http://localhost:3000/t/#{topic.id}/#{post.id}",
        )
      end
    end
    print "."
  end

  puts "", "Categories...", ""
  Category.find_each do |cat|
    ccf = cat.custom_fields
    next unless id = ccf["import_id"]

    # NOTE(review): the message says "/t/" although the permalink targets a
    # category -- confirm whether "/c/" was intended.
    print_warning("forum-category-#{id}.html --> /t/#{cat.id}") unless QUIET
    begin
      Permalink.create(url: "#{BASE}/forum-category-#{id}.html", category_id: cat.id)
    rescue StandardError
      nil
    end
    print "."
  end
end
# Writes +message+, converted to a string, as one line on standard error.
def print_warning(message)
  $stderr.puts(message.to_s)
end
end
ImportScripts::MylittleforumSQL.new.perform

View File

@@ -1,8 +1,8 @@
# frozen_string_literal: true
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'pg'
require_relative 'base/uploader'
require "pg"
require_relative "base/uploader"
=begin
if you want to create mock users for posts made by anonymous participants,
@@ -40,7 +40,7 @@ class ImportScripts::Nabble < ImportScripts::Base
BATCH_SIZE = 1000
DB_NAME = "nabble"
DB_NAME = "nabble"
CATEGORY_ID = 6
def initialize
@@ -64,14 +64,13 @@ class ImportScripts::Nabble < ImportScripts::Base
total_count = @client.exec("SELECT COUNT(user_id) FROM user_")[0]["count"]
batches(BATCH_SIZE) do |offset|
users = @client.query(<<-SQL
users = @client.query(<<-SQL)
SELECT user_id, name, email, joined
FROM user_
ORDER BY joined
LIMIT #{BATCH_SIZE}
OFFSET #{offset}
SQL
)
break if users.ntuples() < 1
@@ -83,24 +82,23 @@ class ImportScripts::Nabble < ImportScripts::Base
email: row["email"] || fake_email,
created_at: Time.zone.at(@td.decode(row["joined"])),
name: row["name"],
post_create_action: proc do |user|
import_avatar(user, row["user_id"])
end
post_create_action: proc { |user| import_avatar(user, row["user_id"]) },
}
end
end
end
def import_avatar(user, org_id)
filename = 'avatar' + org_id.to_s
path = File.join('/tmp/nab', filename)
res = @client.exec("SELECT content FROM file_avatar WHERE name='avatar100.png' AND user_id = #{org_id} LIMIT 1")
filename = "avatar" + org_id.to_s
path = File.join("/tmp/nab", filename)
res =
@client.exec(
"SELECT content FROM file_avatar WHERE name='avatar100.png' AND user_id = #{org_id} LIMIT 1",
)
return if res.ntuples() < 1
binary = res[0]['content']
File.open(path, 'wb') { |f|
f.write(PG::Connection.unescape_bytea(binary))
}
binary = res[0]["content"]
File.open(path, "wb") { |f| f.write(PG::Connection.unescape_bytea(binary)) }
upload = @uploader.create_upload(user.id, path, filename)
@@ -113,7 +111,6 @@ class ImportScripts::Nabble < ImportScripts::Base
else
Rails.logger.error("Could not persist avatar for user #{user.username}")
end
end
def parse_email(msg)
@@ -128,11 +125,13 @@ class ImportScripts::Nabble < ImportScripts::Base
def create_forum_topics
puts "", "creating forum topics"
app_node_id = @client.exec("SELECT node_id FROM node WHERE is_app LIMIT 1")[0]['node_id']
topic_count = @client.exec("SELECT COUNT(node_id) AS count FROM node WHERE parent_id = #{app_node_id}")[0]["count"]
app_node_id = @client.exec("SELECT node_id FROM node WHERE is_app LIMIT 1")[0]["node_id"]
topic_count =
@client.exec("SELECT COUNT(node_id) AS count FROM node WHERE parent_id = #{app_node_id}")[0][
"count"
]
batches(BATCH_SIZE) do |offset|
topics = @client.exec <<-SQL
SELECT n.node_id, n.subject, n.owner_id, n.when_created, nm.message, n.msg_fmt
FROM node AS n
@@ -145,43 +144,43 @@ class ImportScripts::Nabble < ImportScripts::Base
break if topics.ntuples() < 1
next if all_records_exist? :posts, topics.map { |t| t['node_id'].to_i }
next if all_records_exist? :posts, topics.map { |t| t["node_id"].to_i }
create_posts(topics, total: topic_count, offset: offset) do |t|
raw = body_from(t)
next unless raw
raw = process_content(raw)
raw = process_attachments(raw, t['node_id'])
raw = process_attachments(raw, t["node_id"])
{
id: t['node_id'],
title: t['subject'],
id: t["node_id"],
title: t["subject"],
user_id: user_id_from_imported_user_id(t["owner_id"]) || Discourse::SYSTEM_USER_ID,
created_at: Time.zone.at(@td.decode(t["when_created"])),
category: CATEGORY_ID,
raw: raw,
cook_method: Post.cook_methods[:regular]
cook_method: Post.cook_methods[:regular],
}
end
end
end
# Returns the body text for node row +p+.
#
# Rows whose "msg_fmt" is "m" or "s" have their "message" run through
# parse_email; any other format is returned unchanged. When parse_email raises
# Email::Receiver::EmptyEmailError the node id is reported on stdout and nil
# is returned.
def body_from(p)
  return p["message"] unless %w[m s].include?(p["msg_fmt"])

  parse_email(p["message"])
rescue Email::Receiver::EmptyEmailError
  puts "Skipped #{p["node_id"]}"
  nil
end
def process_content(txt)
txt.gsub! /\<quote author="(.*?)"\>/, '[quote="\1"]'
txt.gsub! /\<\/quote\>/, '[/quote]'
txt.gsub!(/\<raw\>(.*?)\<\/raw\>/m) do |match|
txt.gsub! %r{\</quote\>}, "[/quote]"
txt.gsub!(%r{\<raw\>(.*?)\</raw\>}m) do |match|
c = Regexp.last_match[1].indent(4)
"\n#{c}\n"
"\n#{c}\n"
end
# lines starting with # are comments, not headings, insert a space to prevent markdown
txt.gsub! /\n#/m, ' #'
txt.gsub! /\n#/m, " #"
# in the languagetool forum, quite a lot of XML was not marked as raw
# so we treat <rule...>...</rule> and <category...>...</category> as raw
@@ -202,12 +201,10 @@ class ImportScripts::Nabble < ImportScripts::Base
def process_attachments(txt, postid)
txt.gsub!(/<nabble_img src="(.*?)" (.*?)>/m) do |match|
basename = Regexp.last_match[1]
get_attachment_upload(basename, postid) do |upload|
@uploader.embedded_image_html(upload)
end
get_attachment_upload(basename, postid) { |upload| @uploader.embedded_image_html(upload) }
end
txt.gsub!(/<nabble_a href="(.*?)">(.*?)<\/nabble_a>/m) do |match|
txt.gsub!(%r{<nabble_a href="(.*?)">(.*?)</nabble_a>}m) do |match|
basename = Regexp.last_match[1]
get_attachment_upload(basename, postid) do |upload|
@uploader.attachment_html(upload, basename)
@@ -217,13 +214,12 @@ class ImportScripts::Nabble < ImportScripts::Base
end
# Looks up an attachment stored for a Nabble post, writes it to a scratch file
# under /tmp/nab and yields the resulting upload to the caller's block.
#
# @param basename [String] attachment name as referenced in the post markup
# @param postid [String, Integer] node id of the post that owns the attachment
# @yieldparam upload whatever @uploader.create_upload returns
# @return the block's result, or nil when no matching file_node row exists
def get_attachment_upload(basename, postid)
  # basename is captured from post content, so bind it as a query parameter
  # instead of splicing it into the SQL text.
  contents =
    @client.exec_params(
      "SELECT content FROM file_node WHERE name = $1 AND node_id = $2",
      [basename, postid],
    )
  return unless contents.any?

  # Keep only the last path component so a crafted name cannot leave /tmp/nab.
  fn = File.join("/tmp/nab", File.basename(basename))
  File.binwrite(fn, PG::Connection.unescape_bytea(contents[0]["content"]))
  yield @uploader.create_upload(0, fn, basename)
end
@@ -231,8 +227,11 @@ class ImportScripts::Nabble < ImportScripts::Base
def import_replies
puts "", "creating topic replies"
app_node_id = @client.exec("SELECT node_id FROM node WHERE is_app LIMIT 1")[0]['node_id']
post_count = @client.exec("SELECT COUNT(node_id) AS count FROM node WHERE parent_id != #{app_node_id}")[0]["count"]
app_node_id = @client.exec("SELECT node_id FROM node WHERE is_app LIMIT 1")[0]["node_id"]
post_count =
@client.exec("SELECT COUNT(node_id) AS count FROM node WHERE parent_id != #{app_node_id}")[0][
"count"
]
topic_ids = {}
@@ -249,11 +248,11 @@ class ImportScripts::Nabble < ImportScripts::Base
break if posts.ntuples() < 1
next if all_records_exist? :posts, posts.map { |p| p['node_id'].to_i }
next if all_records_exist? :posts, posts.map { |p| p["node_id"].to_i }
create_posts(posts, total: post_count, offset: offset) do |p|
parent_id = p['parent_id']
id = p['node_id']
parent_id = p["parent_id"]
id = p["node_id"]
topic_id = topic_ids[parent_id]
unless topic_id
@@ -268,19 +267,21 @@ class ImportScripts::Nabble < ImportScripts::Base
next unless raw
raw = process_content(raw)
raw = process_attachments(raw, id)
{ id: id,
{
id: id,
topic_id: topic_id,
user_id: user_id_from_imported_user_id(p['owner_id']) || Discourse::SYSTEM_USER_ID,
user_id: user_id_from_imported_user_id(p["owner_id"]) || Discourse::SYSTEM_USER_ID,
created_at: Time.zone.at(@td.decode(p["when_created"])),
raw: raw,
cook_method: Post.cook_methods[:regular] }
cook_method: Post.cook_methods[:regular],
}
end
end
end
end
class String
def indent(count, char = ' ')
def indent(count, char = " ")
gsub(/([^\n]*)(\n|$)/) do |match|
last_iteration = ($1 == "" && $2 == "")
line = +""

View File

@@ -5,28 +5,28 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb")
# Edit the constants and initialize method for your import data.
class ImportScripts::Ning < ImportScripts::Base
JSON_FILES_DIR = "/Users/techapj/Downloads/ben/ADEM"
ATTACHMENT_PREFIXES = ["discussions", "pages", "blogs", "members", "photos"]
EXTRA_AUTHORIZED_EXTENSIONS = ["bmp", "ico", "txt", "pdf", "gif", "jpg", "jpeg", "html"]
ATTACHMENT_PREFIXES = %w[discussions pages blogs members photos]
EXTRA_AUTHORIZED_EXTENSIONS = %w[bmp ico txt pdf gif jpg jpeg html]
def initialize
super
@system_user = Discourse.system_user
@users_json = load_ning_json("ning-members-local.json")
@users_json = load_ning_json("ning-members-local.json")
@discussions_json = load_ning_json("ning-discussions-local.json")
# An example of a custom category from Ning:
@blogs_json = load_ning_json("ning-blogs-local.json")
@photos_json = load_ning_json("ning-photos-local.json")
@pages_json = load_ning_json("ning-pages-local.json")
@photos_json = load_ning_json("ning-photos-local.json")
@pages_json = load_ning_json("ning-pages-local.json")
SiteSetting.max_image_size_kb = 10240
SiteSetting.max_attachment_size_kb = 10240
SiteSetting.authorized_extensions = (SiteSetting.authorized_extensions.split("|") + EXTRA_AUTHORIZED_EXTENSIONS).uniq.join("|")
SiteSetting.max_image_size_kb = 10_240
SiteSetting.max_attachment_size_kb = 10_240
SiteSetting.authorized_extensions =
(SiteSetting.authorized_extensions.split("|") + EXTRA_AUTHORIZED_EXTENSIONS).uniq.join("|")
# Example of importing a custom profile field:
# @interests_field = UserField.find_by_name("My interests")
@@ -60,23 +60,23 @@ class ImportScripts::Ning < ImportScripts::Base
end
# Patches the known defects of a Ning JSON export so it can be parsed.
#
# The string is modified in place and the same object is returned. Each
# substitution is applied exactly once, so only one wrapping paren / bracket is
# removed per line edge.
#
# @param arg [String] raw (mutable) file content
# @return [String] arg itself, after repair
def repair_json(arg)
  arg.gsub!(/^\(/, "") # content of file is surrounded by ( )
  arg.gsub!(/\)$/, "")
  arg.gsub!(/\]\]$/, "]") # there can be an extra ] at the end
  arg.gsub!(/\}\{/, "},{") # missing commas sometimes!
  arg.gsub!("}]{", "},{") # surprise square brackets
  arg.gsub!("}[{", "},{") # :troll:
  arg
end
def import_users
puts '', "Importing users"
puts "", "Importing users"
staff_levels = ["admin", "moderator", "owner"]
staff_levels = %w[admin moderator owner]
create_users(@users_json) do |u|
{
@@ -88,57 +88,58 @@ class ImportScripts::Ning < ImportScripts::Base
location: "#{u["location"]} #{u["country"]}",
avatar_url: u["profilePhoto"],
bio_raw: u["profileQuestions"].is_a?(Hash) ? u["profileQuestions"]["About Me"] : nil,
post_create_action: proc do |newuser|
# if u["profileQuestions"].is_a?(Hash)
# newuser.custom_fields = {"user_field_#{@interests_field.id}" => u["profileQuestions"]["My interests"]}
# end
post_create_action:
proc do |newuser|
# if u["profileQuestions"].is_a?(Hash)
# newuser.custom_fields = {"user_field_#{@interests_field.id}" => u["profileQuestions"]["My interests"]}
# end
if staff_levels.include?(u["level"].downcase)
if u["level"].downcase == "admin" || u["level"].downcase == "owner"
newuser.admin = true
else
newuser.moderator = true
end
end
# states: ["active", "suspended", "left", "pending"]
if u["state"] == "active" && newuser.approved_at.nil?
newuser.approved = true
newuser.approved_by_id = @system_user.id
newuser.approved_at = newuser.created_at
end
newuser.save
if u["profilePhoto"] && newuser.user_avatar.try(:custom_upload_id).nil?
photo_path = file_full_path(u["profilePhoto"])
if File.exist?(photo_path)
begin
upload = create_upload(newuser.id, photo_path, File.basename(photo_path))
if upload.persisted?
newuser.import_mode = false
newuser.create_user_avatar
newuser.import_mode = true
newuser.user_avatar.update(custom_upload_id: upload.id)
newuser.update(uploaded_avatar_id: upload.id)
else
puts "Error: Upload did not persist for #{photo_path}!"
end
rescue SystemCallError => err
puts "Could not import avatar #{photo_path}: #{err.message}"
if staff_levels.include?(u["level"].downcase)
if u["level"].downcase == "admin" || u["level"].downcase == "owner"
newuser.admin = true
else
newuser.moderator = true
end
else
puts "avatar file not found at #{photo_path}"
end
end
end
# states: ["active", "suspended", "left", "pending"]
if u["state"] == "active" && newuser.approved_at.nil?
newuser.approved = true
newuser.approved_by_id = @system_user.id
newuser.approved_at = newuser.created_at
end
newuser.save
if u["profilePhoto"] && newuser.user_avatar.try(:custom_upload_id).nil?
photo_path = file_full_path(u["profilePhoto"])
if File.exist?(photo_path)
begin
upload = create_upload(newuser.id, photo_path, File.basename(photo_path))
if upload.persisted?
newuser.import_mode = false
newuser.create_user_avatar
newuser.import_mode = true
newuser.user_avatar.update(custom_upload_id: upload.id)
newuser.update(uploaded_avatar_id: upload.id)
else
puts "Error: Upload did not persist for #{photo_path}!"
end
rescue SystemCallError => err
puts "Could not import avatar #{photo_path}: #{err.message}"
end
else
puts "avatar file not found at #{photo_path}"
end
end
end,
}
end
EmailToken.delete_all
end
def suspend_users
puts '', "Updating suspended users"
puts "", "Updating suspended users"
count = 0
suspended = 0
@@ -151,7 +152,10 @@ class ImportScripts::Ning < ImportScripts::Base
user.suspended_till = 200.years.from_now
if user.save
StaffActionLogger.new(@system_user).log_user_suspend(user, "Import data indicates account is suspended.")
StaffActionLogger.new(@system_user).log_user_suspend(
user,
"Import data indicates account is suspended.",
)
suspended += 1
else
puts "Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}"
@@ -168,13 +172,15 @@ class ImportScripts::Ning < ImportScripts::Base
def import_categories
puts "", "Importing categories"
create_categories((["Blog", "Pages", "Photos"] + @discussions_json.map { |d| d["category"] }).uniq.compact) do |name|
create_categories(
(%w[Blog Pages Photos] + @discussions_json.map { |d| d["category"] }).uniq.compact,
) do |name|
if name.downcase == "uncategorized"
nil
else
{
id: name, # ning has no id for categories, so use the name
name: name
name: name,
}
end
end
@@ -220,9 +226,7 @@ class ImportScripts::Ning < ImportScripts::Base
unless topic["category"].nil? || topic["category"].downcase == "uncategorized"
mapped[:category] = category_id_from_imported_category_id(topic["category"])
end
if topic["category"].nil? && default_category
mapped[:category] = default_category
end
mapped[:category] = default_category if topic["category"].nil? && default_category
mapped[:title] = CGI.unescapeHTML(topic["title"])
mapped[:raw] = process_ning_post_body(topic["description"])
@@ -230,13 +234,9 @@ class ImportScripts::Ning < ImportScripts::Base
mapped[:raw] = add_file_attachments(mapped[:raw], topic["fileAttachments"])
end
if topic["photoUrl"]
mapped[:raw] = add_photo(mapped[:raw], topic["photoUrl"])
end
mapped[:raw] = add_photo(mapped[:raw], topic["photoUrl"]) if topic["photoUrl"]
if topic["embedCode"]
mapped[:raw] = add_video(mapped[:raw], topic["embedCode"])
end
mapped[:raw] = add_video(mapped[:raw], topic["embedCode"]) if topic["embedCode"]
parent_post = create_post(mapped, mapped[:id])
unless parent_post.is_a?(Post)
@@ -247,23 +247,24 @@ class ImportScripts::Ning < ImportScripts::Base
if topic["comments"].present?
topic["comments"].reverse.each do |post|
if post_id_from_imported_post_id(post["id"])
next # already imported this post
end
raw = process_ning_post_body(post["description"])
if post["fileAttachments"]
raw = add_file_attachments(raw, post["fileAttachments"])
end
raw = add_file_attachments(raw, post["fileAttachments"]) if post["fileAttachments"]
new_post = create_post({
id: post["id"],
topic_id: parent_post.topic_id,
user_id: user_id_from_imported_user_id(post["contributorName"]) || -1,
raw: raw,
created_at: Time.zone.parse(post["createdDate"])
}, post["id"])
new_post =
create_post(
{
id: post["id"],
topic_id: parent_post.topic_id,
user_id: user_id_from_imported_user_id(post["contributorName"]) || -1,
raw: raw,
created_at: Time.zone.parse(post["createdDate"]),
},
post["id"],
)
if new_post.is_a?(Post)
posts += 1
@@ -288,11 +289,17 @@ class ImportScripts::Ning < ImportScripts::Base
end
# Memoized pattern for an <a> whose href starts with one of ATTACHMENT_PREFIXES
# and which wraps a single <img>; capture 1 is the image's src value.
def attachment_regex
  return @_attachment_regex if @_attachment_regex

  prefixes = ATTACHMENT_PREFIXES.join("|")
  @_attachment_regex =
    %r{<a (?:[^>]*)href="(?:#{prefixes})/(?:[^"]+)"(?:[^>]*)><img (?:[^>]*)src="([^"]+)"(?:[^>]*)></a>}
end
# Memoized pattern for a <p> that contains only a protocol-relative YouTube
# embed iframe (plus optional trailing text); capture 1 is everything after
# "/embed/" up to the closing quote, i.e. the video id and any query string.
# The dots of the host name are escaped so they match literally.
def youtube_iframe_regex
  @_youtube_iframe_regex ||=
    %r{<p><iframe(?:[^>]*)src="//www\.youtube\.com/embed/([^"]+)"(?:[^>]*)></iframe>(?:[^<]*)</p>}
end
def process_ning_post_body(arg)
@@ -382,15 +389,16 @@ class ImportScripts::Ning < ImportScripts::Base
def add_video(arg, embed_code)
raw = arg
youtube_regex = Regexp.new(%Q[<iframe(?:[^>]*)src="http:\/\/www.youtube.com\/embed\/([^"]+)"(?:[^>]*)><\/iframe>])
youtube_regex =
Regexp.new(
%Q[<iframe(?:[^>]*)src="http:\/\/www.youtube.com\/embed\/([^"]+)"(?:[^>]*)><\/iframe>],
)
raw.gsub!(youtube_regex) do |s|
matches = youtube_regex.match(s)
video_id = matches[1].split("?").first
if video_id
raw += "\n\nhttps://www.youtube.com/watch?v=#{video_id}\n"
end
raw += "\n\nhttps://www.youtube.com/watch?v=#{video_id}\n" if video_id
end
raw += "\n" + embed_code + "\n"
@@ -398,6 +406,4 @@ class ImportScripts::Ning < ImportScripts::Base
end
end
# Run the importer once, and only when this file is executed directly.
ImportScripts::Ning.new.perform if __FILE__ == $0

View File

@@ -1,6 +1,6 @@
# frozen_string_literal: true
require 'mongo'
require "mongo"
module NodeBB
class Mongo
@@ -43,8 +43,8 @@ module NodeBB
user["joindate"] = timestamp_to_date(user["joindate"])
user["lastonline"] = timestamp_to_date(user["lastonline"])
user['banned'] = user['banned'].to_s
user['uid'] = user['uid'].to_s
user["banned"] = user["banned"].to_s
user["uid"] = user["uid"].to_s
user
end
@@ -56,17 +56,17 @@ module NodeBB
category_keys.each do |category_key|
category = mongo.find(_key: "category:#{category_key}").first
category['parentCid'] = category['parentCid'].to_s
category['disabled'] = category['disabled'].to_s
category['cid'] = category['cid'].to_s
category["parentCid"] = category["parentCid"].to_s
category["disabled"] = category["disabled"].to_s
category["cid"] = category["cid"].to_s
categories[category['cid']] = category
categories[category["cid"]] = category
end
end
end
# Loads one page of topics.
#
# Reads the values stored under the "topics:tid" key (skipping +offset+ entries
# and taking at most +page_size+) with a single query, then resolves each value
# through #topic.
#
# @param offset [Integer] number of entries to skip
# @param page_size [Integer] maximum number of entries to load
# @return [Array] one #topic result per key
def topics(offset = 0, page_size = 2000)
  mongo
    .find(_key: "topics:tid")
    .skip(offset)
    .limit(page_size)
    .pluck(:value)
    .map { |topic_key| topic(topic_key) }
end
@@ -86,11 +86,11 @@ module NodeBB
end
# Counts the entries stored under the "topics:tid" key (one query).
def topic_count
  mongo.find(_key: "topics:tid").count
end
# Loads one page of posts.
#
# Reads the values stored under the "posts:pid" key (skipping +offset+ entries
# and taking at most +page_size+) with a single query, then resolves each value
# through #post.
#
# @param offset [Integer] number of entries to skip
# @param page_size [Integer] maximum number of entries to load
# @return [Array] one #post result per key
def posts(offset = 0, page_size = 2000)
  mongo
    .find(_key: "posts:pid")
    .skip(offset)
    .limit(page_size)
    .pluck(:value)
    .map { |post_key| post(post_key) }
end
@@ -111,7 +111,7 @@ module NodeBB
end
# Counts the entries stored under the "posts:pid" key (one query).
def post_count
  mongo.find(_key: "posts:pid").count
end
private

View File

@@ -1,13 +1,13 @@
# frozen_string_literal: true
require_relative '../base'
require_relative './redis'
require_relative './mongo'
require_relative "../base"
require_relative "./redis"
require_relative "./mongo"
class ImportScripts::NodeBB < ImportScripts::Base
# CHANGE THESE BEFORE RUNNING THE IMPORTER
# ATTACHMENT_DIR needs to be absolute, not relative path
ATTACHMENT_DIR = '/Users/orlando/www/orlando/NodeBB/public/uploads'
ATTACHMENT_DIR = "/Users/orlando/www/orlando/NodeBB/public/uploads"
BATCH_SIZE = 2000
def initialize
@@ -17,17 +17,13 @@ class ImportScripts::NodeBB < ImportScripts::Base
# @client = adapter.new('mongodb://127.0.0.1:27017/nodebb')
adapter = NodeBB::Redis
@client = adapter.new(
host: "localhost",
port: "6379",
db: 14
)
@client = adapter.new(host: "localhost", port: "6379", db: 14)
load_merged_posts
end
def load_merged_posts
puts 'loading merged posts with topics...'
puts "loading merged posts with topics..."
# we keep here the posts that were merged
# as topics
@@ -35,13 +31,16 @@ class ImportScripts::NodeBB < ImportScripts::Base
# { post_id: discourse_post_id }
@merged_posts_map = {}
PostCustomField.where(name: 'import_merged_post_id').pluck(:post_id, :value).each do |post_id, import_id|
post = Post.find(post_id)
topic_id = post.topic_id
nodebb_post_id = post.custom_fields['import_merged_post_id']
PostCustomField
.where(name: "import_merged_post_id")
.pluck(:post_id, :value)
.each do |post_id, import_id|
post = Post.find(post_id)
topic_id = post.topic_id
nodebb_post_id = post.custom_fields["import_merged_post_id"]
@merged_posts_map[nodebb_post_id] = topic_id
end
@merged_posts_map[nodebb_post_id] = topic_id
end
end
def execute
@@ -56,19 +55,14 @@ class ImportScripts::NodeBB < ImportScripts::Base
end
# Imports every group returned by the NodeBB client.
#
# The NodeBB group "name" becomes the import id and its "slug" becomes the
# name handed to create_groups. create_groups is invoked once.
def import_groups
  puts "", "importing groups"

  create_groups(@client.groups) { |group| { id: group["name"], name: group["slug"] } }
end
def import_categories
@@ -107,15 +101,18 @@ class ImportScripts::NodeBB < ImportScripts::Base
name: category["name"],
position: category["order"],
description: category["description"],
parent_category_id: category_id_from_imported_category_id(category["parentCid"])
parent_category_id: category_id_from_imported_category_id(category["parentCid"]),
}
end
categories.each do |source_category|
cid = category_id_from_imported_category_id(source_category['cid'])
Permalink.create(url: "/category/#{source_category['slug']}", category_id: cid) rescue nil
cid = category_id_from_imported_category_id(source_category["cid"])
begin
Permalink.create(url: "/category/#{source_category["slug"]}", category_id: cid)
rescue StandardError
nil
end
end
end
def import_users
@@ -158,12 +155,13 @@ class ImportScripts::NodeBB < ImportScripts::Base
bio_raw: user["aboutme"],
active: true,
custom_fields: {
import_pass: user["password"]
import_pass: user["password"],
},
post_create_action: proc do |u|
import_profile_picture(user, u)
import_profile_background(user, u)
end
post_create_action:
proc do |u|
import_profile_picture(user, u)
import_profile_background(user, u)
end,
}
end
end
@@ -204,7 +202,7 @@ class ImportScripts::NodeBB < ImportScripts::Base
end
# write tmp file
file = Tempfile.new(filename, encoding: 'ascii-8bit')
file = Tempfile.new(filename, encoding: "ascii-8bit")
file.write string_io.read
file.rewind
@@ -230,9 +228,21 @@ class ImportScripts::NodeBB < ImportScripts::Base
imported_user.user_avatar.update(custom_upload_id: upload.id)
imported_user.update(uploaded_avatar_id: upload.id)
ensure
string_io.close rescue nil
file.close rescue nil
file.unlind rescue nil
begin
string_io.close
rescue StandardError
nil
end
begin
file.close
rescue StandardError
nil
end
begin
file.unlind
rescue StandardError
nil
end
end
def import_profile_background(old_user, imported_user)
@@ -264,7 +274,7 @@ class ImportScripts::NodeBB < ImportScripts::Base
end
# write tmp file
file = Tempfile.new(filename, encoding: 'ascii-8bit')
file = Tempfile.new(filename, encoding: "ascii-8bit")
file.write string_io.read
file.rewind
@@ -288,9 +298,21 @@ class ImportScripts::NodeBB < ImportScripts::Base
imported_user.user_profile.upload_profile_background(upload)
ensure
string_io.close rescue nil
file.close rescue nil
file.unlink rescue nil
begin
string_io.close
rescue StandardError
nil
end
begin
file.close
rescue StandardError
nil
end
begin
file.unlink
rescue StandardError
nil
end
end
def add_users_to_groups
@@ -305,7 +327,7 @@ class ImportScripts::NodeBB < ImportScripts::Base
dgroup = find_group_by_import_id(group["name"])
# do nothing if we migrated this group already
next if dgroup.custom_fields['import_users_added']
next if dgroup.custom_fields["import_users_added"]
group_member_ids = group["member_ids"].map { |uid| user_id_from_imported_user_id(uid) }
group_owner_ids = group["owner_ids"].map { |uid| user_id_from_imported_user_id(uid) }
@@ -320,7 +342,7 @@ class ImportScripts::NodeBB < ImportScripts::Base
owners = User.find(group_owner_ids)
owners.each { |owner| dgroup.add_owner(owner) }
dgroup.custom_fields['import_users_added'] = true
dgroup.custom_fields["import_users_added"] = true
dgroup.save
progress_count += 1
@@ -357,12 +379,13 @@ class ImportScripts::NodeBB < ImportScripts::Base
created_at: topic["timestamp"],
views: topic["viewcount"],
closed: topic["locked"] == "1",
post_create_action: proc do |p|
# keep track of this to use in import_posts
p.custom_fields["import_merged_post_id"] = topic["mainPid"]
p.save
@merged_posts_map[topic["mainPid"]] = p.id
end
post_create_action:
proc do |p|
# keep track of this to use in import_posts
p.custom_fields["import_merged_post_id"] = topic["mainPid"]
p.save
@merged_posts_map[topic["mainPid"]] = p.id
end,
}
data[:pinned_at] = data[:created_at] if topic["pinned"] == "1"
@@ -372,7 +395,11 @@ class ImportScripts::NodeBB < ImportScripts::Base
topics.each do |import_topic|
topic = topic_lookup_from_imported_post_id("t#{import_topic["tid"]}")
Permalink.create(url: "/topic/#{import_topic['slug']}", topic_id: topic[:topic_id]) rescue nil
begin
Permalink.create(url: "/topic/#{import_topic["slug"]}", topic_id: topic[:topic_id])
rescue StandardError
nil
end
end
end
end
@@ -411,21 +438,23 @@ class ImportScripts::NodeBB < ImportScripts::Base
topic_id: topic[:topic_id],
raw: raw,
created_at: post["timestamp"],
post_create_action: proc do |p|
post["upvoted_by"].each do |upvoter_id|
user = User.new
user.id = user_id_from_imported_user_id(upvoter_id) || Discourse::SYSTEM_USER_ID
PostActionCreator.like(user, p)
end
end
post_create_action:
proc do |p|
post["upvoted_by"].each do |upvoter_id|
user = User.new
user.id = user_id_from_imported_user_id(upvoter_id) || Discourse::SYSTEM_USER_ID
PostActionCreator.like(user, p)
end
end,
}
if post['toPid']
if post["toPid"]
# Look reply to topic
parent_id = topic_lookup_from_imported_post_id("t#{post['toPid']}").try(:[], :post_number)
parent_id = topic_lookup_from_imported_post_id("t#{post["toPid"]}").try(:[], :post_number)
# Look reply post if topic is missing
parent_id ||= topic_lookup_from_imported_post_id("p#{post['toPid']}").try(:[], :post_number)
parent_id ||=
topic_lookup_from_imported_post_id("p#{post["toPid"]}").try(:[], :post_number)
if parent_id
data[:reply_to_post_number] = parent_id
@@ -448,12 +477,12 @@ class ImportScripts::NodeBB < ImportScripts::Base
Post.find_each do |post|
begin
next if post.custom_fields['import_post_processing']
next if post.custom_fields["import_post_processing"]
new_raw = postprocess_post(post)
if new_raw != post.raw
post.raw = new_raw
post.custom_fields['import_post_processing'] = true
post.custom_fields["import_post_processing"] = true
post.save
end
ensure
@@ -463,7 +492,7 @@ class ImportScripts::NodeBB < ImportScripts::Base
end
def import_attachments
puts '', 'importing attachments...'
puts "", "importing attachments..."
current = 0
max = Post.count
@@ -474,7 +503,7 @@ class ImportScripts::NodeBB < ImportScripts::Base
print_status(current, max, start_time)
new_raw = post.raw.dup
new_raw.gsub!(/\[(.*)\]\((\/assets\/uploads\/files\/.*)\)/) do
new_raw.gsub!(%r{\[(.*)\]\((/assets/uploads/files/.*)\)}) do
image_md = Regexp.last_match[0]
text, filepath = $1, $2
filepath = filepath.gsub("/assets/uploads", ATTACHMENT_DIR)
@@ -493,7 +522,12 @@ class ImportScripts::NodeBB < ImportScripts::Base
end
if new_raw != post.raw
PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: 'Import attachments from NodeBB')
PostRevisor.new(post).revise!(
post.user,
{ raw: new_raw },
bypass_bump: true,
edit_reason: "Import attachments from NodeBB",
)
end
end
end
@@ -502,28 +536,30 @@ class ImportScripts::NodeBB < ImportScripts::Base
raw = post.raw
# [link to post](/post/:id)
raw = raw.gsub(/\[(.*)\]\(\/post\/(\d+).*\)/) do
text, post_id = $1, $2
raw =
raw.gsub(%r{\[(.*)\]\(/post/(\d+).*\)}) do
text, post_id = $1, $2
if topic_lookup = topic_lookup_from_imported_post_id("p#{post_id}")
url = topic_lookup[:url]
"[#{text}](#{url})"
else
"/404"
if topic_lookup = topic_lookup_from_imported_post_id("p#{post_id}")
url = topic_lookup[:url]
"[#{text}](#{url})"
else
"/404"
end
end
end
# [link to topic](/topic/:id)
raw = raw.gsub(/\[(.*)\]\(\/topic\/(\d+).*\)/) do
text, topic_id = $1, $2
raw =
raw.gsub(%r{\[(.*)\]\(/topic/(\d+).*\)}) do
text, topic_id = $1, $2
if topic_lookup = topic_lookup_from_imported_post_id("t#{topic_id}")
url = topic_lookup[:url]
"[#{text}](#{url})"
else
"/404"
if topic_lookup = topic_lookup_from_imported_post_id("t#{topic_id}")
url = topic_lookup[:url]
"[#{text}](#{url})"
else
"/404"
end
end
end
raw
end

View File

@@ -1,6 +1,6 @@
# frozen_string_literal: true
require 'redis'
require "redis"
module NodeBB
class Redis
@@ -11,7 +11,7 @@ module NodeBB
end
# Loads every group listed in the "groups:visible:createtime" sorted set
# (full range 0..-1, fetched once) and resolves each key through #group.
#
# @return [Array] one #group result per key
def groups
  redis.zrange("groups:visible:createtime", 0, -1).map { |group_key| group(group_key) }
end
@@ -26,7 +26,7 @@ module NodeBB
end
# Loads every user listed in the "users:joindate" sorted set (full range
# 0..-1, fetched once) and resolves each key through #user.
#
# @return [Array] one #user result per key
def users
  redis.zrange("users:joindate", 0, -1).map { |user_key| user(user_key) }
end
@@ -41,13 +41,13 @@ module NodeBB
end
# Loads every category listed in the "categories:cid" sorted set.
#
# The key list is fetched once; each key's "category:<key>" hash is then read
# with hgetall and indexed by the "cid" field it contains.
#
# @return [Hash] category hashes keyed by their "cid" value
def categories
  redis
    .zrange("categories:cid", 0, -1)
    .each_with_object({}) do |category_key, by_cid|
      category = redis.hgetall("category:#{category_key}")
      by_cid[category["cid"]] = category
    end
end
@@ -59,7 +59,7 @@ module NodeBB
from = offset
to = page_size + offset
topic_keys = redis.zrange('topics:tid', from, to)
topic_keys = redis.zrange("topics:tid", from, to)
topic_keys.map { |topic_key| topic(topic_key) }
end
@@ -75,7 +75,7 @@ module NodeBB
end
# Cardinality of the "topics:tid" sorted set (one ZCARD call).
def topic_count
  redis.zcard("topics:tid")
end
def posts(offset = 0, page_size = 2000)
@@ -85,7 +85,7 @@ module NodeBB
from = offset
to = page_size + offset
post_keys = redis.zrange('posts:pid', from, to)
post_keys = redis.zrange("posts:pid", from, to)
post_keys.map { |post_key| post(post_key) }
end
@@ -99,7 +99,7 @@ module NodeBB
end
# Cardinality of the "posts:pid" sorted set (one ZCARD call).
def post_count
  redis.zcard("posts:pid")
end
private

View File

@@ -5,7 +5,6 @@ require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::Phorum < ImportScripts::Base
PHORUM_DB = "piwik"
TABLE_PREFIX = "pw_"
BATCH_SIZE = 1000
@@ -13,12 +12,13 @@ class ImportScripts::Phorum < ImportScripts::Base
# Sets up the importer and opens a single MySQL connection to the Phorum
# database named by PHORUM_DB.
#
# NOTE(review): host and credentials are hard-coded here; edit them before
# running the import.
def initialize
  super

  @client =
    Mysql2::Client.new(
      host: "localhost",
      username: "root",
      password: "pa$$word",
      database: PHORUM_DB,
    )
end
def execute
@@ -29,30 +29,34 @@ class ImportScripts::Phorum < ImportScripts::Base
end
# Imports all active Phorum users in batches of BATCH_SIZE.
#
# Paging uses LIMIT/OFFSET, so rows are ordered by user_id to keep the pages
# stable between queries. The progress total counts only active users, which
# are the only rows the batch query returns. Rows with a blank username are
# skipped.
def import_users
  puts "", "creating users"

  total_count =
    mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}users WHERE active = 1;").first["count"]

  batches(BATCH_SIZE) do |offset|
    results = mysql_query(<<~SQL)
      SELECT user_id id, username, TRIM(email) AS email, username name, date_added created_at,
             date_last_active last_seen_at, admin
      FROM #{TABLE_PREFIX}users
      WHERE #{TABLE_PREFIX}users.active = 1
      ORDER BY user_id
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset};
    SQL

    break if results.size < 1

    create_users(results, total: total_count, offset: offset) do |user|
      next if user["username"].blank?

      {
        id: user["id"],
        email: user["email"],
        username: user["username"],
        name: user["name"],
        # the date columns are passed to Time.zone.at, i.e. treated as epoch seconds
        created_at: Time.zone.at(user["created_at"]),
        last_seen_at: Time.zone.at(user["last_seen_at"]),
        admin: user["admin"] == 1,
      }
    end
  end
end
@@ -60,19 +64,18 @@ class ImportScripts::Phorum < ImportScripts::Base
def import_categories
puts "", "importing categories..."
categories = mysql_query("
categories =
mysql_query(
"
SELECT forum_id id, name, description, active
FROM #{TABLE_PREFIX}forums
ORDER BY forum_id ASC
").to_a
",
).to_a
create_categories(categories) do |category|
next if category['active'] == 0
{
id: category['id'],
name: category["name"],
description: category["description"]
}
next if category["active"] == 0
{ id: category["id"], name: category["name"], description: category["description"] }
end
# uncomment below lines to create permalink
@@ -87,7 +90,9 @@ class ImportScripts::Phorum < ImportScripts::Base
total_count = mysql_query("SELECT count(*) count from #{TABLE_PREFIX}messages").first["count"]
batches(BATCH_SIZE) do |offset|
results = mysql_query("
results =
mysql_query(
"
SELECT m.message_id id,
m.parent_id,
m.forum_id category_id,
@@ -100,7 +105,8 @@ class ImportScripts::Phorum < ImportScripts::Base
ORDER BY m.datestamp
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
").to_a
",
).to_a
break if results.size < 1
@@ -108,20 +114,20 @@ class ImportScripts::Phorum < ImportScripts::Base
skip = false
mapped = {}
mapped[:id] = m['id']
mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1
mapped[:raw] = process_raw_post(m['raw'], m['id'])
mapped[:created_at] = Time.zone.at(m['created_at'])
mapped[:id] = m["id"]
mapped[:user_id] = user_id_from_imported_user_id(m["user_id"]) || -1
mapped[:raw] = process_raw_post(m["raw"], m["id"])
mapped[:created_at] = Time.zone.at(m["created_at"])
if m['parent_id'] == 0
mapped[:category] = category_id_from_imported_category_id(m['category_id'].to_i)
mapped[:title] = CGI.unescapeHTML(m['title'])
if m["parent_id"] == 0
mapped[:category] = category_id_from_imported_category_id(m["category_id"].to_i)
mapped[:title] = CGI.unescapeHTML(m["title"])
else
parent = topic_lookup_from_imported_post_id(m['parent_id'])
parent = topic_lookup_from_imported_post_id(m["parent_id"])
if parent
mapped[:topic_id] = parent[:topic_id]
else
puts "Parent post #{m['parent_id']} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
puts "Parent post #{m["parent_id"]} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
skip = true
end
end
@@ -137,25 +143,24 @@ class ImportScripts::Phorum < ImportScripts::Base
# end
# end
end
end
def process_raw_post(raw, import_id)
s = raw.dup
# :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
s.gsub!(/<!-- s(\S+) --><img (?:[^>]+) \/><!-- s(?:\S+) -->/, '\1')
s.gsub!(%r{<!-- s(\S+) --><img (?:[^>]+) /><!-- s(?:\S+) -->}, '\1')
# Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
s.gsub!(/<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/, '[\2](\1)')
s.gsub!(%r{<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)</a><!-- \w -->}, '[\2](\1)')
# Many phpbb bbcode tags have a hash attached to them. Examples:
# [url=https&#58;//google&#46;com:1qh1i7ky]click here[/url:1qh1i7ky]
# [quote=&quot;cybereality&quot;:b0wtlzex]Some text.[/quote:b0wtlzex]
s.gsub!(/:(?:\w{8})\]/, ']')
s.gsub!(/:(?:\w{8})\]/, "]")
# Remove mybb video tags.
s.gsub!(/(^\[video=.*?\])|(\[\/video\]$)/, '')
s.gsub!(%r{(^\[video=.*?\])|(\[/video\]$)}, "")
s = CGI.unescapeHTML(s)
@@ -163,50 +168,54 @@ class ImportScripts::Phorum < ImportScripts::Base
# [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
#
# Work around it for now:
s.gsub!(/\[http(s)?:\/\/(www\.)?/, '[')
s.gsub!(%r{\[http(s)?://(www\.)?}, "[")
# [QUOTE]...[/QUOTE]
s.gsub!(/\[quote\](.+?)\[\/quote\]/im) { "\n> #{$1}\n" }
s.gsub!(%r{\[quote\](.+?)\[/quote\]}im) { "\n> #{$1}\n" }
# [URL=...]...[/URL]
s.gsub!(/\[url="?(.+?)"?\](.+)\[\/url\]/i) { "[#{$2}](#{$1})" }
s.gsub!(%r{\[url="?(.+?)"?\](.+)\[/url\]}i) { "[#{$2}](#{$1})" }
# [IMG]...[/IMG]
s.gsub!(/\[\/?img\]/i, "")
s.gsub!(%r{\[/?img\]}i, "")
# convert list tags to ul and list=1 tags to ol
# (basically, we're only missing list=a here...)
s.gsub!(/\[list\](.*?)\[\/list\]/m, '[ul]\1[/ul]')
s.gsub!(/\[list=1\](.*?)\[\/list\]/m, '[ol]\1[/ol]')
s.gsub!(%r{\[list\](.*?)\[/list\]}m, '[ul]\1[/ul]')
s.gsub!(%r{\[list=1\](.*?)\[/list\]}m, '[ol]\1[/ol]')
# convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
s.gsub!(/\[\*\](.*?)\n/, '[li]\1[/li]')
# [CODE]...[/CODE]
s.gsub!(/\[\/?code\]/i, "\n```\n")
s.gsub!(%r{\[/?code\]}i, "\n```\n")
# [HIGHLIGHT]...[/HIGHLIGHT]
s.gsub!(/\[\/?highlight\]/i, "\n```\n")
s.gsub!(%r{\[/?highlight\]}i, "\n```\n")
# [YOUTUBE]<id>[/YOUTUBE]
s.gsub!(/\[youtube\](.+?)\[\/youtube\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
s.gsub!(%r{\[youtube\](.+?)\[/youtube\]}i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
# [youtube=425,350]id[/youtube]
s.gsub!(/\[youtube="?(.+?)"?\](.+)\[\/youtube\]/i) { "\nhttps://www.youtube.com/watch?v=#{$2}\n" }
s.gsub!(%r{\[youtube="?(.+?)"?\](.+)\[/youtube\]}i) do
"\nhttps://www.youtube.com/watch?v=#{$2}\n"
end
# [MEDIA=youtube]id[/MEDIA]
s.gsub!(/\[MEDIA=youtube\](.+?)\[\/MEDIA\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
s.gsub!(%r{\[MEDIA=youtube\](.+?)\[/MEDIA\]}i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
# [ame="youtube_link"]title[/ame]
s.gsub!(/\[ame="?(.+?)"?\](.+)\[\/ame\]/i) { "\n#{$1}\n" }
s.gsub!(%r{\[ame="?(.+?)"?\](.+)\[/ame\]}i) { "\n#{$1}\n" }
# [VIDEO=youtube;<id>]...[/VIDEO]
s.gsub!(/\[video=youtube;([^\]]+)\].*?\[\/video\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
s.gsub!(%r{\[video=youtube;([^\]]+)\].*?\[/video\]}i) do
"\nhttps://www.youtube.com/watch?v=#{$1}\n"
end
# [USER=706]@username[/USER]
s.gsub!(/\[user="?(.+?)"?\](.+)\[\/user\]/i) { $2 }
s.gsub!(%r{\[user="?(.+?)"?\](.+)\[/user\]}i) { $2 }
# Remove the color tag
s.gsub!(/\[color=[#a-z0-9]+\]/i, "")
s.gsub!(/\[\/color\]/i, "")
s.gsub!(%r{\[/color\]}i, "")
s.gsub!(/\[hr\]/i, "<hr>")
@@ -221,7 +230,7 @@ class ImportScripts::Phorum < ImportScripts::Base
end
def import_attachments
puts '', 'importing attachments...'
puts "", "importing attachments..."
uploads = mysql_query <<-SQL
SELECT message_id, filename, FROM_BASE64(file_data) AS file_data, file_id
@@ -234,26 +243,23 @@ class ImportScripts::Phorum < ImportScripts::Base
total_count = uploads.count
uploads.each do |upload|
# puts "*** processing file #{upload['file_id']}"
post_id = post_id_from_imported_post_id(upload['message_id'])
post_id = post_id_from_imported_post_id(upload["message_id"])
if post_id.nil?
puts "Post #{upload['message_id']} for attachment #{upload['file_id']} not found"
puts "Post #{upload["message_id"]} for attachment #{upload["file_id"]} not found"
next
end
post = Post.find(post_id)
real_filename = upload['filename']
real_filename.prepend SecureRandom.hex if real_filename[0] == '.'
real_filename = upload["filename"]
real_filename.prepend SecureRandom.hex if real_filename[0] == "."
tmpfile = 'attach_' + upload['file_id'].to_s
filename = File.join('/tmp/', tmpfile)
File.open(filename, 'wb') { |f|
f.write(upload['file_data'])
}
tmpfile = "attach_" + upload["file_id"].to_s
filename = File.join("/tmp/", tmpfile)
File.open(filename, "wb") { |f| f.write(upload["file_data"]) }
upl_obj = create_upload(post.user.id, filename, real_filename)
@@ -265,16 +271,16 @@ class ImportScripts::Phorum < ImportScripts::Base
post.raw += "\n\n#{html}\n\n"
post.save!
if PostUpload.where(post: post, upload: upl_obj).exists?
puts "skipping creating uploaded for previously uploaded file #{upload['file_id']}"
puts "skipping creating uploaded for previously uploaded file #{upload["file_id"]}"
else
PostUpload.create!(post: post, upload: upl_obj)
end
# PostUpload.create!(post: post, upload: upl_obj) unless PostUpload.where(post: post, upload: upl_obj).exists?
else
puts "Skipping attachment #{upload['file_id']}"
puts "Skipping attachment #{upload["file_id"]}"
end
else
puts "Failed to upload attachment #{upload['file_id']}"
puts "Failed to upload attachment #{upload["file_id"]}"
exit
end
@@ -282,7 +288,6 @@ class ImportScripts::Phorum < ImportScripts::Base
print_status(current_count, total_count)
end
end
end
ImportScripts::Phorum.new.perform

View File

@@ -4,32 +4,34 @@
# Documentation: https://meta.discourse.org/t/importing-from-phpbb3/30810
# Abort with usage help unless exactly one argument was given and it names an
# existing file (the settings YAML passed to Settings.load further down).
if ARGV.length != 1 || !File.exist?(ARGV[0])
  STDERR.puts "", "Usage of phpBB3 importer:", "bundle exec ruby phpbb3.rb <path/to/settings.yml>"
  STDERR.puts "",
              "Use the settings file from #{File.expand_path("phpbb3/settings.yml", File.dirname(__FILE__))} as an example."
  STDERR.puts "",
              "Still having problems? Take a look at https://meta.discourse.org/t/importing-from-phpbb3/30810"
  exit 1
end
# Entry point of the phpBB3 importer: loads the settings file named by ARGV[0],
# connects to the source database and runs the importer.
module ImportScripts
  module PhpBB3
    require_relative "phpbb3/support/settings"
    require_relative "phpbb3/database/database"

    @settings = Settings.load(ARGV[0])

    # We need to load the gem files for ruby-bbcode-to-md and the database adapter
    # (e.g. mysql2) before bundler gets initialized by the base importer.
    # Otherwise we get an error since those gems are not always in the Gemfile.
    require "ruby-bbcode-to-md" if @settings.use_bbcode_to_md

    begin
      @database = Database.create(@settings.database)
    rescue UnsupportedVersionError => error
      # Report the unsupported version once and stop with a failure status.
      STDERR.puts "", error.message
      exit 1
    end

    # Loaded last on purpose: see the note above about bundler initialization.
    require_relative "phpbb3/importer"
    Importer.new(@settings, @database).perform
  end
end

View File

@@ -1,6 +1,6 @@
# frozen_string_literal: true
require 'mysql2'
require "mysql2"
module ImportScripts::PhpBB3
class Database
@@ -19,11 +19,11 @@ module ImportScripts::PhpBB3
def create_database
version = get_phpbb_version
if version.start_with?('3.0')
require_relative 'database_3_0'
if version.start_with?("3.0")
require_relative "database_3_0"
Database_3_0.new(@database_client, @database_settings)
elsif version.start_with?('3.1') || version.start_with?('3.2') || version.start_with?('3.3')
require_relative 'database_3_1'
elsif version.start_with?("3.1") || version.start_with?("3.2") || version.start_with?("3.3")
require_relative "database_3_1"
Database_3_1.new(@database_client, @database_settings)
else
raise UnsupportedVersionError, <<~TEXT
@@ -42,7 +42,7 @@ module ImportScripts::PhpBB3
username: @database_settings.username,
password: @database_settings.password,
database: @database_settings.schema,
reconnect: true
reconnect: true,
)
end

View File

@@ -1,7 +1,7 @@
# frozen_string_literal: true
require_relative 'database_base'
require_relative '../support/constants'
require_relative "database_base"
require_relative "../support/constants"
module ImportScripts::PhpBB3
class Database_3_0 < DatabaseBase

View File

@@ -1,7 +1,7 @@
# frozen_string_literal: true
require_relative 'database_3_0'
require_relative '../support/constants'
require_relative "database_3_0"
require_relative "../support/constants"
module ImportScripts::PhpBB3
class Database_3_1 < Database_3_0
@@ -32,14 +32,15 @@ module ImportScripts::PhpBB3
private
# Builds the SQL fragment that appends the custom profile field columns to a
# column list.
#
# The result is memoized on first use, so the argument of later calls is
# ignored.
#
# @param profile_fields [Array<Hash>, nil] entries whose :phpbb_field_name
#   holds the field name without the "pf_" prefix
# @return [String] e.g. ", pf_a, pf_b", or "" when there are no fields
def profile_fields_query(profile_fields)
  @profile_fields_query ||=
    if profile_fields.present?
      columns = profile_fields.map { |field| "pf_#{field[:phpbb_field_name]}" }
      ", #{columns.join(", ")}"
    else
      ""
    end
end
end
end

View File

@@ -39,9 +39,7 @@ module ImportScripts::PhpBB3
# Returns the last element yielded by +rows+, or nil when +rows+ is empty.
#
# Only #size and #each_with_index are used, so +rows+ does not have to
# support random access (presumably because it may be a database result
# set -- confirm against the callers).
def find_last_row(rows)
  last_index = rows.size - 1

  rows.each_with_index { |row, index| return row if index == last_index }

  # Reached only when nothing was yielded.
  nil
end

View File

@@ -1,9 +1,9 @@
# frozen_string_literal: true
require_relative '../base'
require_relative 'support/settings'
require_relative 'database/database'
require_relative 'importers/importer_factory'
require_relative "../base"
require_relative "support/settings"
require_relative "database/database"
require_relative "importers/importer_factory"
module ImportScripts::PhpBB3
class Importer < ImportScripts::Base
@@ -25,7 +25,7 @@ module ImportScripts::PhpBB3
protected
def execute
puts '', "importing from phpBB #{@php_config[:phpbb_version]}"
puts "", "importing from phpBB #{@php_config[:phpbb_version]}"
SiteSetting.tagging_enabled = true if @settings.tag_mappings.present?
@@ -55,8 +55,14 @@ module ImportScripts::PhpBB3
settings[:max_attachment_size_kb] = [max_file_size_kb, SiteSetting.max_attachment_size_kb].max
# temporarily disable validation since we want to import all existing images and attachments
SiteSetting.type_supervisor.load_setting(:max_image_size_kb, max: settings[:max_image_size_kb])
SiteSetting.type_supervisor.load_setting(:max_attachment_size_kb, max: settings[:max_attachment_size_kb])
SiteSetting.type_supervisor.load_setting(
:max_image_size_kb,
max: settings[:max_image_size_kb],
)
SiteSetting.type_supervisor.load_setting(
:max_attachment_size_kb,
max: settings[:max_attachment_size_kb],
)
settings
end
@@ -66,7 +72,7 @@ module ImportScripts::PhpBB3
end
def import_users
puts '', 'creating users'
puts "", "creating users"
total_count = @database.count_users
importer = @importers.user_importer
last_user_id = 0
@@ -88,10 +94,10 @@ module ImportScripts::PhpBB3
end
def import_anonymous_users
puts '', 'creating anonymous users'
puts "", "creating anonymous users"
total_count = @database.count_anonymous_users
importer = @importers.user_importer
last_username = ''
last_username = ""
batches do |offset|
rows, last_username = @database.fetch_anonymous_users(last_username)
@@ -109,26 +115,34 @@ module ImportScripts::PhpBB3
end
def import_groups
puts '', 'creating groups'
puts "", "creating groups"
rows = @database.fetch_groups
create_groups(rows) do |row|
begin
next if row[:group_type] == 3
group_name = if @settings.site_name.present?
"#{@settings.site_name}_#{row[:group_name]}"
else
row[:group_name]
end[0..19].gsub(/[^a-zA-Z0-9\-_. ]/, '_')
group_name =
if @settings.site_name.present?
"#{@settings.site_name}_#{row[:group_name]}"
else
row[:group_name]
end[
0..19
].gsub(/[^a-zA-Z0-9\-_. ]/, "_")
bio_raw = @importers.text_processor.process_raw_text(row[:group_desc]) rescue row[:group_desc]
bio_raw =
begin
@importers.text_processor.process_raw_text(row[:group_desc])
rescue StandardError
row[:group_desc]
end
{
id: @settings.prefix(row[:group_id]),
name: group_name,
full_name: row[:group_name],
bio_raw: bio_raw
bio_raw: bio_raw,
}
rescue => e
log_error("Failed to map group with ID #{row[:group_id]}", e)
@@ -137,7 +151,7 @@ module ImportScripts::PhpBB3
end
def import_user_groups
puts '', 'creating user groups'
puts "", "creating user groups"
rows = @database.fetch_group_users
rows.each do |row|
@@ -147,7 +161,11 @@ module ImportScripts::PhpBB3
user_id = @lookup.user_id_from_imported_user_id(@settings.prefix(row[:user_id]))
begin
GroupUser.find_or_create_by(user_id: user_id, group_id: group_id, owner: row[:group_leader])
GroupUser.find_or_create_by(
user_id: user_id,
group_id: group_id,
owner: row[:group_leader],
)
rescue => e
log_error("Failed to add user #{row[:user_id]} to group #{row[:group_id]}", e)
end
@@ -155,7 +173,7 @@ module ImportScripts::PhpBB3
end
def import_new_categories
puts '', 'creating new categories'
puts "", "creating new categories"
create_categories(@settings.new_categories) do |row|
next if row == "SKIP"
@@ -163,13 +181,14 @@ module ImportScripts::PhpBB3
{
id: @settings.prefix(row[:forum_id]),
name: row[:name],
parent_category_id: @lookup.category_id_from_imported_category_id(@settings.prefix(row[:parent_id]))
parent_category_id:
@lookup.category_id_from_imported_category_id(@settings.prefix(row[:parent_id])),
}
end
end
def import_categories
puts '', 'creating categories'
puts "", "creating categories"
rows = @database.fetch_categories
importer = @importers.category_importer
@@ -181,7 +200,7 @@ module ImportScripts::PhpBB3
end
def import_posts
puts '', 'creating topics and posts'
puts "", "creating topics and posts"
total_count = @database.count_posts
importer = @importers.post_importer
last_post_id = 0
@@ -202,7 +221,7 @@ module ImportScripts::PhpBB3
end
def import_private_messages
puts '', 'creating private messages'
puts "", "creating private messages"
total_count = @database.count_messages
importer = @importers.message_importer
last_msg_id = 0
@@ -223,7 +242,7 @@ module ImportScripts::PhpBB3
end
def import_bookmarks
puts '', 'creating bookmarks'
puts "", "creating bookmarks"
total_count = @database.count_bookmarks
importer = @importers.bookmark_importer
last_user_id = last_topic_id = 0
@@ -243,7 +262,7 @@ module ImportScripts::PhpBB3
end
def import_likes
puts '', 'importing likes'
puts "", "importing likes"
total_count = @database.count_likes
last_post_id = last_user_id = 0
@@ -255,7 +274,7 @@ module ImportScripts::PhpBB3
{
post_id: @settings.prefix(row[:post_id]),
user_id: @settings.prefix(row[:user_id]),
created_at: Time.zone.at(row[:thanks_time])
created_at: Time.zone.at(row[:thanks_time]),
}
end
end

View File

@@ -49,12 +49,12 @@ module ImportScripts::PhpBB3
def get_avatar_path(avatar_type, filename)
case avatar_type
when Constants::AVATAR_TYPE_UPLOADED, Constants::AVATAR_TYPE_STRING_UPLOADED then
filename.gsub!(/_[0-9]+\./, '.') # we need 1337.jpg, not 1337_2983745.jpg
get_uploaded_path(filename)
when Constants::AVATAR_TYPE_GALLERY, Constants::AVATAR_TYPE_STRING_GALLERY then
when Constants::AVATAR_TYPE_UPLOADED, Constants::AVATAR_TYPE_STRING_UPLOADED
filename.gsub!(/_[0-9]+\./, ".") # we need 1337.jpg, not 1337_2983745.jpg
get_uploaded_path(filename)
when Constants::AVATAR_TYPE_GALLERY, Constants::AVATAR_TYPE_STRING_GALLERY
get_gallery_path(filename)
when Constants::AVATAR_TYPE_REMOTE, Constants::AVATAR_TYPE_STRING_REMOTE then
when Constants::AVATAR_TYPE_REMOTE, Constants::AVATAR_TYPE_STRING_REMOTE
download_avatar(filename)
else
puts "Invalid avatar type #{avatar_type}. Skipping..."
@@ -67,12 +67,13 @@ module ImportScripts::PhpBB3
max_image_size_kb = SiteSetting.max_image_size_kb.kilobytes
begin
avatar_file = FileHelper.download(
url,
max_file_size: max_image_size_kb,
tmp_file_name: 'discourse-avatar',
follow_redirect: true
)
avatar_file =
FileHelper.download(
url,
max_file_size: max_image_size_kb,
tmp_file_name: "discourse-avatar",
follow_redirect: true,
)
rescue StandardError => err
warn "Error downloading avatar: #{err.message}. Skipping..."
return nil
@@ -100,11 +101,11 @@ module ImportScripts::PhpBB3
def is_allowed_avatar_type?(avatar_type)
case avatar_type
when Constants::AVATAR_TYPE_UPLOADED, Constants::AVATAR_TYPE_STRING_UPLOADED then
when Constants::AVATAR_TYPE_UPLOADED, Constants::AVATAR_TYPE_STRING_UPLOADED
@settings.import_uploaded_avatars
when Constants::AVATAR_TYPE_REMOTE, Constants::AVATAR_TYPE_STRING_REMOTE then
when Constants::AVATAR_TYPE_REMOTE, Constants::AVATAR_TYPE_STRING_REMOTE
@settings.import_remote_avatars
when Constants::AVATAR_TYPE_GALLERY, Constants::AVATAR_TYPE_STRING_GALLERY then
when Constants::AVATAR_TYPE_GALLERY, Constants::AVATAR_TYPE_STRING_GALLERY
@settings.import_gallery_avatars
else
false

View File

@@ -9,7 +9,7 @@ module ImportScripts::PhpBB3
# Maps a bookmark row to the attributes used for the import.
#
# Both IDs are passed through @settings.prefix; the bookmarked post is the
# first post of the bookmarked topic.
#
# @param row [Hash] reads :user_id and :topic_first_post_id
# @return [Hash] with the keys :user_id and :post_id
def map_bookmark(row)
  {
    user_id: @settings.prefix(row[:user_id]),
    post_id: @settings.prefix(row[:topic_first_post_id]),
  }
end
end

View File

@@ -23,11 +23,13 @@ module ImportScripts::PhpBB3
{
id: @settings.prefix(row[:forum_id]),
name: CGI.unescapeHTML(row[:forum_name]),
parent_category_id: @lookup.category_id_from_imported_category_id(@settings.prefix(row[:parent_id])),
post_create_action: proc do |category|
update_category_description(category, row)
@permalink_importer.create_for_category(category, row[:forum_id]) # skip @settings.prefix because ID is used in permalink generation
end
parent_category_id:
@lookup.category_id_from_imported_category_id(@settings.prefix(row[:parent_id])),
post_create_action:
proc do |category|
update_category_description(category, row)
@permalink_importer.create_for_category(category, row[:forum_id]) # skip @settings.prefix because ID is used in permalink generation
end,
}
end
@@ -51,7 +53,16 @@ module ImportScripts::PhpBB3
end
if row[:forum_desc].present?
changes = { raw: (@text_processor.process_raw_text(row[:forum_desc]) rescue row[:forum_desc]) }
changes = {
raw:
(
begin
@text_processor.process_raw_text(row[:forum_desc])
rescue StandardError
row[:forum_desc]
end
),
}
opts = { revised_at: post.created_at, bypass_bump: true }
post.revise(Discourse.system_user, changes, opts)
end

View File

@@ -1,16 +1,16 @@
# frozen_string_literal: true
require_relative 'attachment_importer'
require_relative 'avatar_importer'
require_relative 'bookmark_importer'
require_relative 'category_importer'
require_relative 'message_importer'
require_relative 'poll_importer'
require_relative 'post_importer'
require_relative 'permalink_importer'
require_relative 'user_importer'
require_relative '../support/smiley_processor'
require_relative '../support/text_processor'
require_relative "attachment_importer"
require_relative "avatar_importer"
require_relative "bookmark_importer"
require_relative "category_importer"
require_relative "message_importer"
require_relative "poll_importer"
require_relative "post_importer"
require_relative "permalink_importer"
require_relative "user_importer"
require_relative "../support/smiley_processor"
require_relative "../support/text_processor"
module ImportScripts::PhpBB3
class ImporterFactory
@@ -36,7 +36,14 @@ module ImportScripts::PhpBB3
end
# Creates a new PostImporter wired with the lookup, the text processor, the
# attachment/poll/permalink importers and the settings.
# A new instance is built on every call (no memoization).
def post_importer
  PostImporter.new(
    @lookup,
    text_processor,
    attachment_importer,
    poll_importer,
    permalink_importer,
    @settings,
  )
end
def message_importer
@@ -64,7 +71,8 @@ module ImportScripts::PhpBB3
end
# Returns the shared TextProcessor, creating it on first use.
def text_processor
  @text_processor ||=
    TextProcessor.new(@lookup, @database, smiley_processor, @settings, @phpbb_config)
end
def smiley_processor

View File

@@ -20,14 +20,16 @@ module ImportScripts::PhpBB3
end
def map_message(row)
user_id = @lookup.user_id_from_imported_user_id(@settings.prefix(row[:author_id])) || Discourse.system_user.id
user_id =
@lookup.user_id_from_imported_user_id(@settings.prefix(row[:author_id])) ||
Discourse.system_user.id
attachments = import_attachments(row, user_id)
mapped = {
id: get_import_id(row[:msg_id]),
user_id: user_id,
created_at: Time.zone.at(row[:message_time]),
raw: @text_processor.process_private_msg(row[:message_text], attachments)
raw: @text_processor.process_private_msg(row[:message_text], attachments),
}
root_user_ids = sorted_user_ids(row[:root_author_id], row[:root_to_address])
@@ -43,7 +45,7 @@ module ImportScripts::PhpBB3
protected
RE_PREFIX = 're: '
RE_PREFIX = "re: "
def import_attachments(row, user_id)
if @settings.import_attachments && row[:attachment_count] > 0
@@ -55,7 +57,7 @@ module ImportScripts::PhpBB3
mapped[:title] = get_topic_title(row)
mapped[:archetype] = Archetype.private_message
mapped[:target_usernames] = get_recipient_usernames(row)
mapped[:custom_fields] = { import_user_ids: current_user_ids.join(',') }
mapped[:custom_fields] = { import_user_ids: current_user_ids.join(",") }
if mapped[:target_usernames].empty?
puts "Private message without recipients. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}"
@@ -75,9 +77,9 @@ module ImportScripts::PhpBB3
# to_address looks like this: "u_91:u_1234:g_200"
# If there is a "u_" prefix, the prefix is discarded and the rest is a user_id
user_ids = to_address.split(':')
user_ids = to_address.split(":")
user_ids.uniq!
user_ids.map! { |u| u[2..-1].to_i if u[0..1] == 'u_' }.compact
user_ids.map! { |u| u[2..-1].to_i if u[0..1] == "u_" }.compact
end
def get_recipient_group_ids(to_address)
@@ -85,16 +87,19 @@ module ImportScripts::PhpBB3
# to_address looks like this: "u_91:u_1234:g_200"
# If there is a "g_" prefix, the prefix is discarded and the rest is a group_id
group_ids = to_address.split(':')
group_ids = to_address.split(":")
group_ids.uniq!
group_ids.map! { |g| g[2..-1].to_i if g[0..1] == 'g_' }.compact
group_ids.map! { |g| g[2..-1].to_i if g[0..1] == "g_" }.compact
end
def get_recipient_usernames(row)
import_user_ids = get_recipient_user_ids(row[:to_address])
usernames = import_user_ids.map do |import_user_id|
@lookup.find_user_by_import_id(@settings.prefix(import_user_id)).try(:username)
end.compact
usernames =
import_user_ids
.map do |import_user_id|
@lookup.find_user_by_import_id(@settings.prefix(import_user_id)).try(:username)
end
.compact
import_group_ids = get_recipient_group_ids(row[:to_address])
import_group_ids.each do |import_group_id|
@@ -142,13 +147,19 @@ module ImportScripts::PhpBB3
topic_titles = [topic_title]
topic_titles << topic_title[RE_PREFIX.length..-1] if topic_title.start_with?(RE_PREFIX)
Post.select(:topic_id)
Post
.select(:topic_id)
.joins(:topic)
.joins(:_custom_fields)
.where(["LOWER(topics.title) IN (:titles) AND post_custom_fields.name = 'import_user_ids' AND post_custom_fields.value = :user_ids",
{ titles: topic_titles, user_ids: current_user_ids.join(',') }])
.order('topics.created_at DESC')
.first.try(:topic_id)
.where(
[
"LOWER(topics.title) IN (:titles) AND post_custom_fields.name = 'import_user_ids' AND post_custom_fields.value = :user_ids",
{ titles: topic_titles, user_ids: current_user_ids.join(",") },
],
)
.order("topics.created_at DESC")
.first
.try(:topic_id)
end
end
end

View File

@@ -13,13 +13,15 @@ module ImportScripts::PhpBB3
# Adds the permalink normalizations enabled in the importer settings to
# SiteSetting.permalink_normalizations.
#
# The site setting is a single "|"-separated string: it is split exactly once,
# extended via add_normalization and written back joined with "|".
def change_site_settings
  normalizations = SiteSetting.permalink_normalizations
  normalizations = normalizations.blank? ? [] : normalizations.split("|")

  if @settings.create_category_links
    add_normalization(normalizations, CATEGORY_LINK_NORMALIZATION)
  end
  add_normalization(normalizations, POST_LINK_NORMALIZATION) if @settings.create_post_links
  add_normalization(normalizations, TOPIC_LINK_NORMALIZATION) if @settings.create_topic_links

  SiteSetting.permalink_normalizations = normalizations.join("|")
end
def create_for_category(category, import_id)
@@ -50,8 +52,8 @@ module ImportScripts::PhpBB3
def add_normalization(normalizations, normalization)
if @settings.normalization_prefix.present?
prefix = @settings.normalization_prefix[%r|^/?(.*?)/?$|, 1]
normalization = "/#{prefix.gsub('/', '\/')}\\#{normalization}"
prefix = @settings.normalization_prefix[%r{^/?(.*?)/?$}, 1]
normalization = "/#{prefix.gsub("/", '\/')}\\#{normalization}"
end
normalizations << normalization unless normalizations.include?(normalization)

View File

@@ -49,7 +49,12 @@ module ImportScripts::PhpBB3
end
def get_option_text(row)
text = @text_processor.process_raw_text(row[:poll_option_text]) rescue row[:poll_option_text]
text =
begin
@text_processor.process_raw_text(row[:poll_option_text])
rescue StandardError
row[:poll_option_text]
end
text.squish!
text.gsub!(/^(\d+)\./, '\1\.')
text
@@ -57,7 +62,12 @@ module ImportScripts::PhpBB3
# @param poll_data [ImportScripts::PhpBB3::PollData]
def get_poll_text(poll_data)
title = @text_processor.process_raw_text(poll_data.title) rescue poll_data.title
title =
begin
@text_processor.process_raw_text(poll_data.title)
rescue StandardError
poll_data.title
end
text = +"#{title}\n\n"
arguments = ["results=always"]
@@ -69,11 +79,9 @@ module ImportScripts::PhpBB3
arguments << "type=regular"
end
text << "[poll #{arguments.join(' ')}]"
text << "[poll #{arguments.join(" ")}]"
poll_data.options.each do |option|
text << "\n* #{option[:text]}"
end
poll_data.options.each { |option| text << "\n* #{option[:text]}" }
text << "\n[/poll]"
end
@@ -104,9 +112,7 @@ module ImportScripts::PhpBB3
poll.poll_options.each_with_index do |option, index|
imported_option = poll_data.options[index]
imported_option[:ids].each do |imported_id|
option_ids[imported_id] = option.id
end
imported_option[:ids].each { |imported_id| option_ids[imported_id] = option.id }
end
option_ids

View File

@@ -8,7 +8,14 @@ module ImportScripts::PhpBB3
# @param poll_importer [ImportScripts::PhpBB3::PollImporter]
# @param permalink_importer [ImportScripts::PhpBB3::PermalinkImporter]
# @param settings [ImportScripts::PhpBB3::Settings]
def initialize(lookup, text_processor, attachment_importer, poll_importer, permalink_importer, settings)
def initialize(
lookup,
text_processor,
attachment_importer,
poll_importer,
permalink_importer,
settings
)
@lookup = lookup
@text_processor = text_processor
@attachment_importer = attachment_importer
@@ -24,7 +31,8 @@ module ImportScripts::PhpBB3
def map_post(row)
return if @settings.category_mappings.dig(row[:forum_id].to_s, :skip)
imported_user_id = @settings.prefix(row[:post_username].blank? ? row[:poster_id] : row[:post_username])
imported_user_id =
@settings.prefix(row[:post_username].blank? ? row[:poster_id] : row[:post_username])
user_id = @lookup.user_id_from_imported_user_id(imported_user_id) || -1
is_first_post = row[:post_id] == row[:topic_first_post_id]
@@ -35,7 +43,7 @@ module ImportScripts::PhpBB3
user_id: user_id,
created_at: Time.zone.at(row[:post_time]),
raw: @text_processor.process_post(row[:post_text], attachments),
import_topic_id: @settings.prefix(row[:topic_id])
import_topic_id: @settings.prefix(row[:topic_id]),
}
if is_first_post
@@ -58,7 +66,9 @@ module ImportScripts::PhpBB3
mapped[:category] = if category_mapping = @settings.category_mappings[row[:forum_id].to_s]
category_mapping[:discourse_category_id] ||
@lookup.category_id_from_imported_category_id(@settings.prefix(category_mapping[:target_category_id]))
@lookup.category_id_from_imported_category_id(
@settings.prefix(category_mapping[:target_category_id]),
)
else
@lookup.category_id_from_imported_category_id(@settings.prefix(row[:forum_id]))
end
@@ -81,7 +91,8 @@ module ImportScripts::PhpBB3
end
def map_other_post(row, mapped)
parent = @lookup.topic_lookup_from_imported_post_id(@settings.prefix(row[:topic_first_post_id]))
parent =
@lookup.topic_lookup_from_imported_post_id(@settings.prefix(row[:topic_first_post_id]))
if parent.blank?
puts "Parent post #{@settings.prefix(row[:topic_first_post_id])} doesn't exist. Skipping #{@settings.prefix(row[:post_id])}: #{row[:topic_title][0..40]}"

View File

@@ -1,6 +1,6 @@
# frozen_string_literal: true
require_relative '../support/constants'
require_relative "../support/constants"
module ImportScripts::PhpBB3
class UserImporter
@@ -29,8 +29,22 @@ module ImportScripts::PhpBB3
password: @settings.import_passwords ? row[:user_password] : nil,
name: @settings.username_as_name ? row[:username] : row[:name].presence,
created_at: Time.zone.at(row[:user_regdate]),
last_seen_at: row[:user_lastvisit] == 0 ? Time.zone.at(row[:user_regdate]) : Time.zone.at(row[:user_lastvisit]),
registration_ip_address: (IPAddr.new(row[:user_ip]) rescue nil),
last_seen_at:
(
if row[:user_lastvisit] == 0
Time.zone.at(row[:user_regdate])
else
Time.zone.at(row[:user_lastvisit])
end
),
registration_ip_address:
(
begin
IPAddr.new(row[:user_ip])
rescue StandardError
nil
end
),
active: is_active_user,
trust_level: trust_level,
manual_locked_trust_level: manual_locked_trust_level,
@@ -43,10 +57,11 @@ module ImportScripts::PhpBB3
location: row[:user_from],
date_of_birth: parse_birthdate(row),
custom_fields: custom_fields(row),
post_create_action: proc do |user|
suspend_user(user, row)
@avatar_importer.import_avatar(user, row) if row[:user_avatar_type].present?
end
post_create_action:
proc do |user|
suspend_user(user, row)
@avatar_importer.import_avatar(user, row) if row[:user_avatar_type].present?
end,
}
end
@@ -61,18 +76,19 @@ module ImportScripts::PhpBB3
id: @settings.prefix(username),
email: "anonymous_#{SecureRandom.hex}@no-email.invalid",
username: username,
name: @settings.username_as_name ? username : '',
name: @settings.username_as_name ? username : "",
created_at: Time.zone.at(row[:first_post_time]),
active: true,
trust_level: TrustLevel[0],
approved: true,
approved_by_id: Discourse.system_user.id,
approved_at: Time.now,
post_create_action: proc do |user|
row[:user_inactive_reason] = Constants::INACTIVE_MANUAL
row[:ban_reason] = 'Anonymous user from phpBB3' # TODO i18n
suspend_user(user, row, true)
end
post_create_action:
proc do |user|
row[:user_inactive_reason] = Constants::INACTIVE_MANUAL
row[:ban_reason] = "Anonymous user from phpBB3" # TODO i18n
suspend_user(user, row, true)
end,
}
end
@@ -80,25 +96,32 @@ module ImportScripts::PhpBB3
# Parses the birthday of a user row.
#
# @param row [Hash] reads :user_birthday, a "day-month-year" string; any
#   spaces inside it are removed before parsing
# @return [Date, nil] the parsed date, or nil when the value is blank, cannot
#   be parsed or has a year that is not positive
def parse_birthdate(row)
  return nil if row[:user_birthday].blank?

  birthdate =
    begin
      Date.strptime(row[:user_birthday].delete(" "), "%d-%m-%Y")
    rescue StandardError
      # Best effort: an unparsable birthday is treated as missing.
      nil
    end

  birthdate && birthdate.year > 0 ? birthdate : nil
end
# Returns all UserField records indexed by their name (memoized).
#
# @return [Hash] field name => UserField
def user_fields
  @user_fields ||= UserField.all.map { |field| [field.name, field] }.to_h
end
# Pairs every configured custom field with its phpBB column name and the
# matching entry of user_fields (memoized).
#
# @return [Array<Hash>] one hash per entry of @settings.custom_fields with
#   :phpbb_field_name (Symbol, "pf_"-prefixed) and :discourse_user_field
#   (the user_fields entry for :discourse_field_name, nil if there is none)
def field_mappings
  @field_mappings ||=
    @settings.custom_fields.map do |field|
      {
        phpbb_field_name: "pf_#{field[:phpbb_field_name]}".to_sym,
        discourse_user_field: user_fields[field[:discourse_field_name]],
      }
    end
end
def custom_fields(row)
@@ -114,7 +137,8 @@ module ImportScripts::PhpBB3
when "confirm"
value = value == 1 ? true : nil
when "dropdown"
value = user_field.user_field_options.find { |option| option.value == value } ? value : nil
value =
user_field.user_field_options.find { |option| option.value == value } ? value : nil
end
custom_fields["user_field_#{user_field.id}"] = value if value.present?
@@ -128,7 +152,8 @@ module ImportScripts::PhpBB3
if row[:user_inactive_reason] == Constants::INACTIVE_MANUAL
user.suspended_at = Time.now
user.suspended_till = 200.years.from_now
ban_reason = row[:ban_reason].blank? ? 'Account deactivated by administrator' : row[:ban_reason] # TODO i18n
ban_reason =
row[:ban_reason].blank? ? "Account deactivated by administrator" : row[:ban_reason] # TODO i18n
elsif row[:ban_start].present?
user.suspended_at = Time.zone.at(row[:ban_start])
user.suspended_till = row[:ban_end] > 0 ? Time.zone.at(row[:ban_end]) : 200.years.from_now
@@ -148,7 +173,9 @@ module ImportScripts::PhpBB3
if user.save
StaffActionLogger.new(Discourse.system_user).log_user_suspend(user, ban_reason)
else
Rails.logger.error("Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}")
Rails.logger.error(
"Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}",
)
end
end
end

View File

@@ -1,7 +1,9 @@
# frozen_string_literal: true
# Empty namespace declarations so that nested modules can be opened with the
# compact ImportScripts::PhpBB3::... form.
module ImportScripts
  module PhpBB3
  end
end
module ImportScripts::PhpBB3::BBCode
LINEBREAK_AUTO = :auto

View File

@@ -1,7 +1,7 @@
# frozen_string_literal: true
require 'nokogiri'
require_relative 'markdown_node'
require "nokogiri"
require_relative "markdown_node"
module ImportScripts::PhpBB3::BBCode
class XmlToMarkdown
@@ -14,7 +14,7 @@ module ImportScripts::PhpBB3::BBCode
@allow_inline_code = opts.fetch(:allow_inline_code, false)
@traditional_linebreaks = opts.fetch(:traditional_linebreaks, false)
@doc = Nokogiri::XML(xml)
@doc = Nokogiri.XML(xml)
@list_stack = []
end
@@ -28,9 +28,9 @@ module ImportScripts::PhpBB3::BBCode
private
IGNORED_ELEMENTS = ["s", "e", "i"]
ELEMENTS_WITHOUT_LEADING_WHITESPACES = ["LIST", "LI"]
ELEMENTS_WITH_HARD_LINEBREAKS = ["B", "I", "U"]
IGNORED_ELEMENTS = %w[s e i]
ELEMENTS_WITHOUT_LEADING_WHITESPACES = %w[LIST LI]
ELEMENTS_WITH_HARD_LINEBREAKS = %w[B I U]
EXPLICIT_LINEBREAK_THRESHOLD = 2
def preprocess_xml
@@ -65,9 +65,7 @@ module ImportScripts::PhpBB3::BBCode
xml_node.children.each { |xml_child| visit(xml_child, md_node || md_parent) }
after_hook = "after_#{xml_node.name}"
if respond_to?(after_hook, include_all: true)
send(after_hook, xml_node, md_node)
end
send(after_hook, xml_node, md_node) if respond_to?(after_hook, include_all: true)
end
def create_node(xml_node, md_parent)
@@ -84,19 +82,15 @@ module ImportScripts::PhpBB3::BBCode
end
# Wraps the node in "**" unless its direct XML parent is also a B element,
# so nested bold tags are only enclosed once.
def visit_B(xml_node, md_node)
  return if xml_node.parent&.name == "B"

  md_node.enclosed_with = "**"
end
# Wraps the node in "_" unless its direct XML parent is also an I element,
# so nested italic tags are only enclosed once.
def visit_I(xml_node, md_node)
  return if xml_node.parent&.name == "I"

  md_node.enclosed_with = "_"
end
def visit_U(xml_node, md_node)
if xml_node.parent&.name != 'U'
if xml_node.parent&.name != "U"
md_node.prefix = "[u]"
md_node.postfix = "[/u]"
end
@@ -122,10 +116,7 @@ module ImportScripts::PhpBB3::BBCode
md_node.prefix_linebreaks = md_node.postfix_linebreaks = @list_stack.size == 0 ? 2 : 1
md_node.prefix_linebreak_type = LINEBREAK_HTML if @list_stack.size == 0
@list_stack << {
unordered: xml_node.attribute('type').nil?,
item_count: 0
}
@list_stack << { unordered: xml_node.attribute("type").nil?, item_count: 0 }
end
def after_LIST(xml_node, md_node)
@@ -138,21 +129,21 @@ module ImportScripts::PhpBB3::BBCode
list[:item_count] += 1
indentation = ' ' * 2 * depth
symbol = list[:unordered] ? '*' : "#{list[:item_count]}."
indentation = " " * 2 * depth
symbol = list[:unordered] ? "*" : "#{list[:item_count]}."
md_node.prefix = "#{indentation}#{symbol} "
md_node.postfix_linebreaks = 1
end
# Turns an IMG element into a Markdown image with empty alt text, using the
# element's "src" attribute as the target. Two linebreaks are requested on
# each side, and skip_children is called on the Markdown node.
def visit_IMG(xml_node, md_node)
  source = xml_node.attribute("src")

  # Unary plus yields an unfrozen String, so the text can be modified later.
  md_node.text = +"![](#{source})"
  md_node.postfix_linebreaks = 2
  md_node.prefix_linebreaks = 2
  md_node.skip_children
end
def visit_URL(xml_node, md_node)
original_url = xml_node.attribute('url').to_s
original_url = xml_node.attribute("url").to_s
url = CGI.unescapeHTML(original_url)
url = @url_replacement.call(url) if @url_replacement
@@ -173,7 +164,8 @@ module ImportScripts::PhpBB3::BBCode
# Handles a <br> element: adds one to the node's trailing linebreak count.
# Once that count exceeds one and the <br> sits directly inside one of
# ELEMENTS_WITH_HARD_LINEBREAKS, the trailing linebreaks are marked as hard.
def visit_br(xml_node, md_node)
  md_node.postfix_linebreaks += 1

  if md_node.postfix_linebreaks > 1 &&
       ELEMENTS_WITH_HARD_LINEBREAKS.include?(xml_node.parent&.name)
    md_node.postfix_linebreak_type = LINEBREAK_HARD
  end
end
@@ -194,7 +186,8 @@ module ImportScripts::PhpBB3::BBCode
def visit_QUOTE(xml_node, md_node)
if post = quoted_post(xml_node)
md_node.prefix = %Q{[quote="#{post[:username]}, post:#{post[:post_number]}, topic:#{post[:topic_id]}"]\n}
md_node.prefix =
%Q{[quote="#{post[:username]}, post:#{post[:post_number]}, topic:#{post[:topic_id]}"]\n}
md_node.postfix = "\n[/quote]"
elsif username = quoted_username(xml_node)
md_node.prefix = %Q{[quote="#{username}"]\n}
@@ -242,11 +235,11 @@ module ImportScripts::PhpBB3::BBCode
return if size.nil?
if size.between?(1, 99)
md_node.prefix = '<small>'
md_node.postfix = '</small>'
md_node.prefix = "<small>"
md_node.postfix = "</small>"
elsif size.between?(101, 200)
md_node.prefix = '<big>'
md_node.postfix = '</big>'
md_node.prefix = "<big>"
md_node.postfix = "</big>"
end
end
@@ -267,7 +260,8 @@ module ImportScripts::PhpBB3::BBCode
parent_prefix = prefix_from_parent(md_parent)
if parent_prefix && md_node.xml_node_name != "br" && (md_parent.prefix_children || !markdown.empty?)
if parent_prefix && md_node.xml_node_name != "br" &&
(md_parent.prefix_children || !markdown.empty?)
prefix = "#{parent_prefix}#{prefix}"
end
@@ -275,11 +269,21 @@ module ImportScripts::PhpBB3::BBCode
text, prefix, postfix = hoist_whitespaces!(markdown, text, prefix, postfix)
end
add_linebreaks!(markdown, md_node.prefix_linebreaks, md_node.prefix_linebreak_type, parent_prefix)
add_linebreaks!(
markdown,
md_node.prefix_linebreaks,
md_node.prefix_linebreak_type,
parent_prefix,
)
markdown << prefix
markdown << text
markdown << postfix
add_linebreaks!(markdown, md_node.postfix_linebreaks, md_node.postfix_linebreak_type, parent_prefix)
add_linebreaks!(
markdown,
md_node.postfix_linebreaks,
md_node.postfix_linebreak_type,
parent_prefix,
)
end
markdown
@@ -296,9 +300,7 @@ module ImportScripts::PhpBB3::BBCode
end
unless postfix.empty?
if ends_with_whitespace?(text)
postfix = "#{postfix}#{text[-1]}"
end
postfix = "#{postfix}#{text[-1]}" if ends_with_whitespace?(text)
text = text.rstrip
end
@@ -319,16 +321,24 @@ module ImportScripts::PhpBB3::BBCode
if linebreak_type == LINEBREAK_HTML
max_linebreak_count = [existing_linebreak_count, required_linebreak_count - 1].max + 1
required_linebreak_count = max_linebreak_count if max_linebreak_count > EXPLICIT_LINEBREAK_THRESHOLD
required_linebreak_count = max_linebreak_count if max_linebreak_count >
EXPLICIT_LINEBREAK_THRESHOLD
end
return if existing_linebreak_count >= required_linebreak_count
rstrip!(markdown)
alternative_linebreak_start_index = required_linebreak_count > EXPLICIT_LINEBREAK_THRESHOLD ? 1 : 2
alternative_linebreak_start_index =
required_linebreak_count > EXPLICIT_LINEBREAK_THRESHOLD ? 1 : 2
required_linebreak_count.times do |index|
linebreak = linebreak(linebreak_type, index, alternative_linebreak_start_index, required_linebreak_count)
linebreak =
linebreak(
linebreak_type,
index,
alternative_linebreak_start_index,
required_linebreak_count,
)
markdown << (linebreak == "\n" ? prefix.rstrip : prefix) if prefix && index > 0
markdown << linebreak
@@ -336,18 +346,25 @@ module ImportScripts::PhpBB3::BBCode
end
# Removes, in place, trailing whitespace together with any trailing run of
# plain ("\n"), hard ("\\\n") or HTML ("<br>\n") linebreaks from +markdown+.
# The substitution is applied exactly once.
def rstrip!(markdown)
  markdown.gsub!(/\s*(?:\\?\n|<br>\n)*\z/, "")
end
# Picks the linebreak string to emit at position +linebreak_index+ (0-based)
# out of +required_linebreak_count+ linebreaks.
#
# Returns:
# * "<br>\n" for the last linebreak of an HTML-type run, once the index has
#   reached +alternative_linebreak_start_index+
# * "\\\n" (backslash + newline) for hard linebreaks, when
#   @traditional_linebreaks is set, or once the alternative index is reached
# * "\n" otherwise
def linebreak(
  linebreak_type,
  linebreak_index,
  alternative_linebreak_start_index,
  required_linebreak_count
)
  use_alternative_linebreak = linebreak_index >= alternative_linebreak_start_index
  is_last_linebreak = linebreak_index + 1 == required_linebreak_count

  if linebreak_type == LINEBREAK_HTML && use_alternative_linebreak && is_last_linebreak
    return "<br>\n"
  end

  if linebreak_type == LINEBREAK_HARD || @traditional_linebreaks || use_alternative_linebreak
    return "\\\n"
  end

  "\n"
end

View File

@@ -8,8 +8,8 @@ module ImportScripts::PhpBB3
INACTIVE_MANUAL = 3 # Account deactivated by administrator
INACTIVE_REMIND = 4 # Forced user account reactivation
GROUP_ADMINISTRATORS = 'ADMINISTRATORS'
GROUP_MODERATORS = 'GLOBAL_MODERATORS'
GROUP_ADMINISTRATORS = "ADMINISTRATORS"
GROUP_MODERATORS = "GLOBAL_MODERATORS"
# https://wiki.phpbb.com/Table.phpbb_users
USER_TYPE_NORMAL = 0
@@ -21,9 +21,9 @@ module ImportScripts::PhpBB3
AVATAR_TYPE_REMOTE = 2
AVATAR_TYPE_GALLERY = 3
AVATAR_TYPE_STRING_UPLOADED = 'avatar.driver.upload'
AVATAR_TYPE_STRING_REMOTE = 'avatar.driver.remote'
AVATAR_TYPE_STRING_GALLERY = 'avatar.driver.local'
AVATAR_TYPE_STRING_UPLOADED = "avatar.driver.upload"
AVATAR_TYPE_STRING_REMOTE = "avatar.driver.remote"
AVATAR_TYPE_STRING_GALLERY = "avatar.driver.local"
FORUM_TYPE_CATEGORY = 0
FORUM_TYPE_POST = 1

View File

@@ -1,13 +1,13 @@
# frozen_string_literal: true
require 'csv'
require 'yaml'
require_relative '../../base'
require "csv"
require "yaml"
require_relative "../../base"
module ImportScripts::PhpBB3
class Settings
# Parses the YAML file at +filename+ and wraps the result in a Settings
# object. Keys are stringified and exposed through indifferent access before
# being handed to Settings.new.
def self.load(filename)
  parsed = YAML.load_file(filename)
  Settings.new(parsed.deep_stringify_keys.with_indifferent_access)
end
@@ -44,40 +44,41 @@ module ImportScripts::PhpBB3
attr_reader :database
# Copies the importer options out of the parsed configuration.
#
# +yaml+ must respond to [] and provide an "import" section (general
# options) and a "database" section (passed on to DatabaseSettings).
def initialize(yaml)
  import_settings = yaml["import"]

  @site_name = import_settings["site_name"]

  # Category / tag / rank mapping options
  @new_categories = import_settings["new_categories"]
  # Index the optional category mappings by their source category id (as String).
  @category_mappings =
    import_settings.fetch("category_mappings", []).to_h { |m| [m[:source_category_id].to_s, m] }
  @tag_mappings = import_settings["tag_mappings"]
  @rank_mapping = import_settings["rank_mapping"]

  # Switches for the individual import steps
  @import_anonymous_users = import_settings["anonymous_users"]
  @import_attachments = import_settings["attachments"]
  @import_private_messages = import_settings["private_messages"]
  @import_polls = import_settings["polls"]
  @import_bookmarks = import_settings["bookmarks"]
  @import_passwords = import_settings["passwords"]
  @import_likes = import_settings["likes"]

  # Avatar options live in their own "avatars" sub-section
  avatar_settings = import_settings["avatars"]
  @import_uploaded_avatars = avatar_settings["uploaded"]
  @import_remote_avatars = avatar_settings["remote"]
  @import_gallery_avatars = avatar_settings["gallery"]

  @use_bbcode_to_md = import_settings["use_bbcode_to_md"]

  @original_site_prefix = import_settings["site_prefix"]["original"]
  @new_site_prefix = import_settings["site_prefix"]["new"]
  @base_dir = import_settings["phpbb_base_dir"]
  @permalinks = PermalinkSettings.new(import_settings["permalinks"])

  # Optional entries fall back to an empty list when missing
  @username_as_name = import_settings["username_as_name"]
  @emojis = import_settings.fetch("emojis", [])
  @custom_fields = import_settings.fetch("custom_fields", [])

  @database = DatabaseSettings.new(yaml["database"])
end
def prefix(val)
@@ -87,7 +88,7 @@ module ImportScripts::PhpBB3
def trust_level_for_posts(rank, trust_level: 0)
if @rank_mapping.present?
@rank_mapping.each do |key, value|
trust_level = [trust_level, key.gsub('trust_level_', '').to_i].max if rank >= value
trust_level = [trust_level, key.gsub("trust_level_", "").to_i].max if rank >= value
end
end
@@ -106,14 +107,14 @@ module ImportScripts::PhpBB3
attr_reader :batch_size
# Copies the database connection options from the "database" section of the
# configuration. +yaml+ must respond to [] with String keys.
def initialize(yaml)
  # Connection target
  @type = yaml["type"]
  @host = yaml["host"]
  @port = yaml["port"]

  # Credentials
  @username = yaml["username"]
  @password = yaml["password"]

  # Schema layout and query batching
  @schema = yaml["schema"]
  @table_prefix = yaml["table_prefix"]
  @batch_size = yaml["batch_size"]
end
end
@@ -124,10 +125,10 @@ module ImportScripts::PhpBB3
attr_reader :normalization_prefix
# Copies the permalink options from the "permalinks" section of the
# configuration. +yaml+ must respond to [] with String keys.
def initialize(yaml)
  # Which kinds of permalinks should be created
  @create_category_links = yaml["categories"]
  @create_topic_links = yaml["topics"]
  @create_post_links = yaml["posts"]

  # Value of the "prefix" option, exposed as normalization_prefix
  @normalization_prefix = yaml["prefix"]
end
end
end

View File

@@ -18,15 +18,16 @@ module ImportScripts::PhpBB3
# Replaces, in place, every phpBB smiley image in +text+ with the value
# returned by #emoji for its smiley code.
#
# Follows String#gsub! semantics: returns +text+ when at least one smiley was
# replaced and nil when there was nothing to replace.
def replace_smilies(text)
  # :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
  text.gsub!(
    /<!-- s(\S+) --><img src="\{SMILIES_PATH\}\/.+?" alt=".*?" title=".*?" \/><!-- s?\S+ -->/,
  ) { emoji($1) }
end
# Returns the replacement for +smiley_code+.
#
# Codes present in @smiley_map are returned directly. Otherwise the smiley is
# looked up in the database and, if found, uploaded exactly once. If there is
# no database entry or the upload yields nothing, the result of
# #smiley_as_text is returned instead.
def emoji(smiley_code)
  @smiley_map.fetch(smiley_code) do
    smiley = @database.get_smiley(smiley_code)
    uploaded =
      upload_smiley(smiley_code, smiley[:smiley_url], smiley_code, smiley[:emotion]) if smiley
    uploaded || smiley_as_text(smiley_code)
  end
end
@@ -35,37 +36,34 @@ module ImportScripts::PhpBB3
# Registers the built-in phpBB smiley codes in @smiley_map, mapping every
# code of a group to the same emoji.
def add_default_smilies
  {
    %w[:D :-D :grin:] => ":smiley:",
    %w[:) :-) :smile:] => ":slight_smile:",
    %w[;) ;-) :wink:] => ":wink:",
    %w[:( :-( :sad:] => ":frowning:",
    %w[:o :-o :eek:] => ":astonished:",
    [":shock:"] => ":open_mouth:",
    %w[:? :-? :???:] => ":confused:",
    %w[8) 8-) :cool:] => ":sunglasses:",
    [":lol:"] => ":laughing:",
    %w[:x :-x :mad:] => ":angry:",
    %w[:P :-P :razz:] => ":stuck_out_tongue:",
    [":oops:"] => ":blush:",
    [":cry:"] => ":cry:",
    [":evil:"] => ":imp:",
    [":twisted:"] => ":smiling_imp:",
    [":roll:"] => ":unamused:",
    [":!:"] => ":exclamation:",
    [":?:"] => ":question:",
    [":idea:"] => ":bulb:",
    [":arrow:"] => ":arrow_right:",
    %w[:| :-|] => ":neutral_face:",
    [":geek:"] => ":nerd:",
  }.each { |smilies, emoji| smilies.each { |smiley| @smiley_map[smiley] = emoji } }
end
# Registers smilies from the configuration in @smiley_map.
#
# +emojis+ maps an emoji name to a single smiley code or a list of codes;
# every code is mapped to ":<emoji name>:".
def add_configured_smilies(emojis)
  emojis.each do |emoji_name, smiley_codes|
    Array.wrap(smiley_codes).each do |smiley_code|
      @smiley_map[smiley_code] = ":#{emoji_name}:"
    end
  end
end

View File

@@ -1,6 +1,6 @@
# frozen_string_literal: true
require_relative 'bbcode/xml_to_markdown'
require_relative "bbcode/xml_to_markdown"
module ImportScripts::PhpBB3
class TextProcessor
@@ -14,7 +14,9 @@ module ImportScripts::PhpBB3
@database = database
@smiley_processor = smiley_processor
@he = HTMLEntities.new
@use_xml_to_markdown = phpbb_config[:phpbb_version].start_with?('3.2') || phpbb_config[:phpbb_version].start_with?('3.3')
@use_xml_to_markdown =
phpbb_config[:phpbb_version].start_with?("3.2") ||
phpbb_config[:phpbb_version].start_with?("3.3")
@settings = settings
@new_site_prefix = settings.new_site_prefix
@@ -25,24 +27,27 @@ module ImportScripts::PhpBB3
if @use_xml_to_markdown
unreferenced_attachments = attachments&.dup
converter = BBCode::XmlToMarkdown.new(
raw,
username_from_user_id: lambda { |user_id| @lookup.find_username_by_import_id(user_id) },
smilie_to_emoji: lambda { |smilie| @smiley_processor.emoji(smilie).dup },
quoted_post_from_post_id: lambda { |post_id| @lookup.topic_lookup_from_imported_post_id(post_id) },
upload_md_from_file: (lambda do |filename, index|
unreferenced_attachments[index] = nil
attachments.fetch(index, filename).dup
end if attachments),
url_replacement: nil,
allow_inline_code: false
)
converter =
BBCode::XmlToMarkdown.new(
raw,
username_from_user_id: lambda { |user_id| @lookup.find_username_by_import_id(user_id) },
smilie_to_emoji: lambda { |smilie| @smiley_processor.emoji(smilie).dup },
quoted_post_from_post_id:
lambda { |post_id| @lookup.topic_lookup_from_imported_post_id(post_id) },
upload_md_from_file:
(
lambda do |filename, index|
unreferenced_attachments[index] = nil
attachments.fetch(index, filename).dup
end if attachments
),
url_replacement: nil,
allow_inline_code: false,
)
text = converter.convert
text.gsub!(@short_internal_link_regexp) do |link|
replace_internal_link(link, $1, $2)
end
text.gsub!(@short_internal_link_regexp) { |link| replace_internal_link(link, $1, $2) }
add_unreferenced_attachments(text, unreferenced_attachments)
else
@@ -50,9 +55,7 @@ module ImportScripts::PhpBB3
text = CGI.unescapeHTML(text)
clean_bbcodes(text)
if @settings.use_bbcode_to_md
text = bbcode_to_md(text)
end
text = bbcode_to_md(text) if @settings.use_bbcode_to_md
process_smilies(text)
process_links(text)
process_lists(text)
@@ -65,11 +68,19 @@ module ImportScripts::PhpBB3
end
# Converts the raw text of a post.
#
# Best effort: if the conversion raises a StandardError, the unprocessed
# +raw+ text is returned instead. The conversion is attempted exactly once.
def process_post(raw, attachments)
  process_raw_text(raw, attachments)
rescue StandardError
  raw
end
# Converts the raw text of a private message.
#
# Best effort: if the conversion raises a StandardError, the unprocessed
# +raw+ text is returned instead. The conversion is attempted exactly once.
def process_private_msg(raw, attachments)
  process_raw_text(raw, attachments)
rescue StandardError
  raw
end
protected
@@ -78,10 +89,10 @@ module ImportScripts::PhpBB3
# Many phpbb bbcode tags have a hash attached to them. Examples:
# [url=https&#58;//google&#46;com:1qh1i7ky]click here[/url:1qh1i7ky]
# [quote=&quot;cybereality&quot;:b0wtlzex]Some text.[/quote:b0wtlzex]
text.gsub!(/:(?:\w{5,8})\]/, ']')
text.gsub!(/:(?:\w{5,8})\]/, "]")
# remove color tags
text.gsub!(/\[\/?color(=#?[a-z0-9]*)?\]/i, "")
text.gsub!(%r{\[/?color(=#?[a-z0-9]*)?\]}i, "")
end
def bbcode_to_md(text)
@@ -101,23 +112,19 @@ module ImportScripts::PhpBB3
# Internal forum links can have this forms:
# for topics: <!-- l --><a class="postlink-local" href="https://example.com/forums/viewtopic.php?f=26&amp;t=3412">viewtopic.php?f=26&amp;t=3412</a><!-- l -->
# for posts: <!-- l --><a class="postlink-local" href="https://example.com/forums/viewtopic.php?p=1732#p1732">viewtopic.php?p=1732#p1732</a><!-- l -->
text.gsub!(@long_internal_link_regexp) do |link|
replace_internal_link(link, $1, $2)
end
text.gsub!(@long_internal_link_regexp) { |link| replace_internal_link(link, $1, $2) }
# Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
text.gsub!(/<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/i, '[\2](\1)')
text.gsub!(%r{<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)</a><!-- \w -->}i, '[\2](\1)')
# Replace internal forum links that aren't in the <!-- l --> format
text.gsub!(@short_internal_link_regexp) do |link|
replace_internal_link(link, $1, $2)
end
text.gsub!(@short_internal_link_regexp) { |link| replace_internal_link(link, $1, $2) }
# phpBB shortens link text like this, which breaks our markdown processing:
# [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
#
# Work around it for now:
text.gsub!(/\[http(s)?:\/\/(www\.)?/i, '[')
text.gsub!(%r{\[http(s)?://(www\.)?}i, "[")
end
def replace_internal_link(link, import_topic_id, import_post_id)
@@ -144,19 +151,20 @@ module ImportScripts::PhpBB3
# convert list tags to ul and list=1 tags to ol
# list=a is not supported, so handle it like list=1
# list=9 and list=x have the same result as list=1 and list=a
text.gsub!(/\[list\](.*?)\[\/list:u\]/mi) do
$1.gsub(/\[\*\](.*?)\[\/\*:m\]\n*/mi) { "* #{$1}\n" }
text.gsub!(%r{\[list\](.*?)\[/list:u\]}mi) do
$1.gsub(%r{\[\*\](.*?)\[/\*:m\]\n*}mi) { "* #{$1}\n" }
end
text.gsub!(/\[list=.*?\](.*?)\[\/list:o\]/mi) do
$1.gsub(/\[\*\](.*?)\[\/\*:m\]\n*/mi) { "1. #{$1}\n" }
text.gsub!(%r{\[list=.*?\](.*?)\[/list:o\]}mi) do
$1.gsub(%r{\[\*\](.*?)\[/\*:m\]\n*}mi) { "1. #{$1}\n" }
end
end
# This replaces existing [attachment] BBCodes with the corresponding HTML tags for Discourse.
# All attachments that haven't been referenced in the text are appended to the end of the text.
def process_attachments(text, attachments)
attachment_regexp = /\[attachment=([\d])+\]<!-- [\w]+ -->([^<]+)<!-- [\w]+ -->\[\/attachment\]?/i
attachment_regexp =
%r{\[attachment=([\d])+\]<!-- [\w]+ -->([^<]+)<!-- [\w]+ -->\[/attachment\]?}i
unreferenced_attachments = attachments.dup
text.gsub!(attachment_regexp) do
@@ -178,29 +186,34 @@ module ImportScripts::PhpBB3
end
# Builds the two case-insensitive regular expressions used to find links
# that point back to the old forum (viewtopic.php on +original_site_prefix+).
#
# Capture group 1 is the topic id (t=...), capture group 2 the post id (p=...).
# @short_internal_link_regexp matches the bare URL,
# @long_internal_link_regexp matches the URL wrapped in <!-- l --> anchor markup.
def create_internal_link_regexps(original_site_prefix)
  # Escape dots so the prefix is matched literally.
  escaped_host = original_site_prefix.gsub(".", '\.')
  url_pattern =
    "http(?:s)?://#{escaped_host}/viewtopic\\.php\\?(?:\\S*)(?:t=(\\d+)|p=(\\d+)(?:#p\\d+)?)(?:[^\\s\\)\\]]*)"
  anchor_pattern = %Q|<!-- l --><a(?:.+)href="#{url_pattern}"(?:.*)</a><!-- l -->|

  @long_internal_link_regexp = Regexp.new(anchor_pattern, Regexp::IGNORECASE)
  @short_internal_link_regexp = Regexp.new(url_pattern, Regexp::IGNORECASE)
end
# Cleans up code-related markup in +text+ (modified in place) and returns it:
# * unwraps <span class="syntax..."> elements
# * turns [code] / [code=lang] blocks into plain [code] blocks whose content
#   has its HTML entities decoded and sits on its own lines
# * replaces <br /> with a newline
# Each substitution is applied exactly once.
def process_code(text)
  text.gsub!(%r{<span class="syntax.*?>(.*?)</span>}) { "#{$1}" }
  text.gsub!(%r{\[code(=[a-z]*)?\](.*?)\[/code\]}i) { "[code]\n#{@he.decode($2)}\n[/code]" }
  text.gsub!(%r{<br />}, "\n")
  text
end
# Normalizes spacing in +text+ (modified in place) and returns it:
# every [quote...] / [/quote] tag and every image that is alone on its line
# ends up surrounded by exactly one newline on each side.
def fix_markdown(text)
  text.gsub!(%r{(\n*\[/?quote.*?\]\n*)}mi) { |quote_tag| "\n#{quote_tag.strip}\n" }
  # space out images single on line
  text.gsub!(/^!\[[^\]]*\]\([^\]]*\)$/i) { |image| "\n#{image.strip}\n" }
  text
end
# Replaces [youtube]<id>[/youtube] tags in +text+ with a YouTube watch URL
# on its own line. Modifies +text+ in place and returns it.
def process_videos(text)
  # [YOUTUBE]<id>[/YOUTUBE]
  # gsub! is required here: a plain gsub returns a new string that would be
  # thrown away, leaving the tags unconverted.
  text.gsub!(%r{\[youtube\](.+?)\[/youtube\]}i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
  text
end
end

View File

@@ -7,19 +7,19 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb")
# Call it like this:
# RAILS_ENV=production bundle exec ruby script/import_scripts/punbb.rb
class ImportScripts::PunBB < ImportScripts::Base
PUNBB_DB = "punbb_db"
BATCH_SIZE = 1000
# Sets up the importer and opens a single MySQL connection to the PunBB
# database named by PUNBB_DB.
#
# NOTE(review): host and credentials are hard-coded — presumably meant to be
# edited locally before running the script.
def initialize
  super

  @client =
    Mysql2::Client.new(
      host: "localhost",
      username: "root",
      password: "pa$$word",
      database: PUNBB_DB,
    )
end
def execute
@@ -30,36 +30,41 @@ class ImportScripts::PunBB < ImportScripts::Base
end
# Imports all rows of the PunBB `users` table in batches of BATCH_SIZE.
# Batches whose ids have all been imported already are skipped.
def import_users
  puts "", "creating users"

  total_count = mysql_query("SELECT count(*) count FROM users;").first["count"]

  batches(BATCH_SIZE) do |offset|
    results =
      mysql_query(
        "SELECT id, username, realname name, url website, email email, registered created_at,
                registration_ip registration_ip_address, last_visit last_visit_time, last_email_sent last_emailed_at,
                last_email_sent last_emailed_at, location, group_id
         FROM users
         LIMIT #{BATCH_SIZE}
         OFFSET #{offset};",
      )

    break if results.size < 1

    next if all_records_exist? :users, results.map { |u| u["id"].to_i }

    create_users(results, total: total_count, offset: offset) do |user|
      {
        id: user["id"],
        email: user["email"],
        username: user["username"],
        name: user["name"],
        created_at: Time.zone.at(user["created_at"]),
        website: user["website"],
        registration_ip_address: user["registration_ip_address"],
        last_seen_at: Time.zone.at(user["last_visit_time"]),
        # A missing last-email timestamp is passed on as 0 rather than nil.
        last_emailed_at:
          user["last_emailed_at"] == nil ? 0 : Time.zone.at(user["last_emailed_at"]),
        location: user["location"],
        # Group 4 is treated as moderators, group 1 as administrators.
        moderator: user["group_id"] == 4,
        admin: user["group_id"] == 1,
      }
    end
  end
end
@@ -67,33 +72,34 @@ class ImportScripts::PunBB < ImportScripts::Base
# Imports the PunBB category tree in two passes: rows of `categories` become
# top-level categories, then rows of `forums` become their children.
def import_categories
  puts "", "importing top level categories..."

  categories =
    mysql_query(
      "
      SELECT id, cat_name name, disp_position position
      FROM categories
      ORDER BY id ASC
    ",
    ).to_a

  create_categories(categories) { |category| { id: category["id"], name: category["name"] } }

  puts "", "importing children categories..."

  children_categories =
    mysql_query(
      "
      SELECT id, forum_name name, forum_desc description, disp_position position, cat_id parent_category_id
      FROM forums
      ORDER BY id
    ",
    ).to_a

  create_categories(children_categories) do |category|
    {
      # Forum ids get a "child#" prefix so they stay distinct from the ids
      # used for the top-level categories.
      id: "child##{category["id"]}",
      name: category["name"],
      description: category["description"],
      parent_category_id: category_id_from_imported_category_id(category["parent_category_id"]),
    }
  end
end
@@ -104,7 +110,9 @@ class ImportScripts::PunBB < ImportScripts::Base
total_count = mysql_query("SELECT count(*) count from posts").first["count"]
batches(BATCH_SIZE) do |offset|
results = mysql_query("
results =
mysql_query(
"
SELECT p.id id,
t.id topic_id,
t.forum_id category_id,
@@ -119,29 +127,30 @@ class ImportScripts::PunBB < ImportScripts::Base
ORDER BY p.posted
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
").to_a
",
).to_a
break if results.size < 1
next if all_records_exist? :posts, results.map { |m| m['id'].to_i }
next if all_records_exist? :posts, results.map { |m| m["id"].to_i }
create_posts(results, total: total_count, offset: offset) do |m|
skip = false
mapped = {}
mapped[:id] = m['id']
mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1
mapped[:raw] = process_punbb_post(m['raw'], m['id'])
mapped[:created_at] = Time.zone.at(m['created_at'])
mapped[:id] = m["id"]
mapped[:user_id] = user_id_from_imported_user_id(m["user_id"]) || -1
mapped[:raw] = process_punbb_post(m["raw"], m["id"])
mapped[:created_at] = Time.zone.at(m["created_at"])
if m['id'] == m['first_post_id']
mapped[:category] = category_id_from_imported_category_id("child##{m['category_id']}")
mapped[:title] = CGI.unescapeHTML(m['title'])
if m["id"] == m["first_post_id"]
mapped[:category] = category_id_from_imported_category_id("child##{m["category_id"]}")
mapped[:title] = CGI.unescapeHTML(m["title"])
else
parent = topic_lookup_from_imported_post_id(m['first_post_id'])
parent = topic_lookup_from_imported_post_id(m["first_post_id"])
if parent
mapped[:topic_id] = parent[:topic_id]
else
puts "Parent post #{m['first_post_id']} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
puts "Parent post #{m["first_post_id"]} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
skip = true
end
end
@@ -152,16 +161,16 @@ class ImportScripts::PunBB < ImportScripts::Base
end
def suspend_users
puts '', "updating banned users"
puts "", "updating banned users"
banned = 0
failed = 0
total = mysql_query("SELECT count(*) count FROM bans").first['count']
total = mysql_query("SELECT count(*) count FROM bans").first["count"]
system_user = Discourse.system_user
mysql_query("SELECT username, email FROM bans").each do |b|
user = User.find_by_email(b['email'])
user = User.find_by_email(b["email"])
if user
user.suspended_at = Time.now
user.suspended_till = 200.years.from_now
@@ -174,7 +183,7 @@ class ImportScripts::PunBB < ImportScripts::Base
failed += 1
end
else
puts "Not found: #{b['email']}"
puts "Not found: #{b["email"]}"
failed += 1
end
@@ -189,15 +198,15 @@ class ImportScripts::PunBB < ImportScripts::Base
s.gsub!(/<!-- s(\S+) -->(?:.*)<!-- s(?:\S+) -->/, '\1')
# Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
s.gsub!(/<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/, '[\2](\1)')
s.gsub!(%r{<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)</a><!-- \w -->}, '[\2](\1)')
# Many phpbb bbcode tags have a hash attached to them. Examples:
# [url=https&#58;//google&#46;com:1qh1i7ky]click here[/url:1qh1i7ky]
# [quote=&quot;cybereality&quot;:b0wtlzex]Some text.[/quote:b0wtlzex]
s.gsub!(/:(?:\w{8})\]/, ']')
s.gsub!(/:(?:\w{8})\]/, "]")
# Remove mybb video tags.
s.gsub!(/(^\[video=.*?\])|(\[\/video\]$)/, '')
s.gsub!(%r{(^\[video=.*?\])|(\[/video\]$)}, "")
s = CGI.unescapeHTML(s)
@@ -205,7 +214,7 @@ class ImportScripts::PunBB < ImportScripts::Base
# [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
#
# Work around it for now:
s.gsub!(/\[http(s)?:\/\/(www\.)?/, '[')
s.gsub!(%r{\[http(s)?://(www\.)?}, "[")
s
end

View File

@@ -1,25 +1,25 @@
# frozen_string_literal: true
require 'yaml'
require_relative 'quandora_api'
require "yaml"
require_relative "quandora_api"
# Reads the YAML file +file+ (relative to this script's directory) and stores
# its "domain", "username" and "password" entries in instance variables.
def load_config(file)
  settings = YAML.load_file(File.join(__dir__, file))

  @domain = settings["domain"]
  @username = settings["username"]
  @password = settings["password"]
end
def export
api = QuandoraApi.new @domain, @username, @password
bases = api.list_bases
bases.each do |base|
question_list = api.list_questions base['objectId'], 1000
question_list = api.list_questions base["objectId"], 1000
question_list.each do |q|
question_id = q['uid']
question_id = q["uid"]
question = api.get_question question_id
File.open("output/#{question_id}.json", 'w') do |f|
puts question['title']
File.open("output/#{question_id}.json", "w") do |f|
puts question["title"]
f.write question.to_json
f.close
end

View File

@@ -1,10 +1,9 @@
# frozen_string_literal: true
require_relative './quandora_question.rb'
require_relative "./quandora_question.rb"
require File.expand_path(File.dirname(__FILE__) + "/../base.rb")
class ImportScripts::Quandora < ImportScripts::Base
JSON_FILES_DIR = "output"
def initialize
@@ -12,8 +11,8 @@ class ImportScripts::Quandora < ImportScripts::Base
@system_user = Discourse.system_user
@questions = []
Dir.foreach(JSON_FILES_DIR) do |filename|
next if filename == ('.') || filename == ('..')
question = File.read JSON_FILES_DIR + '/' + filename
next if filename == (".") || filename == ("..")
question = File.read JSON_FILES_DIR + "/" + filename
@questions << question
end
end
@@ -33,9 +32,7 @@ class ImportScripts::Quandora < ImportScripts::Base
q = QuandoraQuestion.new question
import_users q.users
created_topic = import_topic q.topic
if created_topic
import_posts q.replies, created_topic.topic_id
end
import_posts q.replies, created_topic.topic_id if created_topic
topics += 1
print_status topics, total
end
@@ -43,9 +40,7 @@ class ImportScripts::Quandora < ImportScripts::Base
end
# Calls create_user once per entry of `users`, passing the entry itself and
# its :id value.
# NOTE(review): the do/end loop and the brace loop below look like the
# before/after sides of a diff rather than two intended passes -- confirm.
def import_users(users)
users.each do |user|
create_user user, user[:id]
end
users.each { |user| create_user user, user[:id] }
end
def import_topic(topic)
@@ -54,7 +49,7 @@ class ImportScripts::Quandora < ImportScripts::Base
post = Post.find(post_id) # already imported this topic
else
topic[:user_id] = user_id_from_imported_user_id(topic[:author_id]) || -1
topic[:category] = 'quandora-import'
topic[:category] = "quandora-import"
post = create_post(topic, topic[:id])
@@ -68,9 +63,7 @@ class ImportScripts::Quandora < ImportScripts::Base
end
# Calls import_post for every entry of `posts`, forwarding the same topic_id
# each time.
# NOTE(review): the do/end loop and the brace loop below look like the
# before/after sides of a diff rather than two intended passes -- confirm.
def import_posts(posts, topic_id)
posts.each do |post|
import_post post, topic_id
end
posts.each { |post| import_post post, topic_id }
end
def import_post(post, topic_id)
@@ -91,6 +84,4 @@ class ImportScripts::Quandora < ImportScripts::Base
end
end
if __FILE__ == $0
ImportScripts::Quandora.new.perform
end
ImportScripts::Quandora.new.perform if __FILE__ == $0

View File

@@ -1,10 +1,9 @@
# frozen_string_literal: true
require 'base64'
require 'json'
require "base64"
require "json"
class QuandoraApi
attr_accessor :domain, :username, :password
def initialize(domain, username, password)
@@ -38,18 +37,18 @@ class QuandoraApi
# Issues a request to list_bases_url and returns the "data" entry of the
# response.
# NOTE(review): the two quote-style variants of the last expression are
# presumably the before/after lines of a diff -- confirm.
def list_bases
response = request list_bases_url
response['data']
response["data"]
end
# Builds the URL with list_questions_url(kb_id, limit), requests it and
# returns the "result" entry nested under "data" in the response.
# `limit` defaults to nil and is passed through unchanged.
# NOTE(review): the two quote-style variants of the last expression are
# presumably the before/after lines of a diff -- confirm.
def list_questions(kb_id, limit = nil)
url = list_questions_url(kb_id, limit)
response = request url
response['data']['result']
response["data"]["result"]
end
# Requests "<base_url for @domain>/q/<question_id>" and returns the "data"
# entry of the response.
# NOTE(review): the two quote-style variants of the last expression are
# presumably the before/after lines of a diff -- confirm.
def get_question(question_id)
url = "#{base_url @domain}/q/#{question_id}"
response = request url
response['data']
response["data"]
end
end

View File

@@ -1,28 +1,27 @@
# frozen_string_literal: true
require 'json'
require 'cgi'
require 'time'
require "json"
require "cgi"
require "time"
class QuandoraQuestion
# Parses the raw JSON text of one question and keeps the resulting structure
# in @question for the other methods to read.
def initialize(question_json)
  parsed = JSON.parse(question_json)
  @question = parsed
end
# Builds and returns a hash describing the question as a topic:
#   :id         - the question's "uid"
#   :author_id  - the "uid" of the question's "author"
#   :title/:raw - "title" and "content" passed through unescape
#   :created_at - "created" parsed with Time.parse
# NOTE(review): the single-quoted and double-quoted assignment groups are
# presumably the before/after lines of a diff -- confirm.
def topic
topic = {}
topic[:id] = @question['uid']
topic[:author_id] = @question['author']['uid']
topic[:title] = unescape @question['title']
topic[:raw] = unescape @question['content']
topic[:created_at] = Time.parse @question['created']
topic[:id] = @question["uid"]
topic[:author_id] = @question["author"]["uid"]
topic[:title] = unescape @question["title"]
topic[:raw] = unescape @question["content"]
topic[:created_at] = Time.parse @question["created"]
topic
end
def users
users = {}
user = user_from_author @question['author']
user = user_from_author @question["author"]
users[user[:id]] = user
replies.each do |reply|
user = user_from_author reply[:author]
@@ -32,12 +31,12 @@ class QuandoraQuestion
end
def user_from_author(author)
email = author['email']
email = "#{author['uid']}@noemail.com" unless email
email = author["email"]
email = "#{author["uid"]}@noemail.com" unless email
user = {}
user[:id] = author['uid']
user[:name] = "#{author['firstName']} #{author['lastName']}"
user[:id] = author["uid"]
user[:name] = "#{author["firstName"]} #{author["lastName"]}"
user[:email] = email
user[:staged] = true
user
@@ -45,26 +44,20 @@ class QuandoraQuestion
# Gathers every reply to the question into one list and returns the result of
# order_replies on that list. The list is filled in this order: the
# question's own comments, then for each entry of "answersList" the answer
# itself followed by that answer's comments.
# NOTE(review): each loop appears twice (do/end form and brace form) and the
# lookups appear in both quote styles; these are presumably the before/after
# sides of a diff, not intentional repetition -- confirm.
def replies
posts = []
answers = @question['answersList']
comments = @question['comments']
comments.each_with_index do |comment, i|
posts << post_from_comment(comment, i, @question)
end
answers = @question["answersList"]
comments = @question["comments"]
comments.each_with_index { |comment, i| posts << post_from_comment(comment, i, @question) }
answers.each do |answer|
posts << post_from_answer(answer)
comments = answer['comments']
comments.each_with_index do |comment, i|
posts << post_from_comment(comment, i, answer)
end
comments = answer["comments"]
comments.each_with_index { |comment, i| posts << post_from_comment(comment, i, answer) }
end
order_replies posts
end
def order_replies(posts)
posts = posts.sort_by { |p| p[:created_at] }
posts.each_with_index do |p, i|
p[:post_number] = i + 2
end
posts.each_with_index { |p, i| p[:post_number] = i + 2 }
posts.each do |p|
parent = posts.select { |pp| pp[:id] == p[:parent_id] }
p[:reply_to_post_number] = parent[0][:post_number] if parent.size > 0
@@ -74,35 +67,35 @@ class QuandoraQuestion
# Converts one answer hash into a post hash and returns it:
#   :id         - the answer's "uid"
#   :parent_id  - the "uid" of the question held in @question
#   :author     - the answer's "author" hash as-is
#   :author_id  - the "uid" inside that author hash
#   :raw        - "content" passed through unescape
#   :created_at - "created" parsed with Time.parse
# NOTE(review): the single-quoted and double-quoted assignment groups are
# presumably the before/after lines of a diff -- confirm.
def post_from_answer(answer)
post = {}
post[:id] = answer['uid']
post[:parent_id] = @question['uid']
post[:author] = answer['author']
post[:author_id] = answer['author']['uid']
post[:raw] = unescape answer['content']
post[:created_at] = Time.parse answer['created']
post[:id] = answer["uid"]
post[:parent_id] = @question["uid"]
post[:author] = answer["author"]
post[:author_id] = answer["author"]["uid"]
post[:raw] = unescape answer["content"]
post[:created_at] = Time.parse answer["created"]
post
end
# Converts one comment into a post hash and returns it.
#   comment - the comment hash ("created", "author", "text" are read)
#   index   - position of the comment within its parent's comment list
#   parent  - the question or answer hash the comment belongs to
# The post's :id is "<parent uid>-<index>". Its :parent_id is the parent's
# "uid" for index 0, otherwise "<parent uid>-<index - 1>", i.e. the id given
# to the preceding comment. :created_at comes from the comment's "created"
# and falls back to the parent's "created" when the comment has none.
# NOTE(review): this text interleaves single-quoted and double-quoted forms
# of the same lines (including two `if` lines for one `end`), which looks
# like both sides of a rendered diff -- confirm before running it as Ruby.
def post_from_comment(comment, index, parent)
if comment['created']
created_at = Time.parse comment['created']
if comment["created"]
created_at = Time.parse comment["created"]
else
created_at = Time.parse parent['created']
created_at = Time.parse parent["created"]
end
parent_id = parent['uid']
parent_id = "#{parent['uid']}-#{index - 1}" if index > 0
parent_id = parent["uid"]
parent_id = "#{parent["uid"]}-#{index - 1}" if index > 0
post = {}
id = "#{parent['uid']}-#{index}"
id = "#{parent["uid"]}-#{index}"
post[:id] = id
post[:parent_id] = parent_id
post[:author] = comment['author']
post[:author_id] = comment['author']['uid']
post[:raw] = unescape comment['text']
post[:author] = comment["author"]
post[:author_id] = comment["author"]["uid"]
post[:raw] = unescape comment["text"]
post[:created_at] = created_at
post
end
private
private
def unescape(html)
return nil unless html

View File

@@ -1,5 +1,6 @@
# frozen_string_literal: true
BASES = '{
# frozen_string_literal: true
BASES =
'{
"type" : "kbase",
"data" : [ {
"objectId" : "90b1ccf3-35aa-4d6f-848e-e7c122d92c58",
@@ -9,7 +10,8 @@
} ]
}'
QUESTIONS = '{
QUESTIONS =
'{
"type": "question-search-result",
"data": {
"totalSize": 445,
@@ -50,7 +52,8 @@
}
}'
QUESTION = '{
QUESTION =
'{
"type" : "question",
"data" : {
"uid" : "de20ed0a-5fe5-48a5-9c14-d854f9af99f1",

View File

@@ -1,21 +1,20 @@
# frozen_string_literal: true
require 'minitest/autorun'
require 'yaml'
require_relative '../quandora_api.rb'
require_relative './test_data.rb'
require "minitest/autorun"
require "yaml"
require_relative "../quandora_api.rb"
require_relative "./test_data.rb"
class TestQuandoraApi < Minitest::Test
DEBUG = false
# Loads config.yml from this file's directory and stores its "domain",
# "username", "password", "kb_id" and "question_id" entries in instance
# variables, then hands `args` on to the superclass constructor.
# NOTE(review): the single-quoted and double-quoted statement groups are
# presumably the before/after lines of a diff -- confirm.
def initialize(args)
config = YAML::load_file(File.join(__dir__, 'config.yml'))
@domain = config['domain']
@username = config['username']
@password = config['password']
@kb_id = config['kb_id']
@question_id = config['question_id']
config = YAML.load_file(File.join(__dir__, "config.yml"))
@domain = config["domain"]
@username = config["username"]
@password = config["password"]
@kb_id = config["kb_id"]
@question_id = config["question_id"]
super args
end
@@ -30,19 +29,19 @@ class TestQuandoraApi < Minitest::Test
end
# Checks that base_url("mydomain") yields
# "https://mydomain.quandora.com/m/json".
# NOTE(review): the two assertions differ only in quote style and are
# presumably the before/after lines of a diff -- confirm.
def test_base_url
assert_equal 'https://mydomain.quandora.com/m/json', @quandora.base_url('mydomain')
assert_equal "https://mydomain.quandora.com/m/json", @quandora.base_url("mydomain")
end
# Checks that auth_header for user "Aladdin" and password "open sesame"
# carries "Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==" under the :Authorization key.
# NOTE(review): the single-quoted and double-quoted lines are presumably the
# before/after sides of a diff -- confirm.
def test_auth_header
user = 'Aladdin'
password = 'open sesame'
user = "Aladdin"
password = "open sesame"
auth_header = @quandora.auth_header user, password
assert_equal 'Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==', auth_header[:Authorization]
assert_equal "Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==", auth_header[:Authorization]
end
# Takes the first element returned by list_bases and verifies, via
# check_keys, that it has every key of the first "data" entry in the BASES
# fixture. The element is also passed to debug.
# NOTE(review): the two `expected =` lines differ only in quote style and are
# presumably the before/after lines of a diff -- confirm.
def test_list_bases_element_has_expected_structure
element = @quandora.list_bases[0]
expected = JSON.parse(BASES)['data'][0]
expected = JSON.parse(BASES)["data"][0]
debug element
check_keys expected, element
end
@@ -50,24 +49,24 @@ class TestQuandoraApi < Minitest::Test
# Requests one question for @kb_id (limit 1) and verifies, via check_keys,
# that the first returned item has every key of the first "result" entry in
# the QUESTIONS fixture. The response is also passed to debug.
# NOTE(review): the two check_keys lines differ only in quote style and are
# presumably the before/after lines of a diff -- confirm.
def test_list_questions_has_expected_structure
response = @quandora.list_questions @kb_id, 1
debug response
check_keys JSON.parse(QUESTIONS)['data']['result'][0], response[0]
check_keys JSON.parse(QUESTIONS)["data"]["result"][0], response[0]
end
# Fetches the question identified by @question_id and uses check_keys to
# compare its key set against the QUESTION fixture at four levels: the
# question itself, its first comment, an answer, and that answer's first
# comment.
# NOTE(review): the fixture answer is taken from index 1 while the fetched
# answer is taken from index 0 -- presumably deliberate, confirm against the
# fixture data.
# NOTE(review): the single-quoted and double-quoted lines are presumably the
# before/after sides of a diff -- confirm.
def test_get_question_has_expected_structure
question = @quandora.get_question @question_id
expected = JSON.parse(QUESTION)['data']
expected = JSON.parse(QUESTION)["data"]
check_keys expected, question
expected_comment = expected['comments'][0]
actual_comment = question['comments'][0]
expected_comment = expected["comments"][0]
actual_comment = question["comments"][0]
check_keys expected_comment, actual_comment
expected_answer = expected['answersList'][1]
actual_answer = question['answersList'][0]
expected_answer = expected["answersList"][1]
actual_answer = question["answersList"][0]
check_keys expected_answer, actual_answer
expected_answer_comment = expected_answer['comments'][0]
actual_answer_comment = actual_answer['comments'][0]
expected_answer_comment = expected_answer["comments"][0]
actual_answer_comment = actual_answer["comments"][0]
check_keys expected_answer_comment, actual_answer_comment
end
@@ -75,18 +74,16 @@ class TestQuandoraApi < Minitest::Test
# Asserts that every key of `expected` is also a key of `actual`; the failing
# key's string form is used as the assertion message.
# NOTE(review): `msg` is assigned but not referenced anywhere in this method,
# and "caller[0]" sits inside the string without #{} so it is literal text --
# looks unfinished; confirm whether msg was meant to be the assert message.
# NOTE(review): the do/end loop and the brace loop are presumably the
# before/after sides of a diff -- confirm.
def check_keys(expected, actual)
msg = "### caller[0]:\nKey not found in actual keys: #{actual.keys}\n"
expected.keys.each do |k|
assert (actual.keys.include? k), "#{k}"
end
expected.keys.each { |k| assert (actual.keys.include? k), "#{k}" }
end
# Prints `message` to stdout, preceded by a "### <caller[0]>" line and
# surrounded by empty lines, but only when `show` is truthy or the DEBUG
# constant is truthy. Otherwise does nothing.
# NOTE(review): the single-quoted and double-quoted puts lines are presumably
# the before/after sides of a diff -- confirm.
def debug(message, show = false)
if show || DEBUG
puts '### ' + caller[0]
puts ''
puts "### " + caller[0]
puts ""
puts message
puts ''
puts ''
puts ""
puts ""
end
end
end

View File

@@ -1,47 +1,46 @@
# frozen_string_literal: true
require 'minitest/autorun'
require 'cgi'
require 'time'
require_relative '../quandora_question.rb'
require_relative './test_data.rb'
require "minitest/autorun"
require "cgi"
require "time"
require_relative "../quandora_question.rb"
require_relative "./test_data.rb"
class TestQuandoraQuestion < Minitest::Test
# Prepares each test: @data is the "data" entry of the QUESTION fixture and
# @question is a QuandoraQuestion built from that data re-serialised to JSON.
# NOTE(review): the two `@data =` lines differ only in quote style and are
# presumably the before/after lines of a diff -- confirm.
def setup
@data = JSON.parse(QUESTION)['data']
@data = JSON.parse(QUESTION)["data"]
@question = QuandoraQuestion.new @data.to_json
end
# Verifies that QuandoraQuestion#topic maps the fixture's "uid", author
# "uid", unescaped "title", unescaped "content" and parsed "created" onto
# :id, :author_id, :title, :raw and :created_at respectively.
# NOTE(review): the single-quoted and double-quoted assertion groups are
# presumably the before/after sides of a diff -- confirm.
def test_topic
topic = @question.topic
assert_equal @data['uid'], topic[:id]
assert_equal @data['author']['uid'], topic[:author_id]
assert_equal unescape(@data['title']), topic[:title]
assert_equal unescape(@data['content']), topic[:raw]
assert_equal Time.parse(@data['created']), topic[:created_at]
assert_equal @data["uid"], topic[:id]
assert_equal @data["author"]["uid"], topic[:author_id]
assert_equal unescape(@data["title"]), topic[:title]
assert_equal unescape(@data["content"]), topic[:raw]
assert_equal Time.parse(@data["created"]), topic[:created_at]
end
# Verifies that user_from_author turns an author with uid, first name, last
# name and email into a user whose :id is the uid, whose :name is
# "<first> <last>", whose :email is the author's email and whose :staged is
# true.
# NOTE(review): the single-quoted and double-quoted lines are presumably the
# before/after sides of a diff -- confirm.
def test_user_from_author
author = {}
author['uid'] = 'uid'
author['firstName'] = 'Joe'
author['lastName'] = 'Schmoe'
author['email'] = 'joe.schmoe@mydomain.com'
author["uid"] = "uid"
author["firstName"] = "Joe"
author["lastName"] = "Schmoe"
author["email"] = "joe.schmoe@mydomain.com"
user = @question.user_from_author author
assert_equal 'uid', user[:id]
assert_equal 'Joe Schmoe', user[:name]
assert_equal 'joe.schmoe@mydomain.com', user[:email]
assert_equal "uid", user[:id]
assert_equal "Joe Schmoe", user[:name]
assert_equal "joe.schmoe@mydomain.com", user[:email]
assert_equal true, user[:staged]
end
# Verifies the fallback address: an author with uid "foo" and no email must
# produce a user whose :email is "foo@noemail.com".
# NOTE(review): the single-quoted and double-quoted lines are presumably the
# before/after sides of a diff -- confirm.
def test_user_from_author_with_no_email
author = {}
author['uid'] = 'foo'
author["uid"] = "foo"
user = @question.user_from_author author
assert_equal 'foo@noemail.com', user[:email]
assert_equal "foo@noemail.com", user[:email]
end
def test_replies
@@ -57,77 +56,77 @@ class TestQuandoraQuestion < Minitest::Test
assert_equal nil, replies[2][:reply_to_post_number]
assert_equal 4, replies[3][:reply_to_post_number]
assert_equal 3, replies[4][:reply_to_post_number]
assert_equal '2013-01-07 04:59:56 UTC', replies[0][:created_at].to_s
assert_equal '2013-01-08 16:49:32 UTC', replies[1][:created_at].to_s
assert_equal '2016-01-20 15:38:55 UTC', replies[2][:created_at].to_s
assert_equal '2016-01-21 15:38:55 UTC', replies[3][:created_at].to_s
assert_equal '2016-01-22 15:38:55 UTC', replies[4][:created_at].to_s
assert_equal "2013-01-07 04:59:56 UTC", replies[0][:created_at].to_s
assert_equal "2013-01-08 16:49:32 UTC", replies[1][:created_at].to_s
assert_equal "2016-01-20 15:38:55 UTC", replies[2][:created_at].to_s
assert_equal "2016-01-21 15:38:55 UTC", replies[3][:created_at].to_s
assert_equal "2016-01-22 15:38:55 UTC", replies[4][:created_at].to_s
end
# Verifies post_from_answer on a hand-built answer: :id is the answer uid,
# :parent_id equals the question topic's :id, :author is the author hash,
# :author_id is the author's uid, :raw is the content and :created_at is the
# parsed "created" timestamp.
# NOTE(review): the single-quoted and double-quoted lines are presumably the
# before/after sides of a diff -- confirm.
def test_post_from_answer
answer = {}
answer['uid'] = 'uid'
answer['content'] = 'content'
answer['created'] = '2013-01-06T18:24:54.62Z'
answer['author'] = { 'uid' => 'auid' }
answer["uid"] = "uid"
answer["content"] = "content"
answer["created"] = "2013-01-06T18:24:54.62Z"
answer["author"] = { "uid" => "auid" }
post = @question.post_from_answer answer
assert_equal 'uid', post[:id]
assert_equal "uid", post[:id]
assert_equal @question.topic[:id], post[:parent_id]
assert_equal answer['author'], post[:author]
assert_equal 'auid', post[:author_id]
assert_equal 'content', post[:raw]
assert_equal Time.parse('2013-01-06T18:24:54.62Z'), post[:created_at]
assert_equal answer["author"], post[:author]
assert_equal "auid", post[:author_id]
assert_equal "content", post[:raw]
assert_equal Time.parse("2013-01-06T18:24:54.62Z"), post[:created_at]
end
# Verifies post_from_comment for the first comment (index 0) of a parent with
# uid "parent-uid": :id is "parent-uid-0", :parent_id is "parent-uid", and
# author, author uid, text and parsed "created" are carried over.
# NOTE(review): the single-quoted and double-quoted lines are presumably the
# before/after sides of a diff -- confirm.
def test_post_from_comment
comment = {}
comment['text'] = 'text'
comment['created'] = '2013-01-06T18:24:54.62Z'
comment['author'] = { 'uid' => 'auid' }
parent = { 'uid' => 'parent-uid' }
comment["text"] = "text"
comment["created"] = "2013-01-06T18:24:54.62Z"
comment["author"] = { "uid" => "auid" }
parent = { "uid" => "parent-uid" }
post = @question.post_from_comment comment, 0, parent
assert_equal 'parent-uid-0', post[:id]
assert_equal 'parent-uid', post[:parent_id]
assert_equal comment['author'], post[:author]
assert_equal 'auid', post[:author_id]
assert_equal 'text', post[:raw]
assert_equal Time.parse('2013-01-06T18:24:54.62Z'), post[:created_at]
assert_equal "parent-uid-0", post[:id]
assert_equal "parent-uid", post[:parent_id]
assert_equal comment["author"], post[:author]
assert_equal "auid", post[:author_id]
assert_equal "text", post[:raw]
assert_equal Time.parse("2013-01-06T18:24:54.62Z"), post[:created_at]
end
# Verifies the timestamp fallback: a comment without "created" must get its
# :created_at from the parent's "created" value.
# NOTE(review): the single-quoted and double-quoted lines are presumably the
# before/after sides of a diff -- confirm.
def test_post_from_comment_uses_parent_created_if_necessary
comment = {}
comment['author'] = { 'uid' => 'auid' }
parent = { 'created' => '2013-01-06T18:24:54.62Z' }
comment["author"] = { "uid" => "auid" }
parent = { "created" => "2013-01-06T18:24:54.62Z" }
post = @question.post_from_comment comment, 0, parent
assert_equal Time.parse('2013-01-06T18:24:54.62Z'), post[:created_at]
assert_equal Time.parse("2013-01-06T18:24:54.62Z"), post[:created_at]
end
# Verifies comment chaining: for index 1 under parent "parent-uid" the post
# :id must be "parent-uid-1" and its :parent_id must be "parent-uid-0" (the
# id of the comment at index 0); :created_at again falls back to the parent's
# "created".
# NOTE(review): the single-quoted and double-quoted lines are presumably the
# before/after sides of a diff -- confirm.
def test_post_from_comment_uses_previous_comment_as_parent
comment = {}
comment['author'] = { 'uid' => 'auid' }
parent = { 'uid' => 'parent-uid', 'created' => '2013-01-06T18:24:54.62Z' }
comment["author"] = { "uid" => "auid" }
parent = { "uid" => "parent-uid", "created" => "2013-01-06T18:24:54.62Z" }
post = @question.post_from_comment comment, 1, parent
assert_equal 'parent-uid-1', post[:id]
assert_equal 'parent-uid-0', post[:parent_id]
assert_equal Time.parse('2013-01-06T18:24:54.62Z'), post[:created_at]
assert_equal "parent-uid-1", post[:id]
assert_equal "parent-uid-0", post[:parent_id]
assert_equal Time.parse("2013-01-06T18:24:54.62Z"), post[:created_at]
end
# Verifies that QuandoraQuestion#users yields five users for the fixture and
# that looking them up with 0..4 gives the expected names in this order.
# NOTE(review): the lookups use integer subscripts; the shape of the value
# returned by #users is not fully visible here -- confirm it supports them.
# NOTE(review): the single-quoted and double-quoted assertion groups are
# presumably the before/after sides of a diff -- confirm.
def test_users
users = @question.users
assert_equal 5, users.size
assert_equal 'Ida Inquisitive', users[0][:name]
assert_equal 'Harry Helpful', users[1][:name]
assert_equal 'Sam Smarty-Pants', users[2][:name]
assert_equal 'Greta Greatful', users[3][:name]
assert_equal 'Eddy Excited', users[4][:name]
assert_equal "Ida Inquisitive", users[0][:name]
assert_equal "Harry Helpful", users[1][:name]
assert_equal "Sam Smarty-Pants", users[2][:name]
assert_equal "Greta Greatful", users[3][:name]
assert_equal "Eddy Excited", users[4][:name]
end
private

View File

@@ -1,21 +1,21 @@
# frozen_string_literal: true
require 'mysql2'
require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'htmlentities'
require 'php_serialize' # https://github.com/jqr/php-serialize
require "htmlentities"
require "php_serialize" # https://github.com/jqr/php-serialize
class ImportScripts::Question2Answer < ImportScripts::Base
BATCH_SIZE = 1000
# CHANGE THESE BEFORE RUNNING THE IMPORTER
DB_HOST ||= ENV['DB_HOST'] || "localhost"
DB_NAME ||= ENV['DB_NAME'] || "qa_db"
DB_PW ||= ENV['DB_PW'] || ""
DB_USER ||= ENV['DB_USER'] || "root"
TIMEZONE ||= ENV['TIMEZONE'] || "America/Los_Angeles"
TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "qa_"
DB_HOST ||= ENV["DB_HOST"] || "localhost"
DB_NAME ||= ENV["DB_NAME"] || "qa_db"
DB_PW ||= ENV["DB_PW"] || ""
DB_USER ||= ENV["DB_USER"] || "root"
TIMEZONE ||= ENV["TIMEZONE"] || "America/Los_Angeles"
TABLE_PREFIX ||= ENV["TABLE_PREFIX"] || "qa_"
def initialize
super
@@ -26,12 +26,8 @@ class ImportScripts::Question2Answer < ImportScripts::Base
@htmlentities = HTMLEntities.new
@client = Mysql2::Client.new(
host: DB_HOST,
username: DB_USER,
password: DB_PW,
database: DB_NAME
)
@client =
Mysql2::Client.new(host: DB_HOST, username: DB_USER, password: DB_PW, database: DB_NAME)
end
def execute
@@ -51,11 +47,16 @@ class ImportScripts::Question2Answer < ImportScripts::Base
# only import users that have posted or voted on Q2A
# if you want to import all users, just leave out the WHERE and everything after it (and remove line 95 as well)
user_count = mysql_query("SELECT COUNT(userid) count FROM #{TABLE_PREFIX}users u WHERE EXISTS (SELECT 1 FROM #{TABLE_PREFIX}posts p WHERE p.userid=u.userid) or EXISTS (SELECT 1 FROM #{TABLE_PREFIX}uservotes uv WHERE u.userid=uv.userid)").first["count"]
user_count =
mysql_query(
"SELECT COUNT(userid) count FROM #{TABLE_PREFIX}users u WHERE EXISTS (SELECT 1 FROM #{TABLE_PREFIX}posts p WHERE p.userid=u.userid) or EXISTS (SELECT 1 FROM #{TABLE_PREFIX}uservotes uv WHERE u.userid=uv.userid)",
).first[
"count"
]
last_user_id = -1
batches(BATCH_SIZE) do |offset|
users = mysql_query(<<-SQL
users = mysql_query(<<-SQL).to_a
SELECT u.userid AS id, u.email, u.handle AS username, u.created AS created_at, u.loggedin AS last_sign_in_at, u.avatarblobid
FROM #{TABLE_PREFIX}users u
WHERE u.userid > #{last_user_id}
@@ -63,7 +64,6 @@ class ImportScripts::Question2Answer < ImportScripts::Base
ORDER BY u.userid
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if users.empty?
last_user_id = users[-1]["id"]
@@ -73,18 +73,17 @@ class ImportScripts::Question2Answer < ImportScripts::Base
email = user["email"].presence
username = @htmlentities.decode(user["email"]).strip.split("@").first
avatar_url = "https://your_image_bucket/#{user['cdn_slug']}" if user['cdn_slug']
avatar_url = "https://your_image_bucket/#{user["cdn_slug"]}" if user["cdn_slug"]
{
id: user["id"],
name: "#{user['username']}",
username: "#{user['username']}",
password: user['password'],
name: "#{user["username"]}",
username: "#{user["username"]}",
password: user["password"],
email: email,
created_at: user["created_at"],
last_seen_at: user["last_sign_in_at"],
post_create_action: proc do |u|
@old_username_to_new_usernames[user["username"]] = u.username
end
post_create_action:
proc { |u| @old_username_to_new_usernames[user["username"]] = u.username },
}
end
end
@@ -93,7 +92,10 @@ class ImportScripts::Question2Answer < ImportScripts::Base
def import_categories
puts "", "importing top level categories..."
categories = mysql_query("SELECT categoryid, parentid, title, position FROM #{TABLE_PREFIX}categories ORDER BY categoryid").to_a
categories =
mysql_query(
"SELECT categoryid, parentid, title, position FROM #{TABLE_PREFIX}categories ORDER BY categoryid",
).to_a
top_level_categories = categories.select { |c| c["parentid"].nil? }
@@ -101,7 +103,7 @@ class ImportScripts::Question2Answer < ImportScripts::Base
{
id: category["categoryid"],
name: @htmlentities.decode(category["title"]).strip,
position: category["position"]
position: category["position"],
}
end
@@ -122,7 +124,7 @@ class ImportScripts::Question2Answer < ImportScripts::Base
id: category["categoryid"],
name: @htmlentities.decode(category["title"]).strip,
position: category["position"],
parent_category_id: category_id_from_imported_category_id(category["parentid"])
parent_category_id: category_id_from_imported_category_id(category["parentid"]),
}
end
end
@@ -130,12 +132,15 @@ class ImportScripts::Question2Answer < ImportScripts::Base
def import_topics
puts "", "importing topics..."
topic_count = mysql_query("SELECT COUNT(postid) count FROM #{TABLE_PREFIX}posts WHERE type = 'Q'").first["count"]
topic_count =
mysql_query("SELECT COUNT(postid) count FROM #{TABLE_PREFIX}posts WHERE type = 'Q'").first[
"count"
]
last_topic_id = -1
batches(BATCH_SIZE) do |offset|
topics = mysql_query(<<-SQL
topics = mysql_query(<<-SQL).to_a
SELECT p.postid, p.type, p.categoryid, p.closedbyid, p.userid postuserid, p.views, p.created, p.title, p.content raw
FROM #{TABLE_PREFIX}posts p
WHERE type = 'Q'
@@ -143,7 +148,6 @@ class ImportScripts::Question2Answer < ImportScripts::Base
ORDER BY p.postid
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if topics.empty?
@@ -179,20 +183,19 @@ class ImportScripts::Question2Answer < ImportScripts::Base
if topic.present?
title_slugified = slugify(thread["title"], false, 50) if thread["title"].present?
url_slug = "qa/#{thread["postid"]}/#{title_slugified}" if thread["title"].present?
Permalink.create(url: url_slug, topic_id: topic[:topic_id].to_i) if url_slug.present? && topic[:topic_id].present?
if url_slug.present? && topic[:topic_id].present?
Permalink.create(url: url_slug, topic_id: topic[:topic_id].to_i)
end
end
end
end
end
def slugify(title, ascii_only, max_length)
words = title.downcase.gsub(/[^a-zA-Z0-9\s]/, '').split(" ")
words = title.downcase.gsub(/[^a-zA-Z0-9\s]/, "").split(" ")
word_lengths = {}
words.each_with_index do |word, idx|
word_lengths[idx] = word.length
end
words.each_with_index { |word, idx| word_lengths[idx] = word.length }
remaining = max_length
if word_lengths.inject(0) { |sum, (_, v)| sum + v } > remaining
@@ -211,17 +214,16 @@ class ImportScripts::Question2Answer < ImportScripts::Base
def import_posts
puts "", "importing posts..."
post_count = mysql_query(<<-SQL
post_count = mysql_query(<<-SQL).first["count"]
SELECT COUNT(postid) count
FROM #{TABLE_PREFIX}posts p
WHERE p.parentid IS NOT NULL
SQL
).first["count"]
last_post_id = -1
batches(BATCH_SIZE) do |offset|
posts = mysql_query(<<-SQL
posts = mysql_query(<<-SQL).to_a
SELECT p.postid, p.type, p.parentid, p.categoryid, p.closedbyid, p.userid, p.views, p.created, p.title, p.content,
parent.type AS parenttype, parent.parentid AS qid
FROM #{TABLE_PREFIX}posts p
@@ -233,7 +235,6 @@ class ImportScripts::Question2Answer < ImportScripts::Base
ORDER BY p.postid
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if posts.empty?
last_post_id = posts[-1]["postid"]
@@ -250,11 +251,11 @@ class ImportScripts::Question2Answer < ImportScripts::Base
# this works as long as comments can not have a comment as parent
# it's always Q-A Q-C or A-C
if post['type'] == 'A' # for answers the question/topic is always the parent
if post["type"] == "A" # for answers the question/topic is always the parent
topic = topic_lookup_from_imported_post_id("thread-#{post["parentid"]}")
next if topic.nil?
else
if post['parenttype'] == 'Q' # for comments to questions, the question/topic is the parent as well
if post["parenttype"] == "Q" # for comments to questions, the question/topic is the parent as well
topic = topic_lookup_from_imported_post_id("thread-#{post["parentid"]}")
next if topic.nil?
else # for comments to answers, the question/topic is the parent of the parent
@@ -284,7 +285,7 @@ class ImportScripts::Question2Answer < ImportScripts::Base
ans = mysql_query("select postid, selchildid from qa_posts where selchildid is not null").to_a
ans.each do |answer|
begin
post = Post.find_by(id: post_id_from_imported_post_id("#{answer['selchildid']}"))
post = Post.find_by(id: post_id_from_imported_post_id("#{answer["selchildid"]}"))
post.custom_fields["is_accepted_answer"] = "true"
post.save
topic = Topic.find(post.topic_id)
@@ -293,20 +294,18 @@ class ImportScripts::Question2Answer < ImportScripts::Base
rescue => e
puts "error acting on post #{e}"
end
end
end
def import_likes
puts "", "importing likes..."
likes = mysql_query(<<-SQL
likes = mysql_query(<<-SQL).to_a
SELECT postid, userid
FROM #{TABLE_PREFIX}uservotes u
WHERE u.vote=1
SQL
).to_a
likes.each do |like|
post = Post.find_by(id: post_id_from_imported_post_id("thread-#{like['postid']}"))
post = Post.find_by(id: post_id_from_imported_post_id("thread-#{like["postid"]}"))
user = User.find_by(id: user_id_from_imported_user_id(like["userid"]))
begin
PostActionCreator.like(user, post) if user && post
@@ -340,10 +339,10 @@ class ImportScripts::Question2Answer < ImportScripts::Base
def preprocess_post_raw(raw)
return "" if raw.blank?
raw.gsub!(/<a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a>/i, '[\2](\1)')
raw.gsub!(/<p>(.+?)<\/p>/im) { "#{$1}\n\n" }
raw.gsub!('<br />', "\n")
raw.gsub!(/<strong>(.*?)<\/strong>/im, '[b]\1[/b]')
raw.gsub!(%r{<a(?:.+)href="(\S+)"(?:.*)>(.+)</a>}i, '[\2](\1)')
raw.gsub!(%r{<p>(.+?)</p>}im) { "#{$1}\n\n" }
raw.gsub!("<br />", "\n")
raw.gsub!(%r{<strong>(.*?)</strong>}im, '[b]\1[/b]')
# decode HTML entities
raw = @htmlentities.decode(raw)
@@ -355,22 +354,22 @@ class ImportScripts::Question2Answer < ImportScripts::Base
# [HTML]...[/HTML]
raw.gsub!(/\[html\]/i, "\n```html\n")
raw.gsub!(/\[\/html\]/i, "\n```\n")
raw.gsub!(%r{\[/html\]}i, "\n```\n")
# [PHP]...[/PHP]
raw.gsub!(/\[php\]/i, "\n```php\n")
raw.gsub!(/\[\/php\]/i, "\n```\n")
raw.gsub!(%r{\[/php\]}i, "\n```\n")
# [HIGHLIGHT="..."]
raw.gsub!(/\[highlight="?(\w+)"?\]/i) { "\n```#{$1.downcase}\n" }
# [CODE]...[/CODE]
# [HIGHLIGHT]...[/HIGHLIGHT]
raw.gsub!(/\[\/?code\]/i, "\n```\n")
raw.gsub!(/\[\/?highlight\]/i, "\n```\n")
raw.gsub!(%r{\[/?code\]}i, "\n```\n")
raw.gsub!(%r{\[/?highlight\]}i, "\n```\n")
# [SAMP]...[/SAMP]
raw.gsub!(/\[\/?samp\]/i, "`")
raw.gsub!(%r{\[/?samp\]}i, "`")
# replace all chevrons with HTML entities
# NOTE: must be done
@@ -385,16 +384,16 @@ class ImportScripts::Question2Answer < ImportScripts::Base
raw.gsub!("\u2603", ">")
# [URL=...]...[/URL]
raw.gsub!(/\[url="?([^"]+?)"?\](.*?)\[\/url\]/im) { "[#{$2.strip}](#{$1})" }
raw.gsub!(/\[url="?(.+?)"?\](.+)\[\/url\]/im) { "[#{$2.strip}](#{$1})" }
raw.gsub!(%r{\[url="?([^"]+?)"?\](.*?)\[/url\]}im) { "[#{$2.strip}](#{$1})" }
raw.gsub!(%r{\[url="?(.+?)"?\](.+)\[/url\]}im) { "[#{$2.strip}](#{$1})" }
# [URL]...[/URL]
# [MP3]...[/MP3]
raw.gsub!(/\[\/?url\]/i, "")
raw.gsub!(/\[\/?mp3\]/i, "")
raw.gsub!(%r{\[/?url\]}i, "")
raw.gsub!(%r{\[/?mp3\]}i, "")
# [MENTION]<username>[/MENTION]
raw.gsub!(/\[mention\](.+?)\[\/mention\]/i) do
raw.gsub!(%r{\[mention\](.+?)\[/mention\]}i) do
old_username = $1
if @old_username_to_new_usernames.has_key?(old_username)
old_username = @old_username_to_new_usernames[old_username]
@@ -403,31 +402,31 @@ class ImportScripts::Question2Answer < ImportScripts::Base
end
# [FONT=blah] and [COLOR=blah]
raw.gsub!(/\[FONT=.*?\](.*?)\[\/FONT\]/im, '\1')
raw.gsub!(/\[COLOR=.*?\](.*?)\[\/COLOR\]/im, '\1')
raw.gsub!(/\[COLOR=#.*?\](.*?)\[\/COLOR\]/im, '\1')
raw.gsub!(%r{\[FONT=.*?\](.*?)\[/FONT\]}im, '\1')
raw.gsub!(%r{\[COLOR=.*?\](.*?)\[/COLOR\]}im, '\1')
raw.gsub!(%r{\[COLOR=#.*?\](.*?)\[/COLOR\]}im, '\1')
raw.gsub!(/\[SIZE=.*?\](.*?)\[\/SIZE\]/im, '\1')
raw.gsub!(/\[h=.*?\](.*?)\[\/h\]/im, '\1')
raw.gsub!(%r{\[SIZE=.*?\](.*?)\[/SIZE\]}im, '\1')
raw.gsub!(%r{\[h=.*?\](.*?)\[/h\]}im, '\1')
# [CENTER]...[/CENTER]
raw.gsub!(/\[CENTER\](.*?)\[\/CENTER\]/im, '\1')
raw.gsub!(%r{\[CENTER\](.*?)\[/CENTER\]}im, '\1')
# [INDENT]...[/INDENT]
raw.gsub!(/\[INDENT\](.*?)\[\/INDENT\]/im, '\1')
raw.gsub!(/\[TABLE\](.*?)\[\/TABLE\]/im, '\1')
raw.gsub!(/\[TR\](.*?)\[\/TR\]/im, '\1')
raw.gsub!(/\[TD\](.*?)\[\/TD\]/im, '\1')
raw.gsub!(/\[TD="?.*?"?\](.*?)\[\/TD\]/im, '\1')
raw.gsub!(%r{\[INDENT\](.*?)\[/INDENT\]}im, '\1')
raw.gsub!(%r{\[TABLE\](.*?)\[/TABLE\]}im, '\1')
raw.gsub!(%r{\[TR\](.*?)\[/TR\]}im, '\1')
raw.gsub!(%r{\[TD\](.*?)\[/TD\]}im, '\1')
raw.gsub!(%r{\[TD="?.*?"?\](.*?)\[/TD\]}im, '\1')
# [QUOTE]...[/QUOTE]
raw.gsub!(/\[quote\](.+?)\[\/quote\]/im) { |quote|
quote.gsub!(/\[quote\](.+?)\[\/quote\]/im) { "\n#{$1}\n" }
raw.gsub!(%r{\[quote\](.+?)\[/quote\]}im) do |quote|
quote.gsub!(%r{\[quote\](.+?)\[/quote\]}im) { "\n#{$1}\n" }
quote.gsub!(/\n(.+?)/) { "\n> #{$1}" }
}
end
# [QUOTE=<username>]...[/QUOTE]
raw.gsub!(/\[quote=([^;\]]+)\](.+?)\[\/quote\]/im) do
raw.gsub!(%r{\[quote=([^;\]]+)\](.+?)\[/quote\]}im) do
old_username, quote = $1, $2
if @old_username_to_new_usernames.has_key?(old_username)
old_username = @old_username_to_new_usernames[old_username]
@@ -436,31 +435,33 @@ class ImportScripts::Question2Answer < ImportScripts::Base
end
# [YOUTUBE]<id>[/YOUTUBE]
raw.gsub!(/\[youtube\](.+?)\[\/youtube\]/i) { "\n//youtu.be/#{$1}\n" }
raw.gsub!(%r{\[youtube\](.+?)\[/youtube\]}i) { "\n//youtu.be/#{$1}\n" }
# [VIDEO=youtube;<id>]...[/VIDEO]
raw.gsub!(/\[video=youtube;([^\]]+)\].*?\[\/video\]/i) { "\n//youtu.be/#{$1}\n" }
raw.gsub!(%r{\[video=youtube;([^\]]+)\].*?\[/video\]}i) { "\n//youtu.be/#{$1}\n" }
# More Additions ....
# [spoiler=Some hidden stuff]SPOILER HERE!![/spoiler]
raw.gsub!(/\[spoiler="?(.+?)"?\](.+?)\[\/spoiler\]/im) { "\n#{$1}\n[spoiler]#{$2}[/spoiler]\n" }
raw.gsub!(%r{\[spoiler="?(.+?)"?\](.+?)\[/spoiler\]}im) do
"\n#{$1}\n[spoiler]#{$2}[/spoiler]\n"
end
# [IMG][IMG]http://i63.tinypic.com/akga3r.jpg[/IMG][/IMG]
raw.gsub!(/\[IMG\]\[IMG\](.+?)\[\/IMG\]\[\/IMG\]/i) { "[IMG]#{$1}[/IMG]" }
raw.gsub!(%r{\[IMG\]\[IMG\](.+?)\[/IMG\]\[/IMG\]}i) { "[IMG]#{$1}[/IMG]" }
# convert list tags to ul and list=1 tags to ol
# (basically, we're only missing list=a here...)
# (https://meta.discourse.org/t/phpbb-3-importer-old/17397)
raw.gsub!(/\[list\](.*?)\[\/list\]/im, '[ul]\1[/ul]')
raw.gsub!(/\[list=1\](.*?)\[\/list\]/im, '[ol]\1[/ol]')
raw.gsub!(/\[list\](.*?)\[\/list:u\]/im, '[ul]\1[/ul]')
raw.gsub!(/\[list=1\](.*?)\[\/list:o\]/im, '[ol]\1[/ol]')
raw.gsub!(%r{\[list\](.*?)\[/list\]}im, '[ul]\1[/ul]')
raw.gsub!(%r{\[list=1\](.*?)\[/list\]}im, '[ol]\1[/ol]')
raw.gsub!(%r{\[list\](.*?)\[/list:u\]}im, '[ul]\1[/ul]')
raw.gsub!(%r{\[list=1\](.*?)\[/list:o\]}im, '[ol]\1[/ol]')
# convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
raw.gsub!(/\[\*\]\n/, '')
raw.gsub!(/\[\*\](.*?)\[\/\*:m\]/, '[li]\1[/li]')
raw.gsub!(/\[\*\]\n/, "")
raw.gsub!(%r{\[\*\](.*?)\[/\*:m\]}, '[li]\1[/li]')
raw.gsub!(/\[\*\](.*?)\n/, '[li]\1[/li]')
raw.gsub!(/\[\*=1\]/, '')
raw.gsub!(/\[\*=1\]/, "")
raw.strip!
raw
@@ -468,7 +469,7 @@ class ImportScripts::Question2Answer < ImportScripts::Base
def postprocess_post_raw(raw)
# [QUOTE=<username>;<post_id>]...[/QUOTE]
raw.gsub!(/\[quote=([^;]+);(\d+)\](.+?)\[\/quote\]/im) do
raw.gsub!(%r{\[quote=([^;]+);(\d+)\](.+?)\[/quote\]}im) do
old_username, post_id, quote = $1, $2, $3
if @old_username_to_new_usernames.has_key?(old_username)
@@ -477,7 +478,7 @@ class ImportScripts::Question2Answer < ImportScripts::Base
if topic_lookup = topic_lookup_from_imported_post_id(post_id)
post_number = topic_lookup[:post_number]
topic_id = topic_lookup[:topic_id]
topic_id = topic_lookup[:topic_id]
"\n[quote=\"#{old_username},post:#{post_number},topic:#{topic_id}\"]\n#{quote}\n[/quote]\n"
else
"\n[quote=\"#{old_username}\"]\n#{quote}\n[/quote]\n"
@@ -485,11 +486,11 @@ class ImportScripts::Question2Answer < ImportScripts::Base
end
# remove attachments
raw.gsub!(/\[attach[^\]]*\]\d+\[\/attach\]/i, "")
raw.gsub!(%r{\[attach[^\]]*\]\d+\[/attach\]}i, "")
# [THREAD]<thread_id>[/THREAD]
# ==> http://my.discourse.org/t/slug/<topic_id>
raw.gsub!(/\[thread\](\d+)\[\/thread\]/i) do
raw.gsub!(%r{\[thread\](\d+)\[/thread\]}i) do
thread_id = $1
if topic_lookup = topic_lookup_from_imported_post_id("thread-#{thread_id}")
topic_lookup[:url]
@@ -500,7 +501,7 @@ class ImportScripts::Question2Answer < ImportScripts::Base
# [THREAD=<thread_id>]...[/THREAD]
# ==> [...](http://my.discourse.org/t/slug/<topic_id>)
raw.gsub!(/\[thread=(\d+)\](.+?)\[\/thread\]/i) do
raw.gsub!(%r{\[thread=(\d+)\](.+?)\[/thread\]}i) do
thread_id, link = $1, $2
if topic_lookup = topic_lookup_from_imported_post_id("thread-#{thread_id}")
url = topic_lookup[:url]
@@ -512,7 +513,7 @@ class ImportScripts::Question2Answer < ImportScripts::Base
# [POST]<post_id>[/POST]
# ==> http://my.discourse.org/t/slug/<topic_id>/<post_number>
raw.gsub!(/\[post\](\d+)\[\/post\]/i) do
raw.gsub!(%r{\[post\](\d+)\[/post\]}i) do
post_id = $1
if topic_lookup = topic_lookup_from_imported_post_id(post_id)
topic_lookup[:url]
@@ -523,7 +524,7 @@ class ImportScripts::Question2Answer < ImportScripts::Base
# [POST=<post_id>]...[/POST]
# ==> [...](http://my.discourse.org/t/<topic_slug>/<topic_id>/<post_number>)
raw.gsub!(/\[post=(\d+)\](.+?)\[\/post\]/i) do
raw.gsub!(%r{\[post=(\d+)\](.+?)\[/post\]}i) do
post_id, link = $1, $2
if topic_lookup = topic_lookup_from_imported_post_id(post_id)
url = topic_lookup[:url]
@@ -537,7 +538,7 @@ class ImportScripts::Question2Answer < ImportScripts::Base
end
def create_permalinks
puts '', 'Creating permalinks...'
puts "", "Creating permalinks..."
# topics
Topic.find_each do |topic|
@@ -546,7 +547,11 @@ class ImportScripts::Question2Answer < ImportScripts::Base
if tcf && tcf["import_id"]
question_id = tcf["import_id"][/thread-(\d)/, 0]
url = "#{question_id}"
Permalink.create(url: url, topic_id: topic.id) rescue nil
begin
Permalink.create(url: url, topic_id: topic.id)
rescue StandardError
nil
end
end
end
@@ -555,11 +560,21 @@ class ImportScripts::Question2Answer < ImportScripts::Base
ccf = category.custom_fields
if ccf && ccf["import_id"]
url = category.parent_category ? "#{category.parent_category.slug}/#{category.slug}" : category.slug
Permalink.create(url: url, category_id: category.id) rescue nil
url =
(
if category.parent_category
"#{category.parent_category.slug}/#{category.slug}"
else
category.slug
end
)
begin
Permalink.create(url: url, category_id: category.id)
rescue StandardError
nil
end
end
end
end
def parse_timestamp(timestamp)
@@ -569,7 +584,6 @@ class ImportScripts::Question2Answer < ImportScripts::Base
# Sends +sql+ to the importer's database client (@client) with the
# cache_rows option switched on, and returns whatever the client's #query returns.
def mysql_query(sql)
  query_options = { cache_rows: true }
  @client.query(sql, **query_options)
end
end
ImportScripts::Question2Answer.new.perform

View File

@@ -8,7 +8,6 @@ require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::Sfn < ImportScripts::Base
BATCH_SIZE = 100_000
MIN_CREATED_AT = "2003-11-01"
@@ -96,22 +95,27 @@ class ImportScripts::Sfn < ImportScripts::Base
username: email.split("@")[0],
bio_raw: bio,
created_at: user["created_at"],
post_create_action: proc do |newuser|
next if user["avatar"].blank?
post_create_action:
proc do |newuser|
next if user["avatar"].blank?
avatar = Tempfile.new("sfn-avatar")
avatar.write(user["avatar"].encode("ASCII-8BIT").force_encoding("UTF-8"))
avatar.rewind
avatar = Tempfile.new("sfn-avatar")
avatar.write(user["avatar"].encode("ASCII-8BIT").force_encoding("UTF-8"))
avatar.rewind
upload = UploadCreator.new(avatar, "avatar.jpg").create_for(newuser.id)
if upload.persisted?
newuser.create_user_avatar
newuser.user_avatar.update(custom_upload_id: upload.id)
newuser.update(uploaded_avatar_id: upload.id)
end
upload = UploadCreator.new(avatar, "avatar.jpg").create_for(newuser.id)
if upload.persisted?
newuser.create_user_avatar
newuser.user_avatar.update(custom_upload_id: upload.id)
newuser.update(uploaded_avatar_id: upload.id)
end
avatar.try(:close!) rescue nil
end
begin
avatar.try(:close!)
rescue StandardError
nil
end
end,
}
end
end
@@ -198,9 +202,7 @@ class ImportScripts::Sfn < ImportScripts::Base
# Prints a progress banner, then creates one category per entry of NEW_CATEGORIES,
# using the entry itself as both the import id and the category name.
# NOTE(review): the do...end call and the brace-block call below are the pre- and
# post-formatting renderings of the same statement as shown by the diff.
def import_categories
puts "", "importing categories..."
create_categories(NEW_CATEGORIES) do |category|
{ id: category, name: category }
end
create_categories(NEW_CATEGORIES) { |category| { id: category, name: category } }
end
def import_topics
@@ -234,7 +236,7 @@ class ImportScripts::Sfn < ImportScripts::Base
SQL
break if topics.size < 1
next if all_records_exist? :posts, topics.map { |t| t['id'].to_i }
next if all_records_exist? :posts, topics.map { |t| t["id"].to_i }
create_posts(topics, total: topic_count, offset: offset) do |topic|
next unless category_id = CATEGORY_MAPPING[topic["category_id"]]
@@ -286,7 +288,7 @@ class ImportScripts::Sfn < ImportScripts::Base
break if posts.size < 1
next if all_records_exist? :posts, posts.map { |p| p['id'].to_i }
next if all_records_exist? :posts, posts.map { |p| p["id"].to_i }
create_posts(posts, total: posts_count, offset: offset) do |post|
next unless parent = topic_lookup_from_imported_post_id(post["topic_id"])
@@ -307,7 +309,7 @@ class ImportScripts::Sfn < ImportScripts::Base
def cleanup_raw(raw)
# fix some html
raw.gsub!(/<br\s*\/?>/i, "\n")
raw.gsub!(%r{<br\s*/?>}i, "\n")
# remove "This message has been cross posted to the following eGroups: ..."
raw.gsub!(/^This message has been cross posted to the following eGroups: .+\n-{3,}/i, "")
# remove signatures
@@ -320,7 +322,6 @@ class ImportScripts::Sfn < ImportScripts::Base
@client ||= Mysql2::Client.new(username: "root", database: "sfn")
@client.query(sql)
end
end
ImportScripts::Sfn.new.perform

View File

@@ -1,22 +1,17 @@
# frozen_string_literal: true
require 'mysql2'
require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::SimplePress < ImportScripts::Base
SIMPLE_PRESS_DB ||= ENV['SIMPLEPRESS_DB'] || "simplepress"
SIMPLE_PRESS_DB ||= ENV["SIMPLEPRESS_DB"] || "simplepress"
TABLE_PREFIX = "wp_sf"
BATCH_SIZE ||= 1000
# Sets up the importer: runs the base class initializer, opens a Mysql2 connection
# to the SIMPLE_PRESS_DB database on localhost as user "root", and sets
# SiteSetting.max_username_length to 50.
# NOTE(review): the multi-line and single-line `@client = ...` assignments below are
# the pre- and post-formatting renderings of the same statement as shown by the diff.
def initialize
super
@client = Mysql2::Client.new(
host: "localhost",
username: "root",
database: SIMPLE_PRESS_DB,
)
@client = Mysql2::Client.new(host: "localhost", username: "root", database: SIMPLE_PRESS_DB)
SiteSetting.max_username_length = 50
end
@@ -32,10 +27,11 @@ class ImportScripts::SimplePress < ImportScripts::Base
puts "", "importing users..."
last_user_id = -1
total_users = mysql_query("SELECT COUNT(*) count FROM wp_users WHERE user_email LIKE '%@%'").first["count"]
total_users =
mysql_query("SELECT COUNT(*) count FROM wp_users WHERE user_email LIKE '%@%'").first["count"]
batches(BATCH_SIZE) do |offset|
users = mysql_query(<<-SQL
users = mysql_query(<<-SQL).to_a
SELECT ID id, user_nicename, display_name, user_email, user_registered, user_url
FROM wp_users
WHERE user_email LIKE '%@%'
@@ -43,7 +39,6 @@ class ImportScripts::SimplePress < ImportScripts::Base
ORDER BY id
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if users.empty?
@@ -55,13 +50,12 @@ class ImportScripts::SimplePress < ImportScripts::Base
user_ids_sql = user_ids.join(",")
users_description = {}
mysql_query(<<-SQL
mysql_query(<<-SQL).each { |um| users_description[um["user_id"]] = um["description"] }
SELECT user_id, meta_value description
FROM wp_usermeta
WHERE user_id IN (#{user_ids_sql})
AND meta_key = 'description'
SQL
).each { |um| users_description[um["user_id"]] = um["description"] }
create_users(users, total: total_users, offset: offset) do |u|
{
@@ -71,7 +65,7 @@ class ImportScripts::SimplePress < ImportScripts::Base
name: u["display_name"],
created_at: u["user_registered"],
website: u["user_url"],
bio_raw: users_description[u["id"]]
bio_raw: users_description[u["id"]],
}
end
end
@@ -80,16 +74,20 @@ class ImportScripts::SimplePress < ImportScripts::Base
def import_categories
puts "", "importing categories..."
categories = mysql_query(<<-SQL
categories = mysql_query(<<-SQL)
SELECT forum_id, forum_name, forum_seq, forum_desc, parent
FROM #{TABLE_PREFIX}forums
ORDER BY forum_id
SQL
)
create_categories(categories) do |c|
category = { id: c['forum_id'], name: CGI.unescapeHTML(c['forum_name']), description: CGI.unescapeHTML(c['forum_desc']), position: c['forum_seq'] }
if (parent_id = c['parent'].to_i) > 0
category = {
id: c["forum_id"],
name: CGI.unescapeHTML(c["forum_name"]),
description: CGI.unescapeHTML(c["forum_desc"]),
position: c["forum_seq"],
}
if (parent_id = c["parent"].to_i) > 0
category[:parent_category_id] = category_id_from_imported_category_id(parent_id)
end
category
@@ -99,10 +97,15 @@ class ImportScripts::SimplePress < ImportScripts::Base
def import_topics
puts "", "creating topics"
total_count = mysql_query("SELECT COUNT(*) count FROM #{TABLE_PREFIX}posts WHERE post_index = 1").first["count"]
total_count =
mysql_query("SELECT COUNT(*) count FROM #{TABLE_PREFIX}posts WHERE post_index = 1").first[
"count"
]
batches(BATCH_SIZE) do |offset|
results = mysql_query("
results =
mysql_query(
"
SELECT p.post_id id,
p.topic_id topic_id,
t.forum_id category_id,
@@ -119,23 +122,24 @@ class ImportScripts::SimplePress < ImportScripts::Base
ORDER BY p.post_id
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
")
",
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |m| m['id'].to_i }
next if all_records_exist? :posts, results.map { |m| m["id"].to_i }
create_posts(results, total: total_count, offset: offset) do |m|
created_at = Time.zone.at(m['post_time'])
created_at = Time.zone.at(m["post_time"])
{
id: m['id'],
user_id: user_id_from_imported_user_id(m['user_id']) || -1,
raw: process_simplepress_post(m['raw'], m['id']),
id: m["id"],
user_id: user_id_from_imported_user_id(m["user_id"]) || -1,
raw: process_simplepress_post(m["raw"], m["id"]),
created_at: created_at,
category: category_id_from_imported_category_id(m['category_id']),
title: CGI.unescapeHTML(m['title']),
views: m['views'],
pinned_at: m['pinned'] == 1 ? created_at : nil,
category: category_id_from_imported_category_id(m["category_id"]),
title: CGI.unescapeHTML(m["title"]),
views: m["views"],
pinned_at: m["pinned"] == 1 ? created_at : nil,
}
end
end
@@ -146,17 +150,24 @@ class ImportScripts::SimplePress < ImportScripts::Base
topic_first_post_id = {}
mysql_query("
mysql_query(
"
SELECT t.topic_id, p.post_id
FROM #{TABLE_PREFIX}topics t
JOIN #{TABLE_PREFIX}posts p ON p.topic_id = t.topic_id
WHERE p.post_index = 1
").each { |r| topic_first_post_id[r["topic_id"]] = r["post_id"] }
",
).each { |r| topic_first_post_id[r["topic_id"]] = r["post_id"] }
total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}posts WHERE post_index <> 1").first["count"]
total_count =
mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}posts WHERE post_index <> 1").first[
"count"
]
batches(BATCH_SIZE) do |offset|
results = mysql_query("
results =
mysql_query(
"
SELECT p.post_id id,
p.topic_id topic_id,
p.user_id user_id,
@@ -169,23 +180,24 @@ class ImportScripts::SimplePress < ImportScripts::Base
ORDER BY p.post_id
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
")
",
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |m| m['id'].to_i }
next if all_records_exist? :posts, results.map { |m| m["id"].to_i }
create_posts(results, total: total_count, offset: offset) do |m|
if parent = topic_lookup_from_imported_post_id(topic_first_post_id[m['topic_id']])
if parent = topic_lookup_from_imported_post_id(topic_first_post_id[m["topic_id"]])
{
id: m['id'],
user_id: user_id_from_imported_user_id(m['user_id']) || -1,
id: m["id"],
user_id: user_id_from_imported_user_id(m["user_id"]) || -1,
topic_id: parent[:topic_id],
raw: process_simplepress_post(m['raw'], m['id']),
created_at: Time.zone.at(m['post_time']),
raw: process_simplepress_post(m["raw"], m["id"]),
created_at: Time.zone.at(m["post_time"]),
}
else
puts "Parent post #{m['topic_id']} doesn't exist. Skipping #{m["id"]}"
puts "Parent post #{m["topic_id"]} doesn't exist. Skipping #{m["id"]}"
nil
end
end
@@ -196,28 +208,27 @@ class ImportScripts::SimplePress < ImportScripts::Base
s = raw.dup
# fix invalid byte sequence in UTF-8 (ArgumentError)
unless s.valid_encoding?
s.force_encoding("UTF-8")
end
s.force_encoding("UTF-8") unless s.valid_encoding?
# convert the quote line
s.gsub!(/\[quote='([^']+)'.*?pid='(\d+).*?\]/) {
"[quote=\"#{convert_username($1, import_id)}, " + post_id_to_post_num_and_topic($2, import_id) + '"]'
}
s.gsub!(/\[quote='([^']+)'.*?pid='(\d+).*?\]/) do
"[quote=\"#{convert_username($1, import_id)}, " +
post_id_to_post_num_and_topic($2, import_id) + '"]'
end
# :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
s.gsub!(/<!-- s(\S+) -->(?:.*)<!-- s(?:\S+) -->/, '\1')
# Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
s.gsub!(/<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/, '[\2](\1)')
s.gsub!(%r{<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)</a><!-- \w -->}, '[\2](\1)')
# Many phpbb bbcode tags have a hash attached to them. Examples:
# [url=https&#58;//google&#46;com:1qh1i7ky]click here[/url:1qh1i7ky]
# [quote=&quot;cybereality&quot;:b0wtlzex]Some text.[/quote:b0wtlzex]
s.gsub!(/:(?:\w{8})\]/, ']')
s.gsub!(/:(?:\w{8})\]/, "]")
# Remove mybb video tags.
s.gsub!(/(^\[video=.*?\])|(\[\/video\]$)/, '')
s.gsub!(%r{(^\[video=.*?\])|(\[/video\]$)}, "")
s = CGI.unescapeHTML(s)
@@ -225,7 +236,7 @@ class ImportScripts::SimplePress < ImportScripts::Base
# [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
#
# Work around it for now:
s.gsub!(/\[http(s)?:\/\/(www\.)?/, '[')
s.gsub!(%r{\[http(s)?://(www\.)?}, "[")
s
end
@@ -233,7 +244,6 @@ class ImportScripts::SimplePress < ImportScripts::Base
# Sends +sql+ to the importer's Mysql2 client (@client) with the cache_rows
# option switched off, and returns whatever the client's #query returns.
def mysql_query(sql)
  query_options = { cache_rows: false }
  @client.query(sql, **query_options)
end
end
ImportScripts::SimplePress.new.perform

View File

@@ -5,21 +5,21 @@ require "htmlentities"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::Smf1 < ImportScripts::Base
BATCH_SIZE ||= 5000
BATCH_SIZE ||= 5000
UPLOADS_DIR ||= ENV["UPLOADS_DIR"].presence
FORUM_URL ||= ENV["FORUM_URL"].presence
FORUM_URL ||= ENV["FORUM_URL"].presence
def initialize
fail "UPLOADS_DIR env variable is required (example: '/path/to/attachments')" unless UPLOADS_DIR
fail "UPLOADS_DIR env variable is required (example: '/path/to/attachments')" unless UPLOADS_DIR
fail "FORUM_URL env variable is required (example: 'https://domain.com/forum')" unless FORUM_URL
@client = Mysql2::Client.new(
host: ENV["DB_HOST"] || "localhost",
username: ENV["DB_USER"] || "root",
password: ENV["DB_PW"],
database: ENV["DB_NAME"],
)
@client =
Mysql2::Client.new(
host: ENV["DB_HOST"] || "localhost",
username: ENV["DB_USER"] || "root",
password: ENV["DB_PW"],
database: ENV["DB_NAME"],
)
check_version!
@@ -29,7 +29,12 @@ class ImportScripts::Smf1 < ImportScripts::Base
puts "Loading existing usernames..."
@old_to_new_usernames = UserCustomField.joins(:user).where(name: "import_username").pluck("value", "users.username").to_h
@old_to_new_usernames =
UserCustomField
.joins(:user)
.where(name: "import_username")
.pluck("value", "users.username")
.to_h
puts "Loading pm mapping..."
@@ -41,13 +46,14 @@ class ImportScripts::Smf1 < ImportScripts::Base
.where("title NOT ILIKE 'Re: %'")
.group(:id)
.order(:id)
.pluck("string_agg(topic_allowed_users.user_id::text, ',' ORDER BY topic_allowed_users.user_id), title, topics.id")
.pluck(
"string_agg(topic_allowed_users.user_id::text, ',' ORDER BY topic_allowed_users.user_id), title, topics.id",
)
.each do |users, title, topic_id|
@pm_mapping[users] ||= {}
@pm_mapping[users][title] ||= []
@pm_mapping[users][title] << topic_id
end
@pm_mapping[users] ||= {}
@pm_mapping[users][title] ||= []
@pm_mapping[users][title] << topic_id
end
end
def execute
@@ -71,7 +77,10 @@ class ImportScripts::Smf1 < ImportScripts::Base
end
# Aborts the import unless the source database reports an SMF 1.x version.
# Reads the 'smfVersion' row of smf_settings through mysql_query and fails with
# "Incompatible version (...)" when the value is nil or does not start with "1.".
# NOTE(review): the single-line and multi-line `version =` assignments below are the
# pre- and post-formatting renderings of the same statement as shown by the diff.
# NOTE(review): if the query yields no row, `.first` is presumably nil and the
# ["value"] lookup would raise before the guard is reached — confirm.
def check_version!
version = mysql_query("SELECT value FROM smf_settings WHERE variable = 'smfVersion' LIMIT 1").first["value"]
version =
mysql_query("SELECT value FROM smf_settings WHERE variable = 'smfVersion' LIMIT 1").first[
"value"
]
fail "Incompatible version (#{version})" unless version&.start_with?("1.")
end
@@ -84,10 +93,7 @@ class ImportScripts::Smf1 < ImportScripts::Base
create_groups(groups) do |g|
next if g["groupName"].blank?
{
id: g["id_group"],
full_name: g["groupName"],
}
{ id: g["id_group"], full_name: g["groupName"] }
end
end
@@ -98,7 +104,7 @@ class ImportScripts::Smf1 < ImportScripts::Base
total = mysql_query("SELECT COUNT(*) count FROM smf_members").first["count"]
batches(BATCH_SIZE) do |offset|
users = mysql_query(<<~SQL
users = mysql_query(<<~SQL).to_a
SELECT m.id_member
, memberName
, dateRegistered
@@ -125,7 +131,6 @@ class ImportScripts::Smf1 < ImportScripts::Base
ORDER BY m.id_member
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if users.empty?
@@ -158,38 +163,45 @@ class ImportScripts::Smf1 < ImportScripts::Base
ip_address: u["memberIP2"],
active: u["is_activated"] == 1,
approved: u["is_activated"] == 1,
post_create_action: proc do |user|
# usernames
@old_to_new_usernames[u["memberName"]] = user.username
post_create_action:
proc do |user|
# usernames
@old_to_new_usernames[u["memberName"]] = user.username
# groups
GroupUser.transaction do
group_ids.each do |gid|
(group_id = group_id_from_imported_group_id(gid)) && GroupUser.find_or_create_by(user: user, group_id: group_id)
# groups
GroupUser.transaction do
group_ids.each do |gid|
(group_id = group_id_from_imported_group_id(gid)) &&
GroupUser.find_or_create_by(user: user, group_id: group_id)
end
end
end
# avatar
avatar_url = nil
# avatar
avatar_url = nil
if u["avatar"].present?
if u["avatar"].start_with?("http")
avatar_url = u["avatar"]
elsif u["avatar"].start_with?("avatar_")
avatar_url = "#{FORUM_URL}/avatar-members/#{u["avatar"]}"
if u["avatar"].present?
if u["avatar"].start_with?("http")
avatar_url = u["avatar"]
elsif u["avatar"].start_with?("avatar_")
avatar_url = "#{FORUM_URL}/avatar-members/#{u["avatar"]}"
end
end
end
avatar_url ||= if u["attachmentType"] == 0 && u["id_attach"].present?
"#{FORUM_URL}/index.php?action=dlattach;attach=#{u["id_attach"]};type=avatar"
elsif u["attachmentType"] == 1 && u["filename"].present?
"#{FORUM_URL}/avatar-members/#{u["filename"]}"
end
avatar_url ||=
if u["attachmentType"] == 0 && u["id_attach"].present?
"#{FORUM_URL}/index.php?action=dlattach;attach=#{u["id_attach"]};type=avatar"
elsif u["attachmentType"] == 1 && u["filename"].present?
"#{FORUM_URL}/avatar-members/#{u["filename"]}"
end
if avatar_url.present?
UserAvatar.import_url_for_user(avatar_url, user) rescue nil
end
end
if avatar_url.present?
begin
UserAvatar.import_url_for_user(avatar_url, user)
rescue StandardError
nil
end
end
end,
}
end
end
@@ -198,7 +210,7 @@ class ImportScripts::Smf1 < ImportScripts::Base
def import_categories
puts "", "Importing categories..."
categories = mysql_query(<<~SQL
categories = mysql_query(<<~SQL).to_a
SELECT id_board
, id_parent
, boardOrder
@@ -207,7 +219,6 @@ class ImportScripts::Smf1 < ImportScripts::Base
FROM smf_boards
ORDER BY id_parent, id_board
SQL
).to_a
parent_categories = categories.select { |c| c["id_parent"] == 0 }
children_categories = categories.select { |c| c["id_parent"] != 0 }
@@ -218,9 +229,13 @@ class ImportScripts::Smf1 < ImportScripts::Base
name: c["name"],
description: pre_process_raw(c["description"].presence),
position: c["boardOrder"],
post_create_action: proc do |category|
Permalink.find_or_create_by(url: "forums/index.php/board,#{c["id_board"]}.0.html", category_id: category.id)
end,
post_create_action:
proc do |category|
Permalink.find_or_create_by(
url: "forums/index.php/board,#{c["id_board"]}.0.html",
category_id: category.id,
)
end,
}
end
@@ -231,9 +246,13 @@ class ImportScripts::Smf1 < ImportScripts::Base
name: c["name"],
description: pre_process_raw(c["description"].presence),
position: c["boardOrder"],
post_create_action: proc do |category|
Permalink.find_or_create_by(url: "forums/index.php/board,#{c["id_board"]}.0.html", category_id: category.id)
end,
post_create_action:
proc do |category|
Permalink.find_or_create_by(
url: "forums/index.php/board,#{c["id_board"]}.0.html",
category_id: category.id,
)
end,
}
end
end
@@ -245,7 +264,7 @@ class ImportScripts::Smf1 < ImportScripts::Base
total = mysql_query("SELECT COUNT(*) count FROM smf_messages").first["count"]
batches(BATCH_SIZE) do |offset|
posts = mysql_query(<<~SQL
posts = mysql_query(<<~SQL).to_a
SELECT m.id_msg
, m.id_topic
, m.id_board
@@ -262,7 +281,6 @@ class ImportScripts::Smf1 < ImportScripts::Base
ORDER BY m.id_msg
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if posts.empty?
@@ -287,12 +305,18 @@ class ImportScripts::Smf1 < ImportScripts::Base
post[:views] = p["numViews"]
post[:pinned_at] = created_at if p["isSticky"] == 1
post[:post_create_action] = proc do |pp|
Permalink.find_or_create_by(url: "forums/index.php/topic,#{p["id_topic"]}.0.html", topic_id: pp.topic_id)
Permalink.find_or_create_by(
url: "forums/index.php/topic,#{p["id_topic"]}.0.html",
topic_id: pp.topic_id,
)
end
elsif parent = topic_lookup_from_imported_post_id(p["id_first_msg"])
post[:topic_id] = parent[:topic_id]
post[:post_create_action] = proc do |pp|
Permalink.find_or_create_by(url: "forums/index.php/topic,#{p["id_topic"]}.msg#{p["id_msg"]}.html", post_id: pp.id)
Permalink.find_or_create_by(
url: "forums/index.php/topic,#{p["id_topic"]}.msg#{p["id_msg"]}.html",
post_id: pp.id,
)
end
else
next
@@ -307,10 +331,15 @@ class ImportScripts::Smf1 < ImportScripts::Base
puts "", "Importing personal posts..."
last_post_id = -1
total = mysql_query("SELECT COUNT(*) count FROM smf_personal_messages WHERE deletedBySender = 0").first["count"]
total =
mysql_query(
"SELECT COUNT(*) count FROM smf_personal_messages WHERE deletedBySender = 0",
).first[
"count"
]
batches(BATCH_SIZE) do |offset|
posts = mysql_query(<<~SQL
posts = mysql_query(<<~SQL).to_a
SELECT id_pm
, id_member_from
, msgtime
@@ -323,7 +352,6 @@ class ImportScripts::Smf1 < ImportScripts::Base
ORDER BY id_pm
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if posts.empty?
@@ -335,7 +363,8 @@ class ImportScripts::Smf1 < ImportScripts::Base
create_posts(posts, total: total, offset: offset) do |p|
next unless user_id = user_id_from_imported_user_id(p["id_member_from"])
next if p["recipients"].blank?
recipients = p["recipients"].split(",").map { |id| user_id_from_imported_user_id(id) }.compact.uniq
recipients =
p["recipients"].split(",").map { |id| user_id_from_imported_user_id(id) }.compact.uniq
next if recipients.empty?
id = "pm-#{p["id_pm"]}"
@@ -385,10 +414,13 @@ class ImportScripts::Smf1 < ImportScripts::Base
count = 0
last_upload_id = -1
total = mysql_query("SELECT COUNT(*) count FROM smf_attachments WHERE id_msg IS NOT NULL").first["count"]
total =
mysql_query("SELECT COUNT(*) count FROM smf_attachments WHERE id_msg IS NOT NULL").first[
"count"
]
batches(BATCH_SIZE) do |offset|
uploads = mysql_query(<<~SQL
uploads = mysql_query(<<~SQL).to_a
SELECT id_attach
, id_msg
, filename
@@ -399,7 +431,6 @@ class ImportScripts::Smf1 < ImportScripts::Base
ORDER BY id_attach
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if uploads.empty?
@@ -408,7 +439,13 @@ class ImportScripts::Smf1 < ImportScripts::Base
uploads.each do |u|
count += 1
next unless post = PostCustomField.joins(:post).find_by(name: "import_id", value: u["id_msg"].to_s)&.post
unless post =
PostCustomField
.joins(:post)
.find_by(name: "import_id", value: u["id_msg"].to_s)
&.post
next
end
path = File.join(UPLOADS_DIR, "#{u["id_attach"]}_#{u["file_hash"]}")
next unless File.exist?(path) && File.size(path) > 0
@@ -433,15 +470,25 @@ class ImportScripts::Smf1 < ImportScripts::Base
puts "", "Importing likes..."
count = 0
total = mysql_query("SELECT COUNT(*) count FROM smf_thank_you_post WHERE thx_time > 0").first["count"]
total =
mysql_query("SELECT COUNT(*) count FROM smf_thank_you_post WHERE thx_time > 0").first["count"]
like = PostActionType.types[:like]
mysql_query("SELECT id_msg, id_member, thx_time FROM smf_thank_you_post WHERE thx_time > 0 ORDER BY id_thx_post").each do |l|
mysql_query(
"SELECT id_msg, id_member, thx_time FROM smf_thank_you_post WHERE thx_time > 0 ORDER BY id_thx_post",
).each do |l|
print_status(count += 1, total, get_start_time("likes"))
next unless post_id = post_id_from_imported_post_id(l["id_msg"])
next unless user_id = user_id_from_imported_user_id(l["id_member"])
next if PostAction.where(post_action_type_id: like, post_id: post_id, user_id: user_id).exists?
PostAction.create(post_action_type_id: like, post_id: post_id, user_id: user_id, created_at: Time.at(l["thx_time"]))
if PostAction.where(post_action_type_id: like, post_id: post_id, user_id: user_id).exists?
next
end
PostAction.create(
post_action_type_id: like,
post_id: post_id,
user_id: user_id,
created_at: Time.at(l["thx_time"]),
)
end
end
@@ -457,7 +504,7 @@ class ImportScripts::Smf1 < ImportScripts::Base
count = 0
total = mysql_query("SELECT COUNT(*) count FROM smf_feedback WHERE approved").first["count"]
mysql_query(<<~SQL
mysql_query(<<~SQL).each do |f|
SELECT feedbackid
, id_member
, feedbackmember_id
@@ -470,7 +517,6 @@ class ImportScripts::Smf1 < ImportScripts::Base
WHERE approved
ORDER BY feedbackid
SQL
).each do |f|
print_status(count += 1, total, get_start_time("feedbacks"))
next unless user_id_from = user_id_from_imported_user_id(f["feedbackmember_id"])
next unless user_id_to = user_id_from_imported_user_id(f["id_member"])
@@ -498,7 +544,10 @@ class ImportScripts::Smf1 < ImportScripts::Base
puts "", "Importing banned email domains..."
blocklist = SiteSetting.blocked_email_domains.split("|")
banned_domains = mysql_query("SELECT SUBSTRING(email_address, 3) domain FROM smf_ban_items WHERE email_address RLIKE '^%@[^%]+$' GROUP BY email_address").map { |r| r["domain"] }
banned_domains =
mysql_query(
"SELECT SUBSTRING(email_address, 3) domain FROM smf_ban_items WHERE email_address RLIKE '^%@[^%]+$' GROUP BY email_address",
).map { |r| r["domain"] }
SiteSetting.blocked_email_domains = (blocklist + banned_domains).uniq.sort.join("|")
end
@@ -508,7 +557,10 @@ class ImportScripts::Smf1 < ImportScripts::Base
count = 0
banned_emails = mysql_query("SELECT email_address FROM smf_ban_items WHERE email_address RLIKE '^[^%]+@[^%]+$' GROUP BY email_address").map { |r| r["email_address"] }
banned_emails =
mysql_query(
"SELECT email_address FROM smf_ban_items WHERE email_address RLIKE '^[^%]+@[^%]+$' GROUP BY email_address",
).map { |r| r["email_address"] }
banned_emails.each do |email|
print_status(count += 1, banned_emails.size, get_start_time("banned_emails"))
ScreenedEmail.find_or_create_by(email: email)
@@ -520,7 +572,7 @@ class ImportScripts::Smf1 < ImportScripts::Base
count = 0
banned_ips = mysql_query(<<~SQL
banned_ips = mysql_query(<<~SQL).to_a
SELECT CONCAT_WS('.', ip_low1, ip_low2, ip_low3, ip_low4) low
, CONCAT_WS('.', ip_high1, ip_high2, ip_high3, ip_high4) high
, hits
@@ -528,7 +580,6 @@ class ImportScripts::Smf1 < ImportScripts::Base
WHERE (ip_low1 + ip_low2 + ip_low3 + ip_low4 + ip_high1 + ip_high2 + ip_high3 + ip_high4) > 0
GROUP BY low, high, hits;
SQL
).to_a
banned_ips.each do |r|
print_status(count += 1, banned_ips.size, get_start_time("banned_ips"))
@@ -537,15 +588,15 @@ class ImportScripts::Smf1 < ImportScripts::Base
ScreenedIpAddress.create(ip_address: r["low"], match_count: r["hits"])
end
else
low_values = r["low"].split(".").map(&:to_i)
low_values = r["low"].split(".").map(&:to_i)
high_values = r["high"].split(".").map(&:to_i)
first_diff = low_values.zip(high_values).count { |a, b| a == b }
first_diff = low_values.zip(high_values).count { |a, b| a == b }
first_diff -= 1 if low_values[first_diff] == 0 && high_values[first_diff] == 255
prefix = low_values[0...first_diff]
suffix = [0] * (3 - first_diff)
mask = 8 * (first_diff + 1)
values = (low_values[first_diff]..high_values[first_diff])
hits = (r["hits"] / [1, values.count].max).floor
prefix = low_values[0...first_diff]
suffix = [0] * (3 - first_diff)
mask = 8 * (first_diff + 1)
values = (low_values[first_diff]..high_values[first_diff])
hits = (r["hits"] / [1, values.count].max).floor
values.each do |v|
range_values = prefix + [v] + suffix
ip_address = "#{range_values.join(".")}/#{mask}"
@@ -562,10 +613,28 @@ class ImportScripts::Smf1 < ImportScripts::Base
ScreenedIpAddress.roll_up
end
# BBCode tag names that pre_process_raw strips from posts while keeping the text
# between the opening and closing tag.
# The %w{...} and %w[...] literals below are the pre- and post-formatting renderings
# of the same list; with `||=`, a second assignment is a no-op once the constant is set.
# NOTE(review): "shadown" looks like a typo for the "shadow" tag — confirm before
# changing it, since the entry decides which tags get stripped.
IGNORED_BBCODE ||= %w{
black blue center color email flash font glow green iurl left list move red
right shadown size table time white
}
IGNORED_BBCODE ||= %w[
black
blue
center
color
email
flash
font
glow
green
iurl
left
list
move
red
right
shadown
size
table
time
white
]
def pre_process_raw(raw)
return "" if raw.blank?
@@ -573,59 +642,59 @@ class ImportScripts::Smf1 < ImportScripts::Base
raw = @htmlentities.decode(raw)
# [acronym]
raw.gsub!(/\[acronym=([^\]]+)\](.*?)\[\/acronym\]/im) { %{<abbr title="#{$1}">#{$2}</abbr>} }
raw.gsub!(%r{\[acronym=([^\]]+)\](.*?)\[/acronym\]}im) { %{<abbr title="#{$1}">#{$2}</abbr>} }
# [br]
raw.gsub!(/\[br\]/i, "\n")
raw.gsub!(/<br\s*\/?>/i, "\n")
raw.gsub!(%r{<br\s*/?>}i, "\n")
# [hr]
raw.gsub!(/\[hr\]/i, "<hr/>")
# [sub]
raw.gsub!(/\[sub\](.*?)\[\/sub\]/im) { "<sub>#{$1}</sub>" }
raw.gsub!(%r{\[sub\](.*?)\[/sub\]}im) { "<sub>#{$1}</sub>" }
# [sup]
raw.gsub!(/\[sup\](.*?)\[\/sup\]/im) { "<sup>#{$1}</sup>" }
raw.gsub!(%r{\[sup\](.*?)\[/sup\]}im) { "<sup>#{$1}</sup>" }
# [html]
raw.gsub!(/\[html\]/i, "\n```html\n")
raw.gsub!(/\[\/html\]/i, "\n```\n")
raw.gsub!(%r{\[/html\]}i, "\n```\n")
# [php]
raw.gsub!(/\[php\]/i, "\n```php\n")
raw.gsub!(/\[\/php\]/i, "\n```\n")
raw.gsub!(%r{\[/php\]}i, "\n```\n")
# [code]
raw.gsub!(/\[\/?code\]/i, "\n```\n")
raw.gsub!(%r{\[/?code\]}i, "\n```\n")
# [pre]
raw.gsub!(/\[\/?pre\]/i, "\n```\n")
raw.gsub!(%r{\[/?pre\]}i, "\n```\n")
# [tt]
raw.gsub!(/\[\/?tt\]/i, "`")
raw.gsub!(%r{\[/?tt\]}i, "`")
# [ftp]
raw.gsub!(/\[ftp/i, "[url")
raw.gsub!(/\[\/ftp\]/i, "[/url]")
raw.gsub!(%r{\[/ftp\]}i, "[/url]")
# [me]
raw.gsub!(/\[me=([^\]]*)\](.*?)\[\/me\]/im) { "_\\* #{$1} #{$2}_" }
raw.gsub!(%r{\[me=([^\]]*)\](.*?)\[/me\]}im) { "_\\* #{$1} #{$2}_" }
# [li]
raw.gsub!(/\[li\](.*?)\[\/li\]/im) { "- #{$1}" }
raw.gsub!(%r{\[li\](.*?)\[/li\]}im) { "- #{$1}" }
# puts [img] on their own line
raw.gsub!(/\[img[^\]]*\](.*?)\[\/img\]/im) { "\n#{$1}\n" }
raw.gsub!(%r{\[img[^\]]*\](.*?)\[/img\]}im) { "\n#{$1}\n" }
# puts [youtube] on their own line
raw.gsub!(/\[youtube\](.*?)\[\/youtube\]/im) { "\n#{$1}\n" }
raw.gsub!(%r{\[youtube\](.*?)\[/youtube\]}im) { "\n#{$1}\n" }
IGNORED_BBCODE.each { |code| raw.gsub!(/\[#{code}[^\]]*\](.*?)\[\/#{code}\]/im, '\1') }
IGNORED_BBCODE.each { |code| raw.gsub!(%r{\[#{code}[^\]]*\](.*?)\[/#{code}\]}im, '\1') }
# ensure [/quote] are on their own line
raw.gsub!(/\s*\[\/quote\]\s*/im, "\n[/quote]\n")
raw.gsub!(%r{\s*\[/quote\]\s*}im, "\n[/quote]\n")
# [quote]
raw.gsub!(/\s*\[quote (.+?)\]\s/im) {
raw.gsub!(/\s*\[quote (.+?)\]\s/im) do
params = $1
post_id = params[/msg(\d+)/, 1]
username = params[/author=(.+) link=/, 1]
@@ -636,14 +705,14 @@ class ImportScripts::Smf1 < ImportScripts::Base
else
%{\n[quote="#{username}"]\n}
end
}
end
# remove tapatalk mess
raw.gsub!(/Sent from .+? using \[url=.*?\].+?\[\/url\]/i, "")
raw.gsub!(%r{Sent from .+? using \[url=.*?\].+?\[/url\]}i, "")
raw.gsub!(/Sent from .+? using .+?\z/i, "")
# clean URLs
raw.gsub!(/\[url=(.+?)\]\1\[\/url\]/i, '\1')
raw.gsub!(%r{\[url=(.+?)\]\1\[/url\]}i, '\1')
raw
end
@@ -651,7 +720,6 @@ class ImportScripts::Smf1 < ImportScripts::Base
# Runs +sql+ on the importer's Mysql2 client (@client) with the client's default
# query options and returns whatever the client's #query returns.
def mysql_query(sql)
  connection = @client
  connection.query(sql)
end
end
ImportScripts::Smf1.new.perform

View File

@@ -1,18 +1,17 @@
# coding: utf-8
# frozen_string_literal: true
require 'mysql2'
require File.expand_path(File.dirname(__FILE__) + '/base.rb')
require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'htmlentities'
require 'tsort'
require 'set'
require 'optparse'
require 'etc'
require 'open3'
require "htmlentities"
require "tsort"
require "set"
require "optparse"
require "etc"
require "open3"
class ImportScripts::Smf2 < ImportScripts::Base
def self.run
options = Options.new
begin
@@ -54,9 +53,9 @@ class ImportScripts::Smf2 < ImportScripts::Base
exit 1
end
if options.password == :ask
require 'highline'
require "highline"
$stderr.print "Enter password for MySQL database `#{options.database}`: "
options.password = HighLine.new.ask('') { |q| q.echo = false }
options.password = HighLine.new.ask("") { |q| q.echo = false }
end
@default_db_connection = create_db_connection
@@ -68,11 +67,11 @@ class ImportScripts::Smf2 < ImportScripts::Base
import_categories
import_posts
postprocess_posts
make_prettyurl_permalinks('/forum')
make_prettyurl_permalinks("/forum")
end
def import_groups
puts '', 'creating groups'
puts "", "creating groups"
total = query(<<-SQL, as: :single)
SELECT COUNT(*) FROM {prefix}membergroups
@@ -92,7 +91,7 @@ class ImportScripts::Smf2 < ImportScripts::Base
MODERATORS_GROUP = 2
def import_users
puts '', 'creating users'
puts "", "creating users"
total = query("SELECT COUNT(*) FROM {prefix}members", as: :single)
create_users(query(<<-SQL), total: total) do |member|
@@ -103,10 +102,25 @@ class ImportScripts::Smf2 < ImportScripts::Base
FROM {prefix}members AS a
LEFT JOIN {prefix}attachments AS b ON a.id_member = b.id_member
SQL
group_ids = [ member[:id_group], *member[:additional_groups].split(',').map(&:to_i) ]
create_time = Time.zone.at(member[:date_registered]) rescue Time.now
last_seen_time = Time.zone.at(member[:last_login]) rescue nil
ip_addr = IPAddr.new(member[:member_ip]) rescue nil
group_ids = [member[:id_group], *member[:additional_groups].split(",").map(&:to_i)]
create_time =
begin
Time.zone.at(member[:date_registered])
rescue StandardError
Time.now
end
last_seen_time =
begin
Time.zone.at(member[:last_login])
rescue StandardError
nil
end
ip_addr =
begin
IPAddr.new(member[:member_ip])
rescue StandardError
nil
end
{
id: member[:id_member],
username: member[:member_name],
@@ -121,27 +135,33 @@ class ImportScripts::Smf2 < ImportScripts::Base
ip_address: ip_addr,
admin: group_ids.include?(ADMIN_GROUP),
moderator: group_ids.include?(MODERATORS_GROUP),
post_create_action: proc do |user|
user.update(created_at: create_time) if create_time < user.created_at
user.save
GroupUser.transaction do
group_ids.each do |gid|
(group_id = group_id_from_imported_group_id(gid)) &&
GroupUser.find_or_create_by(user: user, group_id: group_id)
end
end
if options.smfroot && member[:id_attach].present? && user.uploaded_avatar_id.blank?
(path = find_smf_attachment_path(member[:id_attach], member[:file_hash], member[:filename])) && begin
upload = create_upload(user.id, path, member[:filename])
if upload.persisted?
user.update(uploaded_avatar_id: upload.id)
post_create_action:
proc do |user|
user.update(created_at: create_time) if create_time < user.created_at
user.save
GroupUser.transaction do
group_ids.each do |gid|
(group_id = group_id_from_imported_group_id(gid)) &&
GroupUser.find_or_create_by(user: user, group_id: group_id)
end
rescue SystemCallError => err
puts "Could not import avatar: #{err.message}"
end
end
end
if options.smfroot && member[:id_attach].present? && user.uploaded_avatar_id.blank?
(
path =
find_smf_attachment_path(
member[:id_attach],
member[:file_hash],
member[:filename],
)
) &&
begin
upload = create_upload(user.id, path, member[:filename])
user.update(uploaded_avatar_id: upload.id) if upload.persisted?
rescue SystemCallError => err
puts "Could not import avatar: #{err.message}"
end
end
end,
}
end
end
@@ -155,38 +175,39 @@ class ImportScripts::Smf2 < ImportScripts::Base
parent_id = category_id_from_imported_category_id(board[:id_parent]) if board[:id_parent] > 0
groups = (board[:member_groups] || "").split(/,/).map(&:to_i)
restricted = !groups.include?(GUEST_GROUP) && !groups.include?(MEMBER_GROUP)
if Category.find_by_name(board[:name])
board[:name] += board[:id_board].to_s
end
board[:name] += board[:id_board].to_s if Category.find_by_name(board[:name])
{
id: board[:id_board],
name: board[:name],
description: board[:description],
parent_category_id: parent_id,
post_create_action: restricted && proc do |category|
category.update(read_restricted: true)
groups.each do |imported_group_id|
(group_id = group_id_from_imported_group_id(imported_group_id)) &&
CategoryGroup.find_or_create_by(category: category, group_id: group_id) do |cg|
cg.permission_type = CategoryGroup.permission_types[:full]
end
end
end,
post_create_action:
restricted &&
proc do |category|
category.update(read_restricted: true)
groups.each do |imported_group_id|
(group_id = group_id_from_imported_group_id(imported_group_id)) &&
CategoryGroup.find_or_create_by(category: category, group_id: group_id) do |cg|
cg.permission_type = CategoryGroup.permission_types[:full]
end
end
end,
}
end
end
def import_posts
puts '', 'creating posts'
spinner = %w(/ - \\ |).cycle
puts "", "creating posts"
spinner = %w[/ - \\ |].cycle
total = query("SELECT COUNT(*) FROM {prefix}messages", as: :single)
PostCreator.class_eval do
def guardian
@guardian ||= if opts[:import_mode]
@@system_guardian ||= Guardian.new(Discourse.system_user)
else
Guardian.new(@user)
end
@guardian ||=
if opts[:import_mode]
@@system_guardian ||= Guardian.new(Discourse.system_user)
else
Guardian.new(@user)
end
end
end
@@ -208,10 +229,12 @@ class ImportScripts::Smf2 < ImportScripts::Base
id: message[:id_msg],
user_id: user_id_from_imported_user_id(message[:id_member]) || -1,
created_at: Time.zone.at(message[:poster_time]),
post_create_action: ignore_quotes && proc do |p|
p.custom_fields['import_rebake'] = 't'
p.save
end
post_create_action:
ignore_quotes &&
proc do |p|
p.custom_fields["import_rebake"] = "t"
p.save
end,
}
if message[:id_msg] == message[:id_first_msg]
@@ -228,31 +251,48 @@ class ImportScripts::Smf2 < ImportScripts::Base
end
next nil if skip
attachments = message[:attachment_count] == 0 ? [] : query(<<-SQL, connection: db2, as: :array)
attachments =
message[:attachment_count] == 0 ? [] : query(<<-SQL, connection: db2, as: :array)
SELECT id_attach, file_hash, filename FROM {prefix}attachments
WHERE attachment_type = 0 AND id_msg = #{message[:id_msg]}
ORDER BY id_attach ASC
SQL
attachments.map! { |a| import_attachment(post, a) rescue (puts $! ; nil) }
attachments.map! do |a|
begin
import_attachment(post, a)
rescue StandardError
(
puts $!
nil
)
end
end
post[:raw] = convert_message_body(message[:body], attachments, ignore_quotes: ignore_quotes)
next post
end
end
def import_attachment(post, attachment)
path = find_smf_attachment_path(attachment[:id_attach], attachment[:file_hash], attachment[:filename])
path =
find_smf_attachment_path(
attachment[:id_attach],
attachment[:file_hash],
attachment[:filename],
)
raise "Attachment for post #{post[:id]} failed: #{attachment[:filename]}" unless path.present?
upload = create_upload(post[:user_id], path, attachment[:filename])
raise "Attachment for post #{post[:id]} failed: #{upload.errors.full_messages.join(', ')}" unless upload.persisted?
unless upload.persisted?
raise "Attachment for post #{post[:id]} failed: #{upload.errors.full_messages.join(", ")}"
end
upload
rescue SystemCallError => err
raise "Attachment for post #{post[:id]} failed: #{err.message}"
end
def postprocess_posts
puts '', 'rebaking posts'
puts "", "rebaking posts"
tags = PostCustomField.where(name: 'import_rebake', value: 't')
tags = PostCustomField.where(name: "import_rebake", value: "t")
tags_total = tags.count
tags_done = 0
@@ -271,38 +311,47 @@ class ImportScripts::Smf2 < ImportScripts::Base
private
def create_db_connection
Mysql2::Client.new(host: options.host, username: options.username,
password: options.password, database: options.database)
Mysql2::Client.new(
host: options.host,
username: options.username,
password: options.password,
database: options.database,
)
end
def query(sql, **opts, &block)
db = opts[:connection] || @default_db_connection
return __query(db, sql).to_a if opts[:as] == :array
return __query(db, sql, as: :array).first[0] if opts[:as] == :single
return __query(db, sql).to_a if opts[:as] == :array
return __query(db, sql, as: :array).first[0] if opts[:as] == :single
return __query(db, sql, stream: true).each(&block) if block_given?
__query(db, sql, stream: true)
end
def __query(db, sql, **opts)
db.query(sql.gsub('{prefix}', options.prefix),
{ symbolize_keys: true, cache_rows: false }.merge(opts))
db.query(
sql.gsub("{prefix}", options.prefix),
{ symbolize_keys: true, cache_rows: false }.merge(opts),
)
end
TRTR_TABLE = begin
from = "ŠŽšžŸÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüýÿ"
to = "SZszYAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy"
from.chars.zip(to.chars)
end
TRTR_TABLE =
begin
from = "ŠŽšžŸÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüýÿ"
to = "SZszYAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy"
from.chars.zip(to.chars)
end
def find_smf_attachment_path(attachment_id, file_hash, filename)
cleaned_name = filename.dup
TRTR_TABLE.each { |from, to| cleaned_name.gsub!(from, to) }
cleaned_name.gsub!(/\s/, '_')
cleaned_name.gsub!(/[^\w_\.\-]/, '')
legacy_name = "#{attachment_id}_#{cleaned_name.gsub('.', '_')}#{Digest::MD5.hexdigest(cleaned_name)}"
cleaned_name.gsub!(/\s/, "_")
cleaned_name.gsub!(/[^\w_\.\-]/, "")
legacy_name =
"#{attachment_id}_#{cleaned_name.gsub(".", "_")}#{Digest::MD5.hexdigest(cleaned_name)}"
[ filename, "#{attachment_id}_#{file_hash}", legacy_name ]
.map { |name| File.join(options.smfroot, 'attachments', name) }
[filename, "#{attachment_id}_#{file_hash}", legacy_name].map do |name|
File.join(options.smfroot, "attachments", name)
end
.detect { |file| File.exist?(file) }
end
@@ -311,16 +360,16 @@ class ImportScripts::Smf2 < ImportScripts::Base
end
def convert_message_body(body, attachments = [], **opts)
body = decode_entities(body.gsub(/<br\s*\/>/, "\n"))
body = decode_entities(body.gsub(%r{<br\s*/>}, "\n"))
body.gsub!(ColorPattern, '\k<inner>')
body.gsub!(ListPattern) do |s|
params = parse_tag_params($~[:params])
tag = params['type'] == 'decimal' ? 'ol' : 'ul'
tag = params["type"] == "decimal" ? "ol" : "ul"
"\n[#{tag}]#{$~[:inner].strip}[/#{tag}]\n"
end
body.gsub!(XListPattern) do |s|
r = +"\n[ul]"
s.lines.each { |l| "#{r}[li]#{l.strip.sub(/^\[x\]\s*/, '')}[/li]" }
s.lines.each { |l| "#{r}[li]#{l.strip.sub(/^\[x\]\s*/, "")}[/li]" }
"#{r}[/ul]\n"
end
@@ -338,9 +387,7 @@ class ImportScripts::Smf2 < ImportScripts::Base
if use_count.keys.length < attachments.select(&:present?).length
body = "#{body}\n\n---"
attachments.each_with_index do |upload, num|
if upload.present? && use_count[num] == (0)
"#{body}\n\n#{get_upload_markdown(upload)}"
end
"#{body}\n\n#{get_upload_markdown(upload)}" if upload.present? && use_count[num] == (0)
end
end
end
@@ -353,26 +400,46 @@ class ImportScripts::Smf2 < ImportScripts::Base
end
def convert_quotes(body)
body.to_s.gsub(QuotePattern) do |s|
inner = $~[:inner].strip
params = parse_tag_params($~[:params])
if params['author'].present?
quote = +"\n[quote=\"#{params['author']}"
if QuoteParamsPattern =~ params['link']
tl = topic_lookup_from_imported_post_id($~[:msg].to_i)
quote = "#{quote} post:#{tl[:post_number]}, topic:#{tl[:topic_id]}" if tl
body
.to_s
.gsub(QuotePattern) do |s|
inner = $~[:inner].strip
params = parse_tag_params($~[:params])
if params["author"].present?
quote = +"\n[quote=\"#{params["author"]}"
if QuoteParamsPattern =~ params["link"]
tl = topic_lookup_from_imported_post_id($~[:msg].to_i)
quote = "#{quote} post:#{tl[:post_number]}, topic:#{tl[:topic_id]}" if tl
end
quote = "#{quote}\"]\n#{convert_quotes(inner)}\n[/quote]"
else
"<blockquote>#{convert_quotes(inner)}</blockquote>"
end
quote = "#{quote}\"]\n#{convert_quotes(inner)}\n[/quote]"
else
"<blockquote>#{convert_quotes(inner)}</blockquote>"
end
end
end
IGNORED_BBCODE ||= %w{
black blue center color email flash font glow green iurl left list move red
right shadown size table time white
}
IGNORED_BBCODE ||= %w[
black
blue
center
color
email
flash
font
glow
green
iurl
left
list
move
red
right
shadown
size
table
time
white
]
def convert_bbcode(raw)
return "" if raw.blank?
@@ -380,67 +447,67 @@ class ImportScripts::Smf2 < ImportScripts::Base
raw = convert_quotes(raw)
# [acronym]
raw.gsub!(/\[acronym=([^\]]+)\](.*?)\[\/acronym\]/im) { %{<abbr title="#{$1}">#{$2}</abbr>} }
raw.gsub!(%r{\[acronym=([^\]]+)\](.*?)\[/acronym\]}im) { %{<abbr title="#{$1}">#{$2}</abbr>} }
# [br]
raw.gsub!(/\[br\]/i, "\n")
raw.gsub!(/<br\s*\/?>/i, "\n")
raw.gsub!(%r{<br\s*/?>}i, "\n")
# [hr]
raw.gsub!(/\[hr\]/i, "<hr/>")
# [sub]
raw.gsub!(/\[sub\](.*?)\[\/sub\]/im) { "<sub>#{$1}</sub>" }
raw.gsub!(%r{\[sub\](.*?)\[/sub\]}im) { "<sub>#{$1}</sub>" }
# [sup]
raw.gsub!(/\[sup\](.*?)\[\/sup\]/im) { "<sup>#{$1}</sup>" }
raw.gsub!(%r{\[sup\](.*?)\[/sup\]}im) { "<sup>#{$1}</sup>" }
# [html]
raw.gsub!(/\[html\]/i, "\n```html\n")
raw.gsub!(/\[\/html\]/i, "\n```\n")
raw.gsub!(%r{\[/html\]}i, "\n```\n")
# [php]
raw.gsub!(/\[php\]/i, "\n```php\n")
raw.gsub!(/\[\/php\]/i, "\n```\n")
raw.gsub!(%r{\[/php\]}i, "\n```\n")
# [code]
raw.gsub!(/\[\/?code\]/i, "\n```\n")
raw.gsub!(%r{\[/?code\]}i, "\n```\n")
# [pre]
raw.gsub!(/\[\/?pre\]/i, "\n```\n")
raw.gsub!(%r{\[/?pre\]}i, "\n```\n")
# [tt]
raw.gsub!(/\[\/?tt\]/i, "`")
raw.gsub!(%r{\[/?tt\]}i, "`")
# [ftp]
raw.gsub!(/\[ftp/i, "[url")
raw.gsub!(/\[\/ftp\]/i, "[/url]")
raw.gsub!(%r{\[/ftp\]}i, "[/url]")
# [me]
raw.gsub!(/\[me=([^\]]*)\](.*?)\[\/me\]/im) { "_\\* #{$1} #{$2}_" }
raw.gsub!(%r{\[me=([^\]]*)\](.*?)\[/me\]}im) { "_\\* #{$1} #{$2}_" }
# [ul]
raw.gsub!(/\[ul\]/i, "")
raw.gsub!(/\[\/ul\]/i, "")
raw.gsub!(%r{\[/ul\]}i, "")
# [li]
raw.gsub!(/\[li\](.*?)\[\/li\]/im) { "- #{$1}" }
raw.gsub!(%r{\[li\](.*?)\[/li\]}im) { "- #{$1}" }
# puts [img] on their own line
raw.gsub!(/\[img[^\]]*\](.*?)\[\/img\]/im) { "\n#{$1}\n" }
raw.gsub!(%r{\[img[^\]]*\](.*?)\[/img\]}im) { "\n#{$1}\n" }
# puts [youtube] on their own line
raw.gsub!(/\[youtube\](.*?)\[\/youtube\]/im) { "\n#{$1}\n" }
raw.gsub!(%r{\[youtube\](.*?)\[/youtube\]}im) { "\n#{$1}\n" }
IGNORED_BBCODE.each { |code| raw.gsub!(/\[#{code}[^\]]*\](.*?)\[\/#{code}\]/im, '\1') }
IGNORED_BBCODE.each { |code| raw.gsub!(%r{\[#{code}[^\]]*\](.*?)\[/#{code}\]}im, '\1') }
# ensure [/quote] are on their own line
raw.gsub!(/\s*\[\/quote\]\s*/im, "\n[/quote]\n")
raw.gsub!(%r{\s*\[/quote\]\s*}im, "\n[/quote]\n")
# remove tapatalk mess
raw.gsub!(/Sent from .+? using \[url=.*?\].+?\[\/url\]/i, "")
raw.gsub!(%r{Sent from .+? using \[url=.*?\].+?\[/url\]}i, "")
raw.gsub!(/Sent from .+? using .+?\z/i, "")
# clean URLs
raw.gsub!(/\[url=(.+?)\]\1\[\/url\]/i, '\1')
raw.gsub!(%r{\[url=(.+?)\]\1\[/url\]}i, '\1')
raw
end
@@ -460,8 +527,14 @@ class ImportScripts::Smf2 < ImportScripts::Base
# param1=value1=still1 value1 param2=value2 ...
# => {'param1' => 'value1=still1 value1', 'param2' => 'value2 ...'}
def parse_tag_params(params)
params.to_s.strip.scan(/(?<param>\w+)=(?<value>(?:(?>\S+)|\s+(?!\w+=))*)/).
inject({}) { |h, e| h[e[0]] = e[1]; h }
params
.to_s
.strip
.scan(/(?<param>\w+)=(?<value>(?:(?>\S+)|\s+(?!\w+=))*)/)
.inject({}) do |h, e|
h[e[0]] = e[1]
h
end
end
class << self
@@ -474,8 +547,8 @@ class ImportScripts::Smf2 < ImportScripts::Base
# => match[:params] == 'param=value param2=value2'
# match[:inner] == "\n text\n [tag nested=true]text[/tag]\n"
def build_nested_tag_regex(ltag, rtag = nil)
rtag ||= '/' + ltag
%r{
rtag ||= "/" + ltag
/
\[#{ltag}(?-x:[ =](?<params>[^\]]*))?\] # consume open tag, followed by...
(?<inner>(?:
(?> [^\[]+ ) # non-tags, or...
@@ -495,40 +568,41 @@ class ImportScripts::Smf2 < ImportScripts::Base
)
)*)
\[#{rtag}\]
}x
/x
end
end
QuoteParamsPattern = /^topic=(?<topic>\d+).msg(?<msg>\d+)#msg\k<msg>$/
XListPattern = /(?<xblock>(?>^\[x\]\s*(?<line>.*)$\n?)+)/
QuotePattern = build_nested_tag_regex('quote')
ColorPattern = build_nested_tag_regex('color')
ListPattern = build_nested_tag_regex('list')
QuotePattern = build_nested_tag_regex("quote")
ColorPattern = build_nested_tag_regex("color")
ListPattern = build_nested_tag_regex("list")
AttachmentPatterns = [
[/^\[attach(?:|img|url|mini)=(?<num>\d+)\]$/, ->(u) { "\n" + get_upload_markdown(u) + "\n" }],
[/\[attach(?:|img|url|mini)=(?<num>\d+)\]/, ->(u) { get_upload_markdown(u) }]
[/\[attach(?:|img|url|mini)=(?<num>\d+)\]/, ->(u) { get_upload_markdown(u) }],
]
# Provides command line options and parses the SMF settings file.
class Options
class Error < StandardError ; end
class SettingsError < Error ; end
class Error < StandardError
end
class SettingsError < Error
end
def parse!(args = ARGV)
raise Error, 'not enough arguments' if ARGV.empty?
raise Error, "not enough arguments" if ARGV.empty?
begin
parser.parse!(args)
rescue OptionParser::ParseError => err
raise Error, err.message
end
raise Error, 'too many arguments' if args.length > 1
raise Error, "too many arguments" if args.length > 1
self.smfroot = args.first
read_smf_settings if self.smfroot
self.host ||= 'localhost'
self.host ||= "localhost"
self.username ||= Etc.getlogin
self.prefix ||= 'smf_'
self.prefix ||= "smf_"
self.timezone ||= get_php_timezone
end
@@ -547,44 +621,63 @@ class ImportScripts::Smf2 < ImportScripts::Base
private
def get_php_timezone
phpinfo, status = Open3.capture2('php', '-i')
phpinfo, status = Open3.capture2("php", "-i")
phpinfo.lines.each do |line|
key, *vals = line.split(' => ').map(&:strip)
break vals[0] if key == 'Default timezone'
key, *vals = line.split(" => ").map(&:strip)
break vals[0] if key == "Default timezone"
end
rescue Errno::ENOENT
$stderr.puts "Error: PHP CLI executable not found"
end
def read_smf_settings
settings = File.join(self.smfroot, 'Settings.php')
File.readlines(settings).each do |line|
next unless m = /\$([a-z_]+)\s*=\s*['"](.+?)['"]\s*;\s*((#|\/\/).*)?$/.match(line)
case m[1]
when 'db_server' then self.host ||= m[2]
when 'db_user' then self.username ||= m[2]
when 'db_passwd' then self.password ||= m[2]
when 'db_name' then self.database ||= m[2]
when 'db_prefix' then self.prefix ||= m[2]
settings = File.join(self.smfroot, "Settings.php")
File
.readlines(settings)
.each do |line|
next unless m = %r{\$([a-z_]+)\s*=\s*['"](.+?)['"]\s*;\s*((#|//).*)?$}.match(line)
case m[1]
when "db_server"
self.host ||= m[2]
when "db_user"
self.username ||= m[2]
when "db_passwd"
self.password ||= m[2]
when "db_name"
self.database ||= m[2]
when "db_prefix"
self.prefix ||= m[2]
end
end
end
rescue => err
raise SettingsError, err.message unless self.database
end
def parser
@parser ||= OptionParser.new(nil, 12) do |o|
o.banner = "Usage:\t#{File.basename($0)} <SMFROOT> [options]\n"
o.banner = "${o.banner}\t#{File.basename($0)} -d <DATABASE> [options]"
o.on('-h HOST', :REQUIRED, "MySQL server hostname [\"#{self.host}\"]") { |s| self.host = s }
o.on('-u USER', :REQUIRED, "MySQL username [\"#{self.username}\"]") { |s| self.username = s }
o.on('-p [PASS]', :OPTIONAL, 'MySQL password. Without argument, reads password from STDIN.') { |s| self.password = s || :ask }
o.on('-d DBNAME', :REQUIRED, 'Name of SMF database') { |s| self.database = s }
o.on('-f PREFIX', :REQUIRED, "Table names prefix [\"#{self.prefix}\"]") { |s| self.prefix = s }
o.on('-t TIMEZONE', :REQUIRED, 'Timezone used by SMF2 [auto-detected from PHP]') { |s| self.timezone = s }
end
@parser ||=
OptionParser.new(nil, 12) do |o|
o.banner = "Usage:\t#{File.basename($0)} <SMFROOT> [options]\n"
o.banner = "${o.banner}\t#{File.basename($0)} -d <DATABASE> [options]"
o.on("-h HOST", :REQUIRED, "MySQL server hostname [\"#{self.host}\"]") do |s|
self.host = s
end
o.on("-u USER", :REQUIRED, "MySQL username [\"#{self.username}\"]") do |s|
self.username = s
end
o.on(
"-p [PASS]",
:OPTIONAL,
"MySQL password. Without argument, reads password from STDIN.",
) { |s| self.password = s || :ask }
o.on("-d DBNAME", :REQUIRED, "Name of SMF database") { |s| self.database = s }
o.on("-f PREFIX", :REQUIRED, "Table names prefix [\"#{self.prefix}\"]") do |s|
self.prefix = s
end
o.on("-t TIMEZONE", :REQUIRED, "Timezone used by SMF2 [auto-detected from PHP]") do |s|
self.timezone = s
end
end
end
end #Options
# Framework around TSort, used to build a dependency graph over messages
@@ -644,10 +737,14 @@ class ImportScripts::Smf2 < ImportScripts::Base
end
def dependencies
@dependencies ||= Set.new.tap do |deps|
deps.merge(quoted) unless ignore_quotes?
deps << prev if prev.present?
end.to_a
@dependencies ||=
Set
.new
.tap do |deps|
deps.merge(quoted) unless ignore_quotes?
deps << prev if prev.present?
end
.to_a
end
def hash
@@ -659,7 +756,7 @@ class ImportScripts::Smf2 < ImportScripts::Base
end
def inspect
"#<#{self.class.name}: id=#{id.inspect}, prev=#{safe_id(@prev)}, quoted=[#{@quoted.map(&method(:safe_id)).join(', ')}]>"
"#<#{self.class.name}: id=#{id.inspect}, prev=#{safe_id(@prev)}, quoted=[#{@quoted.map(&method(:safe_id)).join(", ")}]>"
end
private
@@ -668,11 +765,10 @@ class ImportScripts::Smf2 < ImportScripts::Base
@graph[id].present? ? @graph[id].id.inspect : "(#{id})"
end
end #Node
end #MessageDependencyGraph
def make_prettyurl_permalinks(prefix)
puts 'creating permalinks for prettyurl plugin'
puts "creating permalinks for prettyurl plugin"
begin
serialized = query(<<-SQL, as: :single)
SELECT value FROM {prefix}settings
@@ -680,9 +776,7 @@ class ImportScripts::Smf2 < ImportScripts::Base
SQL
board_slugs = Array.new
ser = /\{(.*)\}/.match(serialized)[1]
ser.scan(/i:(\d+);s:\d+:\"(.*?)\";/).each do |nv|
board_slugs[nv[0].to_i] = nv[1]
end
ser.scan(/i:(\d+);s:\d+:\"(.*?)\";/).each { |nv| board_slugs[nv[0].to_i] = nv[1] }
topic_urls = query(<<-SQL, as: :array)
SELECT t.id_first_msg, t.id_board,u.pretty_url
FROM smf_topics t
@@ -690,12 +784,14 @@ class ImportScripts::Smf2 < ImportScripts::Base
SQL
topic_urls.each do |url|
t = topic_lookup_from_imported_post_id(url[:id_first_msg])
Permalink.create(url: "#{prefix}/#{board_slugs[url[:id_board]]}/#{url[:pretty_url]}", topic_id: t[:topic_id])
Permalink.create(
url: "#{prefix}/#{board_slugs[url[:id_board]]}/#{url[:pretty_url]}",
topic_id: t[:topic_id],
)
end
rescue
rescue StandardError
end
end
end
ImportScripts::Smf2.run

View File

@@ -1,9 +1,8 @@
# frozen_string_literal: true
require 'uri'
require "uri"
class CreateTitle
def self.from_body(body)
title = remove_mentions body
title = remove_urls title
@@ -24,11 +23,11 @@ class CreateTitle
private
def self.remove_mentions(text)
text.gsub(/@[\w]*/, '')
text.gsub(/@[\w]*/, "")
end
def self.remove_urls(text)
text.gsub(URI::regexp(['http', 'https', 'mailto', 'ftp', 'ldap', 'ldaps']), '')
text.gsub(URI.regexp(%w[http https mailto ftp ldap ldaps]), "")
end
def self.remove_stray_punctuation(text)
@@ -42,7 +41,7 @@ class CreateTitle
end
def self.complete_sentences(text)
/(^.*[\S]{2,}[.!?:]+)\W/.match(text[0...80] + ' ')
/(^.*[\S]{2,}[.!?:]+)\W/.match(text[0...80] + " ")
end
def self.complete_words(text)

View File

@@ -1,14 +1,14 @@
# frozen_string_literal: true
require 'yaml'
require 'fileutils'
require_relative 'socialcast_api'
require "yaml"
require "fileutils"
require_relative "socialcast_api"
def load_config(file)
config = YAML::load_file(File.join(__dir__, file))
@domain = config['domain']
@username = config['username']
@password = config['password']
config = YAML.load_file(File.join(__dir__, file))
@domain = config["domain"]
@username = config["username"]
@password = config["password"]
end
def export
@@ -23,8 +23,8 @@ def export_users(page = 1)
users = @api.list_users(page: page)
return if users.empty?
users.each do |user|
File.open("output/users/#{user['id']}.json", 'w') do |f|
puts user['contact_info']['email']
File.open("output/users/#{user["id"]}.json", "w") do |f|
puts user["contact_info"]["email"]
f.write user.to_json
f.close
end
@@ -36,12 +36,12 @@ def export_messages(page = 1)
messages = @api.list_messages(page: page)
return if messages.empty?
messages.each do |message|
File.open("output/messages/#{message['id']}.json", 'w') do |f|
title = message['title']
title = message['body'] if title.empty?
File.open("output/messages/#{message["id"]}.json", "w") do |f|
title = message["title"]
title = message["body"] if title.empty?
title = title.split('\n')[0][0..50] unless title.empty?
puts "#{message['id']}: #{title}"
puts "#{message["id"]}: #{title}"
f.write message.to_json
f.close
end
@@ -51,9 +51,7 @@ end
def create_dir(path)
path = File.join(__dir__, path)
unless File.directory?(path)
FileUtils.mkdir_p(path)
end
FileUtils.mkdir_p(path) unless File.directory?(path)
end
load_config ARGV.shift

View File

@@ -1,12 +1,11 @@
# frozen_string_literal: true
require_relative './socialcast_message.rb'
require_relative './socialcast_user.rb'
require 'set'
require_relative "./socialcast_message.rb"
require_relative "./socialcast_user.rb"
require "set"
require File.expand_path(File.dirname(__FILE__) + "/../base.rb")
class ImportScripts::Socialcast < ImportScripts::Base
MESSAGES_DIR = "output/messages"
USERS_DIR = "output/users"
@@ -29,15 +28,13 @@ class ImportScripts::Socialcast < ImportScripts::Base
imported = 0
total = count_files(MESSAGES_DIR)
Dir.foreach(MESSAGES_DIR) do |filename|
next if filename == ('.') || filename == ('..')
next if filename == (".") || filename == ("..")
topics += 1
message_json = File.read MESSAGES_DIR + '/' + filename
message_json = File.read MESSAGES_DIR + "/" + filename
message = SocialcastMessage.new(message_json)
next unless message.title
created_topic = import_topic message.topic
if created_topic
import_posts message.replies, created_topic.topic_id
end
import_posts message.replies, created_topic.topic_id if created_topic
imported += 1
print_status topics, total
end
@@ -48,8 +45,8 @@ class ImportScripts::Socialcast < ImportScripts::Base
users = 0
total = count_files(USERS_DIR)
Dir.foreach(USERS_DIR) do |filename|
next if filename == ('.') || filename == ('..')
user_json = File.read USERS_DIR + '/' + filename
next if filename == (".") || filename == ("..")
user_json = File.read USERS_DIR + "/" + filename
user = SocialcastUser.new(user_json).user
create_user user, user[:id]
users += 1
@@ -58,7 +55,7 @@ class ImportScripts::Socialcast < ImportScripts::Base
end
def count_files(path)
Dir.foreach(path).select { |f| f != '.' && f != '..' }.count
Dir.foreach(path).select { |f| f != "." && f != ".." }.count
end
def import_topic(topic)
@@ -80,9 +77,7 @@ class ImportScripts::Socialcast < ImportScripts::Base
end
def import_posts(posts, topic_id)
posts.each do |post|
import_post post, topic_id
end
posts.each { |post| import_post post, topic_id }
end
def import_post(post, topic_id)
@@ -95,9 +90,6 @@ class ImportScripts::Socialcast < ImportScripts::Base
puts new_post.inspect
end
end
end
if __FILE__ == $0
ImportScripts::Socialcast.new.perform
end
ImportScripts::Socialcast.new.perform if __FILE__ == $0

View File

@@ -1,10 +1,9 @@
# frozen_string_literal: true
require 'base64'
require 'json'
require "base64"
require "json"
class SocialcastApi
attr_accessor :domain, :username, :password
def initialize(domain, username, password)
@@ -29,12 +28,12 @@ class SocialcastApi
def list_users(opts = {})
page = opts[:page] ? opts[:page] : 1
response = request "#{base_url}/users?page=#{page}"
response['users'].sort { |u| u['id'] }
response["users"].sort { |u| u["id"] }
end
def list_messages(opts = {})
page = opts[:page] ? opts[:page] : 1
response = request "#{base_url}/messages?page=#{page}"
response['messages'].sort { |m| m['id'] }
response["messages"].sort { |m| m["id"] }
end
end

View File

@@ -1,24 +1,23 @@
# frozen_string_literal: true
require 'json'
require 'cgi'
require 'time'
require_relative 'create_title.rb'
require "json"
require "cgi"
require "time"
require_relative "create_title.rb"
class SocialcastMessage
DEFAULT_CATEGORY = "Socialcast Import"
DEFAULT_TAG = "socialcast-import"
TAGS_AND_CATEGORIES = {
"somegroupname" => {
category: "Apple Stems",
tags: ["waxy", "tough"]
tags: %w[waxy tough],
},
"someothergroupname" => {
category: "Orange Peels",
tags: ["oily"]
}
}
tags: ["oily"],
},
}
def initialize(message_json)
@parsed_json = JSON.parse message_json
@@ -26,18 +25,18 @@ class SocialcastMessage
def topic
topic = {}
topic[:id] = @parsed_json['id']
topic[:author_id] = @parsed_json['user']['id']
topic[:id] = @parsed_json["id"]
topic[:author_id] = @parsed_json["user"]["id"]
topic[:title] = title
topic[:raw] = @parsed_json['body']
topic[:created_at] = Time.parse @parsed_json['created_at']
topic[:raw] = @parsed_json["body"]
topic[:created_at] = Time.parse @parsed_json["created_at"]
topic[:tags] = tags
topic[:category] = category
topic
end
def title
CreateTitle.from_body @parsed_json['body']
CreateTitle.from_body @parsed_json["body"]
end
def tags
@@ -55,39 +54,37 @@ class SocialcastMessage
def category
category = DEFAULT_CATEGORY
if group && TAGS_AND_CATEGORIES[group]
category = TAGS_AND_CATEGORIES[group][:category]
end
category = TAGS_AND_CATEGORIES[group][:category] if group && TAGS_AND_CATEGORIES[group]
category
end
def group
@parsed_json['group']['groupname'].downcase if @parsed_json['group'] && @parsed_json['group']['groupname']
if @parsed_json["group"] && @parsed_json["group"]["groupname"]
@parsed_json["group"]["groupname"].downcase
end
end
def url
@parsed_json['url']
@parsed_json["url"]
end
def message_type
@parsed_json['message_type']
@parsed_json["message_type"]
end
def replies
posts = []
comments = @parsed_json['comments']
comments.each do |comment|
posts << post_from_comment(comment)
end
comments = @parsed_json["comments"]
comments.each { |comment| posts << post_from_comment(comment) }
posts
end
def post_from_comment(comment)
post = {}
post[:id] = comment['id']
post[:author_id] = comment['user']['id']
post[:raw] = comment['text']
post[:created_at] = Time.parse comment['created_at']
post[:id] = comment["id"]
post[:author_id] = comment["user"]["id"]
post[:raw] = comment["text"]
post[:created_at] = Time.parse comment["created_at"]
post
end

View File

@@ -1,26 +1,24 @@
# frozen_string_literal: true
require 'json'
require 'cgi'
require 'time'
require "json"
require "cgi"
require "time"
class SocialcastUser
def initialize(user_json)
@parsed_json = JSON.parse user_json
end
def user
email = @parsed_json['contact_info']['email']
email = "#{@parsed_json['id']}@noemail.com" unless email
email = @parsed_json["contact_info"]["email"]
email = "#{@parsed_json["id"]}@noemail.com" unless email
user = {}
user[:id] = @parsed_json['id']
user[:name] = @parsed_json['name']
user[:username] = @parsed_json['username']
user[:id] = @parsed_json["id"]
user[:name] = @parsed_json["name"]
user[:username] = @parsed_json["username"]
user[:email] = email
user[:staged] = true
user
end
end

View File

@@ -1,26 +1,28 @@
# frozen_string_literal: true
require 'minitest/autorun'
require_relative '../create_title.rb'
require "minitest/autorun"
require_relative "../create_title.rb"
class TestCreateTitle < Minitest::Test
def test_create_title_1
body = "@GreatCheerThreading \nWhere can I find information on how GCTS stacks up against the competition? What are the key differentiators?"
body =
"@GreatCheerThreading \nWhere can I find information on how GCTS stacks up against the competition? What are the key differentiators?"
expected = "Where can I find information on how GCTS stacks up against the competition?"
title = CreateTitle.from_body body
assert_equal(expected, title)
end
def test_create_title_2
body = "GCTS in 200 stores across town. How many threads per inch would you guess? @GreatCheerThreading"
body =
"GCTS in 200 stores across town. How many threads per inch would you guess? @GreatCheerThreading"
expected = "GCTS in 200 stores across town. How many threads per inch would you guess?"
title = CreateTitle.from_body body
assert_equal(expected, title)
end
def test_create_title_3
body = "gFabric Sheets 1.2 now has Great Cheer Threads, letting you feel the softness running through the cotton fibers."
body =
"gFabric Sheets 1.2 now has Great Cheer Threads, letting you feel the softness running through the cotton fibers."
expected = "gFabric Sheets 1.2 now has Great Cheer Threads, letting you feel the softness..."
title = CreateTitle.from_body body
assert_equal(expected, title)
@@ -34,49 +36,56 @@ class TestCreateTitle < Minitest::Test
end
def test_create_title_5
body = "One sentence. Two sentence. Three sentence. Four is going to go on and on for more words than we want."
body =
"One sentence. Two sentence. Three sentence. Four is going to go on and on for more words than we want."
expected = "One sentence. Two sentence. Three sentence."
title = CreateTitle.from_body body
assert_equal(expected, title)
end
def test_create_title_6
body = "Anyone know of any invite codes for www.greatcheer.io (the Great Cheer v2 site)?\n\n//cc @RD @GreatCheerThreading"
body =
"Anyone know of any invite codes for www.greatcheer.io (the Great Cheer v2 site)?\n\n//cc @RD @GreatCheerThreading"
expected = "Anyone know of any invite codes for www.greatcheer.io (the Great Cheer v2 site)?"
title = CreateTitle.from_body body
assert_equal(expected, title)
end
# Variant of test_create_title_6 whose question is slightly longer
# ("... site of yore)?"): the expected title is cut after "site" and ends in
# "..." instead of containing the whole question.
# The body literal is listed twice (one-line form, then wrapped form); both
# assignments hold the same string.
def test_create_title_6b
body = "Anyone know of any invite codes for www.greatcheer.io (the Great Cheer v2 site of yore)?\n\n//cc @RD @GreatCheerThreading"
body =
"Anyone know of any invite codes for www.greatcheer.io (the Great Cheer v2 site of yore)?\n\n//cc @RD @GreatCheerThreading"
expected = "Anyone know of any invite codes for www.greatcheer.io (the Great Cheer v2 site..."
title = CreateTitle.from_body body
assert_equal(expected, title)
end
# Variant of test_create_title_6b with "?!" placed right after the URL: the
# expected title stops at that "?!" and drops the parenthesised remainder.
# The body literal is listed twice (one-line form, then wrapped form); both
# assignments hold the same string.
def test_create_title_6c
body = "Anyone know of any invite codes for www.greatcheer.io?! (the Great Cheer v2 site of yore)?\n\n//cc @RD @GreatCheerThreading"
body =
"Anyone know of any invite codes for www.greatcheer.io?! (the Great Cheer v2 site of yore)?\n\n//cc @RD @GreatCheerThreading"
expected = "Anyone know of any invite codes for www.greatcheer.io?!"
title = CreateTitle.from_body body
assert_equal(expected, title)
end
# Expects CreateTitle.from_body to skip the leading "@GreatCheerThreading"
# mention line and return the question paragraph, without the paragraph that
# comes after it.
# The body literal is listed twice (one-line form, then wrapped form); both
# assignments hold the same string.
def test_create_title_7
body = "@GreatCheerThreading \n\nDoes anyone know what the plan is to move to denser 1.2 threads for GCTS?\n\nI have a customer interested in the higher thread counts offered in 1.2."
body =
"@GreatCheerThreading \n\nDoes anyone know what the plan is to move to denser 1.2 threads for GCTS?\n\nI have a customer interested in the higher thread counts offered in 1.2."
expected = "Does anyone know what the plan is to move to denser 1.2 threads for GCTS?"
title = CreateTitle.from_body body
assert_equal(expected, title)
end
# Expects CreateTitle.from_body to skip the two leading @mentions and return
# the first paragraph after them; the quoted e-mail text that follows is not
# part of the expected title.
# The body literal is listed twice (one-line form, then wrapped form); both
# assignments hold the same string.
def test_create_title_8
body = "@GreatCheerThreading @FabricWeavingWorldwide \n\nI was just chatting with a customer, after receiving this email:\n\n\"Ours is more of a conceptual question. We have too much fiber"
body =
"@GreatCheerThreading @FabricWeavingWorldwide \n\nI was just chatting with a customer, after receiving this email:\n\n\"Ours is more of a conceptual question. We have too much fiber"
expected = "I was just chatting with a customer, after receiving this email:"
title = CreateTitle.from_body body
assert_equal(expected, title)
end
def test_create_title_9
body = "Hi,\n\nDoes anyone have a PPT deck on whats new in cotton (around 10 or so slides) nothing to detailed as per what we have in the current 1.x version?\n\nI am not after a what's coming in cotton 2"
body =
"Hi,\n\nDoes anyone have a PPT deck on whats new in cotton (around 10 or so slides) nothing to detailed as per what we have in the current 1.x version?\n\nI am not after a what's coming in cotton 2"
expected = "Does anyone have a PPT deck on whats new in cotton (around 10 or so slides)..."
title = CreateTitle.from_body body
assert_equal(expected, title)
@@ -90,7 +99,8 @@ class TestCreateTitle < Minitest::Test
end
def test_create_title_11
body = "Hi Guys,\nI'm working with #gtcs and one of the things we're playing with is TC. What better tool to demo and use than our own \nhttps://greatcheerthreading.com/themostthreads/cool-stuff\n\nThis used to work great in 2013,"
body =
"Hi Guys,\nI'm working with #gtcs and one of the things we're playing with is TC. What better tool to demo and use than our own \nhttps://greatcheerthreading.com/themostthreads/cool-stuff\n\nThis used to work great in 2013,"
expected = "I'm working with #gtcs and one of the things we're playing with is TC."
title = CreateTitle.from_body body
assert_equal(expected, title)
@@ -104,10 +114,10 @@ class TestCreateTitle < Minitest::Test
end
# Body starts with a short phrase plus a URL on the first line, then a line
# opening with two @mentions. The expected title is the text of that second
# line after the mentions.
# The body literal is listed twice (one-line form, then wrapped form); both
# assignments hold the same string.
def test_create_title_13
body = "Embroidered TC ... http://blogs.greatcheerthreading.com/thread/embroidering-the-threads-is-just-the-beginning\n@SoftStuff @TightWeave and team hopefully can share their thoughts on this recent post."
body =
"Embroidered TC ... http://blogs.greatcheerthreading.com/thread/embroidering-the-threads-is-just-the-beginning\n@SoftStuff @TightWeave and team hopefully can share their thoughts on this recent post."
expected = "and team hopefully can share their thoughts on this recent post."
title = CreateTitle.from_body body
assert_equal(expected, title)
end
end

View File

@@ -1,6 +1,7 @@
# frozen_string_literal: true
USERS = '{
USERS =
'{
"users": [
{
"contact_info": {
@@ -1082,7 +1083,8 @@ USERS = '{
]
}'
MESSAGES = '{
MESSAGES =
'{
"messages": [
{
"id": 426,
@@ -5429,7 +5431,8 @@ MESSAGES = '{
"messages_next_page": 2
}'
MESSAGES_PG_2 = '{
MESSAGES_PG_2 =
'{
"messages": [
{
"id": 386,

View File

@@ -1,21 +1,20 @@
# frozen_string_literal: true
require 'minitest/autorun'
require 'yaml'
require_relative '../socialcast_api.rb'
require_relative './test_data.rb'
require "minitest/autorun"
require "yaml"
require_relative "../socialcast_api.rb"
require_relative "./test_data.rb"
class TestSocialcastApi < Minitest::Test
DEBUG = false
# Loads config.ex.yml from this file's directory and copies its domain,
# username, password, kb_id and question_id entries into instance variables,
# then forwards args to the superclass initializer.
# The load-and-assign sequence is listed twice (YAML::load_file with single
# quotes, then YAML.load_file with double quotes); both read the same file
# and the same keys.
def initialize(args)
config = YAML::load_file(File.join(__dir__, 'config.ex.yml'))
@domain = config['domain']
@username = config['username']
@password = config['password']
@kb_id = config['kb_id']
@question_id = config['question_id']
config = YAML.load_file(File.join(__dir__, "config.ex.yml"))
@domain = config["domain"]
@username = config["username"]
@password = config["password"]
@kb_id = config["kb_id"]
@question_id = config["question_id"]
super args
end
@@ -30,18 +29,18 @@ class TestSocialcastApi < Minitest::Test
end
# Checks that @socialcast.base_url is the demo domain's API root.
# The assertion is listed twice (single- and double-quoted literal).
def test_base_url
assert_equal 'https://demo.socialcast.com/api', @socialcast.base_url
assert_equal "https://demo.socialcast.com/api", @socialcast.base_url
end
# Checks the :Authorization and :Accept entries of @socialcast.headers
# against a fixed Basic credential string and "application/json".
# Each assertion is listed twice (single- and double-quoted literal).
def test_headers
headers = @socialcast.headers
assert_equal 'Basic ZW1pbHlAc29jaWFsY2FzdC5jb206ZGVtbw==', headers[:Authorization]
assert_equal 'application/json', headers[:Accept]
assert_equal "Basic ZW1pbHlAc29jaWFsY2FzdC5jb206ZGVtbw==", headers[:Authorization]
assert_equal "application/json", headers[:Accept]
end
# Fetches users via @socialcast.list_users, expects 15 of them, and compares
# the first one with the first entry of the USERS fixture's "users" array.
# NOTE(review): the block given to sort takes a single parameter and returns
# an id rather than a -1/0/1 comparison of two elements; sort_by may have
# been intended -- confirm before relying on the resulting order.
# The expected= line is listed twice (single- and double-quoted keys).
def test_list_users
users = @socialcast.list_users
expected = JSON.parse(USERS)['users'].sort { |u| u['id'] }
expected = JSON.parse(USERS)["users"].sort { |u| u["id"] }
assert_equal 15, users.size
assert_equal expected[0], users[0]
end
@@ -53,14 +52,14 @@ class TestSocialcastApi < Minitest::Test
# Fetches messages via @socialcast.list_messages, expects 20 of them, and
# uses check_keys to verify the first one has every key of the first
# MESSAGES fixture entry.
# NOTE(review): the sort block takes one parameter and returns an id, not a
# two-element comparison; sort_by may have been intended -- confirm.
# The expected= line is listed twice (single- and double-quoted keys).
def test_list_messages
messages = @socialcast.list_messages
expected = JSON.parse(MESSAGES)['messages'].sort { |m| m['id'] }
expected = JSON.parse(MESSAGES)["messages"].sort { |m| m["id"] }
assert_equal 20, messages.size
check_keys expected[0], messages[0]
end
# Same check as test_list_messages, but requests page 2 and compares against
# the MESSAGES_PG_2 fixture.
# NOTE(review): the sort block takes one parameter and returns an id, not a
# two-element comparison; sort_by may have been intended -- confirm.
# The expected= line is listed twice (single- and double-quoted keys).
def test_messages_next_page
messages = @socialcast.list_messages(page: 2)
expected = JSON.parse(MESSAGES_PG_2)['messages'].sort { |m| m['id'] }
expected = JSON.parse(MESSAGES_PG_2)["messages"].sort { |m| m["id"] }
assert_equal 20, messages.size
check_keys expected[0], messages[0]
end
@@ -69,18 +68,16 @@ class TestSocialcastApi < Minitest::Test
# Asserts that every key of the expected hash is also a key of the actual
# hash, using the key itself as the assertion failure message.
# NOTE(review): msg is built but never passed to assert in the lines shown
# here, and "caller[0]" inside it is literal text, not interpolated.
# The key loop is listed twice (do/end form, then brace form).
def check_keys(expected, actual)
msg = "### caller[0]:\nKey not found in actual keys: #{actual.keys}\n"
expected.keys.each do |k|
assert (actual.keys.include? k), "#{k}"
end
expected.keys.each { |k| assert (actual.keys.include? k), "#{k}" }
end
# Prints message to stdout, preceded by a "### <caller location>" header and
# surrounded by blank lines, but only when show is truthy or the DEBUG
# constant is set.
# The puts lines are listed in both single- and double-quoted spellings.
def debug(message, show = false)
if show || DEBUG
puts '### ' + caller[0]
puts ''
puts "### " + caller[0]
puts ""
puts message
puts ''
puts ''
puts ""
puts ""
end
end
end

View File

@@ -1,8 +1,8 @@
# frozen_string_literal: true
require_relative './socialcast_message.rb'
require_relative './socialcast_user.rb'
require 'set'
require_relative "./socialcast_message.rb"
require_relative "./socialcast_user.rb"
require "set"
require File.expand_path(File.dirname(__FILE__) + "/../base.rb")
MESSAGES_DIR = "output/messages"
@@ -11,8 +11,8 @@ def titles
topics = 0
total = count_files(MESSAGES_DIR)
Dir.foreach(MESSAGES_DIR) do |filename|
next if filename == ('.') || filename == ('..')
message_json = File.read MESSAGES_DIR + '/' + filename
next if filename == (".") || filename == ("..")
message_json = File.read MESSAGES_DIR + "/" + filename
message = SocialcastMessage.new(message_json)
next unless message.title
#puts "#{filename}, #{message.replies.size}, #{message.topic[:raw].size}, #{message.message_type}, #{message.title}"
@@ -23,7 +23,7 @@ def titles
end
# Returns the number of entries in the directory at path, not counting the
# "." and ".." entries that Dir.foreach yields.
# The expression is listed twice (single- and double-quoted literals).
def count_files(path)
Dir.foreach(path).select { |f| f != '.' && f != '..' }.count
Dir.foreach(path).select { |f| f != "." && f != ".." }.count
end
titles

View File

@@ -1,6 +1,6 @@
# frozen_string_literal: true
require_relative 'base.rb'
require_relative "base.rb"
# Import script for SourceForge discussions.
#
@@ -15,10 +15,10 @@ require_relative 'base.rb'
class ImportScripts::Sourceforge < ImportScripts::Base
# When the URL of your project is https://sourceforge.net/projects/foo/
# than the value of PROJECT_NAME is 'foo'
PROJECT_NAME = 'project_name'
PROJECT_NAME = "project_name"
# This is the path to the discussion.json that you exported from SourceForge.
JSON_FILE = '/path/to/discussion.json'
JSON_FILE = "/path/to/discussion.json"
def initialize
super
@@ -27,7 +27,7 @@ class ImportScripts::Sourceforge < ImportScripts::Base
end
def execute
puts '', 'Importing from SourceForge...'
puts "", "Importing from SourceForge..."
load_json
@@ -40,25 +40,26 @@ class ImportScripts::Sourceforge < ImportScripts::Base
end
# Creates one category per entry of @json[:forums], using the forum's
# shortname as import id and its name as category name. After a category is
# created, the post_create_action proc revises the first post of the
# category's topic so that its raw text is the forum description
# (revised_at: now, bypass_bump: true), acting as @system_user.
# The puts line and the post_create_action proc are each listed in two
# layouts (inline `proc do` and proc on its own line); the statements inside
# are the same.
def import_categories
puts '', 'importing categories'
puts "", "importing categories"
create_categories(@json[:forums]) do |forum|
{
id: forum[:shortname],
name: forum[:name],
post_create_action: proc do |category|
changes = { raw: forum[:description] }
opts = { revised_at: Time.now, bypass_bump: true }
post_create_action:
proc do |category|
changes = { raw: forum[:description] }
opts = { revised_at: Time.now, bypass_bump: true }
post = category.topic.first_post
post.revise(@system_user, changes, opts)
end
post = category.topic.first_post
post.revise(@system_user, changes, opts)
end,
}
end
end
def import_topics
puts '', 'importing posts'
puts "", "importing posts"
imported_post_count = 0
total_post_count = count_posts
@@ -78,7 +79,7 @@ class ImportScripts::Sourceforge < ImportScripts::Base
id: "#{thread[:_id]}_#{post[:slug]}",
user_id: @system_user,
created_at: Time.zone.parse(post[:timestamp]),
raw: process_post_text(forum, thread, post)
raw: process_post_text(forum, thread, post),
}
if post == first_post
@@ -103,9 +104,7 @@ class ImportScripts::Sourceforge < ImportScripts::Base
total_count = 0
@json[:forums].each do |forum|
forum[:threads].each do |thread|
total_count += thread[:posts].size
end
forum[:threads].each { |thread| total_count += thread[:posts].size }
end
total_count
@@ -117,20 +116,22 @@ class ImportScripts::Sourceforge < ImportScripts::Base
# Builds the raw text for an imported post: a bold header naming the author
# and linking the post date to the original SourceForge URL, followed by the
# post text with runs of three or more "~" replaced by "```".
# Note that gsub! modifies post[:text] in place.
# The last expression (the interpolated header + text string) is the return
# value. Several statements are listed twice (single-/double-quoted or
# one-line/wrapped spelling) with the same content.
def process_post_text(forum, thread, post)
text = post[:text]
text.gsub!(/~{3,}/, '```') # Discourse doesn't recognize ~~~ as beginning/end of code blocks
text.gsub!(/~{3,}/, "```") # Discourse doesn't recognize ~~~ as beginning/end of code blocks
# SourceForge doesn't allow symbols in usernames, so we are safe here.
# Well, unless it's the anonymous user, which has an evil asterisk in the JSON file...
username = post[:author]
username = 'anonymous' if username == '*anonymous'
username = "anonymous" if username == "*anonymous"
# anonymous and nobody are nonexistent users. Make sure we don't create links for them.
user_without_profile = username == 'anonymous' || username == 'nobody'
user_link = user_without_profile ? username : "[#{username}](https://sourceforge.net/u/#{username}/)"
user_without_profile = username == "anonymous" || username == "nobody"
user_link =
user_without_profile ? username : "[#{username}](https://sourceforge.net/u/#{username}/)"
# Create a nice looking header for each imported post that links to the author's user profile and the old post.
post_date = Time.zone.parse(post[:timestamp]).strftime('%A, %B %d, %Y')
post_url = "https://sourceforge.net/p/#{PROJECT_NAME}/discussion/#{forum[:shortname]}/thread/#{thread[:_id]}/##{post[:slug]}"
post_date = Time.zone.parse(post[:timestamp]).strftime("%A, %B %d, %Y")
post_url =
"https://sourceforge.net/p/#{PROJECT_NAME}/discussion/#{forum[:shortname]}/thread/#{thread[:_id]}/##{post[:slug]}"
"**#{user_link}** wrote on [#{post_date}](#{post_url}):\n\n#{text}"
end

View File

@@ -5,18 +5,18 @@ require "tiny_tds"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::StackOverflow < ImportScripts::Base
BATCH_SIZE ||= 1000
# Calls the base initializer, then opens a TinyTds client whose host,
# username, password and database come from the DB_HOST, DB_USERNAME,
# DB_PASSWORD and DB_NAME environment variables, and stores it in @client.
# The @client assignment is listed in two layouts with identical arguments.
def initialize
super
@client = TinyTds::Client.new(
host: ENV["DB_HOST"],
username: ENV["DB_USERNAME"],
password: ENV["DB_PASSWORD"],
database: ENV["DB_NAME"],
)
@client =
TinyTds::Client.new(
host: ENV["DB_HOST"],
username: ENV["DB_USERNAME"],
password: ENV["DB_PASSWORD"],
database: ENV["DB_NAME"],
)
end
def execute
@@ -36,7 +36,7 @@ class ImportScripts::StackOverflow < ImportScripts::Base
total = query("SELECT COUNT(*) count FROM Users WHERE Id > 0").first["count"]
batches(BATCH_SIZE) do |offset|
users = query(<<~SQL
users = query(<<~SQL).to_a
SELECT TOP #{BATCH_SIZE}
Id
, UserTypeId
@@ -55,7 +55,6 @@ class ImportScripts::StackOverflow < ImportScripts::Base
AND Id > #{last_user_id}
ORDER BY Id
SQL
).to_a
break if users.empty?
@@ -77,11 +76,16 @@ class ImportScripts::StackOverflow < ImportScripts::Base
name: u["RealName"],
location: u["Location"],
date_of_birth: u["Birthday"],
post_create_action: proc do |user|
if u["ProfileImageUrl"].present?
UserAvatar.import_url_for_user(u["ProfileImageUrl"], user) rescue nil
end
end
post_create_action:
proc do |user|
if u["ProfileImageUrl"].present?
begin
UserAvatar.import_url_for_user(u["ProfileImageUrl"], user)
rescue StandardError
nil
end
end
end,
}
end
end
@@ -91,11 +95,16 @@ class ImportScripts::StackOverflow < ImportScripts::Base
puts "", "Importing posts..."
last_post_id = -1
total = query("SELECT COUNT(*) count FROM Posts WHERE PostTypeId IN (1,2,3)").first["count"] +
query("SELECT COUNT(*) count FROM PostComments WHERE PostId IN (SELECT Id FROM Posts WHERE PostTypeId IN (1,2,3))").first["count"]
total =
query("SELECT COUNT(*) count FROM Posts WHERE PostTypeId IN (1,2,3)").first["count"] +
query(
"SELECT COUNT(*) count FROM PostComments WHERE PostId IN (SELECT Id FROM Posts WHERE PostTypeId IN (1,2,3))",
).first[
"count"
]
batches(BATCH_SIZE) do |offset|
posts = query(<<~SQL
posts = query(<<~SQL).to_a
SELECT TOP #{BATCH_SIZE}
Id
, PostTypeId
@@ -113,14 +122,13 @@ class ImportScripts::StackOverflow < ImportScripts::Base
AND Id > #{last_post_id}
ORDER BY Id
SQL
).to_a
break if posts.empty?
last_post_id = posts[-1]["Id"]
post_ids = posts.map { |p| p["Id"] }
comments = query(<<~SQL
comments = query(<<~SQL).to_a
SELECT CONCAT('Comment-', Id) AS Id
, PostId AS ParentId
, Text
@@ -130,7 +138,6 @@ class ImportScripts::StackOverflow < ImportScripts::Base
WHERE PostId IN (#{post_ids.join(",")})
ORDER BY Id
SQL
).to_a
posts_and_comments = (posts + comments).sort_by { |p| p["CreationDate"] }
post_and_comment_ids = posts_and_comments.map { |p| p["Id"] }
@@ -173,7 +180,7 @@ class ImportScripts::StackOverflow < ImportScripts::Base
last_like_id = -1
batches(BATCH_SIZE) do |offset|
likes = query(<<~SQL
likes = query(<<~SQL).to_a
SELECT TOP #{BATCH_SIZE}
Id
, PostId
@@ -185,7 +192,6 @@ class ImportScripts::StackOverflow < ImportScripts::Base
AND Id > #{last_like_id}
ORDER BY Id
SQL
).to_a
break if likes.empty?
@@ -196,17 +202,26 @@ class ImportScripts::StackOverflow < ImportScripts::Base
next unless post_id = post_id_from_imported_post_id(l["PostId"])
next unless user = User.find_by(id: user_id)
next unless post = Post.find_by(id: post_id)
PostActionCreator.like(user, post) rescue nil
begin
PostActionCreator.like(user, post)
rescue StandardError
nil
end
end
end
puts "", "Importing comment likes..."
last_like_id = -1
total = query("SELECT COUNT(*) count FROM Comments2Votes WHERE VoteTypeId = 2 AND DeletionDate IS NULL").first["count"]
total =
query(
"SELECT COUNT(*) count FROM Comments2Votes WHERE VoteTypeId = 2 AND DeletionDate IS NULL",
).first[
"count"
]
batches(BATCH_SIZE) do |offset|
likes = query(<<~SQL
likes = query(<<~SQL).to_a
SELECT TOP #{BATCH_SIZE}
Id
, CONCAT('Comment-', PostCommentId) AS PostCommentId
@@ -218,7 +233,6 @@ class ImportScripts::StackOverflow < ImportScripts::Base
AND Id > #{last_like_id}
ORDER BY Id
SQL
).to_a
break if likes.empty?
@@ -229,7 +243,11 @@ class ImportScripts::StackOverflow < ImportScripts::Base
next unless post_id = post_id_from_imported_post_id(l["PostCommentId"])
next unless user = User.find_by(id: user_id)
next unless post = Post.find_by(id: post_id)
PostActionCreator.like(user, post) rescue nil
begin
PostActionCreator.like(user, post)
rescue StandardError
nil
end
end
end
end
@@ -249,7 +267,6 @@ class ImportScripts::StackOverflow < ImportScripts::Base
# Hands the SQL string to @client.execute and returns its result unchanged.
def query(sql)
  result = @client.execute(sql)
  result
end
end
ImportScripts::StackOverflow.new.perform

View File

@@ -3,11 +3,10 @@
# convert huge XML dump to mysql friendly import
#
require 'ox'
require 'set'
require "ox"
require "set"
class Saxy < Ox::Sax
# Starts the parser with an empty @stack array.
def initialize
  @stack = Array.new
end
@@ -32,7 +31,6 @@ class Saxy < Ox::Sax
# Stores val under :text on the last entry of @stack.
def cdata(val)
  current = @stack.last
  current[:text] = val
end
end
class Convert < Saxy
@@ -59,10 +57,13 @@ class Convert < Saxy
end
# Prints a "CREATE TABLE <name> (<cols>);" statement to stdout.
# <name> is data[:attrs][:name]; <cols> is a comma-separated list of
# "<Field> <Type>" pairs taken from the :attrs of each entry in data[:cols].
# The cols= computation is listed in two layouts (`end.join` and a
# leading-dot chain); both build the same string.
def output_table_definition(data)
cols = data[:cols].map do |col|
attrs = col[:attrs]
"#{attrs[:Field]} #{attrs[:Type]}"
end.join(", ")
cols =
data[:cols]
.map do |col|
attrs = col[:attrs]
"#{attrs[:Field]} #{attrs[:Type]}"
end
.join(", ")
puts "CREATE TABLE #{data[:attrs][:name]} (#{cols});"
end
@@ -77,4 +78,4 @@ class Convert < Saxy
end
end
Ox.sax_parse(Convert.new(skip_data: ['metrics2', 'user_log']), File.open(ARGV[0]))
Ox.sax_parse(Convert.new(skip_data: %w[metrics2 user_log]), File.open(ARGV[0]))

View File

@@ -1,7 +1,7 @@
# frozen_string_literal: true
require_relative 'base'
require 'tiny_tds'
require_relative "base"
require "tiny_tds"
# Import script for Telligent communities
#
@@ -40,17 +40,19 @@ require 'tiny_tds'
class ImportScripts::Telligent < ImportScripts::Base
BATCH_SIZE ||= 1000
LOCAL_AVATAR_REGEX ||= /\A~\/.*(?<directory>communityserver-components-(?:selectable)?avatars)\/(?<path>[^\/]+)\/(?<filename>.+)/i
REMOTE_AVATAR_REGEX ||= /\Ahttps?:\/\//i
LOCAL_AVATAR_REGEX ||=
%r{\A~/.*(?<directory>communityserver-components-(?:selectable)?avatars)/(?<path>[^/]+)/(?<filename>.+)}i
REMOTE_AVATAR_REGEX ||= %r{\Ahttps?://}i
ATTACHMENT_REGEXES ||= [
/<a[^>]*\shref="[^"]*?\/cfs-file(?:systemfile)?(?:\.ashx)?\/__key\/(?<directory>[^\/]+)\/(?<path>[^\/]+)\/(?<filename>.+?)".*?>.*?<\/a>/i,
/<img[^>]*\ssrc="[^"]*?\/cfs-file(?:systemfile)?(?:\.ashx)?\/__key\/(?<directory>[^\/]+)\/(?<path>[^\/]+)\/(?<filename>.+?)".*?>/i,
/\[View:[^\]]*?\/cfs-file(?:systemfile)?(?:\.ashx)?\/__key\/(?<directory>[^\/]+)\/(?<path>[^\/]+)\/(?<filename>.+?)(?:\:[:\d\s]*?)?\]/i,
/\[(?<tag>img|url)\][^\[]*?cfs-file(?:systemfile)?(?:\.ashx)?\/__key\/(?<directory>[^\/]+)\/(?<path>[^\/]+)\/(?<filename>.+?)\[\/\k<tag>\]/i,
/\[(?<tag>img|url)=[^\[]*?cfs-file(?:systemfile)?(?:\.ashx)?\/__key\/(?<directory>[^\/]+)\/(?<path>[^\/]+)\/(?<filename>.+?)\][^\[]*?\[\/\k<tag>\]/i
%r{<a[^>]*\shref="[^"]*?/cfs-file(?:systemfile)?(?:\.ashx)?/__key/(?<directory>[^/]+)/(?<path>[^/]+)/(?<filename>.+?)".*?>.*?</a>}i,
%r{<img[^>]*\ssrc="[^"]*?/cfs-file(?:systemfile)?(?:\.ashx)?/__key/(?<directory>[^/]+)/(?<path>[^/]+)/(?<filename>.+?)".*?>}i,
%r{\[View:[^\]]*?/cfs-file(?:systemfile)?(?:\.ashx)?/__key/(?<directory>[^/]+)/(?<path>[^/]+)/(?<filename>.+?)(?:\:[:\d\s]*?)?\]}i,
%r{\[(?<tag>img|url)\][^\[]*?cfs-file(?:systemfile)?(?:\.ashx)?/__key/(?<directory>[^/]+)/(?<path>[^/]+)/(?<filename>.+?)\[/\k<tag>\]}i,
%r{\[(?<tag>img|url)=[^\[]*?cfs-file(?:systemfile)?(?:\.ashx)?/__key/(?<directory>[^/]+)/(?<path>[^/]+)/(?<filename>.+?)\][^\[]*?\[/\k<tag>\]}i,
]
PROPERTY_NAMES_REGEX ||= /(?<name>\w+):S:(?<start>\d+):(?<length>\d+):/
INTERNAL_LINK_REGEX ||= /\shref=".*?\/f\/\d+(?:(\/t\/(?<topic_id>\d+))|(?:\/p\/\d+\/(?<post_id>\d+))|(?:\/p\/(?<post_id>\d+)\/reply))\.aspx[^"]*?"/i
INTERNAL_LINK_REGEX ||=
%r{\shref=".*?/f/\d+(?:(/t/(?<topic_id>\d+))|(?:/p/\d+/(?<post_id>\d+))|(?:/p/(?<post_id>\d+)/reply))\.aspx[^"]*?"}i
CATEGORY_LINK_NORMALIZATION = '/.*?(f\/\d+)$/\1'
TOPIC_LINK_NORMALIZATION = '/.*?(f\/\d+\/t\/\d+)$/\1'
@@ -82,19 +84,20 @@ class ImportScripts::Telligent < ImportScripts::Base
"1D20" => "",
"B000" => "°",
"0003" => ["0300".to_i(16)].pack("U"),
"0103" => ["0301".to_i(16)].pack("U")
"0103" => ["0301".to_i(16)].pack("U"),
}
def initialize
super()
@client = TinyTds::Client.new(
host: ENV["DB_HOST"],
username: ENV["DB_USERNAME"],
password: ENV["DB_PASSWORD"],
database: ENV["DB_NAME"],
timeout: 60 # the user query is very slow
)
@client =
TinyTds::Client.new(
host: ENV["DB_HOST"],
username: ENV["DB_USERNAME"],
password: ENV["DB_PASSWORD"],
database: ENV["DB_NAME"],
timeout: 60, # the user query is very slow
)
@filestore_root_directory = ENV["FILE_BASE_DIR"]
@files = {}
@@ -180,10 +183,11 @@ class ImportScripts::Telligent < ImportScripts::Base
bio_raw: html_to_markdown(ap_properties["bio"]),
location: ap_properties["location"],
website: ap_properties["webAddress"],
post_create_action: proc do |user|
import_avatar(user, up_properties["avatarUrl"])
suspend_user(user, up_properties["BannedUntil"], up_properties["UserBanReason"])
end
post_create_action:
proc do |user|
import_avatar(user, up_properties["avatarUrl"])
suspend_user(user, up_properties["BannedUntil"], up_properties["UserBanReason"])
end,
}
end
@@ -193,13 +197,18 @@ class ImportScripts::Telligent < ImportScripts::Base
# TODO move into base importer (create_user) and use consistent error handling
def import_avatar(user, avatar_url)
return if @filestore_root_directory.blank? || avatar_url.blank? || avatar_url.include?("anonymous")
if @filestore_root_directory.blank? || avatar_url.blank? || avatar_url.include?("anonymous")
return
end
if match_data = avatar_url.match(LOCAL_AVATAR_REGEX)
avatar_path = File.join(@filestore_root_directory,
match_data[:directory].gsub("-", "."),
match_data[:path].split("-"),
match_data[:filename])
avatar_path =
File.join(
@filestore_root_directory,
match_data[:directory].gsub("-", "."),
match_data[:path].split("-"),
match_data[:filename],
)
if File.file?(avatar_path)
@uploader.create_avatar(user, avatar_path)
@@ -207,7 +216,11 @@ class ImportScripts::Telligent < ImportScripts::Base
STDERR.puts "Could not find avatar: #{avatar_path}"
end
elsif avatar_url.match?(REMOTE_AVATAR_REGEX)
UserAvatar.import_url_for_user(avatar_url, user) rescue nil
begin
UserAvatar.import_url_for_user(avatar_url, user)
rescue StandardError
nil
end
end
end
@@ -224,7 +237,7 @@ class ImportScripts::Telligent < ImportScripts::Base
end
def import_categories
if ENV['CATEGORY_MAPPING']
if ENV["CATEGORY_MAPPING"]
import_mapped_forums_as_categories
else
import_groups_and_forums_as_categories
@@ -234,7 +247,7 @@ class ImportScripts::Telligent < ImportScripts::Base
def import_mapped_forums_as_categories
puts "", "Importing categories..."
json = JSON.parse(File.read(ENV['CATEGORY_MAPPING']))
json = JSON.parse(File.read(ENV["CATEGORY_MAPPING"]))
categories = []
@forum_ids_to_tags = {}
@@ -256,7 +269,7 @@ class ImportScripts::Telligent < ImportScripts::Base
id: id,
name: name,
parent_id: parent_id,
forum_ids: index == last_index ? forum_ids : nil
forum_ids: index == last_index ? forum_ids : nil,
}
parent_id = id
end
@@ -271,9 +284,7 @@ class ImportScripts::Telligent < ImportScripts::Base
id: c[:id],
name: c[:name],
parent_category_id: category_id_from_imported_category_id(c[:parent_id]),
post_create_action: proc do |category|
map_forum_ids(category.id, c[:forum_ids])
end
post_create_action: proc { |category| map_forum_ids(category.id, c[:forum_ids]) },
}
end
end
@@ -302,10 +313,10 @@ class ImportScripts::Telligent < ImportScripts::Base
create_categories(parent_categories) do |row|
{
id: "G#{row['GroupID']}",
id: "G#{row["GroupID"]}",
name: clean_category_name(row["Name"]),
description: html_to_markdown(row["HtmlDescription"]),
position: row["SortOrder"]
position: row["SortOrder"],
}
end
@@ -320,28 +331,31 @@ class ImportScripts::Telligent < ImportScripts::Base
parent_category_id = parent_category_id_for(row)
if category_id = replace_with_category_id(child_categories, parent_category_id)
add_category(row['ForumId'], Category.find_by_id(category_id))
url = "f/#{row['ForumId']}"
add_category(row["ForumId"], Category.find_by_id(category_id))
url = "f/#{row["ForumId"]}"
Permalink.create(url: url, category_id: category_id) unless Permalink.exists?(url: url)
nil
else
{
id: row['ForumId'],
id: row["ForumId"],
parent_category_id: parent_category_id,
name: clean_category_name(row["Name"]),
description: html_to_markdown(row["Description"]),
position: row["SortOrder"],
post_create_action: proc do |category|
url = "f/#{row['ForumId']}"
Permalink.create(url: url, category_id: category.id) unless Permalink.exists?(url: url)
end
post_create_action:
proc do |category|
url = "f/#{row["ForumId"]}"
unless Permalink.exists?(url: url)
Permalink.create(url: url, category_id: category.id)
end
end,
}
end
end
end
# Looks up the category id for the row's group by its import id
# "G<GroupId>". Returns nil when the row has no "GroupId" key.
# The lookup line is listed twice (single- and double-quoted key).
def parent_category_id_for(row)
category_id_from_imported_category_id("G#{row['GroupId']}") if row.key?("GroupId")
category_id_from_imported_category_id("G#{row["GroupId"]}") if row.key?("GroupId")
end
def replace_with_category_id(child_categories, parent_category_id)
@@ -351,23 +365,21 @@ class ImportScripts::Telligent < ImportScripts::Base
# Intended to answer whether exactly one row in child_categories resolves
# (via parent_category_id_for) to the given parent_category_id.
# The counting loop is listed twice (do/end form, then brace form); read as
# a single file, both loops increment the same counter.
def only_child?(child_categories, parent_category_id)
count = 0
child_categories.each do |row|
count += 1 if parent_category_id_for(row) == parent_category_id
end
child_categories.each { |row| count += 1 if parent_category_id_for(row) == parent_category_id }
count == 1
end
# Returns name with HTML entities unescaped and surrounding whitespace
# stripped.
# The expression is listed twice (two-line chain, then one-line form).
def clean_category_name(name)
CGI.unescapeHTML(name)
.strip
CGI.unescapeHTML(name).strip
end
def import_topics
puts "", "Importing topics..."
last_topic_id = -1
total_count = count("SELECT COUNT(1) AS count FROM te_Forum_Threads t WHERE #{ignored_forum_sql_condition}")
total_count =
count("SELECT COUNT(1) AS count FROM te_Forum_Threads t WHERE #{ignored_forum_sql_condition}")
batches do |offset|
rows = query(<<~SQL)
@@ -399,13 +411,16 @@ class ImportScripts::Telligent < ImportScripts::Base
created_at: row["DateCreated"],
closed: row["IsLocked"],
views: row["TotalViews"],
post_create_action: proc do |action_post|
topic = action_post.topic
Jobs.enqueue_at(topic.pinned_until, :unpin_topic, topic_id: topic.id) if topic.pinned_until
url = "f/#{row['ForumId']}/t/#{row['ThreadId']}"
Permalink.create(url: url, topic_id: topic.id) unless Permalink.exists?(url: url)
import_topic_views(topic, row["TopicContentId"])
end
post_create_action:
proc do |action_post|
topic = action_post.topic
if topic.pinned_until
Jobs.enqueue_at(topic.pinned_until, :unpin_topic, topic_id: topic.id)
end
url = "f/#{row["ForumId"]}/t/#{row["ThreadId"]}"
Permalink.create(url: url, topic_id: topic.id) unless Permalink.exists?(url: url)
import_topic_views(topic, row["TopicContentId"])
end,
}
if row["StickyDate"] > Time.now
@@ -446,9 +461,8 @@ class ImportScripts::Telligent < ImportScripts::Base
end
# Returns (and memoizes in @ignored_forum_sql_condition) a SQL condition on
# alias t: "t.ForumId NOT IN (...)" built from @ignored_forum_ids when that
# list is present, otherwise the always-true "1 = 1".
# The ids are joined into the SQL text as-is.
# The assignment is listed in two layouts (backslash-continued ternary, then
# wrapped one-line ternary); both produce the same string.
def ignored_forum_sql_condition
@ignored_forum_sql_condition ||= @ignored_forum_ids.present? \
? "t.ForumId NOT IN (#{@ignored_forum_ids.join(',')})" \
: "1 = 1"
@ignored_forum_sql_condition ||=
@ignored_forum_ids.present? ? "t.ForumId NOT IN (#{@ignored_forum_ids.join(",")})" : "1 = 1"
end
def import_posts
@@ -492,7 +506,8 @@ class ImportScripts::Telligent < ImportScripts::Base
next if all_records_exist?(:post, rows.map { |row| row["ThreadReplyId"] })
create_posts(rows, total: total_count, offset: offset) do |row|
imported_parent_id = row["ParentReplyId"]&.nonzero? ? row["ParentReplyId"] : import_topic_id(row["ThreadId"])
imported_parent_id =
row["ParentReplyId"]&.nonzero? ? row["ParentReplyId"] : import_topic_id(row["ThreadId"])
parent_post = topic_lookup_from_imported_post_id(imported_parent_id)
user_id = user_id_from_imported_user_id(row["UserId"]) || Discourse::SYSTEM_USER_ID
@@ -503,13 +518,13 @@ class ImportScripts::Telligent < ImportScripts::Base
user_id: user_id,
topic_id: parent_post[:topic_id],
created_at: row["ThreadReplyDate"],
reply_to_post_number: parent_post[:post_number]
reply_to_post_number: parent_post[:post_number],
}
post[:custom_fields] = { is_accepted_answer: "true" } if row["IsFirstVerifiedAnswer"]
post
else
puts "Failed to import post #{row['ThreadReplyId']}. Parent was not found."
puts "Failed to import post #{row["ThreadReplyId"]}. Parent was not found."
end
end
end
@@ -565,7 +580,7 @@ class ImportScripts::Telligent < ImportScripts::Base
id: row["MessageId"],
raw: raw_with_attachment(row, user_id, :message),
user_id: user_id,
created_at: row["DateCreated"]
created_at: row["DateCreated"],
}
if current_conversation_id == row["ConversationId"]
@@ -574,7 +589,7 @@ class ImportScripts::Telligent < ImportScripts::Base
if parent_post
post[:topic_id] = parent_post[:topic_id]
else
puts "Failed to import message #{row['MessageId']}. Parent was not found."
puts "Failed to import message #{row["MessageId"]}. Parent was not found."
post = nil
end
else
@@ -583,7 +598,7 @@ class ImportScripts::Telligent < ImportScripts::Base
post[:target_usernames] = get_recipient_usernames(row)
if post[:target_usernames].empty?
puts "Private message without recipients. Skipping #{row['MessageId']}"
puts "Private message without recipients. Skipping #{row["MessageId"]}"
post = nil
end
@@ -611,7 +626,7 @@ class ImportScripts::Telligent < ImportScripts::Base
# Splits the ";"-separated participant_ids string into an array of ids with
# surrounding whitespace stripped. Returns [] when participant_ids is blank.
# NOTE(review): uniq! runs before the strip, so ids that differ only in
# surrounding whitespace are not de-duplicated -- confirm whether that is
# intended.
# The split line is listed twice (single- and double-quoted separator).
def get_recipient_user_ids(participant_ids)
return [] if participant_ids.blank?
user_ids = participant_ids.split(';')
user_ids = participant_ids.split(";")
user_ids.uniq!
user_ids.map!(&:strip)
end
@@ -619,9 +634,9 @@ class ImportScripts::Telligent < ImportScripts::Base
# Resolves the row's "ParticipantIds" into usernames: each import id is
# looked up with find_user_by_import_id, and ids without a matching user
# (nil username) are dropped by compact.
# The map!/compact chain is listed in two layouts (`end.compact` and a
# leading-dot chain).
def get_recipient_usernames(row)
import_user_ids = get_recipient_user_ids(row["ParticipantIds"])
import_user_ids.map! do |import_user_id|
find_user_by_import_id(import_user_id).try(:username)
end.compact
import_user_ids
.map! { |import_user_id| find_user_by_import_id(import_user_id).try(:username) }
.compact
end
def index_directory(root_directory)
@@ -646,17 +661,16 @@ class ImportScripts::Telligent < ImportScripts::Base
filename = row["FileName"]
return raw if @filestore_root_directory.blank? || filename.blank?
if row["IsRemote"]
return "#{raw}\n#{filename}"
end
return "#{raw}\n#{filename}" if row["IsRemote"]
path = File.join(
"telligent.evolution.components.attachments",
"%02d" % row["ApplicationTypeId"],
"%02d" % row["ApplicationId"],
"%02d" % row["ApplicationContentTypeId"],
("%010d" % row["ContentId"]).scan(/.{2}/)
)
path =
File.join(
"telligent.evolution.components.attachments",
"%02d" % row["ApplicationTypeId"],
"%02d" % row["ApplicationId"],
"%02d" % row["ApplicationContentTypeId"],
("%010d" % row["ContentId"]).scan(/.{2}/),
)
path = fix_attachment_path(path, filename)
if path && !embedded_paths.include?(path)
@@ -677,11 +691,11 @@ class ImportScripts::Telligent < ImportScripts::Base
def print_file_not_found_error(type, path, row)
case type
when :topic
id = row['ThreadId']
id = row["ThreadId"]
when :post
id = row['ThreadReplyId']
id = row["ThreadReplyId"]
when :message
id = row['MessageId']
id = row["MessageId"]
end
STDERR.puts "Could not find file for #{type} #{id}: #{path}"
@@ -692,30 +706,31 @@ class ImportScripts::Telligent < ImportScripts::Base
paths = []
upload_ids = []
return [raw, paths, upload_ids] if @filestore_root_directory.blank?
return raw, paths, upload_ids if @filestore_root_directory.blank?
ATTACHMENT_REGEXES.each do |regex|
raw = raw.gsub(regex) do
match_data = Regexp.last_match
raw =
raw.gsub(regex) do
match_data = Regexp.last_match
path = File.join(match_data[:directory], match_data[:path])
fixed_path = fix_attachment_path(path, match_data[:filename])
path = File.join(match_data[:directory], match_data[:path])
fixed_path = fix_attachment_path(path, match_data[:filename])
if fixed_path && File.file?(fixed_path)
filename = File.basename(fixed_path)
upload = @uploader.create_upload(user_id, fixed_path, filename)
if fixed_path && File.file?(fixed_path)
filename = File.basename(fixed_path)
upload = @uploader.create_upload(user_id, fixed_path, filename)
if upload.present? && upload.persisted?
paths << fixed_path
upload_ids << upload.id
@uploader.html_for_upload(upload, filename)
if upload.present? && upload.persisted?
paths << fixed_path
upload_ids << upload.id
@uploader.html_for_upload(upload, filename)
end
else
path = File.join(path, match_data[:filename])
print_file_not_found_error(type, path, row)
match_data[0]
end
else
path = File.join(path, match_data[:filename])
print_file_not_found_error(type, path, row)
match_data[0]
end
end
end
[raw, paths, upload_ids]
@@ -806,8 +821,8 @@ class ImportScripts::Telligent < ImportScripts::Base
md = HtmlToMarkdown.new(html).to_markdown
md.gsub!(/\[quote.*?\]/, "\n" + '\0' + "\n")
md.gsub!(/(?<!^)\[\/quote\]/, "\n[/quote]\n")
md.gsub!(/\[\/quote\](?!$)/, "\n[/quote]\n")
md.gsub!(%r{(?<!^)\[/quote\]}, "\n[/quote]\n")
md.gsub!(%r{\[/quote\](?!$)}, "\n[/quote]\n")
md.gsub!(/\[View:(http.*?)[:\d\s]*?(?:\]|\z)/i, '\1')
md.strip!
md
@@ -832,13 +847,15 @@ class ImportScripts::Telligent < ImportScripts::Base
properties = {}
return properties if names.blank? || values.blank?
names.scan(PROPERTY_NAMES_REGEX).each do |property|
name = property[0]
start_index = property[1].to_i
end_index = start_index + property[2].to_i - 1
names
.scan(PROPERTY_NAMES_REGEX)
.each do |property|
name = property[0]
start_index = property[1].to_i
end_index = start_index + property[2].to_i - 1
properties[name] = values[start_index..end_index]
end
properties[name] = values[start_index..end_index]
end
properties
end
@@ -862,12 +879,12 @@ class ImportScripts::Telligent < ImportScripts::Base
def add_permalink_normalizations
normalizations = SiteSetting.permalink_normalizations
normalizations = normalizations.blank? ? [] : normalizations.split('|')
normalizations = normalizations.blank? ? [] : normalizations.split("|")
add_normalization(normalizations, CATEGORY_LINK_NORMALIZATION)
add_normalization(normalizations, TOPIC_LINK_NORMALIZATION)
SiteSetting.permalink_normalizations = normalizations.join('|')
SiteSetting.permalink_normalizations = normalizations.join("|")
end
def add_normalization(normalizations, normalization)

View File

@@ -7,14 +7,17 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb")
# user documentation: https://meta.discourse.org/t/how-to-migrate-import-from-vanilla-to-discourse/27273
class ImportScripts::Vanilla < ImportScripts::Base
def initialize
super
@vanilla_file = ARGV[0]
raise ArgumentError.new('Vanilla file argument missing. Provide full path to vanilla csv file.') if @vanilla_file.blank?
if @vanilla_file.blank?
raise ArgumentError.new(
"Vanilla file argument missing. Provide full path to vanilla csv file.",
)
end
@use_lastest_activity_as_user_bio = true if ARGV.include?('use-latest-activity-as-user-bio')
@use_lastest_activity_as_user_bio = true if ARGV.include?("use-latest-activity-as-user-bio")
end
def execute
@@ -34,7 +37,9 @@ class ImportScripts::Vanilla < ImportScripts::Base
private
def check_file_exist
raise ArgumentError.new("File does not exist: #{@vanilla_file}") unless File.exist?(@vanilla_file)
unless File.exist?(@vanilla_file)
raise ArgumentError.new("File does not exist: #{@vanilla_file}")
end
end
def parse_file
@@ -65,7 +70,10 @@ class ImportScripts::Vanilla < ImportScripts::Base
next if useless_tables.include?(table)
# parse the data
puts "parsing #{table}..."
parsed_data = CSV.parse(data.join("\n"), headers: true, header_converters: :symbol).map { |row| row.to_hash }
parsed_data =
CSV
.parse(data.join("\n"), headers: true, header_converters: :symbol)
.map { |row| row.to_hash }
instance_variable_set("@#{table}".to_sym, parsed_data)
end
end
@@ -73,11 +81,14 @@ class ImportScripts::Vanilla < ImportScripts::Base
def read_file
puts "reading file..."
string = File.read(@vanilla_file).gsub("\\N", "")
.gsub(/\\$\n/m, "\\n")
.gsub("\\,", ",")
.gsub(/(?<!\\)\\"/, '""')
.gsub(/\\\\\\"/, '\\""')
string =
File
.read(@vanilla_file)
.gsub("\\N", "")
.gsub(/\\$\n/m, "\\n")
.gsub("\\,", ",")
.gsub(/(?<!\\)\\"/, '""')
.gsub(/\\\\\\"/, '\\""')
StringIO.new(string)
end
@@ -106,8 +117,16 @@ class ImportScripts::Vanilla < ImportScripts::Base
created_at: parse_date(user[:date_inserted]),
bio_raw: clean_up(bio_raw),
avatar_url: user[:photo],
moderator: @user_roles.select { |ur| ur[:user_id] == user[:user_id] }.map { |ur| ur[:role_id] }.include?(moderator_role_id),
admin: @user_roles.select { |ur| ur[:user_id] == user[:user_id] }.map { |ur| ur[:role_id] }.include?(admin_role_id),
moderator:
@user_roles
.select { |ur| ur[:user_id] == user[:user_id] }
.map { |ur| ur[:role_id] }
.include?(moderator_role_id),
admin:
@user_roles
.select { |ur| ur[:user_id] == user[:user_id] }
.map { |ur| ur[:role_id] }
.include?(admin_role_id),
}
u
@@ -143,7 +162,8 @@ class ImportScripts::Vanilla < ImportScripts::Base
c = {
id: category[:category_id],
name: category[:name],
user_id: user_id_from_imported_user_id(category[:insert_user_id]) || Discourse::SYSTEM_USER_ID,
user_id:
user_id_from_imported_user_id(category[:insert_user_id]) || Discourse::SYSTEM_USER_ID,
position: category[:sort].to_i,
created_at: parse_category_date(category[:date_inserted]),
description: clean_up(category[:description]),
@@ -164,7 +184,8 @@ class ImportScripts::Vanilla < ImportScripts::Base
create_posts(@discussions) do |discussion|
{
id: "discussion#" + discussion[:discussion_id],
user_id: user_id_from_imported_user_id(discussion[:insert_user_id]) || Discourse::SYSTEM_USER_ID,
user_id:
user_id_from_imported_user_id(discussion[:insert_user_id]) || Discourse::SYSTEM_USER_ID,
title: discussion[:name],
category: category_id_from_imported_category_id(discussion[:category_id]),
raw: clean_up(discussion[:body]),
@@ -181,7 +202,8 @@ class ImportScripts::Vanilla < ImportScripts::Base
{
id: "comment#" + comment[:comment_id],
user_id: user_id_from_imported_user_id(comment[:insert_user_id]) || Discourse::SYSTEM_USER_ID,
user_id:
user_id_from_imported_user_id(comment[:insert_user_id]) || Discourse::SYSTEM_USER_ID,
topic_id: t[:topic_id],
raw: clean_up(comment[:body]),
created_at: parse_date(comment[:date_inserted]),
@@ -196,20 +218,30 @@ class ImportScripts::Vanilla < ImportScripts::Base
next if conversation[:first_message_id].blank?
# list all other user ids in the conversation
user_ids_in_conversation = @user_conversations.select { |uc| uc[:conversation_id] == conversation[:conversation_id] && uc[:user_id] != conversation[:insert_user_id] }
.map { |uc| uc[:user_id] }
user_ids_in_conversation =
@user_conversations
.select do |uc|
uc[:conversation_id] == conversation[:conversation_id] &&
uc[:user_id] != conversation[:insert_user_id]
end
.map { |uc| uc[:user_id] }
# retrieve their emails
user_emails_in_conversation = @users.select { |u| user_ids_in_conversation.include?(u[:user_id]) }
.map { |u| u[:email] }
user_emails_in_conversation =
@users.select { |u| user_ids_in_conversation.include?(u[:user_id]) }.map { |u| u[:email] }
# retrieve their usernames from the database
target_usernames = User.joins(:user_emails)
.where(user_emails: { email: user_emails_in_conversation })
.pluck(:username)
target_usernames =
User
.joins(:user_emails)
.where(user_emails: { email: user_emails_in_conversation })
.pluck(:username)
next if target_usernames.blank?
user = find_user_by_import_id(conversation[:insert_user_id]) || Discourse.system_user
first_message = @conversation_messages.select { |cm| cm[:message_id] == conversation[:first_message_id] }.first
first_message =
@conversation_messages
.select { |cm| cm[:message_id] == conversation[:first_message_id] }
.first
{
id: "conversation#" + conversation[:conversation_id],
@@ -229,12 +261,15 @@ class ImportScripts::Vanilla < ImportScripts::Base
@conversation_messages.reject! { |cm| first_message_ids.include?(cm[:message_id]) }
create_posts(@conversation_messages) do |message|
next unless t = topic_lookup_from_imported_post_id("conversation#" + message[:conversation_id])
unless t = topic_lookup_from_imported_post_id("conversation#" + message[:conversation_id])
next
end
{
archetype: Archetype.private_message,
id: "message#" + message[:message_id],
user_id: user_id_from_imported_user_id(message[:insert_user_id]) || Discourse::SYSTEM_USER_ID,
user_id:
user_id_from_imported_user_id(message[:insert_user_id]) || Discourse::SYSTEM_USER_ID,
topic_id: t[:topic_id],
raw: clean_up(message[:body]),
created_at: parse_date(message[:date_inserted]),
@@ -248,13 +283,13 @@ class ImportScripts::Vanilla < ImportScripts::Base
def clean_up(raw)
return "" if raw.blank?
raw.gsub("\\n", "\n")
.gsub(/<\/?pre\s*>/i, "\n```\n")
.gsub(/<\/?code\s*>/i, "`")
raw
.gsub("\\n", "\n")
.gsub(%r{</?pre\s*>}i, "\n```\n")
.gsub(%r{</?code\s*>}i, "`")
.gsub("&lt;", "<")
.gsub("&gt;", ">")
end
end
ImportScripts::Vanilla.new.perform

View File

@@ -14,9 +14,9 @@ class VanillaBodyParser
end
def parse
return clean_up(@row['Body']) unless rich?
return clean_up(@row["Body"]) unless rich?
full_text = json.each_with_index.map(&method(:parse_fragment)).join('')
full_text = json.each_with_index.map(&method(:parse_fragment)).join("")
normalize full_text
end
@@ -25,30 +25,46 @@ class VanillaBodyParser
def clean_up(text)
# <pre class="CodeBlock">...</pre>
text = text.gsub(/\<pre class="CodeBlock"\>(.*?)\<\/pre\>/im) { "\n```\n#{$1}\n```\n" }
text = text.gsub(%r{\<pre class="CodeBlock"\>(.*?)\</pre\>}im) { "\n```\n#{$1}\n```\n" }
# <pre>...</pre>
text = text.gsub(/\<pre\>(.*?)\<\/pre\>/im) { "\n```\n#{$1}\n```\n" }
text = text.gsub(%r{\<pre\>(.*?)\</pre\>}im) { "\n```\n#{$1}\n```\n" }
# <code></code>
text = text.gsub("\<code\>\</code\>", "").gsub(/\<code\>(.*?)\<\/code\>/im) { "#{$1}" }
text = text.gsub("\<code\>\</code\>", "").gsub(%r{\<code\>(.*?)\</code\>}im) { "#{$1}" }
# <div class="Quote">...</div>
text = text.gsub(/\<div class="Quote"\>(.*?)\<\/div\>/im) { "\n[quote]\n#{$1}\n[/quote]\n" }
text = text.gsub(%r{\<div class="Quote"\>(.*?)\</div\>}im) { "\n[quote]\n#{$1}\n[/quote]\n" }
# [code], [quote]
text = text.gsub(/\[\/?code\]/i, "\n```\n").gsub(/\[quote.*?\]/i, "\n" + '\0' + "\n").gsub(/\[\/quote\]/i, "\n" + '\0' + "\n")
text =
text
.gsub(%r{\[/?code\]}i, "\n```\n")
.gsub(/\[quote.*?\]/i, "\n" + '\0' + "\n")
.gsub(%r{\[/quote\]}i, "\n" + '\0' + "\n")
text.gsub(/<\/?font[^>]*>/, '').gsub(/<\/?span[^>]*>/, '').gsub(/<\/?div[^>]*>/, '').gsub(/^ +/, '').gsub(/ +/, ' ')
text
.gsub(%r{</?font[^>]*>}, "")
.gsub(%r{</?span[^>]*>}, "")
.gsub(%r{</?div[^>]*>}, "")
.gsub(/^ +/, "")
.gsub(/ +/, " ")
end
def rich?
@row['Format'].casecmp?('Rich')
@row["Format"].casecmp?("Rich")
end
def json
return nil unless rich?
@json ||= JSON.parse(@row['Body']).map(&:deep_symbolize_keys)
@json ||= JSON.parse(@row["Body"]).map(&:deep_symbolize_keys)
end
def parse_fragment(fragment, index)
text = fragment.keys.one? && fragment[:insert].is_a?(String) ? fragment[:insert] : rich_parse(fragment)
text =
(
if fragment.keys.one? && fragment[:insert].is_a?(String)
fragment[:insert]
else
rich_parse(fragment)
end
)
text = parse_code(text, fragment, index)
text = parse_list(text, fragment, index)
@@ -59,16 +75,18 @@ class VanillaBodyParser
def rich_parse(fragment)
insert = fragment[:insert]
return parse_mention(insert[:mention]) if insert.respond_to?(:dig) && insert.dig(:mention, :userID)
if insert.respond_to?(:dig) && insert.dig(:mention, :userID)
return parse_mention(insert[:mention])
end
return parse_formatting(fragment) if fragment[:attributes]
embed_type = insert.dig(:'embed-external', :data, :embedType)
embed_type = insert.dig(:"embed-external", :data, :embedType)
quoting = embed_type == 'quote'
quoting = embed_type == "quote"
return parse_quote(insert) if quoting
embed = embed_type.in? ['image', 'link', 'file']
embed = embed_type.in? %w[image link file]
parse_embed(insert, embed_type) if embed
end
@@ -101,10 +119,10 @@ class VanillaBodyParser
def parse_code(text, fragment, index)
next_fragment = next_fragment(index)
next_code = next_fragment.dig(:attributes, :'code-block')
next_code = next_fragment.dig(:attributes, :"code-block")
if next_code
previous_fragment = previous_fragment(index)
previous_code = previous_fragment.dig(:attributes, :'code-block')
previous_code = previous_fragment.dig(:attributes, :"code-block")
if previous_code
text = text.gsub(/\\n(.*?)\\n/) { "\n```\n#{$1}\n```\n" }
@@ -112,7 +130,7 @@ class VanillaBodyParser
last_pos = text.rindex(/\n/)
if last_pos
array = [text[0..last_pos].strip, text[last_pos + 1 .. text.length].strip]
array = [text[0..last_pos].strip, text[last_pos + 1..text.length].strip]
text = array.join("\n```\n")
else
text = "\n```\n#{text}"
@@ -120,10 +138,10 @@ class VanillaBodyParser
end
end
current_code = fragment.dig(:attributes, :'code-block')
current_code = fragment.dig(:attributes, :"code-block")
if current_code
second_next_fragment = second_next_fragment(index)
second_next_code = second_next_fragment.dig(:attributes, :'code-block')
second_next_code = second_next_fragment.dig(:attributes, :"code-block")
# if current is code and 2 after is not, prepend ```
text = "\n```\n#{text}" unless second_next_code
@@ -138,13 +156,13 @@ class VanillaBodyParser
next_list = next_fragment.dig(:attributes, :list, :type)
if next_list
# if next is list, prepend <li>
text = '<li>' + text
text = "<li>" + text
previous_fragment = previous_fragment(index)
previous_list = previous_fragment.dig(:attributes, :list, :type)
# if next is list and previous is not, prepend <ol> or <ul>
list_tag = next_list == 'ordered' ? '<ol>' : '<ul>'
list_tag = next_list == "ordered" ? "<ol>" : "<ul>"
text = "\n#{list_tag}\n#{text}" unless previous_list
end
@@ -152,13 +170,13 @@ class VanillaBodyParser
if current_list
# if current is list prepend </li>
tag_closings = '</li>'
tag_closings = "</li>"
second_next_fragment = second_next_fragment(index)
second_next_list = second_next_fragment.dig(:attributes, :list, :type)
# if current is list and 2 after is not, prepend </ol>
list_tag = current_list == 'ordered' ? '</ol>' : '</ul>'
list_tag = current_list == "ordered" ? "</ol>" : "</ul>"
tag_closings = "#{tag_closings}\n#{list_tag}" unless second_next_list
text = tag_closings + text
@@ -180,24 +198,32 @@ class VanillaBodyParser
end
def parse_quote(insert)
embed = insert.dig(:'embed-external', :data)
embed = insert.dig(:"embed-external", :data)
import_post_id = "#{embed[:recordType]}##{embed[:recordID]}"
topic = @@lookup.topic_lookup_from_imported_post_id(import_post_id)
user = user_from_imported_id(embed.dig(:insertUser, :userID))
quote_info = topic && user ? "=\"#{user.username}, post: #{topic[:post_number]}, topic: #{topic[:topic_id]}\"" : ''
quote_info =
(
if topic && user
"=\"#{user.username}, post: #{topic[:post_number]}, topic: #{topic[:topic_id]}\""
else
""
end
)
"[quote#{quote_info}]\n#{embed[:body]}\n[/quote]\n\n"""
"[quote#{quote_info}]\n#{embed[:body]}\n[/quote]\n\n" \
""
end
def parse_embed(insert, embed_type)
embed = insert.dig(:'embed-external', :data)
embed = insert.dig(:"embed-external", :data)
url = embed[:url]
if /https?\:\/\/#{@@host}\/uploads\/.*/.match?(url)
remote_path = url.scan(/uploads\/(.*)/)
if %r{https?\://#{@@host}/uploads/.*}.match?(url)
remote_path = url.scan(%r{uploads/(.*)})
path = File.join(@@uploads_path, remote_path)
upload = @@uploader.create_upload(@user_id, path, embed[:name])
@@ -206,7 +232,7 @@ class VanillaBodyParser
return "\n" + @@uploader.html_for_upload(upload, embed[:name]) + "\n"
else
puts "Failed to upload #{path}"
puts upload.errors.full_messages.join(', ') if upload
puts upload.errors.full_messages.join(", ") if upload
end
end
@@ -222,9 +248,9 @@ class VanillaBodyParser
def normalize(full_text)
code_matcher = /```(.*\n)+```/
code_block = full_text[code_matcher]
full_text[code_matcher] = '{{{CODE_BLOCK}}}' if code_block
full_text[code_matcher] = "{{{CODE_BLOCK}}}" if code_block
full_text = double_new_lines(full_text)
full_text['{{{CODE_BLOCK}}}'] = code_block if code_block
full_text["{{{CODE_BLOCK}}}"] = code_block if code_block
full_text
end

Some files were not shown because too many files have changed in this diff Show More