DEV: Apply syntax_tree formatting to script/*

David Taylor 2023-01-07 11:53:14 +00:00
parent ff508d1ae5
commit 436b3b392b
143 changed files with 8905 additions and 7353 deletions
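
syntax_tree is a Ruby formatter that parses each file and re-prints it against a fixed print width (80 columns by default), which is what produces the mechanical rewrites in the hunks below. As a minimal sketch, assuming the syntax_tree gem's Ruby API (the repository may equally drive this through the stree CLI or a Rake task), the formatting of a single file can be reproduced like this:

    require "syntax_tree"

    path = "script/bench.rb" # any file under script/*
    source = File.read(path)
    # Parse the source and re-print it with the default 80-column layout.
    formatted = SyntaxTree.format(source)
    File.write(path, formatted) if formatted != source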


@ -5,5 +5,4 @@
--ignore-files=config/*
--ignore-files=db/*
--ignore-files=lib/*
--ignore-files=script/*
--ignore-files=spec/*
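
The hunk above edits the formatter's ignore list (presumably the project's .streerc, since these are stree command-line flags); removing --ignore-files=script/* is what brings script/* under formatting, while the other directories stay excluded. As a hedged sketch, assuming the Rake tasks bundled with the syntax_tree gem, a project can expose check and write tasks that honour this file:

    # Hypothetical Rakefile wiring; the class and task names below are the
    # syntax_tree gem's documented defaults, not something from this commit.
    require "syntax_tree/rake_tasks"

    SyntaxTree::Rake::CheckTask.new # `rake stree:check` fails on unformatted files
    SyntaxTree::Rake::WriteTask.new # `rake stree:write` rewrites them in place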


@ -9,23 +9,24 @@ wait_seconds = ARGV[0]&.to_i || 10
puts "Counting messages for #{wait_seconds} seconds..."
print 'Seen 0 messages'
t = Thread.new do
MessageBus.backend_instance.global_subscribe do |m|
channel = m.channel
if channel.start_with?("/distributed_hash")
payload = JSON.parse(m.data)["data"]
info = payload["hash_key"]
# info += ".#{payload["key"]}" # Uncomment if you need more granular info
channel += " (#{info})"
print "Seen 0 messages"
t =
Thread.new do
MessageBus.backend_instance.global_subscribe do |m|
channel = m.channel
if channel.start_with?("/distributed_hash")
payload = JSON.parse(m.data)["data"]
info = payload["hash_key"]
# info += ".#{payload["key"]}" # Uncomment if you need more granular info
channel += " (#{info})"
end
channel_counters[channel] += 1
messages_seen += 1
print "\rSeen #{messages_seen} messages from #{channel_counters.size} channels"
end
channel_counters[channel] += 1
messages_seen += 1
print "\rSeen #{messages_seen} messages from #{channel_counters.size} channels"
end
end
sleep wait_seconds
@ -53,10 +54,12 @@ puts "| #{"channel".ljust(max_channel_name_length)} | #{"message count".rjust(ma
puts "|#{"-" * (max_channel_name_length + 2)}|#{"-" * (max_count_length + 2)}|"
result_count = 10
sorted_results.first(result_count).each do |name, value|
name = "`#{name}`"
puts "| #{name.ljust(max_channel_name_length)} | #{value.to_s.rjust(max_count_length)} |"
end
sorted_results
.first(result_count)
.each do |name, value|
name = "`#{name}`"
puts "| #{name.ljust(max_channel_name_length)} | #{value.to_s.rjust(max_count_length)} |"
end
other_count = messages_seen - sorted_results.first(result_count).sum { |k, v| v }
puts "| #{"(other)".ljust(max_channel_name_length)} | #{other_count.to_s.rjust(max_count_length)} |"
puts "|#{" " * (max_channel_name_length + 2)}|#{" " * (max_count_length + 2)}|"


@ -2,17 +2,14 @@
require File.expand_path("../../config/environment", __FILE__)
queues = %w{default low ultra_low critical}.map { |name| Sidekiq::Queue.new(name) }.lazy.flat_map(&:lazy)
queues =
%w[default low ultra_low critical].map { |name| Sidekiq::Queue.new(name) }.lazy.flat_map(&:lazy)
stats = Hash.new(0)
queues.each do |j|
stats[j.klass] += 1
end
queues.each { |j| stats[j.klass] += 1 }
stats.sort_by { |a, b| -b }.each do |name, count|
puts "#{name}: #{count}"
end
stats.sort_by { |a, b| -b }.each { |name, count| puts "#{name}: #{count}" }
dupes = Hash.new([])
queues.each do |j|


@ -19,46 +19,43 @@ require "uri"
@skip_asset_bundle = false
@unicorn_workers = 3
opts = OptionParser.new do |o|
o.banner = "Usage: ruby bench.rb [options]"
opts =
OptionParser.new do |o|
o.banner = "Usage: ruby bench.rb [options]"
o.on("-n", "--with_default_env", "Include recommended Discourse env") do
@include_env = true
end
o.on("-o", "--output [FILE]", "Output results to this file") do |f|
@result_file = f
end
o.on("-i", "--iterations [ITERATIONS]", "Number of iterations to run the bench for") do |i|
@iterations = i.to_i
end
o.on("-b", "--best_of [NUM]", "Number of times to run the bench taking best as result") do |i|
@best_of = i.to_i
end
o.on("-d", "--heap_dump") do
@dump_heap = true
# We need an env var for config/boot.rb to enable allocation tracing prior to framework init
ENV['DISCOURSE_DUMP_HEAP'] = "1"
end
o.on("-m", "--memory_stats") do
@mem_stats = true
end
o.on("-u", "--unicorn", "Use unicorn to serve pages as opposed to puma") do
@unicorn = true
end
o.on("-c", "--concurrency [NUM]", "Run benchmark with this number of concurrent requests (default: 1)") do |i|
@concurrency = i.to_i
end
o.on("-w", "--unicorn_workers [NUM]", "Run benchmark with this number of unicorn workers (default: 3)") do |i|
@unicorn_workers = i.to_i
end
o.on("-s", "--skip-bundle-assets", "Skip bundling assets") do
@skip_asset_bundle = true
end
o.on("-n", "--with_default_env", "Include recommended Discourse env") { @include_env = true }
o.on("-o", "--output [FILE]", "Output results to this file") { |f| @result_file = f }
o.on("-i", "--iterations [ITERATIONS]", "Number of iterations to run the bench for") do |i|
@iterations = i.to_i
end
o.on("-b", "--best_of [NUM]", "Number of times to run the bench taking best as result") do |i|
@best_of = i.to_i
end
o.on("-d", "--heap_dump") do
@dump_heap = true
# We need an env var for config/boot.rb to enable allocation tracing prior to framework init
ENV["DISCOURSE_DUMP_HEAP"] = "1"
end
o.on("-m", "--memory_stats") { @mem_stats = true }
o.on("-u", "--unicorn", "Use unicorn to serve pages as opposed to puma") { @unicorn = true }
o.on(
"-c",
"--concurrency [NUM]",
"Run benchmark with this number of concurrent requests (default: 1)",
) { |i| @concurrency = i.to_i }
o.on(
"-w",
"--unicorn_workers [NUM]",
"Run benchmark with this number of unicorn workers (default: 3)",
) { |i| @unicorn_workers = i.to_i }
o.on("-s", "--skip-bundle-assets", "Skip bundling assets") { @skip_asset_bundle = true }
o.on("-t", "--tests [STRING]", "List of tests to run. Example: '--tests topic,categories')") do |i|
@tests = i.split(",")
o.on(
"-t",
"--tests [STRING]",
"List of tests to run. Example: '--tests topic,categories')",
) { |i| @tests = i.split(",") }
end
end
opts.parse!
def run(command, opt = nil)
@ -73,7 +70,7 @@ def run(command, opt = nil)
end
begin
require 'facter'
require "facter"
raise LoadError if Gem::Version.new(Facter.version) < Gem::Version.new("4.0")
rescue LoadError
run "gem install facter"
@ -113,7 +110,7 @@ end
puts "Ensuring config is setup"
%x{which ab > /dev/null 2>&1}
`which ab > /dev/null 2>&1`
unless $? == 0
abort "Apache Bench is not installed. Try: apt-get install apache2-utils or brew install ab"
end
@ -125,7 +122,7 @@ end
ENV["RAILS_ENV"] = "profile"
discourse_env_vars = %w(
discourse_env_vars = %w[
DISCOURSE_DUMP_HEAP
RUBY_GC_HEAP_INIT_SLOTS
RUBY_GC_HEAP_FREE_SLOTS
@ -140,27 +137,22 @@ discourse_env_vars = %w(
RUBY_GC_HEAP_OLDOBJECT_LIMIT_FACTOR
RUBY_GLOBAL_METHOD_CACHE_SIZE
LD_PRELOAD
)
]
if @include_env
puts "Running with tuned environment"
discourse_env_vars.each do |v|
ENV.delete v
end
ENV['RUBY_GLOBAL_METHOD_CACHE_SIZE'] = '131072'
ENV['RUBY_GC_HEAP_GROWTH_MAX_SLOTS'] = '40000'
ENV['RUBY_GC_HEAP_INIT_SLOTS'] = '400000'
ENV['RUBY_GC_HEAP_OLDOBJECT_LIMIT_FACTOR'] = '1.5'
discourse_env_vars.each { |v| ENV.delete v }
ENV["RUBY_GLOBAL_METHOD_CACHE_SIZE"] = "131072"
ENV["RUBY_GC_HEAP_GROWTH_MAX_SLOTS"] = "40000"
ENV["RUBY_GC_HEAP_INIT_SLOTS"] = "400000"
ENV["RUBY_GC_HEAP_OLDOBJECT_LIMIT_FACTOR"] = "1.5"
else
# clean env
puts "Running with the following custom environment"
end
discourse_env_vars.each do |w|
puts "#{w}: #{ENV[w]}" if ENV[w].to_s.length > 0
end
discourse_env_vars.each { |w| puts "#{w}: #{ENV[w]}" if ENV[w].to_s.length > 0 }
def port_available?(port)
server = TCPServer.open("0.0.0.0", port)
@ -170,20 +162,16 @@ rescue Errno::EADDRINUSE
false
end
@port = 60079
@port = 60_079
while !port_available? @port
@port += 1
end
@port += 1 while !port_available? @port
puts "Ensuring profiling DB exists and is migrated"
puts `bundle exec rake db:create`
`bundle exec rake db:migrate`
puts "Timing loading Rails"
measure("load_rails") do
`bundle exec rake middleware`
end
measure("load_rails") { `bundle exec rake middleware` }
puts "Populating Profile DB"
run("bundle exec ruby script/profile_db_generator.rb")
@ -223,16 +211,21 @@ begin
pid =
if @unicorn
ENV['UNICORN_PORT'] = @port.to_s
ENV['UNICORN_WORKERS'] = @unicorn_workers.to_s
FileUtils.mkdir_p(File.join('tmp', 'pids'))
ENV["UNICORN_PORT"] = @port.to_s
ENV["UNICORN_WORKERS"] = @unicorn_workers.to_s
FileUtils.mkdir_p(File.join("tmp", "pids"))
unicorn_pid = spawn("bundle exec unicorn -c config/unicorn.conf.rb")
while (unicorn_master_pid = `ps aux | grep "unicorn master" | grep -v "grep" | awk '{print $2}'`.strip.to_i) == 0
while (
unicorn_master_pid =
`ps aux | grep "unicorn master" | grep -v "grep" | awk '{print $2}'`.strip.to_i
) == 0
sleep 1
end
while `ps -f --ppid #{unicorn_master_pid} | grep worker | awk '{ print $2 }'`.split("\n").map(&:to_i).size != @unicorn_workers.to_i
while `ps -f --ppid #{unicorn_master_pid} | grep worker | awk '{ print $2 }'`.split("\n")
.map(&:to_i)
.size != @unicorn_workers.to_i
sleep 1
end
@ -241,48 +234,38 @@ begin
spawn("bundle exec puma -p #{@port} -e production")
end
while port_available? @port
sleep 1
end
sleep 1 while port_available? @port
puts "Starting benchmark..."
admin_headers = {
'Api-Key' => admin_api_key,
'Api-Username' => "admin1"
}
admin_headers = { "Api-Key" => admin_api_key, "Api-Username" => "admin1" }
user_headers = {
'User-Api-Key' => user_api_key
}
user_headers = { "User-Api-Key" => user_api_key }
# asset precompilation is a dog, wget to force it
run "curl -s -o /dev/null http://127.0.0.1:#{@port}/"
redirect_response = `curl -s -I "http://127.0.0.1:#{@port}/t/i-am-a-topic-used-for-perf-tests"`
if redirect_response !~ /301 Moved Permanently/
raise "Unable to locate topic for perf tests"
end
raise "Unable to locate topic for perf tests" if redirect_response !~ /301 Moved Permanently/
topic_url = redirect_response.match(/^location: .+(\/t\/i-am-a-topic-used-for-perf-tests\/.+)$/i)[1].strip
topic_url =
redirect_response.match(%r{^location: .+(/t/i-am-a-topic-used-for-perf-tests/.+)$}i)[1].strip
all_tests = [
["categories", "/categories"],
["home", "/"],
%w[categories /categories],
%w[home /],
["topic", topic_url],
["topic.json", "#{topic_url}.json"],
["user activity", "/u/admin1/activity"],
]
@tests ||= %w{categories home topic}
@tests ||= %w[categories home topic]
tests_to_run = all_tests.select do |test_name, path|
@tests.include?(test_name)
end
tests_to_run = all_tests.select { |test_name, path| @tests.include?(test_name) }
tests_to_run.concat(
tests_to_run.map { |k, url| ["#{k} user", "#{url}", user_headers] },
tests_to_run.map { |k, url| ["#{k} admin", "#{url}", admin_headers] }
tests_to_run.map { |k, url| ["#{k} admin", "#{url}", admin_headers] },
)
tests_to_run.each do |test_name, path, headers_for_path|
@ -290,15 +273,11 @@ begin
http = Net::HTTP.new(uri.host, uri.port)
request = Net::HTTP::Get.new(uri.request_uri)
headers_for_path&.each do |key, value|
request[key] = value
end
headers_for_path&.each { |key, value| request[key] = value }
response = http.request(request)
if response.code != "200"
raise "#{test_name} #{path} returned non 200 response code"
end
raise "#{test_name} #{path} returned non 200 response code" if response.code != "200"
end
# NOTE: we run the most expensive page first in the bench
@ -335,11 +314,17 @@ begin
Facter.reset
facts = Facter.to_hash
facts.delete_if { |k, v|
!["operatingsystem", "architecture", "kernelversion",
"memorysize", "physicalprocessorcount", "processor0",
"virtual"].include?(k)
}
facts.delete_if do |k, v|
!%w[
operatingsystem
architecture
kernelversion
memorysize
physicalprocessorcount
processor0
virtual
].include?(k)
end
run("RAILS_ENV=profile bundle exec rake assets:clean")
@ -349,10 +334,13 @@ begin
mem = get_mem(pid)
results = results.merge("timings" => @timings,
"ruby-version" => "#{RUBY_DESCRIPTION}",
"rss_kb" => mem["rss_kb"],
"pss_kb" => mem["pss_kb"]).merge(facts)
results =
results.merge(
"timings" => @timings,
"ruby-version" => "#{RUBY_DESCRIPTION}",
"rss_kb" => mem["rss_kb"],
"pss_kb" => mem["pss_kb"],
).merge(facts)
if @unicorn
child_pids = `ps --ppid #{pid} | awk '{ print $1; }' | grep -v PID`.split("\n")
@ -375,12 +363,7 @@ begin
puts open("http://127.0.0.1:#{@port}/admin/dump_heap", headers).read
end
if @result_file
File.open(@result_file, "wb") do |f|
f.write(results)
end
end
File.open(@result_file, "wb") { |f| f.write(results) } if @result_file
ensure
Process.kill "KILL", pid
end


@ -1,10 +1,9 @@
# frozen_string_literal: true
require 'benchmark/ips'
require File.expand_path('../../../../config/environment', __FILE__)
require "benchmark/ips"
require File.expand_path("../../../../config/environment", __FILE__)
Benchmark.ips do |x|
x.report("redis setex string") do |times|
while times > 0
Discourse.redis.setex("test_key", 60, "test")


@ -1,7 +1,7 @@
# frozen_string_literal: true
require 'benchmark/ips'
require File.expand_path('../../../../config/environment', __FILE__)
require "benchmark/ips"
require File.expand_path("../../../../config/environment", __FILE__)
# set any flags here
# MiniRacer::Platform.set_flags! :noturbo
@ -10,7 +10,7 @@ tests = [
["tiny post", "**hello**"],
["giant post", File.read("giant_post.md")],
["most features", File.read("most_features.md")],
["lots of mentions", File.read("lots_of_mentions.md")]
["lots of mentions", File.read("lots_of_mentions.md")],
]
PrettyText.cook("")
@ -31,9 +31,7 @@ PrettyText.v8.eval("window.commonmark = window.markdownit('commonmark')")
Benchmark.ips do |x|
[true, false].each do |sanitize|
tests.each do |test, text|
x.report("#{test} sanitize: #{sanitize}") do
PrettyText.markdown(text, sanitize: sanitize)
end
x.report("#{test} sanitize: #{sanitize}") { PrettyText.markdown(text, sanitize: sanitize) }
end
end


@ -1,7 +1,7 @@
# frozen_string_literal: true
require 'memory_profiler'
require 'benchmark/ips'
require "memory_profiler"
require "benchmark/ips"
ENV["RAILS_ENV"] = "production"
@ -14,12 +14,10 @@ def req
"timings[1]" => "1001",
"timings[2]" => "1001",
"timings[3]" => "1001",
"topic_id" => "490310"
"topic_id" => "490310",
}
data = data.map do |k, v|
"#{CGI.escape(k)}=#{v}"
end.join("&")
data = data.map { |k, v| "#{CGI.escape(k)}=#{v}" }.join("&")
{
"REQUEST_METHOD" => "POST",
@ -33,7 +31,7 @@ def req
"HTTP_COOKIE" => "_t=#{_t}",
"rack.input" => StringIO.new(data),
"rack.version" => [1, 2],
"rack.url_scheme" => "http"
"rack.url_scheme" => "http",
}
end
@ -45,11 +43,7 @@ end
exit
#
#
StackProf.run(mode: :wall, out: 'report.dump') do
1000.times do
Rails.application.call(req)
end
end
StackProf.run(mode: :wall, out: "report.dump") { 1000.times { Rails.application.call(req) } }
#
# MemoryProfiler.start
# Rails.application.call(req)


@ -1,37 +1,32 @@
# frozen_string_literal: true
require 'benchmark/ips'
require File.expand_path('../../../../config/environment', __FILE__)
require "benchmark/ips"
require File.expand_path("../../../../config/environment", __FILE__)
# Put pre conditions here
# Used db but it's OK in the most cases
# build the cache
SiteSetting.title = SecureRandom.hex
SiteSetting.default_locale = SiteSetting.default_locale == 'en' ? 'zh_CN' : 'en'
SiteSetting.default_locale = SiteSetting.default_locale == "en" ? "zh_CN" : "en"
SiteSetting.refresh!
tests = [
["current cache", lambda do
SiteSetting.title
SiteSetting.enable_discourse_connect
end
[
"current cache",
lambda do
SiteSetting.title
SiteSetting.enable_discourse_connect
end,
],
["change default locale with current cache refreshed", lambda do
SiteSetting.default_locale = SiteSetting.default_locale == 'en' ? 'zh_CN' : 'en'
end
],
["change site setting", lambda do
SiteSetting.title = SecureRandom.hex
end
[
"change default locale with current cache refreshed",
lambda { SiteSetting.default_locale = SiteSetting.default_locale == "en" ? "zh_CN" : "en" },
],
["change site setting", lambda { SiteSetting.title = SecureRandom.hex }],
]
Benchmark.ips do |x|
tests.each do |test, proc|
x.report(test, proc)
end
end
Benchmark.ips { |x| tests.each { |test, proc| x.report(test, proc) } }
# 2017-08-02 - Erick's Site Setting change


@ -1,34 +1,26 @@
# frozen_string_literal: true
require 'ruby-prof'
require "ruby-prof"
def profile(&blk)
result = RubyProf.profile(&blk)
printer = RubyProf::GraphHtmlPrinter.new(result)
printer.print(STDOUT)
end
profile { '' } # loading profiler dependency
profile { "" } # loading profiler dependency
require File.expand_path('../../../../config/environment', __FILE__)
require File.expand_path("../../../../config/environment", __FILE__)
# warming up
SiteSetting.title
SiteSetting.enable_discourse_connect
SiteSetting.default_locale = SiteSetting.default_locale == 'en' ? 'zh_CN' : 'en'
SiteSetting.default_locale = SiteSetting.default_locale == "en" ? "zh_CN" : "en"
SiteSetting.title = SecureRandom.hex
profile do
SiteSetting.title
end
profile { SiteSetting.title }
profile do
SiteSetting.enable_discourse_connect
end
profile { SiteSetting.enable_discourse_connect }
profile do
SiteSetting.default_locale = SiteSetting.default_locale == 'en' ? 'zh_CN' : 'en'
end
profile { SiteSetting.default_locale = SiteSetting.default_locale == "en" ? "zh_CN" : "en" }
profile do
SiteSetting.title = SecureRandom.hex
end
profile { SiteSetting.title = SecureRandom.hex }
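
The rewrites in this hunk show one of syntax_tree's most visible rules: a do/end block whose body fits on a single line within the print width is collapsed to brace form. Mirroring the change above:

    profile do
      SiteSetting.title
    end
    # is re-printed by the formatter as:
    profile { SiteSetting.title }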


@ -2,35 +2,41 @@
# simple script to measure largest objects in memory post boot
if ENV['RAILS_ENV'] != "production"
exec "RAILS_ENV=production ruby #{__FILE__}"
end
exec "RAILS_ENV=production ruby #{__FILE__}" if ENV["RAILS_ENV"] != "production"
require 'objspace'
require "objspace"
ObjectSpace.trace_object_allocations do
require File.expand_path("../../config/environment", __FILE__)
Rails.application.routes.recognize_path('abc') rescue nil
begin
Rails.application.routes.recognize_path("abc")
rescue StandardError
nil
end
# load up the yaml for the localization bits, in master process
I18n.t(:posts)
RailsMultisite::ConnectionManagement.each_connection do
(ActiveRecord::Base.connection.tables - %w[schema_migrations versions]).each do |table|
table.classify.constantize.first rescue nil
begin
table.classify.constantize.first
rescue StandardError
nil
end
end
end
end
5.times do
GC.start(full_mark: true, immediate_sweep: true)
end
5.times { GC.start(full_mark: true, immediate_sweep: true) }
[String, Array, Hash].each do |klass|
ObjectSpace.each_object(klass).sort { |a, b| b.length <=> a.length }.first(50).each do |obj|
puts "#{klass} size: #{obj.length} #{ObjectSpace.allocation_sourcefile(obj)} #{ObjectSpace.allocation_sourceline(obj)}"
end
ObjectSpace
.each_object(klass)
.sort { |a, b| b.length <=> a.length }
.first(50)
.each do |obj|
puts "#{klass} size: #{obj.length} #{ObjectSpace.allocation_sourcefile(obj)} #{ObjectSpace.allocation_sourceline(obj)}"
end
end
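
Another rewrite that recurs throughout this commit: syntax_tree expands the inline rescue modifier into an explicit begin/rescue/end block, making the implicitly rescued StandardError visible. Mirroring the change above:

    Rails.application.routes.recognize_path("abc") rescue nil
    # is re-printed as:
    begin
      Rails.application.routes.recognize_path("abc")
    rescue StandardError
      nil
    end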


@ -2,22 +2,30 @@
# simple script to measure memory at boot
if ENV['RAILS_ENV'] != "production"
exec "RAILS_ENV=production ruby #{__FILE__}"
end
exec "RAILS_ENV=production ruby #{__FILE__}" if ENV["RAILS_ENV"] != "production"
require 'memory_profiler'
require "memory_profiler"
MemoryProfiler.report do
require File.expand_path("../../config/environment", __FILE__)
MemoryProfiler
.report do
require File.expand_path("../../config/environment", __FILE__)
Rails.application.routes.recognize_path('abc') rescue nil
begin
Rails.application.routes.recognize_path("abc")
rescue StandardError
nil
end
# load up the yaml for the localization bits, in master process
I18n.t(:posts)
# load up the yaml for the localization bits, in master process
I18n.t(:posts)
# load up all models and schema
(ActiveRecord::Base.connection.tables - %w[schema_migrations versions]).each do |table|
table.classify.constantize.first rescue nil
# load up all models and schema
(ActiveRecord::Base.connection.tables - %w[schema_migrations versions]).each do |table|
begin
table.classify.constantize.first
rescue StandardError
nil
end
end
end
end.pretty_print
.pretty_print


@ -1,6 +1,6 @@
# frozen_string_literal: true
if ARGV.include?('bbcode-to-md')
if ARGV.include?("bbcode-to-md")
# Replace (most) bbcode with markdown before creating posts.
# This will dramatically clean up the final posts in Discourse.
#
@ -10,7 +10,7 @@ if ARGV.include?('bbcode-to-md')
# cd ruby-bbcode-to-md
# gem build ruby-bbcode-to-md.gemspec
# gem install ruby-bbcode-to-md-*.gem
require 'ruby-bbcode-to-md'
require "ruby-bbcode-to-md"
end
require "pg"
@ -20,12 +20,12 @@ require "htmlentities"
puts "Loading application..."
require_relative "../../config/environment"
require_relative '../import_scripts/base/uploader'
require_relative "../import_scripts/base/uploader"
module BulkImport; end
module BulkImport
end
class BulkImport::Base
NOW ||= "now()"
PRIVATE_OFFSET ||= 2**30
@ -33,41 +33,41 @@ class BulkImport::Base
CHARSET_MAP = {
"armscii8" => nil,
"ascii" => Encoding::US_ASCII,
"big5" => Encoding::Big5,
"binary" => Encoding::ASCII_8BIT,
"cp1250" => Encoding::Windows_1250,
"cp1251" => Encoding::Windows_1251,
"cp1256" => Encoding::Windows_1256,
"cp1257" => Encoding::Windows_1257,
"cp850" => Encoding::CP850,
"cp852" => Encoding::CP852,
"cp866" => Encoding::IBM866,
"cp932" => Encoding::Windows_31J,
"dec8" => nil,
"eucjpms" => Encoding::EucJP_ms,
"euckr" => Encoding::EUC_KR,
"gb2312" => Encoding::EUC_CN,
"gbk" => Encoding::GBK,
"geostd8" => nil,
"greek" => Encoding::ISO_8859_7,
"hebrew" => Encoding::ISO_8859_8,
"hp8" => nil,
"keybcs2" => nil,
"koi8r" => Encoding::KOI8_R,
"koi8u" => Encoding::KOI8_U,
"latin1" => Encoding::ISO_8859_1,
"latin2" => Encoding::ISO_8859_2,
"latin5" => Encoding::ISO_8859_9,
"latin7" => Encoding::ISO_8859_13,
"macce" => Encoding::MacCentEuro,
"ascii" => Encoding::US_ASCII,
"big5" => Encoding::Big5,
"binary" => Encoding::ASCII_8BIT,
"cp1250" => Encoding::Windows_1250,
"cp1251" => Encoding::Windows_1251,
"cp1256" => Encoding::Windows_1256,
"cp1257" => Encoding::Windows_1257,
"cp850" => Encoding::CP850,
"cp852" => Encoding::CP852,
"cp866" => Encoding::IBM866,
"cp932" => Encoding::Windows_31J,
"dec8" => nil,
"eucjpms" => Encoding::EucJP_ms,
"euckr" => Encoding::EUC_KR,
"gb2312" => Encoding::EUC_CN,
"gbk" => Encoding::GBK,
"geostd8" => nil,
"greek" => Encoding::ISO_8859_7,
"hebrew" => Encoding::ISO_8859_8,
"hp8" => nil,
"keybcs2" => nil,
"koi8r" => Encoding::KOI8_R,
"koi8u" => Encoding::KOI8_U,
"latin1" => Encoding::ISO_8859_1,
"latin2" => Encoding::ISO_8859_2,
"latin5" => Encoding::ISO_8859_9,
"latin7" => Encoding::ISO_8859_13,
"macce" => Encoding::MacCentEuro,
"macroman" => Encoding::MacRoman,
"sjis" => Encoding::SHIFT_JIS,
"swe7" => nil,
"tis620" => Encoding::TIS_620,
"ucs2" => Encoding::UTF_16BE,
"ujis" => Encoding::EucJP_ms,
"utf8" => Encoding::UTF_8,
"sjis" => Encoding::SHIFT_JIS,
"swe7" => nil,
"tis620" => Encoding::TIS_620,
"ucs2" => Encoding::UTF_16BE,
"ujis" => Encoding::EucJP_ms,
"utf8" => Encoding::UTF_8,
}
# rubocop:enable Layout/HashAlignment
@ -82,12 +82,13 @@ class BulkImport::Base
@encoding = CHARSET_MAP[charset]
@bbcode_to_md = true if use_bbcode_to_md?
@markdown = Redcarpet::Markdown.new(
Redcarpet::Render::HTML.new(hard_wrap: true),
no_intra_emphasis: true,
fenced_code_blocks: true,
autolink: true
)
@markdown =
Redcarpet::Markdown.new(
Redcarpet::Render::HTML.new(hard_wrap: true),
no_intra_emphasis: true,
fenced_code_blocks: true,
autolink: true,
)
end
def run
@ -132,7 +133,9 @@ class BulkImport::Base
map = []
ids = []
@raw_connection.send_query("SELECT value, #{name}_id FROM #{name}_custom_fields WHERE name = 'import_id'")
@raw_connection.send_query(
"SELECT value, #{name}_id FROM #{name}_custom_fields WHERE name = 'import_id'",
)
@raw_connection.set_single_row_mode
@raw_connection.get_result.stream_each do |row|
@ -163,12 +166,14 @@ class BulkImport::Base
puts "Loading imported topic ids..."
@topics, imported_topic_ids = imported_ids("topic")
@last_imported_topic_id = imported_topic_ids.select { |id| id < PRIVATE_OFFSET }.max || -1
@last_imported_private_topic_id = imported_topic_ids.select { |id| id > PRIVATE_OFFSET }.max || (PRIVATE_OFFSET - 1)
@last_imported_private_topic_id =
imported_topic_ids.select { |id| id > PRIVATE_OFFSET }.max || (PRIVATE_OFFSET - 1)
puts "Loading imported post ids..."
@posts, imported_post_ids = imported_ids("post")
@last_imported_post_id = imported_post_ids.select { |id| id < PRIVATE_OFFSET }.max || -1
@last_imported_private_post_id = imported_post_ids.select { |id| id > PRIVATE_OFFSET }.max || (PRIVATE_OFFSET - 1)
@last_imported_private_post_id =
imported_post_ids.select { |id| id > PRIVATE_OFFSET }.max || (PRIVATE_OFFSET - 1)
end
def last_id(klass)
@ -182,9 +187,7 @@ class BulkImport::Base
@raw_connection.send_query("SELECT id, #{column} FROM #{name}")
@raw_connection.set_single_row_mode
@raw_connection.get_result.stream_each do |row|
map[row["id"].to_i] = row[column].to_i
end
@raw_connection.get_result.stream_each { |row| map[row["id"].to_i] = row[column].to_i }
@raw_connection.get_result
@ -199,13 +202,24 @@ class BulkImport::Base
puts "Loading users indexes..."
@last_user_id = last_id(User)
@last_user_email_id = last_id(UserEmail)
@emails = User.unscoped.joins(:user_emails).pluck(:"user_emails.email", :"user_emails.user_id").to_h
@emails =
User.unscoped.joins(:user_emails).pluck(:"user_emails.email", :"user_emails.user_id").to_h
@usernames_lower = User.unscoped.pluck(:username_lower).to_set
@mapped_usernames = UserCustomField.joins(:user).where(name: "import_username").pluck("user_custom_fields.value", "users.username").to_h
@mapped_usernames =
UserCustomField
.joins(:user)
.where(name: "import_username")
.pluck("user_custom_fields.value", "users.username")
.to_h
puts "Loading categories indexes..."
@last_category_id = last_id(Category)
@category_names = Category.unscoped.pluck(:parent_category_id, :name).map { |pci, name| "#{pci}-#{name}" }.to_set
@category_names =
Category
.unscoped
.pluck(:parent_category_id, :name)
.map { |pci, name| "#{pci}-#{name}" }
.to_set
puts "Loading topics indexes..."
@last_topic_id = last_id(Topic)
@ -233,13 +247,27 @@ class BulkImport::Base
def fix_primary_keys
puts "Updating primary key sequences..."
@raw_connection.exec("SELECT setval('#{Group.sequence_name}', #{@last_group_id})") if @last_group_id > 0
@raw_connection.exec("SELECT setval('#{User.sequence_name}', #{@last_user_id})") if @last_user_id > 0
@raw_connection.exec("SELECT setval('#{UserEmail.sequence_name}', #{@last_user_email_id})") if @last_user_email_id > 0
@raw_connection.exec("SELECT setval('#{Category.sequence_name}', #{@last_category_id})") if @last_category_id > 0
@raw_connection.exec("SELECT setval('#{Topic.sequence_name}', #{@last_topic_id})") if @last_topic_id > 0
@raw_connection.exec("SELECT setval('#{Post.sequence_name}', #{@last_post_id})") if @last_post_id > 0
@raw_connection.exec("SELECT setval('#{PostAction.sequence_name}', #{@last_post_action_id})") if @last_post_action_id > 0
if @last_group_id > 0
@raw_connection.exec("SELECT setval('#{Group.sequence_name}', #{@last_group_id})")
end
if @last_user_id > 0
@raw_connection.exec("SELECT setval('#{User.sequence_name}', #{@last_user_id})")
end
if @last_user_email_id > 0
@raw_connection.exec("SELECT setval('#{UserEmail.sequence_name}', #{@last_user_email_id})")
end
if @last_category_id > 0
@raw_connection.exec("SELECT setval('#{Category.sequence_name}', #{@last_category_id})")
end
if @last_topic_id > 0
@raw_connection.exec("SELECT setval('#{Topic.sequence_name}', #{@last_topic_id})")
end
if @last_post_id > 0
@raw_connection.exec("SELECT setval('#{Post.sequence_name}', #{@last_post_id})")
end
if @last_post_action_id > 0
@raw_connection.exec("SELECT setval('#{PostAction.sequence_name}', #{@last_post_action_id})")
end
end
def group_id_from_imported_id(id)
@ -272,63 +300,124 @@ class BulkImport::Base
post_id && @topic_id_by_post_id[post_id]
end
GROUP_COLUMNS ||= %i{
id name title bio_raw bio_cooked created_at updated_at
}
GROUP_COLUMNS ||= %i[id name title bio_raw bio_cooked created_at updated_at]
USER_COLUMNS ||= %i{
id username username_lower name active trust_level admin moderator
date_of_birth ip_address registration_ip_address primary_group_id
suspended_at suspended_till last_emailed_at created_at updated_at
}
USER_COLUMNS ||= %i[
id
username
username_lower
name
active
trust_level
admin
moderator
date_of_birth
ip_address
registration_ip_address
primary_group_id
suspended_at
suspended_till
last_emailed_at
created_at
updated_at
]
USER_EMAIL_COLUMNS ||= %i{
id user_id email primary created_at updated_at
}
USER_EMAIL_COLUMNS ||= %i[id user_id email primary created_at updated_at]
USER_STAT_COLUMNS ||= %i{
user_id topics_entered time_read days_visited posts_read_count
likes_given likes_received new_since read_faq
first_post_created_at post_count topic_count bounce_score
reset_bounce_score_after digest_attempted_at
}
USER_STAT_COLUMNS ||= %i[
user_id
topics_entered
time_read
days_visited
posts_read_count
likes_given
likes_received
new_since
read_faq
first_post_created_at
post_count
topic_count
bounce_score
reset_bounce_score_after
digest_attempted_at
]
USER_PROFILE_COLUMNS ||= %i{
user_id location website bio_raw bio_cooked views
}
USER_PROFILE_COLUMNS ||= %i[user_id location website bio_raw bio_cooked views]
GROUP_USER_COLUMNS ||= %i{
group_id user_id created_at updated_at
}
GROUP_USER_COLUMNS ||= %i[group_id user_id created_at updated_at]
CATEGORY_COLUMNS ||= %i{
id name name_lower slug user_id description position parent_category_id
created_at updated_at
}
CATEGORY_COLUMNS ||= %i[
id
name
name_lower
slug
user_id
description
position
parent_category_id
created_at
updated_at
]
TOPIC_COLUMNS ||= %i{
id archetype title fancy_title slug user_id last_post_user_id category_id
visible closed pinned_at views created_at bumped_at updated_at
}
TOPIC_COLUMNS ||= %i[
id
archetype
title
fancy_title
slug
user_id
last_post_user_id
category_id
visible
closed
pinned_at
views
created_at
bumped_at
updated_at
]
POST_COLUMNS ||= %i{
id user_id last_editor_id topic_id post_number sort_order reply_to_post_number
like_count raw cooked hidden word_count created_at last_version_at updated_at
}
POST_COLUMNS ||= %i[
id
user_id
last_editor_id
topic_id
post_number
sort_order
reply_to_post_number
like_count
raw
cooked
hidden
word_count
created_at
last_version_at
updated_at
]
POST_ACTION_COLUMNS ||= %i{
id post_id user_id post_action_type_id deleted_at created_at updated_at
deleted_by_id related_post_id staff_took_action deferred_by_id targets_topic
agreed_at agreed_by_id deferred_at disagreed_at disagreed_by_id
}
POST_ACTION_COLUMNS ||= %i[
id
post_id
user_id
post_action_type_id
deleted_at
created_at
updated_at
deleted_by_id
related_post_id
staff_took_action
deferred_by_id
targets_topic
agreed_at
agreed_by_id
deferred_at
disagreed_at
disagreed_by_id
]
TOPIC_ALLOWED_USER_COLUMNS ||= %i{
topic_id user_id created_at updated_at
}
TOPIC_ALLOWED_USER_COLUMNS ||= %i[topic_id user_id created_at updated_at]
TOPIC_TAG_COLUMNS ||= %i{
topic_id tag_id created_at updated_at
}
TOPIC_TAG_COLUMNS ||= %i[topic_id tag_id created_at updated_at]
def create_groups(rows, &block)
create_records(rows, "group", GROUP_COLUMNS, &block)
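
The column constants in the hunk above illustrate two more defaults: percent-literal arrays get bracket delimiters (%i[...] rather than %i{...}), and an array that would overflow the print width is split one element per line. Mirroring the smallest case:

    TOPIC_TAG_COLUMNS ||= %i{
      topic_id tag_id created_at updated_at
    }
    # is re-printed as:
    TOPIC_TAG_COLUMNS ||= %i[topic_id tag_id created_at updated_at]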
@ -340,10 +429,7 @@ class BulkImport::Base
create_records(rows, "user", USER_COLUMNS, &block)
create_custom_fields("user", "username", @imported_usernames.keys) do |username|
{
record_id: @imported_usernames[username],
value: username,
}
{ record_id: @imported_usernames[username], value: username }
end
end
@ -389,8 +475,8 @@ class BulkImport::Base
group[:name] = group_name
end
group[:title] = group[:title].scrub.strip.presence if group[:title].present?
group[:bio_raw] = group[:bio_raw].scrub.strip.presence if group[:bio_raw].present?
group[:title] = group[:title].scrub.strip.presence if group[:title].present?
group[:bio_raw] = group[:bio_raw].scrub.strip.presence if group[:bio_raw].present?
group[:bio_cooked] = pre_cook(group[:bio_raw]) if group[:bio_raw].present?
group[:created_at] ||= NOW
group[:updated_at] ||= group[:created_at]
@ -456,7 +542,9 @@ class BulkImport::Base
user_email[:email] ||= random_email
user_email[:email].downcase!
# unique email
user_email[:email] = random_email until EmailAddressValidator.valid_value?(user_email[:email]) && !@emails.has_key?(user_email[:email])
user_email[:email] = random_email until EmailAddressValidator.valid_value?(
user_email[:email],
) && !@emails.has_key?(user_email[:email])
user_email
end
@ -539,7 +627,11 @@ class BulkImport::Base
post[:raw] = (post[:raw] || "").scrub.strip.presence || "<Empty imported post>"
post[:raw] = process_raw post[:raw]
if @bbcode_to_md
post[:raw] = post[:raw].bbcode_to_md(false, {}, :disable, :quote) rescue post[:raw]
post[:raw] = begin
post[:raw].bbcode_to_md(false, {}, :disable, :quote)
rescue StandardError
post[:raw]
end
end
post[:like_count] ||= 0
post[:cooked] = pre_cook post[:raw]
@ -580,22 +672,22 @@ class BulkImport::Base
# [HTML]...[/HTML]
raw.gsub!(/\[HTML\]/i, "\n\n```html\n")
raw.gsub!(/\[\/HTML\]/i, "\n```\n\n")
raw.gsub!(%r{\[/HTML\]}i, "\n```\n\n")
# [PHP]...[/PHP]
raw.gsub!(/\[PHP\]/i, "\n\n```php\n")
raw.gsub!(/\[\/PHP\]/i, "\n```\n\n")
raw.gsub!(%r{\[/PHP\]}i, "\n```\n\n")
# [HIGHLIGHT="..."]
raw.gsub!(/\[HIGHLIGHT="?(\w+)"?\]/i) { "\n\n```#{$1.downcase}\n" }
# [CODE]...[/CODE]
# [HIGHLIGHT]...[/HIGHLIGHT]
raw.gsub!(/\[\/?CODE\]/i, "\n\n```\n\n")
raw.gsub!(/\[\/?HIGHLIGHT\]/i, "\n\n```\n\n")
raw.gsub!(%r{\[/?CODE\]}i, "\n\n```\n\n")
raw.gsub!(%r{\[/?HIGHLIGHT\]}i, "\n\n```\n\n")
# [SAMP]...[/SAMP]
raw.gsub!(/\[\/?SAMP\]/i, "`")
raw.gsub!(%r{\[/?SAMP\]}i, "`")
# replace all chevrons with HTML entities
# /!\ must be done /!\
@ -609,61 +701,61 @@ class BulkImport::Base
raw.gsub!(">", "&gt;")
raw.gsub!("\u2603", ">")
raw.gsub!(/\[\/?I\]/i, "*")
raw.gsub!(/\[\/?B\]/i, "**")
raw.gsub!(/\[\/?U\]/i, "")
raw.gsub!(%r{\[/?I\]}i, "*")
raw.gsub!(%r{\[/?B\]}i, "**")
raw.gsub!(%r{\[/?U\]}i, "")
raw.gsub!(/\[\/?RED\]/i, "")
raw.gsub!(/\[\/?BLUE\]/i, "")
raw.gsub!(%r{\[/?RED\]}i, "")
raw.gsub!(%r{\[/?BLUE\]}i, "")
raw.gsub!(/\[AUTEUR\].+?\[\/AUTEUR\]/im, "")
raw.gsub!(/\[VOIRMSG\].+?\[\/VOIRMSG\]/im, "")
raw.gsub!(/\[PSEUDOID\].+?\[\/PSEUDOID\]/im, "")
raw.gsub!(%r{\[AUTEUR\].+?\[/AUTEUR\]}im, "")
raw.gsub!(%r{\[VOIRMSG\].+?\[/VOIRMSG\]}im, "")
raw.gsub!(%r{\[PSEUDOID\].+?\[/PSEUDOID\]}im, "")
# [IMG]...[/IMG]
raw.gsub!(/(?:\s*\[IMG\]\s*)+(.+?)(?:\s*\[\/IMG\]\s*)+/im) { "\n\n#{$1}\n\n" }
raw.gsub!(%r{(?:\s*\[IMG\]\s*)+(.+?)(?:\s*\[/IMG\]\s*)+}im) { "\n\n#{$1}\n\n" }
# [IMG=url]
raw.gsub!(/\[IMG=([^\]]*)\]/im) { "\n\n#{$1}\n\n" }
# [URL=...]...[/URL]
raw.gsub!(/\[URL="?(.+?)"?\](.+?)\[\/URL\]/im) { "[#{$2.strip}](#{$1})" }
raw.gsub!(%r{\[URL="?(.+?)"?\](.+?)\[/URL\]}im) { "[#{$2.strip}](#{$1})" }
# [URL]...[/URL]
# [MP3]...[/MP3]
# [EMAIL]...[/EMAIL]
# [LEFT]...[/LEFT]
raw.gsub!(/\[\/?URL\]/i, "")
raw.gsub!(/\[\/?MP3\]/i, "")
raw.gsub!(/\[\/?EMAIL\]/i, "")
raw.gsub!(/\[\/?LEFT\]/i, "")
raw.gsub!(%r{\[/?URL\]}i, "")
raw.gsub!(%r{\[/?MP3\]}i, "")
raw.gsub!(%r{\[/?EMAIL\]}i, "")
raw.gsub!(%r{\[/?LEFT\]}i, "")
# [FONT=blah] and [COLOR=blah]
raw.gsub!(/\[FONT=.*?\](.*?)\[\/FONT\]/im, "\\1")
raw.gsub!(/\[COLOR=.*?\](.*?)\[\/COLOR\]/im, "\\1")
raw.gsub!(%r{\[FONT=.*?\](.*?)\[/FONT\]}im, "\\1")
raw.gsub!(%r{\[COLOR=.*?\](.*?)\[/COLOR\]}im, "\\1")
raw.gsub!(/\[SIZE=.*?\](.*?)\[\/SIZE\]/im, "\\1")
raw.gsub!(/\[H=.*?\](.*?)\[\/H\]/im, "\\1")
raw.gsub!(%r{\[SIZE=.*?\](.*?)\[/SIZE\]}im, "\\1")
raw.gsub!(%r{\[H=.*?\](.*?)\[/H\]}im, "\\1")
# [CENTER]...[/CENTER]
raw.gsub!(/\[CENTER\](.*?)\[\/CENTER\]/im, "\\1")
raw.gsub!(%r{\[CENTER\](.*?)\[/CENTER\]}im, "\\1")
# [INDENT]...[/INDENT]
raw.gsub!(/\[INDENT\](.*?)\[\/INDENT\]/im, "\\1")
raw.gsub!(/\[TABLE\](.*?)\[\/TABLE\]/im, "\\1")
raw.gsub!(/\[TR\](.*?)\[\/TR\]/im, "\\1")
raw.gsub!(/\[TD\](.*?)\[\/TD\]/im, "\\1")
raw.gsub!(/\[TD="?.*?"?\](.*?)\[\/TD\]/im, "\\1")
raw.gsub!(%r{\[INDENT\](.*?)\[/INDENT\]}im, "\\1")
raw.gsub!(%r{\[TABLE\](.*?)\[/TABLE\]}im, "\\1")
raw.gsub!(%r{\[TR\](.*?)\[/TR\]}im, "\\1")
raw.gsub!(%r{\[TD\](.*?)\[/TD\]}im, "\\1")
raw.gsub!(%r{\[TD="?.*?"?\](.*?)\[/TD\]}im, "\\1")
# [STRIKE]
raw.gsub!(/\[STRIKE\]/i, "<s>")
raw.gsub!(/\[\/STRIKE\]/i, "</s>")
raw.gsub!(%r{\[/STRIKE\]}i, "</s>")
# [QUOTE]...[/QUOTE]
raw.gsub!(/\[QUOTE="([^\]]+)"\]/i) { "[QUOTE=#{$1}]" }
# Nested Quotes
raw.gsub!(/(\[\/?QUOTE.*?\])/mi) { |q| "\n#{q}\n" }
raw.gsub!(%r{(\[/?QUOTE.*?\])}mi) { |q| "\n#{q}\n" }
# raw.gsub!(/\[QUOTE\](.+?)\[\/QUOTE\]/im) { |quote|
# quote.gsub!(/\[QUOTE\](.+?)\[\/QUOTE\]/im) { "\n#{$1}\n" }
@ -686,28 +778,36 @@ class BulkImport::Base
end
# [YOUTUBE]<id>[/YOUTUBE]
raw.gsub!(/\[YOUTUBE\](.+?)\[\/YOUTUBE\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
raw.gsub!(/\[DAILYMOTION\](.+?)\[\/DAILYMOTION\]/i) { "\nhttps://www.dailymotion.com/video/#{$1}\n" }
raw.gsub!(%r{\[YOUTUBE\](.+?)\[/YOUTUBE\]}i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
raw.gsub!(%r{\[DAILYMOTION\](.+?)\[/DAILYMOTION\]}i) do
"\nhttps://www.dailymotion.com/video/#{$1}\n"
end
# [VIDEO=youtube;<id>]...[/VIDEO]
raw.gsub!(/\[VIDEO=YOUTUBE;([^\]]+)\].*?\[\/VIDEO\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
raw.gsub!(/\[VIDEO=DAILYMOTION;([^\]]+)\].*?\[\/VIDEO\]/i) { "\nhttps://www.dailymotion.com/video/#{$1}\n" }
raw.gsub!(%r{\[VIDEO=YOUTUBE;([^\]]+)\].*?\[/VIDEO\]}i) do
"\nhttps://www.youtube.com/watch?v=#{$1}\n"
end
raw.gsub!(%r{\[VIDEO=DAILYMOTION;([^\]]+)\].*?\[/VIDEO\]}i) do
"\nhttps://www.dailymotion.com/video/#{$1}\n"
end
# [SPOILER=Some hidden stuff]SPOILER HERE!![/SPOILER]
raw.gsub!(/\[SPOILER="?(.+?)"?\](.+?)\[\/SPOILER\]/im) { "\n#{$1}\n[spoiler]#{$2}[/spoiler]\n" }
raw.gsub!(%r{\[SPOILER="?(.+?)"?\](.+?)\[/SPOILER\]}im) do
"\n#{$1}\n[spoiler]#{$2}[/spoiler]\n"
end
# convert list tags to ul and list=1 tags to ol
# (basically, we're only missing list=a here...)
# (https://meta.discourse.org/t/phpbb-3-importer-old/17397)
raw.gsub!(/\[list\](.*?)\[\/list\]/im, '[ul]\1[/ul]')
raw.gsub!(/\[list=1\|?[^\]]*\](.*?)\[\/list\]/im, '[ol]\1[/ol]')
raw.gsub!(/\[list\](.*?)\[\/list:u\]/im, '[ul]\1[/ul]')
raw.gsub!(/\[list=1\|?[^\]]*\](.*?)\[\/list:o\]/im, '[ol]\1[/ol]')
raw.gsub!(%r{\[list\](.*?)\[/list\]}im, '[ul]\1[/ul]')
raw.gsub!(%r{\[list=1\|?[^\]]*\](.*?)\[/list\]}im, '[ol]\1[/ol]')
raw.gsub!(%r{\[list\](.*?)\[/list:u\]}im, '[ul]\1[/ul]')
raw.gsub!(%r{\[list=1\|?[^\]]*\](.*?)\[/list:o\]}im, '[ol]\1[/ol]')
# convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
raw.gsub!(/\[\*\]\n/, '')
raw.gsub!(/\[\*\](.*?)\[\/\*:m\]/, '[li]\1[/li]')
raw.gsub!(/\[\*\]\n/, "")
raw.gsub!(%r{\[\*\](.*?)\[/\*:m\]}, '[li]\1[/li]')
raw.gsub!(/\[\*\](.*?)\n/, '[li]\1[/li]')
raw.gsub!(/\[\*=1\]/, '')
raw.gsub!(/\[\*=1\]/, "")
raw
end
@ -728,7 +828,9 @@ class BulkImport::Base
imported_ids |= mapped[:imported_ids] unless mapped[:imported_ids].nil?
@raw_connection.put_copy_data columns.map { |c| processed[c] } unless processed[:skip]
rows_created += 1
print "\r%7d - %6d/sec" % [rows_created, rows_created.to_f / (Time.now - start)] if rows_created % 100 == 0
if rows_created % 100 == 0
print "\r%7d - %6d/sec" % [rows_created, rows_created.to_f / (Time.now - start)]
end
rescue => e
puts "\n"
puts "ERROR: #{e.message}"
@ -737,15 +839,14 @@ class BulkImport::Base
end
end
print "\r%7d - %6d/sec\n" % [rows_created, rows_created.to_f / (Time.now - start)] if rows_created > 0
if rows_created > 0
print "\r%7d - %6d/sec\n" % [rows_created, rows_created.to_f / (Time.now - start)]
end
id_mapping_method_name = "#{name}_id_from_imported_id".freeze
return unless respond_to?(id_mapping_method_name)
create_custom_fields(name, "id", imported_ids) do |imported_id|
{
record_id: send(id_mapping_method_name, imported_id),
value: imported_id,
}
{ record_id: send(id_mapping_method_name, imported_id), value: imported_id }
end
rescue => e
# FIXME: errors catched here stop the rest of the COPY
@ -755,7 +856,8 @@ class BulkImport::Base
def create_custom_fields(table, name, rows)
name = "import_#{name}"
sql = "COPY #{table}_custom_fields (#{table}_id, name, value, created_at, updated_at) FROM STDIN"
sql =
"COPY #{table}_custom_fields (#{table}_id, name, value, created_at, updated_at) FROM STDIN"
@raw_connection.copy_data(sql, @encoder) do
rows.each do |row|
next unless cf = yield(row)
@ -797,7 +899,7 @@ class BulkImport::Base
cooked = raw
# Convert YouTube URLs to lazyYT DOMs before being transformed into links
cooked.gsub!(/\nhttps\:\/\/www.youtube.com\/watch\?v=(\w+)\n/) do
cooked.gsub!(%r{\nhttps\://www.youtube.com/watch\?v=(\w+)\n}) do
video_id = $1
result = <<-HTML
<div class="lazyYT" data-youtube-id="#{video_id}" data-width="480" data-height="270" data-parameters="feature=oembed&amp;wmode=opaque"></div>
@ -807,7 +909,7 @@ class BulkImport::Base
cooked = @markdown.render(cooked).scrub.strip
cooked.gsub!(/\[QUOTE="?([^,"]+)(?:, post:(\d+), topic:(\d+))?"?\](.+?)\[\/QUOTE\]/im) do
cooked.gsub!(%r{\[QUOTE="?([^,"]+)(?:, post:(\d+), topic:(\d+))?"?\](.+?)\[/QUOTE\]}im) do
username, post_id, topic_id, quote = $1, $2, $3, $4
quote = quote.scrub.strip
@ -860,5 +962,4 @@ class BulkImport::Base
return text if @encoding == Encoding::UTF_8
text && text.encode(@encoding).force_encoding(Encoding::UTF_8)
end
end

File diff suppressed because it is too large


@ -3,17 +3,16 @@
require_relative "base"
require "pg"
require "htmlentities"
require 'ruby-bbcode-to-md'
require "ruby-bbcode-to-md"
class BulkImport::PhpBB < BulkImport::Base
SUSPENDED_TILL ||= Date.new(3000, 1, 1)
TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "phpbb_"
TABLE_PREFIX ||= ENV["TABLE_PREFIX"] || "phpbb_"
def initialize
super
charset = ENV["DB_CHARSET"] || "utf8"
charset = ENV["DB_CHARSET"] || "utf8"
database = ENV["DB_NAME"] || "flightaware"
password = ENV["DB_PASSWORD"] || "discourse"
@ -57,7 +56,7 @@ class BulkImport::PhpBB < BulkImport::Base
{
imported_id: row["group_id"],
name: normalize_text(row["group_name"]),
bio_raw: normalize_text(row["group_desc"])
bio_raw: normalize_text(row["group_desc"]),
}
end
end
@ -85,15 +84,28 @@ class BulkImport::PhpBB < BulkImport::Base
username: normalize_text(row["username"]),
email: row["user_email"],
created_at: Time.zone.at(row["user_regdate"].to_i),
last_seen_at: row["user_lastvisit"] == 0 ? Time.zone.at(row["user_regdate"].to_i) : Time.zone.at(row["user_lastvisit"].to_i),
last_seen_at:
(
if row["user_lastvisit"] == 0
Time.zone.at(row["user_regdate"].to_i)
else
Time.zone.at(row["user_lastvisit"].to_i)
end
),
trust_level: row["user_posts"] == 0 ? TrustLevel[0] : TrustLevel[1],
date_of_birth: parse_birthday(row["user_birthday"]),
primary_group_id: group_id_from_imported_id(row["group_id"])
primary_group_id: group_id_from_imported_id(row["group_id"]),
}
u[:ip_address] = row["user_ip"][/\b(?:\d{1,3}\.){3}\d{1,3}\b/] if row["user_ip"].present?
if row["ban_start"]
u[:suspended_at] = Time.zone.at(row["ban_start"].to_i)
u[:suspended_till] = row["ban_end"].to_i > 0 ? Time.zone.at(row["ban_end"].to_i) : SUSPENDED_TILL
u[:suspended_till] = (
if row["ban_end"].to_i > 0
Time.zone.at(row["ban_end"].to_i)
else
SUSPENDED_TILL
end
)
end
u
end
@ -114,7 +126,7 @@ class BulkImport::PhpBB < BulkImport::Base
imported_id: row["user_id"],
imported_user_id: row["user_id"],
email: row["user_email"],
created_at: Time.zone.at(row["user_regdate"].to_i)
created_at: Time.zone.at(row["user_regdate"].to_i),
}
end
end
@ -149,7 +161,14 @@ class BulkImport::PhpBB < BulkImport::Base
create_user_profiles(user_profiles) do |row|
{
user_id: user_id_from_imported_id(row["user_id"]),
website: (URI.parse(row["user_website"]).to_s rescue nil),
website:
(
begin
URI.parse(row["user_website"]).to_s
rescue StandardError
nil
end
),
location: row["user_from"],
}
end
@ -158,17 +177,16 @@ class BulkImport::PhpBB < BulkImport::Base
def import_categories
puts "Importing categories..."
categories = psql_query(<<-SQL
categories = psql_query(<<-SQL).to_a
SELECT forum_id, parent_id, forum_name, forum_desc
FROM #{TABLE_PREFIX}forums
WHERE forum_id > #{@last_imported_category_id}
ORDER BY parent_id, left_id
SQL
).to_a
return if categories.empty?
parent_categories = categories.select { |c| c["parent_id"].to_i == 0 }
parent_categories = categories.select { |c| c["parent_id"].to_i == 0 }
children_categories = categories.select { |c| c["parent_id"].to_i != 0 }
puts "Importing parent categories..."
@ -176,7 +194,7 @@ class BulkImport::PhpBB < BulkImport::Base
{
imported_id: row["forum_id"],
name: normalize_text(row["forum_name"]),
description: normalize_text(row["forum_desc"])
description: normalize_text(row["forum_desc"]),
}
end
@ -186,7 +204,7 @@ class BulkImport::PhpBB < BulkImport::Base
imported_id: row["forum_id"],
name: normalize_text(row["forum_name"]),
description: normalize_text(row["forum_desc"]),
parent_category_id: category_id_from_imported_id(row["parent_id"])
parent_category_id: category_id_from_imported_id(row["parent_id"]),
}
end
end
@ -209,7 +227,7 @@ class BulkImport::PhpBB < BulkImport::Base
category_id: category_id_from_imported_id(row["forum_id"]),
user_id: user_id_from_imported_id(row["topic_poster"]),
created_at: Time.zone.at(row["topic_time"].to_i),
views: row["topic_views"]
views: row["topic_views"],
}
end
end
@ -261,7 +279,7 @@ class BulkImport::PhpBB < BulkImport::Base
imported_id: row["msg_id"].to_i + PRIVATE_OFFSET,
title: normalize_text(title),
user_id: user_id_from_imported_id(row["author_id"].to_i),
created_at: Time.zone.at(row["message_time"].to_i)
created_at: Time.zone.at(row["message_time"].to_i),
}
end
end
@ -271,13 +289,12 @@ class BulkImport::PhpBB < BulkImport::Base
allowed_users = []
psql_query(<<-SQL
psql_query(<<-SQL).each do |row|
SELECT msg_id, author_id, to_address
FROM #{TABLE_PREFIX}privmsgs
WHERE msg_id > (#{@last_imported_private_topic_id - PRIVATE_OFFSET})
ORDER BY msg_id
SQL
).each do |row|
next unless topic_id = topic_id_from_imported_id(row["msg_id"].to_i + PRIVATE_OFFSET)
user_ids = get_message_recipients(row["author_id"], row["to_address"])
@ -287,12 +304,7 @@ class BulkImport::PhpBB < BulkImport::Base
end
end
create_topic_allowed_users(allowed_users) do |row|
{
topic_id: row[0],
user_id: row[1]
}
end
create_topic_allowed_users(allowed_users) { |row| { topic_id: row[0], user_id: row[1] } }
end
def import_private_posts
@ -316,13 +328,13 @@ class BulkImport::PhpBB < BulkImport::Base
topic_id: topic_id,
user_id: user_id_from_imported_id(row["author_id"].to_i),
created_at: Time.zone.at(row["message_time"].to_i),
raw: process_raw_text(row["message_text"])
raw: process_raw_text(row["message_text"]),
}
end
end
def get_message_recipients(from, to)
user_ids = to.split(':')
user_ids = to.split(":")
user_ids.map! { |u| u[2..-1].to_i }
user_ids.push(from.to_i)
user_ids.uniq!
@ -332,15 +344,29 @@ class BulkImport::PhpBB < BulkImport::Base
def extract_pm_title(title)
pm_title = CGI.unescapeHTML(title)
pm_title = title.gsub(/^Re\s*:\s*/i, "") rescue nil
pm_title =
begin
title.gsub(/^Re\s*:\s*/i, "")
rescue StandardError
nil
end
pm_title
end
def parse_birthday(birthday)
return if birthday.blank?
date_of_birth = Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y") rescue nil
date_of_birth =
begin
Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y")
rescue StandardError
nil
end
return if date_of_birth.nil?
date_of_birth.year < 1904 ? Date.new(1904, date_of_birth.month, date_of_birth.day) : date_of_birth
if date_of_birth.year < 1904
Date.new(1904, date_of_birth.month, date_of_birth.day)
else
date_of_birth
end
end
def psql_query(sql)
@ -352,34 +378,36 @@ class BulkImport::PhpBB < BulkImport::Base
text = raw.dup
text = CGI.unescapeHTML(text)
text.gsub!(/:(?:\w{8})\]/, ']')
text.gsub!(/:(?:\w{8})\]/, "]")
# Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
text.gsub!(/<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/i, '[\2](\1)')
text.gsub!(%r{<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)</a><!-- \w -->}i, '[\2](\1)')
# phpBB shortens link text like this, which breaks our markdown processing:
# [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
#
# Work around it for now:
text.gsub!(/\[http(s)?:\/\/(www\.)?/i, '[')
text.gsub!(%r{\[http(s)?://(www\.)?}i, "[")
# convert list tags to ul and list=1 tags to ol
# list=a is not supported, so handle it like list=1
# list=9 and list=x have the same result as list=1 and list=a
text.gsub!(/\[list\](.*?)\[\/list:u\]/mi, '[ul]\1[/ul]')
text.gsub!(/\[list=.*?\](.*?)\[\/list:o\]/mi, '[ol]\1[/ol]')
text.gsub!(%r{\[list\](.*?)\[/list:u\]}mi, '[ul]\1[/ul]')
text.gsub!(%r{\[list=.*?\](.*?)\[/list:o\]}mi, '[ol]\1[/ol]')
# convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
text.gsub!(/\[\*\](.*?)\[\/\*:m\]/mi, '[li]\1[/li]')
text.gsub!(%r{\[\*\](.*?)\[/\*:m\]}mi, '[li]\1[/li]')
# [QUOTE="<username>"] -- add newline
text.gsub!(/(\[quote="[a-zA-Z\d]+"\])/i) { "#{$1}\n" }
# [/QUOTE] -- add newline
text.gsub!(/(\[\/quote\])/i) { "\n#{$1}" }
text.gsub!(%r{(\[/quote\])}i) { "\n#{$1}" }
# :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
text.gsub!(/<!-- s(\S+) --><img src="\{SMILIES_PATH\}\/(.+?)" alt="(.*?)" title="(.*?)" \/><!-- s(?:\S+) -->/) do
text.gsub!(
/<!-- s(\S+) --><img src="\{SMILIES_PATH\}\/(.+?)" alt="(.*?)" title="(.*?)" \/><!-- s(?:\S+) -->/,
) do
smiley = $1
@smiley_map.fetch(smiley) do
# upload_smiley(smiley, $2, $3, $4) || smiley_as_text(smiley)
@ -405,33 +433,30 @@ class BulkImport::PhpBB < BulkImport::Base
def add_default_smilies
{
[':D', ':-D', ':grin:'] => ':smiley:',
[':)', ':-)', ':smile:'] => ':slight_smile:',
[';)', ';-)', ':wink:'] => ':wink:',
[':(', ':-(', ':sad:'] => ':frowning:',
[':o', ':-o', ':eek:'] => ':astonished:',
[':shock:'] => ':open_mouth:',
[':?', ':-?', ':???:'] => ':confused:',
['8-)', ':cool:'] => ':sunglasses:',
[':lol:'] => ':laughing:',
[':x', ':-x', ':mad:'] => ':angry:',
[':P', ':-P', ':razz:'] => ':stuck_out_tongue:',
[':oops:'] => ':blush:',
[':cry:'] => ':cry:',
[':evil:'] => ':imp:',
[':twisted:'] => ':smiling_imp:',
[':roll:'] => ':unamused:',
[':!:'] => ':exclamation:',
[':?:'] => ':question:',
[':idea:'] => ':bulb:',
[':arrow:'] => ':arrow_right:',
[':|', ':-|'] => ':neutral_face:',
[':geek:'] => ':nerd:'
}.each do |smilies, emoji|
smilies.each { |smiley| @smiley_map[smiley] = emoji }
end
%w[:D :-D :grin:] => ":smiley:",
%w[:) :-) :smile:] => ":slight_smile:",
%w[;) ;-) :wink:] => ":wink:",
%w[:( :-( :sad:] => ":frowning:",
%w[:o :-o :eek:] => ":astonished:",
[":shock:"] => ":open_mouth:",
%w[:? :-? :???:] => ":confused:",
%w[8-) :cool:] => ":sunglasses:",
[":lol:"] => ":laughing:",
%w[:x :-x :mad:] => ":angry:",
%w[:P :-P :razz:] => ":stuck_out_tongue:",
[":oops:"] => ":blush:",
[":cry:"] => ":cry:",
[":evil:"] => ":imp:",
[":twisted:"] => ":smiling_imp:",
[":roll:"] => ":unamused:",
[":!:"] => ":exclamation:",
[":?:"] => ":question:",
[":idea:"] => ":bulb:",
[":arrow:"] => ":arrow_right:",
%w[:| :-|] => ":neutral_face:",
[":geek:"] => ":nerd:",
}.each { |smilies, emoji| smilies.each { |smiley| @smiley_map[smiley] = emoji } }
end
end
BulkImport::PhpBB.new.run


@ -8,7 +8,6 @@ require "htmlentities"
# NOTE: this importer expects a MySQL DB to directly connect to
class BulkImport::Vanilla < BulkImport::Base
VANILLA_DB = "dbname"
TABLE_PREFIX = "GDN_"
ATTACHMENTS_BASE_DIR = "/my/absolute/path/to/from_vanilla/uploads"
@ -20,13 +19,14 @@ class BulkImport::Vanilla < BulkImport::Base
def initialize
super
@htmlentities = HTMLEntities.new
@client = Mysql2::Client.new(
host: "localhost",
username: "root",
database: VANILLA_DB,
password: "",
reconnect: true
)
@client =
Mysql2::Client.new(
host: "localhost",
username: "root",
database: VANILLA_DB,
password: "",
reconnect: true,
)
@import_tags = false
begin
@ -88,10 +88,10 @@ class BulkImport::Vanilla < BulkImport::Base
end
def import_users
puts '', "Importing users..."
puts "", "Importing users..."
username = nil
total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}User;").first['count']
total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}User;").first["count"]
users = mysql_stream <<-SQL
SELECT UserID, Name, Title, Location, Email,
@ -103,26 +103,32 @@ class BulkImport::Vanilla < BulkImport::Base
SQL
create_users(users) do |row|
next if row['Email'].blank?
next if row['Name'].blank?
next if row["Email"].blank?
next if row["Name"].blank?
if ip_address = row['InsertIPAddress']&.split(',').try(:[], 0)
ip_address = nil unless (IPAddr.new(ip_address) rescue false)
if ip_address = row["InsertIPAddress"]&.split(",").try(:[], 0)
ip_address = nil unless (
begin
IPAddr.new(ip_address)
rescue StandardError
false
end
)
end
u = {
imported_id: row['UserID'],
email: row['Email'],
username: row['Name'],
name: row['Name'],
created_at: row['DateInserted'] == nil ? 0 : Time.zone.at(row['DateInserted']),
imported_id: row["UserID"],
email: row["Email"],
username: row["Name"],
name: row["Name"],
created_at: row["DateInserted"] == nil ? 0 : Time.zone.at(row["DateInserted"]),
registration_ip_address: ip_address,
last_seen_at: row['DateLastActive'] == nil ? 0 : Time.zone.at(row['DateLastActive']),
location: row['Location'],
admin: row['Admin'] > 0
last_seen_at: row["DateLastActive"] == nil ? 0 : Time.zone.at(row["DateLastActive"]),
location: row["Location"],
admin: row["Admin"] > 0,
}
if row["Banned"] > 0
u[:suspended_at] = Time.zone.at(row['DateInserted'])
u[:suspended_at] = Time.zone.at(row["DateInserted"])
u[:suspended_till] = SUSPENDED_TILL
end
u
@ -130,7 +136,7 @@ class BulkImport::Vanilla < BulkImport::Base
end
def import_user_emails
puts '', 'Importing user emails...'
puts "", "Importing user emails..."
users = mysql_stream <<-SQL
SELECT UserID, Name, Email, DateInserted
@ -141,20 +147,20 @@ class BulkImport::Vanilla < BulkImport::Base
SQL
create_user_emails(users) do |row|
next if row['Email'].blank?
next if row['Name'].blank?
next if row["Email"].blank?
next if row["Name"].blank?
{
imported_id: row["UserID"],
imported_user_id: row["UserID"],
email: row["Email"],
created_at: Time.zone.at(row["DateInserted"])
created_at: Time.zone.at(row["DateInserted"]),
}
end
end
def import_user_profiles
puts '', 'Importing user profiles...'
puts "", "Importing user profiles..."
user_profiles = mysql_stream <<-SQL
SELECT UserID, Name, Email, Location, About
@ -165,19 +171,19 @@ class BulkImport::Vanilla < BulkImport::Base
SQL
create_user_profiles(user_profiles) do |row|
next if row['Email'].blank?
next if row['Name'].blank?
next if row["Email"].blank?
next if row["Name"].blank?
{
user_id: user_id_from_imported_id(row["UserID"]),
location: row["Location"],
bio_raw: row["About"]
bio_raw: row["About"],
}
end
end
def import_user_stats
puts '', "Importing user stats..."
puts "", "Importing user stats..."
users = mysql_stream <<-SQL
SELECT UserID, CountDiscussions, CountComments, DateInserted
@ -190,14 +196,14 @@ class BulkImport::Vanilla < BulkImport::Base
now = Time.zone.now
create_user_stats(users) do |row|
next unless @users[row['UserID'].to_i] # shouldn't need this but it can be NULL :<
next unless @users[row["UserID"].to_i] # shouldn't need this but it can be NULL :<
{
imported_id: row['UserID'],
imported_user_id: row['UserID'],
new_since: Time.zone.at(row['DateInserted'] || now),
post_count: row['CountComments'] || 0,
topic_count: row['CountDiscussions'] || 0
imported_id: row["UserID"],
imported_user_id: row["UserID"],
new_since: Time.zone.at(row["DateInserted"] || now),
post_count: row["CountComments"] || 0,
topic_count: row["CountDiscussions"] || 0,
}
end
end
@ -215,7 +221,10 @@ class BulkImport::Vanilla < BulkImport::Base
next unless u.custom_fields["import_id"]
r = mysql_query("SELECT photo FROM #{TABLE_PREFIX}User WHERE UserID = #{u.custom_fields['import_id']};").first
r =
mysql_query(
"SELECT photo FROM #{TABLE_PREFIX}User WHERE UserID = #{u.custom_fields["import_id"]};",
).first
next if r.nil?
photo = r["photo"]
next unless photo.present?
@ -229,9 +238,9 @@ class BulkImport::Vanilla < BulkImport::Base
photo_real_filename = nil
parts = photo.squeeze("/").split("/")
if parts[0] =~ /^[a-z0-9]{2}:/
photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[2..-2].join('/')}".squeeze("/")
photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[2..-2].join("/")}".squeeze("/")
elsif parts[0] == "~cf"
photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[1..-2].join('/')}".squeeze("/")
photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[1..-2].join("/")}".squeeze("/")
else
puts "UNKNOWN FORMAT: #{photo}"
next
@ -272,75 +281,86 @@ class BulkImport::Vanilla < BulkImport::Base
count = 0
# https://us.v-cdn.net/1234567/uploads/editor/xyz/image.jpg
cdn_regex = /https:\/\/us.v-cdn.net\/1234567\/uploads\/(\S+\/(\w|-)+.\w+)/i
cdn_regex = %r{https://us.v-cdn.net/1234567/uploads/(\S+/(\w|-)+.\w+)}i
# [attachment=10109:Screen Shot 2012-04-01 at 3.47.35 AM.png]
attachment_regex = /\[attachment=(\d+):(.*?)\]/i
Post.where("raw LIKE '%/us.v-cdn.net/%' OR raw LIKE '%[attachment%'").find_each do |post|
count += 1
print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)]
new_raw = post.raw.dup
Post
.where("raw LIKE '%/us.v-cdn.net/%' OR raw LIKE '%[attachment%'")
.find_each do |post|
count += 1
print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)]
new_raw = post.raw.dup
new_raw.gsub!(attachment_regex) do |s|
matches = attachment_regex.match(s)
attachment_id = matches[1]
file_name = matches[2]
next unless attachment_id
new_raw.gsub!(attachment_regex) do |s|
matches = attachment_regex.match(s)
attachment_id = matches[1]
file_name = matches[2]
next unless attachment_id
r = mysql_query("SELECT Path, Name FROM #{TABLE_PREFIX}Media WHERE MediaID = #{attachment_id};").first
next if r.nil?
path = r["Path"]
name = r["Name"]
next unless path.present?
r =
mysql_query(
"SELECT Path, Name FROM #{TABLE_PREFIX}Media WHERE MediaID = #{attachment_id};",
).first
next if r.nil?
path = r["Path"]
name = r["Name"]
next unless path.present?
path.gsub!("s3://content/", "")
path.gsub!("s3://uploads/", "")
file_path = "#{ATTACHMENTS_BASE_DIR}/#{path}"
path.gsub!("s3://content/", "")
path.gsub!("s3://uploads/", "")
file_path = "#{ATTACHMENTS_BASE_DIR}/#{path}"
if File.exist?(file_path)
upload = create_upload(post.user.id, file_path, File.basename(file_path))
if upload && upload.errors.empty?
# upload.url
filename = name || file_name || File.basename(file_path)
html_for_upload(upload, normalize_text(filename))
if File.exist?(file_path)
upload = create_upload(post.user.id, file_path, File.basename(file_path))
if upload && upload.errors.empty?
# upload.url
filename = name || file_name || File.basename(file_path)
html_for_upload(upload, normalize_text(filename))
else
puts "Error: Upload did not persist for #{post.id} #{attachment_id}!"
end
else
puts "Error: Upload did not persist for #{post.id} #{attachment_id}!"
puts "Couldn't find file for #{attachment_id}. Skipping."
next
end
else
puts "Couldn't find file for #{attachment_id}. Skipping."
next
end
end
new_raw.gsub!(cdn_regex) do |s|
matches = cdn_regex.match(s)
attachment_id = matches[1]
new_raw.gsub!(cdn_regex) do |s|
matches = cdn_regex.match(s)
attachment_id = matches[1]
file_path = "#{ATTACHMENTS_BASE_DIR}/#{attachment_id}"
file_path = "#{ATTACHMENTS_BASE_DIR}/#{attachment_id}"
if File.exist?(file_path)
upload = create_upload(post.user.id, file_path, File.basename(file_path))
if upload && upload.errors.empty?
upload.url
if File.exist?(file_path)
upload = create_upload(post.user.id, file_path, File.basename(file_path))
if upload && upload.errors.empty?
upload.url
else
puts "Error: Upload did not persist for #{post.id} #{attachment_id}!"
end
else
puts "Error: Upload did not persist for #{post.id} #{attachment_id}!"
puts "Couldn't find file for #{attachment_id}. Skipping."
next
end
else
puts "Couldn't find file for #{attachment_id}. Skipping."
next
end
end
if new_raw != post.raw
begin
PostRevisor.new(post).revise!(post.user, { raw: new_raw }, skip_revision: true, skip_validations: true, bypass_bump: true)
rescue
puts "PostRevisor error for #{post.id}"
post.raw = new_raw
post.save(validate: false)
if new_raw != post.raw
begin
PostRevisor.new(post).revise!(
post.user,
{ raw: new_raw },
skip_revision: true,
skip_validations: true,
bypass_bump: true,
)
rescue StandardError
puts "PostRevisor error for #{post.id}"
post.raw = new_raw
post.save(validate: false)
end
end
end
end
end
end
@ -352,7 +372,7 @@ class BulkImport::Vanilla < BulkImport::Base
# Otherwise, the file exists but with a prefix:
# The p prefix seems to be the full file, so try to find that one first.
['p', 't', 'n'].each do |prefix|
%w[p t n].each do |prefix|
full_guess = File.join(path, "#{prefix}#{base_guess}")
return full_guess if File.exist?(full_guess)
end
@ -364,26 +384,30 @@ class BulkImport::Vanilla < BulkImport::Base
def import_categories
puts "", "Importing categories..."
categories = mysql_query("
categories =
mysql_query(
"
SELECT CategoryID, ParentCategoryID, Name, Description, Sort
FROM #{TABLE_PREFIX}Category
WHERE CategoryID > 0
ORDER BY Sort, CategoryID
").to_a
",
).to_a
# Throw the -1 level categories away since they contain no topics.
# Use the next level as root categories.
top_level_categories = categories.select { |c| c["ParentCategoryID"].blank? || c['ParentCategoryID'] == -1 }
top_level_categories =
categories.select { |c| c["ParentCategoryID"].blank? || c["ParentCategoryID"] == -1 }
# Depth = 2
create_categories(top_level_categories) do |category|
next if category_id_from_imported_id(category['CategoryID'])
next if category_id_from_imported_id(category["CategoryID"])
{
imported_id: category['CategoryID'],
name: CGI.unescapeHTML(category['Name']),
description: category['Description'] ? CGI.unescapeHTML(category['Description']) : nil,
position: category['Sort']
imported_id: category["CategoryID"],
name: CGI.unescapeHTML(category["Name"]),
description: category["Description"] ? CGI.unescapeHTML(category["Description"]) : nil,
position: category["Sort"],
}
end
@ -393,39 +417,39 @@ class BulkImport::Vanilla < BulkImport::Base
# Depth = 3
create_categories(subcategories) do |category|
next if category_id_from_imported_id(category['CategoryID'])
next if category_id_from_imported_id(category["CategoryID"])
{
imported_id: category['CategoryID'],
parent_category_id: category_id_from_imported_id(category['ParentCategoryID']),
name: CGI.unescapeHTML(category['Name']),
description: category['Description'] ? CGI.unescapeHTML(category['Description']) : nil,
position: category['Sort']
imported_id: category["CategoryID"],
parent_category_id: category_id_from_imported_id(category["ParentCategoryID"]),
name: CGI.unescapeHTML(category["Name"]),
description: category["Description"] ? CGI.unescapeHTML(category["Description"]) : nil,
position: category["Sort"],
}
end
subcategory_ids = Set.new(subcategories.map { |c| c['CategoryID'] })
subcategory_ids = Set.new(subcategories.map { |c| c["CategoryID"] })
# Depth 4 and 5 need to be tags
categories.each do |c|
next if c['ParentCategoryID'] == -1
next if top_level_category_ids.include?(c['CategoryID'])
next if subcategory_ids.include?(c['CategoryID'])
next if c["ParentCategoryID"] == -1
next if top_level_category_ids.include?(c["CategoryID"])
next if subcategory_ids.include?(c["CategoryID"])
# Find a depth 3 category for topics in this category
parent = c
while !parent.nil? && !subcategory_ids.include?(parent['CategoryID'])
parent = categories.find { |subcat| subcat['CategoryID'] == parent['ParentCategoryID'] }
while !parent.nil? && !subcategory_ids.include?(parent["CategoryID"])
parent = categories.find { |subcat| subcat["CategoryID"] == parent["ParentCategoryID"] }
end
if parent
tag_name = DiscourseTagging.clean_tag(c['Name'])
@category_mappings[c['CategoryID']] = {
category_id: category_id_from_imported_id(parent['CategoryID']),
tag: Tag.find_by_name(tag_name) || Tag.create(name: tag_name)
tag_name = DiscourseTagging.clean_tag(c["Name"])
@category_mappings[c["CategoryID"]] = {
category_id: category_id_from_imported_id(parent["CategoryID"]),
tag: Tag.find_by_name(tag_name) || Tag.create(name: tag_name),
}
else
puts '', "Couldn't find a category for #{c['CategoryID']} '#{c['Name']}'!"
puts "", "Couldn't find a category for #{c["CategoryID"]} '#{c["Name"]}'!"
end
end
end
@ -433,7 +457,8 @@ class BulkImport::Vanilla < BulkImport::Base
def import_topics
puts "", "Importing topics..."
topics_sql = "SELECT DiscussionID, CategoryID, Name, Body, DateInserted, InsertUserID, Announce, Format
topics_sql =
"SELECT DiscussionID, CategoryID, Name, Body, DateInserted, InsertUserID, Announce, Format
FROM #{TABLE_PREFIX}Discussion
WHERE DiscussionID > #{@last_imported_topic_id}
ORDER BY DiscussionID ASC"
@ -442,11 +467,12 @@ class BulkImport::Vanilla < BulkImport::Base
data = {
imported_id: row["DiscussionID"],
title: normalize_text(row["Name"]),
category_id: category_id_from_imported_id(row["CategoryID"]) ||
@category_mappings[row["CategoryID"]].try(:[], :category_id),
category_id:
category_id_from_imported_id(row["CategoryID"]) ||
@category_mappings[row["CategoryID"]].try(:[], :category_id),
user_id: user_id_from_imported_id(row["InsertUserID"]),
created_at: Time.zone.at(row['DateInserted']),
pinned_at: row['Announce'] == 0 ? nil : Time.zone.at(row['DateInserted'])
created_at: Time.zone.at(row["DateInserted"]),
pinned_at: row["Announce"] == 0 ? nil : Time.zone.at(row["DateInserted"]),
}
(data[:user_id].present? && data[:title].present?) ? data : false
end
@ -455,46 +481,45 @@ class BulkImport::Vanilla < BulkImport::Base
create_posts(mysql_stream(topics_sql)) do |row|
data = {
imported_id: "d-" + row['DiscussionID'].to_s,
topic_id: topic_id_from_imported_id(row['DiscussionID']),
imported_id: "d-" + row["DiscussionID"].to_s,
topic_id: topic_id_from_imported_id(row["DiscussionID"]),
user_id: user_id_from_imported_id(row["InsertUserID"]),
created_at: Time.zone.at(row['DateInserted']),
raw: clean_up(row['Body'], row['Format'])
created_at: Time.zone.at(row["DateInserted"]),
raw: clean_up(row["Body"], row["Format"]),
}
data[:topic_id].present? ? data : false
end
puts '', 'converting deep categories to tags...'
puts "", "converting deep categories to tags..."
create_topic_tags(mysql_stream(topics_sql)) do |row|
next unless mapping = @category_mappings[row['CategoryID']]
next unless mapping = @category_mappings[row["CategoryID"]]
{
tag_id: mapping[:tag].id,
topic_id: topic_id_from_imported_id(row["DiscussionID"])
}
{ tag_id: mapping[:tag].id, topic_id: topic_id_from_imported_id(row["DiscussionID"]) }
end
end
def import_posts
puts "", "Importing posts..."
posts = mysql_stream(
"SELECT CommentID, DiscussionID, Body, DateInserted, InsertUserID, Format
posts =
mysql_stream(
"SELECT CommentID, DiscussionID, Body, DateInserted, InsertUserID, Format
FROM #{TABLE_PREFIX}Comment
WHERE CommentID > #{@last_imported_post_id}
ORDER BY CommentID ASC")
ORDER BY CommentID ASC",
)
create_posts(posts) do |row|
next unless topic_id = topic_id_from_imported_id(row['DiscussionID'])
next if row['Body'].blank?
next unless topic_id = topic_id_from_imported_id(row["DiscussionID"])
next if row["Body"].blank?
{
imported_id: row['CommentID'],
imported_id: row["CommentID"],
topic_id: topic_id,
user_id: user_id_from_imported_id(row['InsertUserID']),
created_at: Time.zone.at(row['DateInserted']),
raw: clean_up(row['Body'], row['Format'])
user_id: user_id_from_imported_id(row["InsertUserID"]),
created_at: Time.zone.at(row["DateInserted"]),
raw: clean_up(row["Body"], row["Format"]),
}
end
end
@ -505,31 +530,31 @@ class BulkImport::Vanilla < BulkImport::Base
tag_mapping = {}
mysql_query("SELECT TagID, Name FROM #{TABLE_PREFIX}Tag").each do |row|
tag_name = DiscourseTagging.clean_tag(row['Name'])
tag_name = DiscourseTagging.clean_tag(row["Name"])
tag = Tag.find_by_name(tag_name) || Tag.create(name: tag_name)
tag_mapping[row['TagID']] = tag.id
tag_mapping[row["TagID"]] = tag.id
end
tags = mysql_query(
"SELECT TagID, DiscussionID
tags =
mysql_query(
"SELECT TagID, DiscussionID
FROM #{TABLE_PREFIX}TagDiscussion
WHERE DiscussionID > #{@last_imported_topic_id}
ORDER BY DateInserted")
ORDER BY DateInserted",
)
create_topic_tags(tags) do |row|
next unless topic_id = topic_id_from_imported_id(row['DiscussionID'])
next unless topic_id = topic_id_from_imported_id(row["DiscussionID"])
{
topic_id: topic_id,
tag_id: tag_mapping[row['TagID']]
}
{ topic_id: topic_id, tag_id: tag_mapping[row["TagID"]] }
end
end
def import_private_topics
puts "", "Importing private topics..."
topics_sql = "SELECT c.ConversationID, c.Subject, m.MessageID, m.Body, c.DateInserted, c.InsertUserID
topics_sql =
"SELECT c.ConversationID, c.Subject, m.MessageID, m.Body, c.DateInserted, c.InsertUserID
FROM #{TABLE_PREFIX}Conversation c, #{TABLE_PREFIX}ConversationMessage m
WHERE c.FirstMessageID = m.MessageID
AND c.ConversationID > #{@last_imported_private_topic_id - PRIVATE_OFFSET}
@ -539,9 +564,10 @@ class BulkImport::Vanilla < BulkImport::Base
{
archetype: Archetype.private_message,
imported_id: row["ConversationID"] + PRIVATE_OFFSET,
title: row["Subject"] ? normalize_text(row["Subject"]) : "Conversation #{row["ConversationID"]}",
title:
row["Subject"] ? normalize_text(row["Subject"]) : "Conversation #{row["ConversationID"]}",
user_id: user_id_from_imported_id(row["InsertUserID"]),
created_at: Time.zone.at(row['DateInserted'])
created_at: Time.zone.at(row["DateInserted"]),
}
end
end
@ -549,7 +575,8 @@ class BulkImport::Vanilla < BulkImport::Base
def import_topic_allowed_users
puts "", "importing topic_allowed_users..."
topic_allowed_users_sql = "
topic_allowed_users_sql =
"
SELECT ConversationID, UserID
FROM #{TABLE_PREFIX}UserConversation
WHERE Deleted = 0
@ -559,45 +586,43 @@ class BulkImport::Vanilla < BulkImport::Base
added = 0
create_topic_allowed_users(mysql_stream(topic_allowed_users_sql)) do |row|
next unless topic_id = topic_id_from_imported_id(row['ConversationID'] + PRIVATE_OFFSET)
next unless topic_id = topic_id_from_imported_id(row["ConversationID"] + PRIVATE_OFFSET)
next unless user_id = user_id_from_imported_id(row["UserID"])
added += 1
{
topic_id: topic_id,
user_id: user_id,
}
{ topic_id: topic_id, user_id: user_id }
end
puts '', "Added #{added} topic_allowed_users records."
puts "", "Added #{added} topic_allowed_users records."
end
def import_private_posts
puts "", "importing private replies..."
private_posts_sql = "
private_posts_sql =
"
SELECT ConversationID, MessageID, Body, InsertUserID, DateInserted, Format
FROM GDN_ConversationMessage
WHERE ConversationID > #{@last_imported_private_topic_id - PRIVATE_OFFSET}
ORDER BY ConversationID ASC, MessageID ASC"
create_posts(mysql_stream(private_posts_sql)) do |row|
next unless topic_id = topic_id_from_imported_id(row['ConversationID'] + PRIVATE_OFFSET)
next unless topic_id = topic_id_from_imported_id(row["ConversationID"] + PRIVATE_OFFSET)
{
imported_id: row['MessageID'] + PRIVATE_OFFSET,
imported_id: row["MessageID"] + PRIVATE_OFFSET,
topic_id: topic_id,
user_id: user_id_from_imported_id(row['InsertUserID']),
created_at: Time.zone.at(row['DateInserted']),
raw: clean_up(row['Body'], row['Format'])
user_id: user_id_from_imported_id(row["InsertUserID"]),
created_at: Time.zone.at(row["DateInserted"]),
raw: clean_up(row["Body"], row["Format"]),
}
end
end
# TODO: too slow
def create_permalinks
puts '', 'Creating permalinks...', ''
puts "", "Creating permalinks...", ""
puts ' User pages...'
puts " User pages..."
start = Time.now
count = 0
@ -606,21 +631,23 @@ class BulkImport::Vanilla < BulkImport::Base
sql = "COPY permalinks (url, created_at, updated_at, external_url) FROM STDIN"
@raw_connection.copy_data(sql, @encoder) do
User.includes(:_custom_fields).find_each do |u|
count += 1
ucf = u.custom_fields
if ucf && ucf["import_id"]
vanilla_username = ucf["import_username"] || u.username
@raw_connection.put_copy_data(
["profile/#{vanilla_username}", now, now, "/users/#{u.username}"]
)
end
User
.includes(:_custom_fields)
.find_each do |u|
count += 1
ucf = u.custom_fields
if ucf && ucf["import_id"]
vanilla_username = ucf["import_username"] || u.username
@raw_connection.put_copy_data(
["profile/#{vanilla_username}", now, now, "/users/#{u.username}"],
)
end
print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)] if count % 5000 == 0
end
print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)] if count % 5000 == 0
end
end
puts '', '', ' Topics and posts...'
puts "", "", " Topics and posts..."
start = Time.now
count = 0
@ -628,38 +655,36 @@ class BulkImport::Vanilla < BulkImport::Base
sql = "COPY permalinks (url, topic_id, post_id, created_at, updated_at) FROM STDIN"
@raw_connection.copy_data(sql, @encoder) do
Post.includes(:_custom_fields).find_each do |post|
count += 1
pcf = post.custom_fields
if pcf && pcf["import_id"]
topic = post.topic
if topic.present?
id = pcf["import_id"].split('-').last
if post.post_number == 1
slug = Slug.for(topic.title) # probably matches what vanilla would do...
@raw_connection.put_copy_data(
["discussion/#{id}/#{slug}", topic.id, nil, now, now]
)
else
@raw_connection.put_copy_data(
["discussion/comment/#{id}", nil, post.id, now, now]
)
Post
.includes(:_custom_fields)
.find_each do |post|
count += 1
pcf = post.custom_fields
if pcf && pcf["import_id"]
topic = post.topic
if topic.present?
id = pcf["import_id"].split("-").last
if post.post_number == 1
slug = Slug.for(topic.title) # probably matches what vanilla would do...
@raw_connection.put_copy_data(["discussion/#{id}/#{slug}", topic.id, nil, now, now])
else
@raw_connection.put_copy_data(["discussion/comment/#{id}", nil, post.id, now, now])
end
end
end
end
print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)] if count % 5000 == 0
end
print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)] if count % 5000 == 0
end
end
end
def clean_up(raw, format)
raw.encode!("utf-8", "utf-8", invalid: :replace, undef: :replace, replace: "")
raw.gsub!(/<(.+)>&nbsp;<\/\1>/, "\n\n")
raw.gsub!(%r{<(.+)>&nbsp;</\1>}, "\n\n")
html =
if format == 'Html'
if format == "Html"
raw
else
markdown = Redcarpet::Markdown.new(Redcarpet::Render::HTML, autolink: true, tables: true)
@ -668,29 +693,23 @@ class BulkImport::Vanilla < BulkImport::Base
doc = Nokogiri::HTML5.fragment(html)
doc.css("blockquote").each do |bq|
name = bq["rel"]
user = User.find_by(name: name)
bq.replace %{<br>[QUOTE="#{user&.username || name}"]\n#{bq.inner_html}\n[/QUOTE]<br>}
end
doc
.css("blockquote")
.each do |bq|
name = bq["rel"]
user = User.find_by(name: name)
bq.replace %{<br>[QUOTE="#{user&.username || name}"]\n#{bq.inner_html}\n[/QUOTE]<br>}
end
doc.css("font").reverse.each do |f|
f.replace f.inner_html
end
doc.css("font").reverse.each { |f| f.replace f.inner_html }
doc.css("span").reverse.each do |f|
f.replace f.inner_html
end
doc.css("span").reverse.each { |f| f.replace f.inner_html }
doc.css("sub").reverse.each do |f|
f.replace f.inner_html
end
doc.css("sub").reverse.each { |f| f.replace f.inner_html }
doc.css("u").reverse.each do |f|
f.replace f.inner_html
end
doc.css("u").reverse.each { |f| f.replace f.inner_html }
markdown = format == 'Html' ? ReverseMarkdown.convert(doc.to_html) : doc.to_html
markdown = format == "Html" ? ReverseMarkdown.convert(doc.to_html) : doc.to_html
markdown.gsub!(/\[QUOTE="([^;]+);c-(\d+)"\]/i) { "[QUOTE=#{$1};#{$2}]" }
markdown = process_raw_text(markdown)
@ -702,31 +721,31 @@ class BulkImport::Vanilla < BulkImport::Base
text = raw.dup
text = CGI.unescapeHTML(text)
text.gsub!(/:(?:\w{8})\]/, ']')
text.gsub!(/:(?:\w{8})\]/, "]")
# Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
text.gsub!(/<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/i, '[\2](\1)')
text.gsub!(%r{<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)</a><!-- \w -->}i, '[\2](\1)')
# phpBB shortens link text like this, which breaks our markdown processing:
# [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
#
# Work around it for now:
text.gsub!(/\[http(s)?:\/\/(www\.)?/i, '[')
text.gsub!(%r{\[http(s)?://(www\.)?}i, "[")
# convert list tags to ul and list=1 tags to ol
# list=a is not supported, so handle it like list=1
# list=9 and list=x have the same result as list=1 and list=a
text.gsub!(/\[list\](.*?)\[\/list:u\]/mi, '[ul]\1[/ul]')
text.gsub!(/\[list=.*?\](.*?)\[\/list:o\]/mi, '[ol]\1[/ol]')
text.gsub!(%r{\[list\](.*?)\[/list:u\]}mi, '[ul]\1[/ul]')
text.gsub!(%r{\[list=.*?\](.*?)\[/list:o\]}mi, '[ol]\1[/ol]')
# convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
text.gsub!(/\[\*\](.*?)\[\/\*:m\]/mi, '[li]\1[/li]')
text.gsub!(%r{\[\*\](.*?)\[/\*:m\]}mi, '[li]\1[/li]')
# [QUOTE="<username>"] -- add newline
text.gsub!(/(\[quote="[a-zA-Z\d]+"\])/i) { "#{$1}\n" }
# [/QUOTE] -- add newline
text.gsub!(/(\[\/quote\])/i) { "\n#{$1}" }
text.gsub!(%r{(\[/quote\])}i) { "\n#{$1}" }
text
end
@ -742,7 +761,6 @@ class BulkImport::Vanilla < BulkImport::Base
def mysql_query(sql)
@client.query(sql)
end
end
BulkImport::Vanilla.new.start
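
A note on the regex rewrites in this file (`cdn_regex`, the `[attach]` patterns, and the BBCode cleanups): syntax_tree switches to `%r{}` delimiters whenever a pattern contains forward slashes, which drops the `\/` escaping without changing what the pattern matches. A minimal sketch, with an illustrative URL and escaped dots that the original pattern does not use:

```ruby
# Both literals match identically; %r{} just avoids escaping "/".
slash_heavy = /https:\/\/us\.v-cdn\.net\/(\S+)/i
braced      = %r{https://us\.v-cdn\.net/(\S+)}i

url = "https://us.v-cdn.net/1234567/uploads/editor/xyz/image.jpg"
puts slash_heavy.match?(url) # => true
puts braced.match?(url)      # => true
```
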

View File

@ -7,43 +7,42 @@ require "htmlentities"
require "parallel"
class BulkImport::VBulletin < BulkImport::Base
TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "vb_"
TABLE_PREFIX ||= ENV["TABLE_PREFIX"] || "vb_"
SUSPENDED_TILL ||= Date.new(3000, 1, 1)
ATTACHMENT_DIR ||= ENV['ATTACHMENT_DIR'] || '/shared/import/data/attachments'
AVATAR_DIR ||= ENV['AVATAR_DIR'] || '/shared/import/data/customavatars'
ATTACHMENT_DIR ||= ENV["ATTACHMENT_DIR"] || "/shared/import/data/attachments"
AVATAR_DIR ||= ENV["AVATAR_DIR"] || "/shared/import/data/customavatars"
def initialize
super
host = ENV["DB_HOST"] || "localhost"
host = ENV["DB_HOST"] || "localhost"
username = ENV["DB_USERNAME"] || "root"
password = ENV["DB_PASSWORD"]
database = ENV["DB_NAME"] || "vbulletin"
charset = ENV["DB_CHARSET"] || "utf8"
charset = ENV["DB_CHARSET"] || "utf8"
@html_entities = HTMLEntities.new
@encoding = CHARSET_MAP[charset]
@client = Mysql2::Client.new(
host: host,
username: username,
password: password,
database: database,
encoding: charset,
reconnect: true
)
@client =
Mysql2::Client.new(
host: host,
username: username,
password: password,
database: database,
encoding: charset,
reconnect: true,
)
@client.query_options.merge!(as: :array, cache_rows: false)
@has_post_thanks = mysql_query(<<-SQL
@has_post_thanks = mysql_query(<<-SQL).to_a.count > 0
SELECT `COLUMN_NAME`
FROM `INFORMATION_SCHEMA`.`COLUMNS`
WHERE `TABLE_SCHEMA`='#{database}'
AND `TABLE_NAME`='user'
AND `COLUMN_NAME` LIKE 'post_thanks_%'
SQL
).to_a.count > 0
@user_ids_by_email = {}
end
@ -95,7 +94,7 @@ class BulkImport::VBulletin < BulkImport::Base
end
def import_groups
puts '', "Importing groups..."
puts "", "Importing groups..."
groups = mysql_stream <<-SQL
SELECT usergroupid, title, description, usertitle
@ -115,7 +114,7 @@ class BulkImport::VBulletin < BulkImport::Base
end
def import_users
puts '', "Importing users..."
puts "", "Importing users..."
users = mysql_stream <<-SQL
SELECT u.userid, username, email, joindate, birthday, ipaddress, u.usergroupid, bandate, liftdate
@ -145,7 +144,7 @@ class BulkImport::VBulletin < BulkImport::Base
end
def import_user_emails
puts '', "Importing user emails..."
puts "", "Importing user emails..."
users = mysql_stream <<-SQL
SELECT u.userid, email, joindate
@ -155,7 +154,7 @@ class BulkImport::VBulletin < BulkImport::Base
SQL
create_user_emails(users) do |row|
user_id, email = row[0 .. 1]
user_id, email = row[0..1]
@user_ids_by_email[email.downcase] ||= []
user_ids = @user_ids_by_email[email.downcase] << user_id
@ -170,7 +169,7 @@ class BulkImport::VBulletin < BulkImport::Base
imported_id: user_id,
imported_user_id: user_id,
email: email,
created_at: Time.zone.at(row[2])
created_at: Time.zone.at(row[2]),
}
end
@ -179,7 +178,7 @@ class BulkImport::VBulletin < BulkImport::Base
end
def import_user_stats
puts '', "Importing user stats..."
puts "", "Importing user stats..."
users = mysql_stream <<-SQL
SELECT u.userid, joindate, posts, COUNT(t.threadid) AS threads, p.dateline
@ -199,7 +198,7 @@ class BulkImport::VBulletin < BulkImport::Base
new_since: Time.zone.at(row[1]),
post_count: row[2],
topic_count: row[3],
first_post_created_at: row[4] && Time.zone.at(row[4])
first_post_created_at: row[4] && Time.zone.at(row[4]),
}
if @has_post_thanks
@ -212,7 +211,7 @@ class BulkImport::VBulletin < BulkImport::Base
end
def import_group_users
puts '', "Importing group users..."
puts "", "Importing group users..."
group_users = mysql_stream <<-SQL
SELECT usergroupid, userid
@ -221,15 +220,12 @@ class BulkImport::VBulletin < BulkImport::Base
SQL
create_group_users(group_users) do |row|
{
group_id: group_id_from_imported_id(row[0]),
user_id: user_id_from_imported_id(row[1]),
}
{ group_id: group_id_from_imported_id(row[0]), user_id: user_id_from_imported_id(row[1]) }
end
end
def import_user_passwords
puts '', "Importing user passwords..."
puts "", "Importing user passwords..."
user_passwords = mysql_stream <<-SQL
SELECT userid, password
@ -239,15 +235,12 @@ class BulkImport::VBulletin < BulkImport::Base
SQL
create_custom_fields("user", "password", user_passwords) do |row|
{
record_id: user_id_from_imported_id(row[0]),
value: row[1],
}
{ record_id: user_id_from_imported_id(row[0]), value: row[1] }
end
end
def import_user_salts
puts '', "Importing user salts..."
puts "", "Importing user salts..."
user_salts = mysql_stream <<-SQL
SELECT userid, salt
@ -258,15 +251,12 @@ class BulkImport::VBulletin < BulkImport::Base
SQL
create_custom_fields("user", "salt", user_salts) do |row|
{
record_id: user_id_from_imported_id(row[0]),
value: row[1],
}
{ record_id: user_id_from_imported_id(row[0]), value: row[1] }
end
end
def import_user_profiles
puts '', "Importing user profiles..."
puts "", "Importing user profiles..."
user_profiles = mysql_stream <<-SQL
SELECT userid, homepage, profilevisits
@ -278,16 +268,23 @@ class BulkImport::VBulletin < BulkImport::Base
create_user_profiles(user_profiles) do |row|
{
user_id: user_id_from_imported_id(row[0]),
website: (URI.parse(row[1]).to_s rescue nil),
website:
(
begin
URI.parse(row[1]).to_s
rescue StandardError
nil
end
),
views: row[2],
}
end
end
def import_categories
puts '', "Importing categories..."
puts "", "Importing categories..."
categories = mysql_query(<<-SQL
categories = mysql_query(<<-SQL).to_a
select
forumid,
parentid,
@ -311,23 +308,20 @@ class BulkImport::VBulletin < BulkImport::Base
from forum
order by forumid
SQL
).to_a
return if categories.empty?
parent_categories = categories.select { |c| c[1] == -1 }
parent_categories = categories.select { |c| c[1] == -1 }
children_categories = categories.select { |c| c[1] != -1 }
parent_category_ids = Set.new parent_categories.map { |c| c[0] }
# cut down the tree to only 2 levels of categories
children_categories.each do |cc|
until parent_category_ids.include?(cc[1])
cc[1] = categories.find { |c| c[0] == cc[1] }[1]
end
cc[1] = categories.find { |c| c[0] == cc[1] }[1] until parent_category_ids.include?(cc[1])
end
puts '', "Importing parent categories..."
puts "", "Importing parent categories..."
create_categories(parent_categories) do |row|
{
imported_id: row[0],
@ -337,7 +331,7 @@ class BulkImport::VBulletin < BulkImport::Base
}
end
puts '', "Importing children categories..."
puts "", "Importing children categories..."
create_categories(children_categories) do |row|
{
imported_id: row[0],
@ -350,7 +344,7 @@ class BulkImport::VBulletin < BulkImport::Base
end
def import_topics
puts '', "Importing topics..."
puts "", "Importing topics..."
topics = mysql_stream <<-SQL
SELECT threadid, title, forumid, postuserid, open, dateline, views, visible, sticky
@ -381,7 +375,7 @@ class BulkImport::VBulletin < BulkImport::Base
end
def import_posts
puts '', "Importing posts..."
puts "", "Importing posts..."
posts = mysql_stream <<-SQL
SELECT postid, p.threadid, parentid, userid, p.dateline, p.visible, pagetext
@ -396,7 +390,8 @@ class BulkImport::VBulletin < BulkImport::Base
create_posts(posts) do |row|
topic_id = topic_id_from_imported_id(row[1])
replied_post_topic_id = topic_id_from_imported_post_id(row[2])
reply_to_post_number = topic_id == replied_post_topic_id ? post_number_from_imported_id(row[2]) : nil
reply_to_post_number =
topic_id == replied_post_topic_id ? post_number_from_imported_id(row[2]) : nil
post = {
imported_id: row[0],
@ -415,7 +410,7 @@ class BulkImport::VBulletin < BulkImport::Base
def import_likes
return unless @has_post_thanks
puts '', "Importing likes..."
puts "", "Importing likes..."
@imported_likes = Set.new
@last_imported_post_id = 0
@ -438,13 +433,13 @@ class BulkImport::VBulletin < BulkImport::Base
post_id: post_id_from_imported_id(row[0]),
user_id: user_id_from_imported_id(row[1]),
post_action_type_id: 2,
created_at: Time.zone.at(row[2])
created_at: Time.zone.at(row[2]),
}
end
end
def import_private_topics
puts '', "Importing private topics..."
puts "", "Importing private topics..."
@imported_topics = {}
@ -473,34 +468,31 @@ class BulkImport::VBulletin < BulkImport::Base
end
def import_topic_allowed_users
puts '', "Importing topic allowed users..."
puts "", "Importing topic allowed users..."
allowed_users = Set.new
mysql_stream(<<-SQL
mysql_stream(<<-SQL).each do |row|
SELECT pmtextid, touserarray
FROM #{TABLE_PREFIX}pmtext
WHERE pmtextid > (#{@last_imported_private_topic_id - PRIVATE_OFFSET})
ORDER BY pmtextid
SQL
).each do |row|
next unless topic_id = topic_id_from_imported_id(row[0] + PRIVATE_OFFSET)
row[1].scan(/i:(\d+)/).flatten.each do |id|
next unless user_id = user_id_from_imported_id(id)
allowed_users << [topic_id, user_id]
end
row[1]
.scan(/i:(\d+)/)
.flatten
.each do |id|
next unless user_id = user_id_from_imported_id(id)
allowed_users << [topic_id, user_id]
end
end
create_topic_allowed_users(allowed_users) do |row|
{
topic_id: row[0],
user_id: row[1],
}
end
create_topic_allowed_users(allowed_users) { |row| { topic_id: row[0], user_id: row[1] } }
end
def import_private_posts
puts '', "Importing private posts..."
puts "", "Importing private posts..."
posts = mysql_stream <<-SQL
SELECT pmtextid, title, fromuserid, touserarray, dateline, message
@ -527,7 +519,7 @@ class BulkImport::VBulletin < BulkImport::Base
end
def create_permalink_file
puts '', 'Creating Permalink File...', ''
puts "", "Creating Permalink File...", ""
total = Topic.listable_topics.count
start = Time.now
@ -538,9 +530,9 @@ class BulkImport::VBulletin < BulkImport::Base
i += 1
pcf = topic.posts.includes(:_custom_fields).where(post_number: 1).first.custom_fields
if pcf && pcf["import_id"]
id = pcf["import_id"].split('-').last
id = pcf["import_id"].split("-").last
f.print [ "XXX#{id} YYY#{topic.id}" ].to_csv
f.print ["XXX#{id} YYY#{topic.id}"].to_csv
print "\r%7d/%7d - %6d/sec" % [i, total, i.to_f / (Time.now - start)] if i % 5000 == 0
end
end
@ -549,7 +541,8 @@ class BulkImport::VBulletin < BulkImport::Base
# find the uploaded file information from the db
def find_upload(post, attachment_id)
sql = "SELECT a.attachmentid attachment_id, a.userid user_id, a.filename filename
sql =
"SELECT a.attachmentid attachment_id, a.userid user_id, a.filename filename
FROM #{TABLE_PREFIX}attachment a
WHERE a.attachmentid = #{attachment_id}"
results = mysql_query(sql)
@ -563,9 +556,10 @@ class BulkImport::VBulletin < BulkImport::Base
user_id = row[1]
db_filename = row[2]
filename = File.join(ATTACHMENT_DIR, user_id.to_s.split('').join('/'), "#{attachment_id}.attach")
filename =
File.join(ATTACHMENT_DIR, user_id.to_s.split("").join("/"), "#{attachment_id}.attach")
real_filename = db_filename
real_filename.prepend SecureRandom.hex if real_filename[0] == '.'
real_filename.prepend SecureRandom.hex if real_filename[0] == "."
unless File.exist?(filename)
puts "Attachment file #{row.inspect} doesn't exist"
@ -588,7 +582,7 @@ class BulkImport::VBulletin < BulkImport::Base
end
def import_attachments
puts '', 'importing attachments...'
puts "", "importing attachments..."
RateLimiter.disable
current_count = 0
@ -596,7 +590,7 @@ class BulkImport::VBulletin < BulkImport::Base
success_count = 0
fail_count = 0
attachment_regex = /\[attach[^\]]*\](\d+)\[\/attach\]/i
attachment_regex = %r{\[attach[^\]]*\](\d+)\[/attach\]}i
Post.find_each do |post|
current_count += 1
@ -618,7 +612,12 @@ class BulkImport::VBulletin < BulkImport::Base
end
if new_raw != post.raw
PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: 'Import attachments from vBulletin')
PostRevisor.new(post).revise!(
post.user,
{ raw: new_raw },
bypass_bump: true,
edit_reason: "Import attachments from vBulletin",
)
end
success_count += 1
@ -639,7 +638,7 @@ class BulkImport::VBulletin < BulkImport::Base
Dir.foreach(AVATAR_DIR) do |item|
print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)]
next if item == ('.') || item == ('..') || item == ('.DS_Store')
next if item == (".") || item == ("..") || item == (".DS_Store")
next unless item =~ /avatar(\d+)_(\d).gif/
scan = item.scan(/avatar(\d+)_(\d).gif/)
next unless scan[0][0].present?
@ -671,11 +670,10 @@ class BulkImport::VBulletin < BulkImport::Base
def import_signatures
puts "Importing user signatures..."
total_count = mysql_query(<<-SQL
total_count = mysql_query(<<-SQL).first[0].to_i
SELECT COUNT(userid) count
FROM #{TABLE_PREFIX}sigparsed
SQL
).first[0].to_i
current_count = 0
user_signatures = mysql_stream <<-SQL
@ -695,13 +693,20 @@ class BulkImport::VBulletin < BulkImport::Base
next unless u.present?
# cannot hold dupes
UserCustomField.where(user_id: u.id, name: ["see_signatures", "signature_raw", "signature_cooked"]).destroy_all
UserCustomField.where(
user_id: u.id,
name: %w[see_signatures signature_raw signature_cooked],
).destroy_all
user_sig.gsub!(/\[\/?sigpic\]/i, "")
user_sig.gsub!(%r{\[/?sigpic\]}i, "")
UserCustomField.create!(user_id: u.id, name: "see_signatures", value: true)
UserCustomField.create!(user_id: u.id, name: "signature_raw", value: user_sig)
UserCustomField.create!(user_id: u.id, name: "signature_cooked", value: PrettyText.cook(user_sig, omit_nofollow: false))
UserCustomField.create!(
user_id: u.id,
name: "signature_cooked",
value: PrettyText.cook(user_sig, omit_nofollow: false),
)
end
end
@ -710,15 +715,15 @@ class BulkImport::VBulletin < BulkImport::Base
total_count = 0
duplicated = {}
@user_ids_by_email.
select { |e, ids| ids.count > 1 }.
each_with_index do |(email, ids), i|
duplicated[email] = [ ids, i ]
@user_ids_by_email
.select { |e, ids| ids.count > 1 }
.each_with_index do |(email, ids), i|
duplicated[email] = [ids, i]
count += 1
total_count += ids.count
end
puts '', "Merging #{total_count} duplicated users across #{count} distinct emails..."
puts "", "Merging #{total_count} duplicated users across #{count} distinct emails..."
start = Time.now
@ -727,14 +732,15 @@ class BulkImport::VBulletin < BulkImport::Base
next unless email.presence
# queried one by one to ensure ordering
first, *rest = user_ids.map do |id|
UserCustomField.includes(:user).find_by!(name: 'import_id', value: id).user
end
first, *rest =
user_ids.map do |id|
UserCustomField.includes(:user).find_by!(name: "import_id", value: id).user
end
rest.each do |dup|
UserMerger.new(dup, first).merge!
first.reload
printf '.'
printf "."
end
print "\n%6d/%6d - %6d/sec" % [i, count, i.to_f / (Time.now - start)] if i % 10 == 0
@ -744,13 +750,11 @@ class BulkImport::VBulletin < BulkImport::Base
end
def save_duplicated_users
File.open('duplicated_users.json', 'w+') do |f|
f.puts @user_ids_by_email.to_json
end
File.open("duplicated_users.json", "w+") { |f| f.puts @user_ids_by_email.to_json }
end
def read_duplicated_users
@user_ids_by_email = JSON.parse File.read('duplicated_users.json')
@user_ids_by_email = JSON.parse File.read("duplicated_users.json")
end
def extract_pm_title(title)
@ -759,17 +763,26 @@ class BulkImport::VBulletin < BulkImport::Base
def parse_birthday(birthday)
return if birthday.blank?
date_of_birth = Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y") rescue nil
date_of_birth =
begin
Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y")
rescue StandardError
nil
end
return if date_of_birth.nil?
date_of_birth.year < 1904 ? Date.new(1904, date_of_birth.month, date_of_birth.day) : date_of_birth
if date_of_birth.year < 1904
Date.new(1904, date_of_birth.month, date_of_birth.day)
else
date_of_birth
end
end
def print_status(current, max, start_time = nil)
if start_time.present?
elapsed_seconds = Time.now - start_time
elements_per_minute = '[%.0f items/min] ' % [current / elapsed_seconds.to_f * 60]
elements_per_minute = "[%.0f items/min] " % [current / elapsed_seconds.to_f * 60]
else
elements_per_minute = ''
elements_per_minute = ""
end
print "\r%9d / %d (%5.1f%%) %s" % [current, max, current / max.to_f * 100, elements_per_minute]
@ -782,7 +795,6 @@ class BulkImport::VBulletin < BulkImport::Base
def mysql_query(sql)
@client.query(sql)
end
end
BulkImport::VBulletin.new.run
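
Worth calling out in the rewrites above: expressions like `URI.parse(row[1]).to_s rescue nil` and `Date.strptime(...) rescue nil` become explicit `begin ... rescue StandardError ... end` blocks. The modifier form already rescues only `StandardError`, so this is a formatting change, not a behavioral one. A small sketch:

```ruby
require "uri"

# Equivalent to `URI.parse(raw).to_s rescue nil`; the rescue modifier
# catches StandardError (and nothing broader) by default.
def safe_website(raw)
  URI.parse(raw).to_s
rescue StandardError
  nil
end

puts safe_website("https://example.com")     # => "https://example.com"
puts safe_website("not a valid uri").inspect # => nil
```
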

View File

@ -5,47 +5,56 @@ require "cgi"
require "set"
require "mysql2"
require "htmlentities"
require 'ruby-bbcode-to-md'
require 'find'
require "ruby-bbcode-to-md"
require "find"
class BulkImport::VBulletin5 < BulkImport::Base
DB_PREFIX = ""
SUSPENDED_TILL ||= Date.new(3000, 1, 1)
ATTACH_DIR ||= ENV['ATTACH_DIR'] || '/shared/import/data/attachments'
AVATAR_DIR ||= ENV['AVATAR_DIR'] || '/shared/import/data/customavatars'
ATTACH_DIR ||= ENV["ATTACH_DIR"] || "/shared/import/data/attachments"
AVATAR_DIR ||= ENV["AVATAR_DIR"] || "/shared/import/data/customavatars"
ROOT_NODE = 2
def initialize
super
host = ENV["DB_HOST"] || "localhost"
host = ENV["DB_HOST"] || "localhost"
username = ENV["DB_USERNAME"] || "root"
password = ENV["DB_PASSWORD"]
database = ENV["DB_NAME"] || "vbulletin"
charset = ENV["DB_CHARSET"] || "utf8"
charset = ENV["DB_CHARSET"] || "utf8"
@html_entities = HTMLEntities.new
@encoding = CHARSET_MAP[charset]
@bbcode_to_md = true
@client = Mysql2::Client.new(
host: host,
username: username,
password: password,
database: database,
encoding: charset,
reconnect: true
)
@client =
Mysql2::Client.new(
host: host,
username: username,
password: password,
database: database,
encoding: charset,
reconnect: true,
)
@client.query_options.merge!(as: :array, cache_rows: false)
# TODO: Add `LIMIT 1` to the below queries
# ------
# be aware there may be other contenttypeid's in use, such as poll, link, video, etc.
@forum_typeid = mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Forum'").to_a[0][0]
@channel_typeid = mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Channel'").to_a[0][0]
@text_typeid = mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Text'").to_a[0][0]
@forum_typeid =
mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Forum'").to_a[0][
0
]
@channel_typeid =
mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Channel'").to_a[
0
][
0
]
@text_typeid =
mysql_query("SELECT contenttypeid FROM #{DB_PREFIX}contenttype WHERE class='Text'").to_a[0][0]
end
def execute
@ -127,7 +136,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
date_of_birth: parse_birthday(row[3]),
primary_group_id: group_id_from_imported_id(row[5]),
admin: row[5] == 6,
moderator: row[5] == 7
moderator: row[5] == 7,
}
u[:ip_address] = row[4][/\b(?:\d{1,3}\.){3}\d{1,3}\b/] if row[4].present?
if row[7]
@ -153,7 +162,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
imported_id: row[0],
imported_user_id: row[0],
email: random_email,
created_at: Time.zone.at(row[2])
created_at: Time.zone.at(row[2]),
}
end
end
@ -203,10 +212,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
SQL
create_group_users(group_users) do |row|
{
group_id: group_id_from_imported_id(row[0]),
user_id: user_id_from_imported_id(row[1]),
}
{ group_id: group_id_from_imported_id(row[0]), user_id: user_id_from_imported_id(row[1]) }
end
# import secondary group memberships
@ -228,12 +234,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
end
end
create_group_users(group_mapping) do |row|
{
group_id: row[0],
user_id: row[1]
}
end
create_group_users(group_mapping) { |row| { group_id: row[0], user_id: row[1] } }
end
def import_user_profiles
@ -249,7 +250,14 @@ class BulkImport::VBulletin5 < BulkImport::Base
create_user_profiles(user_profiles) do |row|
{
user_id: user_id_from_imported_id(row[0]),
website: (URI.parse(row[1]).to_s rescue nil),
website:
(
begin
URI.parse(row[1]).to_s
rescue StandardError
nil
end
),
views: row[2],
}
end
@ -258,7 +266,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
def import_categories
puts "Importing categories..."
categories = mysql_query(<<-SQL
categories = mysql_query(<<-SQL).to_a
SELECT nodeid AS forumid, title, description, displayorder, parentid, urlident
FROM #{DB_PREFIX}node
WHERE parentid = #{ROOT_NODE}
@ -269,11 +277,10 @@ class BulkImport::VBulletin5 < BulkImport::Base
WHERE contenttypeid = #{@channel_typeid}
AND nodeid > #{@last_imported_category_id}
SQL
).to_a
return if categories.empty?
parent_categories = categories.select { |c| c[4] == ROOT_NODE }
parent_categories = categories.select { |c| c[4] == ROOT_NODE }
children_categories = categories.select { |c| c[4] != ROOT_NODE }
parent_category_ids = Set.new parent_categories.map { |c| c[0] }
@ -285,7 +292,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
name: normalize_text(row[1]),
description: normalize_text(row[2]),
position: row[3],
slug: row[5]
slug: row[5],
}
end
@ -297,7 +304,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
description: normalize_text(row[2]),
position: row[3],
parent_category_id: category_id_from_imported_id(row[4]),
slug: row[5]
slug: row[5],
}
end
end
@ -428,7 +435,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
post_id: post_id,
user_id: user_id,
post_action_type_id: 2,
created_at: Time.zone.at(row[2])
created_at: Time.zone.at(row[2]),
}
end
end
@ -455,7 +462,6 @@ class BulkImport::VBulletin5 < BulkImport::Base
user_id: user_id_from_imported_id(row[2]),
created_at: Time.zone.at(row[3]),
}
end
end
@ -475,17 +481,18 @@ class BulkImport::VBulletin5 < BulkImport::Base
users_added = Set.new
create_topic_allowed_users(mysql_stream(allowed_users_sql)) do |row|
next unless topic_id = topic_id_from_imported_id(row[0] + PRIVATE_OFFSET) || topic_id_from_imported_id(row[2] + PRIVATE_OFFSET)
unless topic_id =
topic_id_from_imported_id(row[0] + PRIVATE_OFFSET) ||
topic_id_from_imported_id(row[2] + PRIVATE_OFFSET)
next
end
next unless user_id = user_id_from_imported_id(row[1])
next if users_added.add?([topic_id, user_id]).nil?
added += 1
{
topic_id: topic_id,
user_id: user_id,
}
{ topic_id: topic_id, user_id: user_id }
end
puts '', "Added #{added} topic allowed users records."
puts "", "Added #{added} topic allowed users records."
end
def import_private_first_posts
@ -543,7 +550,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
end
def create_permalinks
puts '', 'creating permalinks...', ''
puts "", "creating permalinks...", ""
# add permalink normalizations to site settings
# EVERYTHING: /.*\/([\w-]+)$/\1 -- selects the last segment of the URL
@ -580,21 +587,23 @@ class BulkImport::VBulletin5 < BulkImport::Base
return nil
end
tmpfile = 'attach_' + row[6].to_s
filename = File.join('/tmp/', tmpfile)
File.open(filename, 'wb') { |f| f.write(row[5]) }
tmpfile = "attach_" + row[6].to_s
filename = File.join("/tmp/", tmpfile)
File.open(filename, "wb") { |f| f.write(row[5]) }
filename
end
def find_upload(post, opts = {})
if opts[:node_id].present?
sql = "SELECT a.nodeid, n.parentid, a.filename, fd.userid, LENGTH(fd.filedata), filedata, fd.filedataid
sql =
"SELECT a.nodeid, n.parentid, a.filename, fd.userid, LENGTH(fd.filedata), filedata, fd.filedataid
FROM #{DB_PREFIX}attach a
LEFT JOIN #{DB_PREFIX}filedata fd ON fd.filedataid = a.filedataid
LEFT JOIN #{DB_PREFIX}node n ON n.nodeid = a.nodeid
WHERE a.nodeid = #{opts[:node_id]}"
elsif opts[:attachment_id].present?
sql = "SELECT a.nodeid, n.parentid, a.filename, fd.userid, LENGTH(fd.filedata), filedata, fd.filedataid
sql =
"SELECT a.nodeid, n.parentid, a.filename, fd.userid, LENGTH(fd.filedata), filedata, fd.filedataid
FROM #{DB_PREFIX}attachment a
LEFT JOIN #{DB_PREFIX}filedata fd ON fd.filedataid = a.filedataid
LEFT JOIN #{DB_PREFIX}node n ON n.nodeid = a.nodeid
@ -612,9 +621,9 @@ class BulkImport::VBulletin5 < BulkImport::Base
user_id = row[3]
db_filename = row[2]
filename = File.join(ATTACH_DIR, user_id.to_s.split('').join('/'), "#{attachment_id}.attach")
filename = File.join(ATTACH_DIR, user_id.to_s.split("").join("/"), "#{attachment_id}.attach")
real_filename = db_filename
real_filename.prepend SecureRandom.hex if real_filename[0] == '.'
real_filename.prepend SecureRandom.hex if real_filename[0] == "."
unless File.exist?(filename)
filename = check_database_for_attachment(row) if filename.blank?
@ -637,7 +646,7 @@ class BulkImport::VBulletin5 < BulkImport::Base
end
def import_attachments
puts '', 'importing attachments...'
puts "", "importing attachments..."
# add extensions to authorized setting
#ext = mysql_query("SELECT GROUP_CONCAT(DISTINCT(extension)) exts FROM #{DB_PREFIX}filedata").first[0].split(',')
@ -655,8 +664,8 @@ class BulkImport::VBulletin5 < BulkImport::Base
# new style matches the nodeid in the attach table
# old style matches the filedataid in attach/filedata tables
# if the site is very old, there may be multiple different attachment syntaxes used in posts
attachment_regex = /\[attach[^\]]*\].*\"data-attachmentid\":"?(\d+)"?,?.*\[\/attach\]/i
attachment_regex_oldstyle = /\[attach[^\]]*\](\d+)\[\/attach\]/i
attachment_regex = %r{\[attach[^\]]*\].*\"data-attachmentid\":"?(\d+)"?,?.*\[/attach\]}i
attachment_regex_oldstyle = %r{\[attach[^\]]*\](\d+)\[/attach\]}i
Post.find_each do |post|
current_count += 1
@ -715,9 +724,18 @@ class BulkImport::VBulletin5 < BulkImport::Base
def parse_birthday(birthday)
return if birthday.blank?
date_of_birth = Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y") rescue nil
date_of_birth =
begin
Date.strptime(birthday.gsub(/[^\d-]+/, ""), "%m-%d-%Y")
rescue StandardError
nil
end
return if date_of_birth.nil?
date_of_birth.year < 1904 ? Date.new(1904, date_of_birth.month, date_of_birth.day) : date_of_birth
if date_of_birth.year < 1904
Date.new(1904, date_of_birth.month, date_of_birth.day)
else
date_of_birth
end
end
def preprocess_raw(raw)
@ -726,33 +744,37 @@ class BulkImport::VBulletin5 < BulkImport::Base
raw = raw.dup
# [PLAINTEXT]...[/PLAINTEXT]
raw.gsub!(/\[\/?PLAINTEXT\]/i, "\n\n```\n\n")
raw.gsub!(%r{\[/?PLAINTEXT\]}i, "\n\n```\n\n")
# [FONT=font]...[/FONT]
raw.gsub!(/\[FONT=\w*\]/im, "")
raw.gsub!(/\[\/FONT\]/im, "")
raw.gsub!(%r{\[/FONT\]}im, "")
# @[URL=<user_profile>]<username>[/URL]
# [USER=id]username[/USER]
# [MENTION=id]username[/MENTION]
raw.gsub!(/@\[URL=\"\S+\"\]([\w\s]+)\[\/URL\]/i) { "@#{$1.gsub(" ", "_")}" }
raw.gsub!(/\[USER=\"\d+\"\]([\S]+)\[\/USER\]/i) { "@#{$1.gsub(" ", "_")}" }
raw.gsub!(/\[MENTION=\d+\]([\S]+)\[\/MENTION\]/i) { "@#{$1.gsub(" ", "_")}" }
raw.gsub!(%r{@\[URL=\"\S+\"\]([\w\s]+)\[/URL\]}i) { "@#{$1.gsub(" ", "_")}" }
raw.gsub!(%r{\[USER=\"\d+\"\]([\S]+)\[/USER\]}i) { "@#{$1.gsub(" ", "_")}" }
raw.gsub!(%r{\[MENTION=\d+\]([\S]+)\[/MENTION\]}i) { "@#{$1.gsub(" ", "_")}" }
# [IMG2=JSON]{..."src":"<url>"}[/IMG2]
raw.gsub!(/\[img2[^\]]*\].*\"src\":\"?([\w\\\/:\.\-;%]*)\"?}.*\[\/img2\]/i) { "\n#{CGI::unescape($1)}\n" }
raw.gsub!(/\[img2[^\]]*\].*\"src\":\"?([\w\\\/:\.\-;%]*)\"?}.*\[\/img2\]/i) do
"\n#{CGI.unescape($1)}\n"
end
# [TABLE]...[/TABLE]
raw.gsub!(/\[TABLE=\\"[\w:\-\s,]+\\"\]/i, "")
raw.gsub!(/\[\/TABLE\]/i, "")
raw.gsub!(%r{\[/TABLE\]}i, "")
# [HR]...[/HR]
raw.gsub(/\[HR\]\s*\[\/HR\]/im, "---")
raw.gsub(%r{\[HR\]\s*\[/HR\]}im, "---")
# [VIDEO=youtube_share;<id>]...[/VIDEO]
# [VIDEO=vimeo;<id>]...[/VIDEO]
raw.gsub!(/\[VIDEO=YOUTUBE_SHARE;([^\]]+)\].*?\[\/VIDEO\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
raw.gsub!(/\[VIDEO=VIMEO;([^\]]+)\].*?\[\/VIDEO\]/i) { "\nhttps://vimeo.com/#{$1}\n" }
raw.gsub!(%r{\[VIDEO=YOUTUBE_SHARE;([^\]]+)\].*?\[/VIDEO\]}i) do
"\nhttps://www.youtube.com/watch?v=#{$1}\n"
end
raw.gsub!(%r{\[VIDEO=VIMEO;([^\]]+)\].*?\[/VIDEO\]}i) { "\nhttps://vimeo.com/#{$1}\n" }
raw
end
@ -760,9 +782,9 @@ class BulkImport::VBulletin5 < BulkImport::Base
def print_status(current, max, start_time = nil)
if start_time.present?
elapsed_seconds = Time.now - start_time
elements_per_minute = '[%.0f items/min] ' % [current / elapsed_seconds.to_f * 60]
elements_per_minute = "[%.0f items/min] " % [current / elapsed_seconds.to_f * 60]
else
elements_per_minute = ''
elements_per_minute = ""
end
print "\r%9d / %d (%5.1f%%) %s" % [current, max, current / max.to_f * 100, elements_per_minute]
@ -775,7 +797,6 @@ class BulkImport::VBulletin5 < BulkImport::Base
def mysql_query(sql)
@client.query(sql)
end
end
BulkImport::VBulletin5.new.run
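
The heredoc rewrites in these importers (e.g. `mysql_query(<<-SQL).to_a` and `mysql_stream(<<-SQL).each do |row|`) rely on the heredoc body starting on the line after the opening call, so the chained method can sit on the same line as the opener instead of after the terminator. A minimal sketch, using a hypothetical `fake_query` in place of the MySQL client:

```ruby
# Hypothetical stand-in for @client.query; returns rows as arrays.
def fake_query(sql)
  [[1, "first"], [2, "second"]]
end

# The heredoc body begins on the next line, so `.to_a` can chain
# directly on the opening line.
rows = fake_query(<<-SQL).to_a
  SELECT id, title FROM topics ORDER BY id
SQL

rows.each { |id, title| puts "#{id}: #{title}" }
```
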

View File

@ -13,20 +13,22 @@ end
Discourse.after_fork
pretty
child = fork do
Discourse.after_fork
pretty
grand_child = fork do
child =
fork do
Discourse.after_fork
pretty
puts "try to exit"
grand_child =
fork do
Discourse.after_fork
pretty
puts "try to exit"
Process.kill "KILL", Process.pid
end
puts "before wait 2"
Process.wait grand_child
puts "after wait 2"
Process.kill "KILL", Process.pid
end
puts "before wait 2"
Process.wait grand_child
puts "after wait 2"
Process.kill "KILL", Process.pid
end
puts "before wait 1"
Process.wait child
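
For context on the reindented fork test: `fork` with a block runs the block in the child process and returns the child's pid to the parent (and `nil` inside the child), which is why the result can be assigned to a variable and handed to `Process.wait`. A reduced sketch:

```ruby
# The block runs in the child process; the parent gets the child's pid.
child = fork do
  puts "child #{Process.pid} running"
  exit 0
end

puts "parent waiting on pid #{child}"
Process.wait child
puts "child reaped"
```
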

View File

@ -12,20 +12,18 @@ class TimestampsUpdater
def initialize(schema, ignore_tables)
@schema = schema
@ignore_tables = ignore_tables
@raw_connection = PG.connect(
host: ENV['DISCOURSE_DB_HOST'] || 'localhost',
port: ENV['DISCOURSE_DB_PORT'] || 5432,
dbname: ENV['DISCOURSE_DB_NAME'] || 'discourse_development',
user: ENV['DISCOURSE_DB_USERNAME'] || 'postgres',
password: ENV['DISCOURSE_DB_PASSWORD'] || '')
@raw_connection =
PG.connect(
host: ENV["DISCOURSE_DB_HOST"] || "localhost",
port: ENV["DISCOURSE_DB_PORT"] || 5432,
dbname: ENV["DISCOURSE_DB_NAME"] || "discourse_development",
user: ENV["DISCOURSE_DB_USERNAME"] || "postgres",
password: ENV["DISCOURSE_DB_PASSWORD"] || "",
)
end
def move_by(days)
postgresql_date_types = [
"timestamp without time zone",
"timestamp with time zone",
"date"
]
postgresql_date_types = ["timestamp without time zone", "timestamp with time zone", "date"]
postgresql_date_types.each do |data_type|
columns = all_columns_of_type(data_type)
@ -118,11 +116,19 @@ class TimestampsUpdater
end
def is_i?(string)
true if Integer(string) rescue false
begin
true if Integer(string)
rescue StandardError
false
end
end
def is_date?(string)
true if Date.parse(string) rescue false
begin
true if Date.parse(string)
rescue StandardError
false
end
end
def create_updater

View File

@ -6,8 +6,8 @@
# rbtrace -p 15193 -e 'Thread.new{require "objspace"; ObjectSpace.trace_object_allocations_start; GC.start(full_mark: true); ObjectSpace.dump_all(output: File.open("heap.json","w"))}.join'
#
#
require 'set'
require 'json'
require "set"
require "json"
if ARGV.length != 2
puts "Usage: diff_heaps [ORIG.json] [AFTER.json]"
@ -16,26 +16,26 @@ end
origs = Set.new
File.open(ARGV[0], "r").each_line do |line|
parsed = JSON.parse(line)
origs << parsed["address"] if parsed && parsed["address"]
end
File
.open(ARGV[0], "r")
.each_line do |line|
parsed = JSON.parse(line)
origs << parsed["address"] if parsed && parsed["address"]
end
diff = []
File.open(ARGV[1], "r").each_line do |line|
parsed = JSON.parse(line)
if parsed && parsed["address"]
diff << parsed unless origs.include? parsed["address"]
File
.open(ARGV[1], "r")
.each_line do |line|
parsed = JSON.parse(line)
if parsed && parsed["address"]
diff << parsed unless origs.include? parsed["address"]
end
end
end
diff.group_by do |x|
[x["type"], x["file"], x["line"]]
end.map { |x, y|
[x, y.count]
}.sort { |a, b|
b[1] <=> a[1]
}.each { |x, y|
puts "Leaked #{y} #{x[0]} objects at: #{x[1]}:#{x[2]}"
}
diff
.group_by { |x| [x["type"], x["file"], x["line"]] }
.map { |x, y| [x, y.count] }
.sort { |a, b| b[1] <=> a[1] }
.each { |x, y| puts "Leaked #{y} #{x[0]} objects at: #{x[1]}:#{x[2]}" }
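
The reflowed pipeline above computes the same group-count-sort result as before, just with one chained call per line. A reduced sketch of the shape on toy data:

```ruby
allocations = [
  { "type" => "STRING", "file" => "a.rb", "line" => 1 },
  { "type" => "STRING", "file" => "a.rb", "line" => 1 },
  { "type" => "ARRAY",  "file" => "b.rb", "line" => 9 },
]

allocations
  .group_by { |x| [x["type"], x["file"], x["line"]] }
  .map { |key, group| [key, group.count] }
  .sort { |a, b| b[1] <=> a[1] }
  .each { |key, count| puts "Leaked #{count} #{key[0]} objects at: #{key[1]}:#{key[2]}" }
```
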

View File

@ -19,11 +19,11 @@ def run_or_fail(command)
exit 1 unless $?.exitstatus == 0
end
unless ENV['NO_UPDATE']
unless ENV["NO_UPDATE"]
run_or_fail("git reset --hard")
run_or_fail("git fetch")
checkout = ENV['COMMIT_HASH'] || "FETCH_HEAD"
checkout = ENV["COMMIT_HASH"] || "FETCH_HEAD"
run_or_fail("LEFTHOOK=0 git checkout #{checkout}")
run_or_fail("bundle")
@ -31,7 +31,7 @@ end
log("Running tests")
if ENV['RUN_SMOKE_TESTS']
if ENV["RUN_SMOKE_TESTS"]
run_or_fail("bundle exec rake smoke:test")
else
run_or_fail("bundle exec rake docker:test")

View File

@ -1,7 +1,7 @@
# frozen_string_literal: true
require 'colored2'
require 'psych'
require "colored2"
require "psych"
class I18nLinter
def initialize(filenames_or_patterns)
@ -27,16 +27,22 @@ end
class LocaleFileValidator
ERROR_MESSAGES = {
invalid_relative_links: "The following keys have relative links, but do not start with %{base_url} or %{base_path}:",
invalid_relative_image_sources: "The following keys have relative image sources, but do not start with %{base_url} or %{base_path}:",
invalid_interpolation_key_format: "The following keys use {{key}} instead of %{key} for interpolation keys:",
wrong_pluralization_keys: "Pluralized strings must have only the sub-keys 'one' and 'other'.\nThe following keys have missing or additional keys:",
invalid_one_keys: "The following keys contain the number 1 instead of the interpolation key %{count}:",
invalid_message_format_one_key: "The following keys use 'one {1 foo}' instead of the generic 'one {# foo}':",
invalid_relative_links:
"The following keys have relative links, but do not start with %{base_url} or %{base_path}:",
invalid_relative_image_sources:
"The following keys have relative image sources, but do not start with %{base_url} or %{base_path}:",
invalid_interpolation_key_format:
"The following keys use {{key}} instead of %{key} for interpolation keys:",
wrong_pluralization_keys:
"Pluralized strings must have only the sub-keys 'one' and 'other'.\nThe following keys have missing or additional keys:",
invalid_one_keys:
"The following keys contain the number 1 instead of the interpolation key %{count}:",
invalid_message_format_one_key:
"The following keys use 'one {1 foo}' instead of the generic 'one {# foo}':",
}
PLURALIZATION_KEYS = ['zero', 'one', 'two', 'few', 'many', 'other']
ENGLISH_KEYS = ['one', 'other']
PLURALIZATION_KEYS = %w[zero one two few many other]
ENGLISH_KEYS = %w[one other]
def initialize(filename)
@filename = filename
@ -66,7 +72,7 @@ class LocaleFileValidator
private
def each_translation(hash, parent_key = '', &block)
def each_translation(hash, parent_key = "", &block)
hash.each do |key, value|
current_key = parent_key.empty? ? key : "#{parent_key}.#{key}"
@ -85,13 +91,9 @@ class LocaleFileValidator
@errors[:invalid_message_format_one_key] = []
each_translation(yaml) do |key, value|
if value.match?(/href\s*=\s*["']\/[^\/]|\]\(\/[^\/]/i)
@errors[:invalid_relative_links] << key
end
@errors[:invalid_relative_links] << key if value.match?(%r{href\s*=\s*["']/[^/]|\]\(/[^/]}i)
if value.match?(/src\s*=\s*["']\/[^\/]/i)
@errors[:invalid_relative_image_sources] << key
end
@errors[:invalid_relative_image_sources] << key if value.match?(%r{src\s*=\s*["']/[^/]}i)
if value.match?(/{{.+?}}/) && !key.end_with?("_MF")
@errors[:invalid_interpolation_key_format] << key
@ -103,7 +105,7 @@ class LocaleFileValidator
end
end
def each_pluralization(hash, parent_key = '', &block)
def each_pluralization(hash, parent_key = "", &block)
hash.each do |key, value|
if Hash === value
current_key = parent_key.empty? ? key : "#{parent_key}.#{key}"
@ -124,8 +126,8 @@ class LocaleFileValidator
@errors[:wrong_pluralization_keys] << key if hash.keys.sort != ENGLISH_KEYS
one_value = hash['one']
if one_value && one_value.include?('1') && !one_value.match?(/%{count}|{{count}}/)
one_value = hash["one"]
if one_value && one_value.include?("1") && !one_value.match?(/%{count}|{{count}}/)
@errors[:invalid_one_keys] << key
end
end
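
The `%w[...]` rewrites in this linter are pure notation: `%w[one other]` is the word-array literal for `["one", "other"]`, so the pluralization-key check is unchanged. Sketch:

```ruby
ENGLISH_KEYS = %w[one other] # same contents as ["one", "other"]

def wrong_pluralization?(hash)
  hash.keys.sort != ENGLISH_KEYS
end

puts wrong_pluralization?("one" => "1 like", "other" => "%{count} likes") # => false
puts wrong_pluralization?("one" => "1 like", "few" => "a few likes")      # => true
```
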

View File

@ -1,9 +1,9 @@
# frozen_string_literal: true
require 'csv'
require 'reverse_markdown'
require_relative 'base'
require_relative 'base/generic_database'
require "csv"
require "reverse_markdown"
require_relative "base"
require_relative "base/generic_database"
# Call it like this:
# RAILS_ENV=production bundle exec ruby script/import_scripts/answerbase.rb DIRNAME
@ -15,8 +15,10 @@ class ImportScripts::Answerbase < ImportScripts::Base
ANSWER_IMAGE_DIRECTORY = "Answer Images"
QUESTION_ATTACHMENT_DIRECTORY = "Question Attachments"
QUESTION_IMAGE_DIRECTORY = "Question Images"
EMBEDDED_IMAGE_REGEX = /<a[^>]*href="[^"]*relativeUrl=(?<path>[^"\&]*)[^"]*"[^>]*>\s*<img[^>]*>\s*<\/a>/i
QUESTION_LINK_REGEX = /<a[^>]*?href="#{Regexp.escape(OLD_DOMAIN)}\/[^"]*?(?:q|questionid=)(?<id>\d+)[^"]*?"[^>]*>(?<text>.*?)<\/a>/i
EMBEDDED_IMAGE_REGEX =
%r{<a[^>]*href="[^"]*relativeUrl=(?<path>[^"\&]*)[^"]*"[^>]*>\s*<img[^>]*>\s*</a>}i
QUESTION_LINK_REGEX =
%r{<a[^>]*?href="#{Regexp.escape(OLD_DOMAIN)}/[^"]*?(?:q|questionid=)(?<id>\d+)[^"]*?"[^>]*>(?<text>.*?)</a>}i
TOPIC_LINK_NORMALIZATION = '/.*?-(q\d+).*/\1'
BATCH_SIZE = 1000
@ -24,12 +26,13 @@ class ImportScripts::Answerbase < ImportScripts::Base
super()
@path = path
@db = ImportScripts::GenericDatabase.new(
@path,
batch_size: BATCH_SIZE,
recreate: true,
numeric_keys: true
)
@db =
ImportScripts::GenericDatabase.new(
@path,
batch_size: BATCH_SIZE,
recreate: true,
numeric_keys: true,
)
end
def execute
@ -47,11 +50,7 @@ class ImportScripts::Answerbase < ImportScripts::Base
category_position = 0
csv_parse("categories") do |row|
@db.insert_category(
id: row[:id],
name: row[:name],
position: category_position += 1
)
@db.insert_category(id: row[:id], name: row[:name], position: category_position += 1)
end
csv_parse("users") do |row|
@ -62,7 +61,7 @@ class ImportScripts::Answerbase < ImportScripts::Base
bio: row[:description],
avatar_path: row[:profile_image],
created_at: parse_date(row[:createtime]),
active: true
active: true,
)
end
@ -74,8 +73,9 @@ class ImportScripts::Answerbase < ImportScripts::Base
begin
if row[:type] == "Question"
attachments = parse_filenames(row[:attachments], QUESTION_ATTACHMENT_DIRECTORY) +
parse_filenames(row[:images], QUESTION_IMAGE_DIRECTORY)
attachments =
parse_filenames(row[:attachments], QUESTION_ATTACHMENT_DIRECTORY) +
parse_filenames(row[:images], QUESTION_IMAGE_DIRECTORY)
@db.insert_topic(
id: row[:id],
@ -84,12 +84,13 @@ class ImportScripts::Answerbase < ImportScripts::Base
category_id: row[:categorylist],
user_id: user_id,
created_at: created_at,
attachments: attachments
attachments: attachments,
)
last_topic_id = row[:id]
else
attachments = parse_filenames(row[:attachments], ANSWER_ATTACHMENT_DIRECTORY) +
parse_filenames(row[:images], ANSWER_IMAGE_DIRECTORY)
attachments =
parse_filenames(row[:attachments], ANSWER_ATTACHMENT_DIRECTORY) +
parse_filenames(row[:images], ANSWER_IMAGE_DIRECTORY)
@db.insert_post(
id: row[:id],
@ -97,10 +98,10 @@ class ImportScripts::Answerbase < ImportScripts::Base
topic_id: last_topic_id,
user_id: user_id,
created_at: created_at,
attachments: attachments
attachments: attachments,
)
end
rescue
rescue StandardError
p row
raise
end
@ -110,9 +111,7 @@ class ImportScripts::Answerbase < ImportScripts::Base
def parse_filenames(text, directory)
return [] if text.blank?
text
.split(';')
.map { |filename| File.join(@path, directory, filename.strip) }
text.split(";").map { |filename| File.join(@path, directory, filename.strip) }
end
def parse_date(text)
@ -132,10 +131,10 @@ class ImportScripts::Answerbase < ImportScripts::Base
create_categories(rows) do |row|
{
id: row['id'],
name: row['name'],
description: row['description'],
position: row['position']
id: row["id"],
name: row["name"],
description: row["description"],
position: row["position"],
}
end
end
@ -153,19 +152,17 @@ class ImportScripts::Answerbase < ImportScripts::Base
rows, last_id = @db.fetch_users(last_id)
break if rows.empty?
next if all_records_exist?(:users, rows.map { |row| row['id'] })
next if all_records_exist?(:users, rows.map { |row| row["id"] })
create_users(rows, total: total_count, offset: offset) do |row|
{
id: row['id'],
email: row['email'],
username: row['username'],
bio_raw: row['bio'],
created_at: row['created_at'],
active: row['active'] == 1,
post_create_action: proc do |user|
create_avatar(user, row['avatar_path'])
end
id: row["id"],
email: row["email"],
username: row["username"],
bio_raw: row["bio"],
created_at: row["created_at"],
active: row["active"] == 1,
post_create_action: proc { |user| create_avatar(user, row["avatar_path"]) },
}
end
end
@ -191,24 +188,25 @@ class ImportScripts::Answerbase < ImportScripts::Base
rows, last_id = @db.fetch_topics(last_id)
break if rows.empty?
next if all_records_exist?(:posts, rows.map { |row| row['id'] })
next if all_records_exist?(:posts, rows.map { |row| row["id"] })
create_posts(rows, total: total_count, offset: offset) do |row|
attachments = @db.fetch_topic_attachments(row['id']) if row['upload_count'] > 0
user_id = user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id
attachments = @db.fetch_topic_attachments(row["id"]) if row["upload_count"] > 0
user_id = user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id
{
id: row['id'],
title: row['title'],
raw: raw_with_attachments(row['raw'].presence || row['title'], attachments, user_id),
category: category_id_from_imported_category_id(row['category_id']),
id: row["id"],
title: row["title"],
raw: raw_with_attachments(row["raw"].presence || row["title"], attachments, user_id),
category: category_id_from_imported_category_id(row["category_id"]),
user_id: user_id,
created_at: row['created_at'],
closed: row['closed'] == 1,
post_create_action: proc do |post|
url = "q#{row['id']}"
Permalink.create(url: url, topic_id: post.topic.id) unless permalink_exists?(url)
end
created_at: row["created_at"],
closed: row["closed"] == 1,
post_create_action:
proc do |post|
url = "q#{row["id"]}"
Permalink.create(url: url, topic_id: post.topic.id) unless permalink_exists?(url)
end,
}
end
end
@ -223,19 +221,19 @@ class ImportScripts::Answerbase < ImportScripts::Base
rows, last_row_id = @db.fetch_posts(last_row_id)
break if rows.empty?
next if all_records_exist?(:posts, rows.map { |row| row['id'] })
next if all_records_exist?(:posts, rows.map { |row| row["id"] })
create_posts(rows, total: total_count, offset: offset) do |row|
topic = topic_lookup_from_imported_post_id(row['topic_id'])
attachments = @db.fetch_post_attachments(row['id']) if row['upload_count'] > 0
user_id = user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id
topic = topic_lookup_from_imported_post_id(row["topic_id"])
attachments = @db.fetch_post_attachments(row["id"]) if row["upload_count"] > 0
user_id = user_id_from_imported_user_id(row["user_id"]) || Discourse.system_user.id
{
id: row['id'],
raw: raw_with_attachments(row['raw'], attachments, user_id),
id: row["id"],
raw: raw_with_attachments(row["raw"], attachments, user_id),
user_id: user_id,
topic_id: topic[:topic_id],
created_at: row['created_at']
created_at: row["created_at"],
}
end
end
@ -247,7 +245,7 @@ class ImportScripts::Answerbase < ImportScripts::Base
raw = ReverseMarkdown.convert(raw) || ""
attachments&.each do |attachment|
path = attachment['path']
path = attachment["path"]
next if embedded_paths.include?(path)
if File.exist?(path)
@ -269,23 +267,24 @@ class ImportScripts::Answerbase < ImportScripts::Base
paths = []
upload_ids = []
raw = raw.gsub(EMBEDDED_IMAGE_REGEX) do
path = File.join(@path, Regexp.last_match['path'])
filename = File.basename(path)
path = find_image_path(filename)
raw =
raw.gsub(EMBEDDED_IMAGE_REGEX) do
path = File.join(@path, Regexp.last_match["path"])
filename = File.basename(path)
path = find_image_path(filename)
if path
upload = @uploader.create_upload(user_id, path, filename)
if path
upload = @uploader.create_upload(user_id, path, filename)
if upload.present? && upload.persisted?
paths << path
upload_ids << upload.id
@uploader.html_for_upload(upload, filename)
if upload.present? && upload.persisted?
paths << path
upload_ids << upload.id
@uploader.html_for_upload(upload, filename)
end
else
STDERR.puts "Could not find file: #{path}"
end
else
STDERR.puts "Could not find file: #{path}"
end
end
[raw, paths, upload_ids]
end
@ -311,11 +310,11 @@ class ImportScripts::Answerbase < ImportScripts::Base
def add_permalink_normalizations
normalizations = SiteSetting.permalink_normalizations
normalizations = normalizations.blank? ? [] : normalizations.split('|')
normalizations = normalizations.blank? ? [] : normalizations.split("|")
add_normalization(normalizations, TOPIC_LINK_NORMALIZATION)
SiteSetting.permalink_normalizations = normalizations.join('|')
SiteSetting.permalink_normalizations = normalizations.join("|")
end
def add_normalization(normalizations, normalization)
@ -327,11 +326,13 @@ class ImportScripts::Answerbase < ImportScripts::Base
end
def csv_parse(table_name)
CSV.foreach(File.join(@path, "#{table_name}.csv"),
headers: true,
header_converters: :symbol,
skip_blanks: true,
encoding: 'bom|utf-8') { |row| yield row }
CSV.foreach(
File.join(@path, "#{table_name}.csv"),
headers: true,
header_converters: :symbol,
skip_blanks: true,
encoding: "bom|utf-8",
) { |row| yield row }
end
end

View File

@ -5,34 +5,29 @@
# Based on having access to a mysql dump.
# Pass in the ENV variables listed below before running the script.
require_relative 'base'
require 'mysql2'
require 'open-uri'
require_relative "base"
require "mysql2"
require "open-uri"
class ImportScripts::AnswerHub < ImportScripts::Base
DB_NAME ||= ENV['DB_NAME'] || "answerhub"
DB_PASS ||= ENV['DB_PASS'] || "answerhub"
DB_USER ||= ENV['DB_USER'] || "answerhub"
TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "network1"
BATCH_SIZE ||= ENV['BATCH_SIZE'].to_i || 1000
ATTACHMENT_DIR = ENV['ATTACHMENT_DIR'] || ''
PROCESS_UPLOADS = ENV['PROCESS_UPLOADS'].to_i || 0
ANSWERHUB_DOMAIN = ENV['ANSWERHUB_DOMAIN']
AVATAR_DIR = ENV['AVATAR_DIR'] || ""
SITE_ID = ENV['SITE_ID'].to_i || 0
CATEGORY_MAP_FROM = ENV['CATEGORY_MAP_FROM'].to_i || 0
CATEGORY_MAP_TO = ENV['CATEGORY_MAP_TO'].to_i || 0
SCRAPE_AVATARS = ENV['SCRAPE_AVATARS'].to_i || 0
DB_NAME ||= ENV["DB_NAME"] || "answerhub"
DB_PASS ||= ENV["DB_PASS"] || "answerhub"
DB_USER ||= ENV["DB_USER"] || "answerhub"
TABLE_PREFIX ||= ENV["TABLE_PREFIX"] || "network1"
BATCH_SIZE ||= ENV["BATCH_SIZE"].to_i || 1000
ATTACHMENT_DIR = ENV["ATTACHMENT_DIR"] || ""
PROCESS_UPLOADS = ENV["PROCESS_UPLOADS"].to_i || 0
ANSWERHUB_DOMAIN = ENV["ANSWERHUB_DOMAIN"]
AVATAR_DIR = ENV["AVATAR_DIR"] || ""
SITE_ID = ENV["SITE_ID"].to_i || 0
CATEGORY_MAP_FROM = ENV["CATEGORY_MAP_FROM"].to_i || 0
CATEGORY_MAP_TO = ENV["CATEGORY_MAP_TO"].to_i || 0
SCRAPE_AVATARS = ENV["SCRAPE_AVATARS"].to_i || 0
def initialize
super
@client = Mysql2::Client.new(
host: "localhost",
username: DB_USER,
password: DB_PASS,
database: DB_NAME
)
@client =
Mysql2::Client.new(host: "localhost", username: DB_USER, password: DB_PASS, database: DB_NAME)
@skip_updates = true
SiteSetting.tagging_enabled = true
SiteSetting.max_tags_per_topic = 10
@ -56,7 +51,7 @@ class ImportScripts::AnswerHub < ImportScripts::Base
end
def import_users
puts '', "creating users"
puts "", "creating users"
query =
"SELECT count(*) count
@ -64,12 +59,13 @@ class ImportScripts::AnswerHub < ImportScripts::Base
WHERE c_type = 'user'
AND c_active = 1
AND c_system <> 1;"
total_count = @client.query(query).first['count']
total_count = @client.query(query).first["count"]
puts "Total count: #{total_count}"
@last_user_id = -1
batches(BATCH_SIZE) do |offset|
query = "SELECT c_id, c_creation_date, c_name, c_primaryEmail, c_last_seen, c_description
query =
"SELECT c_id, c_creation_date, c_name, c_primaryEmail, c_last_seen, c_description
FROM #{TABLE_PREFIX}_authoritables
WHERE c_type = 'user'
AND c_active = 1
@ -79,17 +75,18 @@ class ImportScripts::AnswerHub < ImportScripts::Base
results = @client.query(query)
break if results.size < 1
@last_user_id = results.to_a.last['c_id']
@last_user_id = results.to_a.last["c_id"]
create_users(results, total: total_count, offset: offset) do |user|
# puts user['c_id'].to_s + ' ' + user['c_name']
next if @lookup.user_id_from_imported_user_id(user['c_id'])
{ id: user['c_id'],
next if @lookup.user_id_from_imported_user_id(user["c_id"])
{
id: user["c_id"],
email: "#{SecureRandom.hex}@invalid.invalid",
username: user['c_name'],
created_at: user['c_creation_date'],
bio_raw: user['c_description'],
last_seen_at: user['c_last_seen'],
username: user["c_name"],
created_at: user["c_creation_date"],
bio_raw: user["c_description"],
last_seen_at: user["c_last_seen"],
}
end
end
@ -99,7 +96,8 @@ class ImportScripts::AnswerHub < ImportScripts::Base
puts "", "importing categories..."
# Import parent categories first
query = "SELECT c_id, c_name, c_plug, c_parent
query =
"SELECT c_id, c_name, c_plug, c_parent
FROM containers
WHERE c_type = 'space'
AND c_active = 1
@ -107,15 +105,12 @@ class ImportScripts::AnswerHub < ImportScripts::Base
results = @client.query(query)
create_categories(results) do |c|
{
id: c['c_id'],
name: c['c_name'],
parent_category_id: check_parent_id(c['c_parent']),
}
{ id: c["c_id"], name: c["c_name"], parent_category_id: check_parent_id(c["c_parent"]) }
end
# Import sub-categories
query = "SELECT c_id, c_name, c_plug, c_parent
query =
"SELECT c_id, c_name, c_plug, c_parent
FROM containers
WHERE c_type = 'space'
AND c_active = 1
@ -125,9 +120,9 @@ class ImportScripts::AnswerHub < ImportScripts::Base
create_categories(results) do |c|
# puts c.inspect
{
id: c['c_id'],
name: c['c_name'],
parent_category_id: category_id_from_imported_category_id(check_parent_id(c['c_parent'])),
id: c["c_id"],
name: c["c_name"],
parent_category_id: category_id_from_imported_category_id(check_parent_id(c["c_parent"])),
}
end
end
@ -141,7 +136,7 @@ class ImportScripts::AnswerHub < ImportScripts::Base
WHERE c_visibility <> 'deleted'
AND (c_type = 'question'
OR c_type = 'kbentry');"
total_count = @client.query(count_query).first['count']
total_count = @client.query(count_query).first["count"]
@last_topic_id = -1
@ -159,26 +154,25 @@ class ImportScripts::AnswerHub < ImportScripts::Base
topics = @client.query(query)
break if topics.size < 1
@last_topic_id = topics.to_a.last['c_id']
@last_topic_id = topics.to_a.last["c_id"]
create_posts(topics, total: total_count, offset: offset) do |t|
user_id = user_id_from_imported_user_id(t['c_author']) || Discourse::SYSTEM_USER_ID
body = process_mentions(t['c_body'])
if PROCESS_UPLOADS == 1
body = process_uploads(body, user_id)
end
user_id = user_id_from_imported_user_id(t["c_author"]) || Discourse::SYSTEM_USER_ID
body = process_mentions(t["c_body"])
body = process_uploads(body, user_id) if PROCESS_UPLOADS == 1
markdown_body = HtmlToMarkdown.new(body).to_markdown
{
id: t['c_id'],
id: t["c_id"],
user_id: user_id,
title: t['c_title'],
category: category_id_from_imported_category_id(t['c_primaryContainer']),
title: t["c_title"],
category: category_id_from_imported_category_id(t["c_primaryContainer"]),
raw: markdown_body,
created_at: t['c_creation_date'],
post_create_action: proc do |post|
tag_names = t['c_topic_names'].split(',')
DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names)
end
created_at: t["c_creation_date"],
post_create_action:
proc do |post|
tag_names = t["c_topic_names"].split(",")
DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names)
end,
}
end
end
@ -194,7 +188,7 @@ class ImportScripts::AnswerHub < ImportScripts::Base
AND (c_type = 'answer'
OR c_type = 'comment'
OR c_type = 'kbentry');"
total_count = @client.query(count_query).first['count']
total_count = @client.query(count_query).first["count"]
@last_post_id = -1
@ -210,49 +204,49 @@ class ImportScripts::AnswerHub < ImportScripts::Base
ORDER BY c_id ASC
LIMIT #{BATCH_SIZE};"
posts = @client.query(query)
next if all_records_exist? :posts, posts.map { |p| p['c_id'] }
next if all_records_exist? :posts, posts.map { |p| p["c_id"] }
break if posts.size < 1
@last_post_id = posts.to_a.last['c_id']
@last_post_id = posts.to_a.last["c_id"]
create_posts(posts, total: total_count, offset: offset) do |p|
t = topic_lookup_from_imported_post_id(p['c_originalParent'])
t = topic_lookup_from_imported_post_id(p["c_originalParent"])
next unless t
reply_to_post_id = post_id_from_imported_post_id(p['c_parent'])
reply_to_post_id = post_id_from_imported_post_id(p["c_parent"])
reply_to_post = reply_to_post_id.present? ? Post.find(reply_to_post_id) : nil
reply_to_post_number = reply_to_post.present? ? reply_to_post.post_number : nil
user_id = user_id_from_imported_user_id(p['c_author']) || Discourse::SYSTEM_USER_ID
user_id = user_id_from_imported_user_id(p["c_author"]) || Discourse::SYSTEM_USER_ID
body = process_mentions(p['c_body'])
if PROCESS_UPLOADS == 1
body = process_uploads(body, user_id)
end
body = process_mentions(p["c_body"])
body = process_uploads(body, user_id) if PROCESS_UPLOADS == 1
markdown_body = HtmlToMarkdown.new(body).to_markdown
{
id: p['c_id'],
id: p["c_id"],
user_id: user_id,
topic_id: t[:topic_id],
reply_to_post_number: reply_to_post_number,
raw: markdown_body,
created_at: p['c_creation_date'],
post_create_action: proc do |post_info|
begin
if p['c_type'] == 'answer' && p['c_marked'] == 1
post = Post.find(post_info[:id])
if post
user_id = user_id_from_imported_user_id(p['c_author']) || Discourse::SYSTEM_USER_ID
current_user = User.find(user_id)
solved = DiscourseSolved.accept_answer!(post, current_user)
# puts "SOLVED: #{solved}"
created_at: p["c_creation_date"],
post_create_action:
proc do |post_info|
begin
if p["c_type"] == "answer" && p["c_marked"] == 1
post = Post.find(post_info[:id])
if post
user_id =
user_id_from_imported_user_id(p["c_author"]) || Discourse::SYSTEM_USER_ID
current_user = User.find(user_id)
solved = DiscourseSolved.accept_answer!(post, current_user)
# puts "SOLVED: #{solved}"
end
end
rescue ActiveRecord::RecordInvalid
puts "SOLVED: Skipped post_id: #{post.id} because invalid"
end
rescue ActiveRecord::RecordInvalid
puts "SOLVED: Skipped post_id: #{post.id} because invalid"
end
end
end,
}
end
end
@ -269,11 +263,7 @@ class ImportScripts::AnswerHub < ImportScripts::Base
groups = @client.query(query)
create_groups(groups) do |group|
{
id: group["c_id"],
name: group["c_name"],
visibility_level: 1
}
{ id: group["c_id"], name: group["c_name"], visibility_level: 1 }
end
end
@ -298,11 +288,16 @@ class ImportScripts::AnswerHub < ImportScripts::Base
group_members.map
groups.each do |group|
dgroup = find_group_by_import_id(group['c_id'])
dgroup = find_group_by_import_id(group["c_id"])
next if dgroup.custom_fields['import_users_added']
next if dgroup.custom_fields["import_users_added"]
group_member_ids = group_members.map { |m| user_id_from_imported_user_id(m["c_members"]) if m["c_groups"] == group['c_id'] }.compact
group_member_ids =
group_members
.map do |m|
user_id_from_imported_user_id(m["c_members"]) if m["c_groups"] == group["c_id"]
end
.compact
# add members
dgroup.bulk_add(group_member_ids)
@ -310,7 +305,7 @@ class ImportScripts::AnswerHub < ImportScripts::Base
# reload group
dgroup.reload
dgroup.custom_fields['import_users_added'] = true
dgroup.custom_fields["import_users_added"] = true
dgroup.save
progress_count += 1
@ -362,7 +357,7 @@ class ImportScripts::AnswerHub < ImportScripts::Base
avatars.each do |a|
begin
user_id = user_id_from_imported_user_id(a['c_user'])
user_id = user_id_from_imported_user_id(a["c_user"])
user = User.find(user_id)
if user
filename = "avatar-#{user_id}.png"
@ -371,9 +366,11 @@ class ImportScripts::AnswerHub < ImportScripts::Base
# Scrape Avatars - Avatars are saved in the db, but it might be easier to just scrape them
if SCRAPE_AVATARS == 1
File.open(path, 'wb') { |f|
f << open("https://#{ANSWERHUB_DOMAIN}/forums/users/#{a['c_user']}/photo/view.html?s=240").read
}
File.open(path, "wb") do |f|
f << open(
"https://#{ANSWERHUB_DOMAIN}/forums/users/#{a["c_user"]}/photo/view.html?s=240",
).read
end
end
upload = @uploader.create_upload(user.id, path, filename)
@ -389,7 +386,7 @@ class ImportScripts::AnswerHub < ImportScripts::Base
end
end
rescue ActiveRecord::RecordNotFound
puts "Could not find User for user_id: #{a['c_user']}"
puts "Could not find User for user_id: #{a["c_user"]}"
end
end
end
@ -438,9 +435,10 @@ class ImportScripts::AnswerHub < ImportScripts::Base
raw = body.dup
# https://example.forum.com/forums/users/1469/XYZ_Rob.html
raw.gsub!(/(https:\/\/example.forum.com\/forums\/users\/\d+\/[\w_%-.]*.html)/) do
raw.gsub!(%r{(https://example.forum.com/forums/users/\d+/[\w_%-.]*.html)}) do
legacy_url = $1
import_user_id = legacy_url.match(/https:\/\/example.forum.com\/forums\/users\/(\d+)\/[\w_%-.]*.html/).captures
import_user_id =
legacy_url.match(%r{https://example.forum.com/forums/users/(\d+)/[\w_%-.]*.html}).captures
user = @lookup.find_user_by_import_id(import_user_id[0])
if user.present?
@ -453,9 +451,9 @@ class ImportScripts::AnswerHub < ImportScripts::Base
end
# /forums/users/395/petrocket.html
raw.gsub!(/(\/forums\/users\/\d+\/[\w_%-.]*.html)/) do
raw.gsub!(%r{(/forums/users/\d+/[\w_%-.]*.html)}) do
legacy_url = $1
import_user_id = legacy_url.match(/\/forums\/users\/(\d+)\/[\w_%-.]*.html/).captures
import_user_id = legacy_url.match(%r{/forums/users/(\d+)/[\w_%-.]*.html}).captures
# puts raw
user = @lookup.find_user_by_import_id(import_user_id[0])
@ -472,7 +470,7 @@ class ImportScripts::AnswerHub < ImportScripts::Base
end
def create_permalinks
puts '', 'Creating redirects...', ''
puts "", "Creating redirects...", ""
# https://example.forum.com/forums/questions/2005/missing-file.html
Topic.find_each do |topic|
@ -480,8 +478,12 @@ class ImportScripts::AnswerHub < ImportScripts::Base
if pcf && pcf["import_id"]
id = pcf["import_id"]
slug = Slug.for(topic.title)
Permalink.create(url: "questions/#{id}/#{slug}.html", topic_id: topic.id) rescue nil
print '.'
begin
Permalink.create(url: "questions/#{id}/#{slug}.html", topic_id: topic.id)
rescue StandardError
nil
end
print "."
end
end
end
@ -496,7 +498,6 @@ class ImportScripts::AnswerHub < ImportScripts::Base
return CATEGORY_MAP_TO if CATEGORY_MAP_FROM > 0 && id == CATEGORY_MAP_FROM
id
end
end
ImportScripts::AnswerHub.new.perform

View File

@ -1,23 +1,23 @@
# frozen_string_literal: true
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'pg'
require "pg"
class ImportScripts::MyAskBot < ImportScripts::Base
# CHANGE THESE BEFORE RUNNING THE IMPORTER
BATCH_SIZE = 1000
OLD_SITE = "ask.cvxr.com"
DB_NAME = "cvxforum"
DB_USER = "cvxforum"
DB_PORT = 5432
DB_HOST = "ask.cvxr.com"
DB_PASS = 'yeah, right'
OLD_SITE = "ask.cvxr.com"
DB_NAME = "cvxforum"
DB_USER = "cvxforum"
DB_PORT = 5432
DB_HOST = "ask.cvxr.com"
DB_PASS = "yeah, right"
# A list of categories to create. Any post with one of these tags will be
# assigned to that category. Ties are broken by list order.
CATEGORIES = [ 'Nonconvex', 'TFOCS', 'MIDCP', 'FAQ' ]
CATEGORIES = %w[Nonconvex TFOCS MIDCP FAQ]
def initialize
super
@ -25,13 +25,8 @@ class ImportScripts::MyAskBot < ImportScripts::Base
@thread_parents = {}
@tagmap = []
@td = PG::TextDecoder::TimestampWithTimeZone.new
@client = PG.connect(
dbname: DB_NAME,
host: DB_HOST,
port: DB_PORT,
user: DB_USER,
password: DB_PASS
)
@client =
PG.connect(dbname: DB_NAME, host: DB_HOST, port: DB_PORT, user: DB_USER, password: DB_PASS)
end
def execute
@ -55,18 +50,17 @@ class ImportScripts::MyAskBot < ImportScripts::Base
def read_tags
puts "", "reading thread tags..."
tag_count = @client.exec(<<-SQL
tag_count = @client.exec(<<-SQL)[0]["count"]
SELECT COUNT(A.id)
FROM askbot_thread_tags A
JOIN tag B
ON A.tag_id = B.id
WHERE A.tag_id > 0
SQL
)[0]["count"]
tags_done = 0
batches(BATCH_SIZE) do |offset|
tags = @client.exec(<<-SQL
tags = @client.exec(<<-SQL)
SELECT A.thread_id, B.name
FROM askbot_thread_tags A
JOIN tag B
@ -75,7 +69,6 @@ class ImportScripts::MyAskBot < ImportScripts::Base
LIMIT #{BATCH_SIZE}
OFFSET #{offset}
SQL
)
break if tags.ntuples() < 1
tags.each do |tag|
tid = tag["thread_id"].to_i
@ -83,7 +76,7 @@ class ImportScripts::MyAskBot < ImportScripts::Base
if @tagmap[tid]
@tagmap[tid].push(tnm)
else
@tagmap[tid] = [ tnm ]
@tagmap[tid] = [tnm]
end
tags_done += 1
print_status tags_done, tag_count
@ -94,21 +87,19 @@ class ImportScripts::MyAskBot < ImportScripts::Base
def import_users
puts "", "importing users"
total_count = @client.exec(<<-SQL
total_count = @client.exec(<<-SQL)[0]["count"]
SELECT COUNT(id)
FROM auth_user
SQL
)[0]["count"]
batches(BATCH_SIZE) do |offset|
users = @client.query(<<-SQL
users = @client.query(<<-SQL)
SELECT id, username, email, is_staff, date_joined, last_seen, real_name, website, location, about
FROM auth_user
ORDER BY date_joined
LIMIT #{BATCH_SIZE}
OFFSET #{offset}
SQL
)
break if users.ntuples() < 1
@ -133,17 +124,16 @@ class ImportScripts::MyAskBot < ImportScripts::Base
def import_posts
puts "", "importing questions..."
post_count = @client.exec(<<-SQL
post_count = @client.exec(<<-SQL)[0]["count"]
SELECT COUNT(A.id)
FROM askbot_post A
JOIN askbot_thread B
ON A.thread_id = B.id
WHERE NOT B.closed AND A.post_type='question'
SQL
)[0]["count"]
batches(BATCH_SIZE) do |offset|
posts = @client.exec(<<-SQL
posts = @client.exec(<<-SQL)
SELECT A.id, A.author_id, A.added_at, A.text, A.thread_id, B.title
FROM askbot_post A
JOIN askbot_thread B
@ -153,7 +143,6 @@ class ImportScripts::MyAskBot < ImportScripts::Base
LIMIT #{BATCH_SIZE}
OFFSET #{offset}
SQL
)
break if posts.ntuples() < 1
@ -176,7 +165,11 @@ class ImportScripts::MyAskBot < ImportScripts::Base
id: pid,
title: post["title"],
category: cat,
custom_fields: { import_id: pid, import_thread_id: tid, import_tags: tags },
custom_fields: {
import_id: pid,
import_thread_id: tid,
import_tags: tags,
},
user_id: user_id_from_imported_user_id(post["author_id"]) || Discourse::SYSTEM_USER_ID,
created_at: Time.zone.at(@td.decode(post["added_at"])),
raw: post["text"],
@ -188,17 +181,16 @@ class ImportScripts::MyAskBot < ImportScripts::Base
def import_replies
puts "", "importing answers and comments..."
post_count = @client.exec(<<-SQL
post_count = @client.exec(<<-SQL)[0]["count"]
SELECT COUNT(A.id)
FROM askbot_post A
JOIN askbot_thread B
ON A.thread_id = B.id
WHERE NOT B.closed AND A.post_type<>'question'
SQL
)[0]["count"]
batches(BATCH_SIZE) do |offset|
posts = @client.exec(<<-SQL
posts = @client.exec(<<-SQL)
SELECT A.id, A.author_id, A.added_at, A.text, A.thread_id, B.title
FROM askbot_post A
JOIN askbot_thread B
@ -208,7 +200,6 @@ class ImportScripts::MyAskBot < ImportScripts::Base
LIMIT #{BATCH_SIZE}
OFFSET #{offset}
SQL
)
break if posts.ntuples() < 1
@ -222,10 +213,12 @@ class ImportScripts::MyAskBot < ImportScripts::Base
{
id: pid,
topic_id: parent[:topic_id],
custom_fields: { import_id: pid },
custom_fields: {
import_id: pid,
},
user_id: user_id_from_imported_user_id(post["author_id"]) || Discourse::SYSTEM_USER_ID,
created_at: Time.zone.at(@td.decode(post["added_at"])),
raw: post["text"]
raw: post["text"],
}
end
end
@ -240,32 +233,37 @@ class ImportScripts::MyAskBot < ImportScripts::Base
# I am sure this is incomplete, but we didn't make heavy use of internal
# links on our site.
tmp = Regexp.quote("http://#{OLD_SITE}")
r1 = /"(#{tmp})?\/question\/(\d+)\/[a-zA-Z-]*\/?"/
r2 = /\((#{tmp})?\/question\/(\d+)\/[a-zA-Z-]*\/?\)/
r3 = /<?#tmp\/question\/(\d+)\/[a-zA-Z-]*\/?>?/
r1 = %r{"(#{tmp})?/question/(\d+)/[a-zA-Z-]*/?"}
r2 = %r{\((#{tmp})?/question/(\d+)/[a-zA-Z-]*/?\)}
r3 = %r{<?#tmp/question/(\d+)/[a-zA-Z-]*/?>?}
Post.find_each do |post|
raw = post.raw.gsub(r1) do
if topic = topic_lookup_from_imported_post_id($2)
"\"#{topic[:url]}\""
else
$&
raw =
post
.raw
.gsub(r1) do
if topic = topic_lookup_from_imported_post_id($2)
"\"#{topic[:url]}\""
else
$&
end
end
raw =
raw.gsub(r2) do
if topic = topic_lookup_from_imported_post_id($2)
"(#{topic[:url]})"
else
$&
end
end
end
raw = raw.gsub(r2) do
if topic = topic_lookup_from_imported_post_id($2)
"(#{topic[:url]})"
else
$&
raw =
raw.gsub(r3) do
if topic = topic_lookup_from_imported_post_id($1)
trec = Topic.find_by(id: topic[:topic_id])
"[#{trec.title}](#{topic[:url]})"
else
$&
end
end
end
raw = raw.gsub(r3) do
if topic = topic_lookup_from_imported_post_id($1)
trec = Topic.find_by(id: topic[:topic_id])
"[#{trec.title}](#{topic[:url]})"
else
$&
end
end
if raw != post.raw
post.raw = raw

View File

@ -1,6 +1,6 @@
# frozen_string_literal: true
if ARGV.include?('bbcode-to-md')
if ARGV.include?("bbcode-to-md")
# Replace (most) bbcode with markdown before creating posts.
# This will dramatically clean up the final posts in Discourse.
#
@ -10,17 +10,17 @@ if ARGV.include?('bbcode-to-md')
# cd ruby-bbcode-to-md
# gem build ruby-bbcode-to-md.gemspec
# gem install ruby-bbcode-to-md-*.gem
require 'ruby-bbcode-to-md'
require "ruby-bbcode-to-md"
end
require_relative '../../config/environment'
require_relative 'base/lookup_container'
require_relative 'base/uploader'
require_relative "../../config/environment"
require_relative "base/lookup_container"
require_relative "base/uploader"
module ImportScripts; end
module ImportScripts
end
class ImportScripts::Base
def initialize
preload_i18n
@ -62,15 +62,14 @@ class ImportScripts::Base
end
elapsed = Time.now - @start_times[:import]
puts '', '', 'Done (%02dh %02dmin %02dsec)' % [elapsed / 3600, elapsed / 60 % 60, elapsed % 60]
puts "", "", "Done (%02dh %02dmin %02dsec)" % [elapsed / 3600, elapsed / 60 % 60, elapsed % 60]
ensure
reset_site_settings
end
def get_site_settings_for_import
{
blocked_email_domains: '',
blocked_email_domains: "",
min_topic_title_length: 1,
min_post_length: 1,
min_first_post_length: 1,
@ -78,21 +77,23 @@ class ImportScripts::Base
min_personal_message_title_length: 1,
allow_duplicate_topic_titles: true,
allow_duplicate_topic_titles_category: false,
disable_emails: 'yes',
max_attachment_size_kb: 102400,
max_image_size_kb: 102400,
authorized_extensions: '*',
disable_emails: "yes",
max_attachment_size_kb: 102_400,
max_image_size_kb: 102_400,
authorized_extensions: "*",
clean_up_inactive_users_after_days: 0,
clean_up_unused_staged_users_after_days: 0,
clean_up_uploads: false,
clean_orphan_uploads_grace_period_hours: 1800
clean_orphan_uploads_grace_period_hours: 1800,
}
end
def change_site_settings
if SiteSetting.bootstrap_mode_enabled
SiteSetting.default_trust_level = TrustLevel[0] if SiteSetting.default_trust_level == TrustLevel[1]
SiteSetting.default_email_digest_frequency = 10080 if SiteSetting.default_email_digest_frequency == 1440
SiteSetting.default_trust_level = TrustLevel[0] if SiteSetting.default_trust_level ==
TrustLevel[1]
SiteSetting.default_email_digest_frequency =
10_080 if SiteSetting.default_email_digest_frequency == 1440
SiteSetting.bootstrap_mode_enabled = false
end
@ -131,7 +132,7 @@ class ImportScripts::Base
raise NotImplementedError
end
%i{
%i[
add_category
add_group
add_post
@ -146,9 +147,7 @@ class ImportScripts::Base
topic_lookup_from_imported_post_id
user_already_imported?
user_id_from_imported_user_id
}.each do |method_name|
delegate method_name, to: :@lookup
end
].each { |method_name| delegate method_name, to: :@lookup }
def create_admin(opts = {})
admin = User.new
@ -196,7 +195,11 @@ class ImportScripts::Base
end
end
print_status(created + skipped + failed + (opts[:offset] || 0), total, get_start_time("groups"))
print_status(
created + skipped + failed + (opts[:offset] || 0),
total,
get_start_time("groups"),
)
end
[created, skipped]
@ -224,23 +227,22 @@ class ImportScripts::Base
ActiveRecord::Base.transaction do
begin
connection = ActiveRecord::Base.connection.raw_connection
connection.exec('CREATE TEMP TABLE import_ids(val text PRIMARY KEY)')
connection.exec("CREATE TEMP TABLE import_ids(val text PRIMARY KEY)")
import_id_clause = import_ids.map { |id| "('#{PG::Connection.escape_string(id.to_s)}')" }.join(",")
import_id_clause =
import_ids.map { |id| "('#{PG::Connection.escape_string(id.to_s)}')" }.join(",")
connection.exec("INSERT INTO import_ids VALUES #{import_id_clause}")
existing = "#{type.to_s.classify}CustomField".constantize
existing = existing.where(name: 'import_id')
.joins('JOIN import_ids ON val = value')
.count
existing = existing.where(name: "import_id").joins("JOIN import_ids ON val = value").count
if existing == import_ids.length
puts "Skipping #{import_ids.length} already imported #{type}"
true
end
ensure
connection.exec('DROP TABLE import_ids') unless connection.nil?
connection.exec("DROP TABLE import_ids") unless connection.nil?
end
end
end
@ -292,7 +294,11 @@ class ImportScripts::Base
end
end
print_status(created + skipped + failed + (opts[:offset] || 0), total, get_start_time("users"))
print_status(
created + skipped + failed + (opts[:offset] || 0),
total,
get_start_time("users"),
)
end
[created, skipped]
@ -305,7 +311,9 @@ class ImportScripts::Base
post_create_action = opts.delete(:post_create_action)
existing = find_existing_user(opts[:email], opts[:username])
return existing if existing && (merge || existing.custom_fields["import_id"].to_s == import_id.to_s)
if existing && (merge || existing.custom_fields["import_id"].to_s == import_id.to_s)
return existing
end
bio_raw = opts.delete(:bio_raw)
website = opts.delete(:website)
@ -316,8 +324,11 @@ class ImportScripts::Base
original_name = opts[:name]
original_email = opts[:email] = opts[:email].downcase
if !UsernameValidator.new(opts[:username]).valid_format? || !User.username_available?(opts[:username])
opts[:username] = UserNameSuggester.suggest(opts[:username].presence || opts[:name].presence || opts[:email])
if !UsernameValidator.new(opts[:username]).valid_format? ||
!User.username_available?(opts[:username])
opts[:username] = UserNameSuggester.suggest(
opts[:username].presence || opts[:name].presence || opts[:email],
)
end
if !EmailAddressValidator.valid_value?(opts[:email])
@ -339,7 +350,8 @@ class ImportScripts::Base
u = User.new(opts)
(opts[:custom_fields] || {}).each { |k, v| u.custom_fields[k] = v }
u.custom_fields["import_id"] = import_id
u.custom_fields["import_username"] = original_username if original_username.present? && original_username != opts[:username]
u.custom_fields["import_username"] = original_username if original_username.present? &&
original_username != opts[:username]
u.custom_fields["import_avatar_url"] = avatar_url if avatar_url.present?
u.custom_fields["import_pass"] = opts[:password] if opts[:password].present?
u.custom_fields["import_email"] = original_email if original_email != opts[:email]
@ -359,9 +371,7 @@ class ImportScripts::Base
end
end
if opts[:active] && opts[:password].present?
u.activate
end
u.activate if opts[:active] && opts[:password].present?
rescue => e
# try based on email
if e.try(:record).try(:errors).try(:messages).try(:[], :primary_email).present?
@ -377,7 +387,7 @@ class ImportScripts::Base
end
end
if u.custom_fields['import_email']
if u.custom_fields["import_email"]
u.suspended_at = Time.zone.at(Time.now)
u.suspended_till = 200.years.from_now
u.save!
@ -388,11 +398,15 @@ class ImportScripts::Base
user_option.email_messages_level = UserOption.email_level_types[:never]
user_option.save!
if u.save
StaffActionLogger.new(Discourse.system_user).log_user_suspend(u, 'Invalid email address on import')
StaffActionLogger.new(Discourse.system_user).log_user_suspend(
u,
"Invalid email address on import",
)
else
Rails.logger.error("Failed to suspend user #{u.username}. #{u.errors.try(:full_messages).try(:inspect)}")
Rails.logger.error(
"Failed to suspend user #{u.username}. #{u.errors.try(:full_messages).try(:inspect)}",
)
end
end
post_create_action.try(:call, u) if u.persisted?
@ -402,7 +416,8 @@ class ImportScripts::Base
def find_existing_user(email, username)
# Force the use of the index on the 'user_emails' table
UserEmail.where("lower(email) = ?", email.downcase).first&.user || User.where(username: username).first
UserEmail.where("lower(email) = ?", email.downcase).first&.user ||
User.where(username: username).first
end
def created_category(category)
@ -435,7 +450,8 @@ class ImportScripts::Base
# make sure categories don't go more than 2 levels deep
if params[:parent_category_id]
top = Category.find_by_id(params[:parent_category_id])
top = top.parent_category while (top&.height_of_ancestors || -1) + 1 >= SiteSetting.max_category_nesting
top = top.parent_category while (top&.height_of_ancestors || -1) + 1 >=
SiteSetting.max_category_nesting
params[:parent_category_id] = top.id if top
end
@ -471,15 +487,16 @@ class ImportScripts::Base
post_create_action = opts.delete(:post_create_action)
new_category = Category.new(
name: opts[:name],
user_id: opts[:user_id] || opts[:user].try(:id) || Discourse::SYSTEM_USER_ID,
position: opts[:position],
parent_category_id: opts[:parent_category_id],
color: opts[:color] || category_color(opts[:parent_category_id]),
text_color: opts[:text_color] || "FFF",
read_restricted: opts[:read_restricted] || false,
)
new_category =
Category.new(
name: opts[:name],
user_id: opts[:user_id] || opts[:user].try(:id) || Discourse::SYSTEM_USER_ID,
position: opts[:position],
parent_category_id: opts[:parent_category_id],
color: opts[:color] || category_color(opts[:parent_category_id]),
text_color: opts[:text_color] || "FFF",
read_restricted: opts[:read_restricted] || false,
)
new_category.custom_fields["import_id"] = import_id if import_id
new_category.save!
@ -498,10 +515,16 @@ class ImportScripts::Base
end
def category_color(parent_category_id)
@category_colors ||= SiteSetting.category_colors.split('|')
@category_colors ||= SiteSetting.category_colors.split("|")
index = @next_category_color_index[parent_category_id].presence || 0
@next_category_color_index[parent_category_id] = index + 1 >= @category_colors.count ? 0 : index + 1
@next_category_color_index[parent_category_id] = (
if index + 1 >= @category_colors.count
0
else
index + 1
end
)
@category_colors[index]
end
@ -571,7 +594,7 @@ class ImportScripts::Base
opts = opts.merge(skip_validations: true)
opts[:import_mode] = true
opts[:custom_fields] ||= {}
opts[:custom_fields]['import_id'] = import_id
opts[:custom_fields]["import_id"] = import_id
unless opts[:topic_id]
opts[:meta_data] = meta_data = {}
@ -582,7 +605,11 @@ class ImportScripts::Base
opts[:guardian] = STAFF_GUARDIAN
if @bbcode_to_md
opts[:raw] = opts[:raw].bbcode_to_md(false, {}, :disable, :quote) rescue opts[:raw]
opts[:raw] = begin
opts[:raw].bbcode_to_md(false, {}, :disable, :quote)
rescue StandardError
opts[:raw]
end
end
post_creator = PostCreator.new(user, opts)
@ -628,7 +655,7 @@ class ImportScripts::Base
created += 1 if manager.errors.none?
skipped += 1 if manager.errors.any?
rescue
rescue StandardError
skipped += 1
end
end
@ -671,14 +698,14 @@ class ImportScripts::Base
def close_inactive_topics(opts = {})
num_days = opts[:days] || 30
puts '', "Closing topics that have been inactive for more than #{num_days} days."
puts "", "Closing topics that have been inactive for more than #{num_days} days."
query = Topic.where('last_posted_at < ?', num_days.days.ago).where(closed: false)
query = Topic.where("last_posted_at < ?", num_days.days.ago).where(closed: false)
total_count = query.count
closed_count = 0
query.find_each do |topic|
topic.update_status('closed', true, Discourse.system_user)
topic.update_status("closed", true, Discourse.system_user)
closed_count += 1
print_status(closed_count, total_count, get_start_time("close_inactive_topics"))
end
@ -790,7 +817,9 @@ class ImportScripts::Base
puts "", "Updating user digest_attempted_at..."
DB.exec("UPDATE user_stats SET digest_attempted_at = now() - random() * interval '1 week' WHERE digest_attempted_at IS NULL")
DB.exec(
"UPDATE user_stats SET digest_attempted_at = now() - random() * interval '1 week' WHERE digest_attempted_at IS NULL",
)
end
# scripts that are able to import last_seen_at from the source data should override this method
@ -854,13 +883,15 @@ class ImportScripts::Base
count = 0
total = User.count
User.includes(:user_stat).find_each do |user|
begin
user.update_columns(trust_level: 0) if user.trust_level > 0 && user.post_count == 0
rescue Discourse::InvalidAccess
User
.includes(:user_stat)
.find_each do |user|
begin
user.update_columns(trust_level: 0) if user.trust_level > 0 && user.post_count == 0
rescue Discourse::InvalidAccess
end
print_status(count += 1, total, get_start_time("update_tl0"))
end
print_status(count += 1, total, get_start_time("update_tl0"))
end
end
def update_user_signup_date_based_on_first_post
@ -870,7 +901,7 @@ class ImportScripts::Base
total = User.count
User.find_each do |user|
if first = user.posts.order('created_at ASC').first
if first = user.posts.order("created_at ASC").first
user.created_at = first.created_at
user.save!
end
@ -893,16 +924,16 @@ class ImportScripts::Base
def print_status(current, max, start_time = nil)
if start_time.present?
elapsed_seconds = Time.now - start_time
elements_per_minute = '[%.0f items/min] ' % [current / elapsed_seconds.to_f * 60]
elements_per_minute = "[%.0f items/min] " % [current / elapsed_seconds.to_f * 60]
else
elements_per_minute = ''
elements_per_minute = ""
end
print "\r%9d / %d (%5.1f%%) %s" % [current, max, current / max.to_f * 100, elements_per_minute]
end
def print_spinner
@spinner_chars ||= %w{ | / - \\ }
@spinner_chars ||= %w[| / - \\]
@spinner_chars.push @spinner_chars.shift
print "\b#{@spinner_chars[0]}"
end

View File

@ -13,65 +13,69 @@ module ImportScripts
def initialize(cols)
cols.each_with_index do |col, idx|
self.class.public_send(:define_method, col.downcase.gsub(/[\W]/, '_').squeeze('_')) do
@row[idx]
end
self
.class
.public_send(:define_method, col.downcase.gsub(/[\W]/, "_").squeeze("_")) { @row[idx] }
end
end
end
def csv_parse(filename, col_sep = ',')
def csv_parse(filename, col_sep = ",")
first = true
row = nil
current_row = +""
double_quote_count = 0
File.open(filename).each_line do |line|
File
.open(filename)
.each_line do |line|
line.strip!
line.strip!
current_row << "\n" unless current_row.empty?
current_row << line
current_row << "\n" unless current_row.empty?
current_row << line
double_quote_count += line.scan('"').count
double_quote_count += line.scan('"').count
next if double_quote_count % 2 == 1 # this row continues on a new line. don't parse until we have the whole row.
next if double_quote_count % 2 == 1 # this row continues on a new line. don't parse until we have the whole row.
raw =
begin
CSV.parse(current_row, col_sep: col_sep)
rescue CSV::MalformedCSVError => e
puts e.message
puts "*" * 100
puts "Bad row skipped, line is: #{line}"
puts
puts current_row
puts
puts "double quote count is : #{double_quote_count}"
puts "*" * 100
raw = begin
CSV.parse(current_row, col_sep: col_sep)
rescue CSV::MalformedCSVError => e
puts e.message
puts "*" * 100
puts "Bad row skipped, line is: #{line}"
puts
puts current_row
puts
puts "double quote count is : #{double_quote_count}"
puts "*" * 100
current_row = ""
double_quote_count = 0
current_row = ""
double_quote_count = 0
next
end[
0
]
next
end[0]
if first
row = RowResolver.create(raw)
if first
row = RowResolver.create(raw)
current_row = ""
double_quote_count = 0
first = false
next
end
row.load(raw)
yield row
current_row = ""
double_quote_count = 0
first = false
next
end
row.load(raw)
yield row
current_row = ""
double_quote_count = 0
end
end
end
end

View File

@ -1,6 +1,6 @@
# frozen_string_literal: true
require 'sqlite3'
require "sqlite3"
module ImportScripts
class GenericDatabase
@ -80,24 +80,20 @@ module ImportScripts
VALUES (:id, :raw, :topic_id, :user_id, :created_at, :reply_to_post_id, :url, :upload_count)
SQL
attachments&.each do |attachment|
@db.execute(<<-SQL, post_id: post[:id], path: attachment)
attachments&.each { |attachment| @db.execute(<<-SQL, post_id: post[:id], path: attachment) }
INSERT OR REPLACE INTO post_upload (post_id, path)
VALUES (:post_id, :path)
SQL
end
like_user_ids&.each do |user_id|
@db.execute(<<-SQL, post_id: post[:id], user_id: user_id)
like_user_ids&.each { |user_id| @db.execute(<<-SQL, post_id: post[:id], user_id: user_id) }
INSERT OR REPLACE INTO like (post_id, user_id)
VALUES (:post_id, :user_id)
SQL
end
end
end
def sort_posts_by_created_at
@db.execute 'DELETE FROM post_order'
@db.execute "DELETE FROM post_order"
@db.execute <<-SQL
INSERT INTO post_order (post_id)
@ -146,7 +142,7 @@ module ImportScripts
LIMIT #{@batch_size}
SQL
add_last_column_value(rows, 'id')
add_last_column_value(rows, "id")
end
def get_user_id(username)
@ -173,7 +169,7 @@ module ImportScripts
LIMIT #{@batch_size}
SQL
add_last_column_value(rows, 'id')
add_last_column_value(rows, "id")
end
def fetch_topic_attachments(topic_id)
@ -200,7 +196,7 @@ module ImportScripts
LIMIT #{@batch_size}
SQL
add_last_column_value(rows, 'rowid')
add_last_column_value(rows, "rowid")
end
def fetch_sorted_posts(last_row_id)
@ -213,7 +209,7 @@ module ImportScripts
LIMIT #{@batch_size}
SQL
add_last_column_value(rows, 'rowid')
add_last_column_value(rows, "rowid")
end
def fetch_post_attachments(post_id)
@ -240,7 +236,7 @@ module ImportScripts
LIMIT #{@batch_size}
SQL
add_last_column_value(rows, 'rowid')
add_last_column_value(rows, "rowid")
end
def execute_sql(sql)
@ -254,12 +250,12 @@ module ImportScripts
private
def configure_database
@db.execute 'PRAGMA journal_mode = OFF'
@db.execute 'PRAGMA locking_mode = EXCLUSIVE'
@db.execute "PRAGMA journal_mode = OFF"
@db.execute "PRAGMA locking_mode = EXCLUSIVE"
end
def key_data_type
@numeric_keys ? 'INTEGER' : 'TEXT'
@numeric_keys ? "INTEGER" : "TEXT"
end
def create_category_table
@ -299,7 +295,7 @@ module ImportScripts
)
SQL
@db.execute 'CREATE INDEX IF NOT EXISTS user_by_username ON user (username)'
@db.execute "CREATE INDEX IF NOT EXISTS user_by_username ON user (username)"
end
def create_topic_table
@ -317,7 +313,7 @@ module ImportScripts
)
SQL
@db.execute 'CREATE INDEX IF NOT EXISTS topic_by_user_id ON topic (user_id)'
@db.execute "CREATE INDEX IF NOT EXISTS topic_by_user_id ON topic (user_id)"
@db.execute <<-SQL
CREATE TABLE IF NOT EXISTS topic_upload (
@ -326,7 +322,7 @@ module ImportScripts
)
SQL
@db.execute 'CREATE UNIQUE INDEX IF NOT EXISTS topic_upload_unique ON topic_upload(topic_id, path)'
@db.execute "CREATE UNIQUE INDEX IF NOT EXISTS topic_upload_unique ON topic_upload(topic_id, path)"
end
def create_post_table
@ -343,7 +339,7 @@ module ImportScripts
)
SQL
@db.execute 'CREATE INDEX IF NOT EXISTS post_by_user_id ON post (user_id)'
@db.execute "CREATE INDEX IF NOT EXISTS post_by_user_id ON post (user_id)"
@db.execute <<-SQL
CREATE TABLE IF NOT EXISTS post_order (
@ -358,7 +354,7 @@ module ImportScripts
)
SQL
@db.execute 'CREATE UNIQUE INDEX IF NOT EXISTS post_upload_unique ON post_upload(post_id, path)'
@db.execute "CREATE UNIQUE INDEX IF NOT EXISTS post_upload_unique ON post_upload(post_id, path)"
end
def prepare(hash)

View File

@ -3,27 +3,26 @@
module ImportScripts
class LookupContainer
def initialize
puts 'Loading existing groups...'
@groups = GroupCustomField.where(name: 'import_id').pluck(:value, :group_id).to_h
puts "Loading existing groups..."
@groups = GroupCustomField.where(name: "import_id").pluck(:value, :group_id).to_h
puts 'Loading existing users...'
@users = UserCustomField.where(name: 'import_id').pluck(:value, :user_id).to_h
puts "Loading existing users..."
@users = UserCustomField.where(name: "import_id").pluck(:value, :user_id).to_h
puts 'Loading existing categories...'
@categories = CategoryCustomField.where(name: 'import_id').pluck(:value, :category_id).to_h
puts "Loading existing categories..."
@categories = CategoryCustomField.where(name: "import_id").pluck(:value, :category_id).to_h
puts 'Loading existing posts...'
@posts = PostCustomField.where(name: 'import_id').pluck(:value, :post_id).to_h
puts "Loading existing posts..."
@posts = PostCustomField.where(name: "import_id").pluck(:value, :post_id).to_h
puts 'Loading existing topics...'
puts "Loading existing topics..."
@topics = {}
Post.joins(:topic).pluck('posts.id, posts.topic_id, posts.post_number, topics.slug').each do |p|
@topics[p[0]] = {
topic_id: p[1],
post_number: p[2],
url: Post.url(p[3], p[1], p[2])
}
end
Post
.joins(:topic)
.pluck("posts.id, posts.topic_id, posts.post_number, topics.slug")
.each do |p|
@topics[p[0]] = { topic_id: p[1], post_number: p[2], url: Post.url(p[3], p[1], p[2]) }
end
end
# Get the Discourse Post id based on the id of the source record
@ -44,7 +43,7 @@ module ImportScripts
# Get the Discourse Group based on the id of the source group
def find_group_by_import_id(import_id)
GroupCustomField.where(name: 'import_id', value: import_id.to_s).first.try(:group)
GroupCustomField.where(name: "import_id", value: import_id.to_s).first.try(:group)
end
# Get the Discourse User id based on the id of the source user
@ -54,7 +53,7 @@ module ImportScripts
# Get the Discourse User based on the id of the source user
def find_user_by_import_id(import_id)
UserCustomField.where(name: 'import_id', value: import_id.to_s).first.try(:user)
UserCustomField.where(name: "import_id", value: import_id.to_s).first.try(:user)
end
def find_username_by_import_id(import_id)
@ -84,11 +83,7 @@ module ImportScripts
end
def add_topic(post)
@topics[post.id] = {
post_number: post.post_number,
topic_id: post.topic_id,
url: post.url,
}
@topics[post.id] = { post_number: post.post_number, topic_id: post.topic_id, url: post.url }
end
def user_already_imported?(import_id)
@ -98,6 +93,5 @@ module ImportScripts
def post_already_imported?(import_id)
@posts.has_key?(import_id) || @posts.has_key?(import_id.to_s)
end
end
end

View File

@ -13,8 +13,16 @@ module ImportScripts
STDERR.puts "Failed to create upload: #{e}"
nil
ensure
tmp.close rescue nil
tmp.unlink rescue nil
begin
tmp.close
rescue StandardError
nil
end
begin
tmp.unlink
rescue StandardError
nil
end
end
def create_avatar(user, avatar_path)
@ -30,7 +38,7 @@ module ImportScripts
STDERR.puts "Failed to upload avatar for user #{user.username}: #{avatar_path}"
STDERR.puts upload.errors.inspect if upload
end
rescue
rescue StandardError
STDERR.puts "Failed to create avatar for user #{user.username}: #{avatar_path}"
ensure
tempfile.close! if tempfile
@ -52,11 +60,9 @@ module ImportScripts
def copy_to_tempfile(source_path)
extension = File.extname(source_path)
tmp = Tempfile.new(['discourse-upload', extension])
tmp = Tempfile.new(["discourse-upload", extension])
File.open(source_path) do |source_stream|
IO.copy_stream(source_stream, tmp)
end
File.open(source_path) { |source_stream| IO.copy_stream(source_stream, tmp) }
tmp.rewind
tmp

View File

@ -1,29 +1,29 @@
# frozen_string_literal: true
require 'mysql2'
require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::Bbpress < ImportScripts::Base
BB_PRESS_HOST ||= ENV['BBPRESS_HOST'] || "localhost"
BB_PRESS_DB ||= ENV['BBPRESS_DB'] || "bbpress"
BATCH_SIZE ||= 1000
BB_PRESS_PW ||= ENV['BBPRESS_PW'] || ""
BB_PRESS_USER ||= ENV['BBPRESS_USER'] || "root"
BB_PRESS_PREFIX ||= ENV['BBPRESS_PREFIX'] || "wp_"
BB_PRESS_ATTACHMENTS_DIR ||= ENV['BBPRESS_ATTACHMENTS_DIR'] || "/path/to/attachments"
BB_PRESS_HOST ||= ENV["BBPRESS_HOST"] || "localhost"
BB_PRESS_DB ||= ENV["BBPRESS_DB"] || "bbpress"
BATCH_SIZE ||= 1000
BB_PRESS_PW ||= ENV["BBPRESS_PW"] || ""
BB_PRESS_USER ||= ENV["BBPRESS_USER"] || "root"
BB_PRESS_PREFIX ||= ENV["BBPRESS_PREFIX"] || "wp_"
BB_PRESS_ATTACHMENTS_DIR ||= ENV["BBPRESS_ATTACHMENTS_DIR"] || "/path/to/attachments"
def initialize
super
@he = HTMLEntities.new
@client = Mysql2::Client.new(
host: BB_PRESS_HOST,
username: BB_PRESS_USER,
database: BB_PRESS_DB,
password: BB_PRESS_PW,
)
@client =
Mysql2::Client.new(
host: BB_PRESS_HOST,
username: BB_PRESS_USER,
database: BB_PRESS_DB,
password: BB_PRESS_PW,
)
end
def execute
@ -40,17 +40,16 @@ class ImportScripts::Bbpress < ImportScripts::Base
puts "", "importing users..."
last_user_id = -1
total_users = bbpress_query(<<-SQL
total_users = bbpress_query(<<-SQL).first["cnt"]
SELECT COUNT(DISTINCT(u.id)) AS cnt
FROM #{BB_PRESS_PREFIX}users u
LEFT JOIN #{BB_PRESS_PREFIX}posts p ON p.post_author = u.id
WHERE p.post_type IN ('forum', 'reply', 'topic')
AND user_email LIKE '%@%'
SQL
).first["cnt"]
batches(BATCH_SIZE) do |offset|
users = bbpress_query(<<-SQL
users = bbpress_query(<<-SQL).to_a
SELECT u.id, user_nicename, display_name, user_email, user_registered, user_url, user_pass
FROM #{BB_PRESS_PREFIX}users u
LEFT JOIN #{BB_PRESS_PREFIX}posts p ON p.post_author = u.id
@ -61,7 +60,6 @@ class ImportScripts::Bbpress < ImportScripts::Base
ORDER BY u.id
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if users.empty?
@ -73,22 +71,20 @@ class ImportScripts::Bbpress < ImportScripts::Base
user_ids_sql = user_ids.join(",")
users_description = {}
bbpress_query(<<-SQL
bbpress_query(<<-SQL).each { |um| users_description[um["user_id"]] = um["description"] }
SELECT user_id, meta_value description
FROM #{BB_PRESS_PREFIX}usermeta
WHERE user_id IN (#{user_ids_sql})
AND meta_key = 'description'
SQL
).each { |um| users_description[um["user_id"]] = um["description"] }
users_last_activity = {}
bbpress_query(<<-SQL
bbpress_query(<<-SQL).each { |um| users_last_activity[um["user_id"]] = um["last_activity"] }
SELECT user_id, meta_value last_activity
FROM #{BB_PRESS_PREFIX}usermeta
WHERE user_id IN (#{user_ids_sql})
AND meta_key = 'last_activity'
SQL
).each { |um| users_last_activity[um["user_id"]] = um["last_activity"] }
create_users(users, total: total_users, offset: offset) do |u|
{
@ -96,7 +92,7 @@ class ImportScripts::Bbpress < ImportScripts::Base
username: u["user_nicename"],
password: u["user_pass"],
email: u["user_email"].downcase,
name: u["display_name"].presence || u['user_nicename'],
name: u["display_name"].presence || u["user_nicename"],
created_at: u["user_registered"],
website: u["user_url"],
bio_raw: users_description[u["id"]],
@ -114,67 +110,60 @@ class ImportScripts::Bbpress < ImportScripts::Base
emails = Array.new
# gather anonymous users via postmeta table
bbpress_query(<<-SQL
bbpress_query(<<-SQL).each do |pm|
SELECT post_id, meta_key, meta_value
FROM #{BB_PRESS_PREFIX}postmeta
WHERE meta_key LIKE '_bbp_anonymous%'
SQL
).each do |pm|
anon_posts[pm['post_id']] = Hash.new if not anon_posts[pm['post_id']]
anon_posts[pm["post_id"]] = Hash.new if not anon_posts[pm["post_id"]]
if pm['meta_key'] == '_bbp_anonymous_email'
anon_posts[pm['post_id']]['email'] = pm['meta_value']
if pm["meta_key"] == "_bbp_anonymous_email"
anon_posts[pm["post_id"]]["email"] = pm["meta_value"]
end
if pm['meta_key'] == '_bbp_anonymous_name'
anon_posts[pm['post_id']]['name'] = pm['meta_value']
if pm["meta_key"] == "_bbp_anonymous_name"
anon_posts[pm["post_id"]]["name"] = pm["meta_value"]
end
if pm['meta_key'] == '_bbp_anonymous_website'
anon_posts[pm['post_id']]['website'] = pm['meta_value']
if pm["meta_key"] == "_bbp_anonymous_website"
anon_posts[pm["post_id"]]["website"] = pm["meta_value"]
end
end
# gather every existent username
anon_posts.each do |id, post|
anon_names[post['name']] = Hash.new if not anon_names[post['name']]
anon_names[post["name"]] = Hash.new if not anon_names[post["name"]]
# overwriting email address, one user can only use one email address
anon_names[post['name']]['email'] = post['email']
anon_names[post['name']]['website'] = post['website'] if post['website'] != ''
anon_names[post["name"]]["email"] = post["email"]
anon_names[post["name"]]["website"] = post["website"] if post["website"] != ""
end
# make sure every user name has a unique email address
anon_names.each do |k, name|
if not emails.include? name['email']
emails.push ( name['email'])
if not emails.include? name["email"]
emails.push (name["email"])
else
name['email'] = "anonymous_#{SecureRandom.hex}@no-email.invalid"
name["email"] = "anonymous_#{SecureRandom.hex}@no-email.invalid"
end
end
create_users(anon_names) do |k, n|
{
id: k,
email: n["email"].downcase,
name: k,
website: n["website"]
}
{ id: k, email: n["email"].downcase, name: k, website: n["website"] }
end
end
def import_categories
puts "", "importing categories..."
categories = bbpress_query(<<-SQL
categories = bbpress_query(<<-SQL)
SELECT id, post_name, post_parent
FROM #{BB_PRESS_PREFIX}posts
WHERE post_type = 'forum'
AND LENGTH(COALESCE(post_name, '')) > 0
ORDER BY post_parent, id
SQL
)
create_categories(categories) do |c|
category = { id: c['id'], name: c['post_name'] }
if (parent_id = c['post_parent'].to_i) > 0
category = { id: c["id"], name: c["post_name"] }
if (parent_id = c["post_parent"].to_i) > 0
category[:parent_category_id] = category_id_from_imported_category_id(parent_id)
end
category
@ -185,16 +174,15 @@ class ImportScripts::Bbpress < ImportScripts::Base
puts "", "importing topics and posts..."
last_post_id = -1
total_posts = bbpress_query(<<-SQL
total_posts = bbpress_query(<<-SQL).first["count"]
SELECT COUNT(*) count
FROM #{BB_PRESS_PREFIX}posts
WHERE post_status <> 'spam'
AND post_type IN ('topic', 'reply')
SQL
).first["count"]
batches(BATCH_SIZE) do |offset|
posts = bbpress_query(<<-SQL
posts = bbpress_query(<<-SQL).to_a
SELECT id,
post_author,
post_date,
@ -209,7 +197,6 @@ class ImportScripts::Bbpress < ImportScripts::Base
ORDER BY id
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if posts.empty?
@ -221,31 +208,29 @@ class ImportScripts::Bbpress < ImportScripts::Base
post_ids_sql = post_ids.join(",")
posts_likes = {}
bbpress_query(<<-SQL
bbpress_query(<<-SQL).each { |pm| posts_likes[pm["post_id"]] = pm["likes"].to_i }
SELECT post_id, meta_value likes
FROM #{BB_PRESS_PREFIX}postmeta
WHERE post_id IN (#{post_ids_sql})
AND meta_key = 'Likes'
SQL
).each { |pm| posts_likes[pm["post_id"]] = pm["likes"].to_i }
anon_names = {}
bbpress_query(<<-SQL
bbpress_query(<<-SQL).each { |pm| anon_names[pm["post_id"]] = pm["meta_value"] }
SELECT post_id, meta_value
FROM #{BB_PRESS_PREFIX}postmeta
WHERE post_id IN (#{post_ids_sql})
AND meta_key = '_bbp_anonymous_name'
SQL
).each { |pm| anon_names[pm["post_id"]] = pm["meta_value"] }
create_posts(posts, total: total_posts, offset: offset) do |p|
skip = false
user_id = user_id_from_imported_user_id(p["post_author"]) ||
find_user_by_import_id(p["post_author"]).try(:id) ||
user_id_from_imported_user_id(anon_names[p['id']]) ||
find_user_by_import_id(anon_names[p['id']]).try(:id) ||
-1
user_id =
user_id_from_imported_user_id(p["post_author"]) ||
find_user_by_import_id(p["post_author"]).try(:id) ||
user_id_from_imported_user_id(anon_names[p["id"]]) ||
find_user_by_import_id(anon_names[p["id"]]).try(:id) || -1
post = {
id: p["id"],
@ -256,7 +241,9 @@ class ImportScripts::Bbpress < ImportScripts::Base
}
if post[:raw].present?
post[:raw].gsub!(/\<pre\>\<code(=[a-z]*)?\>(.*?)\<\/code\>\<\/pre\>/im) { "```\n#{@he.decode($2)}\n```" }
post[:raw].gsub!(%r{\<pre\>\<code(=[a-z]*)?\>(.*?)\</code\>\</pre\>}im) do
"```\n#{@he.decode($2)}\n```"
end
end
if p["post_type"] == "topic"
@ -288,17 +275,16 @@ class ImportScripts::Bbpress < ImportScripts::Base
count = 0
last_attachment_id = -1
total_attachments = bbpress_query(<<-SQL
total_attachments = bbpress_query(<<-SQL).first["count"]
SELECT COUNT(*) count
FROM #{BB_PRESS_PREFIX}postmeta pm
JOIN #{BB_PRESS_PREFIX}posts p ON p.id = pm.post_id
WHERE pm.meta_key = '_wp_attached_file'
AND p.post_parent > 0
SQL
).first["count"]
batches(BATCH_SIZE) do |offset|
attachments = bbpress_query(<<-SQL
attachments = bbpress_query(<<-SQL).to_a
SELECT pm.meta_id id, pm.meta_value, p.post_parent post_id
FROM #{BB_PRESS_PREFIX}postmeta pm
JOIN #{BB_PRESS_PREFIX}posts p ON p.id = pm.post_id
@ -308,7 +294,6 @@ class ImportScripts::Bbpress < ImportScripts::Base
ORDER BY pm.meta_id
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if attachments.empty?
last_attachment_id = attachments[-1]["id"].to_i
@ -325,7 +310,9 @@ class ImportScripts::Bbpress < ImportScripts::Base
if !post.raw[html]
post.raw << "\n\n" << html
post.save!
PostUpload.create!(post: post, upload: upload) unless PostUpload.where(post: post, upload: upload).exists?
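# The exists? guard keeps re-runs of the importer from creating duplicate post/upload join rows.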
unless PostUpload.where(post: post, upload: upload).exists?
PostUpload.create!(post: post, upload: upload)
end
end
end
end
@ -340,15 +327,14 @@ class ImportScripts::Bbpress < ImportScripts::Base
count = 0
last_attachment_id = -1
total_attachments = bbpress_query(<<-SQL
total_attachments = bbpress_query(<<-SQL).first["count"]
SELECT COUNT(*) count
FROM #{BB_PRESS_PREFIX}bb_attachments
WHERE post_id IN (SELECT id FROM #{BB_PRESS_PREFIX}posts WHERE post_status <> 'spam' AND post_type IN ('topic', 'reply'))
SQL
).first["count"]
batches(BATCH_SIZE) do |offset|
attachments = bbpress_query(<<-SQL
attachments = bbpress_query(<<-SQL).to_a
SELECT id, filename, post_id
FROM #{BB_PRESS_PREFIX}bb_attachments
WHERE post_id IN (SELECT id FROM #{BB_PRESS_PREFIX}posts WHERE post_status <> 'spam' AND post_type IN ('topic', 'reply'))
@ -356,13 +342,16 @@ class ImportScripts::Bbpress < ImportScripts::Base
ORDER BY id
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if attachments.empty?
last_attachment_id = attachments[-1]["id"].to_i
attachments.each do |a|
print_status(count += 1, total_attachments, get_start_time("attachments_from_bb_attachments"))
print_status(
count += 1,
total_attachments,
get_start_time("attachments_from_bb_attachments"),
)
if path = find_attachment(a["filename"], a["id"])
if post = Post.find_by(id: post_id_from_imported_post_id(a["post_id"]))
upload = create_upload(post.user.id, path, a["filename"])
@ -371,7 +360,9 @@ class ImportScripts::Bbpress < ImportScripts::Base
if !post.raw[html]
post.raw << "\n\n" << html
post.save!
PostUpload.create!(post: post, upload: upload) unless PostUpload.where(post: post, upload: upload).exists?
unless PostUpload.where(post: post, upload: upload).exists?
PostUpload.create!(post: post, upload: upload)
end
end
end
end
@ -391,7 +382,7 @@ class ImportScripts::Bbpress < ImportScripts::Base
last_topic_id = -1
batches(BATCH_SIZE) do |offset|
topics = bbpress_query(<<-SQL
topics = bbpress_query(<<-SQL).to_a
SELECT id,
guid
FROM #{BB_PRESS_PREFIX}posts
@ -401,14 +392,17 @@ class ImportScripts::Bbpress < ImportScripts::Base
ORDER BY id
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if topics.empty?
last_topic_id = topics[-1]["id"].to_i
topics.each do |t|
topic = topic_lookup_from_imported_post_id(t['id'])
Permalink.create(url: URI.parse(t['guid']).path.chomp('/'), topic_id: topic[:topic_id]) rescue nil
topic = topic_lookup_from_imported_post_id(t["id"])
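# URI.parse raises on malformed guids; rescuing skips that permalink rather than aborting the import.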
begin
Permalink.create(url: URI.parse(t["guid"]).path.chomp("/"), topic_id: topic[:topic_id])
rescue StandardError
nil
end
end
end
end
@ -417,42 +411,44 @@ class ImportScripts::Bbpress < ImportScripts::Base
puts "", "importing private messages..."
last_post_id = -1
total_posts = bbpress_query("SELECT COUNT(*) count FROM #{BB_PRESS_PREFIX}bp_messages_messages").first["count"]
total_posts =
bbpress_query("SELECT COUNT(*) count FROM #{BB_PRESS_PREFIX}bp_messages_messages").first[
"count"
]
threads = {}
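# thread_id -> { target_user_ids:, imported_topic_id: }, filled by the recipient batches below and reused when creating the posts.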
total_count = bbpress_query("SELECT COUNT(*) count FROM #{BB_PRESS_PREFIX}bp_messages_recipients").first["count"]
total_count =
bbpress_query("SELECT COUNT(*) count FROM #{BB_PRESS_PREFIX}bp_messages_recipients").first[
"count"
]
current_count = 0
batches(BATCH_SIZE) do |offset|
rows = bbpress_query(<<-SQL
rows = bbpress_query(<<-SQL).to_a
SELECT thread_id, user_id
FROM #{BB_PRESS_PREFIX}bp_messages_recipients
ORDER BY id
LIMIT #{BATCH_SIZE}
OFFSET #{offset}
SQL
).to_a
break if rows.empty?
rows.each do |row|
current_count += 1
print_status(current_count, total_count, get_start_time('private_messages'))
print_status(current_count, total_count, get_start_time("private_messages"))
threads[row['thread_id']] ||= {
target_user_ids: [],
imported_topic_id: nil
}
user_id = user_id_from_imported_user_id(row['user_id'])
if user_id && !threads[row['thread_id']][:target_user_ids].include?(user_id)
threads[row['thread_id']][:target_user_ids] << user_id
threads[row["thread_id"]] ||= { target_user_ids: [], imported_topic_id: nil }
user_id = user_id_from_imported_user_id(row["user_id"])
if user_id && !threads[row["thread_id"]][:target_user_ids].include?(user_id)
threads[row["thread_id"]][:target_user_ids] << user_id
end
end
end
batches(BATCH_SIZE) do |offset|
posts = bbpress_query(<<-SQL
posts = bbpress_query(<<-SQL).to_a
SELECT id,
thread_id,
date_sent,
@ -464,39 +460,48 @@ class ImportScripts::Bbpress < ImportScripts::Base
ORDER BY thread_id, date_sent
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if posts.empty?
last_post_id = posts[-1]["id"].to_i
create_posts(posts, total: total_posts, offset: offset) do |post|
if tcf = TopicCustomField.where(name: 'bb_thread_id', value: post['thread_id']).first
if tcf = TopicCustomField.where(name: "bb_thread_id", value: post["thread_id"]).first
{
id: "pm#{post['id']}",
topic_id: threads[post['thread_id']][:imported_topic_id],
user_id: user_id_from_imported_user_id(post['sender_id']) || find_user_by_import_id(post['sender_id'])&.id || -1,
raw: post['message'],
created_at: post['date_sent'],
id: "pm#{post["id"]}",
topic_id: threads[post["thread_id"]][:imported_topic_id],
user_id:
user_id_from_imported_user_id(post["sender_id"]) ||
find_user_by_import_id(post["sender_id"])&.id || -1,
raw: post["message"],
created_at: post["date_sent"],
}
else
# First post of the thread
{
id: "pm#{post['id']}",
id: "pm#{post["id"]}",
archetype: Archetype.private_message,
user_id: user_id_from_imported_user_id(post['sender_id']) || find_user_by_import_id(post['sender_id'])&.id || -1,
title: post['subject'],
raw: post['message'],
created_at: post['date_sent'],
target_usernames: User.where(id: threads[post['thread_id']][:target_user_ids]).pluck(:username),
post_create_action: proc do |new_post|
if topic = new_post.topic
threads[post['thread_id']][:imported_topic_id] = topic.id
TopicCustomField.create(topic_id: topic.id, name: 'bb_thread_id', value: post['thread_id'])
else
puts "Error in post_create_action! Can't find topic!"
end
end
user_id:
user_id_from_imported_user_id(post["sender_id"]) ||
find_user_by_import_id(post["sender_id"])&.id || -1,
title: post["subject"],
raw: post["message"],
created_at: post["date_sent"],
target_usernames:
User.where(id: threads[post["thread_id"]][:target_user_ids]).pluck(:username),
post_create_action:
proc do |new_post|
if topic = new_post.topic
threads[post["thread_id"]][:imported_topic_id] = topic.id
TopicCustomField.create(
topic_id: topic.id,
name: "bb_thread_id",
value: post["thread_id"],
)
else
puts "Error in post_create_action! Can't find topic!"
end
end,
}
end
end
@ -506,7 +511,6 @@ class ImportScripts::Bbpress < ImportScripts::Base
def bbpress_query(sql)
@client.query(sql, cache_rows: false)
end
end
ImportScripts::Bbpress.new.perform
View File
@ -2,13 +2,12 @@
# bespoke importer for a customer, feel free to borrow ideas
require 'csv'
require "csv"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
# Call it like this:
# RAILS_ENV=production bundle exec ruby script/import_scripts/bespoke_1.rb
class ImportScripts::Bespoke < ImportScripts::Base
BATCH_SIZE = 1000
def initialize(path)
@ -18,9 +17,9 @@ class ImportScripts::Bespoke < ImportScripts::Base
puts "loading post mappings..."
@post_number_map = {}
Post.pluck(:id, :post_number).each do |post_id, post_number|
@post_number_map[post_id] = post_number
end
Post
.pluck(:id, :post_number)
.each { |post_id, post_number| @post_number_map[post_id] = post_number }
end
def created_post(post)
@ -32,7 +31,6 @@ class ImportScripts::Bespoke < ImportScripts::Base
import_users
import_categories
import_posts
end
class RowResolver
@ -45,19 +43,13 @@ class ImportScripts::Bespoke < ImportScripts::Base
end
def initialize(cols)
cols.each_with_index do |col, idx|
self.class.public_send(:define_method, col) do
@row[idx]
end
end
cols.each_with_index { |col, idx| self.class.public_send(:define_method, col) { @row[idx] } }
end
end
def load_user_batch!(users, offset, total)
if users.length > 0
create_users(users, offset: offset, total: total) do |user|
user
end
create_users(users, offset: offset, total: total) { |user| user }
users.clear
end
end
@ -70,54 +62,56 @@ class ImportScripts::Bespoke < ImportScripts::Base
current_row = +""
double_quote_count = 0
File.open(filename).each_line do |line|
File
.open(filename)
.each_line do |line|
# escaping is mental here
line.gsub!(/\\(.{1})/) { |m| m[-1] == '"' ? '""' : m[-1] }
line.strip!
# escaping is mental here
line.gsub!(/\\(.{1})/) { |m| m[-1] == '"' ? '""' : m[-1] }
line.strip!
current_row << "\n" unless current_row.empty?
current_row << line
current_row << "\n" unless current_row.empty?
current_row << line
double_quote_count += line.scan('"').count
double_quote_count += line.scan('"').count
next if double_quote_count % 2 == 1
if double_quote_count % 2 == 1
next
end
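# An odd running quote count means the row is still inside a quoted field that spans lines, so keep buffering.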
raw =
begin
CSV.parse(current_row)
rescue CSV::MalformedCSVError => e
puts e.message
puts "*" * 100
puts "Bad row skipped, line is: #{line}"
puts
puts current_row
puts
puts "double quote count is : #{double_quote_count}"
puts "*" * 100
raw = begin
CSV.parse(current_row)
rescue CSV::MalformedCSVError => e
puts e.message
puts "*" * 100
puts "Bad row skipped, line is: #{line}"
puts
puts current_row
puts
puts "double quote count is : #{double_quote_count}"
puts "*" * 100
current_row = ""
double_quote_count = 0
next
end[
0
]
current_row = ""
double_quote_count = 0
next
end[0]
if first
row = RowResolver.create(raw)
if first
row = RowResolver.create(raw)
current_row = ""
double_quote_count = 0
first = false
next
end
row.load(raw)
yield row
current_row = ""
double_quote_count = 0
first = false
next
end
row.load(raw)
yield row
current_row = ""
double_quote_count = 0
end
end
def total_rows(table)
@ -133,14 +127,11 @@ class ImportScripts::Bespoke < ImportScripts::Base
total = total_rows("users")
csv_parse("users") do |row|
id = row.id
email = row.email
# fake it
if row.email.blank? || row.email !~ /@/
email = fake_email
end
email = fake_email if row.email.blank? || row.email !~ /@/
name = row.display_name
username = row.key_custom
@ -150,19 +141,10 @@ class ImportScripts::Bespoke < ImportScripts::Base
username = email.split("@")[0] if username.blank?
name = email.split("@")[0] if name.blank?
users << {
id: id,
email: email,
name: name,
username: username,
created_at: created_at
}
users << { id: id, email: email, name: name, username: username, created_at: created_at }
count += 1
if count % BATCH_SIZE == 0
load_user_batch! users, count - users.length, total
end
load_user_batch! users, count - users.length, total if count % BATCH_SIZE == 0
end
load_user_batch! users, count, total
@ -174,22 +156,19 @@ class ImportScripts::Bespoke < ImportScripts::Base
rows << { id: row.id, name: row.name, description: row.description }
end
create_categories(rows) do |row|
row
end
create_categories(rows) { |row| row }
end
def normalize_raw!(raw)
# purple and #1223f3
raw.gsub!(/\[color=[#a-z0-9]+\]/i, "")
raw.gsub!(/\[\/color\]/i, "")
raw.gsub!(/\[signature\].+\[\/signature\]/im, "")
raw.gsub!(%r{\[/color\]}i, "")
raw.gsub!(%r{\[signature\].+\[/signature\]}im, "")
raw
end
def import_post_batch!(posts, topics, offset, total)
create_posts(posts, total: total, offset: offset) do |post|
mapped = {}
mapped[:id] = post[:id]
@ -223,7 +202,7 @@ class ImportScripts::Bespoke < ImportScripts::Base
mapped
end
posts.clear
posts.clear
end
def import_posts
@ -237,7 +216,7 @@ class ImportScripts::Bespoke < ImportScripts::Base
category_id: topic.forum_category_id,
deleted: topic.is_deleted.to_i == 1,
locked: topic.is_locked.to_i == 1,
pinned: topic.is_pinned.to_i == 1
pinned: topic.is_pinned.to_i == 1,
}
end
@ -246,7 +225,6 @@ class ImportScripts::Bespoke < ImportScripts::Base
posts = []
count = 0
csv_parse("posts") do |row|
unless row.dcreate
puts "NO CREATION DATE FOR POST"
p row
@ -261,7 +239,7 @@ class ImportScripts::Bespoke < ImportScripts::Base
title: row.title,
body: normalize_raw!(row.body),
deleted: row.is_deleted.to_i == 1,
created_at: DateTime.parse(row.dcreate)
created_at: DateTime.parse(row.dcreate),
}
posts << row
count += 1
@ -275,7 +253,6 @@ class ImportScripts::Bespoke < ImportScripts::Base
exit
end
end
unless ARGV[0] && Dir.exist?(ARGV[0])
View File
@ -7,18 +7,18 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb")
# Make sure to follow the right format in your CSV files.
class ImportScripts::CsvImporter < ImportScripts::Base
CSV_FILE_PATH = ENV['CSV_USER_FILE'] || '/var/www/discourse/tmp/users.csv'
CSV_CUSTOM_FIELDS = ENV['CSV_CUSTOM_FIELDS'] || '/var/www/discourse/tmp/custom_fields.csv'
CSV_EMAILS = ENV['CSV_EMAILS'] || '/var/www/discourse/tmp/emails.csv'
CSV_CATEGORIES = ENV['CSV_CATEGORIES'] || '/var/www/discourse/tmp/categories.csv'
CSV_TOPICS = ENV['CSV_TOPICS'] || '/var/www/discourse/tmp/topics_new_users.csv'
CSV_TOPICS_EXISTING_USERS = ENV['CSV_TOPICS'] || '/var/www/discourse/tmp/topics_existing_users.csv'
IMPORT_PREFIX = ENV['IMPORT_PREFIX'] || '2022-08-11'
IMPORT_USER_ID_PREFIX = 'csv-user-import-' + IMPORT_PREFIX + '-'
IMPORT_CATEGORY_ID_PREFIX = 'csv-category-import-' + IMPORT_PREFIX + '-'
IMPORT_TOPIC_ID_PREFIX = 'csv-topic-import-' + IMPORT_PREFIX + '-'
IMPORT_TOPIC_ID_EXISITNG_PREFIX = 'csv-topic_existing-import-' + IMPORT_PREFIX + '-'
CSV_FILE_PATH = ENV["CSV_USER_FILE"] || "/var/www/discourse/tmp/users.csv"
CSV_CUSTOM_FIELDS = ENV["CSV_CUSTOM_FIELDS"] || "/var/www/discourse/tmp/custom_fields.csv"
CSV_EMAILS = ENV["CSV_EMAILS"] || "/var/www/discourse/tmp/emails.csv"
CSV_CATEGORIES = ENV["CSV_CATEGORIES"] || "/var/www/discourse/tmp/categories.csv"
CSV_TOPICS = ENV["CSV_TOPICS"] || "/var/www/discourse/tmp/topics_new_users.csv"
CSV_TOPICS_EXISTING_USERS =
ENV["CSV_TOPICS"] || "/var/www/discourse/tmp/topics_existing_users.csv"
IMPORT_PREFIX = ENV["IMPORT_PREFIX"] || "2022-08-11"
IMPORT_USER_ID_PREFIX = "csv-user-import-" + IMPORT_PREFIX + "-"
IMPORT_CATEGORY_ID_PREFIX = "csv-category-import-" + IMPORT_PREFIX + "-"
IMPORT_TOPIC_ID_PREFIX = "csv-topic-import-" + IMPORT_PREFIX + "-"
IMPORT_TOPIC_ID_EXISITNG_PREFIX = "csv-topic_existing-import-" + IMPORT_PREFIX + "-"
def initialize
super
@ -49,25 +49,19 @@ class ImportScripts::CsvImporter < ImportScripts::Base
return nil
end
CSV.parse(File.read(path, encoding: 'bom|utf-8'), headers: true)
CSV.parse(File.read(path, encoding: "bom|utf-8"), headers: true)
end
def username_for(name)
result = name.downcase.gsub(/[^a-z0-9\-\_]/, '')
if result.blank?
result = Digest::SHA1.hexdigest(name)[0...10]
end
result = name.downcase.gsub(/[^a-z0-9\-\_]/, "")
result = Digest::SHA1.hexdigest(name)[0...10] if result.blank?
result
end
def get_email(id)
email = nil
@imported_emails.each do |e|
if e["user_id"] == id
email = e["email"]
end
end
@imported_emails.each { |e| email = e["email"] if e["user_id"] == id }
email
end
@ -76,9 +70,7 @@ class ImportScripts::CsvImporter < ImportScripts::Base
custom_fields = {}
@imported_custom_fields.each do |cf|
if cf["user_id"] == id
@imported_custom_fields_names.each do |name|
custom_fields[name] = cf[name]
end
@imported_custom_fields_names.each { |name| custom_fields[name] = cf[name] }
end
end
@ -86,98 +78,95 @@ class ImportScripts::CsvImporter < ImportScripts::Base
end
def import_users
puts '', "Importing users"
puts "", "Importing users"
users = []
@imported_users.each do |u|
email = get_email(u['id'])
custom_fields = get_custom_fields(u['id'])
u['email'] = email
u['custom_fields'] = custom_fields
u['id'] = IMPORT_USER_ID_PREFIX + u['id']
email = get_email(u["id"])
custom_fields = get_custom_fields(u["id"])
u["email"] = email
u["custom_fields"] = custom_fields
u["id"] = IMPORT_USER_ID_PREFIX + u["id"]
users << u
end
users.uniq!
create_users(users) do |u|
{
id: u['id'],
username: u['username'],
email: u['email'],
created_at: u['created_at'],
custom_fields: u['custom_fields'],
id: u["id"],
username: u["username"],
email: u["email"],
created_at: u["created_at"],
custom_fields: u["custom_fields"],
}
end
end
def import_categories
puts '', "Importing categories"
puts "", "Importing categories"
categories = []
@imported_categories.each do |c|
c['user_id'] = user_id_from_imported_user_id(IMPORT_USER_ID_PREFIX + c['user_id']) || Discourse::SYSTEM_USER_ID
c['id'] = IMPORT_CATEGORY_ID_PREFIX + c['id']
c["user_id"] = user_id_from_imported_user_id(IMPORT_USER_ID_PREFIX + c["user_id"]) ||
Discourse::SYSTEM_USER_ID
c["id"] = IMPORT_CATEGORY_ID_PREFIX + c["id"]
categories << c
end
categories.uniq!
create_categories(categories) do |c|
{
id: c['id'],
user_id: c['user_id'],
name: c['name'],
description: c['description']
}
{ id: c["id"], user_id: c["user_id"], name: c["name"], description: c["description"] }
end
end
def import_topics
puts '', "Importing topics"
puts "", "Importing topics"
topics = []
@imported_topics.each do |t|
t['user_id'] = user_id_from_imported_user_id(IMPORT_USER_ID_PREFIX + t['user_id']) || Discourse::SYSTEM_USER_ID
t['category_id'] = category_id_from_imported_category_id(IMPORT_CATEGORY_ID_PREFIX + t['category_id'])
t['id'] = IMPORT_TOPIC_ID_PREFIX + t['id']
t["user_id"] = user_id_from_imported_user_id(IMPORT_USER_ID_PREFIX + t["user_id"]) ||
Discourse::SYSTEM_USER_ID
t["category_id"] = category_id_from_imported_category_id(
IMPORT_CATEGORY_ID_PREFIX + t["category_id"],
)
t["id"] = IMPORT_TOPIC_ID_PREFIX + t["id"]
topics << t
end
create_posts(topics) do |t|
{
id: t['id'],
user_id: t['user_id'],
title: t['title'],
category: t['category_id'],
raw: t['raw']
id: t["id"],
user_id: t["user_id"],
title: t["title"],
category: t["category_id"],
raw: t["raw"],
}
end
end
def import_topics_existing_users
# Import topics for users that already existed in the DB, not imported during this migration
puts '', "Importing topics for existing users"
puts "", "Importing topics for existing users"
topics = []
@imported_topics_existing_users.each do |t|
t['id'] = IMPORT_TOPIC_ID_EXISITNG_PREFIX + t['id']
t["id"] = IMPORT_TOPIC_ID_EXISITNG_PREFIX + t["id"]
topics << t
end
create_posts(topics) do |t|
{
id: t['id'],
user_id: t['user_id'], # This is a Discourse user ID
title: t['title'],
category: t['category_id'], # This is a Discourse category ID
raw: t['raw']
id: t["id"],
user_id: t["user_id"], # This is a Discourse user ID
title: t["title"],
category: t["category_id"], # This is a Discourse category ID
raw: t["raw"],
}
end
end
end
if __FILE__ == $0
ImportScripts::CsvImporter.new.perform
end
ImportScripts::CsvImporter.new.perform if __FILE__ == $0
# == CSV files format
#
View File
@ -6,10 +6,9 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb")
# Edit the constants and initialize method for your import data.
class ImportScripts::CsvRestoreStagedUsers < ImportScripts::Base
CSV_FILE_PATH = ENV['CSV_USER_FILE']
CSV_CUSTOM_FIELDS = ENV['CSV_CUSTOM_FIELDS']
CSV_EMAILS = ENV['CSV_EMAILS']
CSV_FILE_PATH = ENV["CSV_USER_FILE"]
CSV_CUSTOM_FIELDS = ENV["CSV_CUSTOM_FIELDS"]
CSV_EMAILS = ENV["CSV_EMAILS"]
BATCH_SIZE ||= 1000
@ -35,62 +34,51 @@ class ImportScripts::CsvRestoreStagedUsers < ImportScripts::Base
end
def username_for(name)
result = name.downcase.gsub(/[^a-z0-9\-\_]/, '')
result = name.downcase.gsub(/[^a-z0-9\-\_]/, "")
if result.blank?
result = Digest::SHA1.hexdigest(name)[0...10]
end
result = Digest::SHA1.hexdigest(name)[0...10] if result.blank?
result
end
def get_email(id)
email = nil
@imported_emails.each do |e|
if e["user_id"] == id
email = e["email"]
end
end
@imported_emails.each { |e| email = e["email"] if e["user_id"] == id }
email
end
def get_custom_fields(id)
custom_fields = {}
@imported_custom_fields.each do |cf|
if cf["user_id"] == id
custom_fields[cf["name"]] = cf["value"]
end
custom_fields[cf["name"]] = cf["value"] if cf["user_id"] == id
end
custom_fields
end
def import_users
puts '', "Importing users"
puts "", "Importing users"
users = []
@imported_users.each do |u|
email = get_email(u['id'])
custom_fields = get_custom_fields(u['id'])
u['email'] = email
u['custom_fields'] = custom_fields
email = get_email(u["id"])
custom_fields = get_custom_fields(u["id"])
u["email"] = email
u["custom_fields"] = custom_fields
users << u
end
users.uniq!
create_users(users) do |u|
{
id: u['id'],
username: u['username'],
email: u['email'],
created_at: u['created_at'],
staged: u['staged'],
custom_fields: u['custom_fields'],
id: u["id"],
username: u["username"],
email: u["email"],
created_at: u["created_at"],
staged: u["staged"],
custom_fields: u["custom_fields"],
}
end
end
end
if __FILE__ == $0
ImportScripts::CsvRestoreStagedUsers.new.perform
end
ImportScripts::CsvRestoreStagedUsers.new.perform if __FILE__ == $0
File diff suppressed because it is too large

View File
@ -1,7 +1,7 @@
# frozen_string_literal: true
require 'nokogiri'
require 'optparse'
require "nokogiri"
require "optparse"
require File.expand_path(File.dirname(__FILE__) + "/base")
class ImportScripts::Disqus < ImportScripts::Base
@ -35,7 +35,7 @@ class ImportScripts::Disqus < ImportScripts::Base
by_email = {}
@parser.posts.each do |id, p|
next if p[:is_spam] == 'true' || p[:is_deleted] == 'true'
next if p[:is_spam] == "true" || p[:is_deleted] == "true"
by_email[p[:author_email]] = { name: p[:author_name], username: p[:author_username] }
end
@ -45,13 +45,7 @@ class ImportScripts::Disqus < ImportScripts::Base
create_users(by_email.keys) do |email|
user = by_email[email]
{
id: email,
email: email,
username: user[:username],
name: user[:name],
merge: true
}
{ id: email, email: email, username: user[:username], name: user[:name], merge: true }
end
end
@ -59,7 +53,6 @@ class ImportScripts::Disqus < ImportScripts::Base
puts "", "importing topics..."
@parser.threads.each do |id, t|
title = t[:title]
title.gsub!(/&#8220;/, '"')
title.gsub!(/&#8221;/, '"')
@ -79,7 +72,7 @@ class ImportScripts::Disqus < ImportScripts::Base
if post.present? && post.topic.posts_count <= 1
(t[:posts] || []).each do |p|
post_user = find_existing_user(p[:author_email] || '', p[:author_username])
post_user = find_existing_user(p[:author_email] || "", p[:author_username])
next unless post_user.present?
attrs = {
@ -87,7 +80,7 @@ class ImportScripts::Disqus < ImportScripts::Base
topic_id: post.topic_id,
raw: p[:cooked],
cooked: p[:cooked],
created_at: Date.parse(p[:created_at])
created_at: Date.parse(p[:created_at]),
}
if p[:parent_id]
@ -125,23 +118,22 @@ class DisqusSAX < Nokogiri::XML::SAX::Document
end
def start_element(name, attrs = [])
hashed = Hash[attrs]
case name
when 'post'
when "post"
@post = {}
@post[:id] = hashed['dsq:id'] if @post
when 'thread'
id = hashed['dsq:id']
@post[:id] = hashed["dsq:id"] if @post
when "thread"
id = hashed["dsq:id"]
if @post
thread = @threads[id]
thread[:posts] << @post
else
@thread = { id: id, posts: [] }
end
when 'parent'
when "parent"
if @post
id = hashed['dsq:id']
id = hashed["dsq:id"]
@post[:parent_id] = id
end
end
@ -151,10 +143,10 @@ class DisqusSAX < Nokogiri::XML::SAX::Document
def end_element(name)
case name
when 'post'
when "post"
@posts[@post[:id]] = @post
@post = nil
when 'thread'
when "thread"
if @post.nil?
@threads[@thread[:id]] = @thread
@thread = nil
@ -165,25 +157,25 @@ class DisqusSAX < Nokogiri::XML::SAX::Document
end
def characters(str)
record(@post, :author_email, str, 'author', 'email')
record(@post, :author_name, str, 'author', 'name')
record(@post, :author_username, str, 'author', 'username')
record(@post, :author_anonymous, str, 'author', 'isAnonymous')
record(@post, :created_at, str, 'createdAt')
record(@post, :is_deleted, str, 'isDeleted')
record(@post, :is_spam, str, 'isSpam')
record(@post, :author_email, str, "author", "email")
record(@post, :author_name, str, "author", "name")
record(@post, :author_username, str, "author", "username")
record(@post, :author_anonymous, str, "author", "isAnonymous")
record(@post, :created_at, str, "createdAt")
record(@post, :is_deleted, str, "isDeleted")
record(@post, :is_spam, str, "isSpam")
record(@thread, :link, str, 'link')
record(@thread, :title, str, 'title')
record(@thread, :created_at, str, 'createdAt')
record(@thread, :author_email, str, 'author', 'email')
record(@thread, :author_name, str, 'author', 'name')
record(@thread, :author_username, str, 'author', 'username')
record(@thread, :author_anonymous, str, 'author', 'isAnonymous')
record(@thread, :link, str, "link")
record(@thread, :title, str, "title")
record(@thread, :created_at, str, "createdAt")
record(@thread, :author_email, str, "author", "email")
record(@thread, :author_name, str, "author", "name")
record(@thread, :author_username, str, "author", "username")
record(@thread, :author_anonymous, str, "author", "isAnonymous")
end
def cdata_block(str)
record(@post, :cooked, str, 'message')
record(@post, :cooked, str, "message")
end
def record(target, sym, str, *params)
@ -205,7 +197,7 @@ class DisqusSAX < Nokogiri::XML::SAX::Document
# Remove any threads that have no posts
@threads.delete(id)
else
t[:posts].delete_if { |p| p[:is_spam] == 'true' || p[:is_deleted] == 'true' }
t[:posts].delete_if { |p| p[:is_spam] == "true" || p[:is_deleted] == "true" }
end
end
View File
@ -4,19 +4,19 @@ require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::Drupal < ImportScripts::Base
DRUPAL_DB = ENV['DRUPAL_DB'] || "newsite3"
VID = ENV['DRUPAL_VID'] || 1
DRUPAL_DB = ENV["DRUPAL_DB"] || "newsite3"
VID = ENV["DRUPAL_VID"] || 1
def initialize
super
@client = Mysql2::Client.new(
host: "localhost",
username: "root",
#password: "password",
database: DRUPAL_DB
)
@client =
Mysql2::Client.new(
host: "localhost",
username: "root",
#password: "password",
database: DRUPAL_DB,
)
end
def categories_query
@ -25,7 +25,12 @@ class ImportScripts::Drupal < ImportScripts::Base
def execute
create_users(@client.query("SELECT uid id, name, mail email, created FROM users;")) do |row|
{ id: row['id'], username: row['name'], email: row['email'], created_at: Time.zone.at(row['created']) }
{
id: row["id"],
username: row["name"],
email: row["email"],
created_at: Time.zone.at(row["created"]),
}
end
# You'll need to edit the following query for your Drupal install:
@ -34,38 +39,36 @@ class ImportScripts::Drupal < ImportScripts::Base
# * Table name may be term_data.
# * May need to select a vid other than 1.
create_categories(categories_query) do |c|
{ id: c['tid'], name: c['name'], description: c['description'] }
{ id: c["tid"], name: c["name"], description: c["description"] }
end
# "Nodes" in Drupal are divided into types. Here we import two types,
# and will later import all the comments/replies for each node.
# You will need to figure out what the type names are on your install and edit the queries to match.
if ENV['DRUPAL_IMPORT_BLOG']
create_blog_topics
end
create_blog_topics if ENV["DRUPAL_IMPORT_BLOG"]
create_forum_topics
create_replies
begin
create_admin(email: 'neil.lalonde@discourse.org', username: UserNameSuggester.suggest('neil'))
create_admin(email: "neil.lalonde@discourse.org", username: UserNameSuggester.suggest("neil"))
rescue => e
puts '', "Failed to create admin user"
puts "", "Failed to create admin user"
puts e.message
end
end
def create_blog_topics
puts '', "creating blog topics"
puts "", "creating blog topics"
create_category({
name: 'Blog',
user_id: -1,
description: "Articles from the blog"
}, nil) unless Category.find_by_name('Blog')
unless Category.find_by_name("Blog")
create_category({ name: "Blog", user_id: -1, description: "Articles from the blog" }, nil)
end
results = @client.query("
results =
@client.query(
"
SELECT n.nid nid,
n.title title,
n.uid uid,
@ -76,37 +79,48 @@ class ImportScripts::Drupal < ImportScripts::Base
LEFT JOIN node_revisions nr ON nr.vid=n.vid
WHERE n.type = 'blog'
AND n.status = 1
", cache_rows: false)
",
cache_rows: false,
)
create_posts(results) do |row|
{
id: "nid:#{row['nid']}",
user_id: user_id_from_imported_user_id(row['uid']) || -1,
category: 'Blog',
raw: row['body'],
created_at: Time.zone.at(row['created']),
pinned_at: row['sticky'].to_i == 1 ? Time.zone.at(row['created']) : nil,
title: row['title'].try(:strip),
custom_fields: { import_id: "nid:#{row['nid']}" }
id: "nid:#{row["nid"]}",
user_id: user_id_from_imported_user_id(row["uid"]) || -1,
category: "Blog",
raw: row["body"],
created_at: Time.zone.at(row["created"]),
pinned_at: row["sticky"].to_i == 1 ? Time.zone.at(row["created"]) : nil,
title: row["title"].try(:strip),
custom_fields: {
import_id: "nid:#{row["nid"]}",
},
}
end
end
def create_forum_topics
puts '', "creating forum topics"
puts "", "creating forum topics"
total_count = @client.query("
total_count =
@client.query(
"
SELECT COUNT(*) count
FROM node n
LEFT JOIN forum f ON f.vid=n.vid
WHERE n.type = 'forum'
AND n.status = 1
").first['count']
",
).first[
"count"
]
batch_size = 1000
batches(batch_size) do |offset|
results = @client.query("
results =
@client.query(
"
SELECT n.nid nid,
n.title title,
f.tid tid,
@ -121,48 +135,57 @@ class ImportScripts::Drupal < ImportScripts::Base
AND n.status = 1
LIMIT #{batch_size}
OFFSET #{offset};
", cache_rows: false)
",
cache_rows: false,
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| "nid:#{p['nid']}" }
next if all_records_exist? :posts, results.map { |p| "nid:#{p["nid"]}" }
create_posts(results, total: total_count, offset: offset) do |row|
{
id: "nid:#{row['nid']}",
user_id: user_id_from_imported_user_id(row['uid']) || -1,
category: category_id_from_imported_category_id(row['tid']),
raw: row['body'],
created_at: Time.zone.at(row['created']),
pinned_at: row['sticky'].to_i == 1 ? Time.zone.at(row['created']) : nil,
title: row['title'].try(:strip)
id: "nid:#{row["nid"]}",
user_id: user_id_from_imported_user_id(row["uid"]) || -1,
category: category_id_from_imported_category_id(row["tid"]),
raw: row["body"],
created_at: Time.zone.at(row["created"]),
pinned_at: row["sticky"].to_i == 1 ? Time.zone.at(row["created"]) : nil,
title: row["title"].try(:strip),
}
end
end
end
def create_replies
puts '', "creating replies in topics"
puts "", "creating replies in topics"
if ENV['DRUPAL_IMPORT_BLOG']
if ENV["DRUPAL_IMPORT_BLOG"]
node_types = "('forum','blog')"
else
node_types = "('forum')"
end
total_count = @client.query("
total_count =
@client.query(
"
SELECT COUNT(*) count
FROM comments c
LEFT JOIN node n ON n.nid=c.nid
WHERE n.type IN #{node_types}
AND n.status = 1
AND c.status=0;
").first['count']
",
).first[
"count"
]
batch_size = 1000
batches(batch_size) do |offset|
results = @client.query("
results =
@client.query(
"
SELECT c.cid,
c.pid,
c.nid,
@ -176,37 +199,36 @@ class ImportScripts::Drupal < ImportScripts::Base
AND c.status=0
LIMIT #{batch_size}
OFFSET #{offset};
", cache_rows: false)
",
cache_rows: false,
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| "cid:#{p['cid']}" }
next if all_records_exist? :posts, results.map { |p| "cid:#{p["cid"]}" }
create_posts(results, total: total_count, offset: offset) do |row|
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}")
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row["nid"]}")
if topic_mapping && topic_id = topic_mapping[:topic_id]
h = {
id: "cid:#{row['cid']}",
id: "cid:#{row["cid"]}",
topic_id: topic_id,
user_id: user_id_from_imported_user_id(row['uid']) || -1,
raw: row['body'],
created_at: Time.zone.at(row['timestamp']),
user_id: user_id_from_imported_user_id(row["uid"]) || -1,
raw: row["body"],
created_at: Time.zone.at(row["timestamp"]),
}
if row['pid']
parent = topic_lookup_from_imported_post_id("cid:#{row['pid']}")
if row["pid"]
parent = topic_lookup_from_imported_post_id("cid:#{row["pid"]}")
h[:reply_to_post_number] = parent[:post_number] if parent && parent[:post_number] > (1)
end
h
else
puts "No topic found for comment #{row['cid']}"
puts "No topic found for comment #{row["cid"]}"
nil
end
end
end
end
end
if __FILE__ == $0
ImportScripts::Drupal.new.perform
end
ImportScripts::Drupal.new.perform if __FILE__ == $0
View File
@ -5,9 +5,8 @@ require "htmlentities"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::Drupal < ImportScripts::Base
DRUPAL_DB = ENV['DRUPAL_DB'] || "drupal"
VID = ENV['DRUPAL_VID'] || 1
DRUPAL_DB = ENV["DRUPAL_DB"] || "drupal"
VID = ENV["DRUPAL_VID"] || 1
BATCH_SIZE = 1000
ATTACHMENT_DIR = "/root/files/upload"
@ -16,25 +15,23 @@ class ImportScripts::Drupal < ImportScripts::Base
@htmlentities = HTMLEntities.new
@client = Mysql2::Client.new(
host: "localhost",
username: "root",
#password: "password",
database: DRUPAL_DB
)
@client =
Mysql2::Client.new(
host: "localhost",
username: "root",
#password: "password",
database: DRUPAL_DB,
)
end
def execute
import_users
import_categories
# "Nodes" in Drupal are divided into types. Here we import two types,
# and will later import all the comments/replies for each node.
# You will need to figure out what the type names are on your install and edit the queries to match.
if ENV['DRUPAL_IMPORT_BLOG']
import_blog_topics
end
import_blog_topics if ENV["DRUPAL_IMPORT_BLOG"]
import_forum_topics
@ -56,7 +53,7 @@ class ImportScripts::Drupal < ImportScripts::Base
last_user_id = -1
batches(BATCH_SIZE) do |offset|
users = mysql_query(<<-SQL
users = mysql_query(<<-SQL).to_a
SELECT uid,
name username,
mail email,
@ -66,7 +63,6 @@ class ImportScripts::Drupal < ImportScripts::Base
ORDER BY uid
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if users.empty?
@ -80,12 +76,7 @@ class ImportScripts::Drupal < ImportScripts::Base
username = @htmlentities.decode(user["username"]).strip
{
id: user["uid"],
name: username,
email: email,
created_at: Time.zone.at(user["created"])
}
{ id: user["uid"], name: username, email: email, created_at: Time.zone.at(user["created"]) }
end
end
end
@ -99,35 +90,31 @@ class ImportScripts::Drupal < ImportScripts::Base
puts "", "importing categories"
categories = mysql_query(<<-SQL
categories = mysql_query(<<-SQL).to_a
SELECT tid,
name,
description
FROM taxonomy_term_data
WHERE vid = #{VID}
SQL
).to_a
create_categories(categories) do |category|
{
id: category['tid'],
name: @htmlentities.decode(category['name']).strip,
description: @htmlentities.decode(category['description']).strip
id: category["tid"],
name: @htmlentities.decode(category["name"]).strip,
description: @htmlentities.decode(category["description"]).strip,
}
end
end
def import_blog_topics
puts '', "importing blog topics"
puts "", "importing blog topics"
create_category(
{
name: 'Blog',
description: "Articles from the blog"
},
nil) unless Category.find_by_name('Blog')
unless Category.find_by_name("Blog")
create_category({ name: "Blog", description: "Articles from the blog" }, nil)
end
blogs = mysql_query(<<-SQL
blogs = mysql_query(<<-SQL).to_a
SELECT n.nid nid, n.title title, n.uid uid, n.created created, n.sticky sticky,
f.body_value body
FROM node n,
@ -136,38 +123,38 @@ class ImportScripts::Drupal < ImportScripts::Base
AND n.nid = f.entity_id
AND n.status = 1
SQL
).to_a
category_id = Category.find_by_name('Blog').id
category_id = Category.find_by_name("Blog").id
create_posts(blogs) do |topic|
{
id: "nid:#{topic['nid']}",
user_id: user_id_from_imported_user_id(topic['uid']) || -1,
id: "nid:#{topic["nid"]}",
user_id: user_id_from_imported_user_id(topic["uid"]) || -1,
category: category_id,
raw: topic['body'],
created_at: Time.zone.at(topic['created']),
pinned_at: topic['sticky'].to_i == 1 ? Time.zone.at(topic['created']) : nil,
title: topic['title'].try(:strip),
custom_fields: { import_id: "nid:#{topic['nid']}" }
raw: topic["body"],
created_at: Time.zone.at(topic["created"]),
pinned_at: topic["sticky"].to_i == 1 ? Time.zone.at(topic["created"]) : nil,
title: topic["title"].try(:strip),
custom_fields: {
import_id: "nid:#{topic["nid"]}",
},
}
end
end
def import_forum_topics
puts '', "importing forum topics"
puts "", "importing forum topics"
total_count = mysql_query(<<-SQL
total_count = mysql_query(<<-SQL).first["count"]
SELECT COUNT(*) count
FROM forum_index fi, node n
WHERE n.type = 'forum'
AND fi.nid = n.nid
AND n.status = 1
SQL
).first['count']
batches(BATCH_SIZE) do |offset|
results = mysql_query(<<-SQL
results = mysql_query(<<-SQL).to_a
SELECT fi.nid nid,
fi.title title,
fi.tid tid,
@ -188,34 +175,33 @@ class ImportScripts::Drupal < ImportScripts::Base
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
SQL
).to_a
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| "nid:#{p['nid']}" }
next if all_records_exist? :posts, results.map { |p| "nid:#{p["nid"]}" }
create_posts(results, total: total_count, offset: offset) do |row|
raw = preprocess_raw(row['body'])
raw = preprocess_raw(row["body"])
topic = {
id: "nid:#{row['nid']}",
user_id: user_id_from_imported_user_id(row['uid']) || -1,
category: category_id_from_imported_category_id(row['tid']),
id: "nid:#{row["nid"]}",
user_id: user_id_from_imported_user_id(row["uid"]) || -1,
category: category_id_from_imported_category_id(row["tid"]),
raw: raw,
created_at: Time.zone.at(row['created']),
pinned_at: row['sticky'].to_i == 1 ? Time.zone.at(row['created']) : nil,
title: row['title'].try(:strip),
views: row['views']
created_at: Time.zone.at(row["created"]),
pinned_at: row["sticky"].to_i == 1 ? Time.zone.at(row["created"]) : nil,
title: row["title"].try(:strip),
views: row["views"],
}
topic[:custom_fields] = { import_solved: true } if row['solved'].present?
topic[:custom_fields] = { import_solved: true } if row["solved"].present?
topic
end
end
end
def import_replies
puts '', "creating replies in topics"
puts "", "creating replies in topics"
total_count = mysql_query(<<-SQL
total_count = mysql_query(<<-SQL).first["count"]
SELECT COUNT(*) count
FROM comment c,
node n
@ -224,10 +210,9 @@ class ImportScripts::Drupal < ImportScripts::Base
AND n.type IN ('article', 'forum')
AND n.status = 1
SQL
).first['count']
batches(BATCH_SIZE) do |offset|
results = mysql_query(<<-SQL
results = mysql_query(<<-SQL).to_a
SELECT c.cid, c.pid, c.nid, c.uid, c.created,
f.comment_body_value body
FROM comment c,
@ -241,30 +226,29 @@ class ImportScripts::Drupal < ImportScripts::Base
LIMIT #{BATCH_SIZE}
OFFSET #{offset}
SQL
).to_a
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| "cid:#{p['cid']}" }
next if all_records_exist? :posts, results.map { |p| "cid:#{p["cid"]}" }
create_posts(results, total: total_count, offset: offset) do |row|
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}")
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row["nid"]}")
if topic_mapping && topic_id = topic_mapping[:topic_id]
raw = preprocess_raw(row['body'])
raw = preprocess_raw(row["body"])
h = {
id: "cid:#{row['cid']}",
id: "cid:#{row["cid"]}",
topic_id: topic_id,
user_id: user_id_from_imported_user_id(row['uid']) || -1,
user_id: user_id_from_imported_user_id(row["uid"]) || -1,
raw: raw,
created_at: Time.zone.at(row['created']),
created_at: Time.zone.at(row["created"]),
}
if row['pid']
parent = topic_lookup_from_imported_post_id("cid:#{row['pid']}")
if row["pid"]
parent = topic_lookup_from_imported_post_id("cid:#{row["pid"]}")
h[:reply_to_post_number] = parent[:post_number] if parent && parent[:post_number] > (1)
end
h
else
puts "No topic found for comment #{row['cid']}"
puts "No topic found for comment #{row["cid"]}"
nil
end
end
@ -275,7 +259,7 @@ class ImportScripts::Drupal < ImportScripts::Base
puts "", "importing post likes"
batches(BATCH_SIZE) do |offset|
likes = mysql_query(<<-SQL
likes = mysql_query(<<-SQL).to_a
SELECT flagging_id,
fid,
entity_id,
@ -286,17 +270,20 @@ class ImportScripts::Drupal < ImportScripts::Base
LIMIT #{BATCH_SIZE}
OFFSET #{offset}
SQL
).to_a
break if likes.empty?
likes.each do |l|
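# fid 5 appears to be the node (topic) like flag on this install; other flag ids are treated as comment likes.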
identifier = l['fid'] == 5 ? 'nid' : 'cid'
next unless user_id = user_id_from_imported_user_id(l['uid'])
next unless post_id = post_id_from_imported_post_id("#{identifier}:#{l['entity_id']}")
identifier = l["fid"] == 5 ? "nid" : "cid"
next unless user_id = user_id_from_imported_user_id(l["uid"])
next unless post_id = post_id_from_imported_post_id("#{identifier}:#{l["entity_id"]}")
next unless user = User.find_by(id: user_id)
next unless post = Post.find_by(id: post_id)
PostActionCreator.like(user, post) rescue nil
begin
PostActionCreator.like(user, post)
rescue StandardError
nil
end
end
end
end
@ -304,7 +291,8 @@ class ImportScripts::Drupal < ImportScripts::Base
def mark_topics_as_solved
puts "", "marking topics as solved"
solved_topics = TopicCustomField.where(name: "import_solved").where(value: true).pluck(:topic_id)
solved_topics =
TopicCustomField.where(name: "import_solved").where(value: true).pluck(:topic_id)
solved_topics.each do |topic_id|
next unless topic = Topic.find(topic_id)
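# NOTE: Topic.find raises RecordNotFound on a missing id, so this guard would error rather than skip; Topic.find_by(id: topic_id) would behave as intended.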
@ -336,8 +324,13 @@ class ImportScripts::Drupal < ImportScripts::Base
begin
current_count += 1
print_status(current_count, total_count, start_time)
SingleSignOnRecord.create!(user_id: user.id, external_id: external_id, external_email: user.email, last_payload: '')
rescue
SingleSignOnRecord.create!(
user_id: user.id,
external_id: external_id,
external_email: user.email,
last_payload: "",
)
rescue StandardError
next
end
end
@ -350,14 +343,13 @@ class ImportScripts::Drupal < ImportScripts::Base
success_count = 0
fail_count = 0
total_count = mysql_query(<<-SQL
total_count = mysql_query(<<-SQL).first["count"]
SELECT count(field_post_attachment_fid) count
FROM field_data_field_post_attachment
SQL
).first["count"]
batches(BATCH_SIZE) do |offset|
attachments = mysql_query(<<-SQL
attachments = mysql_query(<<-SQL).to_a
SELECT *
FROM field_data_field_post_attachment fp
LEFT JOIN file_managed fm
@ -365,7 +357,6 @@ class ImportScripts::Drupal < ImportScripts::Base
LIMIT #{BATCH_SIZE}
OFFSET #{offset}
SQL
).to_a
break if attachments.size < 1
@ -373,9 +364,11 @@ class ImportScripts::Drupal < ImportScripts::Base
current_count += 1
print_status current_count, total_count
identifier = attachment['entity_type'] == "comment" ? "cid" : "nid"
next unless user_id = user_id_from_imported_user_id(attachment['uid'])
next unless post_id = post_id_from_imported_post_id("#{identifier}:#{attachment['entity_id']}")
identifier = attachment["entity_type"] == "comment" ? "cid" : "nid"
next unless user_id = user_id_from_imported_user_id(attachment["uid"])
unless post_id = post_id_from_imported_post_id("#{identifier}:#{attachment["entity_id"]}")
next
end
next unless user = User.find(user_id)
next unless post = Post.find(post_id)
@ -392,9 +385,14 @@ class ImportScripts::Drupal < ImportScripts::Base
new_raw = "#{new_raw}\n\n#{upload_html}" unless new_raw.include?(upload_html)
if new_raw != post.raw
PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: "Import attachment from Drupal")
PostRevisor.new(post).revise!(
post.user,
{ raw: new_raw },
bypass_bump: true,
edit_reason: "Import attachment from Drupal",
)
else
puts '', 'Skipped upload: already imported'
puts "", "Skipped upload: already imported"
end
success_count += 1
@ -406,13 +404,13 @@ class ImportScripts::Drupal < ImportScripts::Base
end
def create_permalinks
puts '', 'creating permalinks...'
puts "", "creating permalinks..."
Topic.listable_topics.find_each do |topic|
begin
tcf = topic.custom_fields
if tcf && tcf['import_id']
node_id = tcf['import_id'][/nid:(\d+)/, 1]
if tcf && tcf["import_id"]
node_id = tcf["import_id"][/nid:(\d+)/, 1]
slug = "/node/#{node_id}"
Permalink.create(url: slug, topic_id: topic.id)
end
@ -424,18 +422,16 @@ class ImportScripts::Drupal < ImportScripts::Base
end
def find_upload(post, attachment)
uri = attachment['uri'][/public:\/\/upload\/(.+)/, 1]
uri = attachment["uri"][%r{public://upload/(.+)}, 1]
real_filename = CGI.unescapeHTML(uri)
file = File.join(ATTACHMENT_DIR, real_filename)
unless File.exist?(file)
puts "Attachment file #{attachment['filename']} doesn't exist"
puts "Attachment file #{attachment["filename"]} doesn't exist"
tmpfile = "attachments_failed.txt"
filename = File.join('/tmp/', tmpfile)
File.open(filename, 'a') { |f|
f.puts attachment['filename']
}
filename = File.join("/tmp/", tmpfile)
File.open(filename, "a") { |f| f.puts attachment["filename"] }
end
upload = create_upload(post.user.id || -1, file, real_filename)
@ -452,13 +448,13 @@ class ImportScripts::Drupal < ImportScripts::Base
def preprocess_raw(raw)
return if raw.blank?
# quotes on new lines
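# (strip the [quote] tags, then prefix each quoted line with "> " so it renders as a Markdown blockquote)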
raw.gsub!(/\[quote\](.+?)\[\/quote\]/im) { |quote|
quote.gsub!(/\[quote\](.+?)\[\/quote\]/im) { "\n#{$1}\n" }
raw.gsub!(%r{\[quote\](.+?)\[/quote\]}im) do |quote|
quote.gsub!(%r{\[quote\](.+?)\[/quote\]}im) { "\n#{$1}\n" }
quote.gsub!(/\n(.+?)/) { "\n> #{$1}" }
}
end
# [QUOTE=<username>]...[/QUOTE]
raw.gsub!(/\[quote=([^;\]]+)\](.+?)\[\/quote\]/im) do
raw.gsub!(%r{\[quote=([^;\]]+)\](.+?)\[/quote\]}im) do
username, quote = $1, $2
"\n[quote=\"#{username}\"]\n#{quote}\n[/quote]\n"
end
@ -468,7 +464,7 @@ class ImportScripts::Drupal < ImportScripts::Base
end
def postprocess_posts
puts '', 'postprocessing posts'
puts "", "postprocessing posts"
current = 0
max = Post.count
@ -479,7 +475,7 @@ class ImportScripts::Drupal < ImportScripts::Base
new_raw = raw.dup
# replace old topic to new topic links
new_raw.gsub!(/https:\/\/site.com\/forum\/topic\/(\d+)/im) do
new_raw.gsub!(%r{https://site.com/forum/topic/(\d+)}im) do
post_id = post_id_from_imported_post_id("nid:#{$1}")
next unless post_id
topic = Post.find(post_id).topic
@ -487,7 +483,7 @@ class ImportScripts::Drupal < ImportScripts::Base
end
# replace old comment to reply links
new_raw.gsub!(/https:\/\/site.com\/comment\/(\d+)#comment-\d+/im) do
new_raw.gsub!(%r{https://site.com/comment/(\d+)#comment-\d+}im) do
post_id = post_id_from_imported_post_id("cid:#{$1}")
next unless post_id
post_ref = Post.find(post_id)
@ -498,8 +494,8 @@ class ImportScripts::Drupal < ImportScripts::Base
post.raw = new_raw
post.save
end
rescue
puts '', "Failed rewrite on post: #{post.id}"
rescue StandardError
puts "", "Failed rewrite on post: #{post.id}"
ensure
print_status(current += 1, max)
end
@ -507,15 +503,15 @@ class ImportScripts::Drupal < ImportScripts::Base
end
def import_gravatars
puts '', 'importing gravatars'
puts "", "importing gravatars"
current = 0
max = User.count
User.find_each do |user|
begin
user.create_user_avatar(user_id: user.id) unless user.user_avatar
user.user_avatar.update_gravatar!
rescue
puts '', 'Failed avatar update on user #{user.id}'
rescue StandardError
puts "", 'Failed avatar update on user #{user.id}'
ensure
print_status(current += 1, max)
end
@ -523,15 +519,12 @@ class ImportScripts::Drupal < ImportScripts::Base
end
def parse_datetime(time)
DateTime.strptime(time, '%s')
DateTime.strptime(time, "%s")
end
def mysql_query(sql)
@client.query(sql, cache_rows: true)
end
end
if __FILE__ == $0
ImportScripts::Drupal.new.perform
end
ImportScripts::Drupal.new.perform if __FILE__ == $0

View File
# Edit the constants and initialize method for your import data.
class ImportScripts::DrupalJson < ImportScripts::Base
JSON_FILES_DIR = "/Users/techapj/Documents"
def initialize
@ -28,20 +27,18 @@ class ImportScripts::DrupalJson < ImportScripts::Base
end
def import_users
puts '', "Importing users"
puts "", "Importing users"
create_users(@users_json) do |u|
{
id: u["uid"],
name: u["name"],
email: u["mail"],
created_at: Time.zone.at(u["created"].to_i)
created_at: Time.zone.at(u["created"].to_i),
}
end
EmailToken.delete_all
end
end
if __FILE__ == $0
ImportScripts::DrupalJson.new.perform
end
ImportScripts::DrupalJson.new.perform if __FILE__ == $0
View File
@ -5,41 +5,51 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require File.expand_path(File.dirname(__FILE__) + "/drupal.rb")
class ImportScripts::DrupalQA < ImportScripts::Drupal
def categories_query
result = @client.query("SELECT n.nid, GROUP_CONCAT(ti.tid) AS tids
result =
@client.query(
"SELECT n.nid, GROUP_CONCAT(ti.tid) AS tids
FROM node AS n
INNER JOIN taxonomy_index AS ti ON ti.nid = n.nid
WHERE n.type = 'question'
AND n.status = 1
GROUP BY n.nid")
GROUP BY n.nid",
)
categories = {}
result.each do |r|
tids = r['tids']
tids = r["tids"]
if tids.present?
tids = tids.split(',')
tids = tids.split(",")
categories[tids[0].to_i] = true
end
end
@client.query("SELECT tid, name, description FROM taxonomy_term_data WHERE tid IN (#{categories.keys.join(',')})")
@client.query(
"SELECT tid, name, description FROM taxonomy_term_data WHERE tid IN (#{categories.keys.join(",")})",
)
end
def create_forum_topics
puts "", "creating forum topics"
puts '', "creating forum topics"
total_count = @client.query("
total_count =
@client.query(
"
SELECT COUNT(*) count
FROM node n
WHERE n.type = 'question'
AND n.status = 1;").first['count']
AND n.status = 1;",
).first[
"count"
]
batch_size = 1000
batches(batch_size) do |offset|
results = @client.query("
results =
@client.query(
"
SELECT n.nid,
n.title,
GROUP_CONCAT(t.tid) AS tid,
@ -54,40 +64,48 @@ class ImportScripts::DrupalQA < ImportScripts::Drupal
GROUP BY n.nid, n.title, n.uid, n.created, f.body_value
LIMIT #{batch_size}
OFFSET #{offset}
", cache_rows: false)
",
cache_rows: false,
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| "nid:#{p['nid']}" }
next if all_records_exist? :posts, results.map { |p| "nid:#{p["nid"]}" }
create_posts(results, total: total_count, offset: offset) do |row|
{
id: "nid:#{row['nid']}",
user_id: user_id_from_imported_user_id(row['uid']) || -1,
category: category_id_from_imported_category_id((row['tid'] || '').split(',')[0]),
raw: row['body'],
created_at: Time.zone.at(row['created']),
id: "nid:#{row["nid"]}",
user_id: user_id_from_imported_user_id(row["uid"]) || -1,
category: category_id_from_imported_category_id((row["tid"] || "").split(",")[0]),
raw: row["body"],
created_at: Time.zone.at(row["created"]),
pinned_at: nil,
title: row['title'].try(:strip)
title: row["title"].try(:strip),
}
end
end
end
def create_direct_replies
puts '', "creating replies in topics"
puts "", "creating replies in topics"
total_count = @client.query("
total_count =
@client.query(
"
SELECT COUNT(*) count
FROM node n
WHERE n.type = 'answer'
AND n.status = 1;").first['count']
AND n.status = 1;",
).first[
"count"
]
batch_size = 1000
batches(batch_size) do |offset|
results = @client.query("
results =
@client.query(
"
SELECT n.nid AS cid,
q.field_answer_question_nid AS nid,
n.uid,
@ -100,25 +118,27 @@ class ImportScripts::DrupalQA < ImportScripts::Drupal
AND n.type = 'answer'
LIMIT #{batch_size}
OFFSET #{offset}
", cache_rows: false)
",
cache_rows: false,
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| "cid:#{p['cid']}" }
next if all_records_exist? :posts, results.map { |p| "cid:#{p["cid"]}" }
create_posts(results, total: total_count, offset: offset) do |row|
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}")
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row["nid"]}")
if topic_mapping && topic_id = topic_mapping[:topic_id]
h = {
id: "cid:#{row['cid']}",
id: "cid:#{row["cid"]}",
topic_id: topic_id,
user_id: user_id_from_imported_user_id(row['uid']) || -1,
raw: row['body'],
created_at: Time.zone.at(row['created']),
user_id: user_id_from_imported_user_id(row["uid"]) || -1,
raw: row["body"],
created_at: Time.zone.at(row["created"]),
}
h
else
puts "No topic found for answer #{row['cid']}"
puts "No topic found for answer #{row["cid"]}"
nil
end
end
@ -126,21 +146,27 @@ class ImportScripts::DrupalQA < ImportScripts::Drupal
end
def create_nested_replies
puts '', "creating nested replies to posts in topics"
puts "", "creating nested replies to posts in topics"
total_count = @client.query("
total_count =
@client.query(
"
SELECT COUNT(c.cid) count
FROM node n
INNER JOIN comment AS c ON n.nid = c.nid
WHERE n.type = 'question'
AND n.status = 1;").first['count']
AND n.status = 1;",
).first[
"count"
]
batch_size = 1000
batches(batch_size) do |offset|
# WARNING: If there are more than 1000000 this might have to be revisited
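# The +1000000 offset keeps these comment cids from colliding with the answer nids that create_direct_replies also stores under the "cid:" prefix.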
results = @client.query("
results =
@client.query(
"
SELECT (c.cid + 1000000) as cid,
c.nid,
c.uid,
@ -153,45 +179,53 @@ class ImportScripts::DrupalQA < ImportScripts::Drupal
AND n.type = 'question'
LIMIT #{batch_size}
OFFSET #{offset}
", cache_rows: false)
",
cache_rows: false,
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| "cid:#{p['cid']}" }
next if all_records_exist? :posts, results.map { |p| "cid:#{p["cid"]}" }
create_posts(results, total: total_count, offset: offset) do |row|
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}")
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row["nid"]}")
if topic_mapping && topic_id = topic_mapping[:topic_id]
h = {
id: "cid:#{row['cid']}",
id: "cid:#{row["cid"]}",
topic_id: topic_id,
user_id: user_id_from_imported_user_id(row['uid']) || -1,
raw: row['body'],
created_at: Time.zone.at(row['created']),
user_id: user_id_from_imported_user_id(row["uid"]) || -1,
raw: row["body"],
created_at: Time.zone.at(row["created"]),
}
h
else
puts "No topic found for comment #{row['cid']}"
puts "No topic found for comment #{row["cid"]}"
nil
end
end
end
puts '', "creating nested replies to answers in topics"
puts "", "creating nested replies to answers in topics"
total_count = @client.query("
total_count =
@client.query(
"
SELECT COUNT(c.cid) count
FROM node n
INNER JOIN comment AS c ON n.nid = c.nid
WHERE n.type = 'answer'
AND n.status = 1;").first['count']
AND n.status = 1;",
).first[
"count"
]
batch_size = 1000
batches(batch_size) do |offset|
# WARNING: If there are more than 1000000 this might have to be revisited
results = @client.query("
results =
@client.query(
"
SELECT (c.cid + 1000000) as cid,
q.field_answer_question_nid AS nid,
c.uid,
@ -205,25 +239,27 @@ class ImportScripts::DrupalQA < ImportScripts::Drupal
AND n.type = 'answer'
LIMIT #{batch_size}
OFFSET #{offset}
", cache_rows: false)
",
cache_rows: false,
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| "cid:#{p['cid']}" }
next if all_records_exist? :posts, results.map { |p| "cid:#{p["cid"]}" }
create_posts(results, total: total_count, offset: offset) do |row|
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row['nid']}")
topic_mapping = topic_lookup_from_imported_post_id("nid:#{row["nid"]}")
if topic_mapping && topic_id = topic_mapping[:topic_id]
h = {
id: "cid:#{row['cid']}",
id: "cid:#{row["cid"]}",
topic_id: topic_id,
user_id: user_id_from_imported_user_id(row['uid']) || -1,
raw: row['body'],
created_at: Time.zone.at(row['created']),
user_id: user_id_from_imported_user_id(row["uid"]) || -1,
raw: row["body"],
created_at: Time.zone.at(row["created"]),
}
h
else
puts "No topic found for comment #{row['cid']}"
puts "No topic found for comment #{row["cid"]}"
nil
end
end
@ -234,9 +270,6 @@ class ImportScripts::DrupalQA < ImportScripts::Drupal
create_direct_replies
create_nested_replies
end
end
if __FILE__ == $0
ImportScripts::DrupalQA.new.perform
end
ImportScripts::DrupalQA.new.perform if __FILE__ == $0
View File
@ -1,22 +1,16 @@
# frozen_string_literal: true
require 'mysql2'
require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::Elgg < ImportScripts::Base
BATCH_SIZE ||= 1000
def initialize
super
@client = Mysql2::Client.new(
host: "127.0.0.1",
port: "3306",
username: "",
database: "",
password: ""
)
@client =
Mysql2::Client.new(host: "127.0.0.1", port: "3306", username: "", database: "", password: "")
SiteSetting.max_username_length = 50
end
@ -31,7 +25,7 @@ class ImportScripts::Elgg < ImportScripts::Base
def create_avatar(user, guid)
puts "#{@path}"
# Put your avatar at the root of discourse in this folder:
path_prefix = 'import/data/www/'
path_prefix = "import/data/www/"
# https://github.com/Elgg/Elgg/blob/2fc9c1910a9169bbe4010026c61d8e41a5b56239/engine/classes/ElggDiskFilestore.php#L24
# const BUCKET_SIZE = 5000;
bucket_size = 5000
@ -40,13 +34,11 @@ class ImportScripts::Elgg < ImportScripts::Base
bucket_id = [guid / bucket_size * bucket_size, 1].max
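# Integer division floors guid to the start of its 5000-wide bucket; the [..., 1].max keeps guids below 5000 in bucket 1, matching Elgg's on-disk layout.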
avatar_path = File.join(path_prefix, bucket_id.to_s, "/#{guid}/profile/#{guid}master.jpg")
if File.exist?(avatar_path)
@uploader.create_avatar(user, avatar_path)
end
@uploader.create_avatar(user, avatar_path) if File.exist?(avatar_path)
end
def grant_admin(user, is_admin)
if is_admin == 'yes'
if is_admin == "yes"
puts "", "#{user.username} is granted admin!"
user.grant_admin!
end
@ -56,10 +48,11 @@ class ImportScripts::Elgg < ImportScripts::Base
puts "", "importing users..."
last_user_id = -1
total_users = mysql_query("select count(*) from elgg_users_entity where banned='no'").first["count"]
total_users =
mysql_query("select count(*) from elgg_users_entity where banned='no'").first["count"]
batches(BATCH_SIZE) do |offset|
users = mysql_query(<<-SQL
users = mysql_query(<<-SQL).to_a
select eue.guid, eue.username, eue.name, eue.email, eue.admin,
max(case when ems1.string='cae_structure' then ems2.string end)cae_structure,
max(case when ems1.string='location' then ems2.string end)location,
@ -76,7 +69,6 @@ class ImportScripts::Elgg < ImportScripts::Base
group by eue.guid
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if users.empty?
@ -97,11 +89,12 @@ class ImportScripts::Elgg < ImportScripts::Base
name: u["name"],
website: u["website"],
bio_raw: u["briefdescription"].to_s + " " + u["cae_structure"].to_s,
post_create_action: proc do |user|
create_avatar(user, u["guid"])
#add_user_to_group(user, u["cae_structure"])
grant_admin(user, u["admin"])
end
post_create_action:
proc do |user|
create_avatar(user, u["guid"])
#add_user_to_group(user, u["cae_structure"])
grant_admin(user, u["admin"])
end,
}
end
end
@ -115,9 +108,9 @@ class ImportScripts::Elgg < ImportScripts::Base
create_categories(categories) do |c|
{
id: c['guid'],
name: CGI.unescapeHTML(c['name']),
description: CGI.unescapeHTML(c['description'])
id: c["guid"],
name: CGI.unescapeHTML(c["name"]),
description: CGI.unescapeHTML(c["description"]),
}
end
end
@ -125,10 +118,13 @@ class ImportScripts::Elgg < ImportScripts::Base
def import_topics
puts "", "creating topics"
total_count = mysql_query("select count(*) count from elgg_entities where subtype = 32;").first["count"]
total_count =
mysql_query("select count(*) count from elgg_entities where subtype = 32;").first["count"]
batches(BATCH_SIZE) do |offset|
results = mysql_query("
results =
mysql_query(
"
SELECT
ee.guid id,
owner_guid user_id,
@ -143,30 +139,35 @@ class ImportScripts::Elgg < ImportScripts::Base
ORDER BY ee.guid
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
")
",
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |m| m['id'].to_i }
next if all_records_exist? :posts, results.map { |m| m["id"].to_i }
create_posts(results, total: total_count, offset: offset) do |m|
{
id: m['id'],
user_id: user_id_from_imported_user_id(m['user_id']) || -1,
raw: CGI.unescapeHTML(m['raw']),
created_at: Time.zone.at(m['created_at']),
category: category_id_from_imported_category_id(m['category_id']),
title: CGI.unescapeHTML(m['title']),
post_create_action: proc do |post|
tag_names = mysql_query("
id: m["id"],
user_id: user_id_from_imported_user_id(m["user_id"]) || -1,
raw: CGI.unescapeHTML(m["raw"]),
created_at: Time.zone.at(m["created_at"]),
category: category_id_from_imported_category_id(m["category_id"]),
title: CGI.unescapeHTML(m["title"]),
post_create_action:
proc do |post|
tag_names =
mysql_query(
"
select ms.string
from elgg_metadata md
join elgg_metastrings ms on md.value_id = ms.id
where name_id = 43
and entity_guid = #{m['id']};
").map { |tag| tag['string'] }
DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names)
end
and entity_guid = #{m["id"]};
",
).map { |tag| tag["string"] }
DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names)
end,
}
end
end
@ -179,10 +180,13 @@ class ImportScripts::Elgg < ImportScripts::Base
def import_posts
puts "", "creating posts"
total_count = mysql_query("SELECT count(*) count FROM elgg_entities WHERE subtype = 42").first["count"]
total_count =
mysql_query("SELECT count(*) count FROM elgg_entities WHERE subtype = 42").first["count"]
batches(BATCH_SIZE) do |offset|
results = mysql_query("
results =
mysql_query(
"
SELECT
ee.guid id,
container_guid topic_id,
@ -195,19 +199,20 @@ class ImportScripts::Elgg < ImportScripts::Base
ORDER BY ee.guid
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
")
",
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |m| m['id'].to_i }
next if all_records_exist? :posts, results.map { |m| m["id"].to_i }
create_posts(results, total: total_count, offset: offset) do |m|
{
id: m['id'],
user_id: user_id_from_imported_user_id(m['user_id']) || -1,
topic_id: topic_lookup_from_imported_post_id(m['topic_id'])[:topic_id],
raw: CGI.unescapeHTML(m['raw']),
created_at: Time.zone.at(m['created_at']),
id: m["id"],
user_id: user_id_from_imported_user_id(m["user_id"]) || -1,
topic_id: topic_lookup_from_imported_post_id(m["topic_id"])[:topic_id],
raw: CGI.unescapeHTML(m["raw"]),
created_at: Time.zone.at(m["created_at"]),
}
end
end
@ -216,7 +221,6 @@ class ImportScripts::Elgg < ImportScripts::Base
def mysql_query(sql)
@client.query(sql, cache_rows: false)
end
end
ImportScripts::Elgg.new.perform
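
Unlike the FLARUM and FluxBB importers below, `import_posts` here dereferences `topic_lookup_from_imported_post_id(m["topic_id"])[:topic_id]` without a nil check, so a reply whose parent topic was never imported raises NoMethodError. A defensive sketch (the stubbed lookup and IDs are illustrative, not part of the commit):

def topic_lookup_from_imported_post_id(imported_id)
  # stand-in for the real lookup in ImportScripts::Base
  { "42" => { topic_id: 7, post_number: 1 } }[imported_id]
end

%w[42 99].each do |imported_id|
  parent = topic_lookup_from_imported_post_id(imported_id)
  next puts("no topic for #{imported_id}, skipping") unless parent
  puts "reply would land in topic #{parent[:topic_id]}"
end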
View File
@ -1,60 +1,62 @@
# frozen_string_literal: true
require "mysql2"
require 'time'
require 'date'
require "time"
require "date"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::FLARUM < ImportScripts::Base
#SET THE APPROPRIATE VALUES FOR YOUR MYSQL CONNECTION
FLARUM_HOST ||= ENV['FLARUM_HOST'] || "db_host"
FLARUM_DB ||= ENV['FLARUM_DB'] || "db_name"
FLARUM_HOST ||= ENV["FLARUM_HOST"] || "db_host"
FLARUM_DB ||= ENV["FLARUM_DB"] || "db_name"
BATCH_SIZE ||= 1000
FLARUM_USER ||= ENV['FLARUM_USER'] || "db_user"
FLARUM_PW ||= ENV['FLARUM_PW'] || "db_user_pass"
FLARUM_USER ||= ENV["FLARUM_USER"] || "db_user"
FLARUM_PW ||= ENV["FLARUM_PW"] || "db_user_pass"
def initialize
super
@client = Mysql2::Client.new(
host: FLARUM_HOST,
username: FLARUM_USER,
password: FLARUM_PW,
database: FLARUM_DB
)
@client =
Mysql2::Client.new(
host: FLARUM_HOST,
username: FLARUM_USER,
password: FLARUM_PW,
database: FLARUM_DB,
)
end
def execute
import_users
import_categories
import_posts
end
def import_users
puts '', "creating users"
total_count = mysql_query("SELECT count(*) count FROM users;").first['count']
puts "", "creating users"
total_count = mysql_query("SELECT count(*) count FROM users;").first["count"]
batches(BATCH_SIZE) do |offset|
results = mysql_query(
"SELECT id, username, email, joined_at, last_seen_at
results =
mysql_query(
"SELECT id, username, email, joined_at, last_seen_at
FROM users
LIMIT #{BATCH_SIZE}
OFFSET #{offset};")
OFFSET #{offset};",
)
break if results.size < 1
next if all_records_exist? :users, results.map { |u| u["id"].to_i }
create_users(results, total: total_count, offset: offset) do |user|
{ id: user['id'],
email: user['email'],
username: user['username'],
name: user['username'],
created_at: user['joined_at'],
last_seen_at: user['last_seen_at']
{
id: user["id"],
email: user["email"],
username: user["username"],
name: user["username"],
created_at: user["joined_at"],
last_seen_at: user["last_seen_at"],
}
end
end
@ -63,30 +65,31 @@ class ImportScripts::FLARUM < ImportScripts::Base
def import_categories
puts "", "importing top level categories..."
categories = mysql_query("
categories =
mysql_query(
"
SELECT id, name, description, position
FROM tags
ORDER BY position ASC
").to_a
",
).to_a
create_categories(categories) do |category|
{
id: category["id"],
name: category["name"]
}
end
create_categories(categories) { |category| { id: category["id"], name: category["name"] } }
puts "", "importing children categories..."
children_categories = mysql_query("
children_categories =
mysql_query(
"
SELECT id, name, description, position
FROM tags
ORDER BY position
").to_a
",
).to_a
create_categories(children_categories) do |category|
{
id: "child##{category['id']}",
id: "child##{category["id"]}",
name: category["name"],
description: category["description"],
}
@ -99,7 +102,9 @@ class ImportScripts::FLARUM < ImportScripts::Base
total_count = mysql_query("SELECT count(*) count from posts").first["count"]
batches(BATCH_SIZE) do |offset|
results = mysql_query("
results =
mysql_query(
"
SELECT p.id id,
d.id topic_id,
d.title title,
@ -116,29 +121,30 @@ class ImportScripts::FLARUM < ImportScripts::Base
ORDER BY p.created_at
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
").to_a
",
).to_a
break if results.size < 1
next if all_records_exist? :posts, results.map { |m| m['id'].to_i }
next if all_records_exist? :posts, results.map { |m| m["id"].to_i }
create_posts(results, total: total_count, offset: offset) do |m|
skip = false
mapped = {}
mapped[:id] = m['id']
mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1
mapped[:raw] = process_FLARUM_post(m['raw'], m['id'])
mapped[:created_at] = Time.zone.at(m['created_at'])
mapped[:id] = m["id"]
mapped[:user_id] = user_id_from_imported_user_id(m["user_id"]) || -1
mapped[:raw] = process_FLARUM_post(m["raw"], m["id"])
mapped[:created_at] = Time.zone.at(m["created_at"])
if m['id'] == m['first_post_id']
mapped[:category] = category_id_from_imported_category_id("child##{m['category_id']}")
mapped[:title] = CGI.unescapeHTML(m['title'])
if m["id"] == m["first_post_id"]
mapped[:category] = category_id_from_imported_category_id("child##{m["category_id"]}")
mapped[:title] = CGI.unescapeHTML(m["title"])
else
parent = topic_lookup_from_imported_post_id(m['first_post_id'])
parent = topic_lookup_from_imported_post_id(m["first_post_id"])
if parent
mapped[:topic_id] = parent[:topic_id]
else
puts "Parent post #{m['first_post_id']} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
puts "Parent post #{m["first_post_id"]} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
skip = true
end
end
View File
@ -17,23 +17,23 @@ export FLUXBB_PREFIX=""
# Call it like this:
# RAILS_ENV=production bundle exec ruby script/import_scripts/fluxbb.rb
class ImportScripts::FluxBB < ImportScripts::Base
FLUXBB_HOST ||= ENV['FLUXBB_HOST'] || "localhost"
FLUXBB_DB ||= ENV['FLUXBB_DB'] || "fluxbb"
FLUXBB_HOST ||= ENV["FLUXBB_HOST"] || "localhost"
FLUXBB_DB ||= ENV["FLUXBB_DB"] || "fluxbb"
BATCH_SIZE ||= 1000
FLUXBB_USER ||= ENV['FLUXBB_USER'] || "root"
FLUXBB_PW ||= ENV['FLUXBB_PW'] || ""
FLUXBB_PREFIX ||= ENV['FLUXBB_PREFIX'] || ""
FLUXBB_USER ||= ENV["FLUXBB_USER"] || "root"
FLUXBB_PW ||= ENV["FLUXBB_PW"] || ""
FLUXBB_PREFIX ||= ENV["FLUXBB_PREFIX"] || ""
def initialize
super
@client = Mysql2::Client.new(
host: FLUXBB_HOST,
username: FLUXBB_USER,
password: FLUXBB_PW,
database: FLUXBB_DB
)
@client =
Mysql2::Client.new(
host: FLUXBB_HOST,
username: FLUXBB_USER,
password: FLUXBB_PW,
database: FLUXBB_DB,
)
end
def execute
@ -45,64 +45,67 @@ class ImportScripts::FluxBB < ImportScripts::Base
end
def import_groups
puts '', "creating groups"
puts "", "creating groups"
results = mysql_query(
"SELECT g_id id, g_title name, g_user_title title
FROM #{FLUXBB_PREFIX}groups")
results =
mysql_query(
"SELECT g_id id, g_title name, g_user_title title
FROM #{FLUXBB_PREFIX}groups",
)
customgroups = results.select { |group| group['id'] > 2 }
customgroups = results.select { |group| group["id"] > 2 }
create_groups(customgroups) do |group|
{ id: group['id'],
name: group['name'],
title: group['title'] }
{ id: group["id"], name: group["name"], title: group["title"] }
end
end
def import_users
puts '', "creating users"
puts "", "creating users"
total_count = mysql_query("SELECT count(*) count FROM #{FLUXBB_PREFIX}users;").first['count']
total_count = mysql_query("SELECT count(*) count FROM #{FLUXBB_PREFIX}users;").first["count"]
batches(BATCH_SIZE) do |offset|
results = mysql_query(
"SELECT id, username, realname name, url website, email email, registered created_at,
results =
mysql_query(
"SELECT id, username, realname name, url website, email email, registered created_at,
registration_ip registration_ip_address, last_visit last_visit_time,
last_email_sent last_emailed_at, location, group_id
FROM #{FLUXBB_PREFIX}users
LIMIT #{BATCH_SIZE}
OFFSET #{offset};")
OFFSET #{offset};",
)
break if results.size < 1
next if all_records_exist? :users, results.map { |u| u["id"].to_i }
create_users(results, total: total_count, offset: offset) do |user|
{ id: user['id'],
email: user['email'],
username: user['username'],
name: user['name'],
created_at: Time.zone.at(user['created_at']),
website: user['website'],
registration_ip_address: user['registration_ip_address'],
last_seen_at: Time.zone.at(user['last_visit_time']),
last_emailed_at: user['last_emailed_at'] == nil ? 0 : Time.zone.at(user['last_emailed_at']),
location: user['location'],
moderator: user['group_id'] == 2,
admin: user['group_id'] == 1 }
{
id: user["id"],
email: user["email"],
username: user["username"],
name: user["name"],
created_at: Time.zone.at(user["created_at"]),
website: user["website"],
registration_ip_address: user["registration_ip_address"],
last_seen_at: Time.zone.at(user["last_visit_time"]),
last_emailed_at:
user["last_emailed_at"] == nil ? 0 : Time.zone.at(user["last_emailed_at"]),
location: user["location"],
moderator: user["group_id"] == 2,
admin: user["group_id"] == 1,
}
end
groupusers = results.select { |user| user['group_id'] > 2 }
groupusers = results.select { |user| user["group_id"] > 2 }
groupusers.each do |user|
if user['group_id']
user_id = user_id_from_imported_user_id(user['id'])
group_id = group_id_from_imported_group_id(user['group_id'])
if user["group_id"]
user_id = user_id_from_imported_user_id(user["id"])
group_id = group_id_from_imported_group_id(user["group_id"])
if user_id && group_id
GroupUser.find_or_create_by(user_id: user_id, group_id: group_id)
end
GroupUser.find_or_create_by(user_id: user_id, group_id: group_id) if user_id && group_id
end
end
end
@ -111,33 +114,34 @@ class ImportScripts::FluxBB < ImportScripts::Base
def import_categories
puts "", "importing top level categories..."
categories = mysql_query("
categories =
mysql_query(
"
SELECT id, cat_name name, disp_position position
FROM #{FLUXBB_PREFIX}categories
ORDER BY id ASC
").to_a
",
).to_a
create_categories(categories) do |category|
{
id: category["id"],
name: category["name"]
}
end
create_categories(categories) { |category| { id: category["id"], name: category["name"] } }
puts "", "importing children categories..."
children_categories = mysql_query("
children_categories =
mysql_query(
"
SELECT id, forum_name name, forum_desc description, disp_position position, cat_id parent_category_id
FROM #{FLUXBB_PREFIX}forums
ORDER BY id
").to_a
",
).to_a
create_categories(children_categories) do |category|
{
id: "child##{category['id']}",
id: "child##{category["id"]}",
name: category["name"],
description: category["description"],
parent_category_id: category_id_from_imported_category_id(category["parent_category_id"])
parent_category_id: category_id_from_imported_category_id(category["parent_category_id"]),
}
end
end
@ -148,7 +152,9 @@ class ImportScripts::FluxBB < ImportScripts::Base
total_count = mysql_query("SELECT count(*) count from #{FLUXBB_PREFIX}posts").first["count"]
batches(BATCH_SIZE) do |offset|
results = mysql_query("
results =
mysql_query(
"
SELECT p.id id,
t.id topic_id,
t.forum_id category_id,
@ -163,29 +169,30 @@ class ImportScripts::FluxBB < ImportScripts::Base
ORDER BY p.posted
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
").to_a
",
).to_a
break if results.size < 1
next if all_records_exist? :posts, results.map { |m| m['id'].to_i }
next if all_records_exist? :posts, results.map { |m| m["id"].to_i }
create_posts(results, total: total_count, offset: offset) do |m|
skip = false
mapped = {}
mapped[:id] = m['id']
mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1
mapped[:raw] = process_fluxbb_post(m['raw'], m['id'])
mapped[:created_at] = Time.zone.at(m['created_at'])
mapped[:id] = m["id"]
mapped[:user_id] = user_id_from_imported_user_id(m["user_id"]) || -1
mapped[:raw] = process_fluxbb_post(m["raw"], m["id"])
mapped[:created_at] = Time.zone.at(m["created_at"])
if m['id'] == m['first_post_id']
mapped[:category] = category_id_from_imported_category_id("child##{m['category_id']}")
mapped[:title] = CGI.unescapeHTML(m['title'])
if m["id"] == m["first_post_id"]
mapped[:category] = category_id_from_imported_category_id("child##{m["category_id"]}")
mapped[:title] = CGI.unescapeHTML(m["title"])
else
parent = topic_lookup_from_imported_post_id(m['first_post_id'])
parent = topic_lookup_from_imported_post_id(m["first_post_id"])
if parent
mapped[:topic_id] = parent[:topic_id]
else
puts "Parent post #{m['first_post_id']} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
puts "Parent post #{m["first_post_id"]} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
skip = true
end
end
@ -196,16 +203,16 @@ class ImportScripts::FluxBB < ImportScripts::Base
end
def suspend_users
puts '', "updating banned users"
puts "", "updating banned users"
banned = 0
failed = 0
total = mysql_query("SELECT count(*) count FROM #{FLUXBB_PREFIX}bans").first['count']
total = mysql_query("SELECT count(*) count FROM #{FLUXBB_PREFIX}bans").first["count"]
system_user = Discourse.system_user
mysql_query("SELECT username, email FROM #{FLUXBB_PREFIX}bans").each do |b|
user = User.find_by_email(b['email'])
user = User.find_by_email(b["email"])
if user
user.suspended_at = Time.now
user.suspended_till = 200.years.from_now
@ -218,7 +225,7 @@ class ImportScripts::FluxBB < ImportScripts::Base
failed += 1
end
else
puts "Not found: #{b['email']}"
puts "Not found: #{b["email"]}"
failed += 1
end
@ -233,15 +240,15 @@ class ImportScripts::FluxBB < ImportScripts::Base
s.gsub!(/<!-- s(\S+) -->(?:.*)<!-- s(?:\S+) -->/, '\1')
# Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
s.gsub!(/<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/, '[\2](\1)')
s.gsub!(%r{<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)</a><!-- \w -->}, '[\2](\1)')
# Many bbcode tags have a hash attached to them. Examples:
# [url=https&#58;//google&#46;com:1qh1i7ky]click here[/url:1qh1i7ky]
# [quote=&quot;cybereality&quot;:b0wtlzex]Some text.[/quote:b0wtlzex]
s.gsub!(/:(?:\w{8})\]/, ']')
s.gsub!(/:(?:\w{8})\]/, "]")
# Remove video tags.
s.gsub!(/(^\[video=.*?\])|(\[\/video\]$)/, '')
s.gsub!(%r{(^\[video=.*?\])|(\[/video\]$)}, "")
s = CGI.unescapeHTML(s)
@ -249,7 +256,7 @@ class ImportScripts::FluxBB < ImportScripts::Base
# [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
#
# Work around it for now:
s.gsub!(/\[http(s)?:\/\/(www\.)?/, '[')
s.gsub!(%r{\[http(s)?://(www\.)?}, "[")
s
end
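
The eight-character hash stripping above is easiest to see on the example from the comment (standalone, runnable as-is):

s = '[quote=&quot;cybereality&quot;:b0wtlzex]Some text.[/quote:b0wtlzex]'
puts s.gsub(/:(?:\w{8})\]/, "]")
# => [quote=&quot;cybereality&quot;]Some text.[/quote]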
View File
@ -2,7 +2,7 @@
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'csv'
require "csv"
# Importer for Friends+Me Google+ Exporter (F+MG+E) output.
#
@ -32,18 +32,18 @@ require 'csv'
# Edit values at the top of the script to fit your preferences
class ImportScripts::FMGP < ImportScripts::Base
def initialize
super
# Set this to the base URL for the site; required for importing videos
# typically just 'https:' in production
@site_base_url = 'http://localhost:3000'
@site_base_url = "http://localhost:3000"
@system_user = Discourse.system_user
SiteSetting.max_image_size_kb = 40960
SiteSetting.max_attachment_size_kb = 40960
SiteSetting.max_image_size_kb = 40_960
SiteSetting.max_attachment_size_kb = 40_960
# handle the same video extensions as the rest of Discourse
SiteSetting.authorized_extensions = (SiteSetting.authorized_extensions.split("|") + ['mp4', 'mov', 'webm', 'ogv']).uniq.join("|")
SiteSetting.authorized_extensions =
(SiteSetting.authorized_extensions.split("|") + %w[mp4 mov webm ogv]).uniq.join("|")
@invalid_bounce_score = 5.0
@min_title_words = 3
@max_title_words = 14
@ -76,7 +76,7 @@ class ImportScripts::FMGP < ImportScripts::Base
@allowlist = nil
# Tags to apply to every topic; empty Array to not have any tags applied everywhere
@globaltags = [ "gplus" ]
@globaltags = ["gplus"]
@imagefiles = nil
@ -101,34 +101,30 @@ class ImportScripts::FMGP < ImportScripts::Base
@first_date = nil
# every argument is a filename, do the right thing based on the file name
ARGV.each do |arg|
if arg.end_with?('.csv')
if arg.end_with?(".csv")
# CSV files produced by F+MG+E have "URL";"IsDownloaded";"FileName";"FilePath";"FileSize"
CSV.foreach(arg, headers: true, col_sep: ';') do |row|
@images[row[0]] = {
filename: row[2],
filepath: row[3],
filesize: row[4]
}
CSV.foreach(arg, headers: true, col_sep: ";") do |row|
@images[row[0]] = { filename: row[2], filepath: row[3], filesize: row[4] }
end
elsif arg.end_with?("upload-paths.txt")
@imagefiles = File.open(arg, "w")
elsif arg.end_with?('categories.json')
elsif arg.end_with?("categories.json")
@categories_filename = arg
@categories = load_fmgp_json(arg)
elsif arg.end_with?("usermap.json")
@usermap = load_fmgp_json(arg)
elsif arg.end_with?('blocklist.json')
elsif arg.end_with?("blocklist.json")
@blocklist = load_fmgp_json(arg).map { |i| i.to_s }.to_set
elsif arg.end_with?('allowlist.json')
elsif arg.end_with?("allowlist.json")
@allowlist = load_fmgp_json(arg).map { |i| i.to_s }.to_set
elsif arg.end_with?('.json')
elsif arg.end_with?(".json")
@feeds << load_fmgp_json(arg)
elsif arg == '--dry-run'
elsif arg == "--dry-run"
@dryrun = true
elsif arg.start_with?("--last-date=")
@last_date = Time.zone.parse(arg.gsub(/.*=/, ''))
@last_date = Time.zone.parse(arg.gsub(/.*=/, ""))
elsif arg.start_with?("--first-date=")
@first_date = Time.zone.parse(arg.gsub(/.*=/, ''))
@first_date = Time.zone.parse(arg.gsub(/.*=/, ""))
else
raise RuntimeError.new("unknown argument #{arg}")
end
@ -153,7 +149,6 @@ class ImportScripts::FMGP < ImportScripts::Base
@blocked_posts = 0
# count uploaded file size
@totalsize = 0
end
def execute
@ -222,7 +217,9 @@ class ImportScripts::FMGP < ImportScripts::Base
categories_new = "#{@categories_filename}.new"
File.open(categories_new, "w") do |f|
f.write(@categories.to_json)
raise RuntimeError.new("Category file missing categories for #{incomplete_categories}, edit #{categories_new} and rename it to #{@category_filename} before running the same import")
raise RuntimeError.new(
"Category file missing categories for #{incomplete_categories}, edit #{categories_new} and rename it to #{@category_filename} before running the same import",
)
end
end
end
@ -233,28 +230,32 @@ class ImportScripts::FMGP < ImportScripts::Base
@categories.each do |id, cat|
if cat["parent"].present? && !cat["parent"].empty?
# Two separate sub-categories can have the same name, so need to identify by parent
Category.where(name: cat["category"]).each do |category|
parent = Category.where(id: category.parent_category_id).first
@cats[id] = category if parent.name == cat["parent"]
end
Category
.where(name: cat["category"])
.each do |category|
parent = Category.where(id: category.parent_category_id).first
@cats[id] = category if parent.name == cat["parent"]
end
else
if category = Category.where(name: cat["category"]).first
@cats[id] = category
elsif @create_categories
params = {}
params[:name] = cat['category']
params[:name] = cat["category"]
params[:id] = id
puts "Creating #{cat['category']}"
puts "Creating #{cat["category"]}"
category = create_category(params, id)
@cats[id] = category
end
end
raise RuntimeError.new("Could not find category #{cat["category"]} for #{cat}") if @cats[id].nil?
if @cats[id].nil?
raise RuntimeError.new("Could not find category #{cat["category"]} for #{cat}")
end
end
end
def import_users
puts '', "Importing Google+ post and comment author users..."
puts "", "Importing Google+ post and comment author users..."
# collect authors of both posts and comments
@feeds.each do |feed|
@ -263,14 +264,10 @@ class ImportScripts::FMGP < ImportScripts::Base
community["categories"].each do |category|
category["posts"].each do |post|
import_author_user(post["author"])
if post["message"].present?
import_message_users(post["message"])
end
import_message_users(post["message"]) if post["message"].present?
post["comments"].each do |comment|
import_author_user(comment["author"])
if comment["message"].present?
import_message_users(comment["message"])
end
import_message_users(comment["message"]) if comment["message"].present?
end
end
end
@ -282,12 +279,7 @@ class ImportScripts::FMGP < ImportScripts::Base
# now create them all
create_users(@newusers) do |id, u|
{
id: id,
email: u[:email],
name: u[:name],
post_create_action: u[:post_create_action]
}
{ id: id, email: u[:email], name: u[:name], post_create_action: u[:post_create_action] }
end
end
@ -308,7 +300,8 @@ class ImportScripts::FMGP < ImportScripts::Base
def import_google_user(id, name)
if !@emails[id].present?
google_user_info = UserAssociatedAccount.find_by(provider_name: 'google_oauth2', provider_uid: id.to_i)
google_user_info =
UserAssociatedAccount.find_by(provider_name: "google_oauth2", provider_uid: id.to_i)
if google_user_info.nil?
# create new google user on system; expect this user to merge
# when they later log in with google authentication
@ -320,36 +313,39 @@ class ImportScripts::FMGP < ImportScripts::Base
@newusers[id] = {
email: email,
name: name,
post_create_action: proc do |newuser|
newuser.approved = true
newuser.approved_by_id = @system_user.id
newuser.approved_at = newuser.created_at
if @blocklist.include?(id.to_s)
now = DateTime.now
forever = 1000.years.from_now
# you can suspend as well if you want your blocklist to
# be hard to recover from
#newuser.suspended_at = now
#newuser.suspended_till = forever
newuser.silenced_till = forever
end
newuser.save
@users[id] = newuser
UserAssociatedAccount.create(provider_name: 'google_oauth2', user_id: newuser.id, provider_uid: id)
# Do not send email to the invalid email addresses
# this can be removed after merging with #7162
s = UserStat.where(user_id: newuser.id).first
s.bounce_score = @invalid_bounce_score
s.reset_bounce_score_after = 1000.years.from_now
s.save
end
post_create_action:
proc do |newuser|
newuser.approved = true
newuser.approved_by_id = @system_user.id
newuser.approved_at = newuser.created_at
if @blocklist.include?(id.to_s)
now = DateTime.now
forever = 1000.years.from_now
# you can suspend as well if you want your blocklist to
# be hard to recover from
#newuser.suspended_at = now
#newuser.suspended_till = forever
newuser.silenced_till = forever
end
newuser.save
@users[id] = newuser
UserAssociatedAccount.create(
provider_name: "google_oauth2",
user_id: newuser.id,
provider_uid: id,
)
# Do not send email to the invalid email addresses
# this can be removed after merging with #7162
s = UserStat.where(user_id: newuser.id).first
s.bounce_score = @invalid_bounce_score
s.reset_bounce_score_after = 1000.years.from_now
s.save
end,
}
else
# user already on system
u = User.find(google_user_info.user_id)
if u.silenced? || u.suspended?
@blocklist.add(id)
end
@blocklist.add(id) if u.silenced? || u.suspended?
@users[id] = u
email = u.email
end
@ -362,7 +358,7 @@ class ImportScripts::FMGP < ImportScripts::Base
# - A google+ post is a discourse topic
# - A google+ comment is a discourse post
puts '', "Importing Google+ posts and comments..."
puts "", "Importing Google+ posts and comments..."
@feeds.each do |feed|
feed["accounts"].each do |account|
@ -371,14 +367,16 @@ class ImportScripts::FMGP < ImportScripts::Base
category["posts"].each do |post|
# G+ post / Discourse topic
import_topic(post, category)
print("\r#{@topics_imported}/#{@posts_imported} topics/posts (skipped: #{@topics_skipped}/#{@posts_skipped} blocklisted: #{@blocked_topics}/#{@blocked_posts}) ")
print(
"\r#{@topics_imported}/#{@posts_imported} topics/posts (skipped: #{@topics_skipped}/#{@posts_skipped} blocklisted: #{@blocked_topics}/#{@blocked_posts}) ",
)
end
end
end
end
end
puts ''
puts ""
end
def import_topic(post, category)
@ -431,9 +429,7 @@ class ImportScripts::FMGP < ImportScripts::Base
return nil if !@first_date.nil? && created_at < @first_date
user_id = user_id_from_imported_user_id(post_author_id)
if user_id.nil?
user_id = @users[post["author"]["id"]].id
end
user_id = @users[post["author"]["id"]].id if user_id.nil?
mapped = {
id: post["id"],
@ -472,7 +468,8 @@ class ImportScripts::FMGP < ImportScripts::Base
def title_text(post, created_at)
words = message_text(post["message"])
if words.empty? || words.join("").length < @min_title_characters || words.length < @min_title_words
if words.empty? || words.join("").length < @min_title_characters ||
words.length < @min_title_words
# database has minimum length
# short posts appear not to work well as titles most of the time (in practice)
return untitled(post["author"]["name"], created_at)
@ -483,17 +480,13 @@ class ImportScripts::FMGP < ImportScripts::Base
(@min_title_words..(words.length - 1)).each do |i|
# prefer full stop
if words[i].end_with?(".")
lastword = i
end
lastword = i if words[i].end_with?(".")
end
if lastword.nil?
# fall back on other punctuation
(@min_title_words..(words.length - 1)).each do |i|
if words[i].end_with?(',', ';', ':', '?')
lastword = i
end
lastword = i if words[i].end_with?(",", ";", ":", "?")
end
end
@ -516,9 +509,7 @@ class ImportScripts::FMGP < ImportScripts::Base
text_types = [0, 3]
message.each do |fragment|
if text_types.include?(fragment[0])
fragment[1].split().each do |word|
words << word
end
fragment[1].split().each { |word| words << word }
elsif fragment[0] == 2
# use the display text of a link
words << fragment[1]
@ -543,14 +534,10 @@ class ImportScripts::FMGP < ImportScripts::Base
lines << "\n#{formatted_link(post["image"]["proxy"])}\n"
end
if post["images"].present?
post["images"].each do |image|
lines << "\n#{formatted_link(image["proxy"])}\n"
end
post["images"].each { |image| lines << "\n#{formatted_link(image["proxy"])}\n" }
end
if post["videos"].present?
post["videos"].each do |video|
lines << "\n#{formatted_link(video["proxy"])}\n"
end
post["videos"].each { |video| lines << "\n#{formatted_link(video["proxy"])}\n" }
end
if post["link"].present? && post["link"]["url"].present?
url = post["link"]["url"]
@ -575,12 +562,8 @@ class ImportScripts::FMGP < ImportScripts::Base
if fragment[2].nil?
text
else
if fragment[2]["italic"].present?
text = "<i>#{text}</i>"
end
if fragment[2]["bold"].present?
text = "<b>#{text}</b>"
end
text = "<i>#{text}</i>" if fragment[2]["italic"].present?
text = "<b>#{text}</b>" if fragment[2]["bold"].present?
if fragment[2]["strikethrough"].present?
# s more likely than del to represent user intent?
text = "<s>#{text}</s>"
@ -594,9 +577,7 @@ class ImportScripts::FMGP < ImportScripts::Base
formatted_link_text(fragment[2], fragment[1])
elsif fragment[0] == 3
# reference to a user
if @usermap.include?(fragment[2].to_s)
return "@#{@usermap[fragment[2].to_s]}"
end
return "@#{@usermap[fragment[2].to_s]}" if @usermap.include?(fragment[2].to_s)
if fragment[2].nil?
# deleted G+ users show up with a null ID
return "<b>+#{fragment[1]}</b>"
@ -606,12 +587,18 @@ class ImportScripts::FMGP < ImportScripts::Base
# user was in this import's authors
"@#{user.username} "
else
if google_user_info = UserAssociatedAccount.find_by(provider_name: 'google_oauth2', provider_uid: fragment[2])
if google_user_info =
UserAssociatedAccount.find_by(
provider_name: "google_oauth2",
provider_uid: fragment[2],
)
# user was not in this import, but has logged in or been imported otherwise
user = User.find(google_user_info.user_id)
"@#{user.username} "
else
raise RuntimeError.new("Google user #{fragment[1]} (id #{fragment[2]}) not imported") if !@dryrun
if !@dryrun
raise RuntimeError.new("Google user #{fragment[1]} (id #{fragment[2]}) not imported")
end
# if you want to fall back to their G+ name, just erase the raise above,
# but this should not happen
"<b>+#{fragment[1]}</b>"
@ -681,6 +668,4 @@ class ImportScripts::FMGP < ImportScripts::Base
end
end
if __FILE__ == $0
ImportScripts::FMGP.new.perform
end
ImportScripts::FMGP.new.perform if __FILE__ == $0
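
Given the ARGV loop above, a plausible invocation mixes the JSON/CSV inputs with the date filters; every file name below is illustrative:

# RAILS_ENV=production bundle exec ruby script/import_scripts/fmgp.rb \
#   categories.json usermap.json blocklist.json images.csv feed.json \
#   --first-date=2015-01-01 --dry-run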
View File
@ -22,15 +22,14 @@
# that correctly and will import the replies in the wrong order.
# You should run `rake posts:reorder_posts` after the import.
require 'csv'
require 'set'
require "csv"
require "set"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'reverse_markdown' # gem 'reverse_markdown'
require "reverse_markdown" # gem 'reverse_markdown'
# Call it like this:
# RAILS_ENV=production bundle exec ruby script/import_scripts/getsatisfaction.rb DIRNAME
class ImportScripts::GetSatisfaction < ImportScripts::Base
IMPORT_ARCHIVED_TOPICS = false
# The script classifies each topic as private when at least one associated category
@ -85,22 +84,24 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
previous_line = nil
File.open(target_filename, "w") do |file|
File.open(source_filename).each_line do |line|
line.gsub!(/(?<![^\\]\\)\\"/, '""')
line.gsub!(/\\\\/, '\\')
File
.open(source_filename)
.each_line do |line|
line.gsub!(/(?<![^\\]\\)\\"/, '""')
line.gsub!(/\\\\/, '\\')
if previous_line
previous_line << "\n" unless line.starts_with?(",")
line = "#{previous_line}#{line}"
previous_line = nil
end
if previous_line
previous_line << "\n" unless line.starts_with?(",")
line = "#{previous_line}#{line}"
previous_line = nil
end
if line.gsub!(/,\+1\\\R$/m, ',"+1"').present?
previous_line = line
else
file.puts(line)
if line.gsub!(/,\+1\\\R$/m, ',"+1"').present?
previous_line = line
else
file.puts(line)
end
end
end
file.puts(previous_line) if previous_line
end
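
The quote-repair gsub! is the heart of this method: the export escapes quotes with backslashes, while Ruby's CSV parser expects doubled quotes. A standalone illustration (sample text made up):

line = 'He said \"hello\" to me' # single-quoted, so this holds a literal \"
puts line.gsub(/(?<![^\\]\\)\\"/, '""')
# => He said ""hello"" to me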
@ -108,18 +109,18 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
end
def csv_parse(table_name)
CSV.foreach(csv_filename(table_name),
headers: true,
header_converters: :symbol,
skip_blanks: true,
encoding: 'bom|utf-8') { |row| yield row }
CSV.foreach(
csv_filename(table_name),
headers: true,
header_converters: :symbol,
skip_blanks: true,
encoding: "bom|utf-8",
) { |row| yield row }
end
def total_rows(table_name)
CSV.foreach(csv_filename(table_name),
headers: true,
skip_blanks: true,
encoding: 'bom|utf-8')
CSV
.foreach(csv_filename(table_name), headers: true, skip_blanks: true, encoding: "bom|utf-8")
.inject(0) { |c, _| c + 1 }
end
@ -138,13 +139,11 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
name: row[:realname],
username: row[:nickname],
created_at: DateTime.parse(row[:joined_date]),
active: true
active: true,
}
count += 1
if count % BATCH_SIZE == 0
import_users_batch!(users, count - users.length, total)
end
import_users_batch!(users, count - users.length, total) if count % BATCH_SIZE == 0
end
import_users_batch!(users, count - users.length, total)
@ -153,9 +152,7 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
def import_users_batch!(users, offset, total)
return if users.empty?
create_users(users, offset: offset, total: total) do |user|
user
end
create_users(users, offset: offset, total: total) { |user| user }
users.clear
end
@ -168,13 +165,11 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
rows << {
id: row[:category_id],
name: row[:name],
description: row[:description].present? ? normalize_raw!(row[:description]) : nil
description: row[:description].present? ? normalize_raw!(row[:description]) : nil,
}
end
create_categories(rows) do |row|
row
end
create_categories(rows) { |row| row }
end
def import_topic_id(topic_id)
@ -200,7 +195,13 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
else
topic = map_post(row)
topic[:id] = topic_id
topic[:title] = row[:subject].present? ? row[:subject].strip[0...255] : "Topic title missing"
topic[:title] = (
if row[:subject].present?
row[:subject].strip[0...255]
else
"Topic title missing"
end
)
topic[:category] = category_id(row)
topic[:archived] = row[:archived_at].present?
@ -210,9 +211,7 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
topics << topic
count += 1
if count % BATCH_SIZE == 0
import_topics_batch!(topics, count - topics.length, total)
end
import_topics_batch!(topics, count - topics.length, total) if count % BATCH_SIZE == 0
end
import_topics_batch!(topics, count - topics.length, total)
@ -290,9 +289,7 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
posts << post
count += 1
if count % BATCH_SIZE == 0
import_posts_batch!(posts, count - posts.length, total)
end
import_posts_batch!(posts, count - posts.length, total) if count % BATCH_SIZE == 0
end
import_posts_batch!(posts, count - posts.length, total)
@ -324,7 +321,7 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
{
user_id: user_id_from_imported_user_id(row[:user_id]) || Discourse.system_user.id,
created_at: DateTime.parse(row[:created_at]),
raw: normalize_raw!(row[:formatted_content])
raw: normalize_raw!(row[:formatted_content]),
}
end
@ -334,7 +331,7 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
# hoist code
hoisted = {}
raw.gsub!(/(<pre>\s*)?<code>(.*?)<\/code>(\s*<\/pre>)?/mi) do
raw.gsub!(%r{(<pre>\s*)?<code>(.*?)</code>(\s*</pre>)?}mi) do
code = $2
hoist = SecureRandom.hex
# tidy code, wow, this is impressively crazy
@ -350,9 +347,7 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
# in this case double space works best ... so odd
raw.gsub!(" ", "\n\n")
hoisted.each do |hoist, code|
raw.gsub!(hoist, "\n```\n#{code}\n```\n")
end
hoisted.each { |hoist, code| raw.gsub!(hoist, "\n```\n#{code}\n```\n") }
raw = CGI.unescapeHTML(raw)
raw = ReverseMarkdown.convert(raw)
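
The hoist pattern above deserves a note: code blocks are parked behind random hex placeholders so the whitespace and entity cleanup cannot mangle them, then restored as fenced Markdown at the end. A self-contained miniature of the same idea:

require "securerandom"

raw = "<p>before</p><pre><code>x = 1</code></pre><p>after</p>"
hoisted = {}
raw = raw.gsub(%r{(<pre>\s*)?<code>(.*?)</code>(\s*</pre>)?}mi) do
  hoist = SecureRandom.hex
  hoisted[hoist] = $2
  hoist # park the code behind an opaque placeholder
end
# ...any lossy cleanup of raw would run here...
hoisted.each { |hoist, code| raw = raw.gsub(hoist, "\n```\n#{code}\n```\n") }
puts raw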
@ -360,7 +355,7 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
end
def create_permalinks
puts '', 'Creating Permalinks...', ''
puts "", "Creating Permalinks...", ""
Topic.listable_topics.find_each do |topic|
tcf = topic.first_post.custom_fields
@ -372,7 +367,6 @@ class ImportScripts::GetSatisfaction < ImportScripts::Base
end
end
end
end
unless ARGV[0] && Dir.exist?(ARGV[0])
View File
@ -20,19 +20,18 @@ DEFAULT_COOKIES_TXT = "/shared/import/cookies.txt"
ABORT_AFTER_SKIPPED_TOPIC_COUNT = 10
def driver
@driver ||= begin
chrome_args = ["disable-gpu"]
chrome_args << "headless" unless ENV["NOT_HEADLESS"] == '1'
chrome_args << "no-sandbox" if inside_container?
options = Selenium::WebDriver::Chrome::Options.new(args: chrome_args)
Selenium::WebDriver.for(:chrome, options: options)
end
@driver ||=
begin
chrome_args = ["disable-gpu"]
chrome_args << "headless" unless ENV["NOT_HEADLESS"] == "1"
chrome_args << "no-sandbox" if inside_container?
options = Selenium::WebDriver::Chrome::Options.new(args: chrome_args)
Selenium::WebDriver.for(:chrome, options: options)
end
end
def inside_container?
File.foreach("/proc/1/cgroup") do |line|
return true if line.include?("docker")
end
File.foreach("/proc/1/cgroup") { |line| return true if line.include?("docker") }
false
end
@ -79,35 +78,38 @@ def base_url
end
def crawl_topics
1.step(nil, 100).each do |start|
url = "#{base_url}/#{@groupname}[#{start}-#{start + 99}]"
get(url)
1
.step(nil, 100)
.each do |start|
url = "#{base_url}/#{@groupname}[#{start}-#{start + 99}]"
get(url)
begin
if start == 1 && find("h2").text == "Error 403"
exit_with_error(<<~TEXT.red.bold)
begin
exit_with_error(<<~TEXT.red.bold) if start == 1 && find("h2").text == "Error 403"
Unable to find topics. Try running the script with the "--domain example.com"
option if you are a G Suite user and your group's URL contains a path with
your domain that looks like "/a/example.com".
TEXT
rescue Selenium::WebDriver::Error::NoSuchElementError
# Ignore this error. It simply means there wasn't an error.
end
rescue Selenium::WebDriver::Error::NoSuchElementError
# Ignore this error. It simply means there wasn't an error.
end
topic_urls = extract(".subject a[href*='#{@groupname}']") { |a| a["href"].sub("/d/topic/", "/forum/?_escaped_fragment_=topic/") }
break if topic_urls.size == 0
topic_urls =
extract(".subject a[href*='#{@groupname}']") do |a|
a["href"].sub("/d/topic/", "/forum/?_escaped_fragment_=topic/")
end
break if topic_urls.size == 0
topic_urls.each do |topic_url|
crawl_topic(topic_url)
topic_urls.each do |topic_url|
crawl_topic(topic_url)
# abort if this is an incremental crawl and there were too many consecutive skipped topics
if @finished && @skipped_topic_count > ABORT_AFTER_SKIPPED_TOPIC_COUNT
puts "Skipping all other topics, because this is an incremental crawl.".green
return
# abort if this is an incremental crawl and there were too many consecutive skipped topics
if @finished && @skipped_topic_count > ABORT_AFTER_SKIPPED_TOPIC_COUNT
puts "Skipping all other topics, because this is an incremental crawl.".green
return
end
end
end
end
end
def crawl_topic(url)
@ -126,17 +128,14 @@ def crawl_topic(url)
messages_crawled = false
extract(".subject a[href*='#{@groupname}']") do |a|
[
a["href"].sub("/d/msg/", "/forum/message/raw?msg="),
a["title"].empty?
]
[a["href"].sub("/d/msg/", "/forum/message/raw?msg="), a["title"].empty?]
end.each do |msg_url, might_be_deleted|
messages_crawled |= crawl_message(msg_url, might_be_deleted)
end
@skipped_topic_count = skippable && messages_crawled ? 0 : @skipped_topic_count + 1
@scraped_topic_urls << url
rescue
rescue StandardError
puts "Failed to scrape topic at #{url}".red
raise if @abort_on_error
end
@ -144,18 +143,16 @@ end
def crawl_message(url, might_be_deleted)
get(url)
filename = File.join(@path, "#{url[/#{@groupname}\/(.+)/, 1].sub("/", "-")}.eml")
filename = File.join(@path, "#{url[%r{#{@groupname}/(.+)}, 1].sub("/", "-")}.eml")
content = find("pre")["innerText"]
if !@first_message_checked
@first_message_checked = true
if content.match?(/From:.*\.\.\.@.*/i) && !@force_import
exit_with_error(<<~TEXT.red.bold)
exit_with_error(<<~TEXT.red.bold) if content.match?(/From:.*\.\.\.@.*/i) && !@force_import
It looks like you do not have permissions to see email addresses. Aborting.
Use the --force option to import anyway.
TEXT
end
end
old_md5 = Digest::MD5.file(filename) if File.exist?(filename)
@ -169,7 +166,7 @@ rescue Selenium::WebDriver::Error::NoSuchElementError
puts "Failed to scrape message at #{url}".red
raise if @abort_on_error
end
rescue
rescue StandardError
puts "Failed to scrape message at #{url}".red
raise if @abort_on_error
end
@ -178,10 +175,7 @@ def login
puts "Logging in..."
get("https://google.com/404")
add_cookies(
"myaccount.google.com",
"google.com"
)
add_cookies("myaccount.google.com", "google.com")
get("https://myaccount.google.com/?utm_source=sign_in_no_continue")
@ -193,20 +187,24 @@ def login
end
def add_cookies(*domains)
File.readlines(@cookies).each do |line|
parts = line.chomp.split("\t")
next if parts.size != 7 || !domains.any? { |domain| parts[0] =~ /^\.?#{Regexp.escape(domain)}$/ }
File
.readlines(@cookies)
.each do |line|
parts = line.chomp.split("\t")
if parts.size != 7 || !domains.any? { |domain| parts[0] =~ /^\.?#{Regexp.escape(domain)}$/ }
next
end
driver.manage.add_cookie(
domain: parts[0],
httpOnly: "true".casecmp?(parts[1]),
path: parts[2],
secure: "true".casecmp?(parts[3]),
expires: parts[4] == "0" ? nil : DateTime.strptime(parts[4], "%s"),
name: parts[5],
value: parts[6]
)
end
driver.manage.add_cookie(
domain: parts[0],
httpOnly: "true".casecmp?(parts[1]),
path: parts[2],
secure: "true".casecmp?(parts[3]),
expires: parts[4] == "0" ? nil : DateTime.strptime(parts[4], "%s"),
name: parts[5],
value: parts[6],
)
end
end
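# The cookies file is the tab-separated Netscape cookies.txt format; the seven
# fields are consumed above as domain, httpOnly, path, secure, expires, name
# and value (in the classic export the second field is the subdomain flag,
# which this script reuses as httpOnly), and lines with any other field count
# are skipped by the size check. An illustrative line, with "\t" for the tabs:
#
#   ".google.com\tTRUE\t/\tTRUE\t1893456000\tSID\tabc123"
#
# maps to add_cookie(domain: ".google.com", httpOnly: true, path: "/",
# secure: true, expires: DateTime for 2030-01-01, name: "SID", value: "abc123").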
def wait_for_url
@ -240,10 +238,7 @@ def crawl
crawl_topics
@finished = true
ensure
File.write(status_filename, {
finished: @finished,
urls: @scraped_topic_urls
}.to_yaml)
File.write(status_filename, { finished: @finished, urls: @scraped_topic_urls }.to_yaml)
end
elapsed = Time.now - start_time
@ -258,20 +253,25 @@ def parse_arguments
@abort_on_error = false
@cookies = DEFAULT_COOKIES_TXT if File.exist?(DEFAULT_COOKIES_TXT)
parser = OptionParser.new do |opts|
opts.banner = "Usage: google_groups.rb [options]"
parser =
OptionParser.new do |opts|
opts.banner = "Usage: google_groups.rb [options]"
opts.on("-g", "--groupname GROUPNAME") { |v| @groupname = v }
opts.on("-d", "--domain DOMAIN") { |v| @domain = v }
opts.on("-c", "--cookies PATH", "path to cookies.txt") { |v| @cookies = v }
opts.on("--path PATH", "output path for emails") { |v| @path = v }
opts.on("-f", "--force", "force import when user isn't allowed to see email addresses") { @force_import = true }
opts.on("-a", "--abort-on-error", "abort crawl on error instead of skipping message") { @abort_on_error = true }
opts.on("-h", "--help") do
puts opts
exit
opts.on("-g", "--groupname GROUPNAME") { |v| @groupname = v }
opts.on("-d", "--domain DOMAIN") { |v| @domain = v }
opts.on("-c", "--cookies PATH", "path to cookies.txt") { |v| @cookies = v }
opts.on("--path PATH", "output path for emails") { |v| @path = v }
opts.on("-f", "--force", "force import when user isn't allowed to see email addresses") do
@force_import = true
end
opts.on("-a", "--abort-on-error", "abort crawl on error instead of skipping message") do
@abort_on_error = true
end
opts.on("-h", "--help") do
puts opts
exit
end
end
end
begin
parser.parse!
@ -279,10 +279,12 @@ def parse_arguments
exit_with_error(e.message, "", parser)
end
mandatory = [:groupname, :cookies]
mandatory = %i[groupname cookies]
missing = mandatory.select { |name| instance_variable_get("@#{name}").nil? }
exit_with_error("Missing arguments: #{missing.join(', ')}".red.bold, "", parser, "") if missing.any?
if missing.any?
exit_with_error("Missing arguments: #{missing.join(", ")}".red.bold, "", parser, "")
end
exit_with_error("cookies.txt not found at #{@cookies}".red.bold, "") if !File.exist?(@cookies)
@path = File.join(DEFAULT_OUTPUT_PATH, @groupname) if @path.nil?
View File
@ -4,7 +4,6 @@ require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::HigherLogic < ImportScripts::Base
HIGHERLOGIC_DB = "higherlogic"
BATCH_SIZE = 1000
ATTACHMENT_DIR = "/shared/import/data/attachments"
@ -12,11 +11,7 @@ class ImportScripts::HigherLogic < ImportScripts::Base
def initialize
super
@client = Mysql2::Client.new(
host: "localhost",
username: "root",
database: HIGHERLOGIC_DB
)
@client = Mysql2::Client.new(host: "localhost", username: "root", database: HIGHERLOGIC_DB)
end
def execute
@ -29,7 +24,7 @@ class ImportScripts::HigherLogic < ImportScripts::Base
end
def import_groups
puts '', 'importing groups'
puts "", "importing groups"
groups = mysql_query <<-SQL
SELECT CommunityKey, CommunityName
@ -37,16 +32,11 @@ class ImportScripts::HigherLogic < ImportScripts::Base
ORDER BY CommunityName
SQL
create_groups(groups) do |group|
{
id: group['CommunityKey'],
name: group['CommunityName']
}
end
create_groups(groups) { |group| { id: group["CommunityKey"], name: group["CommunityName"] } }
end
def import_users
puts '', 'importing users'
puts "", "importing users"
total_count = mysql_query("SELECT count(*) FROM Contact").first["count"]
batches(BATCH_SIZE) do |offset|
@ -59,43 +49,42 @@ class ImportScripts::HigherLogic < ImportScripts::Base
break if results.size < 1
next if all_records_exist? :users, results.map { |u| u['ContactKey'] }
next if all_records_exist? :users, results.map { |u| u["ContactKey"] }
create_users(results, total: total_count, offset: offset) do |user|
next if user['EmailAddress'].blank?
next if user["EmailAddress"].blank?
{
id: user['ContactKey'],
email: user['EmailAddress'],
name: "#{user['FirstName']} #{user['LastName']}",
created_at: user['CreatedOn'] == nil ? 0 : Time.zone.at(user['CreatedOn']),
bio_raw: user['Bio'],
active: user['UserStatus'] == "Active",
admin: user['HLAdminFlag'] == 1
id: user["ContactKey"],
email: user["EmailAddress"],
name: "#{user["FirstName"]} #{user["LastName"]}",
created_at: user["CreatedOn"] == nil ? 0 : Time.zone.at(user["CreatedOn"]),
bio_raw: user["Bio"],
active: user["UserStatus"] == "Active",
admin: user["HLAdminFlag"] == 1,
}
end
end
end
def import_group_users
puts '', 'importing group users'
puts "", "importing group users"
group_users = mysql_query(<<-SQL
group_users = mysql_query(<<-SQL).to_a
SELECT CommunityKey, ContactKey
FROM CommunityMember
SQL
).to_a
group_users.each do |row|
next unless user_id = user_id_from_imported_user_id(row['ContactKey'])
next unless group_id = group_id_from_imported_group_id(row['CommunityKey'])
puts '', '.'
next unless user_id = user_id_from_imported_user_id(row["ContactKey"])
next unless group_id = group_id_from_imported_group_id(row["CommunityKey"])
puts "", "."
GroupUser.find_or_create_by(user_id: user_id, group_id: group_id)
end
end
def import_categories
puts '', 'importing categories'
puts "", "importing categories"
categories = mysql_query <<-SQL
SELECT DiscussionKey, DiscussionName
@ -103,15 +92,12 @@ class ImportScripts::HigherLogic < ImportScripts::Base
SQL
create_categories(categories) do |category|
{
id: category['DiscussionKey'],
name: category['DiscussionName']
}
{ id: category["DiscussionKey"], name: category["DiscussionName"] }
end
end
def import_posts
puts '', 'importing topics and posts'
puts "", "importing topics and posts"
total_count = mysql_query("SELECT count(*) FROM DiscussionPost").first["count"]
batches(BATCH_SIZE) do |offset|
@ -131,28 +117,28 @@ class ImportScripts::HigherLogic < ImportScripts::Base
SQL
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| p['MessageKey'] }
next if all_records_exist? :posts, results.map { |p| p["MessageKey"] }
create_posts(results, total: total_count, offset: offset) do |post|
raw = preprocess_raw(post['Body'])
raw = preprocess_raw(post["Body"])
mapped = {
id: post['MessageKey'],
user_id: user_id_from_imported_user_id(post['ContactKey']),
id: post["MessageKey"],
user_id: user_id_from_imported_user_id(post["ContactKey"]),
raw: raw,
created_at: Time.zone.at(post['CreatedOn']),
created_at: Time.zone.at(post["CreatedOn"]),
}
if post['ParentMessageKey'].nil?
mapped[:category] = category_id_from_imported_category_id(post['DiscussionKey']).to_i
mapped[:title] = CGI.unescapeHTML(post['Subject'])
mapped[:pinned] = post['PinnedFlag'] == 1
if post["ParentMessageKey"].nil?
mapped[:category] = category_id_from_imported_category_id(post["DiscussionKey"]).to_i
mapped[:title] = CGI.unescapeHTML(post["Subject"])
mapped[:pinned] = post["PinnedFlag"] == 1
else
topic = topic_lookup_from_imported_post_id(post['ParentMessageKey'])
topic = topic_lookup_from_imported_post_id(post["ParentMessageKey"])
if topic.present?
mapped[:topic_id] = topic[:topic_id]
else
puts "Parent post #{post['ParentMessageKey']} doesn't exist. Skipping."
puts "Parent post #{post["ParentMessageKey"]} doesn't exist. Skipping."
next
end
end
@ -163,20 +149,19 @@ class ImportScripts::HigherLogic < ImportScripts::Base
end
def import_attachments
puts '', 'importing attachments'
puts "", "importing attachments"
count = 0
total_attachments = mysql_query(<<-SQL
total_attachments = mysql_query(<<-SQL).first["count"]
SELECT COUNT(*) count
FROM LibraryEntryFile l
JOIN DiscussionPost p ON p.AttachmentDocumentKey = l.DocumentKey
WHERE p.CreatedOn > '2020-01-01 00:00:00'
SQL
).first['count']
batches(BATCH_SIZE) do |offset|
attachments = mysql_query(<<-SQL
attachments = mysql_query(<<-SQL).to_a
SELECT l.VersionName,
l.FileExtension,
p.MessageKey
@ -186,17 +171,16 @@ class ImportScripts::HigherLogic < ImportScripts::Base
LIMIT #{BATCH_SIZE}
OFFSET #{offset}
SQL
).to_a
break if attachments.empty?
attachments.each do |a|
print_status(count += 1, total_attachments, get_start_time("attachments"))
original_filename = "#{a['VersionName']}.#{a['FileExtension']}"
original_filename = "#{a["VersionName"]}.#{a["FileExtension"]}"
path = File.join(ATTACHMENT_DIR, original_filename)
if File.exist?(path)
if post = Post.find(post_id_from_imported_post_id(a['MessageKey']))
if post = Post.find(post_id_from_imported_post_id(a["MessageKey"]))
filename = File.basename(original_filename)
upload = create_upload(post.user.id, path, filename)
@ -205,7 +189,9 @@ class ImportScripts::HigherLogic < ImportScripts::Base
post.raw << "\n\n" << html
post.save!
PostUpload.create!(post: post, upload: upload) unless PostUpload.where(post: post, upload: upload).exists?
unless PostUpload.where(post: post, upload: upload).exists?
PostUpload.create!(post: post, upload: upload)
end
end
end
end
@ -217,7 +203,7 @@ class ImportScripts::HigherLogic < ImportScripts::Base
raw = body.dup
# trim off any post text beyond ---- to remove email threading
raw = raw.slice(0..(raw.index('------'))) || raw
raw = raw.slice(0..(raw.index("------"))) || raw
raw = HtmlToMarkdown.new(raw).to_markdown
raw
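# Note the slice above keeps everything up to and including the first
# character of the "------" marker, e.g.:
#   "reply text\n------ Original Message".index("------") #=> 11
#   "reply text\n------ Original Message".slice(0..11)    #=> "reply text\n-"
# When no marker is present, index returns nil and slice(0..nil) already
# yields the whole string on Ruby >= 2.6, so || raw is only a guard.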
File diff suppressed because it is too large

View File
@ -5,19 +5,19 @@ require "reverse_markdown"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::IPBoard3 < ImportScripts::Base
BATCH_SIZE ||= 5000
BATCH_SIZE ||= 5000
UPLOADS_DIR ||= "/path/to/uploads"
def initialize
super
@client = Mysql2::Client.new(
host: ENV["DB_HOST"] || "localhost",
username: ENV["DB_USER"] || "root",
password: ENV["DB_PW"],
database: ENV["DB_NAME"],
)
@client =
Mysql2::Client.new(
host: ENV["DB_HOST"] || "localhost",
username: ENV["DB_USER"] || "root",
password: ENV["DB_PW"],
database: ENV["DB_NAME"],
)
@client.query("SET character_set_results = binary")
end
@ -39,7 +39,7 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
total_users = mysql_query("SELECT COUNT(*) count FROM members").first["count"]
batches(BATCH_SIZE) do |offset|
users = mysql_query(<<~SQL
users = mysql_query(<<~SQL).to_a
SELECT member_id id
, name
, email
@ -59,7 +59,6 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
ORDER BY member_id
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if users.empty?
@ -67,7 +66,9 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
create_users(users, total: total_users, offset: offset) do |u|
next if user_id_from_imported_user_id(u["id"])
%W{name email title pp_about_me}.each { |k| u[k]&.encode!("utf-8", "utf-8", invalid: :replace, undef: :replace, replace: "") }
%W[name email title pp_about_me].each do |k|
u[k]&.encode!("utf-8", "utf-8", invalid: :replace, undef: :replace, replace: "")
end
next if u["name"].blank? && !Email.is_valid?(u["email"])
{
@ -77,30 +78,38 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
created_at: Time.zone.at(u["joined"]),
registration_ip_address: u["ip_address"],
title: CGI.unescapeHTML(u["title"].presence || ""),
date_of_birth: (Date.parse(u["date_of_birth"]) rescue nil),
date_of_birth:
(
begin
Date.parse(u["date_of_birth"])
rescue StandardError
nil
end
),
last_seen_at: Time.zone.at(u["last_activity"]),
admin: !!(u["g_title"] =~ /admin/i),
moderator: !!(u["g_title"] =~ /moderator/i),
bio_raw: clean_up(u["pp_about_me"]),
post_create_action: proc do |new_user|
if u["member_banned"] == 1
new_user.update(suspended_at: DateTime.now, suspended_till: 100.years.from_now)
elsif u["pp_main_photo"].present?
path = File.join(UPLOADS_DIR, u["pp_main_photo"])
if File.exist?(path)
begin
upload = create_upload(new_user.id, path, File.basename(path))
if upload.persisted?
new_user.create_user_avatar
new_user.user_avatar.update(custom_upload_id: upload.id)
new_user.update(uploaded_avatar_id: upload.id)
post_create_action:
proc do |new_user|
if u["member_banned"] == 1
new_user.update(suspended_at: DateTime.now, suspended_till: 100.years.from_now)
elsif u["pp_main_photo"].present?
path = File.join(UPLOADS_DIR, u["pp_main_photo"])
if File.exist?(path)
begin
upload = create_upload(new_user.id, path, File.basename(path))
if upload.persisted?
new_user.create_user_avatar
new_user.user_avatar.update(custom_upload_id: upload.id)
new_user.update(uploaded_avatar_id: upload.id)
end
rescue StandardError
# don't care
end
rescue
# don't care
end
end
end
end
end,
}
end
end
@ -109,10 +118,11 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
def import_categories
puts "", "importing categories..."
categories = mysql_query("SELECT id, parent_id, name, description, position FROM forums ORDER BY id").to_a
categories =
mysql_query("SELECT id, parent_id, name, description, position FROM forums ORDER BY id").to_a
parent_categories = categories.select { |c| c["parent_id"] == -1 }
child_categories = categories.select { |c| c["parent_id"] != -1 }
child_categories = categories.select { |c| c["parent_id"] != -1 }
create_categories(parent_categories) do |c|
next if category_id_from_imported_category_id(c["id"])
@ -142,7 +152,7 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
@closed_topic_ids = []
last_topic_id = -1
total_topics = mysql_query(<<~SQL
total_topics = mysql_query(<<~SQL).first["count"]
SELECT COUNT(*) count
FROM topics
JOIN posts ON tid = topic_id
@ -152,10 +162,9 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
AND approved = 1
AND queued = 0
SQL
).first["count"]
batches(BATCH_SIZE) do |offset|
topics = mysql_query(<<~SQL
topics = mysql_query(<<~SQL).to_a
SELECT tid id
, title
, state
@ -176,7 +185,6 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
ORDER BY tid
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if topics.empty?
@ -206,17 +214,16 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
puts "", "importing posts..."
last_post_id = -1
total_posts = mysql_query(<<~SQL
total_posts = mysql_query(<<~SQL).first["count"]
SELECT COUNT(*) count
FROM posts
WHERE new_topic = 0
AND pdelete_time = 0
AND queued = 0
SQL
).first["count"]
batches(BATCH_SIZE) do |offset|
posts = mysql_query(<<~SQL
posts = mysql_query(<<~SQL).to_a
SELECT pid id
, author_id
, post_date
@ -230,7 +237,6 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
ORDER BY pid
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if posts.empty?
@ -276,17 +282,16 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
puts "", "import personal topics..."
last_personal_topic_id = -1
total_personal_topics = mysql_query(<<~SQL
total_personal_topics = mysql_query(<<~SQL).first["count"]
SELECT COUNT(*) count
FROM message_topics
JOIN message_posts ON msg_topic_id = mt_id
WHERE mt_is_deleted = 0
AND msg_is_first_post = 1
SQL
).first["count"]
batches(BATCH_SIZE) do |offset|
personal_topics = mysql_query(<<~SQL
personal_topics = mysql_query(<<~SQL).to_a
SELECT mt_id id
, mt_date
, mt_title
@ -302,7 +307,6 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
ORDER BY mt_id
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if personal_topics.empty?
@ -312,7 +316,8 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
next if post_id_from_imported_post_id("pt-#{pt["id"]}")
user_id = user_id_from_imported_user_id(pt["mt_starter_id"]) || -1
user_ids = [pt["mt_to_member_id"]] + pt["mt_invited_members"].scan(/i:(\d+);/).flatten.map(&:to_i)
user_ids =
[pt["mt_to_member_id"]] + pt["mt_invited_members"].scan(/i:(\d+);/).flatten.map(&:to_i)
user_ids.map! { |id| user_id_from_imported_user_id(id) }
user_ids.compact!
user_ids.uniq!
@ -334,10 +339,13 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
puts "", "importing personal posts..."
last_personal_post_id = -1
total_personal_posts = mysql_query("SELECT COUNT(*) count FROM message_posts WHERE msg_is_first_post = 0").first["count"]
total_personal_posts =
mysql_query("SELECT COUNT(*) count FROM message_posts WHERE msg_is_first_post = 0").first[
"count"
]
batches(BATCH_SIZE) do |offset|
personal_posts = mysql_query(<<~SQL
personal_posts = mysql_query(<<~SQL).to_a
SELECT msg_id id
, msg_topic_id
, msg_date
@ -349,7 +357,6 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
ORDER BY msg_id
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if personal_posts.empty?
@ -374,27 +381,32 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
def clean_up(raw, user_id = -1)
raw.encode!("utf-8", "utf-8", invalid: :replace, undef: :replace, replace: "")
raw.gsub!(/<(.+)>&nbsp;<\/\1>/, "\n\n")
raw.gsub!(%r{<(.+)>&nbsp;</\1>}, "\n\n")
doc = Nokogiri::HTML5.fragment(raw)
doc.css("blockquote.ipsBlockquote").each do |bq|
post_id = post_id_from_imported_post_id(bq["data-cid"])
if post = Post.find_by(id: post_id)
bq.replace %{<br>[quote="#{post.user.username},post:#{post.post_number},topic:#{post.topic_id}"]\n#{bq.inner_html}\n[/quote]<br>}
doc
.css("blockquote.ipsBlockquote")
.each do |bq|
post_id = post_id_from_imported_post_id(bq["data-cid"])
if post = Post.find_by(id: post_id)
bq.replace %{<br>[quote="#{post.user.username},post:#{post.post_number},topic:#{post.topic_id}"]\n#{bq.inner_html}\n[/quote]<br>}
end
end
end
markdown = ReverseMarkdown.convert(doc.to_html)
markdown.gsub!(/\[attachment=(\d+):.+\]/) do
if a = mysql_query("SELECT attach_file, attach_location FROM attachments WHERE attach_id = #{$1}").first
if a =
mysql_query(
"SELECT attach_file, attach_location FROM attachments WHERE attach_id = #{$1}",
).first
path = File.join(UPLOADS_DIR, a["attach_location"])
if File.exist?(path)
begin
upload = create_upload(user_id, path, a["attach_file"])
return html_for_upload(upload, a["attach_file"]) if upload.persisted?
rescue
rescue StandardError
end
end
end
@ -406,7 +418,6 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
def mysql_query(sql)
@client.query(sql)
end
end
ImportScripts::IPBoard3.new.perform

View File

@ -1,25 +1,26 @@
# frozen_string_literal: true
require "mysql2"
require_relative 'base'
require_relative "base"
class ImportScripts::JForum < ImportScripts::Base
BATCH_SIZE = 1000
REMOTE_AVATAR_REGEX ||= /\Ahttps?:\/\//i
REMOTE_AVATAR_REGEX ||= %r{\Ahttps?://}i
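[Editor's note] Regex literals containing forward slashes are rewritten with `%r{}` delimiters, as in `REMOTE_AVATAR_REGEX` above, so the slashes no longer need escaping. The rewritten literal matches exactly the same strings:

    url_re = %r{\Ahttps?://}i
    url_re.match?("https://example.com") # => true
    url_re.match?("ftp://example.com")   # => false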
def initialize
super
@settings = YAML.safe_load(File.read(ARGV.first), symbolize_names: true)
@database_client = Mysql2::Client.new(
host: @settings[:database][:host],
port: @settings[:database][:port],
username: @settings[:database][:username],
password: @settings[:database][:password],
database: @settings[:database][:schema],
reconnect: true
)
@database_client =
Mysql2::Client.new(
host: @settings[:database][:host],
port: @settings[:database][:port],
username: @settings[:database][:username],
password: @settings[:database][:password],
database: @settings[:database][:schema],
reconnect: true,
)
end
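[Editor's note] When the right-hand side of an assignment is a call that exceeds the line limit, syntax_tree breaks after the `=`, indents the call one level, and adds a trailing comma to the final keyword argument, as in the `Mysql2::Client.new` rewrite above. A toy class with the same shape (the values are placeholders, not the script's real settings):

    class Client
      def initialize(host:, port:, username:, password:)
        @host, @port, @username, @password = host, port, username, password
      end
    end

    client =
      Client.new(
        host: "localhost",
        port: 3306,
        username: "import",
        password: "secret", # trailing comma on the final keyword argument
      )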
def execute
@ -39,7 +40,7 @@ class ImportScripts::JForum < ImportScripts::Base
end
def import_users
puts '', 'creating users'
puts "", "creating users"
total_count = count("SELECT COUNT(1) AS count FROM jforum_users")
last_user_id = 0
@ -69,9 +70,7 @@ class ImportScripts::JForum < ImportScripts::Base
active: row[:user_active] == 1,
location: row[:user_from],
custom_fields: user_custom_fields(row),
post_create_action: proc do |user|
import_avatar(user, row[:user_avatar])
end
post_create_action: proc { |user| import_avatar(user, row[:user_avatar]) },
}
end
end
@ -84,13 +83,14 @@ class ImportScripts::JForum < ImportScripts::Base
@settings[:custom_fields].map do |field|
columns << (field[:alias] ? "#{field[:column]} AS #{field[:alias]}" : field[:column])
end
", #{columns.join(', ')}"
", #{columns.join(", ")}"
end
def user_fields
@user_fields ||= begin
Hash[UserField.all.map { |field| [field.name, field] }]
end
@user_fields ||=
begin
Hash[UserField.all.map { |field| [field.name, field] }]
end
end
def user_custom_fields(row)
@ -124,7 +124,11 @@ class ImportScripts::JForum < ImportScripts::Base
if File.file?(path)
@uploader.create_avatar(user, path)
elsif avatar_source.match?(REMOTE_AVATAR_REGEX)
UserAvatar.import_url_for_user(avatar_source, user) rescue nil
begin
UserAvatar.import_url_for_user(avatar_source, user)
rescue StandardError
nil
end
end
end
@ -218,10 +222,11 @@ class ImportScripts::JForum < ImportScripts::Base
id: "C#{row[:categories_id]}",
name: row[:title],
position: row[:display_order],
post_create_action: proc do |category|
url = File.join(@settings[:permalink_prefix], "forums/list/#{row[:categories_id]}.page")
Permalink.create(url: url, category_id: category.id) unless Permalink.find_by(url: url)
end
post_create_action:
proc do |category|
url = File.join(@settings[:permalink_prefix], "forums/list/#{row[:categories_id]}.page")
Permalink.create(url: url, category_id: category.id) unless Permalink.find_by(url: url)
end,
}
end
@ -237,17 +242,19 @@ class ImportScripts::JForum < ImportScripts::Base
name: row[:forum_name],
description: row[:forum_desc],
position: row[:forum_order],
parent_category_id: @lookup.category_id_from_imported_category_id("C#{row[:categories_id]}"),
post_create_action: proc do |category|
url = File.join(@settings[:permalink_prefix], "forums/show/#{row[:forum_id]}.page")
Permalink.create(url: url, category_id: category.id) unless Permalink.find_by(url: url)
end
parent_category_id:
@lookup.category_id_from_imported_category_id("C#{row[:categories_id]}"),
post_create_action:
proc do |category|
url = File.join(@settings[:permalink_prefix], "forums/show/#{row[:forum_id]}.page")
Permalink.create(url: url, category_id: category.id) unless Permalink.find_by(url: url)
end,
}
end
end
def import_posts
puts '', 'creating topics and posts'
puts "", "creating topics and posts"
total_count = count("SELECT COUNT(1) AS count FROM jforum_posts")
last_post_id = 0
@ -286,7 +293,7 @@ class ImportScripts::JForum < ImportScripts::Base
user_id: user_id,
created_at: row[:post_time],
raw: post_text,
import_topic_id: row[:topic_id]
import_topic_id: row[:topic_id],
}
if row[:topic_acceptedanswer_post_id] == row[:post_id]
@ -312,7 +319,9 @@ class ImportScripts::JForum < ImportScripts::Base
TopicViewItem.add(post.topic_id, row[:poster_ip], post.user_id, post.created_at, true)
end
mapped[:tags] = @tags_by_import_forum_id[row[:forum_id]] if @settings[:import_categories_as_tags]
mapped[:tags] = @tags_by_import_forum_id[row[:forum_id]] if @settings[
:import_categories_as_tags
]
mapped[:category] = @lookup.category_id_from_imported_category_id(row[:forum_id])
mapped
@ -470,7 +479,11 @@ class ImportScripts::JForum < ImportScripts::Base
category_id = @lookup.category_id_from_imported_category_id(row[:forum_id])
if user && category_id
CategoryUser.set_notification_level_for_category(user, NotificationLevels.all[:watching], category_id)
CategoryUser.set_notification_level_for_category(
user,
NotificationLevels.all[:watching],
category_id,
)
end
end
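[Editor's note] Long argument lists that overflow the line limit are wrapped one argument per line with a trailing comma, as in the `set_notification_level_for_category` call above. Sketched with a hypothetical method of the same shape:

    def set_level(user, level, category_id)
      [user, level, category_id]
    end

    set_level(
      "alice",
      :watching,
      42, # one argument per line, trailing comma included
    )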
@ -511,7 +524,11 @@ class ImportScripts::JForum < ImportScripts::Base
topic = @lookup.topic_lookup_from_imported_post_id(row[:topic_first_post_id])
if user_id && topic
TopicUser.change(user_id, topic[:topic_id], notification_level: NotificationLevels.all[:watching])
TopicUser.change(
user_id,
topic[:topic_id],
notification_level: NotificationLevels.all[:watching],
)
end
current_index += 1
@ -545,17 +562,17 @@ class ImportScripts::JForum < ImportScripts::Base
end
def fix_bbcode_tag!(tag:, text:)
text.gsub!(/\s+(\[#{tag}\].*?\[\/#{tag}\])/im, '\1')
text.gsub!(%r{\s+(\[#{tag}\].*?\[/#{tag}\])}im, '\1')
text.gsub!(/(\[#{tag}.*?\])(?!$)/i) { "#{$1}\n" }
text.gsub!(/((?<!^)\[#{tag}.*?\])/i) { "\n#{$1}" }
text.gsub!(/(\[\/#{tag}\])(?!$)/i) { "#{$1}\n" }
text.gsub!(/((?<!^)\[\/#{tag}\])/i) { "\n#{$1}" }
text.gsub!(%r{(\[/#{tag}\])(?!$)}i) { "#{$1}\n" }
text.gsub!(%r{((?<!^)\[/#{tag}\])}i) { "\n#{$1}" }
end
def fix_inline_bbcode!(tag:, text:)
text.gsub!(/\[(#{tag}.*?)\](.*?)\[\/#{tag}\]/im) do
text.gsub!(%r{\[(#{tag}.*?)\](.*?)\[/#{tag}\]}im) do
beginning_tag = $1
content = $2.gsub(/(\n{2,})/) { "[/#{tag}]#{$1}[#{beginning_tag}]" }
"[#{beginning_tag}]#{content}[/#{tag}]"

View File

@ -1,12 +1,11 @@
# frozen_string_literal: true
# Jive importer
require 'nokogiri'
require 'csv'
require "nokogiri"
require "csv"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::Jive < ImportScripts::Base
BATCH_SIZE = 1000
CATEGORY_IDS = [2023, 2003, 2004, 2042, 2036, 2029] # categories that should be imported
@ -17,9 +16,9 @@ class ImportScripts::Jive < ImportScripts::Base
puts "loading post mappings..."
@post_number_map = {}
Post.pluck(:id, :post_number).each do |post_id, post_number|
@post_number_map[post_id] = post_number
end
Post
.pluck(:id, :post_number)
.each { |post_id, post_number| @post_number_map[post_id] = post_number }
end
def created_post(post)
@ -47,19 +46,13 @@ class ImportScripts::Jive < ImportScripts::Base
end
def initialize(cols)
cols.each_with_index do |col, idx|
self.class.public_send(:define_method, col) do
@row[idx]
end
end
cols.each_with_index { |col, idx| self.class.public_send(:define_method, col) { @row[idx] } }
end
end
def load_user_batch!(users, offset, total)
if users.length > 0
create_users(users, offset: offset, total: total) do |user|
user
end
create_users(users, offset: offset, total: total) { |user| user }
users.clear
end
end
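[Editor's note] `do ... end` blocks whose body is a single expression collapse to one-line brace blocks, which is how the three-line `create_users(...) do |user| ... end` above becomes `create_users(...) { |user| user }`. The same rule in isolation:

    # Before:
    #   [1, 2, 3].map do |n|
    #     n * 2
    #   end
    # After:
    [1, 2, 3].map { |n| n * 2 } # => [2, 4, 6]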
@ -72,53 +65,55 @@ class ImportScripts::Jive < ImportScripts::Base
current_row = +""
double_quote_count = 0
File.open(filename).each_line do |line|
File
.open(filename)
.each_line do |line|
line.gsub!(/\\(.{1})/) { |m| m[-1] == '"' ? '""' : m[-1] }
line.strip!
line.gsub!(/\\(.{1})/) { |m| m[-1] == '"' ? '""' : m[-1] }
line.strip!
current_row << "\n" unless current_row.empty?
current_row << line
current_row << "\n" unless current_row.empty?
current_row << line
double_quote_count += line.scan('"').count
double_quote_count += line.scan('"').count
next if double_quote_count % 2 == 1
if double_quote_count % 2 == 1
next
end
raw =
begin
CSV.parse(current_row)
rescue CSV::MalformedCSVError => e
puts e.message
puts "*" * 100
puts "Bad row skipped, line is: #{line}"
puts
puts current_row
puts
puts "double quote count is : #{double_quote_count}"
puts "*" * 100
raw = begin
CSV.parse(current_row)
rescue CSV::MalformedCSVError => e
puts e.message
puts "*" * 100
puts "Bad row skipped, line is: #{line}"
puts
puts current_row
puts
puts "double quote count is : #{double_quote_count}"
puts "*" * 100
current_row = ""
double_quote_count = 0
next
end[
0
]
current_row = ""
double_quote_count = 0
next
end[0]
if first
row = RowResolver.create(raw)
if first
row = RowResolver.create(raw)
current_row = ""
double_quote_count = 0
first = false
next
end
row.load(raw)
yield row
current_row = ""
double_quote_count = 0
first = false
next
end
row.load(raw)
yield row
current_row = ""
double_quote_count = 0
end
end
def total_rows(table)
@ -129,13 +124,9 @@ class ImportScripts::Jive < ImportScripts::Base
puts "", "importing groups..."
rows = []
csv_parse("groups") do |row|
rows << { id: row.groupid, name: row.name }
end
csv_parse("groups") { |row| rows << { id: row.groupid, name: row.name } }
create_groups(rows) do |row|
row
end
create_groups(rows) { |row| row }
end
def import_users
@ -147,15 +138,12 @@ class ImportScripts::Jive < ImportScripts::Base
total = total_rows("users")
csv_parse("users") do |row|
id = row.userid
email = "#{row.email}"
# fake it
if row.email.blank? || row.email !~ /@/
email = fake_email
end
email = fake_email if row.email.blank? || row.email !~ /@/
name = "#{row.firstname} #{row.lastname}"
username = row.username
@ -175,14 +163,11 @@ class ImportScripts::Jive < ImportScripts::Base
created_at: created_at,
last_seen_at: last_seen_at,
active: is_activated.to_i == 1,
approved: true
approved: true,
}
count += 1
if count % BATCH_SIZE == 0
load_user_batch! users, count - users.length, total
end
load_user_batch! users, count - users.length, total if count % BATCH_SIZE == 0
end
load_user_batch! users, count, total
@ -195,9 +180,7 @@ class ImportScripts::Jive < ImportScripts::Base
user_id = user_id_from_imported_user_id(row.userid)
group_id = group_id_from_imported_group_id(row.groupid)
if user_id && group_id
GroupUser.find_or_create_by(user_id: user_id, group_id: group_id)
end
GroupUser.find_or_create_by(user_id: user_id, group_id: group_id) if user_id && group_id
end
end
@ -209,9 +192,7 @@ class ImportScripts::Jive < ImportScripts::Base
rows << { id: row.communityid, name: "#{row.name} (#{row.communityid})" }
end
create_categories(rows) do |row|
row
end
create_categories(rows) { |row| row }
end
def normalize_raw!(raw)
@ -219,9 +200,7 @@ class ImportScripts::Jive < ImportScripts::Base
raw = raw[5..-6]
doc = Nokogiri::HTML5.fragment(raw)
doc.css('img').each do |img|
img.remove if img['class'] == "jive-image"
end
doc.css("img").each { |img| img.remove if img["class"] == "jive-image" }
raw = doc.to_html
raw = raw[4..-1]
@ -231,7 +210,6 @@ class ImportScripts::Jive < ImportScripts::Base
def import_post_batch!(posts, topics, offset, total)
create_posts(posts, total: total, offset: offset) do |post|
mapped = {}
mapped[:id] = post[:id]
@ -271,7 +249,7 @@ class ImportScripts::Jive < ImportScripts::Base
mapped
end
posts.clear
posts.clear
end
def import_posts
@ -281,7 +259,6 @@ class ImportScripts::Jive < ImportScripts::Base
thread_map = {}
csv_parse("messages") do |thread|
next unless CATEGORY_IDS.include?(thread.containerid.to_i)
if !thread.parentmessageid
@ -291,32 +268,38 @@ class ImportScripts::Jive < ImportScripts::Base
#IMAGE UPLOADER
if thread.imagecount
Dir.foreach("/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}") do |item|
next if item == ('.') || item == ('..') || item == ('.DS_Store')
photo_path = "/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}/#{item}"
Dir.foreach(
"/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}",
) do |item|
next if item == (".") || item == ("..") || item == (".DS_Store")
photo_path =
"/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}/#{item}"
upload = create_upload(thread.userid, photo_path, File.basename(photo_path))
if upload.persisted?
puts "Image upload is successful for #{photo_path}, new path is #{upload.url}!"
thread.body.gsub!(item, upload.url)
else
puts "Error: Image upload is not successful for #{photo_path}!"
end
if upload.persisted?
puts "Image upload is successful for #{photo_path}, new path is #{upload.url}!"
thread.body.gsub!(item, upload.url)
else
puts "Error: Image upload is not successful for #{photo_path}!"
end
end
end
#ATTACHMENT UPLOADER
if thread.attachmentcount
Dir.foreach("/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}") do |item|
next if item == ('.') || item == ('..') || item == ('.DS_Store')
attach_path = "/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}/#{item}"
Dir.foreach(
"/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}",
) do |item|
next if item == (".") || item == ("..") || item == (".DS_Store")
attach_path =
"/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}/#{item}"
upload = create_upload(thread.userid, attach_path, File.basename(attach_path))
if upload.persisted?
puts "Attachment upload is successful for #{attach_path}, new path is #{upload.url}!"
thread.body.gsub!(item, upload.url)
thread.body << "<br/><br/> #{attachment_html(upload, item)}"
else
puts "Error: Attachment upload is not successful for #{attach_path}!"
end
if upload.persisted?
puts "Attachment upload is successful for #{attach_path}, new path is #{upload.url}!"
thread.body.gsub!(item, upload.url)
thread.body << "<br/><br/> #{attachment_html(upload, item)}"
else
puts "Error: Attachment upload is not successful for #{attach_path}!"
end
end
end
@ -329,7 +312,6 @@ class ImportScripts::Jive < ImportScripts::Base
body: normalize_raw!(thread.body || thread.subject || "<missing>"),
created_at: DateTime.parse(thread.creationdate),
}
end
end
@ -348,35 +330,40 @@ class ImportScripts::Jive < ImportScripts::Base
next unless CATEGORY_IDS.include?(thread.containerid.to_i)
if thread.parentmessageid
#IMAGE UPLOADER
if thread.imagecount
Dir.foreach("/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}") do |item|
next if item == ('.') || item == ('..') || item == ('.DS_Store')
photo_path = "/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}/#{item}"
Dir.foreach(
"/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}",
) do |item|
next if item == (".") || item == ("..") || item == (".DS_Store")
photo_path =
"/var/www/discourse/script/import_scripts/jive/img/#{thread.messageid}/#{item}"
upload = create_upload(thread.userid, photo_path, File.basename(photo_path))
if upload.persisted?
puts "Image upload is successful for #{photo_path}, new path is #{upload.url}!"
thread.body.gsub!(item, upload.url)
else
puts "Error: Image upload is not successful for #{photo_path}!"
end
if upload.persisted?
puts "Image upload is successful for #{photo_path}, new path is #{upload.url}!"
thread.body.gsub!(item, upload.url)
else
puts "Error: Image upload is not successful for #{photo_path}!"
end
end
end
#ATTACHMENT UPLOADER
if thread.attachmentcount
Dir.foreach("/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}") do |item|
next if item == ('.') || item == ('..') || item == ('.DS_Store')
attach_path = "/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}/#{item}"
Dir.foreach(
"/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}",
) do |item|
next if item == (".") || item == ("..") || item == (".DS_Store")
attach_path =
"/var/www/discourse/script/import_scripts/jive/attach/#{thread.messageid}/#{item}"
upload = create_upload(thread.userid, attach_path, File.basename(attach_path))
if upload.persisted?
puts "Attachment upload is successful for #{attach_path}, new path is #{upload.url}!"
thread.body.gsub!(item, upload.url)
thread.body << "<br/><br/> #{attachment_html(upload, item)}"
else
puts "Error: Attachment upload is not successful for #{attach_path}!"
end
if upload.persisted?
puts "Attachment upload is successful for #{attach_path}, new path is #{upload.url}!"
thread.body.gsub!(item, upload.url)
thread.body << "<br/><br/> #{attachment_html(upload, item)}"
else
puts "Error: Attachment upload is not successful for #{attach_path}!"
end
end
end
@ -386,7 +373,7 @@ class ImportScripts::Jive < ImportScripts::Base
user_id: thread.userid,
title: thread.subject,
body: normalize_raw!(thread.body),
created_at: DateTime.parse(thread.creationdate)
created_at: DateTime.parse(thread.creationdate),
}
posts << row
count += 1
@ -399,7 +386,6 @@ class ImportScripts::Jive < ImportScripts::Base
import_post_batch!(posts, topic_map, count - posts.length, total) if posts.length > 0
end
end
unless ARGV[0] && Dir.exist?(ARGV[0])

View File

@ -7,7 +7,6 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb")
# https://developers.jivesoftware.com/api/v3/cloud/rest/index.html
class ImportScripts::JiveApi < ImportScripts::Base
USER_COUNT ||= 1000
POST_COUNT ||= 100
STAFF_GUARDIAN ||= Guardian.new(Discourse.system_user)
@ -16,37 +15,141 @@ class ImportScripts::JiveApi < ImportScripts::Base
#############################
# WHOLE CATEGORY OF CONTENT #
#############################
# Announcement & News
{ jive_object: { type: 37, id: 1004 }, filters: { created_after: 1.year.ago, type: "post" }, category_id: 7 },
{
jive_object: {
type: 37,
id: 1004,
},
filters: {
created_after: 1.year.ago,
type: "post",
},
category_id: 7,
},
# Questions & Answers / General Discussions
{ jive_object: { type: 14, id: 2006 }, filters: { created_after: 6.months.ago, type: "discussion" }, category: Proc.new { |c| c["question"] ? 5 : 21 } },
{
jive_object: {
type: 14,
id: 2006,
},
filters: {
created_after: 6.months.ago,
type: "discussion",
},
category: Proc.new { |c| c["question"] ? 5 : 21 },
},
# Anywhere beta
{ jive_object: { type: 14, id: 2052 }, filters: { created_after: 6.months.ago, type: "discussion" }, category_id: 22 },
{
jive_object: {
type: 14,
id: 2052,
},
filters: {
created_after: 6.months.ago,
type: "discussion",
},
category_id: 22,
},
# Tips & Tricks
{ jive_object: { type: 37, id: 1284 }, filters: { type: "post" }, category_id: 6 },
{ jive_object: { type: 37, id: 1319 }, filters: { type: "post" }, category_id: 6 },
{ jive_object: { type: 37, id: 1177 }, filters: { type: "post" }, category_id: 6 },
{ jive_object: { type: 37, id: 1165 }, filters: { type: "post" }, category_id: 6 },
# Ambassadors
{ jive_object: { type: 700, id: 1001 }, filters: { type: "discussion" }, authenticated: true, category_id: 8 },
{
jive_object: {
type: 700,
id: 1001,
},
filters: {
type: "discussion",
},
authenticated: true,
category_id: 8,
},
# Experts
{ jive_object: { type: 700, id: 1034 }, filters: { type: "discussion" }, authenticated: true, category_id: 15 },
{
jive_object: {
type: 700,
id: 1034,
},
filters: {
type: "discussion",
},
authenticated: true,
category_id: 15,
},
# Feature Requests
{ jive_object: { type: 14, id: 2015 }, filters: { type: "idea" }, category_id: 31 },
####################
# SELECTED CONTENT #
####################
# Announcement & News
{ jive_object: { type: 37, id: 1004 }, filters: { entities: { 38 => [1345, 1381, 1845, 2046, 2060, 2061] } }, category_id: 7 },
{
jive_object: {
type: 37,
id: 1004,
},
filters: {
entities: {
38 => [1345, 1381, 1845, 2046, 2060, 2061],
},
},
category_id: 7,
},
# Problem Solving
{ jive_object: { type: 14, id: 2006 }, filters: { entities: { 2 => [116685, 160745, 177010, 223482, 225036, 233228, 257882, 285103, 292297, 345243, 363250, 434546] } }, category_id: 10 },
{
jive_object: {
type: 14,
id: 2006,
},
filters: {
entities: {
2 => [
116_685,
160_745,
177_010,
223_482,
225_036,
233_228,
257_882,
285_103,
292_297,
345_243,
363_250,
434_546,
],
},
},
category_id: 10,
},
# General Discussions
{ jive_object: { type: 14, id: 2006 }, filters: { entities: { 2 => [178203, 188350, 312734] } }, category_id: 21 },
{
jive_object: {
type: 14,
id: 2006,
},
filters: {
entities: {
2 => [178_203, 188_350, 312_734],
},
},
category_id: 21,
},
# Questions & Answers
{ jive_object: { type: 14, id: 2006 }, filters: { entities: { 2 => [418811] } }, category_id: 5 },
{
jive_object: {
type: 14,
id: 2006,
},
filters: {
entities: {
2 => [418_811],
},
},
category_id: 5,
},
]
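[Editor's note] As the entity id lists above suggest, syntax_tree also inserts underscore separators into longer integer literals (`116685` becomes `116_685`, while four-digit ids like `1004` are left alone). The underscores are purely visual and do not change the value:

    116_685 == 116685               # => true
    [178_203, 188_350, 312_734].sum # => 679287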
def initialize
@ -75,9 +178,7 @@ class ImportScripts::JiveApi < ImportScripts::Base
people = get("people/email/#{user.email}?fields=initialLogin,-resources", true)
if people && people["initialLogin"].present?
created_at = DateTime.parse(people["initialLogin"])
if user.created_at > created_at
user.update_columns(created_at: created_at)
end
user.update_columns(created_at: created_at) if user.created_at > created_at
end
end
end
@ -89,7 +190,11 @@ class ImportScripts::JiveApi < ImportScripts::Base
start_index = [0, UserCustomField.where(name: "import_id").count - USER_COUNT].max
loop do
users = get("people/@all?fields=initialLogin,emails,displayName,mentionName,thumbnailUrl,-resources&count=#{USER_COUNT}&startIndex=#{start_index}", true)
users =
get(
"people/@all?fields=initialLogin,emails,displayName,mentionName,thumbnailUrl,-resources&count=#{USER_COUNT}&startIndex=#{start_index}",
true,
)
create_users(users["list"], offset: imported_users) do |user|
{
id: user["id"],
@ -113,7 +218,11 @@ class ImportScripts::JiveApi < ImportScripts::Base
TO_IMPORT.each do |to_import|
puts Time.now
entity = to_import[:jive_object]
places = get("places?fields=placeID,name,-resources&filter=entityDescriptor(#{entity[:type]},#{entity[:id]})", to_import[:authenticated])
places =
get(
"places?fields=placeID,name,-resources&filter=entityDescriptor(#{entity[:type]},#{entity[:id]})",
to_import[:authenticated],
)
import_place_contents(places["list"][0], to_import) if places && places["list"].present?
end
end
@ -125,19 +234,28 @@ class ImportScripts::JiveApi < ImportScripts::Base
if to_import.dig(:filters, :entities).present?
path = "contents"
entities = to_import[:filters][:entities].flat_map { |type, ids| ids.map { |id| "#{type},#{id}" } }
entities =
to_import[:filters][:entities].flat_map { |type, ids| ids.map { |id| "#{type},#{id}" } }
filters = "filter=entityDescriptor(#{entities.join(",")})"
else
path = "places/#{place["placeID"]}/contents"
filters = +"filter=status(published)"
if to_import[:filters]
filters << "&filter=type(#{to_import[:filters][:type]})" if to_import[:filters][:type].present?
filters << "&filter=creationDate(null,#{to_import[:filters][:created_after].strftime("%Y-%m-%dT%TZ")})" if to_import[:filters][:created_after].present?
if to_import[:filters][:type].present?
filters << "&filter=type(#{to_import[:filters][:type]})"
end
if to_import[:filters][:created_after].present?
filters << "&filter=creationDate(null,#{to_import[:filters][:created_after].strftime("%Y-%m-%dT%TZ")})"
end
end
end
loop do
contents = get("#{path}?#{filters}&sort=dateCreatedAsc&count=#{POST_COUNT}&startIndex=#{start_index}", to_import[:authenticated])
contents =
get(
"#{path}?#{filters}&sort=dateCreatedAsc&count=#{POST_COUNT}&startIndex=#{start_index}",
to_import[:authenticated],
)
contents["list"].each do |content|
content_id = content["contentID"].presence || "#{content["type"]}_#{content["id"]}"
@ -149,7 +267,8 @@ class ImportScripts::JiveApi < ImportScripts::Base
created_at: content["published"],
title: @htmlentities.decode(content["subject"]),
raw: process_raw(content["content"]["text"]),
user_id: user_id_from_imported_user_id(content["author"]["id"]) || Discourse::SYSTEM_USER_ID,
user_id:
user_id_from_imported_user_id(content["author"]["id"]) || Discourse::SYSTEM_USER_ID,
views: content["viewCount"],
custom_fields: custom_fields,
}
@ -165,10 +284,16 @@ class ImportScripts::JiveApi < ImportScripts::Base
if parent_post&.id && parent_post&.topic_id
resources = content["resources"]
import_likes(resources["likes"]["ref"], parent_post.id) if content["likeCount"].to_i > 0 && resources.dig("likes", "ref").present?
if content["likeCount"].to_i > 0 && resources.dig("likes", "ref").present?
import_likes(resources["likes"]["ref"], parent_post.id)
end
if content["replyCount"].to_i > 0
import_comments(resources["comments"]["ref"], parent_post.topic_id, to_import) if resources.dig("comments", "ref").present?
import_messages(resources["messages"]["ref"], parent_post.topic_id, to_import) if resources.dig("messages", "ref").present?
if resources.dig("comments", "ref").present?
import_comments(resources["comments"]["ref"], parent_post.topic_id, to_import)
end
if resources.dig("messages", "ref").present?
import_messages(resources["messages"]["ref"], parent_post.topic_id, to_import)
end
end
end
end
@ -198,7 +323,11 @@ class ImportScripts::JiveApi < ImportScripts::Base
start_index = 0
loop do
comments = get("#{url}?hierarchical=false&count=#{POST_COUNT}&startIndex=#{start_index}", to_import[:authenticated])
comments =
get(
"#{url}?hierarchical=false&count=#{POST_COUNT}&startIndex=#{start_index}",
to_import[:authenticated],
)
break if comments["error"]
comments["list"].each do |comment|
next if post_id_from_imported_post_id(comment["id"])
@ -207,9 +336,12 @@ class ImportScripts::JiveApi < ImportScripts::Base
id: comment["id"],
created_at: comment["published"],
topic_id: topic_id,
user_id: user_id_from_imported_user_id(comment["author"]["id"]) || Discourse::SYSTEM_USER_ID,
user_id:
user_id_from_imported_user_id(comment["author"]["id"]) || Discourse::SYSTEM_USER_ID,
raw: process_raw(comment["content"]["text"]),
custom_fields: { import_id: comment["id"] },
custom_fields: {
import_id: comment["id"],
},
}
if (parent_post_id = comment["parentID"]).to_i > 0
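[Editor's note] Nested hash literals inside a wrapped hash or argument list are expanded to one key per line, which is why the one-line `custom_fields: { import_id: ... }` entry above grows to three lines. In isolation:

    post = {
      id: 42,
      custom_fields: {
        import_id: "pid:42",
      },
    }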
@ -234,7 +366,11 @@ class ImportScripts::JiveApi < ImportScripts::Base
start_index = 0
loop do
messages = get("#{url}?hierarchical=false&count=#{POST_COUNT}&startIndex=#{start_index}", to_import[:authenticated])
messages =
get(
"#{url}?hierarchical=false&count=#{POST_COUNT}&startIndex=#{start_index}",
to_import[:authenticated],
)
break if messages["error"]
messages["list"].each do |message|
next if post_id_from_imported_post_id(message["id"])
@ -243,9 +379,12 @@ class ImportScripts::JiveApi < ImportScripts::Base
id: message["id"],
created_at: message["published"],
topic_id: topic_id,
user_id: user_id_from_imported_user_id(message["author"]["id"]) || Discourse::SYSTEM_USER_ID,
user_id:
user_id_from_imported_user_id(message["author"]["id"]) || Discourse::SYSTEM_USER_ID,
raw: process_raw(message["content"]["text"]),
custom_fields: { import_id: message["id"] },
custom_fields: {
import_id: message["id"],
},
}
post[:custom_fields][:is_accepted_answer] = true if message["answer"]
@ -280,20 +419,25 @@ class ImportScripts::JiveApi < ImportScripts::Base
puts "", "importing bookmarks..."
start_index = 0
fields = "fields=author.id,favoriteObject.id,-resources,-author.resources,-favoriteObject.resources"
fields =
"fields=author.id,favoriteObject.id,-resources,-author.resources,-favoriteObject.resources"
filter = "&filter=creationDate(null,2016-01-01T00:00:00Z)"
loop do
favorites = get("contents?#{fields}&filter=type(favorite)#{filter}&sort=dateCreatedAsc&count=#{POST_COUNT}&startIndex=#{start_index}")
bookmarks_to_create = favorites["list"].map do |favorite|
next unless user_id = user_id_from_imported_user_id(favorite["author"]["id"])
next unless post_id = post_id_from_imported_post_id(favorite["favoriteObject"]["id"])
{ user_id: user_id, post_id: post_id }
end.flatten
favorites =
get(
"contents?#{fields}&filter=type(favorite)#{filter}&sort=dateCreatedAsc&count=#{POST_COUNT}&startIndex=#{start_index}",
)
bookmarks_to_create =
favorites["list"]
.map do |favorite|
next unless user_id = user_id_from_imported_user_id(favorite["author"]["id"])
next unless post_id = post_id_from_imported_post_id(favorite["favoriteObject"]["id"])
{ user_id: user_id, post_id: post_id }
end
.flatten
create_bookmarks(bookmarks_to_create) do |row|
row
end
create_bookmarks(bookmarks_to_create) { |row| row }
break if favorites["list"].size < POST_COUNT || favorites.dig("links", "next").blank?
break unless start_index = favorites["links"]["next"][/startIndex=(\d+)/, 1]
@ -304,22 +448,26 @@ class ImportScripts::JiveApi < ImportScripts::Base
doc = Nokogiri::HTML5.fragment(raw)
# convert emoticon
doc.css("span.emoticon-inline").each do |span|
name = span["class"][/emoticon_(\w+)/, 1]&.downcase
name && Emoji.exists?(name) ? span.replace(":#{name}:") : span.remove
end
doc
.css("span.emoticon-inline")
.each do |span|
name = span["class"][/emoticon_(\w+)/, 1]&.downcase
name && Emoji.exists?(name) ? span.replace(":#{name}:") : span.remove
end
# convert mentions
doc.css("a.jive-link-profile-small").each { |a| a.replace("@#{a.content}") }
# fix links
doc.css("a[href]").each do |a|
if a["href"]["#{@base_uri}/docs/DOC-"]
a["href"] = a["href"][/#{Regexp.escape(@base_uri)}\/docs\/DOC-\d+/]
elsif a["href"][@base_uri]
a.replace(a.inner_html)
doc
.css("a[href]")
.each do |a|
if a["href"]["#{@base_uri}/docs/DOC-"]
a["href"] = a["href"][%r{#{Regexp.escape(@base_uri)}/docs/DOC-\d+}]
elsif a["href"][@base_uri]
a.replace(a.inner_html)
end
end
end
html = doc.at(".jive-rendered-content").to_html
@ -341,17 +489,22 @@ class ImportScripts::JiveApi < ImportScripts::Base
def get(url_or_path, authenticated = false)
tries ||= 3
command = ["curl", "--silent"]
command = %w[curl --silent]
command << "--user \"#{@username}:#{@password}\"" if !!authenticated
command << (url_or_path.start_with?("http") ? "\"#{url_or_path}\"" : "\"#{@base_uri}/api/core/v3/#{url_or_path}\"")
command << (
if url_or_path.start_with?("http")
"\"#{url_or_path}\""
else
"\"#{@base_uri}/api/core/v3/#{url_or_path}\""
end
)
puts command.join(" ") if ENV["VERBOSE"] == "1"
JSON.parse `#{command.join(" ")}`
rescue
rescue StandardError
retry if (tries -= 1) >= 0
end
end
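[Editor's note] The `get` helper above keeps the `tries ||= 3` retry idiom intact: `retry` re-runs the method body from the top, but the `||=` assignment only takes effect on the first pass, so the same counter is decremented on every failure and the method gives up (returning nil) once it goes negative. A self-contained sketch of the same control flow:

    def fetch_with_retries
      tries ||= 3 # set once; a retry re-enters the body without resetting it
      raise "flaky network" if rand < 0.5
      "ok"
    rescue StandardError
      retry if (tries -= 1) >= 0
    end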
ImportScripts::JiveApi.new.perform

View File

@ -6,8 +6,7 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb")
# Edit the constants and initialize method for your import data.
class ImportScripts::JsonGeneric < ImportScripts::Base
JSON_FILE_PATH = ENV['JSON_FILE']
JSON_FILE_PATH = ENV["JSON_FILE"]
BATCH_SIZE ||= 1000
def initialize
@ -30,24 +29,18 @@ class ImportScripts::JsonGeneric < ImportScripts::Base
end
def username_for(name)
result = name.downcase.gsub(/[^a-z0-9\-\_]/, '')
result = name.downcase.gsub(/[^a-z0-9\-\_]/, "")
if result.blank?
result = Digest::SHA1.hexdigest(name)[0...10]
end
result = Digest::SHA1.hexdigest(name)[0...10] if result.blank?
result
end
def import_users
puts '', "Importing users"
puts "", "Importing users"
users = []
@imported_json['topics'].each do |t|
t['posts'].each do |p|
users << p['author'].scrub
end
end
@imported_json["topics"].each { |t| t["posts"].each { |p| users << p["author"].scrub } }
users.uniq!
create_users(users) do |u|
@ -56,7 +49,7 @@ class ImportScripts::JsonGeneric < ImportScripts::Base
username: username_for(u),
name: u,
email: "#{username_for(u)}@example.com",
created_at: Time.now
created_at: Time.now,
}
end
end
@ -67,8 +60,8 @@ class ImportScripts::JsonGeneric < ImportScripts::Base
topics = 0
posts = 0
@imported_json['topics'].each do |t|
first_post = t['posts'][0]
@imported_json["topics"].each do |t|
first_post = t["posts"][0]
next unless first_post
topic = {
@ -77,25 +70,32 @@ class ImportScripts::JsonGeneric < ImportScripts::Base
raw: first_post["body"],
created_at: Time.zone.parse(first_post["date"]),
cook_method: Post.cook_methods[:raw_html],
title: t['title'],
category: ENV['CATEGORY_ID'],
custom_fields: { import_id: "pid:#{first_post['id']}" }
title: t["title"],
category: ENV["CATEGORY_ID"],
custom_fields: {
import_id: "pid:#{first_post["id"]}",
},
}
topic[:pinned_at] = Time.zone.parse(first_post["date"]) if t['pinned']
topic[:pinned_at] = Time.zone.parse(first_post["date"]) if t["pinned"]
topics += 1
parent_post = create_post(topic, topic[:id])
t['posts'][1..-1].each do |p|
create_post({
id: p["id"],
topic_id: parent_post.topic_id,
user_id: user_id_from_imported_user_id(username_for(p["author"])) || -1,
raw: p["body"],
created_at: Time.zone.parse(p["date"]),
cook_method: Post.cook_methods[:raw_html],
custom_fields: { import_id: "pid:#{p['id']}" }
}, p['id'])
t["posts"][1..-1].each do |p|
create_post(
{
id: p["id"],
topic_id: parent_post.topic_id,
user_id: user_id_from_imported_user_id(username_for(p["author"])) || -1,
raw: p["body"],
created_at: Time.zone.parse(p["date"]),
cook_method: Post.cook_methods[:raw_html],
custom_fields: {
import_id: "pid:#{p["id"]}",
},
},
p["id"],
)
posts += 1
end
end
@ -104,6 +104,4 @@ class ImportScripts::JsonGeneric < ImportScripts::Base
end
end
if __FILE__ == $0
ImportScripts::JsonGeneric.new.perform
end
ImportScripts::JsonGeneric.new.perform if __FILE__ == $0
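[Editor's note] The trailing entry-point guard is collapsed to modifier form because it fits on one line. `__FILE__ == $0` is true only when the file is executed directly, so requiring the script from elsewhere does not trigger the import. The same pattern in miniature:

    def main
      puts "running directly"
    end

    # Before:
    #   if __FILE__ == $0
    #     main
    #   end
    # After syntax_tree:
    main if __FILE__ == $0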

View File

@ -4,7 +4,6 @@ require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::Kunena < ImportScripts::Base
KUNENA_DB = "kunena"
def initialize
@ -12,12 +11,13 @@ class ImportScripts::Kunena < ImportScripts::Base
@users = {}
@client = Mysql2::Client.new(
host: "localhost",
username: "root",
#password: "password",
database: KUNENA_DB
)
@client =
Mysql2::Client.new(
host: "localhost",
username: "root",
#password: "password",
database: KUNENA_DB,
)
end
def execute
@ -26,7 +26,8 @@ class ImportScripts::Kunena < ImportScripts::Base
puts "creating users"
create_users(@users) do |id, user|
{ id: id,
{
id: id,
email: user[:email],
username: user[:username],
created_at: user[:created_at],
@ -34,15 +35,25 @@ class ImportScripts::Kunena < ImportScripts::Base
moderator: user[:moderator] ? true : false,
admin: user[:admin] ? true : false,
suspended_at: user[:suspended] ? Time.zone.now : nil,
suspended_till: user[:suspended] ? 100.years.from_now : nil }
suspended_till: user[:suspended] ? 100.years.from_now : nil,
}
end
@users = nil
create_categories(@client.query("SELECT id, parent, name, description, ordering FROM jos_kunena_categories ORDER BY parent, id;")) do |c|
h = { id: c['id'], name: c['name'], description: c['description'], position: c['ordering'].to_i }
if c['parent'].to_i > 0
h[:parent_category_id] = category_id_from_imported_category_id(c['parent'])
create_categories(
@client.query(
"SELECT id, parent, name, description, ordering FROM jos_kunena_categories ORDER BY parent, id;",
),
) do |c|
h = {
id: c["id"],
name: c["name"],
description: c["description"],
position: c["ordering"].to_i,
}
if c["parent"].to_i > 0
h[:parent_category_id] = category_id_from_imported_category_id(c["parent"])
end
h
end
@ -50,9 +61,9 @@ class ImportScripts::Kunena < ImportScripts::Base
import_posts
begin
create_admin(email: 'neil.lalonde@discourse.org', username: UserNameSuggester.suggest('neil'))
create_admin(email: "neil.lalonde@discourse.org", username: UserNameSuggester.suggest("neil"))
rescue => e
puts '', "Failed to create admin user"
puts "", "Failed to create admin user"
puts e.message
end
end
@ -61,38 +72,50 @@ class ImportScripts::Kunena < ImportScripts::Base
# Need to merge data from joomla with kunena
puts "fetching Joomla users data from mysql"
results = @client.query("SELECT id, username, email, registerDate FROM jos_users;", cache_rows: false)
results =
@client.query("SELECT id, username, email, registerDate FROM jos_users;", cache_rows: false)
results.each do |u|
next unless u['id'].to_i > (0) && u['username'].present? && u['email'].present?
username = u['username'].gsub(' ', '_').gsub(/[^A-Za-z0-9_]/, '')[0, User.username_length.end]
next unless u["id"].to_i > (0) && u["username"].present? && u["email"].present?
username = u["username"].gsub(" ", "_").gsub(/[^A-Za-z0-9_]/, "")[0, User.username_length.end]
if username.length < User.username_length.first
username = username * User.username_length.first
end
@users[u['id'].to_i] = { id: u['id'].to_i, username: username, email: u['email'], created_at: u['registerDate'] }
@users[u["id"].to_i] = {
id: u["id"].to_i,
username: username,
email: u["email"],
created_at: u["registerDate"],
}
end
puts "fetching Kunena user data from mysql"
results = @client.query("SELECT userid, signature, moderator, banned FROM jos_kunena_users;", cache_rows: false)
results =
@client.query(
"SELECT userid, signature, moderator, banned FROM jos_kunena_users;",
cache_rows: false,
)
results.each do |u|
next unless u['userid'].to_i > 0
user = @users[u['userid'].to_i]
next unless u["userid"].to_i > 0
user = @users[u["userid"].to_i]
if user
user[:bio] = u['signature']
user[:moderator] = (u['moderator'].to_i == 1)
user[:suspended] = u['banned'].present?
user[:bio] = u["signature"]
user[:moderator] = (u["moderator"].to_i == 1)
user[:suspended] = u["banned"].present?
end
end
end
def import_posts
puts '', "creating topics and posts"
puts "", "creating topics and posts"
total_count = @client.query("SELECT COUNT(*) count FROM jos_kunena_messages m;").first['count']
total_count = @client.query("SELECT COUNT(*) count FROM jos_kunena_messages m;").first["count"]
batch_size = 1000
batches(batch_size) do |offset|
results = @client.query("
results =
@client.query(
"
SELECT m.id id,
m.thread thread,
m.parent parent,
@ -107,31 +130,33 @@ class ImportScripts::Kunena < ImportScripts::Base
ORDER BY m.id
LIMIT #{batch_size}
OFFSET #{offset};
", cache_rows: false)
",
cache_rows: false,
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| p['id'].to_i }
next if all_records_exist? :posts, results.map { |p| p["id"].to_i }
create_posts(results, total: total_count, offset: offset) do |m|
skip = false
mapped = {}
mapped[:id] = m['id']
mapped[:user_id] = user_id_from_imported_user_id(m['userid']) || -1
mapped[:id] = m["id"]
mapped[:user_id] = user_id_from_imported_user_id(m["userid"]) || -1
mapped[:raw] = m["message"]
mapped[:created_at] = Time.zone.at(m['time'])
mapped[:created_at] = Time.zone.at(m["time"])
if m['id'] == m['thread']
mapped[:category] = category_id_from_imported_category_id(m['catid'])
mapped[:title] = m['subject']
if m["id"] == m["thread"]
mapped[:category] = category_id_from_imported_category_id(m["catid"])
mapped[:title] = m["subject"]
else
parent = topic_lookup_from_imported_post_id(m['parent'])
parent = topic_lookup_from_imported_post_id(m["parent"])
if parent
mapped[:topic_id] = parent[:topic_id]
mapped[:reply_to_post_number] = parent[:post_number] if parent[:post_number] > 1
else
puts "Parent post #{m['parent']} doesn't exist. Skipping #{m["id"]}: #{m["subject"][0..40]}"
puts "Parent post #{m["parent"]} doesn't exist. Skipping #{m["id"]}: #{m["subject"][0..40]}"
skip = true
end
end

View File

@ -19,27 +19,21 @@ export PARENT_FIELD="parent_id" # "parent" in some versions
=end
class ImportScripts::Kunena < ImportScripts::Base
DB_HOST ||= ENV['DB_HOST'] || "localhost"
DB_NAME ||= ENV['DB_NAME'] || "kunena"
DB_USER ||= ENV['DB_USER'] || "kunena"
DB_PW ||= ENV['DB_PW'] || "kunena"
KUNENA_PREFIX ||= ENV['KUNENA_PREFIX'] || "jos_" # "iff_" sometimes
IMAGE_PREFIX ||= ENV['IMAGE_PREFIX'] || "http://EXAMPLE.com/media/kunena/attachments"
PARENT_FIELD ||= ENV['PARENT_FIELD'] || "parent_id" # "parent" in some versions
DB_HOST ||= ENV["DB_HOST"] || "localhost"
DB_NAME ||= ENV["DB_NAME"] || "kunena"
DB_USER ||= ENV["DB_USER"] || "kunena"
DB_PW ||= ENV["DB_PW"] || "kunena"
KUNENA_PREFIX ||= ENV["KUNENA_PREFIX"] || "jos_" # "iff_" sometimes
IMAGE_PREFIX ||= ENV["IMAGE_PREFIX"] || "http://EXAMPLE.com/media/kunena/attachments"
PARENT_FIELD ||= ENV["PARENT_FIELD"] || "parent_id" # "parent" in some versions
def initialize
super
@users = {}
@client = Mysql2::Client.new(
host: DB_HOST,
username: DB_USER,
password: DB_PW,
database: DB_NAME
)
@client =
Mysql2::Client.new(host: DB_HOST, username: DB_USER, password: DB_PW, database: DB_NAME)
end
def execute
@ -48,7 +42,8 @@ class ImportScripts::Kunena < ImportScripts::Base
puts "creating users"
create_users(@users) do |id, user|
{ id: id,
{
id: id,
email: user[:email],
username: user[:username],
created_at: user[:created_at],
@ -56,15 +51,25 @@ class ImportScripts::Kunena < ImportScripts::Base
moderator: user[:moderator] ? true : false,
admin: user[:admin] ? true : false,
suspended_at: user[:suspended] ? Time.zone.now : nil,
suspended_till: user[:suspended] ? 100.years.from_now : nil }
suspended_till: user[:suspended] ? 100.years.from_now : nil,
}
end
@users = nil
create_categories(@client.query("SELECT id, #{PARENT_FIELD} as parent_id, name, description, ordering FROM #{KUNENA_PREFIX}kunena_categories ORDER BY #{PARENT_FIELD}, id;")) do |c|
h = { id: c['id'], name: c['name'], description: c['description'], position: c['ordering'].to_i }
if c['parent_id'].to_i > 0
h[:parent_category_id] = category_id_from_imported_category_id(c['parent_id'])
create_categories(
@client.query(
"SELECT id, #{PARENT_FIELD} as parent_id, name, description, ordering FROM #{KUNENA_PREFIX}kunena_categories ORDER BY #{PARENT_FIELD}, id;",
),
) do |c|
h = {
id: c["id"],
name: c["name"],
description: c["description"],
position: c["ordering"].to_i,
}
if c["parent_id"].to_i > 0
h[:parent_category_id] = category_id_from_imported_category_id(c["parent_id"])
end
h
end
@ -72,9 +77,9 @@ class ImportScripts::Kunena < ImportScripts::Base
import_posts
begin
create_admin(email: 'CHANGE@ME.COM', username: UserNameSuggester.suggest('CHANGEME'))
create_admin(email: "CHANGE@ME.COM", username: UserNameSuggester.suggest("CHANGEME"))
rescue => e
puts '', "Failed to create admin user"
puts "", "Failed to create admin user"
puts e.message
end
end
@ -83,38 +88,54 @@ class ImportScripts::Kunena < ImportScripts::Base
# Need to merge data from joomla with kunena
puts "fetching Joomla users data from mysql"
results = @client.query("SELECT id, username, email, registerDate FROM #{KUNENA_PREFIX}users;", cache_rows: false)
results =
@client.query(
"SELECT id, username, email, registerDate FROM #{KUNENA_PREFIX}users;",
cache_rows: false,
)
results.each do |u|
next unless u['id'].to_i > (0) && u['username'].present? && u['email'].present?
username = u['username'].gsub(' ', '_').gsub(/[^A-Za-z0-9_]/, '')[0, User.username_length.end]
next unless u["id"].to_i > (0) && u["username"].present? && u["email"].present?
username = u["username"].gsub(" ", "_").gsub(/[^A-Za-z0-9_]/, "")[0, User.username_length.end]
if username.length < User.username_length.first
username = username * User.username_length.first
end
@users[u['id'].to_i] = { id: u['id'].to_i, username: username, email: u['email'], created_at: u['registerDate'] }
@users[u["id"].to_i] = {
id: u["id"].to_i,
username: username,
email: u["email"],
created_at: u["registerDate"],
}
end
puts "fetching Kunena user data from mysql"
results = @client.query("SELECT userid, signature, moderator, banned FROM #{KUNENA_PREFIX}kunena_users;", cache_rows: false)
results =
@client.query(
"SELECT userid, signature, moderator, banned FROM #{KUNENA_PREFIX}kunena_users;",
cache_rows: false,
)
results.each do |u|
next unless u['userid'].to_i > 0
user = @users[u['userid'].to_i]
next unless u["userid"].to_i > 0
user = @users[u["userid"].to_i]
if user
user[:bio] = u['signature']
user[:moderator] = (u['moderator'].to_i == 1)
user[:suspended] = u['banned'].present?
user[:bio] = u["signature"]
user[:moderator] = (u["moderator"].to_i == 1)
user[:suspended] = u["banned"].present?
end
end
end
def import_posts
puts '', "creating topics and posts"
puts "", "creating topics and posts"
total_count = @client.query("SELECT COUNT(*) count FROM #{KUNENA_PREFIX}kunena_messages m;").first['count']
total_count =
@client.query("SELECT COUNT(*) count FROM #{KUNENA_PREFIX}kunena_messages m;").first["count"]
batch_size = 1000
batches(batch_size) do |offset|
results = @client.query("
results =
@client.query(
"
SELECT m.id id,
m.thread thread,
m.parent parent,
@ -129,33 +150,38 @@ class ImportScripts::Kunena < ImportScripts::Base
ORDER BY m.id
LIMIT #{batch_size}
OFFSET #{offset};
", cache_rows: false)
",
cache_rows: false,
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |p| p['id'].to_i }
next if all_records_exist? :posts, results.map { |p| p["id"].to_i }
create_posts(results, total: total_count, offset: offset) do |m|
skip = false
mapped = {}
mapped[:id] = m['id']
mapped[:user_id] = user_id_from_imported_user_id(m['userid']) || -1
mapped[:id] = m["id"]
mapped[:user_id] = user_id_from_imported_user_id(m["userid"]) || -1
id = m['userid']
mapped[:raw] = m["message"].gsub(/\[attachment=[0-9]+\](.+?)\[\/attachment\]/, "\n#{IMAGE_PREFIX}/#{id}/\\1")
mapped[:created_at] = Time.zone.at(m['time'])
id = m["userid"]
mapped[:raw] = m["message"].gsub(
%r{\[attachment=[0-9]+\](.+?)\[/attachment\]},
"\n#{IMAGE_PREFIX}/#{id}/\\1",
)
mapped[:created_at] = Time.zone.at(m["time"])
if m['parent'] == 0
mapped[:category] = category_id_from_imported_category_id(m['catid'])
mapped[:title] = m['subject']
if m["parent"] == 0
mapped[:category] = category_id_from_imported_category_id(m["catid"])
mapped[:title] = m["subject"]
else
parent = topic_lookup_from_imported_post_id(m['parent'])
parent = topic_lookup_from_imported_post_id(m["parent"])
if parent
mapped[:topic_id] = parent[:topic_id]
mapped[:reply_to_post_number] = parent[:post_number] if parent[:post_number] > 1
else
puts "Parent post #{m['parent']} doesn't exist. Skipping #{m["id"]}: #{m["subject"][0..40]}"
puts "Parent post #{m["parent"]} doesn't exist. Skipping #{m["id"]}: #{m["subject"][0..40]}"
skip = true
end
end

View File

@ -12,16 +12,14 @@
# that was done using import_scripts/support/convert_mysql_xml_to_mysql.rb
#
require 'mysql2'
require 'csv'
require 'reverse_markdown'
require "mysql2"
require "csv"
require "reverse_markdown"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'htmlentities'
require "htmlentities"
# remove table conversion
[:table, :td, :tr, :th, :thead, :tbody].each do |tag|
ReverseMarkdown::Converters.unregister(tag)
end
%i[table td tr th thead tbody].each { |tag| ReverseMarkdown::Converters.unregister(tag) }
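[Editor's note] Symbol and string arrays are normalized to percent literals with square brackets: `[:table, :td, ...]` becomes `%i[table td ...]` above, and `["curl", "--silent"]` earlier in the commit becomes `%w[curl --silent]`. The literals are plain arrays:

    %i[table td tr] == [:table, :td, :tr]           # => true
    %w[curl --silent] == ["curl", "--silent"]       # => true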
class ImportScripts::Lithium < ImportScripts::Base
BATCH_SIZE = 1000
@ -29,11 +27,11 @@ class ImportScripts::Lithium < ImportScripts::Base
# CHANGE THESE BEFORE RUNNING THE IMPORTER
DATABASE = "wd"
PASSWORD = "password"
AVATAR_DIR = '/tmp/avatars'
ATTACHMENT_DIR = '/tmp/attachments'
UPLOAD_DIR = '/tmp/uploads'
AVATAR_DIR = "/tmp/avatars"
ATTACHMENT_DIR = "/tmp/attachments"
UPLOAD_DIR = "/tmp/uploads"
OLD_DOMAIN = 'community.wd.com'
OLD_DOMAIN = "community.wd.com"
TEMP = ""
@ -44,11 +42,10 @@ class ImportScripts::Lithium < ImportScripts::Base
{ name: "user_field_3", profile: "industry" },
]
LITHIUM_PROFILE_FIELDS = "'profile.jobtitle', 'profile.company', 'profile.industry', 'profile.location'"
LITHIUM_PROFILE_FIELDS =
"'profile.jobtitle', 'profile.company', 'profile.industry', 'profile.location'"
USERNAME_MAPPINGS = {
"admins": "admin_user"
}.with_indifferent_access
USERNAME_MAPPINGS = { admins: "admin_user" }.with_indifferent_access
def initialize
super
@ -57,16 +54,16 @@ class ImportScripts::Lithium < ImportScripts::Base
@htmlentities = HTMLEntities.new
@client = Mysql2::Client.new(
host: "localhost",
username: "root",
password: PASSWORD,
database: DATABASE
)
@client =
Mysql2::Client.new(
host: "localhost",
username: "root",
password: PASSWORD,
database: DATABASE,
)
end
def execute
@max_start_id = Post.maximum(:id)
import_groups
@ -94,10 +91,7 @@ class ImportScripts::Lithium < ImportScripts::Base
SQL
create_groups(groups) do |group|
{
id: group["name"],
name: @htmlentities.decode(group["name"]).strip
}
{ id: group["name"], name: @htmlentities.decode(group["name"]).strip }
end
end
@ -106,7 +100,10 @@ class ImportScripts::Lithium < ImportScripts::Base
user_count = mysql_query("SELECT COUNT(*) count FROM users").first["count"]
avatar_files = Dir.entries(AVATAR_DIR)
duplicate_emails = mysql_query("SELECT email_lower FROM users GROUP BY email_lower HAVING COUNT(email_lower) > 1").map { |e| [e["email_lower"], 0] }.to_h
duplicate_emails =
mysql_query(
"SELECT email_lower FROM users GROUP BY email_lower HAVING COUNT(email_lower) > 1",
).map { |e| [e["email_lower"], 0] }.to_h
batches(BATCH_SIZE) do |offset|
users = mysql_query <<-SQL
@ -134,8 +131,8 @@ class ImportScripts::Lithium < ImportScripts::Base
create_users(users, total: user_count, offset: offset) do |user|
user_id = user["id"]
profile = profiles.select { |p| p["user_id"] == user_id }
result = profile.select { |p| p["param"] == "profile.location" }
profile = profiles.select { |p| p["user_id"] == user_id }
result = profile.select { |p| p["param"] == "profile.location" }
location = result.count > 0 ? result.first["nvalue"] : nil
username = user["login_canon"]
username = USERNAME_MAPPINGS[username] if USERNAME_MAPPINGS[username].present?
@ -158,31 +155,32 @@ class ImportScripts::Lithium < ImportScripts::Base
# title: @htmlentities.decode(user["usertitle"]).strip,
# primary_group_id: group_id_from_imported_group_id(user["usergroupid"]),
created_at: unix_time(user["registration_time"]),
post_create_action: proc do |u|
@old_username_to_new_usernames[user["login_canon"]] = u.username
post_create_action:
proc do |u|
@old_username_to_new_usernames[user["login_canon"]] = u.username
# import user avatar
sso_id = u.custom_fields["sso_id"]
if sso_id.present?
prefix = "#{AVATAR_DIR}/#{sso_id}_"
file = get_file(prefix + "actual.jpeg")
file ||= get_file(prefix + "profile.jpeg")
# import user avatar
sso_id = u.custom_fields["sso_id"]
if sso_id.present?
prefix = "#{AVATAR_DIR}/#{sso_id}_"
file = get_file(prefix + "actual.jpeg")
file ||= get_file(prefix + "profile.jpeg")
if file.present?
upload = UploadCreator.new(file, file.path, type: "avatar").create_for(u.id)
u.create_user_avatar unless u.user_avatar
if file.present?
upload = UploadCreator.new(file, file.path, type: "avatar").create_for(u.id)
u.create_user_avatar unless u.user_avatar
if !u.user_avatar.contains_upload?(upload.id)
u.user_avatar.update_columns(custom_upload_id: upload.id)
if !u.user_avatar.contains_upload?(upload.id)
u.user_avatar.update_columns(custom_upload_id: upload.id)
if u.uploaded_avatar_id.nil? ||
!u.user_avatar.contains_upload?(u.uploaded_avatar_id)
u.update_columns(uploaded_avatar_id: upload.id)
if u.uploaded_avatar_id.nil? ||
!u.user_avatar.contains_upload?(u.uploaded_avatar_id)
u.update_columns(uploaded_avatar_id: upload.id)
end
end
end
end
end
end
end,
}
end
end
@ -226,7 +224,7 @@ class ImportScripts::Lithium < ImportScripts::Base
if attr[:user].present?
fields[name] = user[attr[:user]]
elsif attr[:profile].present? && profile.count > 0
result = profile.select { |p| p["param"] == "profile.#{attr[:profile]}" }
result = profile.select { |p| p["param"] == "profile.#{attr[:profile]}" }
fields[name] = result.first["nvalue"] if result.count > 0
end
end
@ -268,8 +266,16 @@ class ImportScripts::Lithium < ImportScripts::Base
imported_user.user_avatar.update(custom_upload_id: upload.id)
imported_user.update(uploaded_avatar_id: upload.id)
ensure
file.close rescue nil
file.unlink rescue nil
begin
file.close
rescue StandardError
nil
end
begin
file.unlink
rescue StandardError
nil
end
end
def import_profile_background(old_user, imported_user)
@ -295,8 +301,16 @@ class ImportScripts::Lithium < ImportScripts::Base
imported_user.user_profile.upload_profile_background(upload)
ensure
file.close rescue nil
file.unlink rescue nil
begin
file.close
rescue StandardError
nil
end
begin
file.unlink
rescue StandardError
nil
end
end
def import_categories
@ -310,14 +324,16 @@ class ImportScripts::Lithium < ImportScripts::Base
ORDER BY n.type_id DESC, n.node_id ASC
SQL
categories = categories.map { |c| (c["name"] = c["c_title"] || c["b_title"] || c["display_id"]) && c }
categories =
categories.map { |c| (c["name"] = c["c_title"] || c["b_title"] || c["display_id"]) && c }
# To prevent duplicate category names
categories = categories.map do |category|
count = categories.to_a.count { |c| c["name"].present? && c["name"] == category["name"] }
category["name"] << " (#{category["node_id"]})" if count > 1
category
end
categories =
categories.map do |category|
count = categories.to_a.count { |c| c["name"].present? && c["name"] == category["name"] }
category["name"] << " (#{category["node_id"]})" if count > 1
category
end
parent_categories = categories.select { |c| c["parent_node_id"] <= 2 }
@ -326,9 +342,7 @@ class ImportScripts::Lithium < ImportScripts::Base
id: category["node_id"],
name: category["name"],
position: category["position"],
post_create_action: lambda do |record|
after_category_create(record, category)
end
post_create_action: lambda { |record| after_category_create(record, category) },
}
end
@ -342,9 +356,7 @@ class ImportScripts::Lithium < ImportScripts::Base
name: category["name"],
position: category["position"],
parent_category_id: category_id_from_imported_category_id(category["parent_node_id"]),
post_create_action: lambda do |record|
after_category_create(record, category)
end
post_create_action: lambda { |record| after_category_create(record, category) },
}
end
end
@ -371,7 +383,6 @@ class ImportScripts::Lithium < ImportScripts::Base
end
end
end
end
def staff_guardian
@ -386,8 +397,12 @@ class ImportScripts::Lithium < ImportScripts::Base
SiteSetting.max_tags_per_topic = 10
SiteSetting.max_tag_length = 100
topic_count = mysql_query("SELECT COUNT(*) count FROM message2 where id = root_id").first["count"]
topic_tags = mysql_query("SELECT e.target_id, GROUP_CONCAT(l.tag_text SEPARATOR ',') tags FROM tag_events_label_message e LEFT JOIN tags_label l ON e.tag_id = l.tag_id GROUP BY e.target_id")
topic_count =
mysql_query("SELECT COUNT(*) count FROM message2 where id = root_id").first["count"]
topic_tags =
mysql_query(
"SELECT e.target_id, GROUP_CONCAT(l.tag_text SEPARATOR ',') tags FROM tag_events_label_message e LEFT JOIN tags_label l ON e.tag_id = l.tag_id GROUP BY e.target_id",
)
batches(BATCH_SIZE) do |offset|
topics = mysql_query <<-SQL
@ -405,7 +420,6 @@ class ImportScripts::Lithium < ImportScripts::Base
next if all_records_exist? :posts, topics.map { |topic| "#{topic["node_id"]} #{topic["id"]}" }
create_posts(topics, total: topic_count, offset: offset) do |topic|
category_id = category_id_from_imported_category_id(topic["node_id"])
deleted_at = topic["deleted"] == 1 ? topic["row_version"] : nil
raw = topic["body"]
@ -420,24 +434,31 @@ class ImportScripts::Lithium < ImportScripts::Base
created_at: unix_time(topic["post_date"]),
deleted_at: deleted_at,
views: topic["views"],
custom_fields: { import_unique_id: topic["unique_id"] },
custom_fields: {
import_unique_id: topic["unique_id"],
},
import_mode: true,
post_create_action: proc do |post|
result = topic_tags.select { |t| t["target_id"] == topic["unique_id"] }
if result.count > 0
tag_names = result.first["tags"].split(",")
DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names)
end
end
post_create_action:
proc do |post|
result = topic_tags.select { |t| t["target_id"] == topic["unique_id"] }
if result.count > 0
tag_names = result.first["tags"].split(",")
DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names)
end
end,
}
else
message = "Unknown"
message = "Category '#{category_id}' not exist" if category_id.blank?
message = "Topic 'body' is empty" if raw.blank?
PluginStoreRow.find_or_create_by(plugin_name: "topic_import_log", key: topic["unique_id"].to_s, value: message, type_name: 'String')
PluginStoreRow.find_or_create_by(
plugin_name: "topic_import_log",
key: topic["unique_id"].to_s,
value: message,
type_name: "String",
)
nil
end
end
end
@ -446,9 +467,13 @@ class ImportScripts::Lithium < ImportScripts::Base
end
def import_posts
post_count = mysql_query("SELECT COUNT(*) count FROM message2
WHERE id <> root_id").first["count"]
post_count =
mysql_query(
"SELECT COUNT(*) count FROM message2
WHERE id <> root_id",
).first[
"count"
]
puts "", "importing posts... (#{post_count})"
@ -465,11 +490,18 @@ class ImportScripts::Lithium < ImportScripts::Base
break if posts.size < 1
next if all_records_exist? :posts, posts.map { |post| "#{post["node_id"]} #{post["root_id"]} #{post["id"]}" }
if all_records_exist? :posts,
posts.map { |post|
"#{post["node_id"]} #{post["root_id"]} #{post["id"]}"
}
next
end
create_posts(posts, total: post_count, offset: offset) do |post|
raw = post["raw"]
next unless topic = topic_lookup_from_imported_post_id("#{post["node_id"]} #{post["root_id"]}")
unless topic = topic_lookup_from_imported_post_id("#{post["node_id"]} #{post["root_id"]}")
next
end
deleted_at = topic["deleted"] == 1 ? topic["row_version"] : nil
raw = post["body"]
@ -482,17 +514,27 @@ class ImportScripts::Lithium < ImportScripts::Base
raw: raw,
created_at: unix_time(post["post_date"]),
deleted_at: deleted_at,
custom_fields: { import_unique_id: post["unique_id"] },
import_mode: true
custom_fields: {
import_unique_id: post["unique_id"],
},
import_mode: true,
}
if parent = topic_lookup_from_imported_post_id("#{post["node_id"]} #{post["root_id"]} #{post["parent_id"]}")
if parent =
topic_lookup_from_imported_post_id(
"#{post["node_id"]} #{post["root_id"]} #{post["parent_id"]}",
)
new_post[:reply_to_post_number] = parent[:post_number]
end
new_post
else
PluginStoreRow.find_or_create_by(plugin_name: "post_import_log", key: post["unique_id"].to_s, value: "Post 'body' is empty", type_name: 'String')
PluginStoreRow.find_or_create_by(
plugin_name: "post_import_log",
key: post["unique_id"].to_s,
value: "Post 'body' is empty",
type_name: "String",
)
nil
end
end
@ -521,37 +563,40 @@ class ImportScripts::Lithium < ImportScripts::Base
"catwink" => "wink",
"catfrustrated" => "grumpycat",
"catembarrassed" => "kissing_cat",
"catlol" => "joy_cat"
"catlol" => "joy_cat",
}
def import_likes
puts "\nimporting likes..."
sql = "select source_id user_id, target_id post_id, row_version created_at from tag_events_score_message"
sql =
"select source_id user_id, target_id post_id, row_version created_at from tag_events_score_message"
results = mysql_query(sql)
puts "loading unique id map"
existing_map = {}
PostCustomField.where(name: 'import_unique_id').pluck(:post_id, :value).each do |post_id, import_id|
existing_map[import_id] = post_id
end
PostCustomField
.where(name: "import_unique_id")
.pluck(:post_id, :value)
.each { |post_id, import_id| existing_map[import_id] = post_id }
puts "loading data into temp table"
DB.exec("create temp table like_data(user_id int, post_id int, created_at timestamp without time zone)")
DB.exec(
"create temp table like_data(user_id int, post_id int, created_at timestamp without time zone)",
)
PostAction.transaction do
results.each do |result|
result["user_id"] = user_id_from_imported_user_id(result["user_id"].to_s)
result["post_id"] = existing_map[result["post_id"].to_s]
next unless result["user_id"] && result["post_id"]
DB.exec("INSERT INTO like_data VALUES (:user_id,:post_id,:created_at)",
DB.exec(
"INSERT INTO like_data VALUES (:user_id,:post_id,:created_at)",
user_id: result["user_id"],
post_id: result["post_id"],
created_at: result["created_at"]
created_at: result["created_at"],
)
end
end
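
The shape of import_likes above is a two-phase bulk load: resolve each like through the in-memory import maps, stage the resolved rows into the like_data temp table inside one transaction, then (in the part of the method this hunk elides) presumably merge them into post_actions. A stripped-down sketch of the staging phase, with sample rows invented:

rows = [{ user_id: 1, post_id: 42, created_at: Time.zone.now }]

DB.exec(
  "create temp table like_data(user_id int, post_id int, created_at timestamp without time zone)",
)
PostAction.transaction do
  rows.each do |row|
    # Per-row INSERTs are tolerable here: the temp table is not WAL-logged,
    # and the wrapping transaction amortizes the commit overhead.
    DB.exec(
      "INSERT INTO like_data VALUES (:user_id,:post_id,:created_at)",
      user_id: row[:user_id],
      post_id: row[:post_id],
      created_at: row[:created_at],
    )
  end
end
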
@ -616,31 +661,28 @@ class ImportScripts::Lithium < ImportScripts::Base
end
def import_accepted_answers
puts "\nimporting accepted answers..."
sql = "select unique_id post_id from message2 where (attributes & 0x4000 ) != 0 and deleted = 0;"
sql =
"select unique_id post_id from message2 where (attributes & 0x4000 ) != 0 and deleted = 0;"
results = mysql_query(sql)
puts "loading unique id map"
existing_map = {}
PostCustomField.where(name: 'import_unique_id').pluck(:post_id, :value).each do |post_id, import_id|
existing_map[import_id] = post_id
end
PostCustomField
.where(name: "import_unique_id")
.pluck(:post_id, :value)
.each { |post_id, import_id| existing_map[import_id] = post_id }
puts "loading data into temp table"
DB.exec("create temp table accepted_data(post_id int primary key)")
PostAction.transaction do
results.each do |result|
result["post_id"] = existing_map[result["post_id"].to_s]
next unless result["post_id"]
DB.exec("INSERT INTO accepted_data VALUES (:post_id)",
post_id: result["post_id"]
)
DB.exec("INSERT INTO accepted_data VALUES (:post_id)", post_id: result["post_id"])
end
end
@ -679,7 +721,6 @@ class ImportScripts::Lithium < ImportScripts::Base
end
def import_pms
puts "", "importing pms..."
puts "determining participation records"
@ -702,24 +743,20 @@ class ImportScripts::Lithium < ImportScripts::Base
note_to_subject = {}
subject_to_first_note = {}
mysql_query("SELECT note_id, subject, sender_user_id FROM tblia_notes_content order by note_id").each do |row|
mysql_query(
"SELECT note_id, subject, sender_user_id FROM tblia_notes_content order by note_id",
).each do |row|
user_id = user_id_from_imported_user_id(row["sender_user_id"])
ary = (users[row["note_id"]] ||= Set.new)
if user_id
ary << user_id
end
note_to_subject[row["note_id"]] = row["subject"]
ary = (users[row["note_id"]] ||= Set.new)
ary << user_id if user_id
note_to_subject[row["note_id"]] = row["subject"]
if row["subject"] !~ /^Re: /
subject_to_first_note[[row["subject"], ary]] ||= row["note_id"]
end
subject_to_first_note[[row["subject"], ary]] ||= row["note_id"] if row["subject"] !~ /^Re: /
end
puts "Loading user_id to username map"
user_map = {}
User.pluck(:id, :username).each do |id, username|
user_map[id] = username
end
User.pluck(:id, :username).each { |id, username| user_map[id] = username }
topic_count = mysql_query("SELECT COUNT(*) count FROM tblia_notes_content").first["count"]
@ -737,8 +774,8 @@ class ImportScripts::Lithium < ImportScripts::Base
next if all_records_exist? :posts, topics.map { |topic| "pm_#{topic["note_id"]}" }
create_posts(topics, total: topic_count, offset: offset) do |topic|
user_id = user_id_from_imported_user_id(topic["sender_user_id"]) || Discourse::SYSTEM_USER_ID
user_id =
user_id_from_imported_user_id(topic["sender_user_id"]) || Discourse::SYSTEM_USER_ID
participants = users[topic["note_id"]]
usernames = (participants - [user_id]).map { |id| user_map[id] }
@ -763,48 +800,54 @@ class ImportScripts::Lithium < ImportScripts::Base
user_id: user_id,
raw: raw,
created_at: unix_time(topic["sent_time"]),
import_mode: true
import_mode: true,
}
unless topic_id
msg[:title] = @htmlentities.decode(topic["subject"]).strip[0...255]
msg[:archetype] = Archetype.private_message
msg[:target_usernames] = usernames.join(',')
msg[:target_usernames] = usernames.join(",")
else
msg[:topic_id] = topic_id
end
msg
else
PluginStoreRow.find_or_create_by(plugin_name: "pm_import_log", key: topic["note_id"].to_s, value: "PM 'body' is empty", type_name: 'String')
PluginStoreRow.find_or_create_by(
plugin_name: "pm_import_log",
key: topic["note_id"].to_s,
value: "PM 'body' is empty",
type_name: "String",
)
nil
end
end
end
end
def close_topics
puts "\nclosing closed topics..."
sql = "select unique_id post_id from message2 where root_id = id AND (attributes & 0x0002 ) != 0;"
sql =
"select unique_id post_id from message2 where root_id = id AND (attributes & 0x0002 ) != 0;"
results = mysql_query(sql)
# loading post map
existing_map = {}
PostCustomField.where(name: 'import_unique_id').pluck(:post_id, :value).each do |post_id, import_id|
existing_map[import_id.to_i] = post_id.to_i
end
PostCustomField
.where(name: "import_unique_id")
.pluck(:post_id, :value)
.each { |post_id, import_id| existing_map[import_id.to_i] = post_id.to_i }
results.map { |r| r["post_id"] }.each_slice(500) do |ids|
mapped = ids.map { |id| existing_map[id] }.compact
DB.exec(<<~SQL, ids: mapped) if mapped.present?
results
.map { |r| r["post_id"] }
.each_slice(500) do |ids|
mapped = ids.map { |id| existing_map[id] }.compact
DB.exec(<<~SQL, ids: mapped) if mapped.present?
UPDATE topics SET closed = true
WHERE id IN (SELECT topic_id FROM posts where id in (:ids))
SQL
end
end
end
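
Lithium packs topic state into the message2.attributes bitmask: close_topics above tests bit 0x0002 for closed threads, and import_accepted_answers earlier tests 0x4000 for accepted answers. A small illustration of the flag test, with constant names invented for readability:

CLOSED = 0x0002 # per close_topics
ACCEPTED = 0x4000 # per import_accepted_answers

def closed?(attributes)
  (attributes & CLOSED) != 0
end

closed?(0x0003) # => true, the closed bit is set
closed?(ACCEPTED) # => false, only the accepted-answer bit is set
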
def create_permalinks
@ -835,7 +878,6 @@ SQL
r = DB.exec sql
puts "#{r} permalinks to posts added!"
end
def find_upload(user_id, attachment_id, real_filename)
@ -846,7 +888,7 @@ SQL
puts "Attachment file doesn't exist: #{filename}"
return nil
end
real_filename.prepend SecureRandom.hex if real_filename[0] == '.'
real_filename.prepend SecureRandom.hex if real_filename[0] == "."
upload = create_upload(user_id, filename, real_filename)
if upload.nil? || !upload.valid?
@ -864,48 +906,57 @@ SQL
default_extensions = SiteSetting.authorized_extensions
default_max_att_size = SiteSetting.max_attachment_size_kb
SiteSetting.authorized_extensions = "*"
SiteSetting.max_attachment_size_kb = 307200
SiteSetting.max_attachment_size_kb = 307_200
current = 0
max = Post.count
mysql_query("create index idxUniqueId on message2(unique_id)") rescue nil
attachments = mysql_query("SELECT a.attachment_id, a.file_name, m.message_uid FROM tblia_attachment a INNER JOIN tblia_message_attachments m ON a.attachment_id = m.attachment_id")
Post.where('id > ?', @max_start_id).find_each do |post|
begin
id = post.custom_fields["import_unique_id"]
next unless id
raw = mysql_query("select body from message2 where unique_id = '#{id}'").first['body']
unless raw
puts "Missing raw for post: #{post.id}"
next
end
new_raw = postprocess_post_raw(raw, post.user_id)
files = attachments.select { |a| a["message_uid"].to_s == id }
new_raw << html_for_attachments(post.user_id, files)
unless post.raw == new_raw
post.raw = new_raw
post.cooked = post.cook(new_raw)
cpp = CookedPostProcessor.new(post)
cpp.link_post_uploads
post.custom_fields["import_post_process"] = true
post.save
end
rescue PrettyText::JavaScriptError
puts "GOT A JS error on post: #{post.id}"
nil
ensure
print_status(current += 1, max)
end
begin
mysql_query("create index idxUniqueId on message2(unique_id)")
rescue StandardError
nil
end
attachments =
mysql_query(
"SELECT a.attachment_id, a.file_name, m.message_uid FROM tblia_attachment a INNER JOIN tblia_message_attachments m ON a.attachment_id = m.attachment_id",
)
Post
.where("id > ?", @max_start_id)
.find_each do |post|
begin
id = post.custom_fields["import_unique_id"]
next unless id
raw = mysql_query("select body from message2 where unique_id = '#{id}'").first["body"]
unless raw
puts "Missing raw for post: #{post.id}"
next
end
new_raw = postprocess_post_raw(raw, post.user_id)
files = attachments.select { |a| a["message_uid"].to_s == id }
new_raw << html_for_attachments(post.user_id, files)
unless post.raw == new_raw
post.raw = new_raw
post.cooked = post.cook(new_raw)
cpp = CookedPostProcessor.new(post)
cpp.link_post_uploads
post.custom_fields["import_post_process"] = true
post.save
end
rescue PrettyText::JavaScriptError
puts "GOT A JS error on post: #{post.id}"
nil
ensure
print_status(current += 1, max)
end
end
SiteSetting.authorized_extensions = default_extensions
SiteSetting.max_attachment_size_kb = default_max_att_size
end
def postprocess_post_raw(raw, user_id)
matches = raw.match(/<messagetemplate.*<\/messagetemplate>/m) || []
matches = raw.match(%r{<messagetemplate.*</messagetemplate>}m) || []
matches.each do |match|
hash = Hash.from_xml(match)
template = hash["messagetemplate"]["zone"]["item"]
@ -915,106 +966,123 @@ SQL
doc = Nokogiri::HTML5.fragment(raw)
doc.css("a,img,li-image").each do |l|
upload_name, image, linked_upload = [nil] * 3
doc
.css("a,img,li-image")
.each do |l|
upload_name, image, linked_upload = [nil] * 3
if l.name == "li-image" && l["id"]
upload_name = l["id"]
else
uri = URI.parse(l["href"] || l["src"]) rescue nil
uri.hostname = nil if uri && uri.hostname == OLD_DOMAIN
if uri && !uri.hostname
if l["href"]
l["href"] = uri.path
# we have an internal link, let's see if we can remap it
permalink = Permalink.find_by_url(uri.path) rescue nil
if l["href"]
if permalink && permalink.target_url
l["href"] = permalink.target_url
elsif l["href"] =~ /^\/gartner\/attachments\/gartner\/([^.]*).(\w*)/
linked_upload = "#{$1}.#{$2}"
end
end
elsif l["src"]
# we need an upload here
upload_name = $1 if uri.path =~ /image-id\/([^\/]+)/
end
end
end
if upload_name
png = UPLOAD_DIR + "/" + upload_name + ".png"
jpg = UPLOAD_DIR + "/" + upload_name + ".jpg"
gif = UPLOAD_DIR + "/" + upload_name + ".gif"
# check to see if we have it
if File.exist?(png)
image = png
elsif File.exist?(jpg)
image = jpg
elsif File.exist?(gif)
image = gif
end
if image
File.open(image) do |file|
upload = UploadCreator.new(file, "image." + (image.ends_with?(".png") ? "png" : "jpg")).create_for(user_id)
l.name = "img" if l.name == "li-image"
l["src"] = upload.url
end
if l.name == "li-image" && l["id"]
upload_name = l["id"]
else
puts "image was missing #{l["src"]}"
uri =
begin
URI.parse(l["href"] || l["src"])
rescue StandardError
nil
end
uri.hostname = nil if uri && uri.hostname == OLD_DOMAIN
if uri && !uri.hostname
if l["href"]
l["href"] = uri.path
# we have an internal link, let's see if we can remap it
permalink =
begin
Permalink.find_by_url(uri.path)
rescue StandardError
nil
end
if l["href"]
if permalink && permalink.target_url
l["href"] = permalink.target_url
elsif l["href"] =~ %r{^/gartner/attachments/gartner/([^.]*).(\w*)}
linked_upload = "#{$1}.#{$2}"
end
end
elsif l["src"]
# we need an upload here
upload_name = $1 if uri.path =~ %r{image-id/([^/]+)}
end
end
end
elsif linked_upload
segments = linked_upload.match(/\/(\d*)\/(\d)\/([^.]*).(\w*)$/)
if segments.present?
lithium_post_id = segments[1]
attachment_number = segments[2]
if upload_name
png = UPLOAD_DIR + "/" + upload_name + ".png"
jpg = UPLOAD_DIR + "/" + upload_name + ".jpg"
gif = UPLOAD_DIR + "/" + upload_name + ".gif"
result = mysql_query("select a.attachment_id, f.file_name from tblia_message_attachments a
# check to see if we have it
if File.exist?(png)
image = png
elsif File.exist?(jpg)
image = jpg
elsif File.exist?(gif)
image = gif
end
if image
File.open(image) do |file|
upload =
UploadCreator.new(
file,
"image." + (image.ends_with?(".png") ? "png" : "jpg"),
).create_for(user_id)
l.name = "img" if l.name == "li-image"
l["src"] = upload.url
end
else
puts "image was missing #{l["src"]}"
end
elsif linked_upload
segments = linked_upload.match(%r{/(\d*)/(\d)/([^.]*).(\w*)$})
if segments.present?
lithium_post_id = segments[1]
attachment_number = segments[2]
result =
mysql_query(
"select a.attachment_id, f.file_name from tblia_message_attachments a
INNER JOIN message2 m ON a.message_uid = m.unique_id
INNER JOIN tblia_attachment f ON a.attachment_id = f.attachment_id
where m.id = #{lithium_post_id} AND a.attach_num = #{attachment_number} limit 0, 1")
where m.id = #{lithium_post_id} AND a.attach_num = #{attachment_number} limit 0, 1",
)
result.each do |row|
upload, filename = find_upload(user_id, row["attachment_id"], row["file_name"])
if upload.present?
l["href"] = upload.url
else
puts "attachment was missing #{l["href"]}"
result.each do |row|
upload, filename = find_upload(user_id, row["attachment_id"], row["file_name"])
if upload.present?
l["href"] = upload.url
else
puts "attachment was missing #{l["href"]}"
end
end
end
end
end
end
# for user mentions
doc.css("li-user").each do |l|
uid = l["uid"]
doc
.css("li-user")
.each do |l|
uid = l["uid"]
if uid.present?
user = UserCustomField.find_by(name: 'import_id', value: uid).try(:user)
if user.present?
username = user.username
span = l.document.create_element "span"
span.inner_html = "@#{username}"
l.replace span
if uid.present?
user = UserCustomField.find_by(name: "import_id", value: uid).try(:user)
if user.present?
username = user.username
span = l.document.create_element "span"
span.inner_html = "@#{username}"
l.replace span
end
end
end
end
raw = ReverseMarkdown.convert(doc.to_s)
raw.gsub!(/^\s*&nbsp;\s*$/, "")
# ugly quotes
raw.gsub!(/^>[\s\*]*$/, "")
raw.gsub!(/:([a-z]+):/) do |match|
":#{SMILEY_SUBS[$1] || $1}:"
end
raw.gsub!(/:([a-z]+):/) { |match| ":#{SMILEY_SUBS[$1] || $1}:" }
# nbsp central
raw.gsub!(/([a-zA-Z0-9])&nbsp;([a-zA-Z0-9])/, "\\1 \\2")
raw
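
Several rewrites in this method trade slash-delimited regex literals for %r{...}. The two compile to the same pattern; %r{} simply needs no backslash before forward slashes, which is why syntax_tree prefers it for path-like expressions. For instance:

path = "/gartner/attachments/gartner/photo.png"
%r{^/gartner/attachments/gartner/([^.]*).(\w*)}.match?(path) # => true
/^\/gartner\/attachments\/gartner\/([^.]*).(\w*)/.match?(path) # => true, same pattern
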
@ -1037,7 +1105,6 @@ SQL
def mysql_query(sql)
@client.query(sql, cache_rows: true)
end
end
ImportScripts::Lithium.new.perform

View File

@ -1,14 +1,15 @@
# frozen_string_literal: true
if ARGV.length != 1 || !File.exist?(ARGV[0])
STDERR.puts '', 'Usage of mbox importer:', 'bundle exec ruby mbox.rb <path/to/settings.yml>'
STDERR.puts '', "Use the settings file from #{File.expand_path('mbox/settings.yml', File.dirname(__FILE__))} as an example."
STDERR.puts "", "Usage of mbox importer:", "bundle exec ruby mbox.rb <path/to/settings.yml>"
STDERR.puts "",
"Use the settings file from #{File.expand_path("mbox/settings.yml", File.dirname(__FILE__))} as an example."
exit 1
end
module ImportScripts
module Mbox
require_relative 'mbox/importer'
require_relative "mbox/importer"
Importer.new(ARGV[0]).perform
end
end

View File

@ -1,9 +1,9 @@
# frozen_string_literal: true
require_relative '../base'
require_relative 'support/database'
require_relative 'support/indexer'
require_relative 'support/settings'
require_relative "../base"
require_relative "support/database"
require_relative "support/indexer"
require_relative "support/settings"
module ImportScripts::Mbox
class Importer < ImportScripts::Base
@ -38,44 +38,44 @@ module ImportScripts::Mbox
end
def index_messages
puts '', 'creating index'
puts "", "creating index"
indexer = Indexer.new(@database, @settings)
indexer.execute
end
def import_categories
puts '', 'creating categories'
puts "", "creating categories"
rows = @database.fetch_categories
create_categories(rows) do |row|
{
id: row['name'],
name: row['name'],
parent_category_id: row['parent_category_id'].presence,
id: row["name"],
name: row["name"],
parent_category_id: row["parent_category_id"].presence,
}
end
end
def import_users
puts '', 'creating users'
puts "", "creating users"
total_count = @database.count_users
last_email = ''
last_email = ""
batches do |offset|
rows, last_email = @database.fetch_users(last_email)
break if rows.empty?
next if all_records_exist?(:users, rows.map { |row| row['email'] })
next if all_records_exist?(:users, rows.map { |row| row["email"] })
create_users(rows, total: total_count, offset: offset) do |row|
{
id: row['email'],
email: row['email'],
name: row['name'],
id: row["email"],
email: row["email"],
name: row["name"],
trust_level: @settings.trust_level,
staged: @settings.staged,
active: !@settings.staged,
created_at: to_time(row['date_of_first_message'])
created_at: to_time(row["date_of_first_message"]),
}
end
end
@ -86,7 +86,7 @@ module ImportScripts::Mbox
end
def import_posts
puts '', 'creating topics and posts'
puts "", "creating topics and posts"
total_count = @database.count_messages
last_row_id = 0
@ -94,47 +94,45 @@ module ImportScripts::Mbox
rows, last_row_id = @database.fetch_messages(last_row_id)
break if rows.empty?
next if all_records_exist?(:posts, rows.map { |row| row['msg_id'] })
next if all_records_exist?(:posts, rows.map { |row| row["msg_id"] })
create_posts(rows, total: total_count, offset: offset) do |row|
begin
if row['email_date'].blank?
puts "Date is missing. Skipping #{row['msg_id']}"
if row["email_date"].blank?
puts "Date is missing. Skipping #{row["msg_id"]}"
nil
elsif row['in_reply_to'].blank?
elsif row["in_reply_to"].blank?
map_first_post(row)
else
map_reply(row)
end
rescue => e
puts "Failed to map post for #{row['msg_id']}", e, e.backtrace.join("\n")
puts "Failed to map post for #{row["msg_id"]}", e, e.backtrace.join("\n")
end
end
end
end
def map_post(row)
user_id = user_id_from_imported_user_id(row['from_email']) || Discourse::SYSTEM_USER_ID
user_id = user_id_from_imported_user_id(row["from_email"]) || Discourse::SYSTEM_USER_ID
{
id: row['msg_id'],
id: row["msg_id"],
user_id: user_id,
created_at: to_time(row['email_date']),
created_at: to_time(row["email_date"]),
raw: format_raw(row, user_id),
raw_email: row['raw_message'],
raw_email: row["raw_message"],
via_email: true,
post_create_action: proc do |post|
create_incoming_email(post, row)
end
post_create_action: proc { |post| create_incoming_email(post, row) },
}
end
def format_raw(row, user_id)
body = row['body'] || ''
elided = row['elided']
body = row["body"] || ""
elided = row["elided"]
if row['attachment_count'].positive?
receiver = Email::Receiver.new(row['raw_message'])
if row["attachment_count"].positive?
receiver = Email::Receiver.new(row["raw_message"])
user = User.find(user_id)
body = receiver.add_attachments(body, user)
end
@ -147,21 +145,21 @@ module ImportScripts::Mbox
end
def map_first_post(row)
subject = row['subject']
subject = row["subject"]
tags = remove_tags!(subject)
mapped = map_post(row)
mapped[:category] = category_id_from_imported_category_id(row['category'])
mapped[:category] = category_id_from_imported_category_id(row["category"])
mapped[:title] = subject.strip[0...255]
mapped[:tags] = tags if tags.present?
mapped
end
def map_reply(row)
parent = @lookup.topic_lookup_from_imported_post_id(row['in_reply_to'])
parent = @lookup.topic_lookup_from_imported_post_id(row["in_reply_to"])
if parent.blank?
puts "Parent message #{row['in_reply_to']} doesn't exist. Skipping #{row['msg_id']}: #{row['subject'][0..40]}"
puts "Parent message #{row["in_reply_to"]} doesn't exist. Skipping #{row["msg_id"]}: #{row["subject"][0..40]}"
return nil
end
@ -178,9 +176,7 @@ module ImportScripts::Mbox
old_length = subject.length
@settings.tags.each do |tag|
if subject.sub!(tag[:regex], "") && tag[:name].present?
tag_names << tag[:name]
end
tag_names << tag[:name] if subject.sub!(tag[:regex], "") && tag[:name].present?
end
remove_prefixes!(subject) if subject.length != old_length
@ -203,13 +199,13 @@ module ImportScripts::Mbox
def create_incoming_email(post, row)
IncomingEmail.create(
message_id: row['msg_id'],
raw: row['raw_message'],
subject: row['subject'],
from_address: row['from_email'],
message_id: row["msg_id"],
raw: row["raw_message"],
subject: row["subject"],
from_address: row["from_email"],
user_id: post.user_id,
topic_id: post.topic_id,
post_id: post.id
post_id: post.id,
)
end

View File

@ -1,6 +1,6 @@
# frozen_string_literal: true
require 'sqlite3'
require "sqlite3"
module ImportScripts::Mbox
class Database
@ -23,8 +23,7 @@ module ImportScripts::Mbox
@db.transaction
yield self
@db.commit
rescue
rescue StandardError
@db.rollback
end
@ -60,9 +59,7 @@ module ImportScripts::Mbox
SQL
@db.prepare(sql) do |stmt|
reply_message_ids.each do |in_reply_to|
stmt.execute(msg_id, in_reply_to)
end
reply_message_ids.each { |in_reply_to| stmt.execute(msg_id, in_reply_to) }
end
end
@ -95,7 +92,7 @@ module ImportScripts::Mbox
end
def sort_emails_by_date_and_reply_level
@db.execute 'DELETE FROM email_order'
@db.execute "DELETE FROM email_order"
@db.execute <<-SQL
WITH RECURSIVE
@ -117,7 +114,7 @@ module ImportScripts::Mbox
end
def sort_emails_by_subject
@db.execute 'DELETE FROM email_order'
@db.execute "DELETE FROM email_order"
@db.execute <<-SQL
INSERT INTO email_order (msg_id)
@ -128,7 +125,7 @@ module ImportScripts::Mbox
end
def fill_users_from_emails
@db.execute 'DELETE FROM user'
@db.execute "DELETE FROM user"
@db.execute <<-SQL
INSERT INTO user (email, name, date_of_first_message)
@ -172,7 +169,7 @@ module ImportScripts::Mbox
LIMIT #{@batch_size}
SQL
add_last_column_value(rows, 'email')
add_last_column_value(rows, "email")
end
def count_messages
@ -193,14 +190,14 @@ module ImportScripts::Mbox
LIMIT #{@batch_size}
SQL
add_last_column_value(rows, 'rowid')
add_last_column_value(rows, "rowid")
end
private
def configure_database
@db.execute 'PRAGMA journal_mode = OFF'
@db.execute 'PRAGMA locking_mode = EXCLUSIVE'
@db.execute "PRAGMA journal_mode = OFF"
@db.execute "PRAGMA locking_mode = EXCLUSIVE"
end
def upgrade_schema_version
@ -260,10 +257,10 @@ module ImportScripts::Mbox
)
SQL
@db.execute 'CREATE INDEX IF NOT EXISTS email_by_from ON email (from_email)'
@db.execute 'CREATE INDEX IF NOT EXISTS email_by_subject ON email (subject)'
@db.execute 'CREATE INDEX IF NOT EXISTS email_by_in_reply_to ON email (in_reply_to)'
@db.execute 'CREATE INDEX IF NOT EXISTS email_by_date ON email (email_date)'
@db.execute "CREATE INDEX IF NOT EXISTS email_by_from ON email (from_email)"
@db.execute "CREATE INDEX IF NOT EXISTS email_by_subject ON email (subject)"
@db.execute "CREATE INDEX IF NOT EXISTS email_by_in_reply_to ON email (in_reply_to)"
@db.execute "CREATE INDEX IF NOT EXISTS email_by_date ON email (email_date)"
@db.execute <<-SQL
CREATE TABLE IF NOT EXISTS email_order (
@ -282,7 +279,7 @@ module ImportScripts::Mbox
)
SQL
@db.execute 'CREATE INDEX IF NOT EXISTS reply_by_in_reply_to ON reply (in_reply_to)'
@db.execute "CREATE INDEX IF NOT EXISTS reply_by_in_reply_to ON reply (in_reply_to)"
end
def create_table_for_users

View File

@ -1,8 +1,8 @@
# frozen_string_literal: true
require_relative 'database'
require 'json'
require 'yaml'
require_relative "database"
require "json"
require "yaml"
module ImportScripts::Mbox
class Indexer
@ -15,7 +15,7 @@ module ImportScripts::Mbox
end
def execute
directories = Dir.glob(File.join(@settings.data_dir, '*'))
directories = Dir.glob(File.join(@settings.data_dir, "*"))
directories.select! { |f| File.directory?(f) }
directories.sort!
@ -25,7 +25,7 @@ module ImportScripts::Mbox
index_emails(directory, category[:name])
end
puts '', 'indexing replies and users'
puts "", "indexing replies and users"
if @settings.group_messages_by_subject
@database.sort_emails_by_subject
@database.update_in_reply_to_by_email_subject
@ -39,24 +39,24 @@ module ImportScripts::Mbox
private
METADATA_FILENAME = 'metadata.yml'
IGNORED_FILE_EXTENSIONS = ['.dbindex', '.dbnames', '.digest', '.subjects', '.yml']
METADATA_FILENAME = "metadata.yml"
IGNORED_FILE_EXTENSIONS = %w[.dbindex .dbnames .digest .subjects .yml]
def index_category(directory)
metadata_file = File.join(directory, METADATA_FILENAME)
if File.exist?(metadata_file)
# workaround for YML files that contain classname in file header
yaml = File.read(metadata_file).sub(/^--- !.*$/, '---')
yaml = File.read(metadata_file).sub(/^--- !.*$/, "---")
metadata = YAML.safe_load(yaml)
else
metadata = {}
end
category = {
name: metadata['name'].presence || File.basename(directory),
description: metadata['description'],
parent_category_id: metadata['parent_category_id'].presence,
name: metadata["name"].presence || File.basename(directory),
description: metadata["description"],
parent_category_id: metadata["parent_category_id"].presence,
}
@database.insert_category(category)
@ -75,7 +75,7 @@ module ImportScripts::Mbox
# Detect cases like this and attempt to get actual sender from other headers:
# From: Jane Smith via ListName <ListName@lists.example.com>
if receiver.mail['X-Mailman-Version'] && from_display_name =~ /\bvia \S+$/i
if receiver.mail["X-Mailman-Version"] && from_display_name =~ /\bvia \S+$/i
email_from_from_line = opts[:from_line].scan(/From (\S+)/).flatten.first
a = Mail::Address.new(email_from_from_line)
from_email = a.address
@ -88,7 +88,7 @@ module ImportScripts::Mbox
end
end
from_email = from_email.sub(/^(.*)=/, '') if @settings.elide_equals_in_addresses
from_email = from_email.sub(/^(.*)=/, "") if @settings.elide_equals_in_addresses
body, elided, format = receiver.select_body
reply_message_ids = extract_reply_message_ids(parsed_email)
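
As the comment above explains, Mailman can rewrite the From header to the list's own address, leaving the real sender only on the mbox "From " separator line. A standalone sketch of that recovery, with sample addresses invented:

require "mail"

from_line = "From jane@example.com Thu Jan  5 11:53:14 2023"
email_from_from_line = from_line.scan(/From (\S+)/).flatten.first
Mail::Address.new(email_from_from_line).address # => "jane@example.com"
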
@ -109,7 +109,7 @@ module ImportScripts::Mbox
filename: File.basename(filename),
first_line_number: opts[:first_line_number],
last_line_number: opts[:last_line_number],
index_duration: (monotonic_time - opts[:start_time]).round(4)
index_duration: (monotonic_time - opts[:start_time]).round(4),
}
@database.transaction do |db|
@ -132,8 +132,8 @@ module ImportScripts::Mbox
def imported_file_checksums(category_name)
rows = @database.fetch_imported_files(category_name)
rows.each_with_object({}) do |row, hash|
filename = File.basename(row['filename'])
hash[filename] = row['checksum']
filename = File.basename(row["filename"])
hash[filename] = row["checksum"]
end
end
@ -171,14 +171,14 @@ module ImportScripts::Mbox
imported_file = {
category: category_name,
filename: File.basename(filename),
checksum: calc_checksum(filename)
checksum: calc_checksum(filename),
}
@database.insert_imported_file(imported_file)
end
def each_mail(filename)
raw_message = +''
raw_message = +""
first_line_number = 1
last_line_number = 0
@ -188,7 +188,7 @@ module ImportScripts::Mbox
if line.scrub =~ @split_regex
if last_line_number > 0
yield raw_message, first_line_number, last_line_number, from_line
raw_message = +''
raw_message = +""
first_line_number = last_line_number + 1
end
@ -204,12 +204,10 @@ module ImportScripts::Mbox
end
def each_line(filename)
raw_file = File.open(filename, 'r')
text_file = filename.end_with?('.gz') ? Zlib::GzipReader.new(raw_file) : raw_file
raw_file = File.open(filename, "r")
text_file = filename.end_with?(".gz") ? Zlib::GzipReader.new(raw_file) : raw_file
text_file.each_line do |line|
yield line
end
text_file.each_line { |line| yield line }
ensure
raw_file.close if raw_file
end
@ -220,7 +218,9 @@ module ImportScripts::Mbox
end
def read_mail_from_string(raw_message)
Email::Receiver.new(raw_message, convert_plaintext: true, skip_trimming: false) unless raw_message.blank?
unless raw_message.blank?
Email::Receiver.new(raw_message, convert_plaintext: true, skip_trimming: false)
end
end
def extract_reply_message_ids(mail)
@ -229,14 +229,13 @@ module ImportScripts::Mbox
def extract_subject(receiver, list_name)
subject = receiver.subject
subject.blank? ? nil : subject.strip.gsub(/\t+/, ' ')
subject.blank? ? nil : subject.strip.gsub(/\t+/, " ")
end
def ignored_file?(path, checksums)
filename = File.basename(path)
filename.start_with?('.') ||
filename == METADATA_FILENAME ||
filename.start_with?(".") || filename == METADATA_FILENAME ||
IGNORED_FILE_EXTENSIONS.include?(File.extname(filename)) ||
fully_indexed?(path, filename, checksums)
end

View File

@ -1,6 +1,6 @@
# frozen_string_literal: true
require 'yaml'
require "yaml"
module ImportScripts::Mbox
class Settings
@ -25,32 +25,32 @@ module ImportScripts::Mbox
attr_reader :elide_equals_in_addresses
def initialize(yaml)
@data_dir = yaml['data_dir']
@split_regex = Regexp.new(yaml['split_regex']) unless yaml['split_regex'].empty?
@data_dir = yaml["data_dir"]
@split_regex = Regexp.new(yaml["split_regex"]) unless yaml["split_regex"].empty?
@batch_size = 1000 # no need to make this actually configurable at the moment
@trust_level = yaml['default_trust_level']
@prefer_html = yaml['prefer_html']
@staged = yaml['staged']
@index_only = yaml['index_only']
@group_messages_by_subject = yaml['group_messages_by_subject']
@trust_level = yaml["default_trust_level"]
@prefer_html = yaml["prefer_html"]
@staged = yaml["staged"]
@index_only = yaml["index_only"]
@group_messages_by_subject = yaml["group_messages_by_subject"]
if yaml['remove_subject_prefixes'].present?
prefix_regexes = yaml['remove_subject_prefixes'].map { |p| Regexp.new(p) }
if yaml["remove_subject_prefixes"].present?
prefix_regexes = yaml["remove_subject_prefixes"].map { |p| Regexp.new(p) }
@subject_prefix_regex = /^#{Regexp.union(prefix_regexes).source}/i
end
@automatically_remove_list_name_prefix = yaml['automatically_remove_list_name_prefix']
@show_trimmed_content = yaml['show_trimmed_content']
@fix_mailman_via_addresses = yaml['fix_mailman_via_addresses']
@elide_equals_in_addresses = yaml['elide_equals_in_addresses']
@automatically_remove_list_name_prefix = yaml["automatically_remove_list_name_prefix"]
@show_trimmed_content = yaml["show_trimmed_content"]
@fix_mailman_via_addresses = yaml["fix_mailman_via_addresses"]
@elide_equals_in_addresses = yaml["elide_equals_in_addresses"]
@tags = []
if yaml['tags'].present?
yaml['tags'].each do |tag_name, value|
if yaml["tags"].present?
yaml["tags"].each do |tag_name, value|
prefixes = Regexp.union(value).source
@tags << {
regex: /^(?:(?:\[(?:#{prefixes})\])|(?:\((?:#{prefixes})\)))\s*/i,
name: tag_name
name: tag_name,
}
end
end
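
The tag regexes built above strip a bracketed or parenthesized prefix from a subject line, case-insensitively. With a hypothetical settings.yml fragment such as tags: { bug: ["BUG", "DEFECT"] }:

prefixes = Regexp.union(["BUG", "DEFECT"]).source # => "BUG|DEFECT"
regex = /^(?:(?:\[(?:#{prefixes})\])|(?:\((?:#{prefixes})\)))\s*/i

"[BUG] crash on login".sub(regex, "") # => "crash on login"
"(defect) slow query".sub(regex, "") # => "slow query"
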

View File

@ -1,23 +1,23 @@
# frozen_string_literal: true
require 'mysql2'
require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'htmlentities'
require "htmlentities"
class ImportScripts::Modx < ImportScripts::Base
BATCH_SIZE = 1000
# CHANGE THESE BEFORE RUNNING THE IMPORTER
DB_HOST ||= ENV['DB_HOST'] || "localhost"
DB_NAME ||= ENV['DB_NAME'] || "modx"
DB_PW ||= ENV['DB_PW'] || "modex"
DB_USER ||= ENV['DB_USER'] || "modx"
TIMEZONE ||= ENV['TIMEZONE'] || "America/Los_Angeles"
TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "modx_"
ATTACHMENT_DIR ||= ENV['ATTACHMENT_DIR'] || '/path/to/your/attachment/folder'
RANDOM_CATEGORY_COLOR ||= !ENV['RANDOM_CATEGORY_COLOR'].nil?
SUSPEND_ALL_USERS ||= !ENV['SUSPEND_ALL_USERS']
DB_HOST ||= ENV["DB_HOST"] || "localhost"
DB_NAME ||= ENV["DB_NAME"] || "modx"
DB_PW ||= ENV["DB_PW"] || "modex"
DB_USER ||= ENV["DB_USER"] || "modx"
TIMEZONE ||= ENV["TIMEZONE"] || "America/Los_Angeles"
TABLE_PREFIX ||= ENV["TABLE_PREFIX"] || "modx_"
ATTACHMENT_DIR ||= ENV["ATTACHMENT_DIR"] || "/path/to/your/attachment/folder"
RANDOM_CATEGORY_COLOR ||= !ENV["RANDOM_CATEGORY_COLOR"].nil?
SUSPEND_ALL_USERS ||= !ENV["SUSPEND_ALL_USERS"]
# TODO: replace modx_ with #{TABLE_PREFIX}
@ -34,14 +34,10 @@ class ImportScripts::Modx < ImportScripts::Base
@htmlentities = HTMLEntities.new
@client = Mysql2::Client.new(
host: DB_HOST,
username: DB_USER,
password: DB_PW,
database: DB_NAME
)
@client =
Mysql2::Client.new(host: DB_HOST, username: DB_USER, password: DB_PW, database: DB_NAME)
rescue Exception => e
puts '=' * 50
puts "=" * 50
puts e.message
puts <<~TEXT
Cannot connect in to database.
@ -83,22 +79,20 @@ class ImportScripts::Modx < ImportScripts::Base
SQL
create_groups(groups) do |group|
{
id: group["usergroupid"],
name: @htmlentities.decode(group["title"]).strip
}
{ id: group["usergroupid"], name: @htmlentities.decode(group["title"]).strip }
end
end
def import_users
puts "", "importing users"
user_count = mysql_query("SELECT COUNT(id) count FROM #{TABLE_PREFIX}discuss_users").first["count"]
user_count =
mysql_query("SELECT COUNT(id) count FROM #{TABLE_PREFIX}discuss_users").first["count"]
last_user_id = -1
batches(BATCH_SIZE) do |offset|
users = mysql_query(<<-SQL
users = mysql_query(<<-SQL).to_a
SELECT id as userid, email, concat (name_first, " ", name_last) as name, username,
location, website, status, last_active as last_seen_at,
createdon as created_at, birthdate as date_of_birth,
@ -112,7 +106,6 @@ FROM #{TABLE_PREFIX}discuss_users
ORDER BY id
LIMIT #{BATCH_SIZE};
SQL
).to_a
break if users.empty?
@ -123,14 +116,14 @@ FROM #{TABLE_PREFIX}discuss_users
create_users(users, total: user_count, offset: offset) do |user|
{
id: user["userid"],
name: user['name'],
username: user['username'],
email: user['email'],
website: user['website'],
name: user["name"],
username: user["username"],
email: user["email"],
website: user["website"],
created_at: parse_timestamp(user["created_at"]),
last_seen_at: parse_timestamp(user["last_seen_at"]),
date_of_birth: user['date_of_birth'],
password: "#{user['password']}:#{user['salt']}" # not tested
date_of_birth: user["date_of_birth"],
password: "#{user["password"]}:#{user["salt"]}", # not tested
}
end
end
@ -144,13 +137,13 @@ FROM #{TABLE_PREFIX}discuss_users
categories = mysql_query("select id, name, description from modx_discuss_categories").to_a
create_categories(categories) do |category|
puts "Creating #{category['name']}"
puts "Creating #{category["name"]}"
puts category
{
id: "cat#{category['id']}",
id: "cat#{category["id"]}",
name: category["name"],
color: RANDOM_CATEGORY_COLOR ? (0..2).map { "%0x" % (rand * 0x80) }.join : nil,
description: category["description"]
description: category["description"],
}
end
@ -159,13 +152,13 @@ FROM #{TABLE_PREFIX}discuss_users
boards = mysql_query("select id, category, name, description from modx_discuss_boards;").to_a
create_categories(boards) do |category|
puts category
parent_category_id = category_id_from_imported_category_id("cat#{category['category']}")
parent_category_id = category_id_from_imported_category_id("cat#{category["category"]}")
{
id: category["id"],
parent_category_id: parent_category_id.to_s,
name: category["name"],
color: RANDOM_CATEGORY_COLOR ? (0..2).map { "%0x" % (rand * 0x80) }.join : nil,
description: category["description"]
description: category["description"],
}
end
end
@ -173,12 +166,15 @@ FROM #{TABLE_PREFIX}discuss_users
def import_topics_and_posts
puts "", "creating topics and posts"
total_count = mysql_query("SELECT count(id) count from #{TABLE_PREFIX}discuss_posts").first["count"]
total_count =
mysql_query("SELECT count(id) count from #{TABLE_PREFIX}discuss_posts").first["count"]
topic_first_post_id = {}
batches(BATCH_SIZE) do |offset|
results = mysql_query("
results =
mysql_query(
"
SELECT id,
thread topic_id,
board category_id,
@ -191,27 +187,28 @@ FROM #{TABLE_PREFIX}discuss_users
ORDER BY createdon
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
")
",
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |m| m['id'].to_i }
next if all_records_exist? :posts, results.map { |m| m["id"].to_i }
create_posts(results, total: total_count, offset: offset) do |m|
skip = false
mapped = {}
mapped[:id] = m['id']
mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1
mapped[:raw] = post_process_raw(m['raw'])
mapped[:created_at] = Time.zone.at(m['created_at'])
mapped[:id] = m["id"]
mapped[:user_id] = user_id_from_imported_user_id(m["user_id"]) || -1
mapped[:raw] = post_process_raw(m["raw"])
mapped[:created_at] = Time.zone.at(m["created_at"])
if m['parent'] == 0
mapped[:category] = category_id_from_imported_category_id(m['category_id'])
mapped[:title] = m['title']
topic_first_post_id[m['topic_id']] = m['id']
if m["parent"] == 0
mapped[:category] = category_id_from_imported_category_id(m["category_id"])
mapped[:title] = m["title"]
topic_first_post_id[m["topic_id"]] = m["id"]
else
parent = topic_lookup_from_imported_post_id(topic_first_post_id[m['topic_id']])
parent = topic_lookup_from_imported_post_id(topic_first_post_id[m["topic_id"]])
if parent
mapped[:topic_id] = parent[:topic_id]
else
@ -227,10 +224,11 @@ FROM #{TABLE_PREFIX}discuss_users
def post_process_raw(raw)
# [QUOTE]...[/QUOTE]
raw = raw.gsub(/\[quote.*?\](.+?)\[\/quote\]/im) { |quote|
quote = quote.gsub(/\[quote author=(.*?) .+\]/i) { "\n[quote=\"#{$1}\"]\n" }
quote = quote.gsub(/([^\n])\[\/quote\]/im) { "#{$1}\n[/quote]\n" }
}
raw =
raw.gsub(%r{\[quote.*?\](.+?)\[/quote\]}im) do |quote|
quote = quote.gsub(/\[quote author=(.*?) .+\]/i) { "\n[quote=\"#{$1}\"]\n" }
quote = quote.gsub(%r{([^\n])\[/quote\]}im) { "#{$1}\n[/quote]\n" }
end
raw
end
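
Tracing post_process_raw on a sample Modx post: the outer gsub isolates each quote block, the first inner pass rewrites the author attribute, and the second moves the closing tag onto its own line while re-emitting the character it matched:

raw = "[quote author=bob date=1]hi there[/quote]"
raw.gsub(%r{\[quote.*?\](.+?)\[/quote\]}im) do |quote|
  quote = quote.gsub(/\[quote author=(.*?) .+\]/i) { "\n[quote=\"#{$1}\"]\n" }
  quote.gsub(%r{([^\n])\[/quote\]}im) { "#{$1}\n[/quote]\n" }
end
# => "\n[quote=\"bob\"]\nhi there\n[/quote]\n"
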
@ -249,25 +247,27 @@ FROM #{TABLE_PREFIX}discuss_users
# find the uploaded file information from the db
def not_find_upload(post, attachment_id)
sql = "SELECT a.attachmentid attachment_id, a.userid user_id, a.filedataid file_id, a.filename filename,
sql =
"SELECT a.attachmentid attachment_id, a.userid user_id, a.filedataid file_id, a.filename filename,
a.caption caption
FROM #{TABLE_PREFIX}attachment a
WHERE a.attachmentid = #{attachment_id}"
results = mysql_query(sql)
unless row = results.first
puts "Couldn't find attachment record for post.id = #{post.id}, import_id = #{post.custom_fields['import_id']}"
puts "Couldn't find attachment record for post.id = #{post.id}, import_id = #{post.custom_fields["import_id"]}"
return
end
filename = File.join(ATTACHMENT_DIR, row['user_id'].to_s.split('').join('/'), "#{row['file_id']}.attach")
filename =
File.join(ATTACHMENT_DIR, row["user_id"].to_s.split("").join("/"), "#{row["file_id"]}.attach")
unless File.exist?(filename)
puts "Attachment file doesn't exist: #{filename}"
return
end
real_filename = row['filename']
real_filename.prepend SecureRandom.hex if real_filename[0] == '.'
real_filename = row["filename"]
real_filename.prepend SecureRandom.hex if real_filename[0] == "."
upload = create_upload(post.user.id, filename, real_filename)
if upload.nil? || !upload.valid?
@ -286,24 +286,24 @@ FROM #{TABLE_PREFIX}discuss_users
def not_import_private_messages
puts "", "importing private messages..."
topic_count = mysql_query("SELECT COUNT(pmtextid) count FROM #{TABLE_PREFIX}pmtext").first["count"]
topic_count =
mysql_query("SELECT COUNT(pmtextid) count FROM #{TABLE_PREFIX}pmtext").first["count"]
last_private_message_id = -1
batches(BATCH_SIZE) do |offset|
private_messages = mysql_query(<<-SQL
private_messages = mysql_query(<<-SQL).to_a
SELECT pmtextid, fromuserid, title, message, touserarray, dateline
FROM #{TABLE_PREFIX}pmtext
WHERE pmtextid > #{last_private_message_id}
ORDER BY pmtextid
LIMIT #{BATCH_SIZE}
SQL
).to_a
break if private_messages.empty?
last_private_message_id = private_messages[-1]["pmtextid"]
private_messages.reject! { |pm| @lookup.post_already_imported?("pm-#{pm['pmtextid']}") }
private_messages.reject! { |pm| @lookup.post_already_imported?("pm-#{pm["pmtextid"]}") }
title_username_of_pm_first_post = {}
@ -311,11 +311,16 @@ FROM #{TABLE_PREFIX}discuss_users
skip = false
mapped = {}
mapped[:id] = "pm-#{m['pmtextid']}"
mapped[:user_id] = user_id_from_imported_user_id(m['fromuserid']) || Discourse::SYSTEM_USER_ID
mapped[:raw] = preprocess_post_raw(m['message']) rescue nil
mapped[:created_at] = Time.zone.at(m['dateline'])
title = @htmlentities.decode(m['title']).strip[0...255]
mapped[:id] = "pm-#{m["pmtextid"]}"
mapped[:user_id] = user_id_from_imported_user_id(m["fromuserid"]) ||
Discourse::SYSTEM_USER_ID
mapped[:raw] = begin
preprocess_post_raw(m["message"])
rescue StandardError
nil
end
mapped[:created_at] = Time.zone.at(m["dateline"])
title = @htmlentities.decode(m["title"]).strip[0...255]
topic_id = nil
next if mapped[:raw].blank?
@ -324,9 +329,9 @@ FROM #{TABLE_PREFIX}discuss_users
target_usernames = []
target_userids = []
begin
to_user_array = PHP.unserialize(m['touserarray'])
rescue
puts "#{m['pmtextid']} -- #{m['touserarray']}"
to_user_array = PHP.unserialize(m["touserarray"])
rescue StandardError
puts "#{m["pmtextid"]} -- #{m["touserarray"]}"
skip = true
end
@ -346,8 +351,8 @@ FROM #{TABLE_PREFIX}discuss_users
target_usernames << username if username
end
end
rescue
puts "skipping pm-#{m['pmtextid']} `to_user_array` is not properly serialized -- #{to_user_array.inspect}"
rescue StandardError
puts "skipping pm-#{m["pmtextid"]} `to_user_array` is not properly serialized -- #{to_user_array.inspect}"
skip = true
end
@ -355,18 +360,18 @@ FROM #{TABLE_PREFIX}discuss_users
participants << mapped[:user_id]
begin
participants.sort!
rescue
rescue StandardError
puts "one of the participant's id is nil -- #{participants.inspect}"
end
if title =~ /^Re:/
parent_id = title_username_of_pm_first_post[[title[3..-1], participants]] ||
title_username_of_pm_first_post[[title[4..-1], participants]] ||
title_username_of_pm_first_post[[title[5..-1], participants]] ||
title_username_of_pm_first_post[[title[6..-1], participants]] ||
title_username_of_pm_first_post[[title[7..-1], participants]] ||
title_username_of_pm_first_post[[title[8..-1], participants]]
parent_id =
title_username_of_pm_first_post[[title[3..-1], participants]] ||
title_username_of_pm_first_post[[title[4..-1], participants]] ||
title_username_of_pm_first_post[[title[5..-1], participants]] ||
title_username_of_pm_first_post[[title[6..-1], participants]] ||
title_username_of_pm_first_post[[title[7..-1], participants]] ||
title_username_of_pm_first_post[[title[8..-1], participants]]
if parent_id
if t = topic_lookup_from_imported_post_id("pm-#{parent_id}")
@ -374,18 +379,18 @@ FROM #{TABLE_PREFIX}discuss_users
end
end
else
title_username_of_pm_first_post[[title, participants]] ||= m['pmtextid']
title_username_of_pm_first_post[[title, participants]] ||= m["pmtextid"]
end
unless topic_id
mapped[:title] = title
mapped[:archetype] = Archetype.private_message
mapped[:target_usernames] = target_usernames.join(',')
mapped[:target_usernames] = target_usernames.join(",")
if mapped[:target_usernames].size < 1 # pm with yourself?
# skip = true
mapped[:target_usernames] = "system"
puts "pm-#{m['pmtextid']} has no target (#{m['touserarray']})"
puts "pm-#{m["pmtextid"]} has no target (#{m["touserarray"]})"
end
else
mapped[:topic_id] = topic_id
@ -397,22 +402,21 @@ FROM #{TABLE_PREFIX}discuss_users
end
def not_import_attachments
puts '', 'importing attachments...'
puts "", "importing attachments..."
current_count = 0
total_count = mysql_query(<<-SQL
total_count = mysql_query(<<-SQL).first["count"]
SELECT COUNT(postid) count
FROM #{TABLE_PREFIX}post p
JOIN #{TABLE_PREFIX}thread t ON t.threadid = p.threadid
WHERE t.firstpostid <> p.postid
SQL
).first["count"]
success_count = 0
fail_count = 0
attachment_regex = /\[attach[^\]]*\](\d+)\[\/attach\]/i
attachment_regex = %r{\[attach[^\]]*\](\d+)\[/attach\]}i
Post.find_each do |post|
current_count += 1
@ -433,7 +437,12 @@ FROM #{TABLE_PREFIX}discuss_users
end
if new_raw != post.raw
PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: 'Import attachments from modx')
PostRevisor.new(post).revise!(
post.user,
{ raw: new_raw },
bypass_bump: true,
edit_reason: "Import attachments from modx",
)
end
success_count += 1
@ -496,14 +505,14 @@ FROM #{TABLE_PREFIX}discuss_users
end
def not_create_permalink_file
puts '', 'Creating Permalink File...', ''
puts "", "Creating Permalink File...", ""
id_mapping = []
Topic.listable_topics.find_each do |topic|
pcf = topic.first_post.custom_fields
if pcf && pcf["import_id"]
id = pcf["import_id"].split('-').last
id = pcf["import_id"].split("-").last
id_mapping.push("XXX#{id} YYY#{topic.id}")
end
end
@ -517,11 +526,8 @@ FROM #{TABLE_PREFIX}discuss_users
# end
CSV.open(File.expand_path("../vb_map.csv", __FILE__), "w") do |csv|
id_mapping.each do |value|
csv << [value]
end
id_mapping.each { |value| csv << [value] }
end
end
def deactivate_all_users
@ -529,16 +535,23 @@ FROM #{TABLE_PREFIX}discuss_users
end
def suspend_users
puts '', "updating blocked users"
puts "", "updating blocked users"
banned = 0
failed = 0
total = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}user_attributes where blocked != 0").first['count']
total =
mysql_query(
"SELECT count(*) count FROM #{TABLE_PREFIX}user_attributes where blocked != 0",
).first[
"count"
]
system_user = Discourse.system_user
mysql_query("SELECT id, blockedafter, blockeduntil FROM #{TABLE_PREFIX}user_attributes").each do |b|
user = User.find_by_id(user_id_from_imported_user_id(b['id']))
mysql_query(
"SELECT id, blockedafter, blockeduntil FROM #{TABLE_PREFIX}user_attributes",
).each do |b|
user = User.find_by_id(user_id_from_imported_user_id(b["id"]))
if user
user.suspended_at = parse_timestamp(b["blockedafter"])
user.suspended_till = parse_timestamp(b["blockeduntil"])
@ -550,7 +563,7 @@ FROM #{TABLE_PREFIX}discuss_users
failed += 1
end
else
puts "Not found: #{b['userid']}"
puts "Not found: #{b["userid"]}"
failed += 1
end
@ -565,7 +578,6 @@ FROM #{TABLE_PREFIX}discuss_users
def mysql_query(sql)
@client.query(sql, cache_rows: true)
end
end
ImportScripts::Modx.new.perform

View File

@ -6,7 +6,6 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb")
# Edit the constants and initialize method for your import data.
class ImportScripts::Muut < ImportScripts::Base
JSON_FILE_PATH = "/path/to/json/file"
CSV_FILE_PATH = "/path/to/csv/file"
@ -36,39 +35,33 @@ class ImportScripts::Muut < ImportScripts::Base
end
def repair_json(arg)
arg.gsub!(/^\(/, "") # content of file is surrounded by ( )
arg.gsub!(/^\(/, "") # content of file is surrounded by ( )
arg.gsub!(/\)$/, "")
arg.gsub!(/\]\]$/, "]") # there can be an extra ] at the end
arg.gsub!(/\]\]$/, "]") # there can be an extra ] at the end
arg.gsub!(/\}\{/, "},{") # missing commas sometimes!
arg.gsub!("}]{", "},{") # surprise square brackets
arg.gsub!("}[{", "},{") # :troll:
arg.gsub!("}]{", "},{") # surprise square brackets
arg.gsub!("}[{", "},{") # :troll:
arg
end
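
repair_json is easiest to follow on a concrete input. A hypothetical garbled Muut export fragment, walked through the relevant substitutions:

raw = +'([{"key":"a"}{"key":"b"}]]'
raw.gsub!(/^\(/, "") # strip the wrapping paren => '[{"key":"a"}{"key":"b"}]]'
raw.gsub!(/\]\]$/, "]") # drop the stray bracket => '[{"key":"a"}{"key":"b"}]'
raw.gsub!(/\}\{/, "},{") # restore the comma => '[{"key":"a"},{"key":"b"}]'

require "json"
JSON.parse(raw).length # => 2
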
def import_users
puts '', "Importing users"
puts "", "Importing users"
create_users(@imported_users) do |u|
{
id: u[0],
email: u[1],
created_at: Time.now
}
end
create_users(@imported_users) { |u| { id: u[0], email: u[1], created_at: Time.now } }
end
def import_categories
puts "", "Importing categories"
create_categories(@imported_json['categories']) do |category|
create_categories(@imported_json["categories"]) do |category|
{
id: category['path'], # muut has no id for categories, so use the path
name: category['title'],
slug: category['path']
id: category["path"], # muut has no id for categories, so use the path
name: category["title"],
slug: category["path"],
}
end
end
@ -79,23 +72,23 @@ class ImportScripts::Muut < ImportScripts::Base
topics = 0
posts = 0
@imported_json['categories'].each do |category|
@imported_json['threads'][category['path']].each do |thread|
@imported_json["categories"].each do |category|
@imported_json["threads"][category["path"]].each do |thread|
next if thread["seed"]["key"] == "skip-this-topic"
mapped = {}
mapped[:id] = "#{thread["seed"]["key"]}-#{thread["seed"]["date"]}"
if thread["seed"]["author"] && user_id_from_imported_user_id(thread["seed"]["author"]["path"]) != ""
if thread["seed"]["author"] &&
user_id_from_imported_user_id(thread["seed"]["author"]["path"]) != ""
mapped[:user_id] = user_id_from_imported_user_id(thread["seed"]["author"]["path"]) || -1
else
mapped[:user_id] = -1
end
# update user display name
if thread["seed"]["author"] && thread["seed"]["author"]["displayname"] != "" && mapped[:user_id] != -1
if thread["seed"]["author"] && thread["seed"]["author"]["displayname"] != "" &&
mapped[:user_id] != -1
user = User.find_by(id: mapped[:user_id])
if user
user.name = thread["seed"]["author"]["displayname"]
@ -122,18 +115,21 @@ class ImportScripts::Muut < ImportScripts::Base
# create replies
if thread["replies"].present? && thread["replies"].count > 0
thread["replies"].reverse_each do |post|
if post_id_from_imported_post_id(post["id"])
next # already imported this post
end
new_post = create_post({
id: "#{post["key"]}-#{post["date"]}",
topic_id: parent_post.topic_id,
user_id: user_id_from_imported_user_id(post["author"]["path"]) || -1,
raw: process_muut_post_body(post["body"]),
created_at: Time.zone.at(post["date"])
}, post["id"])
new_post =
create_post(
{
id: "#{post["key"]}-#{post["date"]}",
topic_id: parent_post.topic_id,
user_id: user_id_from_imported_user_id(post["author"]["path"]) || -1,
raw: process_muut_post_body(post["body"]),
created_at: Time.zone.at(post["date"]),
},
post["id"],
)
if new_post.is_a?(Post)
posts += 1
@ -141,9 +137,7 @@ class ImportScripts::Muut < ImportScripts::Base
puts "Error creating post #{post["id"]}. Skipping."
puts new_post.inspect
end
end
end
topics += 1
@ -165,7 +159,7 @@ class ImportScripts::Muut < ImportScripts::Base
raw.gsub!("---", "```\n")
# tab
raw.gsub!(/\\t/, ' ')
raw.gsub!(/\\t/, " ")
# double quote
raw.gsub!(/\\\"/, '"')
@ -177,9 +171,6 @@ class ImportScripts::Muut < ImportScripts::Base
def file_full_path(relpath)
File.join JSON_FILES_DIR, relpath.split("?").first
end
end
if __FILE__ == $0
ImportScripts::Muut.new.perform
end
ImportScripts::Muut.new.perform if __FILE__ == $0

View File

@ -18,13 +18,12 @@ export BASE="" #
# Call it like this:
# RAILS_ENV=production ruby script/import_scripts/mybb.rb
class ImportScripts::MyBB < ImportScripts::Base
DB_HOST ||= ENV['DB_HOST'] || "localhost"
DB_NAME ||= ENV['DB_NAME'] || "mybb"
DB_PW ||= ENV['DB_PW'] || ""
DB_USER ||= ENV['DB_USER'] || "root"
TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "mybb_"
UPLOADS_DIR ||= ENV['UPLOADS'] || '/data/limelightgaming/uploads'
DB_HOST ||= ENV["DB_HOST"] || "localhost"
DB_NAME ||= ENV["DB_NAME"] || "mybb"
DB_PW ||= ENV["DB_PW"] || ""
DB_USER ||= ENV["DB_USER"] || "root"
TABLE_PREFIX ||= ENV["TABLE_PREFIX"] || "mybb_"
UPLOADS_DIR ||= ENV["UPLOADS"] || "/data/limelightgaming/uploads"
BATCH_SIZE = 1000
BASE = ""
QUIET = true
@ -33,12 +32,8 @@ class ImportScripts::MyBB < ImportScripts::Base
def initialize
super
@client = Mysql2::Client.new(
host: DB_HOST,
username: DB_USER,
password: DB_PW,
database: DB_NAME
)
@client =
Mysql2::Client.new(host: DB_HOST, username: DB_USER, password: DB_PW, database: DB_NAME)
end
def execute
@ -52,63 +47,79 @@ class ImportScripts::MyBB < ImportScripts::Base
end
def import_users
puts '', "creating users"
puts "", "creating users"
total_count = mysql_query("SELECT count(*) count
total_count =
mysql_query(
"SELECT count(*) count
FROM #{TABLE_PREFIX}users u
JOIN #{TABLE_PREFIX}usergroups g ON g.gid = u.usergroup
WHERE g.title != 'Banned';").first['count']
WHERE g.title != 'Banned';",
).first[
"count"
]
batches(BATCH_SIZE) do |offset|
results = mysql_query(
"SELECT uid id, email email, username, regdate, g.title `group`, avatar
results =
mysql_query(
"SELECT uid id, email email, username, regdate, g.title `group`, avatar
FROM #{TABLE_PREFIX}users u
JOIN #{TABLE_PREFIX}usergroups g ON g.gid = u.usergroup
WHERE g.title != 'Banned'
ORDER BY u.uid ASC
LIMIT #{BATCH_SIZE}
OFFSET #{offset};")
OFFSET #{offset};",
)
break if results.size < 1
next if all_records_exist? :users, results.map { |u| u["id"].to_i }
create_users(results, total: total_count, offset: offset) do |user|
avatar_url = user['avatar'].match(/^http/) ? user['avatar'].gsub(/\?.*/, "") : nil
{ id: user['id'],
email: user['email'],
username: user['username'],
created_at: Time.zone.at(user['regdate']),
moderator: user['group'] == 'Super Moderators',
admin: user['group'] == 'Administrators' ,
avatar_url = user["avatar"].match(/^http/) ? user["avatar"].gsub(/\?.*/, "") : nil
{
id: user["id"],
email: user["email"],
username: user["username"],
created_at: Time.zone.at(user["regdate"]),
moderator: user["group"] == "Super Moderators",
admin: user["group"] == "Administrators",
avatar_url: avatar_url,
post_create_action: proc do |newuser|
if !user["avatar"].blank?
avatar = user["avatar"].gsub(/\?.*/, "")
if avatar.match(/^http.*/)
UserAvatar.import_url_for_user(avatar, newuser)
else
filename = File.join(UPLOADS_DIR, avatar)
@uploader.create_avatar(newuser, filename) if File.exist?(filename)
post_create_action:
proc do |newuser|
if !user["avatar"].blank?
avatar = user["avatar"].gsub(/\?.*/, "")
if avatar.match(/^http.*/)
UserAvatar.import_url_for_user(avatar, newuser)
else
filename = File.join(UPLOADS_DIR, avatar)
@uploader.create_avatar(newuser, filename) if File.exist?(filename)
end
end
end
end
end,
}
end
end
end
def import_categories
results = mysql_query("
results =
mysql_query(
"
SELECT fid id, pid parent_id, left(name, 50) name, description
FROM #{TABLE_PREFIX}forums
ORDER BY pid ASC, fid ASC
")
",
)
create_categories(results) do |row|
h = { id: row['id'], name: CGI.unescapeHTML(row['name']), description: CGI.unescapeHTML(row['description']) }
if row['parent_id'].to_i > 0
h[:parent_category_id] = category_id_from_imported_category_id(row['parent_id'])
h = {
id: row["id"],
name: CGI.unescapeHTML(row["name"]),
description: CGI.unescapeHTML(row["description"]),
}
if row["parent_id"].to_i > 0
h[:parent_category_id] = category_id_from_imported_category_id(row["parent_id"])
end
h
end
@ -120,7 +131,9 @@ class ImportScripts::MyBB < ImportScripts::Base
total_count = mysql_query("SELECT count(*) count from #{TABLE_PREFIX}posts").first["count"]
batches(BATCH_SIZE) do |offset|
results = mysql_query("
results =
mysql_query(
"
SELECT p.pid id,
p.tid topic_id,
t.fid category_id,
@ -132,15 +145,16 @@ class ImportScripts::MyBB < ImportScripts::Base
FROM #{TABLE_PREFIX}posts p,
#{TABLE_PREFIX}threads t
WHERE p.tid = t.tid
#{'AND (p.visible = 1 AND t.visible = 1)' unless IMPORT_DELETED_POSTS}
#{"AND (p.visible = 1 AND t.visible = 1)" unless IMPORT_DELETED_POSTS}
ORDER BY p.dateline
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
")
",
)
break if results.size < 1
next if all_records_exist? :posts, results.map { |m| m['id'].to_i }
next if all_records_exist? :posts, results.map { |m| m["id"].to_i }
create_posts(results, total: total_count, offset: offset) do |m|
skip = false
@ -160,20 +174,20 @@ class ImportScripts::MyBB < ImportScripts::Base
# LIMIT 1
# ").first['id']
mapped[:id] = m['id']
mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1
mapped[:raw] = process_mybb_post(m['raw'], m['id'])
mapped[:created_at] = Time.zone.at(m['post_time'])
mapped[:id] = m["id"]
mapped[:user_id] = user_id_from_imported_user_id(m["user_id"]) || -1
mapped[:raw] = process_mybb_post(m["raw"], m["id"])
mapped[:created_at] = Time.zone.at(m["post_time"])
if m['id'] == m['first_post_id']
mapped[:category] = category_id_from_imported_category_id(m['category_id'])
mapped[:title] = CGI.unescapeHTML(m['title'])
if m["id"] == m["first_post_id"]
mapped[:category] = category_id_from_imported_category_id(m["category_id"])
mapped[:title] = CGI.unescapeHTML(m["title"])
else
parent = topic_lookup_from_imported_post_id(m['first_post_id'])
parent = topic_lookup_from_imported_post_id(m["first_post_id"])
if parent
mapped[:topic_id] = parent[:topic_id]
else
puts "Parent post #{m['first_post_id']} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
puts "Parent post #{m["first_post_id"]} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
skip = true
end
end
@ -188,17 +202,18 @@ class ImportScripts::MyBB < ImportScripts::Base
end
def suspend_users
puts '', "banned users are not implemented"
puts "", "banned users are not implemented"
end
# Discourse usernames don't allow spaces
def convert_username(username, post_id)
count = 0
username.gsub!(/\s+/) { |a| count += 1; '_' }
# Warn on MyBB bug that places post text in the quote line - http://community.mybb.com/thread-180526.html
if count > 5
puts "Warning: probably incorrect quote in post #{post_id}"
username.gsub!(/\s+/) do |a|
count += 1
"_"
end
# Warn on MyBB bug that places post text in the quote line - http://community.mybb.com/thread-180526.html
puts "Warning: probably incorrect quote in post #{post_id}" if count > 5
username
end
@ -209,7 +224,7 @@ class ImportScripts::MyBB < ImportScripts::Base
begin
post = Post.find(quoted_post_id_from_imported)
"post:#{post.post_number}, topic:#{post.topic_id}"
rescue
rescue StandardError
puts "Could not find migrated post #{quoted_post_id_from_imported} quoted by original post #{post_id} as #{quoted_post_id}"
""
end
@ -223,23 +238,24 @@ class ImportScripts::MyBB < ImportScripts::Base
s = raw.dup
# convert the quote line
s.gsub!(/\[quote='([^']+)'.*?pid='(\d+).*?\]/) {
"[quote=\"#{convert_username($1, import_id)}, " + post_id_to_post_num_and_topic($2, import_id) + '"]'
}
s.gsub!(/\[quote='([^']+)'.*?pid='(\d+).*?\]/) do
"[quote=\"#{convert_username($1, import_id)}, " +
post_id_to_post_num_and_topic($2, import_id) + '"]'
end
# :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
s.gsub!(/<!-- s(\S+) -->(?:.*)<!-- s(?:\S+) -->/, '\1')
# Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
s.gsub!(/<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/, '[\2](\1)')
s.gsub!(%r{<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)</a><!-- \w -->}, '[\2](\1)')
# Many phpbb bbcode tags have a hash attached to them. Examples:
# [url=https&#58;//google&#46;com:1qh1i7ky]click here[/url:1qh1i7ky]
# [quote=&quot;cybereality&quot;:b0wtlzex]Some text.[/quote:b0wtlzex]
s.gsub!(/:(?:\w{8})\]/, ']')
s.gsub!(/:(?:\w{8})\]/, "]")
# Remove mybb video tags.
s.gsub!(/(^\[video=.*?\])|(\[\/video\]$)/, '')
s.gsub!(%r{(^\[video=.*?\])|(\[/video\]$)}, "")
s = CGI.unescapeHTML(s)
@ -247,16 +263,16 @@ class ImportScripts::MyBB < ImportScripts::Base
# [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
#
# Work around it for now:
s.gsub!(/\[http(s)?:\/\/(www\.)?/, '[')
s.gsub!(%r{\[http(s)?://(www\.)?}, "[")
s
end
def create_permalinks
puts '', 'Creating redirects...', ''
puts "", "Creating redirects...", ""
SiteSetting.permalink_normalizations = '/(\\w+)-(\\d+)[-.].*/\\1-\\2.html'
puts '', 'Users...', ''
puts "", "Users...", ""
total_users = User.count
start_time = Time.now
count = 0
@ -264,12 +280,19 @@ class ImportScripts::MyBB < ImportScripts::Base
ucf = u.custom_fields
count += 1
if ucf && ucf["import_id"] && ucf["import_username"]
Permalink.create(url: "#{BASE}/user-#{ucf['import_id']}.html", external_url: "/u/#{u.username}") rescue nil
begin
Permalink.create(
url: "#{BASE}/user-#{ucf["import_id"]}.html",
external_url: "/u/#{u.username}",
)
rescue StandardError
nil
end
end
print_status(count, total_users, start_time)
end
puts '', 'Categories...', ''
puts "", "Categories...", ""
total_categories = Category.count
start_time = Time.now
count = 0
@ -277,20 +300,24 @@ class ImportScripts::MyBB < ImportScripts::Base
ccf = cat.custom_fields
count += 1
next unless id = ccf["import_id"]
unless QUIET
puts ("forum-#{id}.html --> /c/#{cat.id}")
puts ("forum-#{id}.html --> /c/#{cat.id}") unless QUIET
begin
Permalink.create(url: "#{BASE}/forum-#{id}.html", category_id: cat.id)
rescue StandardError
nil
end
Permalink.create(url: "#{BASE}/forum-#{id}.html", category_id: cat.id) rescue nil
print_status(count, total_categories, start_time)
end
puts '', 'Topics...', ''
puts "", "Topics...", ""
total_posts = Post.count
start_time = Time.now
count = 0
puts '', 'Posts...', ''
puts "", "Posts...", ""
batches(BATCH_SIZE) do |offset|
results = mysql_query("
results =
mysql_query(
"
SELECT p.pid id,
p.tid topic_id
FROM #{TABLE_PREFIX}posts p,
@ -300,13 +327,18 @@ class ImportScripts::MyBB < ImportScripts::Base
ORDER BY p.dateline
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
")
",
)
break if results.size < 1
results.each do |post|
count += 1
if topic = topic_lookup_from_imported_post_id(post['id'])
id = post['topic_id']
Permalink.create(url: "#{BASE}/thread-#{id}.html", topic_id: topic[:topic_id]) rescue nil
if topic = topic_lookup_from_imported_post_id(post["id"])
id = post["topic_id"]
begin
Permalink.create(url: "#{BASE}/thread-#{id}.html", topic_id: topic[:topic_id])
rescue StandardError
nil
end
unless QUIET
puts ("#{BASE}/thread-#{id}.html --> http://localhost:3000/t/#{topic[:topic_id]}")
end
View File
@ -9,9 +9,8 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb")
# If your forum has non-English usernames, don't forget to enable Unicode usernames in /admin/site_settings
class ImportScripts::MybbRu < ImportScripts::Base
JSON_TOPICS_FILE_PATH ||= ENV['JSON_TOPICS_FILE'] || 'mybbru_export/threads.json'
JSON_USERS_FILE_PATH ||= ENV['JSON_USERS_FILE'] || 'mybbru_export/users.json'
JSON_TOPICS_FILE_PATH ||= ENV["JSON_TOPICS_FILE"] || "mybbru_export/threads.json"
JSON_USERS_FILE_PATH ||= ENV["JSON_USERS_FILE"] || "mybbru_export/users.json"
def initialize
super
@ -35,59 +34,49 @@ class ImportScripts::MybbRu < ImportScripts::Base
end
def clean_username(name)
name.gsub(/ /, '')
name.gsub(/ /, "")
end
def import_users
puts '', "Importing users"
puts "", "Importing users"
create_users(@imported_users) do |u|
{
id: u['id'],
username: clean_username(u['name']),
email: u['email'],
created_at: Time.now
}
{ id: u["id"], username: clean_username(u["name"]), email: u["email"], created_at: Time.now }
end
end
def import_categories
puts "", "importing categories..."
categories = @imported_topics.map { |t| t['category'] }.uniq
categories = @imported_topics.map { |t| t["category"] }.uniq
create_categories(categories) do |c|
{
id: c['id'],
name: c['name']
}
end
create_categories(categories) { |c| { id: c["id"], name: c["name"] } }
end
def import_discussions
puts "", "Importing discussions"
@imported_topics.each do |t|
first_post = t['posts'][0]
first_post = t["posts"][0]
create_posts(t['posts']) do |p|
create_posts(t["posts"]) do |p|
result = {
id: p['id'],
user_id: user_id_from_imported_user_id(p['author']['id']),
id: p["id"],
user_id: user_id_from_imported_user_id(p["author"]["id"]),
raw: fix_post_content(p["source"]),
created_at: Time.at(p['createdAt']),
cook_method: Post.cook_methods[:regular]
created_at: Time.at(p["createdAt"]),
cook_method: Post.cook_methods[:regular],
}
if p['id'] == first_post['id']
result[:category] = category_id_from_imported_category_id(t['category']['id'])
result[:title] = t['title']
if p["id"] == first_post["id"]
result[:category] = category_id_from_imported_category_id(t["category"]["id"])
result[:title] = t["title"]
else
parent = topic_lookup_from_imported_post_id(first_post['id'])
parent = topic_lookup_from_imported_post_id(first_post["id"])
if parent
result[:topic_id] = parent[:topic_id]
else
puts "Parent post #{first_post['id']} doesn't exist. Skipping #{p["id"]}: #{t["title"][0..40]}"
puts "Parent post #{first_post["id"]} doesn't exist. Skipping #{p["id"]}: #{t["title"][0..40]}"
break
end
end
@ -100,16 +89,15 @@ class ImportScripts::MybbRu < ImportScripts::Base
def fix_post_content(text)
text
.gsub(/\[code\]/, "\n[code]\n")
.gsub(/\[\/code\]/, "\n[/code]\n")
.gsub(%r{\[/code\]}, "\n[/code]\n")
.gsub(/\[video\]/, "")
.gsub(/\[\/video\]/, "")
.gsub(%r{\[/video\]}, "")
.gsub(/\[quote.*?\]/, "\n" + '\0' + "\n")
.gsub(/\[\/quote\]/, "\n[/quote]\n")
.gsub(/\[spoiler.*?\]/, "\n" + '\0' + "\n").gsub(/\[spoiler/, '[details')
.gsub(/\[\/spoiler\]/, "\n[/details]\n")
.gsub(%r{\[/quote\]}, "\n[/quote]\n")
.gsub(/\[spoiler.*?\]/, "\n" + '\0' + "\n")
.gsub(/\[spoiler/, "[details")
.gsub(%r{\[/spoiler\]}, "\n[/details]\n")
end
end
if __FILE__ == $0
ImportScripts::MybbRu.new.perform
end
ImportScripts::MybbRu.new.perform if __FILE__ == $0
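Several substitutions in this file switch from slash-delimited regexp literals to `%r{}` literals. The two forms compile to the same pattern; `%r{}` just removes the need to escape forward slashes in closing tags like [/code]. A quick standalone check:

# Both literals match the closing tag at index 0:
puts(/\[\/code\]/ =~ "[/code]")  # => 0
puts(%r{\[/code\]} =~ "[/code]") # => 0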
View File
@ -2,7 +2,7 @@
require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'htmlentities'
require "htmlentities"
# Before running this script, paste these lines into your shell,
# then use arrow keys to edit the values
@ -18,45 +18,35 @@ export BASE="forum"
=end
class ImportScripts::MylittleforumSQL < ImportScripts::Base
DB_HOST ||= ENV['DB_HOST'] || "localhost"
DB_NAME ||= ENV['DB_NAME'] || "mylittleforum"
DB_PW ||= ENV['DB_PW'] || ""
DB_USER ||= ENV['DB_USER'] || "root"
TABLE_PREFIX ||= ENV['TABLE_PREFIX'] || "forum_"
IMPORT_AFTER ||= ENV['IMPORT_AFTER'] || "1970-01-01"
IMAGE_BASE ||= ENV['IMAGE_BASE'] || ""
BASE ||= ENV['BASE'] || "forum/"
DB_HOST ||= ENV["DB_HOST"] || "localhost"
DB_NAME ||= ENV["DB_NAME"] || "mylittleforum"
DB_PW ||= ENV["DB_PW"] || ""
DB_USER ||= ENV["DB_USER"] || "root"
TABLE_PREFIX ||= ENV["TABLE_PREFIX"] || "forum_"
IMPORT_AFTER ||= ENV["IMPORT_AFTER"] || "1970-01-01"
IMAGE_BASE ||= ENV["IMAGE_BASE"] || ""
BASE ||= ENV["BASE"] || "forum/"
BATCH_SIZE = 1000
CONVERT_HTML = true
QUIET = nil || ENV['VERBOSE'] == "TRUE"
FORCE_HOSTNAME = nil || ENV['FORCE_HOSTNAME']
QUIET = nil || ENV["VERBOSE"] == "TRUE"
FORCE_HOSTNAME = nil || ENV["FORCE_HOSTNAME"]
QUIET = true
# Site settings
SiteSetting.disable_emails = "non-staff"
if FORCE_HOSTNAME
SiteSetting.force_hostname = FORCE_HOSTNAME
end
SiteSetting.force_hostname = FORCE_HOSTNAME if FORCE_HOSTNAME
def initialize
if IMPORT_AFTER > "1970-01-01"
print_warning("Importing data after #{IMPORT_AFTER}")
end
print_warning("Importing data after #{IMPORT_AFTER}") if IMPORT_AFTER > "1970-01-01"
super
@htmlentities = HTMLEntities.new
begin
@client = Mysql2::Client.new(
host: DB_HOST,
username: DB_USER,
password: DB_PW,
database: DB_NAME
)
@client =
Mysql2::Client.new(host: DB_HOST, username: DB_USER, password: DB_PW, database: DB_NAME)
rescue Exception => e
puts '=' * 50
puts "=" * 50
puts e.message
puts <<~TEXT
Cannot log in to database.
@ -95,12 +85,19 @@ class ImportScripts::MylittleforumSQL < ImportScripts::Base
end
def import_users
puts '', "creating users"
puts "", "creating users"
total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}userdata WHERE last_login > '#{IMPORT_AFTER}';").first['count']
total_count =
mysql_query(
"SELECT count(*) count FROM #{TABLE_PREFIX}userdata WHERE last_login > '#{IMPORT_AFTER}';",
).first[
"count"
]
batches(BATCH_SIZE) do |offset|
results = mysql_query("
results =
mysql_query(
"
SELECT user_id as UserID, user_name as username,
user_real_name as Name,
user_email as Email,
@ -118,31 +115,33 @@ class ImportScripts::MylittleforumSQL < ImportScripts::Base
WHERE last_login > '#{IMPORT_AFTER}'
order by UserID ASC
LIMIT #{BATCH_SIZE}
OFFSET #{offset};")
OFFSET #{offset};",
)
break if results.size < 1
next if all_records_exist? :users, results.map { |u| u['UserID'].to_i }
next if all_records_exist? :users, results.map { |u| u["UserID"].to_i }
create_users(results, total: total_count, offset: offset) do |user|
next if user['Email'].blank?
next if @lookup.user_id_from_imported_user_id(user['UserID'])
next if user["Email"].blank?
next if @lookup.user_id_from_imported_user_id(user["UserID"])
# username = fix_username(user['username'])
{ id: user['UserID'],
email: user['Email'],
username: user['username'],
name: user['Name'],
created_at: user['DateInserted'] == nil ? 0 : Time.zone.at(user['DateInserted']),
bio_raw: user['bio_raw'],
registration_ip_address: user['InsertIPAddress'],
website: user['user_hp'],
password: user['password'],
last_seen_at: user['DateLastActive'] == nil ? 0 : Time.zone.at(user['DateLastActive']),
location: user['Location'],
admin: user['user_type'] == "admin",
moderator: user['user_type'] == "mod",
{
id: user["UserID"],
email: user["Email"],
username: user["username"],
name: user["Name"],
created_at: user["DateInserted"] == nil ? 0 : Time.zone.at(user["DateInserted"]),
bio_raw: user["bio_raw"],
registration_ip_address: user["InsertIPAddress"],
website: user["user_hp"],
password: user["password"],
last_seen_at: user["DateLastActive"] == nil ? 0 : Time.zone.at(user["DateLastActive"]),
location: user["Location"],
admin: user["user_type"] == "admin",
moderator: user["user_type"] == "mod",
}
end
end
@ -151,7 +150,7 @@ class ImportScripts::MylittleforumSQL < ImportScripts::Base
def fix_username(username)
olduser = username.dup
username.gsub!(/Dr\. /, "Dr") # no &
username.gsub!(/[ +!\/,*()?]/, "_") # can't have these
username.gsub!(%r{[ +!/,*()?]}, "_") # can't have these
username.gsub!(/&/, "_and_") # no &
username.gsub!(/@/, "_at_") # no @
username.gsub!(/#/, "_hash_") # no &
@ -159,28 +158,29 @@ class ImportScripts::MylittleforumSQL < ImportScripts::Base
username.gsub!(/[._]+/, "_") # can't have 2 special in a row
username.gsub!(/_+/, "_") # could result in dupes, but wtf?
username.gsub!(/_$/, "") # could result in dupes, but wtf?
if olduser != username
print_warning ("#{olduser} --> #{username}")
end
print_warning ("#{olduser} --> #{username}") if olduser != username
username
end
def import_categories
puts "", "importing categories..."
categories = mysql_query("
categories =
mysql_query(
"
SELECT id as CategoryID,
category as Name,
description as Description
FROM #{TABLE_PREFIX}categories
ORDER BY CategoryID ASC
").to_a
",
).to_a
create_categories(categories) do |category|
{
id: category['CategoryID'],
name: CGI.unescapeHTML(category['Name']),
description: CGI.unescapeHTML(category['Description'])
id: category["CategoryID"],
name: CGI.unescapeHTML(category["Name"]),
description: CGI.unescapeHTML(category["Description"]),
}
end
end
@ -188,13 +188,19 @@ class ImportScripts::MylittleforumSQL < ImportScripts::Base
def import_topics
puts "", "importing topics..."
total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}entries
total_count =
mysql_query(
"SELECT count(*) count FROM #{TABLE_PREFIX}entries
WHERE time > '#{IMPORT_AFTER}'
AND pid = 0;").first['count']
AND pid = 0;",
).first[
"count"
]
batches(BATCH_SIZE) do |offset|
discussions = mysql_query(
"SELECT id as DiscussionID,
discussions =
mysql_query(
"SELECT id as DiscussionID,
category as CategoryID,
subject as Name,
text as Body,
@ -206,29 +212,32 @@ class ImportScripts::MylittleforumSQL < ImportScripts::Base
AND time > '#{IMPORT_AFTER}'
ORDER BY time ASC
LIMIT #{BATCH_SIZE}
OFFSET #{offset};")
OFFSET #{offset};",
)
break if discussions.size < 1
next if all_records_exist? :posts, discussions.map { |t| "discussion#" + t['DiscussionID'].to_s }
if all_records_exist? :posts, discussions.map { |t| "discussion#" + t["DiscussionID"].to_s }
next
end
create_posts(discussions, total: total_count, offset: offset) do |discussion|
raw = clean_up(discussion['Body'])
raw = clean_up(discussion["Body"])
youtube = nil
unless discussion['youtube'].blank?
youtube = clean_youtube(discussion['youtube'])
unless discussion["youtube"].blank?
youtube = clean_youtube(discussion["youtube"])
raw += "\n#{youtube}\n"
print_warning(raw)
end
{
id: "discussion#" + discussion['DiscussionID'].to_s,
user_id: user_id_from_imported_user_id(discussion['InsertUserID']) || Discourse::SYSTEM_USER_ID,
title: discussion['Name'].gsub('\\"', '"'),
category: category_id_from_imported_category_id(discussion['CategoryID']),
id: "discussion#" + discussion["DiscussionID"].to_s,
user_id:
user_id_from_imported_user_id(discussion["InsertUserID"]) || Discourse::SYSTEM_USER_ID,
title: discussion["Name"].gsub('\\"', '"'),
category: category_id_from_imported_category_id(discussion["CategoryID"]),
raw: raw,
created_at: Time.zone.at(discussion['DateInserted']),
created_at: Time.zone.at(discussion["DateInserted"]),
}
end
end
@ -237,15 +246,20 @@ class ImportScripts::MylittleforumSQL < ImportScripts::Base
def import_posts
puts "", "importing posts..."
total_count = mysql_query(
"SELECT count(*) count
total_count =
mysql_query(
"SELECT count(*) count
FROM #{TABLE_PREFIX}entries
WHERE pid > 0
AND time > '#{IMPORT_AFTER}';").first['count']
AND time > '#{IMPORT_AFTER}';",
).first[
"count"
]
batches(BATCH_SIZE) do |offset|
comments = mysql_query(
"SELECT id as CommentID,
comments =
mysql_query(
"SELECT id as CommentID,
tid as DiscussionID,
text as Body,
time as DateInserted,
@ -256,26 +270,33 @@ class ImportScripts::MylittleforumSQL < ImportScripts::Base
AND time > '#{IMPORT_AFTER}'
ORDER BY time ASC
LIMIT #{BATCH_SIZE}
OFFSET #{offset};")
OFFSET #{offset};",
)
break if comments.size < 1
next if all_records_exist? :posts, comments.map { |comment| "comment#" + comment['CommentID'].to_s }
if all_records_exist? :posts,
comments.map { |comment| "comment#" + comment["CommentID"].to_s }
next
end
create_posts(comments, total: total_count, offset: offset) do |comment|
next unless t = topic_lookup_from_imported_post_id("discussion#" + comment['DiscussionID'].to_s)
next if comment['Body'].blank?
raw = clean_up(comment['Body'])
unless t = topic_lookup_from_imported_post_id("discussion#" + comment["DiscussionID"].to_s)
next
end
next if comment["Body"].blank?
raw = clean_up(comment["Body"])
youtube = nil
unless comment['youtube'].blank?
youtube = clean_youtube(comment['youtube'])
unless comment["youtube"].blank?
youtube = clean_youtube(comment["youtube"])
raw += "\n#{youtube}\n"
end
{
id: "comment#" + comment['CommentID'].to_s,
user_id: user_id_from_imported_user_id(comment['InsertUserID']) || Discourse::SYSTEM_USER_ID,
id: "comment#" + comment["CommentID"].to_s,
user_id:
user_id_from_imported_user_id(comment["InsertUserID"]) || Discourse::SYSTEM_USER_ID,
topic_id: t[:topic_id],
raw: clean_up(raw),
created_at: Time.zone.at(comment['DateInserted'])
created_at: Time.zone.at(comment["DateInserted"]),
}
end
end
@ -284,20 +305,20 @@ class ImportScripts::MylittleforumSQL < ImportScripts::Base
def clean_youtube(youtube_raw)
youtube_cooked = clean_up(youtube_raw.dup.to_s)
# get just src from <iframe> and put on a line by itself
re = /<iframe.+?src="(\S+?)".+?<\/iframe>/mix
re = %r{<iframe.+?src="(\S+?)".+?</iframe>}mix
youtube_cooked.gsub!(re) { "\n#{$1}\n" }
re = /<object.+?src="(\S+?)".+?<\/object>/mix
re = %r{<object.+?src="(\S+?)".+?</object>}mix
youtube_cooked.gsub!(re) { "\n#{$1}\n" }
youtube_cooked.gsub!(/^\/\//, "https://") # make sure it has a protocol
youtube_cooked.gsub!(%r{^//}, "https://") # make sure it has a protocol
unless /http/.match(youtube_cooked) # handle case of only youtube object number
if youtube_cooked.length < 8 || /[<>=]/.match(youtube_cooked)
# probably not a youtube id
youtube_cooked = ""
else
youtube_cooked = 'https://www.youtube.com/watch?v=' + youtube_cooked
youtube_cooked = "https://www.youtube.com/watch?v=" + youtube_cooked
end
end
print_warning("#{'-' * 40}\nBefore: #{youtube_raw}\nAfter: #{youtube_cooked}") unless QUIET
print_warning("#{"-" * 40}\nBefore: #{youtube_raw}\nAfter: #{youtube_cooked}") unless QUIET
youtube_cooked
end
@ -313,73 +334,79 @@ class ImportScripts::MylittleforumSQL < ImportScripts::Base
raw = raw.gsub("\\'", "'")
raw = raw.gsub(/\[b\]/i, "<strong>")
raw = raw.gsub(/\[\/b\]/i, "</strong>")
raw = raw.gsub(%r{\[/b\]}i, "</strong>")
raw = raw.gsub(/\[i\]/i, "<em>")
raw = raw.gsub(/\[\/i\]/i, "</em>")
raw = raw.gsub(%r{\[/i\]}i, "</em>")
raw = raw.gsub(/\[u\]/i, "<em>")
raw = raw.gsub(/\[\/u\]/i, "</em>")
raw = raw.gsub(%r{\[/u\]}i, "</em>")
raw = raw.gsub(/\[url\](\S+)\[\/url\]/im) { "#{$1}" }
raw = raw.gsub(/\[link\](\S+)\[\/link\]/im) { "#{$1}" }
raw = raw.gsub(%r{\[url\](\S+)\[/url\]}im) { "#{$1}" }
raw = raw.gsub(%r{\[link\](\S+)\[/link\]}im) { "#{$1}" }
# URL & LINK with text
raw = raw.gsub(/\[url=(\S+?)\](.*?)\[\/url\]/im) { "<a href=\"#{$1}\">#{$2}</a>" }
raw = raw.gsub(/\[link=(\S+?)\](.*?)\[\/link\]/im) { "<a href=\"#{$1}\">#{$2}</a>" }
raw = raw.gsub(%r{\[url=(\S+?)\](.*?)\[/url\]}im) { "<a href=\"#{$1}\">#{$2}</a>" }
raw = raw.gsub(%r{\[link=(\S+?)\](.*?)\[/link\]}im) { "<a href=\"#{$1}\">#{$2}</a>" }
# remote images
raw = raw.gsub(/\[img\](https?:.+?)\[\/img\]/im) { "<img src=\"#{$1}\">" }
raw = raw.gsub(/\[img=(https?.+?)\](.+?)\[\/img\]/im) { "<img src=\"#{$1}\" alt=\"#{$2}\">" }
raw = raw.gsub(%r{\[img\](https?:.+?)\[/img\]}im) { "<img src=\"#{$1}\">" }
raw = raw.gsub(%r{\[img=(https?.+?)\](.+?)\[/img\]}im) { "<img src=\"#{$1}\" alt=\"#{$2}\">" }
# local images
raw = raw.gsub(/\[img\](.+?)\[\/img\]/i) { "<img src=\"#{IMAGE_BASE}/#{$1}\">" }
raw = raw.gsub(/\[img=(.+?)\](https?.+?)\[\/img\]/im) { "<img src=\"#{IMAGE_BASE}/#{$1}\" alt=\"#{$2}\">" }
raw = raw.gsub(%r{\[img\](.+?)\[/img\]}i) { "<img src=\"#{IMAGE_BASE}/#{$1}\">" }
raw =
raw.gsub(%r{\[img=(.+?)\](https?.+?)\[/img\]}im) do
"<img src=\"#{IMAGE_BASE}/#{$1}\" alt=\"#{$2}\">"
end
# Convert image bbcode
raw.gsub!(/\[img=(\d+),(\d+)\]([^\]]*)\[\/img\]/im, '<img width="\1" height="\2" src="\3">')
raw.gsub!(%r{\[img=(\d+),(\d+)\]([^\]]*)\[/img\]}im, '<img width="\1" height="\2" src="\3">')
# [div]s are really [quote]s
raw.gsub!(/\[div\]/mix, "[quote]")
raw.gsub!(/\[\/div\]/mix, "[/quote]")
raw.gsub!(%r{\[/div\]}mix, "[/quote]")
# [postedby] -> link to @user
raw.gsub(/\[postedby\](.+?)\[b\](.+?)\[\/b\]\[\/postedby\]/i) { "#{$1}@#{$2}" }
raw.gsub(%r{\[postedby\](.+?)\[b\](.+?)\[/b\]\[/postedby\]}i) { "#{$1}@#{$2}" }
# CODE (not tested)
raw = raw.gsub(/\[code\](\S+)\[\/code\]/im) { "```\n#{$1}\n```" }
raw = raw.gsub(/\[pre\](\S+)\[\/pre\]/im) { "```\n#{$1}\n```" }
raw = raw.gsub(%r{\[code\](\S+)\[/code\]}im) { "```\n#{$1}\n```" }
raw = raw.gsub(%r{\[pre\](\S+)\[/pre\]}im) { "```\n#{$1}\n```" }
raw = raw.gsub(/(https:\/\/youtu\S+)/i) { "\n#{$1}\n" } # youtube links on a line by themselves
raw = raw.gsub(%r{(https://youtu\S+)}i) { "\n#{$1}\n" } # youtube links on a line by themselves
# no center
raw = raw.gsub(/\[\/?center\]/i, "")
raw = raw.gsub(%r{\[/?center\]}i, "")
# no size
raw = raw.gsub(/\[\/?size.*?\]/i, "")
raw = raw.gsub(%r{\[/?size.*?\]}i, "")
### FROM VANILLA:
# fix whitespaces
raw = raw.gsub(/(\\r)?\\n/, "\n")
.gsub("\\t", "\t")
raw = raw.gsub(/(\\r)?\\n/, "\n").gsub("\\t", "\t")
unless CONVERT_HTML
# replace all chevrons with HTML entities
# NOTE: must be done
# - AFTER all the "code" processing
# - BEFORE the "quote" processing
raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub("<", "\u2603") + "`" }
.gsub("<", "&lt;")
.gsub("\u2603", "<")
raw =
raw
.gsub(/`([^`]+)`/im) { "`" + $1.gsub("<", "\u2603") + "`" }
.gsub("<", "&lt;")
.gsub("\u2603", "<")
raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub(">", "\u2603") + "`" }
.gsub(">", "&gt;")
.gsub("\u2603", ">")
raw =
raw
.gsub(/`([^`]+)`/im) { "`" + $1.gsub(">", "\u2603") + "`" }
.gsub(">", "&gt;")
.gsub("\u2603", ">")
end
# Remove the color tag
raw.gsub!(/\[color=[#a-z0-9]+\]/i, "")
raw.gsub!(/\[\/color\]/i, "")
raw.gsub!(%r{\[/color\]}i, "")
### END VANILLA:
raw
@ -395,54 +422,72 @@ class ImportScripts::MylittleforumSQL < ImportScripts::Base
end
def create_permalinks
puts '', 'Creating redirects...', ''
puts "", "Creating redirects...", ""
puts '', 'Users...', ''
puts "", "Users...", ""
User.find_each do |u|
ucf = u.custom_fields
if ucf && ucf["import_id"] && ucf["import_username"]
Permalink.create(url: "#{BASE}/user-id-#{ucf['import_id']}.html", external_url: "/u/#{u.username}") rescue nil
print '.'
begin
Permalink.create(
url: "#{BASE}/user-id-#{ucf["import_id"]}.html",
external_url: "/u/#{u.username}",
)
rescue StandardError
nil
end
print "."
end
end
puts '', 'Posts...', ''
puts "", "Posts...", ""
Post.find_each do |post|
pcf = post.custom_fields
if pcf && pcf["import_id"]
topic = post.topic
id = pcf["import_id"].split('#').last
id = pcf["import_id"].split("#").last
if post.post_number == 1
Permalink.create(url: "#{BASE}/forum_entry-id-#{id}.html", topic_id: topic.id) rescue nil
begin
Permalink.create(url: "#{BASE}/forum_entry-id-#{id}.html", topic_id: topic.id)
rescue StandardError
nil
end
unless QUIET
print_warning("forum_entry-id-#{id}.html --> http://localhost:3000/t/#{topic.id}")
end
else
Permalink.create(url: "#{BASE}/forum_entry-id-#{id}.html", post_id: post.id) rescue nil
begin
Permalink.create(url: "#{BASE}/forum_entry-id-#{id}.html", post_id: post.id)
rescue StandardError
nil
end
unless QUIET
print_warning("forum_entry-id-#{id}.html --> http://localhost:3000/t/#{topic.id}/#{post.id}")
print_warning(
"forum_entry-id-#{id}.html --> http://localhost:3000/t/#{topic.id}/#{post.id}",
)
end
end
print '.'
print "."
end
end
puts '', 'Categories...', ''
puts "", "Categories...", ""
Category.find_each do |cat|
ccf = cat.custom_fields
next unless id = ccf["import_id"]
unless QUIET
print_warning("forum-category-#{id}.html --> /t/#{cat.id}")
print_warning("forum-category-#{id}.html --> /t/#{cat.id}") unless QUIET
begin
Permalink.create(url: "#{BASE}/forum-category-#{id}.html", category_id: cat.id)
rescue StandardError
nil
end
Permalink.create(url: "#{BASE}/forum-category-#{id}.html", category_id: cat.id) rescue nil
print '.'
print "."
end
end
def print_warning(message)
$stderr.puts "#{message}"
end
end
ImportScripts::MylittleforumSQL.new.perform
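Another recurring rewrite here: short multi-line conditionals collapse into trailing guard modifiers. A self-contained sketch using the script's own IMPORT_AFTER and print_warning names (values are illustrative):

IMPORT_AFTER = "2015-01-01" # illustrative cutoff

def print_warning(message)
  $stderr.puts "#{message}"
end

# Multi-line form as written before formatting:
if IMPORT_AFTER > "1970-01-01"
  print_warning("Importing data after #{IMPORT_AFTER}")
end

# Equivalent guard-modifier form that syntax_tree emits for short bodies:
print_warning("Importing data after #{IMPORT_AFTER}") if IMPORT_AFTER > "1970-01-01"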
View File
@ -1,8 +1,8 @@
# frozen_string_literal: true
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'pg'
require_relative 'base/uploader'
require "pg"
require_relative "base/uploader"
=begin
if you want to create mock users for posts made by anonymous participants,
@ -40,7 +40,7 @@ class ImportScripts::Nabble < ImportScripts::Base
BATCH_SIZE = 1000
DB_NAME = "nabble"
DB_NAME = "nabble"
CATEGORY_ID = 6
def initialize
@ -64,14 +64,13 @@ class ImportScripts::Nabble < ImportScripts::Base
total_count = @client.exec("SELECT COUNT(user_id) FROM user_")[0]["count"]
batches(BATCH_SIZE) do |offset|
users = @client.query(<<-SQL
users = @client.query(<<-SQL)
SELECT user_id, name, email, joined
FROM user_
ORDER BY joined
LIMIT #{BATCH_SIZE}
OFFSET #{offset}
SQL
)
break if users.ntuples() < 1
@ -83,24 +82,23 @@ class ImportScripts::Nabble < ImportScripts::Base
email: row["email"] || fake_email,
created_at: Time.zone.at(@td.decode(row["joined"])),
name: row["name"],
post_create_action: proc do |user|
import_avatar(user, row["user_id"])
end
post_create_action: proc { |user| import_avatar(user, row["user_id"]) },
}
end
end
end
def import_avatar(user, org_id)
filename = 'avatar' + org_id.to_s
path = File.join('/tmp/nab', filename)
res = @client.exec("SELECT content FROM file_avatar WHERE name='avatar100.png' AND user_id = #{org_id} LIMIT 1")
filename = "avatar" + org_id.to_s
path = File.join("/tmp/nab", filename)
res =
@client.exec(
"SELECT content FROM file_avatar WHERE name='avatar100.png' AND user_id = #{org_id} LIMIT 1",
)
return if res.ntuples() < 1
binary = res[0]['content']
File.open(path, 'wb') { |f|
f.write(PG::Connection.unescape_bytea(binary))
}
binary = res[0]["content"]
File.open(path, "wb") { |f| f.write(PG::Connection.unescape_bytea(binary)) }
upload = @uploader.create_upload(user.id, path, filename)
@ -113,7 +111,6 @@ class ImportScripts::Nabble < ImportScripts::Base
else
Rails.logger.error("Could not persist avatar for user #{user.username}")
end
end
def parse_email(msg)
@ -128,11 +125,13 @@ class ImportScripts::Nabble < ImportScripts::Base
def create_forum_topics
puts "", "creating forum topics"
app_node_id = @client.exec("SELECT node_id FROM node WHERE is_app LIMIT 1")[0]['node_id']
topic_count = @client.exec("SELECT COUNT(node_id) AS count FROM node WHERE parent_id = #{app_node_id}")[0]["count"]
app_node_id = @client.exec("SELECT node_id FROM node WHERE is_app LIMIT 1")[0]["node_id"]
topic_count =
@client.exec("SELECT COUNT(node_id) AS count FROM node WHERE parent_id = #{app_node_id}")[0][
"count"
]
batches(BATCH_SIZE) do |offset|
topics = @client.exec <<-SQL
SELECT n.node_id, n.subject, n.owner_id, n.when_created, nm.message, n.msg_fmt
FROM node AS n
@ -145,43 +144,43 @@ class ImportScripts::Nabble < ImportScripts::Base
break if topics.ntuples() < 1
next if all_records_exist? :posts, topics.map { |t| t['node_id'].to_i }
next if all_records_exist? :posts, topics.map { |t| t["node_id"].to_i }
create_posts(topics, total: topic_count, offset: offset) do |t|
raw = body_from(t)
next unless raw
raw = process_content(raw)
raw = process_attachments(raw, t['node_id'])
raw = process_attachments(raw, t["node_id"])
{
id: t['node_id'],
title: t['subject'],
id: t["node_id"],
title: t["subject"],
user_id: user_id_from_imported_user_id(t["owner_id"]) || Discourse::SYSTEM_USER_ID,
created_at: Time.zone.at(@td.decode(t["when_created"])),
category: CATEGORY_ID,
raw: raw,
cook_method: Post.cook_methods[:regular]
cook_method: Post.cook_methods[:regular],
}
end
end
end
def body_from(p)
%w(m s).include?(p['msg_fmt']) ? parse_email(p['message']) : p['message']
%w[m s].include?(p["msg_fmt"]) ? parse_email(p["message"]) : p["message"]
rescue Email::Receiver::EmptyEmailError
puts "Skipped #{p['node_id']}"
puts "Skipped #{p["node_id"]}"
end
def process_content(txt)
txt.gsub! /\<quote author="(.*?)"\>/, '[quote="\1"]'
txt.gsub! /\<\/quote\>/, '[/quote]'
txt.gsub!(/\<raw\>(.*?)\<\/raw\>/m) do |match|
txt.gsub! %r{\</quote\>}, "[/quote]"
txt.gsub!(%r{\<raw\>(.*?)\</raw\>}m) do |match|
c = Regexp.last_match[1].indent(4)
"\n#{c}\n"
"\n#{c}\n"
end
# lines starting with # are comments, not headings, insert a space to prevent markdown
txt.gsub! /\n#/m, ' #'
txt.gsub! /\n#/m, " #"
# in the languagetool forum, quite a lot of XML was not marked as raw
# so we treat <rule...>...</rule> and <category...>...</category> as raw
@ -202,12 +201,10 @@ class ImportScripts::Nabble < ImportScripts::Base
def process_attachments(txt, postid)
txt.gsub!(/<nabble_img src="(.*?)" (.*?)>/m) do |match|
basename = Regexp.last_match[1]
get_attachment_upload(basename, postid) do |upload|
@uploader.embedded_image_html(upload)
end
get_attachment_upload(basename, postid) { |upload| @uploader.embedded_image_html(upload) }
end
txt.gsub!(/<nabble_a href="(.*?)">(.*?)<\/nabble_a>/m) do |match|
txt.gsub!(%r{<nabble_a href="(.*?)">(.*?)</nabble_a>}m) do |match|
basename = Regexp.last_match[1]
get_attachment_upload(basename, postid) do |upload|
@uploader.attachment_html(upload, basename)
@ -217,13 +214,12 @@ class ImportScripts::Nabble < ImportScripts::Base
end
def get_attachment_upload(basename, postid)
contents = @client.exec("SELECT content FROM file_node WHERE name='#{basename}' AND node_id = #{postid}")
contents =
@client.exec("SELECT content FROM file_node WHERE name='#{basename}' AND node_id = #{postid}")
if contents.any?
binary = contents[0]['content']
fn = File.join('/tmp/nab', basename)
File.open(fn, 'wb') { |f|
f.write(PG::Connection.unescape_bytea(binary))
}
binary = contents[0]["content"]
fn = File.join("/tmp/nab", basename)
File.open(fn, "wb") { |f| f.write(PG::Connection.unescape_bytea(binary)) }
yield @uploader.create_upload(0, fn, basename)
end
end
@ -231,8 +227,11 @@ class ImportScripts::Nabble < ImportScripts::Base
def import_replies
puts "", "creating topic replies"
app_node_id = @client.exec("SELECT node_id FROM node WHERE is_app LIMIT 1")[0]['node_id']
post_count = @client.exec("SELECT COUNT(node_id) AS count FROM node WHERE parent_id != #{app_node_id}")[0]["count"]
app_node_id = @client.exec("SELECT node_id FROM node WHERE is_app LIMIT 1")[0]["node_id"]
post_count =
@client.exec("SELECT COUNT(node_id) AS count FROM node WHERE parent_id != #{app_node_id}")[0][
"count"
]
topic_ids = {}
@ -249,11 +248,11 @@ class ImportScripts::Nabble < ImportScripts::Base
break if posts.ntuples() < 1
next if all_records_exist? :posts, posts.map { |p| p['node_id'].to_i }
next if all_records_exist? :posts, posts.map { |p| p["node_id"].to_i }
create_posts(posts, total: post_count, offset: offset) do |p|
parent_id = p['parent_id']
id = p['node_id']
parent_id = p["parent_id"]
id = p["node_id"]
topic_id = topic_ids[parent_id]
unless topic_id
@ -268,19 +267,21 @@ class ImportScripts::Nabble < ImportScripts::Base
next unless raw
raw = process_content(raw)
raw = process_attachments(raw, id)
{ id: id,
{
id: id,
topic_id: topic_id,
user_id: user_id_from_imported_user_id(p['owner_id']) || Discourse::SYSTEM_USER_ID,
user_id: user_id_from_imported_user_id(p["owner_id"]) || Discourse::SYSTEM_USER_ID,
created_at: Time.zone.at(@td.decode(p["when_created"])),
raw: raw,
cook_method: Post.cook_methods[:regular] }
cook_method: Post.cook_methods[:regular],
}
end
end
end
end
class String
def indent(count, char = ' ')
def indent(count, char = " ")
gsub(/([^\n]*)(\n|$)/) do |match|
last_iteration = ($1 == "" && $2 == "")
line = +""
View File
@ -5,28 +5,28 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb")
# Edit the constants and initialize method for your import data.
class ImportScripts::Ning < ImportScripts::Base
JSON_FILES_DIR = "/Users/techapj/Downloads/ben/ADEM"
ATTACHMENT_PREFIXES = ["discussions", "pages", "blogs", "members", "photos"]
EXTRA_AUTHORIZED_EXTENSIONS = ["bmp", "ico", "txt", "pdf", "gif", "jpg", "jpeg", "html"]
ATTACHMENT_PREFIXES = %w[discussions pages blogs members photos]
EXTRA_AUTHORIZED_EXTENSIONS = %w[bmp ico txt pdf gif jpg jpeg html]
def initialize
super
@system_user = Discourse.system_user
@users_json = load_ning_json("ning-members-local.json")
@users_json = load_ning_json("ning-members-local.json")
@discussions_json = load_ning_json("ning-discussions-local.json")
# An example of a custom category from Ning:
@blogs_json = load_ning_json("ning-blogs-local.json")
@photos_json = load_ning_json("ning-photos-local.json")
@pages_json = load_ning_json("ning-pages-local.json")
@photos_json = load_ning_json("ning-photos-local.json")
@pages_json = load_ning_json("ning-pages-local.json")
SiteSetting.max_image_size_kb = 10240
SiteSetting.max_attachment_size_kb = 10240
SiteSetting.authorized_extensions = (SiteSetting.authorized_extensions.split("|") + EXTRA_AUTHORIZED_EXTENSIONS).uniq.join("|")
SiteSetting.max_image_size_kb = 10_240
SiteSetting.max_attachment_size_kb = 10_240
SiteSetting.authorized_extensions =
(SiteSetting.authorized_extensions.split("|") + EXTRA_AUTHORIZED_EXTENSIONS).uniq.join("|")
# Example of importing a custom profile field:
# @interests_field = UserField.find_by_name("My interests")
@ -60,23 +60,23 @@ class ImportScripts::Ning < ImportScripts::Base
end
def repair_json(arg)
arg.gsub!(/^\(/, "") # content of file is surround by ( )
arg.gsub!(/^\(/, "") # content of file is surround by ( )
arg.gsub!(/\)$/, "")
arg.gsub!(/\]\]$/, "]") # there can be an extra ] at the end
arg.gsub!(/\]\]$/, "]") # there can be an extra ] at the end
arg.gsub!(/\}\{/, "},{") # missing commas sometimes!
arg.gsub!("}]{", "},{") # surprise square brackets
arg.gsub!("}[{", "},{") # :troll:
arg.gsub!("}]{", "},{") # surprise square brackets
arg.gsub!("}[{", "},{") # :troll:
arg
end
def import_users
puts '', "Importing users"
puts "", "Importing users"
staff_levels = ["admin", "moderator", "owner"]
staff_levels = %w[admin moderator owner]
create_users(@users_json) do |u|
{
@ -88,57 +88,58 @@ class ImportScripts::Ning < ImportScripts::Base
location: "#{u["location"]} #{u["country"]}",
avatar_url: u["profilePhoto"],
bio_raw: u["profileQuestions"].is_a?(Hash) ? u["profileQuestions"]["About Me"] : nil,
post_create_action: proc do |newuser|
# if u["profileQuestions"].is_a?(Hash)
# newuser.custom_fields = {"user_field_#{@interests_field.id}" => u["profileQuestions"]["My interests"]}
# end
post_create_action:
proc do |newuser|
# if u["profileQuestions"].is_a?(Hash)
# newuser.custom_fields = {"user_field_#{@interests_field.id}" => u["profileQuestions"]["My interests"]}
# end
if staff_levels.include?(u["level"].downcase)
if u["level"].downcase == "admin" || u["level"].downcase == "owner"
newuser.admin = true
else
newuser.moderator = true
end
end
# states: ["active", "suspended", "left", "pending"]
if u["state"] == "active" && newuser.approved_at.nil?
newuser.approved = true
newuser.approved_by_id = @system_user.id
newuser.approved_at = newuser.created_at
end
newuser.save
if u["profilePhoto"] && newuser.user_avatar.try(:custom_upload_id).nil?
photo_path = file_full_path(u["profilePhoto"])
if File.exist?(photo_path)
begin
upload = create_upload(newuser.id, photo_path, File.basename(photo_path))
if upload.persisted?
newuser.import_mode = false
newuser.create_user_avatar
newuser.import_mode = true
newuser.user_avatar.update(custom_upload_id: upload.id)
newuser.update(uploaded_avatar_id: upload.id)
else
puts "Error: Upload did not persist for #{photo_path}!"
end
rescue SystemCallError => err
puts "Could not import avatar #{photo_path}: #{err.message}"
if staff_levels.include?(u["level"].downcase)
if u["level"].downcase == "admin" || u["level"].downcase == "owner"
newuser.admin = true
else
newuser.moderator = true
end
else
puts "avatar file not found at #{photo_path}"
end
end
end
# states: ["active", "suspended", "left", "pending"]
if u["state"] == "active" && newuser.approved_at.nil?
newuser.approved = true
newuser.approved_by_id = @system_user.id
newuser.approved_at = newuser.created_at
end
newuser.save
if u["profilePhoto"] && newuser.user_avatar.try(:custom_upload_id).nil?
photo_path = file_full_path(u["profilePhoto"])
if File.exist?(photo_path)
begin
upload = create_upload(newuser.id, photo_path, File.basename(photo_path))
if upload.persisted?
newuser.import_mode = false
newuser.create_user_avatar
newuser.import_mode = true
newuser.user_avatar.update(custom_upload_id: upload.id)
newuser.update(uploaded_avatar_id: upload.id)
else
puts "Error: Upload did not persist for #{photo_path}!"
end
rescue SystemCallError => err
puts "Could not import avatar #{photo_path}: #{err.message}"
end
else
puts "avatar file not found at #{photo_path}"
end
end
end,
}
end
EmailToken.delete_all
end
def suspend_users
puts '', "Updating suspended users"
puts "", "Updating suspended users"
count = 0
suspended = 0
@ -151,7 +152,10 @@ class ImportScripts::Ning < ImportScripts::Base
user.suspended_till = 200.years.from_now
if user.save
StaffActionLogger.new(@system_user).log_user_suspend(user, "Import data indicates account is suspended.")
StaffActionLogger.new(@system_user).log_user_suspend(
user,
"Import data indicates account is suspended.",
)
suspended += 1
else
puts "Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}"
@ -168,13 +172,15 @@ class ImportScripts::Ning < ImportScripts::Base
def import_categories
puts "", "Importing categories"
create_categories((["Blog", "Pages", "Photos"] + @discussions_json.map { |d| d["category"] }).uniq.compact) do |name|
create_categories(
(%w[Blog Pages Photos] + @discussions_json.map { |d| d["category"] }).uniq.compact,
) do |name|
if name.downcase == "uncategorized"
nil
else
{
id: name, # ning has no id for categories, so use the name
name: name
name: name,
}
end
end
@ -220,9 +226,7 @@ class ImportScripts::Ning < ImportScripts::Base
unless topic["category"].nil? || topic["category"].downcase == "uncategorized"
mapped[:category] = category_id_from_imported_category_id(topic["category"])
end
if topic["category"].nil? && default_category
mapped[:category] = default_category
end
mapped[:category] = default_category if topic["category"].nil? && default_category
mapped[:title] = CGI.unescapeHTML(topic["title"])
mapped[:raw] = process_ning_post_body(topic["description"])
@ -230,13 +234,9 @@ class ImportScripts::Ning < ImportScripts::Base
mapped[:raw] = add_file_attachments(mapped[:raw], topic["fileAttachments"])
end
if topic["photoUrl"]
mapped[:raw] = add_photo(mapped[:raw], topic["photoUrl"])
end
mapped[:raw] = add_photo(mapped[:raw], topic["photoUrl"]) if topic["photoUrl"]
if topic["embedCode"]
mapped[:raw] = add_video(mapped[:raw], topic["embedCode"])
end
mapped[:raw] = add_video(mapped[:raw], topic["embedCode"]) if topic["embedCode"]
parent_post = create_post(mapped, mapped[:id])
unless parent_post.is_a?(Post)
@ -247,23 +247,24 @@ class ImportScripts::Ning < ImportScripts::Base
if topic["comments"].present?
topic["comments"].reverse.each do |post|
if post_id_from_imported_post_id(post["id"])
next # already imported this post
end
raw = process_ning_post_body(post["description"])
if post["fileAttachments"]
raw = add_file_attachments(raw, post["fileAttachments"])
end
raw = add_file_attachments(raw, post["fileAttachments"]) if post["fileAttachments"]
new_post = create_post({
id: post["id"],
topic_id: parent_post.topic_id,
user_id: user_id_from_imported_user_id(post["contributorName"]) || -1,
raw: raw,
created_at: Time.zone.parse(post["createdDate"])
}, post["id"])
new_post =
create_post(
{
id: post["id"],
topic_id: parent_post.topic_id,
user_id: user_id_from_imported_user_id(post["contributorName"]) || -1,
raw: raw,
created_at: Time.zone.parse(post["createdDate"]),
},
post["id"],
)
if new_post.is_a?(Post)
posts += 1
@ -288,11 +289,17 @@ class ImportScripts::Ning < ImportScripts::Base
end
def attachment_regex
@_attachment_regex ||= Regexp.new(%Q[<a (?:[^>]*)href="(?:#{ATTACHMENT_PREFIXES.join('|')})\/(?:[^"]+)"(?:[^>]*)><img (?:[^>]*)src="([^"]+)"(?:[^>]*)><\/a>])
@_attachment_regex ||=
Regexp.new(
%Q[<a (?:[^>]*)href="(?:#{ATTACHMENT_PREFIXES.join("|")})\/(?:[^"]+)"(?:[^>]*)><img (?:[^>]*)src="([^"]+)"(?:[^>]*)><\/a>],
)
end
def youtube_iframe_regex
@_youtube_iframe_regex ||= Regexp.new(%Q[<p><iframe(?:[^>]*)src="\/\/www.youtube.com\/embed\/([^"]+)"(?:[^>]*)><\/iframe>(?:[^<]*)<\/p>])
@_youtube_iframe_regex ||=
Regexp.new(
%Q[<p><iframe(?:[^>]*)src="\/\/www.youtube.com\/embed\/([^"]+)"(?:[^>]*)><\/iframe>(?:[^<]*)<\/p>],
)
end
def process_ning_post_body(arg)
@ -382,15 +389,16 @@ class ImportScripts::Ning < ImportScripts::Base
def add_video(arg, embed_code)
raw = arg
youtube_regex = Regexp.new(%Q[<iframe(?:[^>]*)src="http:\/\/www.youtube.com\/embed\/([^"]+)"(?:[^>]*)><\/iframe>])
youtube_regex =
Regexp.new(
%Q[<iframe(?:[^>]*)src="http:\/\/www.youtube.com\/embed\/([^"]+)"(?:[^>]*)><\/iframe>],
)
raw.gsub!(youtube_regex) do |s|
matches = youtube_regex.match(s)
video_id = matches[1].split("?").first
if video_id
raw += "\n\nhttps://www.youtube.com/watch?v=#{video_id}\n"
end
raw += "\n\nhttps://www.youtube.com/watch?v=#{video_id}\n" if video_id
end
raw += "\n" + embed_code + "\n"
@ -398,6 +406,4 @@ class ImportScripts::Ning < ImportScripts::Base
end
end
if __FILE__ == $0
ImportScripts::Ning.new.perform
end
ImportScripts::Ning.new.perform if __FILE__ == $0
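Two small mechanical rewrites recur in this file: arrays of plain strings become `%w[]` word lists, and large integer literals gain readability underscores. Both are purely syntactic, as a standalone check shows:

prefixes = %w[discussions pages blogs members photos]
puts prefixes == ["discussions", "pages", "blogs", "members", "photos"] # => true

puts 10_240 == 10240 # => true; the underscore changes nothing at runtime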
View File
@ -1,6 +1,6 @@
# frozen_string_literal: true
require 'mongo'
require "mongo"
module NodeBB
class Mongo
@ -43,8 +43,8 @@ module NodeBB
user["joindate"] = timestamp_to_date(user["joindate"])
user["lastonline"] = timestamp_to_date(user["lastonline"])
user['banned'] = user['banned'].to_s
user['uid'] = user['uid'].to_s
user["banned"] = user["banned"].to_s
user["uid"] = user["uid"].to_s
user
end
@ -56,17 +56,17 @@ module NodeBB
category_keys.each do |category_key|
category = mongo.find(_key: "category:#{category_key}").first
category['parentCid'] = category['parentCid'].to_s
category['disabled'] = category['disabled'].to_s
category['cid'] = category['cid'].to_s
category["parentCid"] = category["parentCid"].to_s
category["disabled"] = category["disabled"].to_s
category["cid"] = category["cid"].to_s
categories[category['cid']] = category
categories[category["cid"]] = category
end
end
end
def topics(offset = 0, page_size = 2000)
topic_keys = mongo.find(_key: 'topics:tid').skip(offset).limit(page_size).pluck(:value)
topic_keys = mongo.find(_key: "topics:tid").skip(offset).limit(page_size).pluck(:value)
topic_keys.map { |topic_key| topic(topic_key) }
end
@ -86,11 +86,11 @@ module NodeBB
end
def topic_count
mongo.find(_key: 'topics:tid').count
mongo.find(_key: "topics:tid").count
end
def posts(offset = 0, page_size = 2000)
post_keys = mongo.find(_key: 'posts:pid').skip(offset).limit(page_size).pluck(:value)
post_keys = mongo.find(_key: "posts:pid").skip(offset).limit(page_size).pluck(:value)
post_keys.map { |post_key| post(post_key) }
end
@ -111,7 +111,7 @@ module NodeBB
end
def post_count
mongo.find(_key: 'posts:pid').count
mongo.find(_key: "posts:pid").count
end
private
View File
@ -1,13 +1,13 @@
# frozen_string_literal: true
require_relative '../base'
require_relative './redis'
require_relative './mongo'
require_relative "../base"
require_relative "./redis"
require_relative "./mongo"
class ImportScripts::NodeBB < ImportScripts::Base
# CHANGE THESE BEFORE RUNNING THE IMPORTER
# ATTACHMENT_DIR needs to be absolute, not relative path
ATTACHMENT_DIR = '/Users/orlando/www/orlando/NodeBB/public/uploads'
ATTACHMENT_DIR = "/Users/orlando/www/orlando/NodeBB/public/uploads"
BATCH_SIZE = 2000
def initialize
@ -17,17 +17,13 @@ class ImportScripts::NodeBB < ImportScripts::Base
# @client = adapter.new('mongodb://127.0.0.1:27017/nodebb')
adapter = NodeBB::Redis
@client = adapter.new(
host: "localhost",
port: "6379",
db: 14
)
@client = adapter.new(host: "localhost", port: "6379", db: 14)
load_merged_posts
end
def load_merged_posts
puts 'loading merged posts with topics...'
puts "loading merged posts with topics..."
# here we keep the posts that were merged
# as topics
@ -35,13 +31,16 @@ class ImportScripts::NodeBB < ImportScripts::Base
# { post_id: discourse_post_id }
@merged_posts_map = {}
PostCustomField.where(name: 'import_merged_post_id').pluck(:post_id, :value).each do |post_id, import_id|
post = Post.find(post_id)
topic_id = post.topic_id
nodebb_post_id = post.custom_fields['import_merged_post_id']
PostCustomField
.where(name: "import_merged_post_id")
.pluck(:post_id, :value)
.each do |post_id, import_id|
post = Post.find(post_id)
topic_id = post.topic_id
nodebb_post_id = post.custom_fields["import_merged_post_id"]
@merged_posts_map[nodebb_post_id] = topic_id
end
@merged_posts_map[nodebb_post_id] = topic_id
end
end
def execute
@ -56,19 +55,14 @@ class ImportScripts::NodeBB < ImportScripts::Base
end
def import_groups
puts '', 'importing groups'
puts "", "importing groups"
groups = @client.groups
total_count = groups.count
progress_count = 0
start_time = Time.now
create_groups(groups) do |group|
{
id: group["name"],
name: group["slug"]
}
end
create_groups(groups) { |group| { id: group["name"], name: group["slug"] } }
end
def import_categories
@ -107,15 +101,18 @@ class ImportScripts::NodeBB < ImportScripts::Base
name: category["name"],
position: category["order"],
description: category["description"],
parent_category_id: category_id_from_imported_category_id(category["parentCid"])
parent_category_id: category_id_from_imported_category_id(category["parentCid"]),
}
end
categories.each do |source_category|
cid = category_id_from_imported_category_id(source_category['cid'])
Permalink.create(url: "/category/#{source_category['slug']}", category_id: cid) rescue nil
cid = category_id_from_imported_category_id(source_category["cid"])
begin
Permalink.create(url: "/category/#{source_category["slug"]}", category_id: cid)
rescue StandardError
nil
end
end
end
def import_users
@ -158,12 +155,13 @@ class ImportScripts::NodeBB < ImportScripts::Base
bio_raw: user["aboutme"],
active: true,
custom_fields: {
import_pass: user["password"]
import_pass: user["password"],
},
post_create_action: proc do |u|
import_profile_picture(user, u)
import_profile_background(user, u)
end
post_create_action:
proc do |u|
import_profile_picture(user, u)
import_profile_background(user, u)
end,
}
end
end
@ -204,7 +202,7 @@ class ImportScripts::NodeBB < ImportScripts::Base
end
# write tmp file
file = Tempfile.new(filename, encoding: 'ascii-8bit')
file = Tempfile.new(filename, encoding: "ascii-8bit")
file.write string_io.read
file.rewind
@ -230,9 +228,21 @@ class ImportScripts::NodeBB < ImportScripts::Base
imported_user.user_avatar.update(custom_upload_id: upload.id)
imported_user.update(uploaded_avatar_id: upload.id)
ensure
string_io.close rescue nil
file.close rescue nil
file.unlink rescue nil
begin
string_io.close
rescue StandardError
nil
end
begin
file.close
rescue StandardError
nil
end
begin
file.unlink
rescue StandardError
nil
end
end
def import_profile_background(old_user, imported_user)
@ -264,7 +274,7 @@ class ImportScripts::NodeBB < ImportScripts::Base
end
# write tmp file
file = Tempfile.new(filename, encoding: 'ascii-8bit')
file = Tempfile.new(filename, encoding: "ascii-8bit")
file.write string_io.read
file.rewind
@ -288,9 +298,21 @@ class ImportScripts::NodeBB < ImportScripts::Base
imported_user.user_profile.upload_profile_background(upload)
ensure
string_io.close rescue nil
file.close rescue nil
file.unlink rescue nil
begin
string_io.close
rescue StandardError
nil
end
begin
file.close
rescue StandardError
nil
end
begin
file.unlink
rescue StandardError
nil
end
end
def add_users_to_groups
@ -305,7 +327,7 @@ class ImportScripts::NodeBB < ImportScripts::Base
dgroup = find_group_by_import_id(group["name"])
# do nothing if we migrated this group already
next if dgroup.custom_fields['import_users_added']
next if dgroup.custom_fields["import_users_added"]
group_member_ids = group["member_ids"].map { |uid| user_id_from_imported_user_id(uid) }
group_owner_ids = group["owner_ids"].map { |uid| user_id_from_imported_user_id(uid) }
@ -320,7 +342,7 @@ class ImportScripts::NodeBB < ImportScripts::Base
owners = User.find(group_owner_ids)
owners.each { |owner| dgroup.add_owner(owner) }
dgroup.custom_fields['import_users_added'] = true
dgroup.custom_fields["import_users_added"] = true
dgroup.save
progress_count += 1
@ -357,12 +379,13 @@ class ImportScripts::NodeBB < ImportScripts::Base
created_at: topic["timestamp"],
views: topic["viewcount"],
closed: topic["locked"] == "1",
post_create_action: proc do |p|
# keep track of this to use in import_posts
p.custom_fields["import_merged_post_id"] = topic["mainPid"]
p.save
@merged_posts_map[topic["mainPid"]] = p.id
end
post_create_action:
proc do |p|
# keep track of this to use in import_posts
p.custom_fields["import_merged_post_id"] = topic["mainPid"]
p.save
@merged_posts_map[topic["mainPid"]] = p.id
end,
}
data[:pinned_at] = data[:created_at] if topic["pinned"] == "1"
@ -372,7 +395,11 @@ class ImportScripts::NodeBB < ImportScripts::Base
topics.each do |import_topic|
topic = topic_lookup_from_imported_post_id("t#{import_topic["tid"]}")
Permalink.create(url: "/topic/#{import_topic['slug']}", topic_id: topic[:topic_id]) rescue nil
begin
Permalink.create(url: "/topic/#{import_topic["slug"]}", topic_id: topic[:topic_id])
rescue StandardError
nil
end
end
end
end
@ -411,21 +438,23 @@ class ImportScripts::NodeBB < ImportScripts::Base
topic_id: topic[:topic_id],
raw: raw,
created_at: post["timestamp"],
post_create_action: proc do |p|
post["upvoted_by"].each do |upvoter_id|
user = User.new
user.id = user_id_from_imported_user_id(upvoter_id) || Discourse::SYSTEM_USER_ID
PostActionCreator.like(user, p)
end
end
post_create_action:
proc do |p|
post["upvoted_by"].each do |upvoter_id|
user = User.new
user.id = user_id_from_imported_user_id(upvoter_id) || Discourse::SYSTEM_USER_ID
PostActionCreator.like(user, p)
end
end,
}
if post['toPid']
if post["toPid"]
# Look reply to topic
parent_id = topic_lookup_from_imported_post_id("t#{post['toPid']}").try(:[], :post_number)
parent_id = topic_lookup_from_imported_post_id("t#{post["toPid"]}").try(:[], :post_number)
# Look reply post if topic is missing
parent_id ||= topic_lookup_from_imported_post_id("p#{post['toPid']}").try(:[], :post_number)
parent_id ||=
topic_lookup_from_imported_post_id("p#{post["toPid"]}").try(:[], :post_number)
if parent_id
data[:reply_to_post_number] = parent_id
@ -448,12 +477,12 @@ class ImportScripts::NodeBB < ImportScripts::Base
Post.find_each do |post|
begin
next if post.custom_fields['import_post_processing']
next if post.custom_fields["import_post_processing"]
new_raw = postprocess_post(post)
if new_raw != post.raw
post.raw = new_raw
post.custom_fields['import_post_processing'] = true
post.custom_fields["import_post_processing"] = true
post.save
end
ensure
@ -463,7 +492,7 @@ class ImportScripts::NodeBB < ImportScripts::Base
end
def import_attachments
puts '', 'importing attachments...'
puts "", "importing attachments..."
current = 0
max = Post.count
@ -474,7 +503,7 @@ class ImportScripts::NodeBB < ImportScripts::Base
print_status(current, max, start_time)
new_raw = post.raw.dup
new_raw.gsub!(/\[(.*)\]\((\/assets\/uploads\/files\/.*)\)/) do
new_raw.gsub!(%r{\[(.*)\]\((/assets/uploads/files/.*)\)}) do
image_md = Regexp.last_match[0]
text, filepath = $1, $2
filepath = filepath.gsub("/assets/uploads", ATTACHMENT_DIR)
@ -493,7 +522,12 @@ class ImportScripts::NodeBB < ImportScripts::Base
end
if new_raw != post.raw
PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: 'Import attachments from NodeBB')
PostRevisor.new(post).revise!(
post.user,
{ raw: new_raw },
bypass_bump: true,
edit_reason: "Import attachments from NodeBB",
)
end
end
end
@ -502,28 +536,30 @@ class ImportScripts::NodeBB < ImportScripts::Base
raw = post.raw
# [link to post](/post/:id)
raw = raw.gsub(/\[(.*)\]\(\/post\/(\d+).*\)/) do
text, post_id = $1, $2
raw =
raw.gsub(%r{\[(.*)\]\(/post/(\d+).*\)}) do
text, post_id = $1, $2
if topic_lookup = topic_lookup_from_imported_post_id("p#{post_id}")
url = topic_lookup[:url]
"[#{text}](#{url})"
else
"/404"
if topic_lookup = topic_lookup_from_imported_post_id("p#{post_id}")
url = topic_lookup[:url]
"[#{text}](#{url})"
else
"/404"
end
end
end
# [link to topic](/topic/:id)
raw = raw.gsub(/\[(.*)\]\(\/topic\/(\d+).*\)/) do
text, topic_id = $1, $2
raw =
raw.gsub(%r{\[(.*)\]\(/topic/(\d+).*\)}) do
text, topic_id = $1, $2
if topic_lookup = topic_lookup_from_imported_post_id("t#{topic_id}")
url = topic_lookup[:url]
"[#{text}](#{url})"
else
"/404"
if topic_lookup = topic_lookup_from_imported_post_id("t#{topic_id}")
url = topic_lookup[:url]
"[#{text}](#{url})"
else
"/404"
end
end
end
raw
end
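The `post_create_action:` entries above show how syntax_tree lays out hash values that are multi-line procs: the value drops onto its own indented line beneath the key. A minimal runnable sketch (keys mirror the importer's, the proc body is illustrative):

data = {
  id: 1,
  post_create_action:
    proc do |post|
      # runs after the post is created; here it just reports the argument
      puts "created post #{post}"
    end,
}

data[:post_create_action].call(42) # prints "created post 42"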
View File
@ -1,6 +1,6 @@
# frozen_string_literal: true
require 'redis'
require "redis"
module NodeBB
class Redis
@ -11,7 +11,7 @@ module NodeBB
end
def groups
group_keys = redis.zrange('groups:visible:createtime', 0, -1)
group_keys = redis.zrange("groups:visible:createtime", 0, -1)
group_keys.map { |group_key| group(group_key) }
end
@ -26,7 +26,7 @@ module NodeBB
end
def users
user_keys = redis.zrange('users:joindate', 0, -1)
user_keys = redis.zrange("users:joindate", 0, -1)
user_keys.map { |user_key| user(user_key) }
end
@ -41,13 +41,13 @@ module NodeBB
end
def categories
category_keys = redis.zrange('categories:cid', 0, -1)
category_keys = redis.zrange("categories:cid", 0, -1)
{}.tap do |categories|
category_keys.each do |category_key|
category = redis.hgetall("category:#{category_key}")
categories[category['cid']] = category
categories[category["cid"]] = category
end
end
end
@ -59,7 +59,7 @@ module NodeBB
from = offset
to = page_size + offset
topic_keys = redis.zrange('topics:tid', from, to)
topic_keys = redis.zrange("topics:tid", from, to)
topic_keys.map { |topic_key| topic(topic_key) }
end
@ -75,7 +75,7 @@ module NodeBB
end
def topic_count
redis.zcard('topics:tid')
redis.zcard("topics:tid")
end
def posts(offset = 0, page_size = 2000)
@ -85,7 +85,7 @@ module NodeBB
from = offset
to = page_size + offset
post_keys = redis.zrange('posts:pid', from, to)
post_keys = redis.zrange("posts:pid", from, to)
post_keys.map { |post_key| post(post_key) }
end
@ -99,7 +99,7 @@ module NodeBB
end
def post_count
redis.zcard('posts:pid')
redis.zcard("posts:pid")
end
private
View File
@ -5,7 +5,6 @@ require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::Phorum < ImportScripts::Base
PHORUM_DB = "piwik"
TABLE_PREFIX = "pw_"
BATCH_SIZE = 1000
@ -13,12 +12,13 @@ class ImportScripts::Phorum < ImportScripts::Base
def initialize
super
@client = Mysql2::Client.new(
host: "localhost",
username: "root",
password: "pa$$word",
database: PHORUM_DB
)
@client =
Mysql2::Client.new(
host: "localhost",
username: "root",
password: "pa$$word",
database: PHORUM_DB,
)
end
def execute
@ -29,30 +29,34 @@ class ImportScripts::Phorum < ImportScripts::Base
end
def import_users
puts '', "creating users"
puts "", "creating users"
total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}users;").first['count']
total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}users;").first["count"]
batches(BATCH_SIZE) do |offset|
results = mysql_query(
"SELECT user_id id, username, TRIM(email) AS email, username name, date_added created_at,
results =
mysql_query(
"SELECT user_id id, username, TRIM(email) AS email, username name, date_added created_at,
date_last_active last_seen_at, admin
FROM #{TABLE_PREFIX}users
WHERE #{TABLE_PREFIX}users.active = 1
LIMIT #{BATCH_SIZE}
OFFSET #{offset};")
OFFSET #{offset};",
)
break if results.size < 1
create_users(results, total: total_count, offset: offset) do |user|
next if user['username'].blank?
{ id: user['id'],
email: user['email'],
username: user['username'],
name: user['name'],
created_at: Time.zone.at(user['created_at']),
last_seen_at: Time.zone.at(user['last_seen_at']),
admin: user['admin'] == 1 }
next if user["username"].blank?
{
id: user["id"],
email: user["email"],
username: user["username"],
name: user["name"],
created_at: Time.zone.at(user["created_at"]),
last_seen_at: Time.zone.at(user["last_seen_at"]),
admin: user["admin"] == 1,
}
end
end
end
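
Phorum stores created_at and last_seen_at as integer epochs, hence the Time.zone.at calls above. The importer runs under Rails, where Time.zone is already set; outside it, a sketch needs ActiveSupport and an explicit zone:

require "active_support"
require "active_support/core_ext/time"

Time.zone = "UTC"
Time.zone.at(1_600_000_000) # => 2020-09-13 12:26:40 UTC (an ActiveSupport::TimeWithZone)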
@ -60,19 +64,18 @@ class ImportScripts::Phorum < ImportScripts::Base
def import_categories
puts "", "importing categories..."
categories = mysql_query("
categories =
mysql_query(
"
SELECT forum_id id, name, description, active
FROM #{TABLE_PREFIX}forums
ORDER BY forum_id ASC
").to_a
",
).to_a
create_categories(categories) do |category|
next if category['active'] == 0
{
id: category['id'],
name: category["name"],
description: category["description"]
}
next if category["active"] == 0
{ id: category["id"], name: category["name"], description: category["description"] }
end
# uncomment below lines to create permalink
@ -87,7 +90,9 @@ class ImportScripts::Phorum < ImportScripts::Base
total_count = mysql_query("SELECT count(*) count from #{TABLE_PREFIX}messages").first["count"]
batches(BATCH_SIZE) do |offset|
results = mysql_query("
results =
mysql_query(
"
SELECT m.message_id id,
m.parent_id,
m.forum_id category_id,
@ -100,7 +105,8 @@ class ImportScripts::Phorum < ImportScripts::Base
ORDER BY m.datestamp
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
").to_a
",
).to_a
break if results.size < 1
@ -108,20 +114,20 @@ class ImportScripts::Phorum < ImportScripts::Base
skip = false
mapped = {}
mapped[:id] = m['id']
mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1
mapped[:raw] = process_raw_post(m['raw'], m['id'])
mapped[:created_at] = Time.zone.at(m['created_at'])
mapped[:id] = m["id"]
mapped[:user_id] = user_id_from_imported_user_id(m["user_id"]) || -1
mapped[:raw] = process_raw_post(m["raw"], m["id"])
mapped[:created_at] = Time.zone.at(m["created_at"])
if m['parent_id'] == 0
mapped[:category] = category_id_from_imported_category_id(m['category_id'].to_i)
mapped[:title] = CGI.unescapeHTML(m['title'])
if m["parent_id"] == 0
mapped[:category] = category_id_from_imported_category_id(m["category_id"].to_i)
mapped[:title] = CGI.unescapeHTML(m["title"])
else
parent = topic_lookup_from_imported_post_id(m['parent_id'])
parent = topic_lookup_from_imported_post_id(m["parent_id"])
if parent
mapped[:topic_id] = parent[:topic_id]
else
puts "Parent post #{m['parent_id']} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
puts "Parent post #{m["parent_id"]} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
skip = true
end
end
@ -137,25 +143,24 @@ class ImportScripts::Phorum < ImportScripts::Base
# end
# end
end
end
def process_raw_post(raw, import_id)
s = raw.dup
# :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
s.gsub!(/<!-- s(\S+) --><img (?:[^>]+) \/><!-- s(?:\S+) -->/, '\1')
s.gsub!(%r{<!-- s(\S+) --><img (?:[^>]+) /><!-- s(?:\S+) -->}, '\1')
# Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
s.gsub!(/<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/, '[\2](\1)')
s.gsub!(%r{<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)</a><!-- \w -->}, '[\2](\1)')
# Many phpbb bbcode tags have a hash attached to them. Examples:
# [url=https&#58;//google&#46;com:1qh1i7ky]click here[/url:1qh1i7ky]
# [quote=&quot;cybereality&quot;:b0wtlzex]Some text.[/quote:b0wtlzex]
s.gsub!(/:(?:\w{8})\]/, ']')
s.gsub!(/:(?:\w{8})\]/, "]")
# Remove mybb video tags.
s.gsub!(/(^\[video=.*?\])|(\[\/video\]$)/, '')
s.gsub!(%r{(^\[video=.*?\])|(\[/video\]$)}, "")
s = CGI.unescapeHTML(s)
@ -163,50 +168,54 @@ class ImportScripts::Phorum < ImportScripts::Base
# [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
#
# Work around it for now:
s.gsub!(/\[http(s)?:\/\/(www\.)?/, '[')
s.gsub!(%r{\[http(s)?://(www\.)?}, "[")
# [QUOTE]...[/QUOTE]
s.gsub!(/\[quote\](.+?)\[\/quote\]/im) { "\n> #{$1}\n" }
s.gsub!(%r{\[quote\](.+?)\[/quote\]}im) { "\n> #{$1}\n" }
# [URL=...]...[/URL]
s.gsub!(/\[url="?(.+?)"?\](.+)\[\/url\]/i) { "[#{$2}](#{$1})" }
s.gsub!(%r{\[url="?(.+?)"?\](.+)\[/url\]}i) { "[#{$2}](#{$1})" }
# [IMG]...[/IMG]
s.gsub!(/\[\/?img\]/i, "")
s.gsub!(%r{\[/?img\]}i, "")
# convert list tags to ul and list=1 tags to ol
# (basically, we're only missing list=a here...)
s.gsub!(/\[list\](.*?)\[\/list\]/m, '[ul]\1[/ul]')
s.gsub!(/\[list=1\](.*?)\[\/list\]/m, '[ol]\1[/ol]')
s.gsub!(%r{\[list\](.*?)\[/list\]}m, '[ul]\1[/ul]')
s.gsub!(%r{\[list=1\](.*?)\[/list\]}m, '[ol]\1[/ol]')
# convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
s.gsub!(/\[\*\](.*?)\n/, '[li]\1[/li]')
# [CODE]...[/CODE]
s.gsub!(/\[\/?code\]/i, "\n```\n")
s.gsub!(%r{\[/?code\]}i, "\n```\n")
# [HIGHLIGHT]...[/HIGHLIGHT]
s.gsub!(/\[\/?highlight\]/i, "\n```\n")
s.gsub!(%r{\[/?highlight\]}i, "\n```\n")
# [YOUTUBE]<id>[/YOUTUBE]
s.gsub!(/\[youtube\](.+?)\[\/youtube\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
s.gsub!(%r{\[youtube\](.+?)\[/youtube\]}i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
# [youtube=425,350]id[/youtube]
s.gsub!(/\[youtube="?(.+?)"?\](.+)\[\/youtube\]/i) { "\nhttps://www.youtube.com/watch?v=#{$2}\n" }
s.gsub!(%r{\[youtube="?(.+?)"?\](.+)\[/youtube\]}i) do
"\nhttps://www.youtube.com/watch?v=#{$2}\n"
end
# [MEDIA=youtube]id[/MEDIA]
s.gsub!(/\[MEDIA=youtube\](.+?)\[\/MEDIA\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
s.gsub!(%r{\[MEDIA=youtube\](.+?)\[/MEDIA\]}i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
# [ame="youtube_link"]title[/ame]
s.gsub!(/\[ame="?(.+?)"?\](.+)\[\/ame\]/i) { "\n#{$1}\n" }
s.gsub!(%r{\[ame="?(.+?)"?\](.+)\[/ame\]}i) { "\n#{$1}\n" }
# [VIDEO=youtube;<id>]...[/VIDEO]
s.gsub!(/\[video=youtube;([^\]]+)\].*?\[\/video\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
s.gsub!(%r{\[video=youtube;([^\]]+)\].*?\[/video\]}i) do
"\nhttps://www.youtube.com/watch?v=#{$1}\n"
end
# [USER=706]@username[/USER]
s.gsub!(/\[user="?(.+?)"?\](.+)\[\/user\]/i) { $2 }
s.gsub!(%r{\[user="?(.+?)"?\](.+)\[/user\]}i) { $2 }
# Remove the color tag
s.gsub!(/\[color=[#a-z0-9]+\]/i, "")
s.gsub!(/\[\/color\]/i, "")
s.gsub!(%r{\[/color\]}i, "")
s.gsub!(/\[hr\]/i, "<hr>")
@ -221,7 +230,7 @@ class ImportScripts::Phorum < ImportScripts::Base
end
def import_attachments
puts '', 'importing attachments...'
puts "", "importing attachments..."
uploads = mysql_query <<-SQL
SELECT message_id, filename, FROM_BASE64(file_data) AS file_data, file_id
@ -234,26 +243,23 @@ class ImportScripts::Phorum < ImportScripts::Base
total_count = uploads.count
uploads.each do |upload|
# puts "*** processing file #{upload['file_id']}"
post_id = post_id_from_imported_post_id(upload['message_id'])
post_id = post_id_from_imported_post_id(upload["message_id"])
if post_id.nil?
puts "Post #{upload['message_id']} for attachment #{upload['file_id']} not found"
puts "Post #{upload["message_id"]} for attachment #{upload["file_id"]} not found"
next
end
post = Post.find(post_id)
real_filename = upload['filename']
real_filename.prepend SecureRandom.hex if real_filename[0] == '.'
real_filename = upload["filename"]
real_filename.prepend SecureRandom.hex if real_filename[0] == "."
tmpfile = 'attach_' + upload['file_id'].to_s
filename = File.join('/tmp/', tmpfile)
File.open(filename, 'wb') { |f|
f.write(upload['file_data'])
}
tmpfile = "attach_" + upload["file_id"].to_s
filename = File.join("/tmp/", tmpfile)
File.open(filename, "wb") { |f| f.write(upload["file_data"]) }
upl_obj = create_upload(post.user.id, filename, real_filename)
@ -265,16 +271,16 @@ class ImportScripts::Phorum < ImportScripts::Base
post.raw += "\n\n#{html}\n\n"
post.save!
if PostUpload.where(post: post, upload: upl_obj).exists?
puts "skipping creating uploaded for previously uploaded file #{upload['file_id']}"
puts "skipping creating uploaded for previously uploaded file #{upload["file_id"]}"
else
PostUpload.create!(post: post, upload: upl_obj)
end
# PostUpload.create!(post: post, upload: upl_obj) unless PostUpload.where(post: post, upload: upl_obj).exists?
else
puts "Skipping attachment #{upload['file_id']}"
puts "Skipping attachment #{upload["file_id"]}"
end
else
puts "Failed to upload attachment #{upload['file_id']}"
puts "Failed to upload attachment #{upload["file_id"]}"
exit
end
@ -282,7 +288,6 @@ class ImportScripts::Phorum < ImportScripts::Base
print_status(current_count, total_count)
end
end
end
ImportScripts::Phorum.new.perform
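
The attachment loop above writes each blob to /tmp before handing it to create_upload. The binary-mode detail matters; a standalone sketch with hypothetical stand-ins for one row of the uploads query:

file_id = 42               # stand-in for upload["file_id"]
file_data = "\x89PNG...".b # stand-in for the FROM_BASE64 blob

tmpfile = "attach_" + file_id.to_s
filename = File.join("/tmp/", tmpfile)
# "wb" opens in binary mode so the blob is written byte-for-byte,
# with no newline translation
File.open(filename, "wb") { |f| f.write(file_data) }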


@ -4,32 +4,34 @@
# Documentation: https://meta.discourse.org/t/importing-from-phpbb3/30810
if ARGV.length != 1 || !File.exist?(ARGV[0])
STDERR.puts '', 'Usage of phpBB3 importer:', 'bundle exec ruby phpbb3.rb <path/to/settings.yml>'
STDERR.puts '', "Use the settings file from #{File.expand_path('phpbb3/settings.yml', File.dirname(__FILE__))} as an example."
STDERR.puts '', 'Still having problems? Take a look at https://meta.discourse.org/t/importing-from-phpbb3/30810'
STDERR.puts "", "Usage of phpBB3 importer:", "bundle exec ruby phpbb3.rb <path/to/settings.yml>"
STDERR.puts "",
"Use the settings file from #{File.expand_path("phpbb3/settings.yml", File.dirname(__FILE__))} as an example."
STDERR.puts "",
"Still having problems? Take a look at https://meta.discourse.org/t/importing-from-phpbb3/30810"
exit 1
end
module ImportScripts
module PhpBB3
require_relative 'phpbb3/support/settings'
require_relative 'phpbb3/database/database'
require_relative "phpbb3/support/settings"
require_relative "phpbb3/database/database"
@settings = Settings.load(ARGV[0])
# We need to load the gem files for ruby-bbcode-to-md and the database adapter
# (e.g. mysql2) before bundler gets initialized by the base importer.
# Otherwise we get an error since those gems are not always in the Gemfile.
require 'ruby-bbcode-to-md' if @settings.use_bbcode_to_md
require "ruby-bbcode-to-md" if @settings.use_bbcode_to_md
begin
@database = Database.create(@settings.database)
rescue UnsupportedVersionError => error
STDERR.puts '', error.message
STDERR.puts "", error.message
exit 1
end
require_relative 'phpbb3/importer'
require_relative "phpbb3/importer"
Importer.new(@settings, @database).perform
end
end


@ -1,6 +1,6 @@
# frozen_string_literal: true
require 'mysql2'
require "mysql2"
module ImportScripts::PhpBB3
class Database
@ -19,11 +19,11 @@ module ImportScripts::PhpBB3
def create_database
version = get_phpbb_version
if version.start_with?('3.0')
require_relative 'database_3_0'
if version.start_with?("3.0")
require_relative "database_3_0"
Database_3_0.new(@database_client, @database_settings)
elsif version.start_with?('3.1') || version.start_with?('3.2') || version.start_with?('3.3')
require_relative 'database_3_1'
elsif version.start_with?("3.1") || version.start_with?("3.2") || version.start_with?("3.3")
require_relative "database_3_1"
Database_3_1.new(@database_client, @database_settings)
else
raise UnsupportedVersionError, <<~TEXT
@ -42,7 +42,7 @@ module ImportScripts::PhpBB3
username: @database_settings.username,
password: @database_settings.password,
database: @database_settings.schema,
reconnect: true
reconnect: true,
)
end
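
As above, syntax_tree breaks long constructor calls across lines and leaves a trailing comma after the final argument, which Ruby permits in multi-line argument lists. A sketch with placeholder credentials, assuming the mysql2 gem:

require "mysql2"

client =
  Mysql2::Client.new(
    host: "localhost",
    username: "root",
    password: "pa$$word",
    database: "phpbb",
    reconnect: true, # re-establish dropped connections during long imports
  )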


@ -1,7 +1,7 @@
# frozen_string_literal: true
require_relative 'database_base'
require_relative '../support/constants'
require_relative "database_base"
require_relative "../support/constants"
module ImportScripts::PhpBB3
class Database_3_0 < DatabaseBase


@ -1,7 +1,7 @@
# frozen_string_literal: true
require_relative 'database_3_0'
require_relative '../support/constants'
require_relative "database_3_0"
require_relative "../support/constants"
module ImportScripts::PhpBB3
class Database_3_1 < Database_3_0
@ -32,14 +32,15 @@ module ImportScripts::PhpBB3
private
def profile_fields_query(profile_fields)
@profile_fields_query ||= begin
if profile_fields.present?
columns = profile_fields.map { |field| "pf_#{field[:phpbb_field_name]}" }
", #{columns.join(', ')}"
else
""
@profile_fields_query ||=
begin
if profile_fields.present?
columns = profile_fields.map { |field| "pf_#{field[:phpbb_field_name]}" }
", #{columns.join(", ")}"
else
""
end
end
end
end
end
end
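
The @profile_fields_query rewrite above is the memoization idiom: ||= plus a begin block, where the block runs once and its result, including an empty string, is cached. A self-contained sketch with a hypothetical class name:

class ProfileFieldsQuery
  def fragment(fields = [])
    @fragment ||=
      begin
        if fields.any?
          ", " + fields.map { |f| "pf_#{f}" }.join(", ")
        else
          "" # "" is truthy in Ruby, so ||= caches it like any other value
        end
      end
  end
end

ProfileFieldsQuery.new.fragment(%w[location skype]) # => ", pf_location, pf_skype"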


@ -39,9 +39,7 @@ module ImportScripts::PhpBB3
def find_last_row(rows)
last_index = rows.size - 1
rows.each_with_index do |row, index|
return row if index == last_index
end
rows.each_with_index { |row, index| return row if index == last_index }
nil
end


@ -1,9 +1,9 @@
# frozen_string_literal: true
require_relative '../base'
require_relative 'support/settings'
require_relative 'database/database'
require_relative 'importers/importer_factory'
require_relative "../base"
require_relative "support/settings"
require_relative "database/database"
require_relative "importers/importer_factory"
module ImportScripts::PhpBB3
class Importer < ImportScripts::Base
@ -25,7 +25,7 @@ module ImportScripts::PhpBB3
protected
def execute
puts '', "importing from phpBB #{@php_config[:phpbb_version]}"
puts "", "importing from phpBB #{@php_config[:phpbb_version]}"
SiteSetting.tagging_enabled = true if @settings.tag_mappings.present?
@ -55,8 +55,14 @@ module ImportScripts::PhpBB3
settings[:max_attachment_size_kb] = [max_file_size_kb, SiteSetting.max_attachment_size_kb].max
# temporarily disable validation since we want to import all existing images and attachments
SiteSetting.type_supervisor.load_setting(:max_image_size_kb, max: settings[:max_image_size_kb])
SiteSetting.type_supervisor.load_setting(:max_attachment_size_kb, max: settings[:max_attachment_size_kb])
SiteSetting.type_supervisor.load_setting(
:max_image_size_kb,
max: settings[:max_image_size_kb],
)
SiteSetting.type_supervisor.load_setting(
:max_attachment_size_kb,
max: settings[:max_attachment_size_kb],
)
settings
end
@ -66,7 +72,7 @@ module ImportScripts::PhpBB3
end
def import_users
puts '', 'creating users'
puts "", "creating users"
total_count = @database.count_users
importer = @importers.user_importer
last_user_id = 0
@ -88,10 +94,10 @@ module ImportScripts::PhpBB3
end
def import_anonymous_users
puts '', 'creating anonymous users'
puts "", "creating anonymous users"
total_count = @database.count_anonymous_users
importer = @importers.user_importer
last_username = ''
last_username = ""
batches do |offset|
rows, last_username = @database.fetch_anonymous_users(last_username)
@ -109,26 +115,34 @@ module ImportScripts::PhpBB3
end
def import_groups
puts '', 'creating groups'
puts "", "creating groups"
rows = @database.fetch_groups
create_groups(rows) do |row|
begin
next if row[:group_type] == 3
group_name = if @settings.site_name.present?
"#{@settings.site_name}_#{row[:group_name]}"
else
row[:group_name]
end[0..19].gsub(/[^a-zA-Z0-9\-_. ]/, '_')
group_name =
if @settings.site_name.present?
"#{@settings.site_name}_#{row[:group_name]}"
else
row[:group_name]
end[
0..19
].gsub(/[^a-zA-Z0-9\-_. ]/, "_")
bio_raw = @importers.text_processor.process_raw_text(row[:group_desc]) rescue row[:group_desc]
bio_raw =
begin
@importers.text_processor.process_raw_text(row[:group_desc])
rescue StandardError
row[:group_desc]
end
{
id: @settings.prefix(row[:group_id]),
name: group_name,
full_name: row[:group_name],
bio_raw: bio_raw
bio_raw: bio_raw,
}
rescue => e
log_error("Failed to map group with ID #{row[:group_id]}", e)
@ -137,7 +151,7 @@ module ImportScripts::PhpBB3
end
def import_user_groups
puts '', 'creating user groups'
puts "", "creating user groups"
rows = @database.fetch_group_users
rows.each do |row|
@ -147,7 +161,11 @@ module ImportScripts::PhpBB3
user_id = @lookup.user_id_from_imported_user_id(@settings.prefix(row[:user_id]))
begin
GroupUser.find_or_create_by(user_id: user_id, group_id: group_id, owner: row[:group_leader])
GroupUser.find_or_create_by(
user_id: user_id,
group_id: group_id,
owner: row[:group_leader],
)
rescue => e
log_error("Failed to add user #{row[:user_id]} to group #{row[:group_id]}", e)
end
@ -155,7 +173,7 @@ module ImportScripts::PhpBB3
end
def import_new_categories
puts '', 'creating new categories'
puts "", "creating new categories"
create_categories(@settings.new_categories) do |row|
next if row == "SKIP"
@ -163,13 +181,14 @@ module ImportScripts::PhpBB3
{
id: @settings.prefix(row[:forum_id]),
name: row[:name],
parent_category_id: @lookup.category_id_from_imported_category_id(@settings.prefix(row[:parent_id]))
parent_category_id:
@lookup.category_id_from_imported_category_id(@settings.prefix(row[:parent_id])),
}
end
end
def import_categories
puts '', 'creating categories'
puts "", "creating categories"
rows = @database.fetch_categories
importer = @importers.category_importer
@ -181,7 +200,7 @@ module ImportScripts::PhpBB3
end
def import_posts
puts '', 'creating topics and posts'
puts "", "creating topics and posts"
total_count = @database.count_posts
importer = @importers.post_importer
last_post_id = 0
@ -202,7 +221,7 @@ module ImportScripts::PhpBB3
end
def import_private_messages
puts '', 'creating private messages'
puts "", "creating private messages"
total_count = @database.count_messages
importer = @importers.message_importer
last_msg_id = 0
@ -223,7 +242,7 @@ module ImportScripts::PhpBB3
end
def import_bookmarks
puts '', 'creating bookmarks'
puts "", "creating bookmarks"
total_count = @database.count_bookmarks
importer = @importers.bookmark_importer
last_user_id = last_topic_id = 0
@ -243,7 +262,7 @@ module ImportScripts::PhpBB3
end
def import_likes
puts '', 'importing likes'
puts "", "importing likes"
total_count = @database.count_likes
last_post_id = last_user_id = 0
@ -255,7 +274,7 @@ module ImportScripts::PhpBB3
{
post_id: @settings.prefix(row[:post_id]),
user_id: @settings.prefix(row[:user_id]),
created_at: Time.zone.at(row[:thanks_time])
created_at: Time.zone.at(row[:thanks_time]),
}
end
end
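
Most import_* methods above share one shape: keep a cursor (last_user_id, last_post_id, ...), fetch a batch past the cursor, stop on an empty batch. A condensed, runnable sketch with in-memory stand-ins for the database calls:

ROWS = (1..10).map { |i| { post_id: i, raw: "post #{i}" } }

# hypothetical fetcher standing in for @database.fetch_posts
def fetch_posts(last_post_id, batch_size = 4)
  batch = ROWS.select { |r| r[:post_id] > last_post_id }.first(batch_size)
  [batch, batch.empty? ? last_post_id : batch.last[:post_id]]
end

last_post_id = 0
loop do
  rows, last_post_id = fetch_posts(last_post_id)
  break if rows.empty?
  rows.each { |row| puts row[:raw] } # stand-in for importer.map_post(row)
end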


@ -49,12 +49,12 @@ module ImportScripts::PhpBB3
def get_avatar_path(avatar_type, filename)
case avatar_type
when Constants::AVATAR_TYPE_UPLOADED, Constants::AVATAR_TYPE_STRING_UPLOADED then
filename.gsub!(/_[0-9]+\./, '.') # we need 1337.jpg, not 1337_2983745.jpg
get_uploaded_path(filename)
when Constants::AVATAR_TYPE_GALLERY, Constants::AVATAR_TYPE_STRING_GALLERY then
when Constants::AVATAR_TYPE_UPLOADED, Constants::AVATAR_TYPE_STRING_UPLOADED
filename.gsub!(/_[0-9]+\./, ".") # we need 1337.jpg, not 1337_2983745.jpg
get_uploaded_path(filename)
when Constants::AVATAR_TYPE_GALLERY, Constants::AVATAR_TYPE_STRING_GALLERY
get_gallery_path(filename)
when Constants::AVATAR_TYPE_REMOTE, Constants::AVATAR_TYPE_STRING_REMOTE then
when Constants::AVATAR_TYPE_REMOTE, Constants::AVATAR_TYPE_STRING_REMOTE
download_avatar(filename)
else
puts "Invalid avatar type #{avatar_type}. Skipping..."
@ -67,12 +67,13 @@ module ImportScripts::PhpBB3
max_image_size_kb = SiteSetting.max_image_size_kb.kilobytes
begin
avatar_file = FileHelper.download(
url,
max_file_size: max_image_size_kb,
tmp_file_name: 'discourse-avatar',
follow_redirect: true
)
avatar_file =
FileHelper.download(
url,
max_file_size: max_image_size_kb,
tmp_file_name: "discourse-avatar",
follow_redirect: true,
)
rescue StandardError => err
warn "Error downloading avatar: #{err.message}. Skipping..."
return nil
@ -100,11 +101,11 @@ module ImportScripts::PhpBB3
def is_allowed_avatar_type?(avatar_type)
case avatar_type
when Constants::AVATAR_TYPE_UPLOADED, Constants::AVATAR_TYPE_STRING_UPLOADED then
when Constants::AVATAR_TYPE_UPLOADED, Constants::AVATAR_TYPE_STRING_UPLOADED
@settings.import_uploaded_avatars
when Constants::AVATAR_TYPE_REMOTE, Constants::AVATAR_TYPE_STRING_REMOTE then
when Constants::AVATAR_TYPE_REMOTE, Constants::AVATAR_TYPE_STRING_REMOTE
@settings.import_remote_avatars
when Constants::AVATAR_TYPE_GALLERY, Constants::AVATAR_TYPE_STRING_GALLERY then
when Constants::AVATAR_TYPE_GALLERY, Constants::AVATAR_TYPE_STRING_GALLERY
@settings.import_gallery_avatars
else
false


@ -9,7 +9,7 @@ module ImportScripts::PhpBB3
def map_bookmark(row)
{
user_id: @settings.prefix(row[:user_id]),
post_id: @settings.prefix(row[:topic_first_post_id])
post_id: @settings.prefix(row[:topic_first_post_id]),
}
end
end


@ -23,11 +23,13 @@ module ImportScripts::PhpBB3
{
id: @settings.prefix(row[:forum_id]),
name: CGI.unescapeHTML(row[:forum_name]),
parent_category_id: @lookup.category_id_from_imported_category_id(@settings.prefix(row[:parent_id])),
post_create_action: proc do |category|
update_category_description(category, row)
@permalink_importer.create_for_category(category, row[:forum_id]) # skip @settings.prefix because ID is used in permalink generation
end
parent_category_id:
@lookup.category_id_from_imported_category_id(@settings.prefix(row[:parent_id])),
post_create_action:
proc do |category|
update_category_description(category, row)
@permalink_importer.create_for_category(category, row[:forum_id]) # skip @settings.prefix because ID is used in permalink generation
end,
}
end
@ -51,7 +53,16 @@ module ImportScripts::PhpBB3
end
if row[:forum_desc].present?
changes = { raw: (@text_processor.process_raw_text(row[:forum_desc]) rescue row[:forum_desc]) }
changes = {
raw:
(
begin
@text_processor.process_raw_text(row[:forum_desc])
rescue StandardError
row[:forum_desc]
end
),
}
opts = { revised_at: post.created_at, bypass_bump: true }
post.revise(Discourse.system_user, changes, opts)
end


@ -1,16 +1,16 @@
# frozen_string_literal: true
require_relative 'attachment_importer'
require_relative 'avatar_importer'
require_relative 'bookmark_importer'
require_relative 'category_importer'
require_relative 'message_importer'
require_relative 'poll_importer'
require_relative 'post_importer'
require_relative 'permalink_importer'
require_relative 'user_importer'
require_relative '../support/smiley_processor'
require_relative '../support/text_processor'
require_relative "attachment_importer"
require_relative "avatar_importer"
require_relative "bookmark_importer"
require_relative "category_importer"
require_relative "message_importer"
require_relative "poll_importer"
require_relative "post_importer"
require_relative "permalink_importer"
require_relative "user_importer"
require_relative "../support/smiley_processor"
require_relative "../support/text_processor"
module ImportScripts::PhpBB3
class ImporterFactory
@ -36,7 +36,14 @@ module ImportScripts::PhpBB3
end
def post_importer
PostImporter.new(@lookup, text_processor, attachment_importer, poll_importer, permalink_importer, @settings)
PostImporter.new(
@lookup,
text_processor,
attachment_importer,
poll_importer,
permalink_importer,
@settings,
)
end
def message_importer
@ -64,7 +71,8 @@ module ImportScripts::PhpBB3
end
def text_processor
@text_processor ||= TextProcessor.new(@lookup, @database, smiley_processor, @settings, @phpbb_config)
@text_processor ||=
TextProcessor.new(@lookup, @database, smiley_processor, @settings, @phpbb_config)
end
def smiley_processor


@ -20,14 +20,16 @@ module ImportScripts::PhpBB3
end
def map_message(row)
user_id = @lookup.user_id_from_imported_user_id(@settings.prefix(row[:author_id])) || Discourse.system_user.id
user_id =
@lookup.user_id_from_imported_user_id(@settings.prefix(row[:author_id])) ||
Discourse.system_user.id
attachments = import_attachments(row, user_id)
mapped = {
id: get_import_id(row[:msg_id]),
user_id: user_id,
created_at: Time.zone.at(row[:message_time]),
raw: @text_processor.process_private_msg(row[:message_text], attachments)
raw: @text_processor.process_private_msg(row[:message_text], attachments),
}
root_user_ids = sorted_user_ids(row[:root_author_id], row[:root_to_address])
@ -43,7 +45,7 @@ module ImportScripts::PhpBB3
protected
RE_PREFIX = 're: '
RE_PREFIX = "re: "
def import_attachments(row, user_id)
if @settings.import_attachments && row[:attachment_count] > 0
@ -55,7 +57,7 @@ module ImportScripts::PhpBB3
mapped[:title] = get_topic_title(row)
mapped[:archetype] = Archetype.private_message
mapped[:target_usernames] = get_recipient_usernames(row)
mapped[:custom_fields] = { import_user_ids: current_user_ids.join(',') }
mapped[:custom_fields] = { import_user_ids: current_user_ids.join(",") }
if mapped[:target_usernames].empty?
puts "Private message without recipients. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}"
@ -75,9 +77,9 @@ module ImportScripts::PhpBB3
# to_address looks like this: "u_91:u_1234:g_200"
# If there is a "u_" prefix, the prefix is discarded and the rest is a user_id
user_ids = to_address.split(':')
user_ids = to_address.split(":")
user_ids.uniq!
user_ids.map! { |u| u[2..-1].to_i if u[0..1] == 'u_' }.compact
user_ids.map! { |u| u[2..-1].to_i if u[0..1] == "u_" }.compact
end
def get_recipient_group_ids(to_address)
@ -85,16 +87,19 @@ module ImportScripts::PhpBB3
# to_address looks like this: "u_91:u_1234:g_200"
# If there is a "g_" prefix, the prefix is discarded and the rest is a group_id
group_ids = to_address.split(':')
group_ids = to_address.split(":")
group_ids.uniq!
group_ids.map! { |g| g[2..-1].to_i if g[0..1] == 'g_' }.compact
group_ids.map! { |g| g[2..-1].to_i if g[0..1] == "g_" }.compact
end
def get_recipient_usernames(row)
import_user_ids = get_recipient_user_ids(row[:to_address])
usernames = import_user_ids.map do |import_user_id|
@lookup.find_user_by_import_id(@settings.prefix(import_user_id)).try(:username)
end.compact
usernames =
import_user_ids
.map do |import_user_id|
@lookup.find_user_by_import_id(@settings.prefix(import_user_id)).try(:username)
end
.compact
import_group_ids = get_recipient_group_ids(row[:to_address])
import_group_ids.each do |import_group_id|
@ -142,13 +147,19 @@ module ImportScripts::PhpBB3
topic_titles = [topic_title]
topic_titles << topic_title[RE_PREFIX.length..-1] if topic_title.start_with?(RE_PREFIX)
Post.select(:topic_id)
Post
.select(:topic_id)
.joins(:topic)
.joins(:_custom_fields)
.where(["LOWER(topics.title) IN (:titles) AND post_custom_fields.name = 'import_user_ids' AND post_custom_fields.value = :user_ids",
{ titles: topic_titles, user_ids: current_user_ids.join(',') }])
.order('topics.created_at DESC')
.first.try(:topic_id)
.where(
[
"LOWER(topics.title) IN (:titles) AND post_custom_fields.name = 'import_user_ids' AND post_custom_fields.value = :user_ids",
{ titles: topic_titles, user_ids: current_user_ids.join(",") },
],
)
.order("topics.created_at DESC")
.first
.try(:topic_id)
end
end
end
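
The to_address parsing above is easy to test in isolation: "u_"-prefixed entries are user ids, "g_"-prefixed entries are group ids. A sketch using filter_map, a tidier equivalent of the map!/compact pair in the code (Ruby 2.7+):

to_address = "u_91:u_1234:g_200:u_91"
ids = to_address.split(":").uniq

user_ids = ids.filter_map { |u| u[2..].to_i if u.start_with?("u_") }  # => [91, 1234]
group_ids = ids.filter_map { |g| g[2..].to_i if g.start_with?("g_") } # => [200]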


@ -13,13 +13,15 @@ module ImportScripts::PhpBB3
def change_site_settings
normalizations = SiteSetting.permalink_normalizations
normalizations = normalizations.blank? ? [] : normalizations.split('|')
normalizations = normalizations.blank? ? [] : normalizations.split("|")
add_normalization(normalizations, CATEGORY_LINK_NORMALIZATION) if @settings.create_category_links
if @settings.create_category_links
add_normalization(normalizations, CATEGORY_LINK_NORMALIZATION)
end
add_normalization(normalizations, POST_LINK_NORMALIZATION) if @settings.create_post_links
add_normalization(normalizations, TOPIC_LINK_NORMALIZATION) if @settings.create_topic_links
SiteSetting.permalink_normalizations = normalizations.join('|')
SiteSetting.permalink_normalizations = normalizations.join("|")
end
def create_for_category(category, import_id)
@ -50,8 +52,8 @@ module ImportScripts::PhpBB3
def add_normalization(normalizations, normalization)
if @settings.normalization_prefix.present?
prefix = @settings.normalization_prefix[%r|^/?(.*?)/?$|, 1]
normalization = "/#{prefix.gsub('/', '\/')}\\#{normalization}"
prefix = @settings.normalization_prefix[%r{^/?(.*?)/?$}, 1]
normalization = "/#{prefix.gsub("/", '\/')}\\#{normalization}"
end
normalizations << normalization unless normalizations.include?(normalization)


@ -49,7 +49,12 @@ module ImportScripts::PhpBB3
end
def get_option_text(row)
text = @text_processor.process_raw_text(row[:poll_option_text]) rescue row[:poll_option_text]
text =
begin
@text_processor.process_raw_text(row[:poll_option_text])
rescue StandardError
row[:poll_option_text]
end
text.squish!
text.gsub!(/^(\d+)\./, '\1\.')
text
@ -57,7 +62,12 @@ module ImportScripts::PhpBB3
# @param poll_data [ImportScripts::PhpBB3::PollData]
def get_poll_text(poll_data)
title = @text_processor.process_raw_text(poll_data.title) rescue poll_data.title
title =
begin
@text_processor.process_raw_text(poll_data.title)
rescue StandardError
poll_data.title
end
text = +"#{title}\n\n"
arguments = ["results=always"]
@ -69,11 +79,9 @@ module ImportScripts::PhpBB3
arguments << "type=regular"
end
text << "[poll #{arguments.join(' ')}]"
text << "[poll #{arguments.join(" ")}]"
poll_data.options.each do |option|
text << "\n* #{option[:text]}"
end
poll_data.options.each { |option| text << "\n* #{option[:text]}" }
text << "\n[/poll]"
end
@ -104,9 +112,7 @@ module ImportScripts::PhpBB3
poll.poll_options.each_with_index do |option, index|
imported_option = poll_data.options[index]
imported_option[:ids].each do |imported_id|
option_ids[imported_id] = option.id
end
imported_option[:ids].each { |imported_id| option_ids[imported_id] = option.id }
end
option_ids

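The begin/rescue blocks above replace one-line rescue modifiers (text = risky rescue fallback). Both forms catch StandardError; the expanded form syntax_tree produces just makes the fallback explicit. A runnable sketch with a stand-in failure:

def process_raw_text(text)
  raise ArgumentError, "bad markup" if text.include?("!") # stand-in failure
  text.upcase
end

raw = "poll option!"

text =
  begin
    process_raw_text(raw)
  rescue StandardError
    raw # fall back to the unprocessed text, as the importer does
  end
text # => "poll option!"
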

@ -8,7 +8,14 @@ module ImportScripts::PhpBB3
# @param poll_importer [ImportScripts::PhpBB3::PollImporter]
# @param permalink_importer [ImportScripts::PhpBB3::PermalinkImporter]
# @param settings [ImportScripts::PhpBB3::Settings]
def initialize(lookup, text_processor, attachment_importer, poll_importer, permalink_importer, settings)
def initialize(
lookup,
text_processor,
attachment_importer,
poll_importer,
permalink_importer,
settings
)
@lookup = lookup
@text_processor = text_processor
@attachment_importer = attachment_importer
@ -24,7 +31,8 @@ module ImportScripts::PhpBB3
def map_post(row)
return if @settings.category_mappings.dig(row[:forum_id].to_s, :skip)
imported_user_id = @settings.prefix(row[:post_username].blank? ? row[:poster_id] : row[:post_username])
imported_user_id =
@settings.prefix(row[:post_username].blank? ? row[:poster_id] : row[:post_username])
user_id = @lookup.user_id_from_imported_user_id(imported_user_id) || -1
is_first_post = row[:post_id] == row[:topic_first_post_id]
@ -35,7 +43,7 @@ module ImportScripts::PhpBB3
user_id: user_id,
created_at: Time.zone.at(row[:post_time]),
raw: @text_processor.process_post(row[:post_text], attachments),
import_topic_id: @settings.prefix(row[:topic_id])
import_topic_id: @settings.prefix(row[:topic_id]),
}
if is_first_post
@ -58,7 +66,9 @@ module ImportScripts::PhpBB3
mapped[:category] = if category_mapping = @settings.category_mappings[row[:forum_id].to_s]
category_mapping[:discourse_category_id] ||
@lookup.category_id_from_imported_category_id(@settings.prefix(category_mapping[:target_category_id]))
@lookup.category_id_from_imported_category_id(
@settings.prefix(category_mapping[:target_category_id]),
)
else
@lookup.category_id_from_imported_category_id(@settings.prefix(row[:forum_id]))
end
@ -81,7 +91,8 @@ module ImportScripts::PhpBB3
end
def map_other_post(row, mapped)
parent = @lookup.topic_lookup_from_imported_post_id(@settings.prefix(row[:topic_first_post_id]))
parent =
@lookup.topic_lookup_from_imported_post_id(@settings.prefix(row[:topic_first_post_id]))
if parent.blank?
puts "Parent post #{@settings.prefix(row[:topic_first_post_id])} doesn't exist. Skipping #{@settings.prefix(row[:post_id])}: #{row[:topic_title][0..40]}"


@ -1,6 +1,6 @@
# frozen_string_literal: true
require_relative '../support/constants'
require_relative "../support/constants"
module ImportScripts::PhpBB3
class UserImporter
@ -29,8 +29,22 @@ module ImportScripts::PhpBB3
password: @settings.import_passwords ? row[:user_password] : nil,
name: @settings.username_as_name ? row[:username] : row[:name].presence,
created_at: Time.zone.at(row[:user_regdate]),
last_seen_at: row[:user_lastvisit] == 0 ? Time.zone.at(row[:user_regdate]) : Time.zone.at(row[:user_lastvisit]),
registration_ip_address: (IPAddr.new(row[:user_ip]) rescue nil),
last_seen_at:
(
if row[:user_lastvisit] == 0
Time.zone.at(row[:user_regdate])
else
Time.zone.at(row[:user_lastvisit])
end
),
registration_ip_address:
(
begin
IPAddr.new(row[:user_ip])
rescue StandardError
nil
end
),
active: is_active_user,
trust_level: trust_level,
manual_locked_trust_level: manual_locked_trust_level,
@ -43,10 +57,11 @@ module ImportScripts::PhpBB3
location: row[:user_from],
date_of_birth: parse_birthdate(row),
custom_fields: custom_fields(row),
post_create_action: proc do |user|
suspend_user(user, row)
@avatar_importer.import_avatar(user, row) if row[:user_avatar_type].present?
end
post_create_action:
proc do |user|
suspend_user(user, row)
@avatar_importer.import_avatar(user, row) if row[:user_avatar_type].present?
end,
}
end
@ -61,18 +76,19 @@ module ImportScripts::PhpBB3
id: @settings.prefix(username),
email: "anonymous_#{SecureRandom.hex}@no-email.invalid",
username: username,
name: @settings.username_as_name ? username : '',
name: @settings.username_as_name ? username : "",
created_at: Time.zone.at(row[:first_post_time]),
active: true,
trust_level: TrustLevel[0],
approved: true,
approved_by_id: Discourse.system_user.id,
approved_at: Time.now,
post_create_action: proc do |user|
row[:user_inactive_reason] = Constants::INACTIVE_MANUAL
row[:ban_reason] = 'Anonymous user from phpBB3' # TODO i18n
suspend_user(user, row, true)
end
post_create_action:
proc do |user|
row[:user_inactive_reason] = Constants::INACTIVE_MANUAL
row[:ban_reason] = "Anonymous user from phpBB3" # TODO i18n
suspend_user(user, row, true)
end,
}
end
@ -80,25 +96,32 @@ module ImportScripts::PhpBB3
def parse_birthdate(row)
return nil if row[:user_birthday].blank?
birthdate = Date.strptime(row[:user_birthday].delete(' '), '%d-%m-%Y') rescue nil
birthdate =
begin
Date.strptime(row[:user_birthday].delete(" "), "%d-%m-%Y")
rescue StandardError
nil
end
birthdate && birthdate.year > 0 ? birthdate : nil
end
def user_fields
@user_fields ||= begin
Hash[UserField.all.map { |field| [field.name, field] }]
end
@user_fields ||=
begin
Hash[UserField.all.map { |field| [field.name, field] }]
end
end
def field_mappings
@field_mappings ||= begin
@settings.custom_fields.map do |field|
{
phpbb_field_name: "pf_#{field[:phpbb_field_name]}".to_sym,
discourse_user_field: user_fields[field[:discourse_field_name]]
}
@field_mappings ||=
begin
@settings.custom_fields.map do |field|
{
phpbb_field_name: "pf_#{field[:phpbb_field_name]}".to_sym,
discourse_user_field: user_fields[field[:discourse_field_name]],
}
end
end
end
end
def custom_fields(row)
@ -114,7 +137,8 @@ module ImportScripts::PhpBB3
when "confirm"
value = value == 1 ? true : nil
when "dropdown"
value = user_field.user_field_options.find { |option| option.value == value } ? value : nil
value =
user_field.user_field_options.find { |option| option.value == value } ? value : nil
end
custom_fields["user_field_#{user_field.id}"] = value if value.present?
@ -128,7 +152,8 @@ module ImportScripts::PhpBB3
if row[:user_inactive_reason] == Constants::INACTIVE_MANUAL
user.suspended_at = Time.now
user.suspended_till = 200.years.from_now
ban_reason = row[:ban_reason].blank? ? 'Account deactivated by administrator' : row[:ban_reason] # TODO i18n
ban_reason =
row[:ban_reason].blank? ? "Account deactivated by administrator" : row[:ban_reason] # TODO i18n
elsif row[:ban_start].present?
user.suspended_at = Time.zone.at(row[:ban_start])
user.suspended_till = row[:ban_end] > 0 ? Time.zone.at(row[:ban_end]) : 200.years.from_now
@ -148,7 +173,9 @@ module ImportScripts::PhpBB3
if user.save
StaffActionLogger.new(Discourse.system_user).log_user_suspend(user, ban_reason)
else
Rails.logger.error("Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}")
Rails.logger.error(
"Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}",
)
end
end
end
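
parse_birthdate above follows the same attempt-then-fallback pattern. A standalone version, using a plain blank check in place of Rails' .blank?:

require "date"

def parse_birthdate(user_birthday)
  return nil if user_birthday.to_s.strip.empty?
  birthdate =
    begin
      # the delete(" ") suggests phpBB stores space-padded day-month-year strings
      Date.strptime(user_birthday.delete(" "), "%d-%m-%Y")
    rescue StandardError
      nil
    end
  birthdate && birthdate.year > 0 ? birthdate : nil
end

parse_birthdate(" 7-01-1990") # => #<Date: 1990-01-07>
parse_birthdate("not a date") # => nil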


@ -1,7 +1,9 @@
# frozen_string_literal: true
module ImportScripts; end
module ImportScripts::PhpBB3; end
module ImportScripts
end
module ImportScripts::PhpBB3
end
module ImportScripts::PhpBB3::BBCode
LINEBREAK_AUTO = :auto


@ -1,7 +1,7 @@
# frozen_string_literal: true
require 'nokogiri'
require_relative 'markdown_node'
require "nokogiri"
require_relative "markdown_node"
module ImportScripts::PhpBB3::BBCode
class XmlToMarkdown
@ -14,7 +14,7 @@ module ImportScripts::PhpBB3::BBCode
@allow_inline_code = opts.fetch(:allow_inline_code, false)
@traditional_linebreaks = opts.fetch(:traditional_linebreaks, false)
@doc = Nokogiri::XML(xml)
@doc = Nokogiri.XML(xml)
@list_stack = []
end
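
Nokogiri::XML(xml) and Nokogiri.XML(xml) invoke the same module function; the dot form above just makes explicit that XML is a method call rather than a constant lookup. A sketch with a tiny document shaped like phpBB 3.2's stored text:

require "nokogiri"

xml = "<r>hello <B><s>[b]</s>world<e>[/b]</e></B></r>"
doc = Nokogiri.XML(xml)
doc.root.name                 # => "r"
doc.root.children.map(&:name) # => ["text", "B"]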
@ -28,9 +28,9 @@ module ImportScripts::PhpBB3::BBCode
private
IGNORED_ELEMENTS = ["s", "e", "i"]
ELEMENTS_WITHOUT_LEADING_WHITESPACES = ["LIST", "LI"]
ELEMENTS_WITH_HARD_LINEBREAKS = ["B", "I", "U"]
IGNORED_ELEMENTS = %w[s e i]
ELEMENTS_WITHOUT_LEADING_WHITESPACES = %w[LIST LI]
ELEMENTS_WITH_HARD_LINEBREAKS = %w[B I U]
EXPLICIT_LINEBREAK_THRESHOLD = 2
def preprocess_xml
@ -65,9 +65,7 @@ module ImportScripts::PhpBB3::BBCode
xml_node.children.each { |xml_child| visit(xml_child, md_node || md_parent) }
after_hook = "after_#{xml_node.name}"
if respond_to?(after_hook, include_all: true)
send(after_hook, xml_node, md_node)
end
send(after_hook, xml_node, md_node) if respond_to?(after_hook, include_all: true)
end
def create_node(xml_node, md_parent)
@ -84,19 +82,15 @@ module ImportScripts::PhpBB3::BBCode
end
def visit_B(xml_node, md_node)
if xml_node.parent&.name != 'B'
md_node.enclosed_with = "**"
end
md_node.enclosed_with = "**" if xml_node.parent&.name != "B"
end
def visit_I(xml_node, md_node)
if xml_node.parent&.name != 'I'
md_node.enclosed_with = "_"
end
md_node.enclosed_with = "_" if xml_node.parent&.name != "I"
end
def visit_U(xml_node, md_node)
if xml_node.parent&.name != 'U'
if xml_node.parent&.name != "U"
md_node.prefix = "[u]"
md_node.postfix = "[/u]"
end
@ -122,10 +116,7 @@ module ImportScripts::PhpBB3::BBCode
md_node.prefix_linebreaks = md_node.postfix_linebreaks = @list_stack.size == 0 ? 2 : 1
md_node.prefix_linebreak_type = LINEBREAK_HTML if @list_stack.size == 0
@list_stack << {
unordered: xml_node.attribute('type').nil?,
item_count: 0
}
@list_stack << { unordered: xml_node.attribute("type").nil?, item_count: 0 }
end
def after_LIST(xml_node, md_node)
@ -138,21 +129,21 @@ module ImportScripts::PhpBB3::BBCode
list[:item_count] += 1
indentation = ' ' * 2 * depth
symbol = list[:unordered] ? '*' : "#{list[:item_count]}."
indentation = " " * 2 * depth
symbol = list[:unordered] ? "*" : "#{list[:item_count]}."
md_node.prefix = "#{indentation}#{symbol} "
md_node.postfix_linebreaks = 1
end
def visit_IMG(xml_node, md_node)
md_node.text = +"![](#{xml_node.attribute('src')})"
md_node.text = +"![](#{xml_node.attribute("src")})"
md_node.prefix_linebreaks = md_node.postfix_linebreaks = 2
md_node.skip_children
end
def visit_URL(xml_node, md_node)
original_url = xml_node.attribute('url').to_s
original_url = xml_node.attribute("url").to_s
url = CGI.unescapeHTML(original_url)
url = @url_replacement.call(url) if @url_replacement
@ -173,7 +164,8 @@ module ImportScripts::PhpBB3::BBCode
def visit_br(xml_node, md_node)
md_node.postfix_linebreaks += 1
if md_node.postfix_linebreaks > 1 && ELEMENTS_WITH_HARD_LINEBREAKS.include?(xml_node.parent&.name)
if md_node.postfix_linebreaks > 1 &&
ELEMENTS_WITH_HARD_LINEBREAKS.include?(xml_node.parent&.name)
md_node.postfix_linebreak_type = LINEBREAK_HARD
end
end
@ -194,7 +186,8 @@ module ImportScripts::PhpBB3::BBCode
def visit_QUOTE(xml_node, md_node)
if post = quoted_post(xml_node)
md_node.prefix = %Q{[quote="#{post[:username]}, post:#{post[:post_number]}, topic:#{post[:topic_id]}"]\n}
md_node.prefix =
%Q{[quote="#{post[:username]}, post:#{post[:post_number]}, topic:#{post[:topic_id]}"]\n}
md_node.postfix = "\n[/quote]"
elsif username = quoted_username(xml_node)
md_node.prefix = %Q{[quote="#{username}"]\n}
@ -242,11 +235,11 @@ module ImportScripts::PhpBB3::BBCode
return if size.nil?
if size.between?(1, 99)
md_node.prefix = '<small>'
md_node.postfix = '</small>'
md_node.prefix = "<small>"
md_node.postfix = "</small>"
elsif size.between?(101, 200)
md_node.prefix = '<big>'
md_node.postfix = '</big>'
md_node.prefix = "<big>"
md_node.postfix = "</big>"
end
end
@ -267,7 +260,8 @@ module ImportScripts::PhpBB3::BBCode
parent_prefix = prefix_from_parent(md_parent)
if parent_prefix && md_node.xml_node_name != "br" && (md_parent.prefix_children || !markdown.empty?)
if parent_prefix && md_node.xml_node_name != "br" &&
(md_parent.prefix_children || !markdown.empty?)
prefix = "#{parent_prefix}#{prefix}"
end
@ -275,11 +269,21 @@ module ImportScripts::PhpBB3::BBCode
text, prefix, postfix = hoist_whitespaces!(markdown, text, prefix, postfix)
end
add_linebreaks!(markdown, md_node.prefix_linebreaks, md_node.prefix_linebreak_type, parent_prefix)
add_linebreaks!(
markdown,
md_node.prefix_linebreaks,
md_node.prefix_linebreak_type,
parent_prefix,
)
markdown << prefix
markdown << text
markdown << postfix
add_linebreaks!(markdown, md_node.postfix_linebreaks, md_node.postfix_linebreak_type, parent_prefix)
add_linebreaks!(
markdown,
md_node.postfix_linebreaks,
md_node.postfix_linebreak_type,
parent_prefix,
)
end
markdown
@ -296,9 +300,7 @@ module ImportScripts::PhpBB3::BBCode
end
unless postfix.empty?
if ends_with_whitespace?(text)
postfix = "#{postfix}#{text[-1]}"
end
postfix = "#{postfix}#{text[-1]}" if ends_with_whitespace?(text)
text = text.rstrip
end
@ -319,16 +321,24 @@ module ImportScripts::PhpBB3::BBCode
if linebreak_type == LINEBREAK_HTML
max_linebreak_count = [existing_linebreak_count, required_linebreak_count - 1].max + 1
required_linebreak_count = max_linebreak_count if max_linebreak_count > EXPLICIT_LINEBREAK_THRESHOLD
required_linebreak_count = max_linebreak_count if max_linebreak_count >
EXPLICIT_LINEBREAK_THRESHOLD
end
return if existing_linebreak_count >= required_linebreak_count
rstrip!(markdown)
alternative_linebreak_start_index = required_linebreak_count > EXPLICIT_LINEBREAK_THRESHOLD ? 1 : 2
alternative_linebreak_start_index =
required_linebreak_count > EXPLICIT_LINEBREAK_THRESHOLD ? 1 : 2
required_linebreak_count.times do |index|
linebreak = linebreak(linebreak_type, index, alternative_linebreak_start_index, required_linebreak_count)
linebreak =
linebreak(
linebreak_type,
index,
alternative_linebreak_start_index,
required_linebreak_count,
)
markdown << (linebreak == "\n" ? prefix.rstrip : prefix) if prefix && index > 0
markdown << linebreak
@ -336,18 +346,25 @@ module ImportScripts::PhpBB3::BBCode
end
def rstrip!(markdown)
markdown.gsub!(/\s*(?:\\?\n|<br>\n)*\z/, '')
markdown.gsub!(/\s*(?:\\?\n|<br>\n)*\z/, "")
end
def linebreak(linebreak_type, linebreak_index, alternative_linebreak_start_index, required_linebreak_count)
def linebreak(
linebreak_type,
linebreak_index,
alternative_linebreak_start_index,
required_linebreak_count
)
use_alternative_linebreak = linebreak_index >= alternative_linebreak_start_index
is_last_linebreak = linebreak_index + 1 == required_linebreak_count
return "<br>\n" if linebreak_type == LINEBREAK_HTML &&
use_alternative_linebreak && is_last_linebreak
if linebreak_type == LINEBREAK_HTML && use_alternative_linebreak && is_last_linebreak
return "<br>\n"
end
return "\\\n" if linebreak_type == LINEBREAK_HARD ||
@traditional_linebreaks || use_alternative_linebreak
if linebreak_type == LINEBREAK_HARD || @traditional_linebreaks || use_alternative_linebreak
return "\\\n"
end
"\n"
end


@ -8,8 +8,8 @@ module ImportScripts::PhpBB3
INACTIVE_MANUAL = 3 # Account deactivated by administrator
INACTIVE_REMIND = 4 # Forced user account reactivation
GROUP_ADMINISTRATORS = 'ADMINISTRATORS'
GROUP_MODERATORS = 'GLOBAL_MODERATORS'
GROUP_ADMINISTRATORS = "ADMINISTRATORS"
GROUP_MODERATORS = "GLOBAL_MODERATORS"
# https://wiki.phpbb.com/Table.phpbb_users
USER_TYPE_NORMAL = 0
@ -21,9 +21,9 @@ module ImportScripts::PhpBB3
AVATAR_TYPE_REMOTE = 2
AVATAR_TYPE_GALLERY = 3
AVATAR_TYPE_STRING_UPLOADED = 'avatar.driver.upload'
AVATAR_TYPE_STRING_REMOTE = 'avatar.driver.remote'
AVATAR_TYPE_STRING_GALLERY = 'avatar.driver.local'
AVATAR_TYPE_STRING_UPLOADED = "avatar.driver.upload"
AVATAR_TYPE_STRING_REMOTE = "avatar.driver.remote"
AVATAR_TYPE_STRING_GALLERY = "avatar.driver.local"
FORUM_TYPE_CATEGORY = 0
FORUM_TYPE_POST = 1


@ -1,13 +1,13 @@
# frozen_string_literal: true
require 'csv'
require 'yaml'
require_relative '../../base'
require "csv"
require "yaml"
require_relative "../../base"
module ImportScripts::PhpBB3
class Settings
def self.load(filename)
yaml = YAML::load_file(filename)
yaml = YAML.load_file(filename)
Settings.new(yaml.deep_stringify_keys.with_indifferent_access)
end
@ -44,40 +44,41 @@ module ImportScripts::PhpBB3
attr_reader :database
def initialize(yaml)
import_settings = yaml['import']
import_settings = yaml["import"]
@site_name = import_settings['site_name']
@site_name = import_settings["site_name"]
@new_categories = import_settings['new_categories']
@category_mappings = import_settings.fetch('category_mappings', []).to_h { |m| [m[:source_category_id].to_s, m] }
@tag_mappings = import_settings['tag_mappings']
@rank_mapping = import_settings['rank_mapping']
@new_categories = import_settings["new_categories"]
@category_mappings =
import_settings.fetch("category_mappings", []).to_h { |m| [m[:source_category_id].to_s, m] }
@tag_mappings = import_settings["tag_mappings"]
@rank_mapping = import_settings["rank_mapping"]
@import_anonymous_users = import_settings['anonymous_users']
@import_attachments = import_settings['attachments']
@import_private_messages = import_settings['private_messages']
@import_polls = import_settings['polls']
@import_bookmarks = import_settings['bookmarks']
@import_passwords = import_settings['passwords']
@import_likes = import_settings['likes']
@import_anonymous_users = import_settings["anonymous_users"]
@import_attachments = import_settings["attachments"]
@import_private_messages = import_settings["private_messages"]
@import_polls = import_settings["polls"]
@import_bookmarks = import_settings["bookmarks"]
@import_passwords = import_settings["passwords"]
@import_likes = import_settings["likes"]
avatar_settings = import_settings['avatars']
@import_uploaded_avatars = avatar_settings['uploaded']
@import_remote_avatars = avatar_settings['remote']
@import_gallery_avatars = avatar_settings['gallery']
avatar_settings = import_settings["avatars"]
@import_uploaded_avatars = avatar_settings["uploaded"]
@import_remote_avatars = avatar_settings["remote"]
@import_gallery_avatars = avatar_settings["gallery"]
@use_bbcode_to_md = import_settings['use_bbcode_to_md']
@use_bbcode_to_md = import_settings["use_bbcode_to_md"]
@original_site_prefix = import_settings['site_prefix']['original']
@new_site_prefix = import_settings['site_prefix']['new']
@base_dir = import_settings['phpbb_base_dir']
@permalinks = PermalinkSettings.new(import_settings['permalinks'])
@original_site_prefix = import_settings["site_prefix"]["original"]
@new_site_prefix = import_settings["site_prefix"]["new"]
@base_dir = import_settings["phpbb_base_dir"]
@permalinks = PermalinkSettings.new(import_settings["permalinks"])
@username_as_name = import_settings['username_as_name']
@emojis = import_settings.fetch('emojis', [])
@custom_fields = import_settings.fetch('custom_fields', [])
@username_as_name = import_settings["username_as_name"]
@emojis = import_settings.fetch("emojis", [])
@custom_fields = import_settings.fetch("custom_fields", [])
@database = DatabaseSettings.new(yaml['database'])
@database = DatabaseSettings.new(yaml["database"])
end
def prefix(val)
@ -87,7 +88,7 @@ module ImportScripts::PhpBB3
def trust_level_for_posts(rank, trust_level: 0)
if @rank_mapping.present?
@rank_mapping.each do |key, value|
trust_level = [trust_level, key.gsub('trust_level_', '').to_i].max if rank >= value
trust_level = [trust_level, key.gsub("trust_level_", "").to_i].max if rank >= value
end
end
@ -106,14 +107,14 @@ module ImportScripts::PhpBB3
attr_reader :batch_size
def initialize(yaml)
@type = yaml['type']
@host = yaml['host']
@port = yaml['port']
@username = yaml['username']
@password = yaml['password']
@schema = yaml['schema']
@table_prefix = yaml['table_prefix']
@batch_size = yaml['batch_size']
@type = yaml["type"]
@host = yaml["host"]
@port = yaml["port"]
@username = yaml["username"]
@password = yaml["password"]
@schema = yaml["schema"]
@table_prefix = yaml["table_prefix"]
@batch_size = yaml["batch_size"]
end
end
@ -124,10 +125,10 @@ module ImportScripts::PhpBB3
attr_reader :normalization_prefix
def initialize(yaml)
@create_category_links = yaml['categories']
@create_topic_links = yaml['topics']
@create_post_links = yaml['posts']
@normalization_prefix = yaml['prefix']
@create_category_links = yaml["categories"]
@create_topic_links = yaml["topics"]
@create_post_links = yaml["posts"]
@normalization_prefix = yaml["prefix"]
end
end
end
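
All settings access above uses plain string keys straight from YAML. A minimal sketch of loading such a snippet (YAML::load_file and YAML.load_file are the same call, matching the rewrite above); the keys are illustrative:

require "yaml"

yaml = YAML.load(<<~YML)
  import:
    site_name: oldforum
    attachments: true
YML

import_settings = yaml["import"]
import_settings["site_name"]   # => "oldforum"
import_settings["attachments"] # => true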


@ -18,15 +18,16 @@ module ImportScripts::PhpBB3
def replace_smilies(text)
# :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
text.gsub!(/<!-- s(\S+) --><img src="\{SMILIES_PATH\}\/.+?" alt=".*?" title=".*?" \/><!-- s?\S+ -->/) do
emoji($1)
end
text.gsub!(
/<!-- s(\S+) --><img src="\{SMILIES_PATH\}\/.+?" alt=".*?" title=".*?" \/><!-- s?\S+ -->/,
) { emoji($1) }
end
def emoji(smiley_code)
@smiley_map.fetch(smiley_code) do
smiley = @database.get_smiley(smiley_code)
emoji = upload_smiley(smiley_code, smiley[:smiley_url], smiley_code, smiley[:emotion]) if smiley
emoji =
upload_smiley(smiley_code, smiley[:smiley_url], smiley_code, smiley[:emotion]) if smiley
emoji || smiley_as_text(smiley_code)
end
end
@ -35,37 +36,34 @@ module ImportScripts::PhpBB3
def add_default_smilies
{
[':D', ':-D', ':grin:'] => ':smiley:',
[':)', ':-)', ':smile:'] => ':slight_smile:',
[';)', ';-)', ':wink:'] => ':wink:',
[':(', ':-(', ':sad:'] => ':frowning:',
[':o', ':-o', ':eek:'] => ':astonished:',
[':shock:'] => ':open_mouth:',
[':?', ':-?', ':???:'] => ':confused:',
['8)', '8-)', ':cool:'] => ':sunglasses:',
[':lol:'] => ':laughing:',
[':x', ':-x', ':mad:'] => ':angry:',
[':P', ':-P', ':razz:'] => ':stuck_out_tongue:',
[':oops:'] => ':blush:',
[':cry:'] => ':cry:',
[':evil:'] => ':imp:',
[':twisted:'] => ':smiling_imp:',
[':roll:'] => ':unamused:',
[':!:'] => ':exclamation:',
[':?:'] => ':question:',
[':idea:'] => ':bulb:',
[':arrow:'] => ':arrow_right:',
[':|', ':-|'] => ':neutral_face:',
[':geek:'] => ':nerd:'
}.each do |smilies, emoji|
smilies.each { |smiley| @smiley_map[smiley] = emoji }
end
%w[:D :-D :grin:] => ":smiley:",
%w[:) :-) :smile:] => ":slight_smile:",
%w[;) ;-) :wink:] => ":wink:",
%w[:( :-( :sad:] => ":frowning:",
%w[:o :-o :eek:] => ":astonished:",
[":shock:"] => ":open_mouth:",
%w[:? :-? :???:] => ":confused:",
%w[8) 8-) :cool:] => ":sunglasses:",
[":lol:"] => ":laughing:",
%w[:x :-x :mad:] => ":angry:",
%w[:P :-P :razz:] => ":stuck_out_tongue:",
[":oops:"] => ":blush:",
[":cry:"] => ":cry:",
[":evil:"] => ":imp:",
[":twisted:"] => ":smiling_imp:",
[":roll:"] => ":unamused:",
[":!:"] => ":exclamation:",
[":?:"] => ":question:",
[":idea:"] => ":bulb:",
[":arrow:"] => ":arrow_right:",
%w[:| :-|] => ":neutral_face:",
[":geek:"] => ":nerd:",
}.each { |smilies, emoji| smilies.each { |smiley| @smiley_map[smiley] = emoji } }
end
def add_configured_smilies(emojis)
emojis.each do |emoji, smilies|
Array.wrap(smilies)
.each { |smiley| @smiley_map[smiley] = ":#{emoji}:" }
Array.wrap(smilies).each { |smiley| @smiley_map[smiley] = ":#{emoji}:" }
end
end
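
The rewritten smiley table leans on %w[...], which builds an array of strings without quotes or commas; single-element entries like [":lol:"] were left as plain arrays in the diff above. The structure in miniature:

smiley_map = {}

{
  %w[:D :-D :grin:] => ":smiley:",
  [":lol:"] => ":laughing:",
}.each { |smilies, emoji| smilies.each { |smiley| smiley_map[smiley] = emoji } }

smiley_map[":-D"]   # => ":smiley:"
smiley_map[":lol:"] # => ":laughing:"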


@ -1,6 +1,6 @@
# frozen_string_literal: true
require_relative 'bbcode/xml_to_markdown'
require_relative "bbcode/xml_to_markdown"
module ImportScripts::PhpBB3
class TextProcessor
@ -14,7 +14,9 @@ module ImportScripts::PhpBB3
@database = database
@smiley_processor = smiley_processor
@he = HTMLEntities.new
@use_xml_to_markdown = phpbb_config[:phpbb_version].start_with?('3.2') || phpbb_config[:phpbb_version].start_with?('3.3')
@use_xml_to_markdown =
phpbb_config[:phpbb_version].start_with?("3.2") ||
phpbb_config[:phpbb_version].start_with?("3.3")
@settings = settings
@new_site_prefix = settings.new_site_prefix
@ -25,24 +27,27 @@ module ImportScripts::PhpBB3
if @use_xml_to_markdown
unreferenced_attachments = attachments&.dup
converter = BBCode::XmlToMarkdown.new(
raw,
username_from_user_id: lambda { |user_id| @lookup.find_username_by_import_id(user_id) },
smilie_to_emoji: lambda { |smilie| @smiley_processor.emoji(smilie).dup },
quoted_post_from_post_id: lambda { |post_id| @lookup.topic_lookup_from_imported_post_id(post_id) },
upload_md_from_file: (lambda do |filename, index|
unreferenced_attachments[index] = nil
attachments.fetch(index, filename).dup
end if attachments),
url_replacement: nil,
allow_inline_code: false
)
converter =
BBCode::XmlToMarkdown.new(
raw,
username_from_user_id: lambda { |user_id| @lookup.find_username_by_import_id(user_id) },
smilie_to_emoji: lambda { |smilie| @smiley_processor.emoji(smilie).dup },
quoted_post_from_post_id:
lambda { |post_id| @lookup.topic_lookup_from_imported_post_id(post_id) },
upload_md_from_file:
(
lambda do |filename, index|
unreferenced_attachments[index] = nil
attachments.fetch(index, filename).dup
end if attachments
),
url_replacement: nil,
allow_inline_code: false,
)
text = converter.convert
text.gsub!(@short_internal_link_regexp) do |link|
replace_internal_link(link, $1, $2)
end
text.gsub!(@short_internal_link_regexp) { |link| replace_internal_link(link, $1, $2) }
add_unreferenced_attachments(text, unreferenced_attachments)
else
@ -50,9 +55,7 @@ module ImportScripts::PhpBB3
text = CGI.unescapeHTML(text)
clean_bbcodes(text)
if @settings.use_bbcode_to_md
text = bbcode_to_md(text)
end
text = bbcode_to_md(text) if @settings.use_bbcode_to_md
process_smilies(text)
process_links(text)
process_lists(text)
@ -65,11 +68,19 @@ module ImportScripts::PhpBB3
end
def process_post(raw, attachments)
process_raw_text(raw, attachments) rescue raw
begin
process_raw_text(raw, attachments)
rescue StandardError
raw
end
end
def process_private_msg(raw, attachments)
process_raw_text(raw, attachments) rescue raw
begin
process_raw_text(raw, attachments)
rescue StandardError
raw
end
end
protected
@ -78,10 +89,10 @@ module ImportScripts::PhpBB3
# Many phpbb bbcode tags have a hash attached to them. Examples:
# [url=https&#58;//google&#46;com:1qh1i7ky]click here[/url:1qh1i7ky]
# [quote=&quot;cybereality&quot;:b0wtlzex]Some text.[/quote:b0wtlzex]
text.gsub!(/:(?:\w{5,8})\]/, ']')
text.gsub!(/:(?:\w{5,8})\]/, "]")
# remove color tags
text.gsub!(/\[\/?color(=#?[a-z0-9]*)?\]/i, "")
text.gsub!(%r{\[/?color(=#?[a-z0-9]*)?\]}i, "")
end
def bbcode_to_md(text)
@ -101,23 +112,19 @@ module ImportScripts::PhpBB3
# Internal forum links can have this forms:
# for topics: <!-- l --><a class="postlink-local" href="https://example.com/forums/viewtopic.php?f=26&amp;t=3412">viewtopic.php?f=26&amp;t=3412</a><!-- l -->
# for posts: <!-- l --><a class="postlink-local" href="https://example.com/forums/viewtopic.php?p=1732#p1732">viewtopic.php?p=1732#p1732</a><!-- l -->
text.gsub!(@long_internal_link_regexp) do |link|
replace_internal_link(link, $1, $2)
end
text.gsub!(@long_internal_link_regexp) { |link| replace_internal_link(link, $1, $2) }
# Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
text.gsub!(/<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/i, '[\2](\1)')
text.gsub!(%r{<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)</a><!-- \w -->}i, '[\2](\1)')
# Replace internal forum links that aren't in the <!-- l --> format
text.gsub!(@short_internal_link_regexp) do |link|
replace_internal_link(link, $1, $2)
end
text.gsub!(@short_internal_link_regexp) { |link| replace_internal_link(link, $1, $2) }
# phpBB shortens link text like this, which breaks our markdown processing:
# [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
#
# Work around it for now:
text.gsub!(/\[http(s)?:\/\/(www\.)?/i, '[')
text.gsub!(%r{\[http(s)?://(www\.)?}i, "[")
end
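
# The final workaround only touches the bracketed link text: the target URL
# in parentheses is preceded by "(" rather than "[", so it never matches.
# Using the sample link from the comment above:
#
#   s = "[http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)"
#   s.gsub(%r{\[http(s)?://(www\.)?}i, "[")
#   # => "[answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)"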
def replace_internal_link(link, import_topic_id, import_post_id)
@ -144,19 +151,20 @@ module ImportScripts::PhpBB3
# convert list tags to ul and list=1 tags to ol
# list=a is not supported, so handle it like list=1
# list=9 and list=x have the same result as list=1 and list=a
text.gsub!(/\[list\](.*?)\[\/list:u\]/mi) do
$1.gsub(/\[\*\](.*?)\[\/\*:m\]\n*/mi) { "* #{$1}\n" }
text.gsub!(%r{\[list\](.*?)\[/list:u\]}mi) do
$1.gsub(%r{\[\*\](.*?)\[/\*:m\]\n*}mi) { "* #{$1}\n" }
end
text.gsub!(/\[list=.*?\](.*?)\[\/list:o\]/mi) do
$1.gsub(/\[\*\](.*?)\[\/\*:m\]\n*/mi) { "1. #{$1}\n" }
text.gsub!(%r{\[list=.*?\](.*?)\[/list:o\]}mi) do
$1.gsub(%r{\[\*\](.*?)\[/\*:m\]\n*}mi) { "1. #{$1}\n" }
end
end
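
# A worked sample of the ordered-list branch (input invented); Markdown
# renumbers the repeated "1." prefixes on render:
#
#   text = "[list=1][*]first[/*:m]\n[*]second[/*:m]\n[/list:o]"
#   text.gsub(%r{\[list=.*?\](.*?)\[/list:o\]}mi) do
#     $1.gsub(%r{\[\*\](.*?)\[/\*:m\]\n*}mi) { "1. #{$1}\n" }
#   end
#   # => "1. first\n1. second\n"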
# This replaces existing [attachment] BBCodes with the corresponding HTML tags for Discourse.
# All attachments that haven't been referenced in the text are appended to the end of the text.
def process_attachments(text, attachments)
attachment_regexp = /\[attachment=([\d])+\]<!-- [\w]+ -->([^<]+)<!-- [\w]+ -->\[\/attachment\]?/i
attachment_regexp =
%r{\[attachment=([\d])+\]<!-- [\w]+ -->([^<]+)<!-- [\w]+ -->\[/attachment\]?}i
unreferenced_attachments = attachments.dup
text.gsub!(attachment_regexp) do
@ -178,29 +186,34 @@ module ImportScripts::PhpBB3
end
def create_internal_link_regexps(original_site_prefix)
host = original_site_prefix.gsub('.', '\.')
link_regex = "http(?:s)?://#{host}/viewtopic\\.php\\?(?:\\S*)(?:t=(\\d+)|p=(\\d+)(?:#p\\d+)?)(?:[^\\s\\)\\]]*)"
host = original_site_prefix.gsub(".", '\.')
link_regex =
"http(?:s)?://#{host}/viewtopic\\.php\\?(?:\\S*)(?:t=(\\d+)|p=(\\d+)(?:#p\\d+)?)(?:[^\\s\\)\\]]*)"
@long_internal_link_regexp = Regexp.new(%Q|<!-- l --><a(?:.+)href="#{link_regex}"(?:.*)</a><!-- l -->|, Regexp::IGNORECASE)
@long_internal_link_regexp =
Regexp.new(
%Q|<!-- l --><a(?:.+)href="#{link_regex}"(?:.*)</a><!-- l -->|,
Regexp::IGNORECASE,
)
@short_internal_link_regexp = Regexp.new(link_regex, Regexp::IGNORECASE)
end
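
# A quick check of what the short regexp captures, assuming
# original_site_prefix is "example.com": topic links populate $1 and post
# links populate $2, matching the replace_internal_link parameters above.
#
#   re = Regexp.new(
#     "http(?:s)?://example\\.com/viewtopic\\.php\\?(?:\\S*)(?:t=(\\d+)|p=(\\d+)(?:#p\\d+)?)(?:[^\\s\\)\\]]*)",
#     Regexp::IGNORECASE,
#   )
#   "https://example.com/viewtopic.php?f=26&t=3412" =~ re
#   [$1, $2] # => ["3412", nil]
#   "https://example.com/viewtopic.php?p=1732#p1732" =~ re
#   [$1, $2] # => [nil, "1732"]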
def process_code(text)
text.gsub!(/<span class="syntax.*?>(.*?)<\/span>/) { "#{$1}" }
text.gsub!(/\[code(=[a-z]*)?\](.*?)\[\/code\]/i) { "[code]\n#{@he.decode($2)}\n[/code]" }
text.gsub!(/<br \/>/, "\n")
text.gsub!(%r{<span class="syntax.*?>(.*?)</span>}) { "#{$1}" }
text.gsub!(%r{\[code(=[a-z]*)?\](.*?)\[/code\]}i) { "[code]\n#{@he.decode($2)}\n[/code]" }
text.gsub!(%r{<br />}, "\n")
text
end
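
# A sketch of the [code] normalization with an invented input; here `he`
# stands in for the HTMLEntities decoder the importer keeps in @he:
#
#   require "htmlentities"
#
#   he = HTMLEntities.new
#   text = "[code=ruby]puts &quot;hi&quot;[/code]"
#   text.gsub(%r{\[code(=[a-z]*)?\](.*?)\[/code\]}i) { "[code]\n#{he.decode($2)}\n[/code]" }
#   # => "[code]\nputs \"hi\"\n[/code]"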
def fix_markdown(text)
text.gsub!(/(\n*\[\/?quote.*?\]\n*)/mi) { |q| "\n#{q.strip}\n" }
text.gsub!(%r{(\n*\[/?quote.*?\]\n*)}mi) { |q| "\n#{q.strip}\n" }
text.gsub!(/^!\[[^\]]*\]\([^\]]*\)$/i) { |img| "\n#{img.strip}\n" } # space out images single on line
text
end
def process_videos(text)
# [YOUTUBE]<id>[/YOUTUBE]
text.gsub(/\[youtube\](.+?)\[\/youtube\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
text.gsub(%r{\[youtube\](.+?)\[/youtube\]}i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }
text
end
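
# The bare URL on its own line is what lets Discourse onebox the video.
# A one-line demo with an invented id:
#
#   "[youtube]dQw4w9WgXcQ[/youtube]".gsub(%r{\[youtube\](.+?)\[/youtube\]}i) do
#     "\nhttps://www.youtube.com/watch?v=#{$1}\n"
#   end
#   # => "\nhttps://www.youtube.com/watch?v=dQw4w9WgXcQ\n"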
end
View File
@ -7,19 +7,19 @@ require File.expand_path(File.dirname(__FILE__) + "/base.rb")
# Call it like this:
# RAILS_ENV=production bundle exec ruby script/import_scripts/punbb.rb
class ImportScripts::PunBB < ImportScripts::Base
PUNBB_DB = "punbb_db"
BATCH_SIZE = 1000
def initialize
super
@client = Mysql2::Client.new(
host: "localhost",
username: "root",
password: "pa$$word",
database: PUNBB_DB
)
@client =
Mysql2::Client.new(
host: "localhost",
username: "root",
password: "pa$$word",
database: PUNBB_DB,
)
end
def execute
@ -30,36 +30,41 @@ class ImportScripts::PunBB < ImportScripts::Base
end
def import_users
puts '', "creating users"
puts "", "creating users"
total_count = mysql_query("SELECT count(*) count FROM users;").first['count']
total_count = mysql_query("SELECT count(*) count FROM users;").first["count"]
batches(BATCH_SIZE) do |offset|
results = mysql_query(
"SELECT id, username, realname name, url website, email email, registered created_at,
results =
mysql_query(
"SELECT id, username, realname name, url website, email email, registered created_at,
registration_ip registration_ip_address, last_visit last_visit_time, last_email_sent last_emailed_at,
last_email_sent last_emailed_at, location, group_id
FROM users
LIMIT #{BATCH_SIZE}
OFFSET #{offset};")
OFFSET #{offset};",
)
break if results.size < 1
next if all_records_exist? :users, results.map { |u| u["id"].to_i }
create_users(results, total: total_count, offset: offset) do |user|
{ id: user['id'],
email: user['email'],
username: user['username'],
name: user['name'],
created_at: Time.zone.at(user['created_at']),
website: user['website'],
registration_ip_address: user['registration_ip_address'],
last_seen_at: Time.zone.at(user['last_visit_time']),
last_emailed_at: user['last_emailed_at'] == nil ? 0 : Time.zone.at(user['last_emailed_at']),
location: user['location'],
moderator: user['group_id'] == 4,
admin: user['group_id'] == 1 }
{
id: user["id"],
email: user["email"],
username: user["username"],
name: user["name"],
created_at: Time.zone.at(user["created_at"]),
website: user["website"],
registration_ip_address: user["registration_ip_address"],
last_seen_at: Time.zone.at(user["last_visit_time"]),
last_emailed_at:
user["last_emailed_at"] == nil ? 0 : Time.zone.at(user["last_emailed_at"]),
location: user["location"],
moderator: user["group_id"] == 4,
admin: user["group_id"] == 1,
}
end
end
end
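
# The batches helper from base.rb drives this OFFSET pagination; reduced to
# a runnable sketch, with an invented fetch_page lambda standing in for the
# mysql_query call:
#
#   BATCH_SIZE = 1000
#   pages = { 0 => Array.new(BATCH_SIZE, :row), 1000 => Array.new(42, :row) }
#   fetch_page = ->(offset) { pages.fetch(offset, []) }
#   imported = 0
#   offset = 0
#   loop do
#     rows = fetch_page.call(offset)
#     break if rows.size < 1          # a short final page is still imported
#     imported += rows.size           # create_users(rows, ...) would run here
#     offset += BATCH_SIZE
#   end
#   imported # => 1042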
@ -67,33 +72,34 @@ class ImportScripts::PunBB < ImportScripts::Base
def import_categories
puts "", "importing top level categories..."
categories = mysql_query("
categories =
mysql_query(
"
SELECT id, cat_name name, disp_position position
FROM categories
ORDER BY id ASC
").to_a
",
).to_a
create_categories(categories) do |category|
{
id: category["id"],
name: category["name"]
}
end
create_categories(categories) { |category| { id: category["id"], name: category["name"] } }
puts "", "importing children categories..."
children_categories = mysql_query("
children_categories =
mysql_query(
"
SELECT id, forum_name name, forum_desc description, disp_position position, cat_id parent_category_id
FROM forums
ORDER BY id
").to_a
",
).to_a
create_categories(children_categories) do |category|
{
id: "child##{category['id']}",
id: "child##{category["id"]}",
name: category["name"],
description: category["description"],
parent_category_id: category_id_from_imported_category_id(category["parent_category_id"])
parent_category_id: category_id_from_imported_category_id(category["parent_category_id"]),
}
end
end
@ -104,7 +110,9 @@ class ImportScripts::PunBB < ImportScripts::Base
total_count = mysql_query("SELECT count(*) count from posts").first["count"]
batches(BATCH_SIZE) do |offset|
results = mysql_query("
results =
mysql_query(
"
SELECT p.id id,
t.id topic_id,
t.forum_id category_id,
@ -119,29 +127,30 @@ class ImportScripts::PunBB < ImportScripts::Base
ORDER BY p.posted
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
").to_a
",
).to_a
break if results.size < 1
next if all_records_exist? :posts, results.map { |m| m['id'].to_i }
next if all_records_exist? :posts, results.map { |m| m["id"].to_i }
create_posts(results, total: total_count, offset: offset) do |m|
skip = false
mapped = {}
mapped[:id] = m['id']
mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1
mapped[:raw] = process_punbb_post(m['raw'], m['id'])
mapped[:created_at] = Time.zone.at(m['created_at'])
mapped[:id] = m["id"]
mapped[:user_id] = user_id_from_imported_user_id(m["user_id"]) || -1
mapped[:raw] = process_punbb_post(m["raw"], m["id"])
mapped[:created_at] = Time.zone.at(m["created_at"])
if m['id'] == m['first_post_id']
mapped[:category] = category_id_from_imported_category_id("child##{m['category_id']}")
mapped[:title] = CGI.unescapeHTML(m['title'])
if m["id"] == m["first_post_id"]
mapped[:category] = category_id_from_imported_category_id("child##{m["category_id"]}")
mapped[:title] = CGI.unescapeHTML(m["title"])
else
parent = topic_lookup_from_imported_post_id(m['first_post_id'])
parent = topic_lookup_from_imported_post_id(m["first_post_id"])
if parent
mapped[:topic_id] = parent[:topic_id]
else
puts "Parent post #{m['first_post_id']} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
puts "Parent post #{m["first_post_id"]} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
skip = true
end
end
@ -152,16 +161,16 @@ class ImportScripts::PunBB < ImportScripts::Base
end
def suspend_users
puts '', "updating banned users"
puts "", "updating banned users"
banned = 0
failed = 0
total = mysql_query("SELECT count(*) count FROM bans").first['count']
total = mysql_query("SELECT count(*) count FROM bans").first["count"]
system_user = Discourse.system_user
mysql_query("SELECT username, email FROM bans").each do |b|
user = User.find_by_email(b['email'])
user = User.find_by_email(b["email"])
if user
user.suspended_at = Time.now
user.suspended_till = 200.years.from_now
@ -174,7 +183,7 @@ class ImportScripts::PunBB < ImportScripts::Base
failed += 1
end
else
puts "Not found: #{b['email']}"
puts "Not found: #{b["email"]}"
failed += 1
end
@ -189,15 +198,15 @@ class ImportScripts::PunBB < ImportScripts::Base
s.gsub!(/<!-- s(\S+) -->(?:.*)<!-- s(?:\S+) -->/, '\1')
# Some links look like this: <!-- m --><a class="postlink" href="http://www.onegameamonth.com">http://www.onegameamonth.com</a><!-- m -->
s.gsub!(/<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)<\/a><!-- \w -->/, '[\2](\1)')
s.gsub!(%r{<!-- \w --><a(?:.+)href="(\S+)"(?:.*)>(.+)</a><!-- \w -->}, '[\2](\1)')
# Many phpbb bbcode tags have a hash attached to them. Examples:
# [url=https&#58;//google&#46;com:1qh1i7ky]click here[/url:1qh1i7ky]
# [quote=&quot;cybereality&quot;:b0wtlzex]Some text.[/quote:b0wtlzex]
s.gsub!(/:(?:\w{8})\]/, ']')
s.gsub!(/:(?:\w{8})\]/, "]")
# Remove mybb video tags.
s.gsub!(/(^\[video=.*?\])|(\[\/video\]$)/, '')
s.gsub!(%r{(^\[video=.*?\])|(\[/video\]$)}, "")
s = CGI.unescapeHTML(s)
@ -205,7 +214,7 @@ class ImportScripts::PunBB < ImportScripts::Base
# [http://answers.yahoo.com/question/index ... 223AAkkPli](http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli)
#
# Work around it for now:
s.gsub!(/\[http(s)?:\/\/(www\.)?/, '[')
s.gsub!(%r{\[http(s)?://(www\.)?}, "[")
s
end
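
# The first gsub! above collapses phpBB's smiley markup down to its text
# form; a minimal demo with invented markup:
#
#   s = '<!-- s:lol: --><img src="{SMILIES_PATH}/lol.gif" alt=":lol:" /><!-- s:lol: -->'
#   s.gsub(/<!-- s(\S+) -->(?:.*)<!-- s(?:\S+) -->/, '\1')
#   # => ":lol:"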
View File
@ -1,25 +1,25 @@
# frozen_string_literal: true
require 'yaml'
require_relative 'quandora_api'
require "yaml"
require_relative "quandora_api"
def load_config(file)
config = YAML::load_file(File.join(__dir__, file))
@domain = config['domain']
@username = config['username']
@password = config['password']
config = YAML.load_file(File.join(__dir__, file))
@domain = config["domain"]
@username = config["username"]
@password = config["password"]
end
def export
api = QuandoraApi.new @domain, @username, @password
bases = api.list_bases
bases.each do |base|
question_list = api.list_questions base['objectId'], 1000
question_list = api.list_questions base["objectId"], 1000
question_list.each do |q|
question_id = q['uid']
question_id = q["uid"]
question = api.get_question question_id
File.open("output/#{question_id}.json", 'w') do |f|
puts question['title']
File.open("output/#{question_id}.json", "w") do |f|
puts question["title"]
f.write question.to_json
f.close
end
View File
@ -1,10 +1,9 @@
# frozen_string_literal: true
require_relative './quandora_question.rb'
require_relative "./quandora_question.rb"
require File.expand_path(File.dirname(__FILE__) + "/../base.rb")
class ImportScripts::Quandora < ImportScripts::Base
JSON_FILES_DIR = "output"
def initialize
@ -12,8 +11,8 @@ class ImportScripts::Quandora < ImportScripts::Base
@system_user = Discourse.system_user
@questions = []
Dir.foreach(JSON_FILES_DIR) do |filename|
next if filename == ('.') || filename == ('..')
question = File.read JSON_FILES_DIR + '/' + filename
next if filename == (".") || filename == ("..")
question = File.read JSON_FILES_DIR + "/" + filename
@questions << question
end
end
@ -33,9 +32,7 @@ class ImportScripts::Quandora < ImportScripts::Base
q = QuandoraQuestion.new question
import_users q.users
created_topic = import_topic q.topic
if created_topic
import_posts q.replies, created_topic.topic_id
end
import_posts q.replies, created_topic.topic_id if created_topic
topics += 1
print_status topics, total
end
@ -43,9 +40,7 @@ class ImportScripts::Quandora < ImportScripts::Base
end
def import_users(users)
users.each do |user|
create_user user, user[:id]
end
users.each { |user| create_user user, user[:id] }
end
def import_topic(topic)
@ -54,7 +49,7 @@ class ImportScripts::Quandora < ImportScripts::Base
post = Post.find(post_id) # already imported this topic
else
topic[:user_id] = user_id_from_imported_user_id(topic[:author_id]) || -1
topic[:category] = 'quandora-import'
topic[:category] = "quandora-import"
post = create_post(topic, topic[:id])
@ -68,9 +63,7 @@ class ImportScripts::Quandora < ImportScripts::Base
end
def import_posts(posts, topic_id)
posts.each do |post|
import_post post, topic_id
end
posts.each { |post| import_post post, topic_id }
end
def import_post(post, topic_id)
@ -91,6 +84,4 @@ class ImportScripts::Quandora < ImportScripts::Base
end
end
if __FILE__ == $0
ImportScripts::Quandora.new.perform
end
ImportScripts::Quandora.new.perform if __FILE__ == $0
View File
@ -1,10 +1,9 @@
# frozen_string_literal: true
require 'base64'
require 'json'
require "base64"
require "json"
class QuandoraApi
attr_accessor :domain, :username, :password
def initialize(domain, username, password)
@ -38,18 +37,18 @@ class QuandoraApi
def list_bases
response = request list_bases_url
response['data']
response["data"]
end
def list_questions(kb_id, limit = nil)
url = list_questions_url(kb_id, limit)
response = request url
response['data']['result']
response["data"]["result"]
end
def get_question(question_id)
url = "#{base_url @domain}/q/#{question_id}"
response = request url
response['data']
response["data"]
end
end
View File
@ -1,28 +1,27 @@
# frozen_string_literal: true
require 'json'
require 'cgi'
require 'time'
require "json"
require "cgi"
require "time"
class QuandoraQuestion
def initialize(question_json)
@question = JSON.parse question_json
end
def topic
topic = {}
topic[:id] = @question['uid']
topic[:author_id] = @question['author']['uid']
topic[:title] = unescape @question['title']
topic[:raw] = unescape @question['content']
topic[:created_at] = Time.parse @question['created']
topic[:id] = @question["uid"]
topic[:author_id] = @question["author"]["uid"]
topic[:title] = unescape @question["title"]
topic[:raw] = unescape @question["content"]
topic[:created_at] = Time.parse @question["created"]
topic
end
def users
users = {}
user = user_from_author @question['author']
user = user_from_author @question["author"]
users[user[:id]] = user
replies.each do |reply|
user = user_from_author reply[:author]
@ -32,12 +31,12 @@ class QuandoraQuestion
end
def user_from_author(author)
email = author['email']
email = "#{author['uid']}@noemail.com" unless email
email = author["email"]
email = "#{author["uid"]}@noemail.com" unless email
user = {}
user[:id] = author['uid']
user[:name] = "#{author['firstName']} #{author['lastName']}"
user[:id] = author["uid"]
user[:name] = "#{author["firstName"]} #{author["lastName"]}"
user[:email] = email
user[:staged] = true
user
@ -45,26 +44,20 @@ class QuandoraQuestion
def replies
posts = []
answers = @question['answersList']
comments = @question['comments']
comments.each_with_index do |comment, i|
posts << post_from_comment(comment, i, @question)
end
answers = @question["answersList"]
comments = @question["comments"]
comments.each_with_index { |comment, i| posts << post_from_comment(comment, i, @question) }
answers.each do |answer|
posts << post_from_answer(answer)
comments = answer['comments']
comments.each_with_index do |comment, i|
posts << post_from_comment(comment, i, answer)
end
comments = answer["comments"]
comments.each_with_index { |comment, i| posts << post_from_comment(comment, i, answer) }
end
order_replies posts
end
def order_replies(posts)
posts = posts.sort_by { |p| p[:created_at] }
posts.each_with_index do |p, i|
p[:post_number] = i + 2
end
posts.each_with_index { |p, i| p[:post_number] = i + 2 }
posts.each do |p|
parent = posts.select { |pp| pp[:id] == p[:parent_id] }
p[:reply_to_post_number] = parent[0][:post_number] if parent.size > 0
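
# Replies are numbered from 2 because post_number 1 belongs to the imported
# topic's own first post. A runnable sketch of the numbering (timestamps
# invented):
#
#   posts = [{ created_at: 30 }, { created_at: 10 }, { created_at: 20 }]
#   posts = posts.sort_by { |p| p[:created_at] }
#   posts.each_with_index { |p, i| p[:post_number] = i + 2 }
#   posts.map { |p| [p[:created_at], p[:post_number]] }
#   # => [[10, 2], [20, 3], [30, 4]]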
@ -74,35 +67,35 @@ class QuandoraQuestion
def post_from_answer(answer)
post = {}
post[:id] = answer['uid']
post[:parent_id] = @question['uid']
post[:author] = answer['author']
post[:author_id] = answer['author']['uid']
post[:raw] = unescape answer['content']
post[:created_at] = Time.parse answer['created']
post[:id] = answer["uid"]
post[:parent_id] = @question["uid"]
post[:author] = answer["author"]
post[:author_id] = answer["author"]["uid"]
post[:raw] = unescape answer["content"]
post[:created_at] = Time.parse answer["created"]
post
end
def post_from_comment(comment, index, parent)
if comment['created']
created_at = Time.parse comment['created']
if comment["created"]
created_at = Time.parse comment["created"]
else
created_at = Time.parse parent['created']
created_at = Time.parse parent["created"]
end
parent_id = parent['uid']
parent_id = "#{parent['uid']}-#{index - 1}" if index > 0
parent_id = parent["uid"]
parent_id = "#{parent["uid"]}-#{index - 1}" if index > 0
post = {}
id = "#{parent['uid']}-#{index}"
id = "#{parent["uid"]}-#{index}"
post[:id] = id
post[:parent_id] = parent_id
post[:author] = comment['author']
post[:author_id] = comment['author']['uid']
post[:raw] = unescape comment['text']
post[:author] = comment["author"]
post[:author_id] = comment["author"]["uid"]
post[:raw] = unescape comment["text"]
post[:created_at] = created_at
post
end
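
# The parent_id juggling above chains each comment onto the one before it,
# since Quandora comments carry no uids of their own. For a parent with uid
# "q1" and three comments, the synthetic ids come out as:
#
#   parent_uid = "q1"
#   (0..2).map do |index|
#     parent_id = index.zero? ? parent_uid : "#{parent_uid}-#{index - 1}"
#     ["#{parent_uid}-#{index}", parent_id]
#   end
#   # => [["q1-0", "q1"], ["q1-1", "q1-0"], ["q1-2", "q1-1"]]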
private
private
def unescape(html)
return nil unless html
View File
@ -1,5 +1,6 @@
# frozen_string_literal: true
BASES = '{
# frozen_string_literal: true
BASES =
'{
"type" : "kbase",
"data" : [ {
"objectId" : "90b1ccf3-35aa-4d6f-848e-e7c122d92c58",
@ -9,7 +10,8 @@
} ]
}'
QUESTIONS = '{
QUESTIONS =
'{
"type": "question-search-result",
"data": {
"totalSize": 445,
@ -50,7 +52,8 @@
}
}'
QUESTION = '{
QUESTION =
'{
"type" : "question",
"data" : {
"uid" : "de20ed0a-5fe5-48a5-9c14-d854f9af99f1",
View File
@ -1,21 +1,20 @@
# frozen_string_literal: true
require 'minitest/autorun'
require 'yaml'
require_relative '../quandora_api.rb'
require_relative './test_data.rb'
require "minitest/autorun"
require "yaml"
require_relative "../quandora_api.rb"
require_relative "./test_data.rb"
class TestQuandoraApi < Minitest::Test
DEBUG = false
def initialize(args)
config = YAML::load_file(File.join(__dir__, 'config.yml'))
@domain = config['domain']
@username = config['username']
@password = config['password']
@kb_id = config['kb_id']
@question_id = config['question_id']
config = YAML.load_file(File.join(__dir__, "config.yml"))
@domain = config["domain"]
@username = config["username"]
@password = config["password"]
@kb_id = config["kb_id"]
@question_id = config["question_id"]
super args
end
@ -30,19 +29,19 @@ class TestQuandoraApi < Minitest::Test
end
def test_base_url
assert_equal 'https://mydomain.quandora.com/m/json', @quandora.base_url('mydomain')
assert_equal "https://mydomain.quandora.com/m/json", @quandora.base_url("mydomain")
end
def test_auth_header
user = 'Aladdin'
password = 'open sesame'
user = "Aladdin"
password = "open sesame"
auth_header = @quandora.auth_header user, password
assert_equal 'Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==', auth_header[:Authorization]
assert_equal "Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==", auth_header[:Authorization]
end
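
# The expected header is the canonical HTTP Basic example from RFC 2617,
# i.e. the Base64 encoding of "user:password":
#
#   require "base64"
#
#   Base64.strict_encode64("Aladdin:open sesame")
#   # => "QWxhZGRpbjpvcGVuIHNlc2FtZQ=="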
def test_list_bases_element_has_expected_structure
element = @quandora.list_bases[0]
expected = JSON.parse(BASES)['data'][0]
expected = JSON.parse(BASES)["data"][0]
debug element
check_keys expected, element
end
@ -50,24 +49,24 @@ class TestQuandoraApi < Minitest::Test
def test_list_questions_has_expected_structure
response = @quandora.list_questions @kb_id, 1
debug response
check_keys JSON.parse(QUESTIONS)['data']['result'][0], response[0]
check_keys JSON.parse(QUESTIONS)["data"]["result"][0], response[0]
end
def test_get_question_has_expected_structure
question = @quandora.get_question @question_id
expected = JSON.parse(QUESTION)['data']
expected = JSON.parse(QUESTION)["data"]
check_keys expected, question
expected_comment = expected['comments'][0]
actual_comment = question['comments'][0]
expected_comment = expected["comments"][0]
actual_comment = question["comments"][0]
check_keys expected_comment, actual_comment
expected_answer = expected['answersList'][1]
actual_answer = question['answersList'][0]
expected_answer = expected["answersList"][1]
actual_answer = question["answersList"][0]
check_keys expected_answer, actual_answer
expected_answer_comment = expected_answer['comments'][0]
actual_answer_comment = actual_answer['comments'][0]
expected_answer_comment = expected_answer["comments"][0]
actual_answer_comment = actual_answer["comments"][0]
check_keys expected_answer_comment, actual_answer_comment
end
@ -75,18 +74,16 @@ class TestQuandoraApi < Minitest::Test
def check_keys(expected, actual)
msg = "### caller[0]:\nKey not found in actual keys: #{actual.keys}\n"
expected.keys.each do |k|
assert (actual.keys.include? k), "#{k}"
end
expected.keys.each { |k| assert (actual.keys.include? k), "#{k}" }
end
def debug(message, show = false)
if show || DEBUG
puts '### ' + caller[0]
puts ''
puts "### " + caller[0]
puts ""
puts message
puts ''
puts ''
puts ""
puts ""
end
end
end