discourse/script/import_scripts/bespoke_1.rb

# frozen_string_literal: true

# bespoke importer for a customer, feel free to borrow ideas

require "csv"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")

# Call it like this (DIRNAME is the directory holding users.csv, categories.csv,
# topics.csv and posts.csv):
#   RAILS_ENV=production bundle exec ruby script/import_scripts/bespoke_1.rb DIRNAME
class ImportScripts::Bespoke < ImportScripts::Base
  BATCH_SIZE = 1000

  # Sets up the importer and primes the post-number cache.
  #
  # path - directory the CSV files are read from (see csv_parse / total_rows).
  def initialize(path)
    @path = path
    super()
    @bbcode_to_md = true

    puts "loading post mappings..."
    # post id => post_number for every row returned by Post.pluck; used later
    # to translate reply targets into post numbers.
    @post_number_map = Post.pluck(:id, :post_number).to_h
  end

  # Records the post_number of a freshly created post in the local cache,
  # then hands the post on to the parent implementation.
  def created_post(post)
    @post_number_map.store(post.id, post.post_number)
    super(post)
  end

  # Runs the import phases in order - users, categories, posts - and returns
  # whatever the last phase returns.
  def execute
    %i[import_users import_categories import_posts].map { |phase| send(phase) }.last
  end

  # Gives access to the cells of a CSV row through reader methods named after
  # the header columns.
  class RowResolver
    # Builds a resolver on a fresh anonymous subclass, so the generated
    # readers are defined there rather than on RowResolver itself.
    def self.create(cols)
      Class.new(RowResolver).new(cols)
    end

    # cols - header names; one reader is defined per name, returning the cell
    #        found at the same index of the currently loaded row.
    def initialize(cols)
      reader_host = self.class
      cols.each.with_index do |column, position|
        reader_host.public_send(:define_method, column) { @row[position] }
      end
    end

    # Points the readers at +row+ (an array of cells) and returns it.
    def load(row)
      @row = row
    end
  end

  # Hands the buffered users to create_users and empties the buffer.
  # Does nothing (and returns nil) when the buffer is empty.
  #
  # users  - array of user hashes; cleared in place after the import call.
  # offset - passed through to create_users as :offset.
  # total  - passed through to create_users as :total.
  def load_user_batch!(users, offset, total)
    return if users.empty?

    create_users(users, offset: offset, total: total) { |user| user }
    users.clear
  end

  # Streams "#{@path}/#{name}.csv" and yields one RowResolver per data record.
  #
  # The first complete record is treated as the header and is used to build
  # the resolver; every later record is loaded into that same resolver object
  # before it is yielded, so callers must copy out what they need.
  #
  # Physical lines are accumulated until the running count of double quotes is
  # even, which lets quoted fields span several lines. Records that CSV cannot
  # parse are reported on stdout and skipped; blank records are skipped.
  #
  # name - base name of the CSV file (without directory or ".csv").
  #
  # Returns nil.
  def csv_parse(name)
    filename = "#{@path}/#{name}.csv"
    resolver = nil

    # The buffer must be mutable: this file declares frozen_string_literal,
    # so a bare "" literal would raise FrozenError on the first `<<`.
    current_row = +""
    double_quote_count = 0

    # File.foreach closes the file once iteration finishes.
    File.foreach(filename) do |line|
      # escaping is mental here
      line.gsub!(/\\(.{1})/) { |m| m[-1] == '"' ? '""' : m[-1] }
      line.strip!

      current_row << "\n" unless current_row.empty?
      current_row << line
      double_quote_count += line.count('"')

      # An odd number of quotes means a quoted field is still open.
      next if double_quote_count.odd?

      # The record is complete: take it and reset the accumulators up front so
      # every exit path below starts the next record from a clean state.
      record = current_row
      quotes_in_record = double_quote_count
      current_row = +""
      double_quote_count = 0

      raw =
        begin
          CSV.parse(record).first
        rescue CSV::MalformedCSVError => e
          puts e.message
          puts "*" * 100
          puts "Bad row skipped, line is: #{line}"
          puts
          puts record
          puts
          puts "double quote count is : #{quotes_in_record}"
          puts "*" * 100
          next
        end

      # A blank line parses to no row at all; there is nothing to yield.
      next if raw.nil?

      if resolver.nil?
        resolver = RowResolver.create(raw)
        next
      end

      resolver.load(raw)
      yield resolver
    end
  end

  # Number of physical lines in "#{@path}/#{table}.csv" minus one for the
  # header line. Counts lines, not parsed CSV records.
  def total_rows(table)
    csv_path = "#{@path}/#{table}.csv"
    File.foreach(csv_path).count - 1
  end

  # Reads users.csv and imports the users in batches of BATCH_SIZE.
  #
  # For each row:
  # * an email that is blank or has no "@" is replaced with fake_email;
  # * the username comes from key_custom, falls back to the display name when
  #   the column holds the literal string "NULL", and finally to the part of
  #   the email before the "@";
  # * a blank display name also falls back to that part of the email.
  def import_users
    puts "", "creating users"

    count = 0
    users = []

    total = total_rows("users")

    csv_parse("users") do |row|
      email = row.email

      # fake it
      email = fake_email if email.blank? || !email.include?("@")

      name = row.display_name
      username = row.key_custom
      created_at = DateTime.parse(row.dcreate)

      username = name if username == "NULL"
      username = email.split("@")[0] if username.blank?
      name = email.split("@")[0] if name.blank?

      users << { id: row.id, email: email, name: name, username: username, created_at: created_at }

      count += 1
      load_user_batch! users, count - users.length, total if count % BATCH_SIZE == 0
    end

    # Flush the remainder. The offset is the number of users already handed
    # over, i.e. everything counted so far minus what is still buffered -
    # the same formula the in-loop flush uses.
    load_user_batch! users, count - users.length, total
  end

  # Collects every row of categories.csv as an {id, name, description} hash
  # and passes the whole list to create_categories in a single call.
  def import_categories
    categories = []

    csv_parse("categories") do |record|
      categories.push({ id: record.id, name: record.name, description: record.description })
    end

    create_categories(categories) { |category| category }
  end

  # Strips markup this import does not keep from a post body.
  #
  # raw - the post body; modified in place when it is not blank.
  #
  # Returns "<missing>" for a blank body, otherwise +raw+ with [color=...] /
  # [/color] tags removed and every [signature]...[/signature] block deleted.
  def normalize_raw!(raw)
    return "<missing>" if raw.blank?

    # purple and #1223f3
    raw.gsub!(/\[color=[#a-z0-9]+\]/i, "")
    raw.gsub!(%r{\[/color\]}i, "")
    # Non-greedy, so each signature block is removed on its own; a greedy
    # match would also swallow the text between two separate blocks.
    raw.gsub!(%r{\[signature\].*?\[/signature\]}im, "")
    raw
  end

  # Maps one batch of buffered post hashes for create_posts, then empties the
  # buffer.
  #
  # posts  - array of post hashes as built by import_posts; cleared afterwards.
  # topics - hash of imported topic id => topic hash. The entry of a topic is
  #          updated with :post_id once its first post has been seen.
  # offset - passed through to create_posts as :offset.
  # total  - passed through to create_posts as :total.
  #
  # The first post seen for a topic becomes the topic's opening post and
  # carries the title and category. Later posts are attached to the topic of
  # that opening post. The block yields nil (via `next`) for posts that should
  # not be imported - presumably create_posts skips those; verify against the
  # base class.
  def import_post_batch!(posts, topics, offset, total)
    create_posts(posts, total: total, offset: offset) do |post|
      mapped = {
        id: post[:id],
        # Falls back to -1 when the author was not imported.
        user_id: user_id_from_imported_user_id(post[:user_id]) || -1,
        raw: post[:body],
        created_at: post[:created_at],
      }

      topic = topics[post[:topic_id]]
      # A post whose topic is not in the topic map cannot be placed anywhere;
      # skip it instead of failing on a nil topic.
      next unless topic

      if topic[:post_id]
        parent = topic_lookup_from_imported_post_id(topic[:post_id])
        # The opening post was never imported, so there is no topic to join.
        next unless parent

        mapped[:topic_id] = parent[:topic_id]

        reply_to_post_id = post_id_from_imported_post_id(post[:reply_id])
        reply_to_post_number = @post_number_map[reply_to_post_id] if reply_to_post_id
        # Replies to post number 1 are not marked as replies.
        if reply_to_post_number && reply_to_post_number > 1
          mapped[:reply_to_post_number] = reply_to_post_number
        end
      else
        mapped[:category] = category_id_from_imported_category_id(topic[:category_id])
        mapped[:title] = post[:title]
        # Remember the opening post even if it ends up skipped just below.
        topic[:post_id] = post[:id]
      end

      next if topic[:deleted] || post[:deleted]

      mapped
    end

    posts.clear
  end

  # Loads topics.csv into an in-memory map, then streams posts.csv and imports
  # the posts in batches of BATCH_SIZE through import_post_batch!.
  #
  # Posts without a creation date are reported on stdout and skipped.
  def import_posts
    puts "", "creating topics and posts"

    topics_by_id = {}

    csv_parse("topics") do |record|
      topics_by_id.store(
        record.id,
        {
          id: record.id,
          category_id: record.forum_category_id,
          deleted: record.is_deleted.to_i == 1,
          locked: record.is_locked.to_i == 1,
          pinned: record.is_pinned.to_i == 1,
        },
      )
    end

    expected_total = total_rows("posts")

    pending = []
    accepted = 0

    csv_parse("posts") do |record|
      unless record.dcreate
        puts "NO CREATION DATE FOR POST"
        p record
        next
      end

      # Copy the values out: the yielded record object is only a view of the
      # current row.
      pending << {
        id: record.id,
        topic_id: record.forum_topic_id,
        reply_id: record.reply_id,
        user_id: record.user_id,
        title: record.title,
        body: normalize_raw!(record.body),
        deleted: record.is_deleted.to_i == 1,
        created_at: DateTime.parse(record.dcreate),
      }
      accepted += 1

      # The buffer was just appended to, so it is never empty at this point.
      next unless (pending.length % BATCH_SIZE).zero?

      import_post_batch!(pending, topics_by_id, accepted - pending.length, expected_total)
    end

    unless pending.empty?
      import_post_batch!(pending, topics_by_id, accepted - pending.length, expected_total)
    end

    # NOTE(review): this ends the process as soon as the posts are imported,
    # so nothing the caller would run after this method gets a chance to
    # execute - confirm that this is intentional.
    exit
  end
end

# Entry point: the first argument must name an existing directory; otherwise
# print the usage text and stop with exit status 1.
import_dir = ARGV.first

if import_dir.nil? || !Dir.exist?(import_dir)
  puts "", "Usage:", "", "bundle exec ruby script/import_scripts/bespoke_1.rb DIRNAME", ""
  exit 1
end

ImportScripts::Bespoke.new(import_dir).perform