DEV: Lots of improvements to the generic_bulk import script

Notable changes:
* Imports a lot more tables from core and plugins
  * site settings
  * uploads with necessary upload references
  * groups and group members
  * user profiles
  * user options
  * user fields & values
  * muted users
  * user notes (plugin)
  * user followers (plugin)
  * user avatars
  * tag groups and tags
  * tag users (notification settings for tags / user)
  * category permissions
  * polls with options and votes
  * post votes (plugin)
  * solutions (plugin)
  * gamification scores (plugin)
  * events (plugin)
  * badges and badge groupings
  * user badges
  * optimized images
  * topic users (notification settings for topics)
  * post custom fields
  * permalinks and permalink normalizations

* Creates the `migration_mappings` table, which stores the mappings for a handful of imported tables

* Detects duplicate group names and renames them

* Pre-cooking for attachments, images and mentions

* Outputs instructions when gems are missing

* Supports importing uploads from a DB generated by `uploads_importer.rb`

* Checks that all required plugins exist and enables them if needed

* A couple of optimizations and additions in `import.rake`
This commit is contained in:
Gerhard Schlager 2023-12-10 22:38:03 +01:00 committed by Gerhard Schlager
parent d725b3ca9e
commit dc8c6b8958
3 changed files with 2926 additions and 209 deletions

View File

@ -15,6 +15,7 @@ task "import:ensure_consistency" => :environment do
insert_user_stats
insert_user_visits
insert_draft_sequences
insert_automatic_group_users
update_user_stats
update_posts
@ -24,8 +25,11 @@ task "import:ensure_consistency" => :environment do
update_groups
update_tag_stats
update_topic_users
update_topic_featured_users
create_category_definitions
# run_jobs
log "Done!"
end
@ -213,10 +217,15 @@ end
# Creates a user_stats row for every user that does not have one yet.
#
# Each new row gets user_id = users.id and new_since = users.created_at.
# Users that already have a user_stats row are filtered out by the NOT EXISTS
# clause; ON CONFLICT DO NOTHING additionally makes PostgreSQL skip any row
# that would violate a unique constraint instead of aborting the statement.
def insert_user_stats
  log "Inserting user stats..."

  # Exactly one statement is sent: a single INSERT ... SELECT with one outer
  # FROM clause (aliased as `u` so the sub-select can refer to it).
  DB.exec <<~SQL
    INSERT INTO user_stats (user_id, new_since)
    SELECT id, created_at
    FROM users u
    WHERE NOT EXISTS (
      SELECT 1
      FROM user_stats us
      WHERE us.user_id = u.id
    )
    ON CONFLICT DO NOTHING
  SQL
end
@ -247,6 +256,40 @@ def insert_draft_sequences
SQL
end
# Adds the missing group_users rows for every automatic group listed in
# Group::AUTO_GROUPS and then refreshes each group's user count.
#
# For each (group_name, group_id) pair a SQL condition selecting the users
# that belong to the group is chosen, every matching user that is not yet a
# member is inserted, and Group.reset_user_count is called for the group.
#
# Raises ArgumentError when Group::AUTO_GROUPS contains a group name that has
# no membership rule here. Without that guard the interpolated condition would
# be empty and the statement would fail with an opaque SQL syntax error.
def insert_automatic_group_users
  log "Inserting automatic group users..."

  Group::AUTO_GROUPS.each do |group_name, group_id|
    # All rules except :everyone only consider users with a positive id that
    # are not staged.
    user_condition =
      case group_name
      when :everyone
        "TRUE"
      when :admins
        "id > 0 AND admin AND NOT staged"
      when :moderators
        "id > 0 AND moderator AND NOT staged"
      when :staff
        "id > 0 AND (moderator OR admin) AND NOT staged"
      when :trust_level_1, :trust_level_2, :trust_level_3, :trust_level_4
        "id > 0 AND trust_level >= :min_trust_level AND NOT staged"
      when :trust_level_0
        "id > 0 AND NOT staged"
      else
        raise ArgumentError,
              "No membership rule defined for automatic group #{group_name.inspect}"
      end

    # min_trust_level is only referenced by the trust level rules.
    # NOTE(review): assumes the id of a trust level group is 10 + trust level;
    # confirm against Group::AUTO_GROUPS.
    DB.exec(<<~SQL, group_id: group_id, min_trust_level: group_id - 10)
      INSERT INTO group_users (group_id, user_id, created_at, updated_at)
      SELECT :group_id, id, NOW(), NOW()
      FROM users u
      WHERE #{user_condition}
        AND NOT EXISTS (
          SELECT 1
          FROM group_users gu
          WHERE gu.group_id = :group_id AND gu.user_id = u.id
        )
    SQL

    Group.reset_user_count(Group.find(group_id))
  end
end
def update_user_stats
log "Updating user stats..."
@ -404,9 +447,9 @@ def update_users
GROUP BY p.user_id
)
UPDATE users
SET first_seen_at = X.min_created_at
, last_seen_at = X.max_created_at
, last_posted_at = X.max_created_at
SET first_seen_at = LEAST(first_seen_at, X.min_created_at)
, last_seen_at = GREATEST(last_seen_at, X.max_created_at)
, last_posted_at = GREATEST(last_posted_at, X.max_created_at)
FROM X
WHERE id = X.user_id
AND (COALESCE(first_seen_at, '1970-01-01') <> X.min_created_at
@ -459,9 +502,15 @@ def update_topic_users
SQL
end
# Logs a progress message and then delegates to
# TopicFeaturedUsers.ensure_consistency!.
def update_topic_featured_users
  log("Updating topic featured users...")
  TopicFeaturedUsers.ensure_consistency!
end
# Logs a progress message, runs Category.ensure_consistency! and afterwards
# clears the Site cache.
def create_category_definitions
  log("Creating category definitions")
  Category.ensure_consistency!
  Site.clear_cache
end
def log(message)
@ -654,3 +703,80 @@ task "import:update_avatars_from_sso" => :environment do
status_queue.close
status_thread.join
end
# Logs a progress message and then executes a fixed list of jobs, one after
# another and in the order listed, by instantiating each job class and calling
# #execute with an empty arguments hash.
def run_jobs
  log("Running jobs")

  [
    Jobs::EnsureDbConsistency,
    Jobs::DirectoryRefreshOlder,
    Jobs::DirectoryRefreshDaily,
    Jobs::ReindexSearch,
    Jobs::TopRefreshToday,
    Jobs::TopRefreshOlder,
    Jobs::Weekly,
  ].each { |job_class| job_class.new.execute({}) }
end
# Rebakes every post that has a row in `polls` and whose baked_version is NULL
# or differs from Post::BAKED_VERSION. Posts are walked in descending order
# and a "\r<done> / <total>" progress counter is printed after each rebake.
desc "Rebake posts that contain polls"
task "import:rebake_uncooked_posts_with_polls" => :environment do
  log("Rebaking posts with polls")
  Jobs.run_immediately!

  has_poll = "EXISTS (SELECT 1 FROM polls WHERE polls.post_id = posts.id)"
  not_current = "baked_version <> ? or baked_version IS NULL"
  stale_posts = Post.where(has_poll).where(not_current, Post::BAKED_VERSION)

  total = stale_posts.count

  # Without a block find_each returns an Enumerator, so with_index(1) supplies
  # the 1-based number of posts processed so far.
  stale_posts.find_each(order: :desc).with_index(1) do |post, processed|
    post.rebake!
    print format("\r%7d / %7d", processed, total)
  end
end
# Rebakes every post that has a matching row in `discourse_post_event_events`
# and whose baked_version is NULL or differs from Post::BAKED_VERSION. Posts
# are walked in descending order and a "\r<done> / <total>" progress counter
# is printed after each rebake.
#
# NOTE(review): the sub-select compares discourse_post_event_events.id with
# posts.id, so an event's id is presumably the id of its post. Confirm
# against the plugin's schema.
desc "Rebake posts that contain events"
task "import:rebake_uncooked_posts_with_events" => :environment do
  log("Rebaking posts with events")
  Jobs.run_immediately!

  has_event =
    "EXISTS (SELECT 1 FROM discourse_post_event_events WHERE discourse_post_event_events.id = posts.id)"
  not_current = "baked_version <> ? or baked_version IS NULL"
  stale_posts = Post.where(has_event).where(not_current, Post::BAKED_VERSION)

  total = stale_posts.count

  # Without a block find_each returns an Enumerator, so with_index(1) supplies
  # the 1-based number of posts processed so far.
  stale_posts.find_each(order: :desc).with_index(1) do |post, processed|
    post.rebake!
    print format("\r%7d / %7d", processed, total)
  end
end
# Rebakes every post whose topic carries the tag named by the `tag_name` task
# argument and whose baked_version is NULL or differs from Post::BAKED_VERSION.
# Posts are walked in descending order and a "\r<done> / <total>" progress
# counter is printed after each rebake.
#
# Usage: rake "import:rebake_uncooked_posts_with_tag[tag_name]"
#
# Raises ArgumentError when tag_name is missing or blank. Without that check
# the query would compare tags.name with NULL (or an empty string), match
# nothing, and the task would silently do no work.
desc "Rebake posts that have tag"
task "import:rebake_uncooked_posts_with_tag", [:tag_name] => :environment do |_task, args|
  if args[:tag_name].to_s.strip.empty?
    raise ArgumentError,
          "Missing tag_name. Usage: rake \"import:rebake_uncooked_posts_with_tag[tag_name]\""
  end

  log "Rebaking posts with tag"
  Jobs.run_immediately!

  has_tag =
    "EXISTS (SELECT 1 FROM topic_tags JOIN tags ON tags.id = topic_tags.tag_id WHERE topic_tags.topic_id = posts.topic_id AND tags.name = ?)"
  not_current = "baked_version <> ? or baked_version IS NULL"
  posts = Post.where(has_tag, args[:tag_name]).where(not_current, Post::BAKED_VERSION)

  max_count = posts.count

  # Without a block find_each returns an Enumerator, so with_index(1) supplies
  # the 1-based number of posts processed so far.
  posts.find_each(order: :desc).with_index(1) do |post, current_count|
    post.rebake!
    print format("\r%7d / %7d", current_count, max_count)
  end
end

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff