mirror of
https://github.com/discourse/discourse.git
synced 2025-02-25 18:55:32 -06:00
Merge pull request #4240 from gschlager/phpbb3-importer
FIX import of private messages from phpBB3
This commit is contained in:
commit
665c5282c7
@ -161,83 +161,40 @@ module ImportScripts::PhpBB3
|
|||||||
SQL
|
SQL
|
||||||
end
|
end
|
||||||
|
|
||||||
def count_messages(use_fixed_messages)
|
def count_messages
|
||||||
if use_fixed_messages
|
|
||||||
count(<<-SQL)
|
count(<<-SQL)
|
||||||
SELECT COUNT(*) AS count
|
SELECT COUNT(*) AS count
|
||||||
FROM #{@table_prefix}_import_privmsgs
|
FROM #{@table_prefix}_privmsgs m
|
||||||
|
WHERE NOT EXISTS ( -- ignore duplicate messages
|
||||||
|
SELECT 1
|
||||||
|
FROM #{@table_prefix}_privmsgs x
|
||||||
|
WHERE x.msg_id < m.msg_id AND x.root_level = m.root_level AND x.author_id = m.author_id
|
||||||
|
AND x.to_address = m.to_address AND x.message_time = m.message_time
|
||||||
|
)
|
||||||
SQL
|
SQL
|
||||||
else
|
|
||||||
count(<<-SQL)
|
|
||||||
SELECT COUNT(*) AS count
|
|
||||||
FROM #{@table_prefix}_privmsgs
|
|
||||||
SQL
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def fetch_messages(use_fixed_messages, last_msg_id)
|
def fetch_messages(last_msg_id)
|
||||||
if use_fixed_messages
|
|
||||||
query(<<-SQL, :msg_id)
|
|
||||||
SELECT m.msg_id, i.root_msg_id, m.author_id, m.message_time, m.message_subject, m.message_text,
|
|
||||||
IFNULL(a.attachment_count, 0) AS attachment_count
|
|
||||||
FROM #{@table_prefix}_privmsgs m
|
|
||||||
JOIN #{@table_prefix}_import_privmsgs i ON (m.msg_id = i.msg_id)
|
|
||||||
LEFT OUTER JOIN (
|
|
||||||
SELECT post_msg_id, COUNT(*) AS attachment_count
|
|
||||||
FROM #{@table_prefix}_attachments
|
|
||||||
WHERE topic_id = 0
|
|
||||||
GROUP BY post_msg_id
|
|
||||||
) a ON (m.msg_id = a.post_msg_id)
|
|
||||||
WHERE m.msg_id > #{last_msg_id}
|
|
||||||
ORDER BY i.root_msg_id, m.msg_id
|
|
||||||
LIMIT #{@batch_size}
|
|
||||||
SQL
|
|
||||||
else
|
|
||||||
query(<<-SQL, :msg_id)
|
query(<<-SQL, :msg_id)
|
||||||
SELECT m.msg_id, m.root_level AS root_msg_id, m.author_id, m.message_time, m.message_subject,
|
SELECT m.msg_id, m.root_level AS root_msg_id, m.author_id, m.message_time, m.message_subject,
|
||||||
m.message_text, IFNULL(a.attachment_count, 0) AS attachment_count
|
m.message_text, m.to_address, r.author_id AS root_author_id, r.to_address AS root_to_address, (
|
||||||
|
SELECT COUNT(*)
|
||||||
|
FROM #{@table_prefix}_attachments a
|
||||||
|
WHERE a.topic_id = 0 AND m.msg_id = a.post_msg_id
|
||||||
|
) AS attachment_count
|
||||||
FROM #{@table_prefix}_privmsgs m
|
FROM #{@table_prefix}_privmsgs m
|
||||||
LEFT OUTER JOIN (
|
LEFT OUTER JOIN #{@table_prefix}_privmsgs r ON (m.root_level = r.msg_id)
|
||||||
SELECT post_msg_id, COUNT(*) AS attachment_count
|
|
||||||
FROM #{@table_prefix}_attachments
|
|
||||||
WHERE topic_id = 0
|
|
||||||
GROUP BY post_msg_id
|
|
||||||
) a ON (m.msg_id = a.post_msg_id)
|
|
||||||
WHERE m.msg_id > #{last_msg_id}
|
WHERE m.msg_id > #{last_msg_id}
|
||||||
ORDER BY m.root_level, m.msg_id
|
AND NOT EXISTS ( -- ignore duplicate messages
|
||||||
|
SELECT 1
|
||||||
|
FROM #{@table_prefix}_privmsgs x
|
||||||
|
WHERE x.msg_id < m.msg_id AND x.root_level = m.root_level AND x.author_id = m.author_id
|
||||||
|
AND x.to_address = m.to_address AND x.message_time = m.message_time
|
||||||
|
)
|
||||||
|
ORDER BY m.msg_id
|
||||||
LIMIT #{@batch_size}
|
LIMIT #{@batch_size}
|
||||||
SQL
|
SQL
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
def fetch_message_participants(msg_id, use_fixed_messages)
|
|
||||||
if use_fixed_messages
|
|
||||||
query(<<-SQL)
|
|
||||||
SELECT m.to_address
|
|
||||||
FROM #{@table_prefix}_privmsgs m
|
|
||||||
JOIN #{@table_prefix}_import_privmsgs i ON (m.msg_id = i.msg_id)
|
|
||||||
WHERE i.msg_id = #{msg_id} OR i.root_msg_id = #{msg_id}
|
|
||||||
SQL
|
|
||||||
else
|
|
||||||
query(<<-SQL)
|
|
||||||
SELECT m.to_address
|
|
||||||
FROM #{@table_prefix}_privmsgs m
|
|
||||||
WHERE m.msg_id = #{msg_id} OR m.root_level = #{msg_id}
|
|
||||||
SQL
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def calculate_fixed_messages
|
|
||||||
drop_temp_import_message_table
|
|
||||||
create_temp_import_message_table
|
|
||||||
fill_temp_import_message_table
|
|
||||||
|
|
||||||
drop_import_message_table
|
|
||||||
create_import_message_table
|
|
||||||
fill_import_message_table
|
|
||||||
|
|
||||||
drop_temp_import_message_table
|
|
||||||
end
|
|
||||||
|
|
||||||
def count_bookmarks
|
def count_bookmarks
|
||||||
count(<<-SQL)
|
count(<<-SQL)
|
||||||
@ -268,83 +225,5 @@ module ImportScripts::PhpBB3
|
|||||||
(SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'upload_path') AS attachment_path
|
(SELECT config_value FROM #{@table_prefix}_config WHERE config_name = 'upload_path') AS attachment_path
|
||||||
SQL
|
SQL
|
||||||
end
|
end
|
||||||
|
|
||||||
protected
|
|
||||||
|
|
||||||
def drop_temp_import_message_table
|
|
||||||
query("DROP TABLE IF EXISTS #{@table_prefix}_import_privmsgs_temp")
|
|
||||||
end
|
|
||||||
|
|
||||||
def create_temp_import_message_table
|
|
||||||
query(<<-SQL)
|
|
||||||
CREATE TABLE #{@table_prefix}_import_privmsgs_temp (
|
|
||||||
msg_id MEDIUMINT(8) NOT NULL,
|
|
||||||
root_msg_id MEDIUMINT(8) NOT NULL,
|
|
||||||
recipient_id MEDIUMINT(8),
|
|
||||||
normalized_subject VARCHAR(255) NOT NULL,
|
|
||||||
PRIMARY KEY (msg_id)
|
|
||||||
)
|
|
||||||
SQL
|
|
||||||
end
|
|
||||||
|
|
||||||
# this removes duplicate messages, converts the to_address to a number
|
|
||||||
# and stores the message_subject in lowercase and without the prefix "Re: "
|
|
||||||
def fill_temp_import_message_table
|
|
||||||
query(<<-SQL)
|
|
||||||
INSERT INTO #{@table_prefix}_import_privmsgs_temp (msg_id, root_msg_id, recipient_id, normalized_subject)
|
|
||||||
SELECT m.msg_id, m.root_level,
|
|
||||||
CASE WHEN m.root_level = 0 AND INSTR(m.to_address, ':') = 0 THEN
|
|
||||||
CAST(SUBSTRING(m.to_address, 3) AS SIGNED INTEGER)
|
|
||||||
ELSE NULL END AS recipient_id,
|
|
||||||
LOWER(CASE WHEN m.message_subject LIKE 'Re: %' THEN
|
|
||||||
SUBSTRING(m.message_subject, 5)
|
|
||||||
ELSE m.message_subject END) AS normalized_subject
|
|
||||||
FROM #{@table_prefix}_privmsgs m
|
|
||||||
WHERE NOT EXISTS (
|
|
||||||
SELECT 1
|
|
||||||
FROM #{@table_prefix}_privmsgs x
|
|
||||||
WHERE x.msg_id < m.msg_id AND x.root_level = m.root_level AND x.author_id = m.author_id
|
|
||||||
AND x.to_address = m.to_address AND x.message_time = m.message_time
|
|
||||||
)
|
|
||||||
SQL
|
|
||||||
end
|
|
||||||
|
|
||||||
def drop_import_message_table
|
|
||||||
query("DROP TABLE IF EXISTS #{@table_prefix}_import_privmsgs")
|
|
||||||
end
|
|
||||||
|
|
||||||
def create_import_message_table
|
|
||||||
query(<<-SQL)
|
|
||||||
CREATE TABLE #{@table_prefix}_import_privmsgs (
|
|
||||||
msg_id MEDIUMINT(8) NOT NULL,
|
|
||||||
root_msg_id MEDIUMINT(8) NOT NULL,
|
|
||||||
PRIMARY KEY (msg_id),
|
|
||||||
INDEX #{@table_prefix}_import_privmsgs_root_msg_id (root_msg_id)
|
|
||||||
)
|
|
||||||
SQL
|
|
||||||
end
|
|
||||||
|
|
||||||
# this tries to calculate the actual root_level (= msg_id of the first message in a
|
|
||||||
# private conversation) based on subject, time, author and recipient
|
|
||||||
def fill_import_message_table
|
|
||||||
query(<<-SQL)
|
|
||||||
INSERT INTO #{@table_prefix}_import_privmsgs (msg_id, root_msg_id)
|
|
||||||
SELECT m.msg_id, CASE WHEN i.root_msg_id = 0 THEN
|
|
||||||
COALESCE((
|
|
||||||
SELECT a.msg_id
|
|
||||||
FROM #{@table_prefix}_privmsgs a
|
|
||||||
JOIN #{@table_prefix}_import_privmsgs_temp b ON (a.msg_id = b.msg_id)
|
|
||||||
WHERE ((a.author_id = m.author_id AND b.recipient_id = i.recipient_id) OR
|
|
||||||
(a.author_id = i.recipient_id AND b.recipient_id = m.author_id))
|
|
||||||
AND b.normalized_subject = i.normalized_subject
|
|
||||||
AND a.msg_id <> m.msg_id
|
|
||||||
AND a.message_time < m.message_time
|
|
||||||
ORDER BY a.message_time
|
|
||||||
LIMIT 1
|
|
||||||
), 0) ELSE i.root_msg_id END AS root_msg_id
|
|
||||||
FROM #{@table_prefix}_privmsgs m
|
|
||||||
JOIN #{@table_prefix}_import_privmsgs_temp i ON (m.msg_id = i.msg_id)
|
|
||||||
SQL
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -118,18 +118,13 @@ module ImportScripts::PhpBB3
|
|||||||
end
|
end
|
||||||
|
|
||||||
def import_private_messages
|
def import_private_messages
|
||||||
if @settings.fix_private_messages
|
|
||||||
puts '', 'fixing private messages'
|
|
||||||
@database.calculate_fixed_messages
|
|
||||||
end
|
|
||||||
|
|
||||||
puts '', 'creating private messages'
|
puts '', 'creating private messages'
|
||||||
total_count = @database.count_messages(@settings.fix_private_messages)
|
total_count = @database.count_messages
|
||||||
importer = @importers.message_importer
|
importer = @importers.message_importer
|
||||||
last_msg_id = 0
|
last_msg_id = 0
|
||||||
|
|
||||||
batches do |offset|
|
batches do |offset|
|
||||||
rows, last_msg_id = @database.fetch_messages(@settings.fix_private_messages, last_msg_id)
|
rows, last_msg_id = @database.fetch_messages(last_msg_id)
|
||||||
break if rows.size < 1
|
break if rows.size < 1
|
||||||
|
|
||||||
next if all_records_exist?(:posts, importer.map_to_import_ids(rows))
|
next if all_records_exist?(:posts, importer.map_to_import_ids(rows))
|
||||||
|
@ -14,7 +14,7 @@ module ImportScripts::PhpBB3
|
|||||||
end
|
end
|
||||||
|
|
||||||
def map_to_import_ids(rows)
|
def map_to_import_ids(rows)
|
||||||
rows.map { |row| get_import_id(row) }
|
rows.map { |row| get_import_id(row[:msg_id]) }
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
@ -23,31 +23,38 @@ module ImportScripts::PhpBB3
|
|||||||
attachments = import_attachments(row, user_id)
|
attachments = import_attachments(row, user_id)
|
||||||
|
|
||||||
mapped = {
|
mapped = {
|
||||||
id: get_import_id(row),
|
id: get_import_id(row[:msg_id]),
|
||||||
user_id: user_id,
|
user_id: user_id,
|
||||||
created_at: Time.zone.at(row[:message_time]),
|
created_at: Time.zone.at(row[:message_time]),
|
||||||
raw: @text_processor.process_private_msg(row[:message_text], attachments)
|
raw: @text_processor.process_private_msg(row[:message_text], attachments)
|
||||||
}
|
}
|
||||||
|
|
||||||
if row[:root_msg_id] == 0
|
root_user_ids = sorted_user_ids(row[:root_author_id], row[:root_to_address])
|
||||||
map_first_message(row, mapped)
|
current_user_ids = sorted_user_ids(row[:author_id], row[:to_address])
|
||||||
|
topic_id = get_topic_id(row, root_user_ids, current_user_ids)
|
||||||
|
|
||||||
|
if topic_id.blank?
|
||||||
|
map_first_message(row, current_user_ids, mapped)
|
||||||
else
|
else
|
||||||
map_other_message(row, mapped)
|
map_other_message(row, topic_id, mapped)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
protected
|
protected
|
||||||
|
|
||||||
|
RE_PREFIX = 're: '
|
||||||
|
|
||||||
def import_attachments(row, user_id)
|
def import_attachments(row, user_id)
|
||||||
if @settings.import_attachments && row[:attachment_count] > 0
|
if @settings.import_attachments && row[:attachment_count] > 0
|
||||||
@attachment_importer.import_attachments(user_id, row[:msg_id])
|
@attachment_importer.import_attachments(user_id, row[:msg_id])
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def map_first_message(row, mapped)
|
def map_first_message(row, current_user_ids, mapped)
|
||||||
mapped[:title] = CGI.unescapeHTML(row[:message_subject])
|
mapped[:title] = get_topic_title(row)
|
||||||
mapped[:archetype] = Archetype.private_message
|
mapped[:archetype] = Archetype.private_message
|
||||||
mapped[:target_usernames] = get_usernames(row[:msg_id], row[:author_id])
|
mapped[:target_usernames] = get_recipient_usernames(row)
|
||||||
|
mapped[:custom_fields] = {import_user_ids: current_user_ids.join(',')}
|
||||||
|
|
||||||
if mapped[:target_usernames].empty? # pm with yourself?
|
if mapped[:target_usernames].empty? # pm with yourself?
|
||||||
puts "Private message without recipients. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}"
|
puts "Private message without recipients. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}"
|
||||||
@ -57,36 +64,73 @@ module ImportScripts::PhpBB3
|
|||||||
mapped
|
mapped
|
||||||
end
|
end
|
||||||
|
|
||||||
def map_other_message(row, mapped)
|
def map_other_message(row, topic_id, mapped)
|
||||||
parent_msg_id = "pm:#{row[:root_msg_id]}"
|
mapped[:topic_id] = topic_id
|
||||||
parent = @lookup.topic_lookup_from_imported_post_id(parent_msg_id)
|
|
||||||
|
|
||||||
if parent.blank?
|
|
||||||
puts "Parent post #{parent_msg_id} doesn't exist. Skipping #{row[:msg_id]}: #{row[:message_subject][0..40]}"
|
|
||||||
return nil
|
|
||||||
end
|
|
||||||
|
|
||||||
mapped[:topic_id] = parent[:topic_id]
|
|
||||||
mapped
|
mapped
|
||||||
end
|
end
|
||||||
|
|
||||||
def get_usernames(msg_id, author_id)
|
def get_recipient_user_ids(to_address)
|
||||||
# Find the users who are part of this private message.
|
return [] if to_address.blank?
|
||||||
# Found from the to_address of phpbb_privmsgs, by looking at
|
|
||||||
# all the rows with the same root_msg_id.
|
|
||||||
# to_address looks like this: "u_91:u_1234:u_200"
|
# to_address looks like this: "u_91:u_1234:u_200"
|
||||||
# The "u_" prefix is discarded and the rest is a user_id.
|
# The "u_" prefix is discarded and the rest is a user_id.
|
||||||
import_user_ids = @database.fetch_message_participants(msg_id, @settings.fix_private_messages)
|
user_ids = to_address.split(':')
|
||||||
.map { |r| r[:to_address].split(':') }
|
user_ids.uniq!
|
||||||
.flatten!.uniq.map! { |u| u[2..-1] }
|
user_ids.map! { |u| u[2..-1].to_i }
|
||||||
|
end
|
||||||
|
|
||||||
|
def get_recipient_usernames(row)
|
||||||
|
author_id = row[:author_id].to_s
|
||||||
|
import_user_ids = get_recipient_user_ids(row[:to_address])
|
||||||
|
|
||||||
import_user_ids.map! do |import_user_id|
|
import_user_ids.map! do |import_user_id|
|
||||||
import_user_id.to_s == author_id.to_s ? nil : @lookup.find_user_by_import_id(import_user_id).try(:username)
|
import_user_id.to_s == author_id ? nil : @lookup.find_user_by_import_id(import_user_id).try(:username)
|
||||||
end.compact
|
end.compact
|
||||||
end
|
end
|
||||||
|
|
||||||
def get_import_id(row)
|
def get_topic_title(row)
|
||||||
"pm:#{row[:msg_id]}"
|
CGI.unescapeHTML(row[:message_subject])
|
||||||
|
end
|
||||||
|
|
||||||
|
def get_import_id(msg_id)
|
||||||
|
"pm:#{msg_id}"
|
||||||
|
end
|
||||||
|
|
||||||
|
# Creates a sorted array consisting of the message's author and recipients.
|
||||||
|
def sorted_user_ids(author_id, to_address)
|
||||||
|
user_ids = get_recipient_user_ids(to_address)
|
||||||
|
user_ids << author_id unless author_id.nil?
|
||||||
|
user_ids.uniq!
|
||||||
|
user_ids.sort!
|
||||||
|
end
|
||||||
|
|
||||||
|
def get_topic_id(row, root_user_ids, current_user_ids)
|
||||||
|
if row[:root_msg_id] == 0 || root_user_ids != current_user_ids
|
||||||
|
# Let's try to find an existing Discourse topic_id if this looks like a root message or
|
||||||
|
# the user IDs of the root message are different from the current message.
|
||||||
|
find_topic_id(row, current_user_ids)
|
||||||
|
else
|
||||||
|
# This appears to be a reply. Let's try to find the Discourse topic_id for this message.
|
||||||
|
parent_msg_id = get_import_id(row[:root_msg_id])
|
||||||
|
parent = @lookup.topic_lookup_from_imported_post_id(parent_msg_id)
|
||||||
|
parent[:topic_id] unless parent.blank?
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# Tries to find a Discourse topic (private message) that has the same title as the current message.
|
||||||
|
# The users involved in these messages must match too.
|
||||||
|
def find_topic_id(row, current_user_ids)
|
||||||
|
topic_title = get_topic_title(row).downcase
|
||||||
|
topic_titles = [topic_title]
|
||||||
|
topic_titles << topic_title[RE_PREFIX.length..-1] if topic_title.start_with?(RE_PREFIX)
|
||||||
|
|
||||||
|
Post.select(:topic_id)
|
||||||
|
.joins(:topic)
|
||||||
|
.joins(:_custom_fields)
|
||||||
|
.where(["LOWER(topics.title) IN (:titles) AND post_custom_fields.name = 'import_user_ids' AND post_custom_fields.value = :user_ids",
|
||||||
|
{titles: topic_titles, user_ids: current_user_ids.join(',')}])
|
||||||
|
.order('topics.created_at DESC')
|
||||||
|
.first.try(:topic_id)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -52,12 +52,6 @@ import:
|
|||||||
private_messages: true
|
private_messages: true
|
||||||
polls: true
|
polls: true
|
||||||
|
|
||||||
# This tries to fix Private Messages that were imported from phpBB2 to phpBB3.
|
|
||||||
# You should enable this option if you see duplicate messages or lots of related
|
|
||||||
# messages as topics with just one post (e.g. 'Importer', 'Re: Importer', 'Re: Importer'
|
|
||||||
# should be one topic named 'Importer' and consist of 3 posts).
|
|
||||||
fix_private_messages: false
|
|
||||||
|
|
||||||
# When true: each imported user will have the original username from phpBB as its name
|
# When true: each imported user will have the original username from phpBB as its name
|
||||||
# When false: the name of each user will be blank
|
# When false: the name of each user will be blank
|
||||||
username_as_name: false
|
username_as_name: false
|
||||||
|
@ -18,7 +18,6 @@ module ImportScripts::PhpBB3
|
|||||||
attr_reader :import_remote_avatars
|
attr_reader :import_remote_avatars
|
||||||
attr_reader :import_gallery_avatars
|
attr_reader :import_gallery_avatars
|
||||||
|
|
||||||
attr_reader :fix_private_messages
|
|
||||||
attr_reader :use_bbcode_to_md
|
attr_reader :use_bbcode_to_md
|
||||||
|
|
||||||
attr_reader :original_site_prefix
|
attr_reader :original_site_prefix
|
||||||
@ -45,7 +44,6 @@ module ImportScripts::PhpBB3
|
|||||||
@import_remote_avatars = avatar_settings['remote']
|
@import_remote_avatars = avatar_settings['remote']
|
||||||
@import_gallery_avatars = avatar_settings['gallery']
|
@import_gallery_avatars = avatar_settings['gallery']
|
||||||
|
|
||||||
@fix_private_messages = import_settings['fix_private_messages']
|
|
||||||
@use_bbcode_to_md =import_settings['use_bbcode_to_md']
|
@use_bbcode_to_md =import_settings['use_bbcode_to_md']
|
||||||
|
|
||||||
@original_site_prefix = import_settings['site_prefix']['original']
|
@original_site_prefix = import_settings['site_prefix']['original']
|
||||||
|
Loading…
Reference in New Issue
Block a user