Improvements to phpBB3 import script (#10999)

* FEATURE: Import attachments

* FEATURE: Add support for importing multiple forums into one Discourse site

* FEATURE: Add support for category and tag mapping

* FEATURE: Import groups

* FIX: Add spaces around images

* FEATURE: Custom mapping of user rank to trust levels

* FIX: Do not fail import if it cannot import polls

* FIX: Optimize existing records lookup

Co-authored-by: Gerhard Schlager <mail@gerhard-schlager.at>
Co-authored-by: Jarek Radosz <jradosz@gmail.com>
This commit is contained in:
Bianca Nenciu 2021-01-14 21:44:43 +02:00 committed by GitHub
parent 82af278ae5
commit a71b219c9a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 1606 additions and 79 deletions

View File

@ -606,10 +606,15 @@ class ImportScripts::Base
skipped += 1 skipped += 1
puts "Skipping bookmark for user id #{params[:user_id]} and post id #{params[:post_id]}" puts "Skipping bookmark for user id #{params[:user_id]} and post id #{params[:post_id]}"
else else
result = BookmarkManager.new(user).create(post_id: post.id) begin
manager = BookmarkManager.new(user)
bookmark = manager.create(post_id: post.id)
created += 1 if result.errors.none? created += 1 if manager.errors.none?
skipped += 1 if result.errors.any? skipped += 1 if manager.errors.any?
rescue
skipped += 1
end
end end
end end

View File

@ -57,6 +57,11 @@ module ImportScripts
UserCustomField.where(name: 'import_id', value: import_id.to_s).first.try(:user) UserCustomField.where(name: 'import_id', value: import_id.to_s).first.try(:user)
end end
# Resolves the Discourse username of a user that was imported earlier.
#
# @param import_id the id the user had in the source system
# @return [String, nil] the username, or nil when the import id is not mapped
#   to a user id or no user row matches that id
def find_username_by_import_id(import_id)
  user_id = user_id_from_imported_user_id(import_id)
  return if user_id.blank?

  User.where(id: user_id).pluck(:username).first
end
# Get the Discourse Category id based on the id of the source category # Get the Discourse Category id based on the id of the source category
def category_id_from_imported_category_id(import_id) def category_id_from_imported_category_id(import_id)
@categories[import_id] || @categories[import_id.to_s] @categories[import_id] || @categories[import_id.to_s]

View File

@ -22,13 +22,13 @@ module ImportScripts::PhpBB3
if version.start_with?('3.0') if version.start_with?('3.0')
require_relative 'database_3_0' require_relative 'database_3_0'
Database_3_0.new(@database_client, @database_settings) Database_3_0.new(@database_client, @database_settings)
elsif version.start_with?('3.1') elsif version.start_with?('3.1') || version.start_with?('3.2')
require_relative 'database_3_1' require_relative 'database_3_1'
Database_3_1.new(@database_client, @database_settings) Database_3_1.new(@database_client, @database_settings)
else else
raise UnsupportedVersionError, <<~MSG raise UnsupportedVersionError, <<~MSG
Unsupported version (#{version}) of phpBB detected. Unsupported version (#{version}) of phpBB detected.
Currently only 3.0.x and 3.1.x are supported by this importer. Currently only version 3.0, 3.1 and 3.2 are supported by this importer.
MSG MSG
end end
end end

View File

@ -53,6 +53,20 @@ module ImportScripts::PhpBB3
SQL SQL
end end
# Selects id, type, name and description of every row in the phpBB
# `groups` table (no WHERE clause, so all group types are returned).
# The result is whatever `query` returns for the statement.
def fetch_groups
query(<<-SQL)
SELECT g.group_id, g.group_type, g.group_name, g.group_desc
FROM #{@table_prefix}groups g
SQL
end
# Selects every membership row (group id, user id, leader flag) from the
# phpBB `user_group` table in a single, unbatched query.
# The result is whatever `query` returns for the statement.
def fetch_group_users
query(<<-SQL)
SELECT ug.group_id, ug.user_id, ug.group_leader
FROM #{@table_prefix}user_group ug
SQL
end
def fetch_categories def fetch_categories
query(<<-SQL) query(<<-SQL)
SELECT f.forum_id, f.parent_id, f.forum_name, f.forum_desc, x.first_post_time SELECT f.forum_id, f.parent_id, f.forum_name, f.forum_desc, x.first_post_time
@ -213,12 +227,20 @@ module ImportScripts::PhpBB3
SELECT b.user_id, t.topic_first_post_id SELECT b.user_id, t.topic_first_post_id
FROM #{@table_prefix}bookmarks b FROM #{@table_prefix}bookmarks b
JOIN #{@table_prefix}topics t ON (b.topic_id = t.topic_id) JOIN #{@table_prefix}topics t ON (b.topic_id = t.topic_id)
WHERE b.user_id > #{last_user_id} AND b.topic_id > #{last_topic_id} WHERE b.user_id > #{last_user_id}
ORDER BY b.user_id, b.topic_id ORDER BY b.user_id, b.topic_id
LIMIT #{@batch_size} LIMIT #{@batch_size}
SQL SQL
end end
# Looks up a single smiley by its code in the phpBB `smilies` table.
#
# @param smiley_code [String] the smiley code, e.g. ":)" or ":'("
# @return the first row (columns `emotion` and `smiley_url`) of the query
#   result, as produced by `query(...).first`
def get_smiley(smiley_code)
  # Smiley codes frequently contain quotes or backslashes (":'(", ":\"), so
  # the value must be escaped before it is placed inside the quoted literal.
  # Backslash escaping assumes MySQL's default sql_mode.
  # NOTE(review): if the database client object is reachable from this class,
  # prefer its own escape routine over this hand-rolled one.
  escaped_code = smiley_code.to_s.gsub(/[\\']/) { |char| "\\#{char}" }

  query(<<-SQL).first
    SELECT emotion, smiley_url
    FROM #{@table_prefix}smilies
    WHERE code = '#{escaped_code}'
  SQL
end
def get_config_values def get_config_values
query(<<-SQL).first query(<<-SQL).first
SELECT SELECT

View File

@ -27,8 +27,13 @@ module ImportScripts::PhpBB3
def execute def execute
puts '', "importing from phpBB #{@php_config[:phpbb_version]}" puts '', "importing from phpBB #{@php_config[:phpbb_version]}"
SiteSetting.tagging_enabled = true if @settings.tag_mappings.present?
import_users import_users
import_anonymous_users if @settings.import_anonymous_users import_anonymous_users if @settings.import_anonymous_users
import_groups
import_user_groups
import_new_categories
import_categories import_categories
import_posts import_posts
import_private_messages if @settings.import_private_messages import_private_messages if @settings.import_private_messages
@ -67,12 +72,12 @@ module ImportScripts::PhpBB3
batches do |offset| batches do |offset|
rows, last_user_id = @database.fetch_users(last_user_id) rows, last_user_id = @database.fetch_users(last_user_id)
rows = rows.to_a.uniq { |row| row[:user_id] }
break if rows.size < 1 break if rows.size < 1
next if all_records_exist?(:users, importer.map_users_to_import_ids(rows))
create_users(rows, total: total_count, offset: offset) do |row| create_users(rows, total: total_count, offset: offset) do |row|
begin begin
next if user_id_from_imported_user_id(@settings.prefix(row[:user_id]))
importer.map_user(row) importer.map_user(row)
rescue => e rescue => e
log_error("Failed to map user with ID #{row[:user_id]}", e) log_error("Failed to map user with ID #{row[:user_id]}", e)
@ -91,10 +96,9 @@ module ImportScripts::PhpBB3
rows, last_username = @database.fetch_anonymous_users(last_username) rows, last_username = @database.fetch_anonymous_users(last_username)
break if rows.size < 1 break if rows.size < 1
next if all_records_exist?(:users, importer.map_anonymous_users_to_import_ids(rows))
create_users(rows, total: total_count, offset: offset) do |row| create_users(rows, total: total_count, offset: offset) do |row|
begin begin
next if user_id_from_imported_user_id(@settings.prefix(row[:post_username]))
importer.map_anonymous_user(row) importer.map_anonymous_user(row)
rescue => e rescue => e
log_error("Failed to map anonymous user with ID #{row[:user_id]}", e) log_error("Failed to map anonymous user with ID #{row[:user_id]}", e)
@ -103,12 +107,74 @@ module ImportScripts::PhpBB3
end end
end end
# Creates one Discourse group per phpBB group, skipping groups of type 3.
#
# The group name is optionally prefixed with the configured site name, cut to
# 20 characters and sanitized; the untouched phpBB name becomes the full name.
# Any error while mapping a row is logged and does not abort the import.
def import_groups
  puts '', 'creating groups'

  create_groups(@database.fetch_groups) do |row|
    begin
      # NOTE(review): 3 is presumably phpBB's "special" (built-in) group type - confirm.
      next if row[:group_type] == 3

      raw_name = row[:group_name]
      raw_name = "#{@settings.site_name}_#{raw_name}" if @settings.site_name.present?
      # Keep the first 20 characters, then replace every character outside the allowed set.
      group_name = raw_name[0..19].gsub(/[^a-zA-Z0-9\-_. ]/, '_')

      bio_raw =
        begin
          @importers.text_processor.process_raw_text(row[:group_desc])
        rescue StandardError
          # Fall back to the unprocessed description when the conversion fails.
          row[:group_desc]
        end

      {
        id: @settings.prefix(row[:group_id]),
        name: group_name,
        full_name: row[:group_name],
        bio_raw: bio_raw
      }
    rescue => e
      log_error("Failed to map group with ID #{row[:group_id]}", e)
    end
  end
end
# Adds imported users to the imported groups they belonged to in phpBB.
#
# Membership rows are skipped when either side was not imported: the group
# (e.g. because it was filtered out) or the user. Failures while creating a
# membership are logged and do not abort the import.
def import_user_groups
  puts '', 'creating user groups'

  @database.fetch_group_users.each do |row|
    group_id = @lookup.group_id_from_imported_group_id(@settings.prefix(row[:group_id]))
    next unless group_id

    user_id = @lookup.user_id_from_imported_user_id(@settings.prefix(row[:user_id]))
    # Without this guard a membership with a nil user id would be attempted
    # for every user that was not imported.
    next unless user_id

    begin
      GroupUser.find_or_create_by(user_id: user_id, group_id: group_id, owner: row[:group_leader])
    rescue => e
      log_error("Failed to add user #{row[:user_id]} to group #{row[:group_id]}", e)
    end
  end
end
# Creates the additional categories listed under `new_categories` in the
# importer settings. Entries equal to the string "SKIP" are ignored.
# NOTE(review): `@settings.new_categories` is passed on unchanged - confirm
# that it is never nil when the setting is left empty.
def import_new_categories
  puts '', 'creating new categories'

  create_categories(@settings.new_categories) do |category|
    next if category == "SKIP"

    import_id = @settings.prefix(category[:forum_id])
    parent_import_id = @settings.prefix(category[:parent_id])

    {
      id: import_id,
      name: category[:name],
      parent_category_id: @lookup.category_id_from_imported_category_id(parent_import_id)
    }
  end
end
def import_categories def import_categories
puts '', 'creating categories' puts '', 'creating categories'
rows = @database.fetch_categories rows = @database.fetch_categories
importer = @importers.category_importer importer = @importers.category_importer
create_categories(rows) do |row| create_categories(rows) do |row|
next if @settings.category_mappings[row[:forum_id].to_s] == 'SKIP'
importer.map_category(row) importer.map_category(row)
end end
end end
@ -123,10 +189,9 @@ module ImportScripts::PhpBB3
rows, last_post_id = @database.fetch_posts(last_post_id) rows, last_post_id = @database.fetch_posts(last_post_id)
break if rows.size < 1 break if rows.size < 1
next if all_records_exist?(:posts, importer.map_to_import_ids(rows))
create_posts(rows, total: total_count, offset: offset) do |row| create_posts(rows, total: total_count, offset: offset) do |row|
begin begin
next if post_id_from_imported_post_id(@settings.prefix(row[:post_id]))
importer.map_post(row) importer.map_post(row)
rescue => e rescue => e
log_error("Failed to map post with ID #{row[:post_id]}", e) log_error("Failed to map post with ID #{row[:post_id]}", e)
@ -145,10 +210,9 @@ module ImportScripts::PhpBB3
rows, last_msg_id = @database.fetch_messages(last_msg_id) rows, last_msg_id = @database.fetch_messages(last_msg_id)
break if rows.size < 1 break if rows.size < 1
next if all_records_exist?(:posts, importer.map_to_import_ids(rows))
create_posts(rows, total: total_count, offset: offset) do |row| create_posts(rows, total: total_count, offset: offset) do |row|
begin begin
next if post_id_from_imported_post_id(@settings.prefix("pm:#{row[:msg_id]}"))
importer.map_message(row) importer.map_message(row)
rescue => e rescue => e
log_error("Failed to map message with ID #{row[:msg_id]}", e) log_error("Failed to map message with ID #{row[:msg_id]}", e)
@ -168,7 +232,11 @@ module ImportScripts::PhpBB3
break if rows.size < 1 break if rows.size < 1
create_bookmarks(rows, total: total_count, offset: offset) do |row| create_bookmarks(rows, total: total_count, offset: offset) do |row|
importer.map_bookmark(row) begin
importer.map_bookmark(row)
rescue => e
log_error("Failed to map bookmark (#{row[:user_id]}, #{row[:topic_first_post_id]})", e)
end
end end
end end
end end

View File

@ -2,10 +2,14 @@
module ImportScripts::PhpBB3 module ImportScripts::PhpBB3
class BookmarkImporter class BookmarkImporter
# @param settings importer settings; its `prefix` method is applied to the
#   imported user and post ids when a bookmark row is mapped
def initialize(settings)
@settings = settings
end
def map_bookmark(row) def map_bookmark(row)
{ {
user_id: row[:user_id], user_id: @settings.prefix(row[:user_id]),
post_id: row[:topic_first_post_id] post_id: @settings.prefix(row[:topic_first_post_id])
} }
end end
end end

View File

@ -5,20 +5,28 @@ module ImportScripts::PhpBB3
# @param lookup [ImportScripts::LookupContainer] # @param lookup [ImportScripts::LookupContainer]
# @param text_processor [ImportScripts::PhpBB3::TextProcessor] # @param text_processor [ImportScripts::PhpBB3::TextProcessor]
# @param permalink_importer [ImportScripts::PhpBB3::PermalinkImporter] # @param permalink_importer [ImportScripts::PhpBB3::PermalinkImporter]
def initialize(lookup, text_processor, permalink_importer) # @param settings [ImportScripts::PhpBB3::Settings]
def initialize(lookup, text_processor, permalink_importer, settings)
@lookup = lookup @lookup = lookup
@text_processor = text_processor @text_processor = text_processor
@permalink_importer = permalink_importer @permalink_importer = permalink_importer
@settings = settings
end end
def map_category(row) def map_category(row)
return if @settings.category_mappings[row[:forum_id].to_s]
if row[:parent_id] && @settings.category_mappings[row[:parent_id].to_s]
puts "parent category (#{row[:parent_id]}) was mapped, but children was not (#{row[:forum_id]})"
end
{ {
id: row[:forum_id], id: @settings.prefix(row[:forum_id]),
name: CGI.unescapeHTML(row[:forum_name]), name: CGI.unescapeHTML(row[:forum_name]),
parent_category_id: @lookup.category_id_from_imported_category_id(row[:parent_id]), parent_category_id: @lookup.category_id_from_imported_category_id(@settings.prefix(row[:parent_id])),
post_create_action: proc do |category| post_create_action: proc do |category|
update_category_description(category, row) update_category_description(category, row)
@permalink_importer.create_for_category(category, row[:forum_id]) @permalink_importer.create_for_category(category, row[:forum_id]) # skip @settings.prefix because ID is used in permalink generation
end end
} }
end end
@ -43,7 +51,7 @@ module ImportScripts::PhpBB3
end end
if row[:forum_desc].present? if row[:forum_desc].present?
changes = { raw: @text_processor.process_raw_text(row[:forum_desc]) } changes = { raw: (@text_processor.process_raw_text(row[:forum_desc]) rescue row[:forum_desc]) }
opts = { revised_at: post.created_at, bypass_bump: true } opts = { revised_at: post.created_at, bypass_bump: true }
post.revise(Discourse.system_user, changes, opts) post.revise(Discourse.system_user, changes, opts)
end end

View File

@ -32,7 +32,7 @@ module ImportScripts::PhpBB3
end end
def category_importer def category_importer
CategoryImporter.new(@lookup, text_processor, permalink_importer) CategoryImporter.new(@lookup, text_processor, permalink_importer, @settings)
end end
def post_importer def post_importer
@ -44,15 +44,13 @@ module ImportScripts::PhpBB3
end end
def bookmark_importer def bookmark_importer
BookmarkImporter.new BookmarkImporter.new(@settings)
end end
def permalink_importer def permalink_importer
@permalink_importer ||= PermalinkImporter.new(@settings.permalinks) @permalink_importer ||= PermalinkImporter.new(@settings.permalinks)
end end
protected
def attachment_importer def attachment_importer
AttachmentImporter.new(@database, @uploader, @settings, @phpbb_config) AttachmentImporter.new(@database, @uploader, @settings, @phpbb_config)
end end
@ -62,15 +60,15 @@ module ImportScripts::PhpBB3
end end
def poll_importer def poll_importer
PollImporter.new(@lookup, @database, text_processor) PollImporter.new(@lookup, @database, text_processor, @settings)
end end
def text_processor def text_processor
@text_processor ||= TextProcessor.new(@lookup, @database, smiley_processor, @settings) @text_processor ||= TextProcessor.new(@lookup, @database, smiley_processor, @settings, @phpbb_config)
end end
def smiley_processor def smiley_processor
SmileyProcessor.new(@uploader, @settings, @phpbb_config) SmileyProcessor.new(@uploader, @database, @settings, @phpbb_config)
end end
end end
end end

View File

@ -20,7 +20,7 @@ module ImportScripts::PhpBB3
end end
def map_message(row) def map_message(row)
user_id = @lookup.user_id_from_imported_user_id(row[:author_id]) || Discourse.system_user.id user_id = @lookup.user_id_from_imported_user_id(@settings.prefix(row[:author_id])) || Discourse.system_user.id
attachments = import_attachments(row, user_id) attachments = import_attachments(row, user_id)
mapped = { mapped = {
@ -84,7 +84,7 @@ module ImportScripts::PhpBB3
import_user_ids = get_recipient_user_ids(row[:to_address]) import_user_ids = get_recipient_user_ids(row[:to_address])
import_user_ids.map! do |import_user_id| import_user_ids.map! do |import_user_id|
@lookup.find_user_by_import_id(import_user_id).try(:username) @lookup.find_user_by_import_id(@settings.prefix(import_user_id)).try(:username)
end.compact end.compact
end end
@ -93,7 +93,7 @@ module ImportScripts::PhpBB3
end end
def get_import_id(msg_id) def get_import_id(msg_id)
"pm:#{msg_id}" @settings.prefix("pm:#{msg_id}")
end end
# Creates a sorted array consisting of the message's author and recipients. # Creates a sorted array consisting of the message's author and recipients.

View File

@ -39,7 +39,7 @@ module ImportScripts::PhpBB3
end end
def create_for_post(post, import_id) def create_for_post(post, import_id)
return unless @settings.create_topic_links && post return unless @settings.create_post_links && post
url = "viewtopic.php?p=#{import_id}" url = "viewtopic.php?p=#{import_id}"

View File

@ -5,10 +5,12 @@ module ImportScripts::PhpBB3
# @param lookup [ImportScripts::LookupContainer] # @param lookup [ImportScripts::LookupContainer]
# @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1] # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
# @param text_processor [ImportScripts::PhpBB3::TextProcessor] # @param text_processor [ImportScripts::PhpBB3::TextProcessor]
def initialize(lookup, database, text_processor) # @param settings [ImportScripts::PhpBB3::Settings]
def initialize(lookup, database, text_processor, settings)
@lookup = lookup @lookup = lookup
@database = database @database = database
@text_processor = text_processor @text_processor = text_processor
@settings = settings
end end
# @param poll_data [ImportScripts::PhpBB3::PollData] # @param poll_data [ImportScripts::PhpBB3::PollData]
@ -47,7 +49,7 @@ module ImportScripts::PhpBB3
end end
def get_option_text(row) def get_option_text(row)
text = @text_processor.process_raw_text(row[:poll_option_text]) text = @text_processor.process_raw_text(row[:poll_option_text]) rescue row[:poll_option_text]
text.squish! text.squish!
text.gsub!(/^(\d+)\./, '\1\.') text.gsub!(/^(\d+)\./, '\1\.')
text text
@ -55,7 +57,7 @@ module ImportScripts::PhpBB3
# @param poll_data [ImportScripts::PhpBB3::PollData] # @param poll_data [ImportScripts::PhpBB3::PollData]
def get_poll_text(poll_data) def get_poll_text(poll_data)
title = @text_processor.process_raw_text(poll_data.title) title = @text_processor.process_raw_text(poll_data.title) rescue poll_data.title
text = +"#{title}\n\n" text = +"#{title}\n\n"
arguments = ["results=always"] arguments = ["results=always"]
@ -118,7 +120,7 @@ module ImportScripts::PhpBB3
rows.each do |row| rows.each do |row|
option_id = mapped_option_ids[row[:poll_option_id]] option_id = mapped_option_ids[row[:poll_option_id]]
user_id = @lookup.user_id_from_imported_user_id(row[:user_id]) user_id = @lookup.user_id_from_imported_user_id(@settings.prefix(row[:user_id]))
if option_id.present? && user_id.present? if option_id.present? && user_id.present?
PollVote.create!(poll: poll, poll_option_id: option_id, user_id: user_id) PollVote.create!(poll: poll, poll_option_id: option_id, user_id: user_id)

View File

@ -18,22 +18,24 @@ module ImportScripts::PhpBB3
end end
def map_to_import_ids(rows) def map_to_import_ids(rows)
rows.map { |row| row[:post_id] } rows.map { |row| @settings.prefix(row[:post_id]) }
end end
def map_post(row) def map_post(row)
imported_user_id = row[:post_username].blank? ? row[:poster_id] : row[:post_username] return if @settings.category_mappings[row[:forum_id].to_s] == 'SKIP'
imported_user_id = @settings.prefix(row[:post_username].blank? ? row[:poster_id] : row[:post_username])
user_id = @lookup.user_id_from_imported_user_id(imported_user_id) || -1 user_id = @lookup.user_id_from_imported_user_id(imported_user_id) || -1
is_first_post = row[:post_id] == row[:topic_first_post_id] is_first_post = row[:post_id] == row[:topic_first_post_id]
attachments = import_attachments(row, user_id) attachments = import_attachments(row, user_id)
mapped = { mapped = {
id: row[:post_id], id: @settings.prefix(row[:post_id]),
user_id: user_id, user_id: user_id,
created_at: Time.zone.at(row[:post_time]), created_at: Time.zone.at(row[:post_time]),
raw: @text_processor.process_post(row[:post_text], attachments), raw: @text_processor.process_post(row[:post_text], attachments),
import_topic_id: row[:topic_id] import_topic_id: @settings.prefix(row[:topic_id])
} }
if is_first_post if is_first_post
@ -54,14 +56,18 @@ module ImportScripts::PhpBB3
def map_first_post(row, mapped) def map_first_post(row, mapped)
poll_data = add_poll(row, mapped) if @settings.import_polls poll_data = add_poll(row, mapped) if @settings.import_polls
mapped[:category] = @lookup.category_id_from_imported_category_id(row[:forum_id]) mapped[:category] = @lookup.category_id_from_imported_category_id(@settings.prefix(@settings.category_mappings[row[:forum_id].to_s])) ||
@lookup.category_id_from_imported_category_id(@settings.prefix(row[:forum_id]))
mapped[:title] = CGI.unescapeHTML(row[:topic_title]).strip[0...255] mapped[:title] = CGI.unescapeHTML(row[:topic_title]).strip[0...255]
mapped[:pinned_at] = mapped[:created_at] unless row[:topic_type] == Constants::POST_NORMAL mapped[:pinned_at] = mapped[:created_at] unless row[:topic_type] == Constants::POST_NORMAL
mapped[:pinned_globally] = row[:topic_type] == Constants::POST_GLOBAL mapped[:pinned_globally] = row[:topic_type] == Constants::POST_GLOBAL
mapped[:views] = row[:topic_views] mapped[:views] = row[:topic_views]
mapped[:post_create_action] = proc do |post| mapped[:post_create_action] = proc do |post|
@permalink_importer.create_for_topic(post.topic, row[:topic_id]) if tags = @settings.tag_mappings[row[:forum_id].to_s].presence
@permalink_importer.create_for_post(post, row[:post_id]) DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tags)
end
@permalink_importer.create_for_topic(post.topic, row[:topic_id]) # skip @settings.prefix because ID is used in permalink generation
@permalink_importer.create_for_post(post, row[:post_id]) # skip @settings.prefix because ID is used in permalink generation
@poll_importer.update_poll(row[:topic_id], post, poll_data) if poll_data @poll_importer.update_poll(row[:topic_id], post, poll_data) if poll_data
TopicViewItem.add(post.topic_id, row[:poster_ip], post.user_id, post.created_at, true) TopicViewItem.add(post.topic_id, row[:poster_ip], post.user_id, post.created_at, true)
end end
@ -70,16 +76,16 @@ module ImportScripts::PhpBB3
end end
def map_other_post(row, mapped) def map_other_post(row, mapped)
parent = @lookup.topic_lookup_from_imported_post_id(row[:topic_first_post_id]) parent = @lookup.topic_lookup_from_imported_post_id(@settings.prefix(row[:topic_first_post_id]))
if parent.blank? if parent.blank?
puts "Parent post #{row[:topic_first_post_id]} doesn't exist. Skipping #{row[:post_id]}: #{row[:topic_title][0..40]}" puts "Parent post #{@settings.prefix(row[:topic_first_post_id])} doesn't exist. Skipping #{@settings.prefix(row[:post_id])}: #{row[:topic_title][0..40]}"
return nil return nil
end end
mapped[:topic_id] = parent[:topic_id] mapped[:topic_id] = parent[:topic_id]
mapped[:post_create_action] = proc do |post| mapped[:post_create_action] = proc do |post|
@permalink_importer.create_for_post(post, row[:post_id]) @permalink_importer.create_for_post(post, row[:post_id]) # skip @settings.prefix because ID is used in permalink generation
TopicViewItem.add(post.topic_id, row[:poster_ip], post.user_id, post.created_at, true) TopicViewItem.add(post.topic_id, row[:poster_ip], post.user_id, post.created_at, true)
end end
@ -91,9 +97,14 @@ module ImportScripts::PhpBB3
poll_data = PollData.new(row[:poll_title], row[:poll_max_options], row[:poll_end]) poll_data = PollData.new(row[:poll_title], row[:poll_max_options], row[:poll_end])
poll_raw = @poll_importer.create_raw(row[:topic_id], poll_data) poll_raw = @poll_importer.create_raw(row[:topic_id], poll_data)
return if poll_data.options.size < 2
mapped_post[:raw] = poll_raw << "\n\n" << mapped_post[:raw] mapped_post[:raw] = poll_raw << "\n\n" << mapped_post[:raw]
poll_data poll_data
end end
# Memoized Guardian built for Discourse.system_user. It is handed to
# DiscourseTagging.tag_topic_by_names when imported topics are tagged.
def staff_guardian
@_staff_guardian ||= Guardian.new(Discourse.system_user)
end
end end
end end

View File

@ -12,14 +12,18 @@ module ImportScripts::PhpBB3
end end
def map_users_to_import_ids(rows) def map_users_to_import_ids(rows)
rows.map { |row| row[:user_id] } rows.map { |row| @settings.prefix(row[:user_id]) }
end end
def map_user(row) def map_user(row)
is_active_user = row[:user_inactive_reason] != Constants::INACTIVE_REGISTER is_active_user = row[:user_inactive_reason] != Constants::INACTIVE_REGISTER
trust_level = row[:user_posts] == 0 ? TrustLevel[0] : TrustLevel[1]
trust_level = @settings.trust_level_for_posts(row[:user_posts], trust_level: trust_level)
manual_locked_trust_level = trust_level > TrustLevel[1] ? trust_level : nil
{ {
id: row[:user_id], id: @settings.prefix(row[:user_id]),
email: row[:user_email], email: row[:user_email],
username: row[:username], username: row[:username],
password: @settings.import_passwords ? row[:user_password] : nil, password: @settings.import_passwords ? row[:user_password] : nil,
@ -28,7 +32,8 @@ module ImportScripts::PhpBB3
last_seen_at: row[:user_lastvisit] == 0 ? Time.zone.at(row[:user_regdate]) : Time.zone.at(row[:user_lastvisit]), last_seen_at: row[:user_lastvisit] == 0 ? Time.zone.at(row[:user_regdate]) : Time.zone.at(row[:user_lastvisit]),
registration_ip_address: (IPAddr.new(row[:user_ip]) rescue nil), registration_ip_address: (IPAddr.new(row[:user_ip]) rescue nil),
active: is_active_user, active: is_active_user,
trust_level: row[:user_posts] == 0 ? TrustLevel[0] : TrustLevel[1], trust_level: trust_level,
manual_locked_trust_level: manual_locked_trust_level,
approved: is_active_user, approved: is_active_user,
approved_by_id: is_active_user ? Discourse.system_user.id : nil, approved_by_id: is_active_user ? Discourse.system_user.id : nil,
approved_at: is_active_user ? Time.now : nil, approved_at: is_active_user ? Time.now : nil,
@ -45,14 +50,14 @@ module ImportScripts::PhpBB3
end end
def map_anonymous_users_to_import_ids(rows) def map_anonymous_users_to_import_ids(rows)
rows.map { |row| row[:post_username] } rows.map { |row| @settings.prefix(row[:post_username]) }
end end
def map_anonymous_user(row) def map_anonymous_user(row)
username = row[:post_username] username = row[:post_username]
{ {
id: username, id: @settings.prefix(username),
email: "anonymous_#{SecureRandom.hex}@no-email.invalid", email: "anonymous_#{SecureRandom.hex}@no-email.invalid",
username: username, username: username,
name: @settings.username_as_name ? username : '', name: @settings.username_as_name ? username : '',

View File

@ -11,6 +11,73 @@ database:
batch_size: 1000 # Don't change this unless you know what you're doing. The default (1000) should work just fine. batch_size: 1000 # Don't change this unless you know what you're doing. The default (1000) should work just fine.
import: import:
# Set this if you import multiple phpBB forums into a single Discourse forum.
#
# For example, when importing multiple sites, prefix all imported IDs
# with 'first' to avoid conflicts. Subsequent import runs must have a
# different 'site_name'.
#
# site_name: first
#
site_name:
# Create new categories
#
# For example, to create a parent category and a subcategory.
#
# new_categories:
# - forum_id: foo
# name: Foo Category
# - forum_id: bar
# name: Bar Category
# parent_id: foo
#
new_categories:
# Category mappings
#
# For example, topics from phpBB category 1 and 2 will be imported
# in the new "Foo Category" category, topics from phpBB category 3
# will be imported in subcategory "Bar category", topics from phpBB
# category 4 will be merged into category 5 and category 6 will be
# skipped.
#
# category_mappings:
# 1: foo
# 2: foo
# 3: bar
# 4: 5
# 6: SKIP
#
category_mappings:
# Tag mappings
#
# For example, imported topics from phpBB category 1 will be tagged
# with 'first-category', etc.
#
# tag_mappings:
# 1:
# - first-category
# 2:
# - second-category
# 3:
# - third-category
#
tag_mappings:
# Rank to trust level mapping
#
# Map phpBB 3.x rank levels (minimum post counts) to trust levels.
# Users with at least 3000 posts will have TL3, etc.
#
# rank_mapping:
# trust_level_1: 200
# trust_level_2: 1000
# trust_level_3: 3000
#
rank_mapping:
# WARNING: Do not activate this option unless you know what you are doing. # WARNING: Do not activate this option unless you know what you are doing.
# It will probably break the BBCode to Markdown conversion and slows down your import. # It will probably break the BBCode to Markdown conversion and slows down your import.
use_bbcode_to_md: false use_bbcode_to_md: false

View File

@ -0,0 +1,88 @@
# frozen_string_literal: true
module ImportScripts; end
module ImportScripts::PhpBB3; end

module ImportScripts::PhpBB3::BBCode
  # Linebreak types a node can request for its prefix/postfix linebreaks.
  # New nodes start with LINEBREAK_AUTO for both.
  LINEBREAK_AUTO = :auto
  LINEBREAK_HARD = :hard
  LINEBREAK_HTML = :html

  # A node of the intermediate tree that is built from the XML and later
  # rendered as Markdown. Creating a node with a parent appends it to the
  # parent's children and links it with its previous sibling.
  class MarkdownNode
    # @return [String] name of the XML node this node was created for
    attr_reader :xml_node_name

    # @return [MarkdownNode, nil] nil for the root node
    attr_reader :parent

    # @return [Array<MarkdownNode>, nil] nil after #skip_children was called
    attr_reader :children

    # @return [MarkdownNode, nil]
    attr_accessor :previous_sibling

    # @return [MarkdownNode, nil]
    attr_accessor :next_sibling

    # @return [String]
    attr_accessor :text

    # @return [String]
    attr_accessor :prefix

    # @return [String]
    attr_accessor :postfix

    # @return [Integer]
    attr_accessor :prefix_linebreaks

    # @return [Integer]
    attr_accessor :postfix_linebreaks

    # @return [Symbol] one of the LINEBREAK_* constants
    attr_accessor :prefix_linebreak_type

    # @return [Symbol] one of the LINEBREAK_* constants
    attr_accessor :postfix_linebreak_type

    # @return [String, nil] not set by the constructor
    attr_accessor :prefix_children

    # @param xml_node_name [String]
    # @param parent [MarkdownNode, nil]
    def initialize(xml_node_name:, parent:)
      @xml_node_name = xml_node_name

      @text = +""
      @prefix = +""
      @postfix = +""

      @prefix_linebreaks = 0
      @postfix_linebreaks = 0

      @prefix_linebreak_type = LINEBREAK_AUTO
      @postfix_linebreak_type = LINEBREAK_AUTO

      @parent = parent
      @children = []

      if @parent
        # Link with the node that was the last child until now, then append.
        @previous_sibling = @parent.children.last
        @previous_sibling.next_sibling = self if @previous_sibling
        @parent.children << self
      end
    end

    # Uses the same text as prefix and postfix (both refer to the same object).
    #
    # @param text [String]
    def enclosed_with=(text)
      @prefix = @postfix = text
    end

    # Marks this node as a leaf: its children list becomes nil, which callers
    # can use to detect that nothing may be added below this node.
    def skip_children
      @children = nil
    end

    # @return [String] short description of the node for debugging
    def to_s
      # children is nil after #skip_children; report 0 instead of raising.
      child_count = children ? children.size : 0
      "name: #{xml_node_name}, prefix: #{prefix}, text: #{text}, children: #{child_count}, postfix: #{postfix}"
    end
  end
end

View File

@ -0,0 +1,356 @@
# frozen_string_literal: true
require 'nokogiri'
require_relative 'markdown_node'
module ImportScripts::PhpBB3::BBCode
class XmlToMarkdown
# @param xml [String] XML that is parsed with Nokogiri::XML
# @param opts [Hash] optional collaborators and flags:
#   :smilie_to_emoji - callable that receives the content of an E element
#     and returns the text to emit instead
#   :url_replacement - callable that receives the unescaped URL of a URL
#     element and returns the URL to emit
#   :allow_inline_code - when true, CODE without a newline is wrapped in
#     backticks instead of a fenced block (default: false)
#   :username_from_user_id, :quoted_post_from_post_id, :upload_md_from_file -
#     stored as given; presumably callables as well (confirm at their call sites)
#   :traditional_linebreaks - stored as given (default: false)
def initialize(xml, opts = {})
@username_from_user_id = opts[:username_from_user_id]
@smilie_to_emoji = opts[:smilie_to_emoji]
@quoted_post_from_post_id = opts[:quoted_post_from_post_id]
@upload_md_from_file = opts[:upload_md_from_file]
@url_replacement = opts[:url_replacement]
@allow_inline_code = opts.fetch(:allow_inline_code, false)
@traditional_linebreaks = opts.fetch(:traditional_linebreaks, false)
@doc = Nokogiri::XML(xml)
# Stack of the LIST elements currently being visited (innermost last).
@list_stack = []
end
# Converts the parsed XML document into Markdown.
#
# @return [String] the Markdown text without trailing whitespace
def convert
  preprocess_xml

  root_node = MarkdownNode.new(xml_node_name: "ROOT", parent: nil)
  visit(@doc.root, root_node)

  markdown = to_markdown(root_node)
  markdown.rstrip
end
private
# Names of XML elements that are removed from the document before conversion.
IGNORED_ELEMENTS = %w[s e i].freeze
# Parents whose first text child gets all of its leading whitespace stripped.
ELEMENTS_WITHOUT_LEADING_WHITESPACES = %w[LIST LI].freeze
# Parents inside which repeated <br> elements are marked as hard linebreaks.
ELEMENTS_WITH_HARD_LINEBREAKS = %w[B I U].freeze
EXPLICIT_LINEBREAK_THRESHOLD = 2
# Cleans up the parsed document in place before it is converted.
#
# Text nodes: leading newlines (plus any whitespace directly after them) are
# stripped; the first text child of a LIST/LI additionally loses all leading
# whitespace; text nodes that end up empty are removed.
# Elements named in IGNORED_ELEMENTS are removed from the document.
def preprocess_xml
@doc.traverse do |node|
if node.is_a? Nokogiri::XML::Text
node.content = node.content.gsub(/\A\n+\s*/, "")
node.content = node.content.lstrip if remove_leading_whitespaces?(node)
node.remove if node.content.empty?
elsif IGNORED_ELEMENTS.include?(node.name)
node.remove
end
end
end
# Tells whether all leading whitespace of the given node should be stripped:
# true only for the first child of an element listed in
# ELEMENTS_WITHOUT_LEADING_WHITESPACES.
#
# @return [Boolean]
def remove_leading_whitespaces?(xml_node)
  container = xml_node.parent
  return false unless container
  return false unless ELEMENTS_WITHOUT_LEADING_WHITESPACES.include?(container.name)

  container.children.first == xml_node
end
# Walks the XML tree depth-first and builds the Markdown tree.
#
# For an element named X the method `visit_X` (if defined) is called with a
# freshly created Markdown node, then all XML children are visited, and
# finally `after_X` (if defined) is called. Elements without a visitor do not
# get a node of their own; their children are attached to `md_parent`.
# Nothing is created below a parent whose children were skipped (nil).
def visit(xml_node, md_parent)
  visitor = "visit_#{xml_node.name}"
  md_node = nil

  # The second argument makes respond_to? consider private methods too.
  if respond_to?(visitor, true) && md_parent.children
    md_node = create_node(xml_node, md_parent)
    send(visitor, xml_node, md_node)
  end

  children_parent = md_node || md_parent
  xml_node.children.each { |xml_child| visit(xml_child, children_parent) }

  # The hook also runs when no node was created; md_node is nil in that case.
  after_hook = "after_#{xml_node.name}"
  send(after_hook, xml_node, md_node) if respond_to?(after_hook, true)
end
# Creates the Markdown node for an XML node and attaches it to `md_parent`.
# Consecutive "br" elements share one node: when the parent's last child
# already is a "br" node, that node is returned instead of a new one.
#
# @return [MarkdownNode]
def create_node(xml_node, md_parent)
  node_name = xml_node.name

  if node_name == "br"
    previous = md_parent.children.last
    return previous if previous&.xml_node_name == "br"
  end

  MarkdownNode.new(xml_node_name: node_name, parent: md_parent)
end
# Appends the text of an XML text node to the Markdown node's text.
# The `text` helper that produces the string is defined elsewhere in this class.
def visit_text(xml_node, md_node)
md_node.text << text(xml_node)
end
# Bold: wraps the content in "**". A B element directly inside another B
# element is left unwrapped, so the markers are not doubled.
def visit_B(xml_node, md_node)
  return if xml_node.parent&.name == 'B'

  md_node.enclosed_with = "**"
end
# Italic: wraps the content in "_". An I element directly inside another I
# element is left unwrapped, so the markers are not doubled.
def visit_I(xml_node, md_node)
  return if xml_node.parent&.name == 'I'

  md_node.enclosed_with = "_"
end
# Underline: there is no Markdown syntax for it, so the content is wrapped in
# the [u]...[/u] BBCode tags. A U element directly inside another U element
# is left unwrapped.
def visit_U(xml_node, md_node)
  return if xml_node.parent&.name == 'U'

  md_node.prefix = "[u]"
  md_node.postfix = "[/u]"
end
# Converts a code element. Single-line code becomes inline code when
# @allow_inline_code is set; everything else becomes a fenced "text" block.
# The element's raw content (right-stripped) is used as text and its
# children are skipped. The block is separated from its surroundings by two
# linebreaks.
def visit_CODE(xml_node, md_node)
  code = xml_node.content
  inline = @allow_inline_code && !code.include?("\n")

  if inline
    md_node.enclosed_with = "`"
  else
    md_node.prefix = "```text\n"
    md_node.postfix = "\n```"
  end

  md_node.text = code.rstrip
  md_node.skip_children

  md_node.postfix_linebreaks = 2
  md_node.prefix_linebreaks = 2
  md_node.prefix_linebreak_type = LINEBREAK_HTML
end
# Starts a list. A top-level list is surrounded by two linebreaks, a nested
# list by one. The list is pushed onto @list_stack; it counts as unordered
# when the element has no "type" attribute.
def visit_LIST(xml_node, md_node)
  top_level = @list_stack.empty?
  linebreaks = top_level ? 2 : 1

  md_node.postfix_linebreaks = linebreaks
  md_node.prefix_linebreaks = linebreaks
  md_node.prefix_linebreak_type = LINEBREAK_HTML if top_level

  list = { unordered: xml_node.attribute('type').nil?, item_count: 0 }
  @list_stack.push(list)
end
# Hook that runs after all children of a LIST element were visited:
# removes the finished list from @list_stack.
def after_LIST(xml_node, md_node)
  @list_stack.pop
end
# Converts a list item of the innermost list on @list_stack. The item is
# prefixed with "*" (unordered) or its running number (ordered), indented by
# two spaces per nesting level, and followed by one linebreak.
def visit_LI(xml_node, md_node)
  current_list = @list_stack.last
  current_list[:item_count] += 1

  nesting_level = @list_stack.size - 1
  marker =
    if current_list[:unordered]
      '*'
    else
      "#{current_list[:item_count]}."
    end

  md_node.prefix = "#{' ' * (2 * nesting_level)}#{marker} "
  md_node.postfix_linebreaks = 1
end
# Converts an image into Markdown image syntax using the "src" attribute.
# The image is surrounded by two linebreaks and its children are skipped.
def visit_IMG(xml_node, md_node)
  source = xml_node.attribute('src')
  md_node.text = +"![](#{source})"

  md_node.postfix_linebreaks = 2
  md_node.prefix_linebreaks = 2
  md_node.skip_children
end
# Converts a link. When the link text equals the URL, the bare URL is
# emitted and the children are skipped; otherwise the children become the
# link text of a Markdown link.
#
# The optional @url_replacement callable receives the unescaped URL and may
# return a replacement. When it returns nil the original URL is kept, so a
# callback that only knows some URLs cannot erase the others.
def visit_URL(xml_node, md_node)
  original_url = xml_node.attribute('url').to_s
  url = CGI.unescapeHTML(original_url)
  url = @url_replacement.call(url) || url if @url_replacement

  if xml_node.content.strip == original_url
    md_node.text = url
    md_node.skip_children
  else
    md_node.prefix = "["
    md_node.postfix = "](#{url})"
  end
end
# Wraps an email address in angle brackets.
def visit_EMAIL(xml_node, md_node)
  opening, closing = "<", ">"

  md_node.prefix = opening
  md_node.postfix = closing
end
# Counts a linebreak on the Markdown node. From the second consecutive
# linebreak on, hard linebreaks are used when the <br> sits inside one of
# the ELEMENTS_WITH_HARD_LINEBREAKS.
def visit_br(xml_node, md_node)
  md_node.postfix_linebreaks += 1
  return if md_node.postfix_linebreaks <= 1

  parent_name = xml_node.parent&.name
  md_node.postfix_linebreak_type = LINEBREAK_HARD if ELEMENTS_WITH_HARD_LINEBREAKS.include?(parent_name)
end
# Converts a smilie element into an emoji with the help of the optional
# @smilie_to_emoji callable, which receives the element's content.
#
# When no callable is configured, or the callable returns nil for an unknown
# smilie, the node is left untouched so that its children (the original
# smilie text) are rendered instead of setting the node's text to nil.
def visit_E(xml_node, md_node)
  return unless @smilie_to_emoji

  emoji = @smilie_to_emoji.call(xml_node.content)
  return unless emoji

  md_node.text = emoji
  md_node.skip_children
end
# Converts a quote.
#
# * When the quoted post can be looked up, a [quote] BBCode with username,
#   post number and topic id is emitted.
# * Otherwise, when a username is known, a [quote] BBCode with just the
#   username is emitted.
# * Otherwise the children are rendered as a Markdown blockquote ("> ").
#
# The username is taken from the looked-up post; when that hash carries no
# :username, the user_id/author attributes are used instead, so the quote
# header never starts with an empty name.
def visit_QUOTE(xml_node, md_node)
  post = quoted_post(xml_node)
  username = (post && post[:username]) || quoted_username(xml_node)

  if post && username
    md_node.prefix = %Q{[quote="#{username}, post:#{post[:post_number]}, topic:#{post[:topic_id]}"]\n}
    md_node.postfix = "\n[/quote]"
  elsif username
    md_node.prefix = %Q{[quote="#{username}"]\n}
    md_node.postfix = "\n[/quote]"
  else
    md_node.prefix_children = "> "
  end

  md_node.prefix_linebreaks = md_node.postfix_linebreaks = 2
  md_node.prefix_linebreak_type = LINEBREAK_HTML
end
# Looks up the quoted post via the optional @quoted_post_from_post_id
# callable, using the numeric "post_id" attribute of the quote.
#
# @return the callable's result, or nil without a callable or a numeric post id
def quoted_post(xml_node)
  return unless @quoted_post_from_post_id

  post_id = to_i(xml_node.attr("post_id"))
  post_id ? @quoted_post_from_post_id.call(post_id) : nil
end
# Determines the username of a quote: first via the optional
# @username_from_user_id callable and the numeric "user_id" attribute,
# then by falling back to the "author" attribute.
#
# @return [String, nil]
def quoted_username(xml_node)
  resolved = nil

  if @username_from_user_id && (user_id = to_i(xml_node.attr("user_id")))
    resolved = @username_from_user_id.call(user_id)
  end

  resolved || xml_node.attr("author")
end
# Converts a string into an integer, but only when it consists solely of
# digits.
#
# @return [Integer, nil] nil for nil and for anything that is not all digits
def to_i(string)
  return nil if string.nil?

  string.match(/\A\d+\z/) ? string.to_i : nil
end
# Converts an inline attachment. The optional @upload_md_from_file callable
# receives the "filename" attribute and the numeric "index" attribute and
# returns the Markdown for the upload.
#
# The node's text is only replaced when the callable returns something; a
# nil result leaves the existing text in place instead of setting it to nil.
# The children are always skipped and the attachment is put on its own line.
def visit_ATTACHMENT(xml_node, md_node)
  filename = xml_node.attr("filename")
  index = to_i(xml_node.attr("index"))

  upload_md = @upload_md_from_file.call(filename, index) if @upload_md_from_file
  md_node.text = upload_md if upload_md

  md_node.prefix_linebreaks = md_node.postfix_linebreaks = 1
  md_node.skip_children
end
# Converts a font size: sizes from 1 to 99 become <small>, sizes from 101 to
# 200 become <big>. Every other size (including 100 and anything that is
# not a number) leaves the text unchanged.
def visit_SIZE(xml_node, md_node)
  size = to_i(xml_node.attr("size"))
  return if size.nil?

  case size
  when 1..99
    md_node.prefix = '<small>'
    md_node.postfix = '</small>'
  when 101..200
    md_node.prefix = '<big>'
    md_node.postfix = '</big>'
  end
end
# Returns the text of the given XML node with HTML entities unescaped.
#
# Escaping of Markdown characters is currently disabled, so
# +escape_markdown+ has no effect:
# text.gsub!(/[\\`*_{}\[\]()#+\-.!~]/) { |c| "\\#{c}" } if escape_markdown
def text(xml_node, escape_markdown: true)
  CGI.unescapeHTML(xml_node.text)
end
# Renders the children of +md_parent+ into a single Markdown string. A child
# that has children of its own is rendered recursively; otherwise its +text+
# is used.
#
# @param md_parent [MarkdownNode]
# @return [String] the Markdown built from all children, in order
def to_markdown(md_parent)
markdown = +""
md_parent.children.each do |md_node|
prefix = md_node.prefix
text = md_node.children&.any? ? to_markdown(md_node) : md_node.text
postfix = md_node.postfix
# Closest `prefix_children` value of md_parent or one of its ancestors (nil if there is none).
parent_prefix = prefix_from_parent(md_parent)
# The inherited prefix is never put in front of a "br" node. It is only added
# when md_parent defines `prefix_children` itself or when something was rendered already.
if parent_prefix && md_node.xml_node_name != "br" && (md_parent.prefix_children || !markdown.empty?)
prefix = "#{parent_prefix}#{prefix}"
end
# The whitespace of CODE nodes is left exactly as it is.
if md_node.xml_node_name != "CODE"
text, prefix, postfix = hoist_whitespaces!(markdown, text, prefix, postfix)
end
# Linebreaks required before the node are added before anything of the node is appended...
add_linebreaks!(markdown, md_node.prefix_linebreaks, md_node.prefix_linebreak_type, parent_prefix)
markdown << prefix
markdown << text
markdown << postfix
# ...and the ones required after the node once it has been appended completely.
add_linebreaks!(markdown, md_node.postfix_linebreaks, md_node.postfix_linebreak_type, parent_prefix)
end
markdown
end
# Moves whitespace at the edges of +text+ outside of the surrounding
# prefix/postfix, so that markers such as "**" directly touch the text.
#
# * Leading whitespace is dropped when +markdown+ ends with a newline.
# * With a non-empty prefix, leading whitespace is stripped; its first
#   character is put in front of the prefix unless +markdown+ already ends
#   with whitespace.
# * With a non-empty postfix, trailing whitespace is stripped and its last
#   character is appended to the postfix.
#
# @return [Array<String>] the adjusted text, prefix and postfix
def hoist_whitespaces!(markdown, text, prefix, postfix)
  text = text.lstrip if markdown.end_with?("\n")

  if !prefix.empty?
    hoist_leading = starts_with_whitespace?(text) && !ends_with_whitespace?(markdown)
    prefix = "#{text[0]}#{prefix}" if hoist_leading
    text = text.lstrip
  end

  if !postfix.empty?
    postfix = "#{postfix}#{text[-1]}" if ends_with_whitespace?(text)
    text = text.rstrip
  end

  [text, prefix, postfix]
end
# Walks up from the given node and returns the first `prefix_children`
# value that is set.
#
# @return the closest prefix for children, or nil when no ancestor has one
def prefix_from_parent(md_parent)
  node = md_parent

  loop do
    return nil unless node

    inherited = node.prefix_children
    return inherited if inherited

    node = node.parent
  end
end
# Makes sure that +markdown+ ends with the required number of linebreaks.
# Nothing happens when no linebreaks are required, when +markdown+ is still
# empty or when it already ends with enough linebreaks. Otherwise the
# trailing whitespace and linebreaks are removed and written anew.
#
# @param markdown [String] the Markdown built so far; modified in place
# @param required_linebreak_count [Integer]
# @param linebreak_type one of the LINEBREAK_* constants (or nil)
# @param prefix [String, nil] inserted in front of every linebreak except the first one
def add_linebreaks!(markdown, required_linebreak_count, linebreak_type, prefix = nil)
return if required_linebreak_count == 0 || markdown.empty?
# Number of linebreaks ("\n", "\\\n" or "<br>\n") at the very end of the Markdown.
existing_linebreak_count = markdown[/(?:\\?\n|<br>\n)*\z/].count("\n")
if linebreak_type == LINEBREAK_HTML
# One more than the larger of "existing" and "required - 1"; only used when it
# exceeds the threshold for explicit linebreaks.
max_linebreak_count = [existing_linebreak_count, required_linebreak_count - 1].max + 1
required_linebreak_count = max_linebreak_count if max_linebreak_count > EXPLICIT_LINEBREAK_THRESHOLD
end
return if existing_linebreak_count >= required_linebreak_count
# Existing trailing whitespace and linebreaks are dropped; all linebreaks get rewritten below.
rstrip!(markdown)
# Index of the first linebreak for which `linebreak` uses an explicit ("\\\n" or "<br>\n") form.
alternative_linebreak_start_index = required_linebreak_count > EXPLICIT_LINEBREAK_THRESHOLD ? 1 : 2
required_linebreak_count.times do |index|
linebreak = linebreak(linebreak_type, index, alternative_linebreak_start_index, required_linebreak_count)
# The prefix loses its trailing whitespace when a plain "\n" follows it.
markdown << (linebreak == "\n" ? prefix.rstrip : prefix) if prefix && index > 0
markdown << linebreak
end
end
# Removes trailing whitespace and trailing linebreaks ("\n", "\\\n" and
# "<br>\n") from the given Markdown in place.
def rstrip!(markdown)
  trailing_whitespace_and_linebreaks = /\s*(?:\\?\n|<br>\n)*\z/
  markdown.gsub!(trailing_whitespace_and_linebreaks, '')
end
# Picks the linebreak to emit at the given position of a run of linebreaks:
#
# * "<br>\n" for the last linebreak of an HTML-type run, once the
#   alternative form is in effect,
# * "\\\n" for hard linebreaks, when traditional linebreaks are enabled or
#   when the alternative form is in effect,
# * a plain "\n" otherwise.
#
# The alternative form is in effect from +alternative_linebreak_start_index+ on.
#
# @return [String]
def linebreak(linebreak_type, linebreak_index, alternative_linebreak_start_index, required_linebreak_count)
  alternative = linebreak_index >= alternative_linebreak_start_index
  last = required_linebreak_count == linebreak_index + 1

  if linebreak_type == LINEBREAK_HTML && alternative && last
    "<br>\n"
  elsif linebreak_type == LINEBREAK_HARD || @traditional_linebreaks || alternative
    "\\\n"
  else
    "\n"
  end
end
# @return [Boolean] whether the first character of the text is whitespace
def starts_with_whitespace?(text)
  leading_whitespace = /\A\s/
  text.match?(leading_whitespace)
end
# @return [Boolean] whether the last character of the text is whitespace
def ends_with_whitespace?(text)
  trailing_whitespace = /\s\z/
  text.match?(trailing_whitespace)
end
end
end

View File

@ -1,14 +1,23 @@
# frozen_string_literal: true # frozen_string_literal: true
require 'csv'
require 'yaml' require 'yaml'
require_relative '../../base'
module ImportScripts::PhpBB3 module ImportScripts::PhpBB3
class Settings class Settings
def self.load(filename) def self.load(filename)
yaml = YAML::load_file(filename) yaml = YAML::load_file(filename)
Settings.new(yaml) Settings.new(yaml.deep_stringify_keys.with_indifferent_access)
end end
attr_reader :site_name
attr_reader :new_categories
attr_reader :category_mappings
attr_reader :tag_mappings
attr_reader :rank_mapping
attr_reader :import_anonymous_users attr_reader :import_anonymous_users
attr_reader :import_attachments attr_reader :import_attachments
attr_reader :import_private_messages attr_reader :import_private_messages
@ -34,6 +43,14 @@ module ImportScripts::PhpBB3
def initialize(yaml) def initialize(yaml)
import_settings = yaml['import'] import_settings = yaml['import']
@site_name = import_settings['site_name']
@new_categories = import_settings['new_categories']
@category_mappings = import_settings['category_mappings']
@tag_mappings = import_settings['tag_mappings']
@rank_mapping = import_settings['rank_mapping']
@import_anonymous_users = import_settings['anonymous_users'] @import_anonymous_users = import_settings['anonymous_users']
@import_attachments = import_settings['attachments'] @import_attachments = import_settings['attachments']
@import_private_messages = import_settings['private_messages'] @import_private_messages = import_settings['private_messages']
@ -58,6 +75,20 @@ module ImportScripts::PhpBB3
@database = DatabaseSettings.new(yaml['database']) @database = DatabaseSettings.new(yaml['database'])
end end
# Prefixes the given value with the configured site name ("site:value").
# The value is returned unchanged when either the site name or the value
# itself is blank.
def prefix(val)
  return val unless @site_name.present? && val.present?

  "#{@site_name}:#{val}"
end
# Determines the trust level for the given rank based on the configured
# rank mapping. Every mapping entry whose value is not greater than +rank+
# qualifies; its level is the number left after removing "trust_level_"
# from the key. The highest qualifying level wins, but the result is never
# lower than the given +trust_level+.
#
# @return the given +trust_level+ when no rank mapping is configured
def trust_level_for_posts(rank, trust_level: 0)
  return trust_level unless @rank_mapping.present?

  @rank_mapping.reduce(trust_level) do |highest, (key, value)|
    next highest unless rank >= value

    [highest, key.gsub('trust_level_', '').to_i].max
  end
end
end end
class DatabaseSettings class DatabaseSettings

View File

@ -3,10 +3,12 @@
module ImportScripts::PhpBB3 module ImportScripts::PhpBB3
class SmileyProcessor class SmileyProcessor
# @param uploader [ImportScripts::Uploader] # @param uploader [ImportScripts::Uploader]
# @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
# @param settings [ImportScripts::PhpBB3::Settings] # @param settings [ImportScripts::PhpBB3::Settings]
# @param phpbb_config [Hash] # @param phpbb_config [Hash]
def initialize(uploader, settings, phpbb_config) def initialize(uploader, database, settings, phpbb_config)
@uploader = uploader @uploader = uploader
@database = database
@smilies_path = File.join(settings.base_dir, phpbb_config[:smilies_path]) @smilies_path = File.join(settings.base_dir, phpbb_config[:smilies_path])
@smiley_map = {} @smiley_map = {}
@ -16,12 +18,16 @@ module ImportScripts::PhpBB3
def replace_smilies(text) def replace_smilies(text)
# :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) --> # :) is encoded as <!-- s:) --><img src="{SMILIES_PATH}/icon_e_smile.gif" alt=":)" title="Smile" /><!-- s:) -->
text.gsub!(/<!-- s(\S+) --><img src="\{SMILIES_PATH\}\/(.+?)" alt="(.*?)" title="(.*?)" \/><!-- s(?:\S+) -->/) do text.gsub!(/<!-- s(\S+) --><img src="\{SMILIES_PATH\}\/.+?" alt=".*?" title=".*?" \/><!-- s?:\S+ -->/) do
smiley = $1 emoji($1)
end
end
@smiley_map.fetch(smiley) do def emoji(smiley_code)
upload_smiley(smiley, $2, $3, $4) || smiley_as_text(smiley) @smiley_map.fetch(smiley_code) do
end smiley = @database.get_smiley(smiley_code)
emoji = upload_smiley(smiley_code, smiley[:smiley_url], smiley_code, smiley[:emotion]) if smiley
emoji || smiley_as_text(smiley_code)
end end
end end
@ -36,7 +42,7 @@ module ImportScripts::PhpBB3
[':o', ':-o', ':eek:'] => ':astonished:', [':o', ':-o', ':eek:'] => ':astonished:',
[':shock:'] => ':open_mouth:', [':shock:'] => ':open_mouth:',
[':?', ':-?', ':???:'] => ':confused:', [':?', ':-?', ':???:'] => ':confused:',
['8-)', ':cool:'] => ':sunglasses:', ['8)', '8-)', ':cool:'] => ':sunglasses:',
[':lol:'] => ':laughing:', [':lol:'] => ':laughing:',
[':x', ':-x', ':mad:'] => ':angry:', [':x', ':-x', ':mad:'] => ':angry:',
[':P', ':-P', ':razz:'] => ':stuck_out_tongue:', [':P', ':-P', ':razz:'] => ':stuck_out_tongue:',

View File

@ -1,48 +1,75 @@
# frozen_string_literal: true # frozen_string_literal: true
require_relative 'bbcode/xml_to_markdown'
module ImportScripts::PhpBB3 module ImportScripts::PhpBB3
class TextProcessor class TextProcessor
# @param lookup [ImportScripts::LookupContainer] # @param lookup [ImportScripts::LookupContainer]
# @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1] # @param database [ImportScripts::PhpBB3::Database_3_0 | ImportScripts::PhpBB3::Database_3_1]
# @param smiley_processor [ImportScripts::PhpBB3::SmileyProcessor] # @param smiley_processor [ImportScripts::PhpBB3::SmileyProcessor]
# @param settings [ImportScripts::PhpBB3::Settings] # @param settings [ImportScripts::PhpBB3::Settings]
def initialize(lookup, database, smiley_processor, settings) # @param phpbb_config [Hash]
def initialize(lookup, database, smiley_processor, settings, phpbb_config)
@lookup = lookup @lookup = lookup
@database = database @database = database
@smiley_processor = smiley_processor @smiley_processor = smiley_processor
@he = HTMLEntities.new @he = HTMLEntities.new
@use_xml_to_markdown = phpbb_config[:phpbb_version].start_with?('3.2')
@settings = settings @settings = settings
@new_site_prefix = settings.new_site_prefix @new_site_prefix = settings.new_site_prefix
create_internal_link_regexps(settings.original_site_prefix) create_internal_link_regexps(settings.original_site_prefix)
end end
def process_raw_text(raw) def process_raw_text(raw, attachments = nil)
text = raw.dup if @use_xml_to_markdown
text = CGI.unescapeHTML(text) unreferenced_attachments = attachments&.dup
clean_bbcodes(text) converter = BBCode::XmlToMarkdown.new(
if @settings.use_bbcode_to_md raw,
text = bbcode_to_md(text) username_from_user_id: lambda { |user_id| @lookup.find_username_by_import_id(user_id) },
smilie_to_emoji: lambda { |smilie| @smiley_processor.emoji(smilie).dup },
quoted_post_from_post_id: lambda { |post_id| @lookup.topic_lookup_from_imported_post_id(post_id) },
upload_md_from_file: (lambda do |filename, index|
unreferenced_attachments[index] = nil
attachments.fetch(index, filename).dup
end if attachments),
url_replacement: nil,
allow_inline_code: false
)
text = converter.convert
text.gsub!(@short_internal_link_regexp) do |link|
replace_internal_link(link, $1, $2)
end
add_unreferenced_attachments(text, unreferenced_attachments)
else
text = raw.dup
text = CGI.unescapeHTML(text)
clean_bbcodes(text)
if @settings.use_bbcode_to_md
text = bbcode_to_md(text)
end
process_smilies(text)
process_links(text)
process_lists(text)
process_code(text)
fix_markdown(text)
process_attachments(text, attachments) if attachments.present?
text
end end
process_smilies(text)
process_links(text)
process_lists(text)
process_code(text)
fix_markdown(text)
text
end end
def process_post(raw, attachments) def process_post(raw, attachments)
text = process_raw_text(raw) process_raw_text(raw, attachments) rescue raw
text = process_attachments(text, attachments) if attachments.present?
text
end end
def process_private_msg(raw, attachments) def process_private_msg(raw, attachments)
text = process_raw_text(raw) process_raw_text(raw, attachments) rescue raw
text = process_attachments(text, attachments) if attachments.present?
text
end end
protected protected
@ -139,6 +166,12 @@ module ImportScripts::PhpBB3
attachments.fetch(index, real_filename) attachments.fetch(index, real_filename)
end end
add_unreferenced_attachments(text, unreferenced_attachments)
end
def add_unreferenced_attachments(text, unreferenced_attachments)
return text unless unreferenced_attachments
unreferenced_attachments = unreferenced_attachments.compact unreferenced_attachments = unreferenced_attachments.compact
text << "\n" << unreferenced_attachments.join("\n") unless unreferenced_attachments.empty? text << "\n" << unreferenced_attachments.join("\n") unless unreferenced_attachments.empty?
text text
@ -161,6 +194,7 @@ module ImportScripts::PhpBB3
def fix_markdown(text) def fix_markdown(text)
text.gsub!(/(\n*\[\/?quote.*?\]\n*)/mi) { |q| "\n#{q.strip}\n" } text.gsub!(/(\n*\[\/?quote.*?\]\n*)/mi) { |q| "\n#{q.strip}\n" }
text.gsub!(/^!\[[^\]]*\]\([^\]]*\)$/i) { |img| "\n#{img.strip}\n" } # space out images single on line
text text
end end
end end

View File

@ -0,0 +1,817 @@
# frozen_string_literal: true
require 'rails_helper'
require Rails.root.join('script/import_scripts/phpbb3/support/bbcode/xml_to_markdown')
RSpec.describe ImportScripts::PhpBB3::BBCode::XmlToMarkdown do
# Converts the given XML with the class under test and returns the result.
def convert(xml, opts = {})
  converter = described_class.new(xml, opts)
  converter.convert
end
it "converts unformatted text" do
xml = '<t>unformatted text</t>'
expect(convert(xml)).to eq('unformatted text')
end
it "converts nested formatting" do
xml = '<r><I><s>[i]</s>this is italic<B><s>[b]</s> and bold<e>[/b]</e></B> text<e>[/i]</e></I></r>'
expect(convert(xml)).to eq('_this is italic **and bold** text_')
end
context "bold text" do
it "converts bold text" do
xml = '<r><B><s>[b]</s>this is bold text<e>[/b]</e></B></r>'
expect(convert(xml)).to eq('**this is bold text**')
end
it "converts multi-line bold text" do
xml = <<~XML
<r><B><s>[b]</s>this is bold text<br/>
on two lines<e>[/b]</e></B><br/>
<br/>
<B><s>[b]</s>this is bold text<br/>
<br/>
<br/>
with two empty lines<e>[/b]</e></B></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
**this is bold text
on two lines**
**this is bold text\\
\\
\\
with two empty lines**
MD
end
it "ignores duplicate bold text" do
xml = '<r><B><s>[b]</s><B><s>[b]</s>this is bold text<e>[/b]</e></B><e>[/b]</e></B></r>'
expect(convert(xml)).to eq('**this is bold text**')
end
end
context "italic text" do
it "converts italic text" do
xml = '<r><I><s>[i]</s>this is italic text<e>[/i]</e></I></r>'
expect(convert(xml)).to eq('_this is italic text_')
end
it "converts multi-line italic text" do
xml = <<~XML
<r><I><s>[i]</s>this is italic text<br/>
on two lines<e>[/i]</e></I><br/>
<br/>
<I><s>[i]</s>this is italic text<br/>
<br/>
<br/>
with two empty lines<e>[/i]</e></I></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
_this is italic text
on two lines_
_this is italic text\\
\\
\\
with two empty lines_
MD
end
it "ignores duplicate italic text" do
xml = '<r><I><s>[i]</s><I><s>[i]</s>this is italic text<e>[/i]</e></I><e>[/i]</e></I></r>'
expect(convert(xml)).to eq('_this is italic text_')
end
end
context "underlined text" do
it "converts underlined text" do
xml = '<r><U><s>[u]</s>this is underlined text<e>[/u]</e></U></r>'
expect(convert(xml)).to eq('[u]this is underlined text[/u]')
end
it "converts multi-line underlined text" do
xml = <<~XML
<r><U><s>[u]</s>this is underlined text<br/>
on two lines<e>[/u]</e></U><br/>
<br/>
<U><s>[u]</s>this is underlined text<br/>
<br/>
<br/>
with two empty lines<e>[/u]</e></U></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
[u]this is underlined text
on two lines[/u]
[u]this is underlined text\\
\\
\\
with two empty lines[/u]
MD
end
it "ignores duplicate underlined text" do
xml = '<r><U><s>[u]</s><U><s>[u]</s>this is underlined text<e>[/u]</e></U><e>[/u]</e></U></r>'
expect(convert(xml)).to eq('[u]this is underlined text[/u]')
end
end
context "code blocks" do
context "inline code blocks enabled" do
let(:opts) { { allow_inline_code: true } }
it "converts single line code blocks" do
xml = '<r><CODE><s>[code]</s>one line of code<e>[/code]</e></CODE></r>'
expect(convert(xml, opts)).to eq('`one line of code`')
end
end
context "inline code blocks disabled" do
it "converts single line code blocks" do
xml = '<r>foo <CODE><s>[code]</s>some code<e>[/code]</e></CODE> bar</r>'
expect(convert(xml)).to eq(<<~MD.chomp)
foo
```text
some code
```
bar
MD
end
end
it "converts multi-line code blocks" do
xml = <<~XML
<r><CODE><s>[code]</s><i>
</i> /\_/\
( o.o )
&gt; ^ &lt;
<e>[/code]</e></CODE></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
```text
/\_/\
( o.o )
> ^ <
```
MD
end
it "adds leading and trailing linebreaks to code blocks" do
xml = <<~XML
<r>text before code block<br/>
<CODE><s>[code]</s><i>
</i>foo
bar
<e>[/code]</e></CODE>
text after code block</r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
text before code block
```text
foo
bar
```
text after code block
MD
end
end
context "lists" do
it "converts unordered lists" do
xml = <<~XML
<r><LIST><s>[list]</s>
<LI><s>[*]</s>Red</LI>
<LI><s>[*]</s>Blue</LI>
<LI><s>[*]</s>Yellow</LI>
<e>[/list]</e></LIST></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
* Red
* Blue
* Yellow
MD
end
it "converts ordered lists" do
xml = <<~XML
<r><LIST type="decimal"><s>[list=1]</s>
<LI><s>[*]</s>Go to the shops</LI>
<LI><s>[*]</s>Buy a new computer</LI>
<LI><s>[*]</s>Swear at computer when it crashes</LI>
<e>[/list]</e></LIST></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
1. Go to the shops
2. Buy a new computer
3. Swear at computer when it crashes
MD
end
it "converts all types of ordered lists into regular ordered lists" do
xml = <<~XML
<r><LIST type="upper-alpha"><s>[list=A]</s>
<LI><s>[*]</s>The first possible answer</LI>
<LI><s>[*]</s>The second possible answer</LI>
<LI><s>[*]</s>The third possible answer</LI>
<e>[/list]</e></LIST></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
1. The first possible answer
2. The second possible answer
3. The third possible answer
MD
end
it "adds leading and trailing linebreaks to lists if needed" do
xml = <<~XML
<r>foo
<LIST><s>[list]</s>
<LI><s>[*]</s>Red</LI>
<LI><s>[*]</s>Blue</LI>
<LI><s>[*]</s>Yellow</LI>
<e>[/list]</e></LIST>
bar</r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
foo
* Red
* Blue
* Yellow
bar
MD
end
it "converts nested lists" do
xml = <<~XML
<r><LIST><s>[list]</s>
<LI><s>[*]</s>Option 1
<LIST><s>[list]</s>
<LI><s>[*]</s>Option 1.1</LI>
<LI><s>[*]</s>Option 1.2</LI>
<e>[/list]</e></LIST></LI>
<LI><s>[*]</s>Option 2
<LIST><s>[list]</s>
<LI><s>[*]</s>Option 2.1
<LIST type="decimal"><s>[list=1]</s>
<LI><s>[*]</s> Red</LI>
<LI><s>[*]</s> Blue</LI>
<e>[/list]</e></LIST></LI>
<LI><s>[*]</s>Option 2.2</LI>
<e>[/list]</e></LIST></LI>
<e>[/list]</e></LIST></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
* Option 1
* Option 1.1
* Option 1.2
* Option 2
* Option 2.1
1. Red
2. Blue
* Option 2.2
MD
end
it "handles nested elements and linebreaks in list items" do
xml = <<~XML
<r><LIST><s>[list]</s><LI><s>[*]</s>some text <B><s>[b]</s><I><s>[i]</s>foo<e>[/i]</e></I><e>[/b]</e></B><br/>
or <B><s>[b]</s><I><s>[i]</s>bar<e>[/i]</e></I><e>[/b]</e></B> more text</LI><e>[/list]</e></LIST></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
* some text **_foo_**
or **_bar_** more text
MD
end
end
context "images" do
it "converts image" do
xml = <<~XML
<r><IMG src="https://example.com/foo.png"><s>[img]</s>
<URL url="https://example.com/foo.png">
<LINK_TEXT text="https://example.com/foo.png">https://example.com/foo.png</LINK_TEXT>
</URL><e>[/img]</e></IMG></r>
XML
expect(convert(xml)).to eq('![](https://example.com/foo.png)')
end
it "converts image with link" do
xml = <<~XML
<r><URL url="https://example.com/"><s>[url=https://example.com/]</s>
<IMG src="https://example.com/foo.png"><s>[img]</s>
<LINK_TEXT text="https://example.com/foo.png">https://example.com/foo.png</LINK_TEXT>
<e>[/img]</e></IMG><e>[/url]</e></URL></r>
XML
expect(convert(xml)).to eq('[![](https://example.com/foo.png)](https://example.com/)')
end
end
context "links" do
it "converts links created without BBCode" do
xml = '<r><URL url="https://en.wikipedia.org/wiki/Capybara">https://en.wikipedia.org/wiki/Capybara</URL></r>'
expect(convert(xml)).to eq('https://en.wikipedia.org/wiki/Capybara')
end
it "converts links created with BBCode" do
xml = '<r><URL url="https://en.wikipedia.org/wiki/Capybara"><s>[url]</s>https://en.wikipedia.org/wiki/Capybara<e>[/url]</e></URL></r>'
expect(convert(xml)).to eq('https://en.wikipedia.org/wiki/Capybara')
end
it "converts links with link text" do
xml = '<r><URL url="https://en.wikipedia.org/wiki/Capybara"><s>[url=https://en.wikipedia.org/wiki/Capybara]</s>Capybara<e>[/url]</e></URL></r>'
expect(convert(xml)).to eq('[Capybara](https://en.wikipedia.org/wiki/Capybara)')
end
it "converts internal links" do
opts = {
url_replacement: lambda do |url|
if url == 'http://forum.example.com/viewtopic.php?f=2&t=2'
'https://discuss.example.com/t/welcome-topic/18'
end
end
}
xml = '<r><URL url="http://forum.example.com/viewtopic.php?f=2&amp;t=2"><LINK_TEXT text="viewtopic.php?f=2&amp;t=2">http://forum.example.com/viewtopic.php?f=2&amp;t=2</LINK_TEXT></URL></r>'
expect(convert(xml, opts)).to eq('https://discuss.example.com/t/welcome-topic/18')
end
it "converts email links created without BBCode" do
xml = '<r><EMAIL email="foo.bar@example.com">foo.bar@example.com</EMAIL></r>'
expect(convert(xml)).to eq('<foo.bar@example.com>')
end
it "converts email links created with BBCode" do
xml = '<r><EMAIL email="foo.bar@example.com"><s>[email]</s>foo.bar@example.com<e>[/email]</e></EMAIL></r>'
expect(convert(xml)).to eq('<foo.bar@example.com>')
end
it "converts truncated, long links" do
xml = <<~XML
<r><URL url="http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli">
<s>[url]</s><LINK_TEXT text="http://answers.yahoo.com/question/index ... 223AAkkPli">
http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli</LINK_TEXT>
<e>[/url]</e></URL></r>
XML
expect(convert(xml)).to eq('http://answers.yahoo.com/question/index?qid=20070920134223AAkkPli')
end
it "converts BBCodes inside link text" do
xml = <<~XML
<r><URL url="http://example.com"><s>[url=http://example.com]</s>
<B><s>[b]</s>Hello <I><s>[i]</s>world<e>[/i]</e></I>!<e>[/b]</e></B>
<e>[/url]</e></URL></r>
XML
expect(convert(xml)).to eq('[**Hello _world_!**](http://example.com)')
end
end
context "quotes" do
it "converts simple quote" do
xml = <<~XML
<r><QUOTE><s>[quote]</s>Lorem<br/>
ipsum<e>[/quote]</e></QUOTE></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
> Lorem
> ipsum
MD
end
it "converts quote with line breaks" do
xml = <<~XML
<r><QUOTE><s>[quote]</s>First paragraph<br/>
<br/>
Second paragraph<br/>
<br/>
<br/>
Third paragraph<e>[/quote]</e></QUOTE></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
> First paragraph
>
> Second paragraph
> \\
> \\
> Third paragraph
MD
end
it "converts quote with line breaks and nested formatting" do
xml = <<~XML
<r><QUOTE><s>[quote]</s>
<I><s>[i]</s>this is italic<br/>
<B><s>[b]</s>and bold<br/>
text<br/>
<e>[/b]</e></B> on multiple<br/>
<br/>
<br/>
lines<e>[/i]</e></I>
<e>[/quote]</e></QUOTE></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
> _this is italic
> **and bold
> text**
> on multiple\\
> \\
> \\
> lines_
MD
end
it "converts quote with author attribute" do
xml = '<r><QUOTE author="Mr. Blobby"><s>[quote="Mr. Blobby"]</s>Lorem ipsum<e>[/quote]</e></QUOTE></r>'
expect(convert(xml)).to eq(<<~MD.chomp)
[quote="Mr. Blobby"]
Lorem ipsum
[/quote]
MD
end
it "converts quote with author attribute and line breaks" do
xml = <<~XML
<r><QUOTE author="Mr. Blobby"><s>[quote="Mr. Blobby"]</s>First paragraph<br/>
<br/>
Second paragraph<br/>
<br/>
Third paragraph<e>[/quote]</e></QUOTE></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
[quote="Mr. Blobby"]
First paragraph
Second paragraph
Third paragraph
[/quote]
MD
end
context "with user_id attribute" do
let(:opts) { { username_from_user_id: lambda { |user_id| user_id == 48 ? "mr_blobby" : nil } } }
it "uses the correct username when the user exists" do
xml = '<r><QUOTE author="Mr. Blobby" user_id="48"><s>[quote="Mr. Blobby" user_id=48]</s>Lorem ipsum<e>[/quote]</e></QUOTE></r>'
expect(convert(xml, opts)).to eq(<<~MD.chomp)
[quote="mr_blobby"]
Lorem ipsum
[/quote]
MD
end
it "uses the author name when the user does not exist" do
xml = '<r><QUOTE author="Mr. Blobby" user_id="49"><s>[quote="Mr. Blobby" user_id=48]</s>Lorem ipsum<e>[/quote]</e></QUOTE></r>'
expect(convert(xml, opts)).to eq(<<~MD.chomp)
[quote="Mr. Blobby"]
Lorem ipsum
[/quote]
MD
end
it "creates a blockquote when the user does not exist and the author is missing" do
xml = '<r><QUOTE user_id="49"><s>[quote=user_id=48]</s>Lorem ipsum<e>[/quote]</e></QUOTE></r>'
expect(convert(xml, opts)).to eq("> Lorem ipsum")
end
end
context "with post_id attribute" do
let(:opts) do
{ quoted_post_from_post_id: lambda { |post_id| { username: 'mr_blobby', post_number: 3, topic_id: 951 } if post_id == 43 } }
end
it "uses information from the quoted post if the post exists" do
xml = <<~XML
<r><QUOTE author="Mr. Blobby" post_id="43" time="1534626128" user_id="48">
<s>[quote="Mr. Blobby" post_id=43 time=1534626128 user_id=48]</s>Lorem ipsum<e>[/quote]</e>
</QUOTE></r>
XML
expect(convert(xml, opts)).to eq(<<~MD.chomp)
[quote="mr_blobby, post:3, topic:951"]
Lorem ipsum
[/quote]
MD
end
it "uses other attributes when post doesn't exist" do
xml = <<~XML
<r><QUOTE author="Mr. Blobby" post_id="44" time="1534626128" user_id="48">
<s>[quote="Mr. Blobby" post_id=44 time=1534626128 user_id=48]</s>Lorem ipsum<e>[/quote]</e>
</QUOTE></r>
XML
expect(convert(xml, opts)).to eq(<<~MD.chomp)
[quote="Mr. Blobby"]
Lorem ipsum
[/quote]
MD
end
end
it "converts nested quotes" do
xml = <<~XML
<r>Multiple nested quotes:<br/>
<QUOTE author="user3">
<s>[quote=user3]</s>
<QUOTE author="user2">
<s>[quote=user2]</s>
<QUOTE author="user1">
<s>[quote=user1]</s>
<B><s>[b]</s>foo <I><s>[i]</s>and<e>[/i]</e></I> bar<e>[/b]</e></B>
<e>[/quote]</e>
</QUOTE>
Lorem ipsum
<e>[/quote]</e>
</QUOTE>
nested quotes
<e>[/quote]</e>
</QUOTE>
Text after quotes.
</r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
Multiple nested quotes:
[quote="user3"]
[quote="user2"]
[quote="user1"]
**foo _and_ bar**
[/quote]
Lorem ipsum
[/quote]
nested quotes
[/quote]
Text after quotes.
MD
end
end
it "converts smilies" do
opts = {
smilie_to_emoji: lambda do |smilie|
case smilie
when ':D'
':smiley:'
when ':eek:'
':astonished:'
end
end
}
xml = '<r><E>:D</E> <E>:eek:</E></r>'
expect(convert(xml, opts)).to eq(":smiley: :astonished:")
end
context "attachments" do
it "converts attachments" do
opts = {
upload_md_from_file: lambda do |filename, index|
url = \
case index
when 0 then
"upload://hash2.png"
when 1 then
"upload://hash1.png"
end
"![#{filename}|231x231](#{url})"
end
}
xml = <<~XML
<r>Multiple attachments:
<ATTACHMENT filename="image1.png" index="1"><s>[attachment=1]</s>image1.png<e>[/attachment]</e></ATTACHMENT>
This is an inline image.<br/>
<br/>
And another one:
<ATTACHMENT filename="image2.png" index="0"><s>[attachment=0]</s>image2.png<e>[/attachment]</e></ATTACHMENT></r>
XML
expect(convert(xml, opts)).to eq(<<~MD.chomp)
Multiple attachments:
![image1.png|231x231](upload://hash1.png)
This is an inline image.
And another one:
![image2.png|231x231](upload://hash2.png)
MD
end
end
context "line breaks" do
it "converts line breaks" do
xml = <<~XML
<t>Lorem ipsum dolor sit amet.<br/>
<br/>
Consetetur sadipscing elitr.<br/>
<br/>
<br/>
Sed diam nonumy eirmod tempor.<br/>
<br/>
<br/>
<br/>
<br/>
Invidunt ut labore et dolore.</t>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
Lorem ipsum dolor sit amet.
Consetetur sadipscing elitr.
\\
\\
Sed diam nonumy eirmod tempor.
\\
\\
\\
\\
Invidunt ut labore et dolore.
MD
end
# With `traditional_linebreaks` every linebreak, including a single one, is
# expected to be rendered as a hard linebreak.
it "uses hard linebreaks when traditional line breaks are enabled" do
  xml = <<~XML
    <t>Lorem ipsum dolor sit amet.<br/>
    Consetetur sadipscing elitr.<br/>
    <br/>
    Sed diam nonumy eirmod tempor.<br/>
    <br/>
    <br/>
    <br/>
    Invidunt ut labore et dolore.</t>
  XML
  expect(convert(xml, traditional_linebreaks: true)).to eq(<<~MD.chomp)
    Lorem ipsum dolor sit amet.\\
    Consetetur sadipscing elitr.\\
    \\
    Sed diam nonumy eirmod tempor.\\
    \\
    \\
    \\
    Invidunt ut labore et dolore.
  MD
end
it "uses <br> in front of block elements" do
xml = <<~XML
<r>text before 4 empty lines<br/>
<br/>
<br/>
<br/>
<CODE><s>[code]</s>some code<e>[/code]</e></CODE>
text before 3 empty lines<br/>
<br/>
<br/>
<LIST><s>[list]</s>
<LI><s>[*]</s> item 1</LI>
<LI><s>[*]</s> item 2</LI>
<e>[/list]</e></LIST>
text before 2 empty lines<br/>
<br/>
<LIST><s>[list]</s>
<LI><s>[*]</s> item 1</LI>
<LI><s>[*]</s> item 2</LI>
<e>[/list]</e></LIST></r>
XML
expect(convert(xml)).to eq(<<~MD.chomp)
text before 4 empty lines
\\
\\
\\
<br>
```text
some code
```
text before 3 empty lines
\\
\\
<br>
* item 1
* item 2
text before 2 empty lines
\\
<br>
* item 1
* item 2
MD
end
end
context "whitespace" do
it "doesn't strip whitespaces from inline tags" do
xml = <<~XML
<r>Lorem<B><s>[b]</s> ipsum <e>[/b]</e></B>dolor<br/>
<I><s>[i]</s> sit <e>[/i]</e></I>amet,<br/>
consetetur<B><s>[b]</s> sadipscing <e>[/b]</e></B></r>
XML
expect(convert(xml)).to eq(<<~MD.rstrip)
Lorem **ipsum** dolor
_sit_ amet,
consetetur **sadipscing**
MD
end
it "preserves whitespace between tags" do
xml = "<r>foo <B><s>[b]</s>bold<e>[/b]</e></B> <I><s>[i]</s>italic<e>[/i]</e></I> <U><s>[u]</s>underlined<e>[/u]</e></U> bar</r>"
expect(convert(xml)).to eq("foo **bold** _italic_ [u]underlined[/u] bar")
end
end
context "unknown element" do
it "converts an unknown element right below the root element" do
xml = '<r><UNKNOWN><s>[unknown]</s>foo<e>[/unknown]</e></UNKNOWN></r>'
expect(convert(xml)).to eq('foo')
end
it "converts an unknown element inside a known element" do
xml = '<r><B><s>[b]</s><UNKNOWN><s>[unknown]</s>bar<e>[/unknown]</e></UNKNOWN><e>[/b]</e></B></r>'
expect(convert(xml)).to eq('**bar**')
end
end
context "font size" do
it "converts sizes to either <small> or <big>" do
xml = <<~XML
<r><SIZE size="50"><s>[size=50]</s>very small<e>[/size]</e></SIZE><br/>
<SIZE size="85"><s>[size=85]</s>small<e>[/size]</e></SIZE><br/>
<SIZE size="150"><s>[size=150]</s>large<e>[/size]</e></SIZE><br/>
<SIZE size="200"><s>[size=200]</s>very large<e>[/size]</e></SIZE></r>
XML
expect(convert(xml)).to eq(<<~MD.rstrip)
<small>very small</small>
<small>small</small>
<big>large</big>
<big>very large</big>
MD
end
it "ignores invalid sizes" do
xml = <<~XML
<r><SIZE size="-50"><s>[size=-50]</s>negative number<e>[/size]</e></SIZE><br/>
<SIZE size="0"><s>[size=0]</s>zero<e>[/size]</e></SIZE><br/>
<SIZE size="300"><s>[size=300]</s>too large<e>[/size]</e></SIZE><br/>
<SIZE size="abc"><s>[size=abc]</s>not a number<e>[/size]</e></SIZE><br/>
<SIZE><s>[size]</s>no size<e>[/size]</e></SIZE></r>
XML
expect(convert(xml)).to eq(<<~MD.rstrip)
negative number
zero
too large
not a number
no size
MD
end
end
end