discourse/script/import_scripts/telligent.rb
2023-01-09 11:13:22 +00:00

908 lines
28 KiB
Ruby
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# frozen_string_literal: true
require_relative "base"
require "tiny_tds"
# Import script for Telligent communities
#
# It's really hard to find all attachments, but the script tries to do it anyway.
#
# You can supply a JSON file if you need to map and ignore categories during the import
# by providing the path to the file in the `CATEGORY_MAPPING` environment variable.
# You can also add tags to remapped categories and remap multiple old forums into one
# category. Here's an example of such a `mapping.json` file:
#
# {
# "ignored_forum_ids": [41, 360, 378],
#
# "mapping": [
# {
# "category": ["New Category 1"],
# "forums": [
# { "id": 348, "tag": "some_tag" },
# { "id": 347, "tag": "another_tag" }
# ]
# },
# {
# "category": ["New Category 2"],
# "forums": [
# { "id": 9 }
# ]
# },
# {
# "category": ["Nested", "Category"],
# "forums": [
# { "id": 322 }
# ]
# }
# ]
# }
class ImportScripts::Telligent < ImportScripts::Base
# Number of rows fetched per query when paging through the source database.
BATCH_SIZE ||= 1000

# Avatar URL that points into the local filestore ("~/.../<directory>/<path>/<filename>").
LOCAL_AVATAR_REGEX ||=
  %r{\A~/.*(?<directory>communityserver-components-(?:selectable)?avatars)/(?<path>[^/]+)/(?<filename>.+)}i

# Avatar URL that has to be downloaded over HTTP(S).
REMOTE_AVATAR_REGEX ||= %r{\Ahttps?://}i

# Markup variants (HTML links and images, [View:...] macros, BBCode img/url tags)
# that reference a file via ".../cfs-file/__key/<directory>/<path>/<filename>".
ATTACHMENT_REGEXES ||= [
  %r{<a[^>]*\shref="[^"]*?/cfs-file(?:systemfile)?(?:\.ashx)?/__key/(?<directory>[^/]+)/(?<path>[^/]+)/(?<filename>.+?)".*?>.*?</a>}i,
  %r{<img[^>]*\ssrc="[^"]*?/cfs-file(?:systemfile)?(?:\.ashx)?/__key/(?<directory>[^/]+)/(?<path>[^/]+)/(?<filename>.+?)".*?>}i,
  %r{\[View:[^\]]*?/cfs-file(?:systemfile)?(?:\.ashx)?/__key/(?<directory>[^/]+)/(?<path>[^/]+)/(?<filename>.+?)(?:\:[:\d\s]*?)?\]}i,
  %r{\[(?<tag>img|url)\][^\[]*?cfs-file(?:systemfile)?(?:\.ashx)?/__key/(?<directory>[^/]+)/(?<path>[^/]+)/(?<filename>.+?)\[/\k<tag>\]}i,
  %r{\[(?<tag>img|url)=[^\[]*?cfs-file(?:systemfile)?(?:\.ashx)?/__key/(?<directory>[^/]+)/(?<path>[^/]+)/(?<filename>.+?)\][^\[]*?\[/\k<tag>\]}i,
]

# One "name:S:start:length:" entry of a serialized property-names string.
PROPERTY_NAMES_REGEX ||= /(?<name>\w+):S:(?<start>\d+):(?<length>\d+):/

# Links to old forum topics or replies ("/f/<forum>/t/<topic>.aspx",
# "/f/<forum>/p/<n>/<post>.aspx", "/f/<forum>/p/<post>/reply.aspx").
INTERNAL_LINK_REGEX ||=
  %r{\shref=".*?/f/\d+(?:(/t/(?<topic_id>\d+))|(?:/p/\d+/(?<post_id>\d+))|(?:/p/(?<post_id>\d+)/reply))\.aspx[^"]*?"}i

# Permalink normalizations that reduce an old URL to its "f/<id>" or
# "f/<id>/t/<id>" suffix, which is the form the permalinks are created with.
CATEGORY_LINK_NORMALIZATION = '/.*?(f\/\d+)$/\1'
TOPIC_LINK_NORMALIZATION = '/.*?(f\/\d+\/t\/\d+)$/\1'

# Maps the 4-digit hex codes found in stored file and directory names
# (between underscores, e.g. "_2D00_") to the character they stand for.
# Each key is the character's UTF-16LE encoding written as hex.
UNICODE_REPLACEMENTS = {
  "5F00" => "_",
  "2800" => "(",
  "2900" => ")",
  "2D00" => "-",
  "2C00" => ",",
  "2700" => "'",
  "5B00" => "[",
  "5D00" => "]",
  "3D00" => "=",
  "2600" => "&",
  "2100" => "!",
  "2300" => "#",
  "7E00" => "~",
  "2500" => "%",
  "2E00" => ".",
  "4000" => "@",
  "2B00" => "+",
  "2400" => "$",
  # The typographic quotes are written as escapes so they cannot be lost or
  # confused with ASCII quotes when the file is copied around.
  "1920" => "\u2019", # right single quotation mark
  "E900" => "é",
  "E000" => "à",
  "F300" => "ó",
  "1C20" => "\u201C", # left double quotation mark
  "1D20" => "\u201D", # right double quotation mark
  "B000" => "°",
  "0003" => ["0300".to_i(16)].pack("U"),
  "0103" => ["0301".to_i(16)].pack("U"),
}
# Opens the SQL Server connection described by the DB_* environment variables,
# remembers the optional filestore directory (FILE_BASE_DIR) and enables tagging.
def initialize
  super()

  connection_options = {
    host: ENV["DB_HOST"],
    username: ENV["DB_USERNAME"],
    password: ENV["DB_PASSWORD"],
    database: ENV["DB_NAME"],
    timeout: 60, # the user query is very slow
  }
  @client = TinyTds::Client.new(**connection_options)

  # Relative path (lowercased) => relative path (original case); filled by index_directory.
  @files = {}
  @filestore_root_directory = ENV["FILE_BASE_DIR"]

  SiteSetting.tagging_enabled = true
end
# Runs all import steps in order. Returns whatever the last step
# (mark_topics_as_solved) returns.
def execute
  preparation_steps = %i[add_permalink_normalizations index_filestore]
  import_steps = %i[import_categories import_users import_topics import_posts import_messages]

  (preparation_steps + import_steps).each { |step| send(step) }
  mark_topics_as_solved
end
# Builds the lookup table of all files below the filestore root directory.
#
# The filestore is optional: every consumer of the index returns early when
# no root directory is configured, so indexing is skipped in that case
# instead of failing inside Dir.foreach(nil).
def index_filestore
  return if @filestore_root_directory.blank?

  puts "", "Indexing filestore..."
  index_directory(@filestore_root_directory)
end
# Imports users in batches of BATCH_SIZE, paging by UserID.
#
# Only users that started a thread, wrote a reply or participate in a
# conversation that has messages are imported (see user_conditions).
def import_users
puts "", "Importing users..."
# SQL predicate shared by the count query and the batch query.
user_conditions = <<~SQL
(
EXISTS(SELECT 1
FROM te_Forum_Threads t
WHERE t.UserId = u.UserID) OR
EXISTS(SELECT 1
FROM te_Forum_ThreadReplies r
WHERE r.UserId = u.UserID) OR
EXISTS(SELECT 1
FROM cs_Messaging_ConversationParticipants p
JOIN cs_Messaging_ConversationMessages cm ON p.ConversationId = cm.ConversationId
JOIN cs_Messaging_Messages m ON m.MessageId = cm.MessageId
WHERE p.ParticipantId = u.UserID)
)
SQL
# Highest UserID seen so far; each batch continues after it.
last_user_id = -1
total_count = count(<<~SQL)
SELECT COUNT(1) AS count
FROM cs_Users u
WHERE #{user_conditions}
SQL
# Number of rows handled so far, passed to create_users as the offset.
import_count = 0
loop do
rows = query(<<~SQL)
SELECT TOP #{BATCH_SIZE}
u.UserID, u.Email, u.UserName, u.CreateDate,
ap.PropertyNames AP_PropertyNames, ap.PropertyValuesString AS AP_PropertyValues,
up.PropertyNames UP_PropertyNames, up.PropertyValues AS UP_PropertyValues
FROM cs_Users u
LEFT OUTER JOIN aspnet_Profile ap ON ap.UserId = u.MembershipID
LEFT OUTER JOIN cs_UserProfile up ON up.UserID = u.UserID
WHERE u.UserID > #{last_user_id} AND #{user_conditions}
ORDER BY UserID
SQL
break if rows.blank?
last_user_id = rows[-1]["UserID"]
# Skip the whole batch when every user in it has already been imported.
if all_records_exist?(:users, rows.map { |row| row["UserID"] })
import_count += rows.size
next
end
create_users(rows, total: total_count, offset: import_count) do |row|
# Both profile tables store their values as one string plus a
# "name:S:start:length:" index; parse_properties splits them apart.
ap_properties = parse_properties(row["AP_PropertyNames"], row["AP_PropertyValues"])
up_properties = parse_properties(row["UP_PropertyNames"], row["UP_PropertyValues"])
{
id: row["UserID"],
email: row["Email"],
username: row["UserName"],
name: ap_properties["commonName"],
created_at: row["CreateDate"],
bio_raw: html_to_markdown(ap_properties["bio"]),
location: ap_properties["location"],
website: ap_properties["webAddress"],
# Avatar and suspension are applied to the user passed to this proc.
post_create_action:
proc do |user|
import_avatar(user, up_properties["avatarUrl"])
suspend_user(user, up_properties["BannedUntil"], up_properties["UserBanReason"])
end,
}
end
import_count += rows.size
end
end
# TODO move into base importer (create_user) and use consistent error handling
# Imports the avatar of a user.
#
# Local avatars ("~/...") are looked up in the filestore, remote avatars
# (http/https) are downloaded. Nothing happens when no filestore is
# configured, the URL is blank, or the URL contains "anonymous".
# Failures are reported on STDERR and never abort the import.
#
# @param user the user that receives the avatar
# @param avatar_url [String, nil] avatar URL taken from the user profile
def import_avatar(user, avatar_url)
  return if @filestore_root_directory.blank? || avatar_url.blank?
  return if avatar_url.include?("anonymous")

  if (match_data = avatar_url.match(LOCAL_AVATAR_REGEX))
    # On disk the directory uses dots instead of dashes and every
    # dash-separated part of the path is a directory of its own.
    avatar_path =
      File.join(
        @filestore_root_directory,
        match_data[:directory].gsub("-", "."),
        match_data[:path].split("-"),
        match_data[:filename],
      )

    if File.file?(avatar_path)
      @uploader.create_avatar(user, avatar_path)
    else
      STDERR.puts "Could not find avatar: #{avatar_path}"
    end
  elsif avatar_url.match?(REMOTE_AVATAR_REGEX)
    begin
      UserAvatar.import_url_for_user(avatar_url, user)
    rescue StandardError => e
      # Best effort: a broken remote avatar must not stop the import, but it
      # is reported like a missing local avatar instead of being swallowed.
      STDERR.puts "Could not import avatar from #{avatar_url}: #{e.message}"
    end
  end
end
# Suspends the user when the ban recorded in the old profile is still active.
#
# @param user the user to suspend
# @param banned_until [String, nil] end of the ban as stored in the profile
# @param ban_reason [String, nil] reason that is written to the staff action log
def suspend_user(user, banned_until, ban_reason)
  return if banned_until.blank?

  begin
    # Parse first, compare afterwards. Writing `x = parse(x) > now` assigns
    # the boolean result of the comparison instead of the parsed date.
    suspended_till = DateTime.parse(banned_until)
  rescue ArgumentError
    # A malformed date in one profile should not abort the whole import.
    STDERR.puts "Could not parse ban date: #{banned_until.inspect}"
    return
  end

  return unless suspended_till > DateTime.now

  user.suspended_till = suspended_till
  user.suspended_at = DateTime.now
  user.save!
  StaffActionLogger.new(Discourse.system_user).log_user_suspend(user, ban_reason)
end
# Imports categories, either from the JSON mapping file named by the
# CATEGORY_MAPPING environment variable or directly from groups and forums.
def import_categories
  return import_mapped_forums_as_categories if ENV["CATEGORY_MAPPING"]

  import_groups_and_forums_as_categories
end
# Creates the categories described in the CATEGORY_MAPPING JSON file.
#
# Every name in a mapping's "category" array becomes one category nested
# below the previous name; the forums of the mapping are attached to the
# last (innermost) one. Import ids are the MD5 hex digest of the name.
# Also fills @forum_ids_to_tags and @ignored_forum_ids from the file.
def import_mapped_forums_as_categories
  puts "", "Importing categories..."

  mapping_config = JSON.parse(File.read(ENV["CATEGORY_MAPPING"]))
  @forum_ids_to_tags = {}
  @ignored_forum_ids = mapping_config["ignored_forum_ids"]

  category_rows = []
  mapping_config["mapping"].each do |entry|
    forums = entry["forums"]
    forum_ids = forums.map { |forum| forum["id"] }
    forums.each do |forum|
      @forum_ids_to_tags[forum["id"]] = forum["tag"] if forum["tag"].present?
    end

    names = entry["category"]
    parent_id = nil
    names.each_with_index do |name, position|
      import_id = Digest::MD5.hexdigest(name)
      innermost = position == names.size - 1
      category_rows << {
        id: import_id,
        name: name,
        parent_id: parent_id,
        forum_ids: innermost ? forum_ids : nil,
      }
      parent_id = import_id
    end
  end

  create_categories(category_rows) do |category_row|
    existing_id = category_id_from_imported_category_id(category_row[:id])

    if existing_id
      # Already imported: only attach the forums and create nothing.
      map_forum_ids(existing_id, category_row[:forum_ids])
      nil
    else
      {
        id: category_row[:id],
        name: category_row[:name],
        parent_category_id: category_id_from_imported_category_id(category_row[:parent_id]),
        post_create_action:
          proc { |category| map_forum_ids(category.id, category_row[:forum_ids]) },
      }
    end
  end
end
# Links old forum ids to a category: creates the "f/<id>" permalink unless it
# already exists and registers the category under the forum id.
# Returns nil when there are no forum ids, otherwise forum_ids.
def map_forum_ids(category_id, forum_ids)
  unless forum_ids.blank?
    forum_ids.each do |forum_id|
      permalink_url = "f/#{forum_id}"
      permalink_missing = !Permalink.exists?(url: permalink_url)
      Permalink.create(url: permalink_url, category_id: category_id) if permalink_missing

      add_category(forum_id, Category.find_by_id(category_id))
    end
  end
end
# Imports groups as parent categories and forums as child categories.
#
# Only groups with more than one forum become parent categories (import id
# "G<GroupID>"). A forum that is the only child of its parent category is not
# created; it is mapped onto the parent instead.
def import_groups_and_forums_as_categories
puts "", "Importing parent categories..."
parent_categories = query(<<~SQL)
SELECT GroupID, Name, HtmlDescription, DateCreated, SortOrder
FROM cs_Groups g
WHERE (SELECT COUNT(1)
FROM te_Forum_Forums f
WHERE f.GroupId = g.GroupID) > 1
ORDER BY SortOrder, Name
SQL
create_categories(parent_categories) do |row|
{
# The "G" prefix keeps group ids apart from forum ids, which are used as-is below.
id: "G#{row["GroupID"]}",
name: clean_category_name(row["Name"]),
description: html_to_markdown(row["HtmlDescription"]),
position: row["SortOrder"],
}
end
puts "", "Importing child categories..."
child_categories = query(<<~SQL)
SELECT ForumId, GroupId, Name, Description, DateCreated, SortOrder
FROM te_Forum_Forums
ORDER BY GroupId, SortOrder, Name
SQL
create_categories(child_categories) do |row|
parent_category_id = parent_category_id_for(row)
# Non-nil only when this forum is the single child of its parent category.
if category_id = replace_with_category_id(child_categories, parent_category_id)
add_category(row["ForumId"], Category.find_by_id(category_id))
url = "f/#{row["ForumId"]}"
Permalink.create(url: url, category_id: category_id) unless Permalink.exists?(url: url)
# Returning nil creates no category for this row.
nil
else
{
id: row["ForumId"],
parent_category_id: parent_category_id,
name: clean_category_name(row["Name"]),
description: html_to_markdown(row["Description"]),
position: row["SortOrder"],
# Creates the "f/<ForumId>" permalink for the category passed to this proc.
post_create_action:
proc do |category|
url = "f/#{row["ForumId"]}"
unless Permalink.exists?(url: url)
Permalink.create(url: url, category_id: category.id)
end
end,
}
end
end
end
# Returns the id of the category imported for the row's group ("G<GroupId>"),
# or nil when the row has no "GroupId" key.
def parent_category_id_for(row)
  return unless row.key?("GroupId")

  group_import_id = "G#{row["GroupId"]}"
  category_id_from_imported_category_id(group_import_id)
end
# Returns parent_category_id when exactly one child row belongs to that
# parent, otherwise nil.
def replace_with_category_id(child_categories, parent_category_id)
  only_child?(child_categories, parent_category_id) ? parent_category_id : nil
end
# True when exactly one of the child rows resolves to parent_category_id.
def only_child?(child_categories, parent_category_id)
  siblings = child_categories.count { |row| parent_category_id_for(row) == parent_category_id }
  siblings == 1
end
# Decodes HTML entities in a category name and trims surrounding whitespace.
def clean_category_name(name)
  decoded_name = CGI.unescapeHTML(name)
  decoded_name.strip
end
# Imports forum threads as topics in batches of BATCH_SIZE, paging by ThreadId.
#
# Threads in ignored forums are left out. Each topic gets a
# "f/<ForumId>/t/<ThreadId>" permalink and its recorded views.
def import_topics
puts "", "Importing topics..."
# Highest ThreadId seen so far; each batch continues after it.
last_topic_id = -1
total_count =
count("SELECT COUNT(1) AS count FROM te_Forum_Threads t WHERE #{ignored_forum_sql_condition}")
batches do |offset|
# The attachment join matches rows with ApplicationContentTypeId = 0 and
# ContentId = ThreadId, i.e. attachments of the thread itself.
rows = query(<<~SQL)
SELECT TOP #{BATCH_SIZE}
t.ThreadId, t.ForumId, t.UserId, t.TotalViews, t.ContentID AS TopicContentId,
t.Subject, t.Body, t.DateCreated, t.IsLocked, t.StickyDate,
a.ApplicationTypeId, a.ApplicationId, a.ApplicationContentTypeId, a.ContentId, a.FileName, a.IsRemote
FROM te_Forum_Threads t
LEFT JOIN te_Attachments a
ON (a.ApplicationId = t.ForumId AND a.ApplicationTypeId = 0 AND a.ContentId = t.ThreadId AND
a.ApplicationContentTypeId = 0)
WHERE t.ThreadId > #{last_topic_id} AND #{ignored_forum_sql_condition}
ORDER BY t.ThreadId
SQL
break if rows.blank?
last_topic_id = rows[-1]["ThreadId"]
# Topics use "T<ThreadId>" as import id (see import_topic_id).
next if all_records_exist?(:post, rows.map { |row| import_topic_id(row["ThreadId"]) })
create_posts(rows, total: total_count, offset: offset) do |row|
# Posts of users that were not imported are assigned to the system user.
user_id = user_id_from_imported_user_id(row["UserId"]) || Discourse::SYSTEM_USER_ID
post = {
id: import_topic_id(row["ThreadId"]),
title: CGI.unescapeHTML(row["Subject"]),
raw: raw_with_attachment(row, user_id, :topic),
category: category_id_from_imported_category_id(row["ForumId"]),
user_id: user_id,
created_at: row["DateCreated"],
closed: row["IsLocked"],
views: row["TotalViews"],
post_create_action:
proc do |action_post|
topic = action_post.topic
# Schedule the unpin job for topics that are still pinned.
if topic.pinned_until
Jobs.enqueue_at(topic.pinned_until, :unpin_topic, topic_id: topic.id)
end
url = "f/#{row["ForumId"]}/t/#{row["ThreadId"]}"
Permalink.create(url: url, topic_id: topic.id) unless Permalink.exists?(url: url)
import_topic_views(topic, row["TopicContentId"])
end,
}
# NOTE(review): assumes StickyDate is never NULL; a nil value would raise here - confirm.
if row["StickyDate"] > Time.now
post[:pinned_until] = row["StickyDate"]
post[:pinned_at] = row["DateCreated"]
end
post
end
end
end
# Import id used for topics: the thread id with a "T" prefix.
def import_topic_id(topic_id)
  "T" + topic_id.to_s
end
# Records the latest view per user for a topic, paging through
# te_Content_Views by UserId. Views of users that were not imported are skipped.
def import_topic_views(topic, content_id)
  last_user_id = -1

  batches do |_offset|
    sql = <<~SQL
      SELECT TOP #{BATCH_SIZE}
      UserId, MAX(CreatedUtcDate) AS ViewDate
      FROM te_Content_Views
      WHERE ContentId = '#{content_id}' AND UserId > #{last_user_id}
      GROUP BY UserId
      ORDER BY UserId
    SQL
    rows = query(sql)
    break if rows.blank?

    last_user_id = rows.last["UserId"]

    rows.each do |row|
      viewer_id = user_id_from_imported_user_id(row["UserId"])
      next unless viewer_id

      TopicViewItem.add(topic.id, "127.0.0.1", viewer_id, row["ViewDate"], true)
    end
  end
end
# SQL predicate that excludes threads of ignored forums, or "1 = 1" when no
# forums are ignored. The result is memoized after the first call.
def ignored_forum_sql_condition
  return @ignored_forum_sql_condition if @ignored_forum_sql_condition

  @ignored_forum_sql_condition =
    if @ignored_forum_ids.present?
      "t.ForumId NOT IN (#{@ignored_forum_ids.join(",")})"
    else
      "1 = 1"
    end
end
# Imports thread replies as posts in batches of BATCH_SIZE, paging by ThreadReplyId.
#
# A reply is attached to the post of its parent reply when there is one,
# otherwise to the first post of its topic. Replies whose parent cannot be
# found are reported and skipped.
def import_posts
puts "", "Importing posts..."
# Highest ThreadReplyId seen so far; each batch continues after it.
last_post_id = -1
total_count = count(<<~SQL)
SELECT COUNT(1) AS count
FROM te_Forum_ThreadReplies tr
JOIN te_Forum_Threads t ON (tr.ThreadId = t.ThreadId)
WHERE #{ignored_forum_sql_condition}
SQL
batches do |offset|
# IsFirstVerifiedAnswer is 1 only for the verified answer with the lowest
# ThreadReplyId of a thread. The parent join only accepts parents in the
# same thread that have a lower id than the reply.
rows = query(<<~SQL)
SELECT TOP #{BATCH_SIZE}
tr.ThreadReplyId, tr.ThreadId, tr.UserId, pr.ThreadReplyId AS ParentReplyId,
tr.Body, tr.ThreadReplyDate,
CONVERT(BIT,
CASE WHEN tr.AnswerVerifiedUtcDate IS NOT NULL AND NOT EXISTS(
SELECT 1
FROM te_Forum_ThreadReplies x
WHERE
x.ThreadId = tr.ThreadId AND x.ThreadReplyId < tr.ThreadReplyId AND x.AnswerVerifiedUtcDate IS NOT NULL
)
THEN 1
ELSE 0 END) AS IsFirstVerifiedAnswer,
a.ApplicationTypeId, a.ApplicationId, a.ApplicationContentTypeId, a.ContentId, a.FileName, a.IsRemote
FROM te_Forum_ThreadReplies tr
JOIN te_Forum_Threads t ON (tr.ThreadId = t.ThreadId)
LEFT JOIN te_Forum_ThreadReplies pr ON (tr.ParentReplyId = pr.ThreadReplyId AND tr.ParentReplyId < tr.ThreadReplyId AND tr.ThreadId = pr.ThreadId)
LEFT JOIN te_Attachments a
ON (a.ApplicationId = t.ForumId AND a.ApplicationTypeId = 0 AND a.ContentId = tr.ThreadReplyId AND
a.ApplicationContentTypeId = 1)
WHERE tr.ThreadReplyId > #{last_post_id} AND #{ignored_forum_sql_condition}
ORDER BY tr.ThreadReplyId
SQL
break if rows.blank?
last_post_id = rows[-1]["ThreadReplyId"]
next if all_records_exist?(:post, rows.map { |row| row["ThreadReplyId"] })
create_posts(rows, total: total_count, offset: offset) do |row|
# Fall back to the topic ("T<ThreadId>") when there is no usable parent reply.
imported_parent_id =
row["ParentReplyId"]&.nonzero? ? row["ParentReplyId"] : import_topic_id(row["ThreadId"])
parent_post = topic_lookup_from_imported_post_id(imported_parent_id)
# Posts of users that were not imported are assigned to the system user.
user_id = user_id_from_imported_user_id(row["UserId"]) || Discourse::SYSTEM_USER_ID
if parent_post
post = {
id: row["ThreadReplyId"],
raw: raw_with_attachment(row, user_id, :post),
user_id: user_id,
topic_id: parent_post[:topic_id],
created_at: row["ThreadReplyDate"],
reply_to_post_number: parent_post[:post_number],
}
# This custom field is what mark_topics_as_solved looks for.
post[:custom_fields] = { is_accepted_answer: "true" } if row["IsFirstVerifiedAnswer"]
post
else
# The block returns nil here (the value of puts).
puts "Failed to import post #{row["ThreadReplyId"]}. Parent was not found."
end
end
end
end
# Imports conversations as private messages.
#
# The first message of a conversation becomes the private message topic
# (addressed to the conversation's participants); all following messages of
# the same conversation become posts in that topic. Afterwards all imported
# private messages are marked as read.
def import_messages
puts "", "Importing messages..."
# Conversation of the previously processed row and the import id of the
# message that started it. Rows are ordered by conversation, so a change of
# ConversationId means a new topic begins.
current_conversation_id = ""
current_topic_import_id = ""
# Last ConversationId of the previous batch; the next batch continues after it.
last_conversation_id = ""
total_count = count(<<~SQL)
SELECT COUNT(1) AS count
FROM cs_Messaging_Messages m
JOIN cs_Messaging_ConversationMessages cm ON m.MessageId = cm.MessageId
SQL
batches do |offset|
# NOTE(review): paging resumes at ConversationId > last_conversation_id, so
# when a conversation's messages straddle a batch boundary the remaining
# messages of that conversation appear to be skipped - confirm.
if last_conversation_id.blank?
conditions = ""
else
conditions = <<~SQL
WHERE cm.ConversationId > '#{last_conversation_id}'
SQL
end
# ParticipantIds is a ';'-separated list of the conversation's participant
# ids, built with STUFF(... FOR XML PATH('')).
rows = query(<<~SQL)
SELECT TOP #{BATCH_SIZE}
cm.ConversationId, m.MessageId, m.AuthorId, m.Subject, m.Body, m.DateCreated,
STUFF((SELECT ';' + CONVERT(VARCHAR, p.ParticipantId)
FROM cs_Messaging_ConversationParticipants p
WHERE p.ConversationId = cm.ConversationId
ORDER BY p.ParticipantId
FOR XML PATH('')), 1, 1, '') AS ParticipantIds
FROM cs_Messaging_Messages m
JOIN cs_Messaging_ConversationMessages cm ON m.MessageId = cm.MessageId
#{conditions}
ORDER BY cm.ConversationId, m.DateCreated, m.MessageId
SQL
break if rows.blank?
last_row = rows[-1]
last_conversation_id = last_row["ConversationId"]
next if all_records_exist?(:post, rows.map { |row| row["MessageId"] })
create_posts(rows, total: total_count, offset: offset) do |row|
# Messages of users that were not imported are assigned to the system user.
user_id = user_id_from_imported_user_id(row["AuthorId"]) || Discourse::SYSTEM_USER_ID
post = {
id: row["MessageId"],
raw: raw_with_attachment(row, user_id, :message),
user_id: user_id,
created_at: row["DateCreated"],
}
if current_conversation_id == row["ConversationId"]
# Follow-up message: add it to the topic created for the first message.
parent_post = topic_lookup_from_imported_post_id(current_topic_import_id)
if parent_post
post[:topic_id] = parent_post[:topic_id]
else
puts "Failed to import message #{row["MessageId"]}. Parent was not found."
post = nil
end
else
# First message of a conversation: create the private message topic.
post[:title] = CGI.unescapeHTML(row["Subject"])
post[:archetype] = Archetype.private_message
post[:target_usernames] = get_recipient_usernames(row)
if post[:target_usernames].empty?
puts "Private message without recipients. Skipping #{row["MessageId"]}"
post = nil
end
# Remembered even when the message was skipped, so follow-ups of this
# conversation look up this id.
current_topic_import_id = row["MessageId"]
end
current_conversation_id = row["ConversationId"]
post
end
end
# Mark all imported messages as read
DB.exec(<<~SQL)
UPDATE topic_users tu
SET last_read_post_number = t.highest_post_number
FROM topics t
JOIN topic_custom_fields tcf ON t.id = tcf.topic_id
WHERE tu.topic_id = t.id
AND tu.user_id > 0
AND t.archetype = 'private_message'
AND tcf.name = 'import_id'
SQL
end
# Splits a ';'-separated list of participant ids into unique, trimmed ids.
#
# Ids are trimmed before duplicates are removed, so " 1" and "1" count as
# the same participant.
#
# @param participant_ids [String, nil] e.g. "12;15;20"
# @return [Array<String>] unique ids in their original order
def get_recipient_user_ids(participant_ids)
  return [] if participant_ids.blank?

  participant_ids.split(";").map(&:strip).uniq
end
# Usernames of the participants listed in the row's "ParticipantIds";
# participants without an imported user are left out.
def get_recipient_usernames(row)
  participant_ids = get_recipient_user_ids(row["ParticipantIds"])
  usernames =
    participant_ids.map do |import_user_id|
      find_user_by_import_id(import_user_id).try(:username)
    end
  usernames.compact
end
# Recursively records every file below root_directory in @files. Keys are the
# lowercased paths relative to the filestore root, values keep the original case.
def index_directory(root_directory)
  Dir.each_child(root_directory) do |entry|
    full_path = File.join(root_directory, entry)

    if File.directory?(full_path)
      index_directory(full_path)
      next
    end

    relative_path = full_path.delete_prefix(@filestore_root_directory).delete_prefix("/")
    @files[relative_path.downcase] = relative_path
  end
end
# Builds the Markdown of a topic, post or message, including its attachment.
#
# Embedded attachments are replaced first, then the body is converted to
# Markdown. If the row has an attachment (FileName), a remote attachment is
# appended as plain text and a local one is uploaded and appended, unless the
# same file or upload was already embedded. A local attachment that cannot be
# found is reported on STDERR.
#
# @param row [Hash] database row with "Body" and the te_Attachments columns
# @param user_id [Integer] owner of the created uploads
# @param type [Symbol] :topic, :post or :message (used for error messages)
# @return [String] the Markdown, "" when the body is blank
def raw_with_attachment(row, user_id, type)
  raw, embedded_paths, upload_ids = replace_embedded_attachments(row, user_id, type)
  raw = html_to_markdown(raw) || ""

  filename = row["FileName"]
  return raw if @filestore_root_directory.blank? || filename.blank?
  return "#{raw}\n#{filename}" if row["IsRemote"]

  # Attachments are stored below zero-padded id directories; the ten digit
  # content id is split into five two-digit directories.
  base_path =
    File.join(
      "telligent.evolution.components.attachments",
      "%02d" % row["ApplicationTypeId"],
      "%02d" % row["ApplicationId"],
      "%02d" % row["ApplicationContentTypeId"],
      ("%010d" % row["ContentId"]).scan(/.{2}/),
    )

  # fix_attachment_path modifies its first argument in place, so it gets a
  # copy and base_path stays usable for the error message.
  path = fix_attachment_path(base_path.dup, filename)

  # fix_attachment_path returns nil for files it cannot find. Report those
  # instead of skipping them silently.
  if path.nil? || !File.file?(path)
    print_file_not_found_error(type, path || File.join(base_path, filename), row)
    return raw
  end

  return raw if embedded_paths.include?(path)

  upload = @uploader.create_upload(user_id, path, filename)
  if upload.present? && upload.persisted? && !upload_ids.include?(upload.id)
    raw = "#{raw}\n#{@uploader.html_for_upload(upload, filename)}"
  end

  raw
end
# Reports a missing file on STDERR together with the id of the topic, post or
# message it belongs to.
def print_file_not_found_error(type, path, row)
  id_column = { topic: "ThreadId", post: "ThreadReplyId", message: "MessageId" }[type]
  id = id_column ? row[id_column] : nil

  STDERR.puts "Could not find file for #{type} #{id}: #{path}"
end
# Replaces references to filestore files in the row's body with uploads.
#
# Markup is left untouched when the file cannot be found (reported on STDERR)
# or when the upload could not be created, so no content is lost.
#
# @param row [Hash] database row with a "Body" column
# @param user_id [Integer] owner of the created uploads
# @param type [Symbol] :topic, :post or :message (used for error messages)
# @return [Array] the new body, the paths of the uploaded files and the ids
#   of the created uploads
def replace_embedded_attachments(row, user_id, type)
  raw = row["Body"]
  paths = []
  upload_ids = []

  # Nothing to do without a filestore or without a body to search.
  return raw, paths, upload_ids if @filestore_root_directory.blank? || raw.blank?

  ATTACHMENT_REGEXES.each do |regex|
    raw =
      raw.gsub(regex) do
        # Keep the match: the calls below may run other regular expressions.
        match_data = Regexp.last_match
        path = File.join(match_data[:directory], match_data[:path])

        # fix_attachment_path modifies its first argument in place, so it
        # gets a copy and the error message shows the path as written.
        fixed_path = fix_attachment_path(path.dup, match_data[:filename])

        if fixed_path && File.file?(fixed_path)
          filename = File.basename(fixed_path)
          upload = @uploader.create_upload(user_id, fixed_path, filename)

          if upload.present? && upload.persisted?
            paths << fixed_path
            upload_ids << upload.id
            @uploader.html_for_upload(upload, filename)
          else
            # A failed upload must not delete the original markup; a nil
            # block result would replace the match with an empty string.
            match_data[0]
          end
        else
          print_file_not_found_error(type, File.join(path, match_data[:filename]), row)
          match_data[0]
        end
      end
  end

  [raw, paths, upload_ids]
end
# Tries to locate an attachment in the indexed filestore.
#
# The directory and the filename are normalized step by step; after every
# step the index is consulted again and the first existing file wins. The
# steps build on each other, so their order matters.
#
# Note: base_path is modified in place (downcase!), which is visible to the caller.
#
# Returns the absolute path of the file, or nil when it cannot be found.
def fix_attachment_path(base_path, filename)
# 1. Path and filename as given.
path = find_correct_path(base_path, filename)
return path if attachment_exists?(path)
# 2. Lowercased directory.
base_path.downcase!
path = find_correct_path(base_path, filename)
return path if attachment_exists?(path)
# 3. Filename with HTML entities decoded. This creates a new string, so the
# mutating calls below do not touch the caller's filename.
filename = CGI.unescapeHTML(filename)
path = find_correct_path(base_path, filename)
return path if attachment_exists?(path)
# 4. Filename with dashes replaced by spaces.
filename.gsub!("-", " ")
filename.strip!
path = find_correct_path(base_path, filename)
return path if attachment_exists?(path)
# 5. First directory with dots instead of dashes.
directories = base_path.split(File::SEPARATOR)
first_directory = directories.shift
first_directory.gsub!("-", ".")
base_path = File.join(first_directory, directories)
path = find_correct_path(base_path, filename)
return path if attachment_exists?(path)
# 6. Remaining directories split at dots and dashes into nested directories.
directories.map! { |d| File.join(d.split(/[\.\-]/).map(&:strip)) }
base_path = File.join(first_directory, directories)
path = find_correct_path(base_path, filename)
return path if attachment_exists?(path)
# 7. Plus signs in directory names replaced by spaces.
directories = base_path.split(File::SEPARATOR)
directories.map! { |d| d.gsub("+", " ").strip }
base_path = File.join(directories)
path = find_correct_path(base_path, filename)
return path if attachment_exists?(path)
# 8. Encoded characters ("_XXXX_") in the filename replaced.
replace_codes!(filename)
path = find_correct_path(base_path, filename)
return path if attachment_exists?(path)
# 9. Encoded characters in the directory replaced as well.
replace_codes!(base_path)
path = find_correct_path(base_path, filename)
return path if attachment_exists?(path)
# 10. Trailing ":<digits>" groups removed from the filename.
filename.gsub!(/(?:\:\d+)+$/, "")
path = find_correct_path(base_path, filename)
return path if attachment_exists?(path)
# 11. Last resort: an indexed file named "<path>-<W>x<H>.<ext>". It is only
# used when exactly one indexed file matches.
path = File.join(base_path, filename)
path_regex = Regexp.new("^#{Regexp.escape(path)}-\\d+x\\d+\\.\\w+$", Regexp::IGNORECASE)
path = find_correct_path_with_regex(path_regex)
return path if attachment_exists?(path)
nil
end
# Looks up base_path/filename in the file index, ignoring case. Returns the
# absolute path using the case found on disk, or nil when it is not indexed.
def find_correct_path(base_path, filename)
  lookup_key = File.join(base_path, filename).downcase
  indexed_path = @files[lookup_key]
  return nil unless indexed_path

  File.join(@filestore_root_directory, indexed_path)
end
# Returns the absolute path of the indexed file whose key matches regex, but
# only when exactly one key matches; otherwise nil.
def find_correct_path_with_regex(regex)
  matching_keys = @files.keys.select { |key| regex.match?(key) }
  return nil unless matching_keys.size == 1

  File.join(@filestore_root_directory, @files[matching_keys.first])
end
# True when path is present and points to an existing regular file.
def attachment_exists?(path)
  return false if path.blank?

  File.file?(path)
end
# Replaces encoded characters in text, in place.
#
# A run of 4-digit hex codes between underscores (e.g. "_2D00_" or
# "_2D005F00_") is replaced by the characters from UNICODE_REPLACEMENTS.
# A run that contains any unknown code is left exactly as it was, so
# ordinary names such as "report_2019_final" are not damaged.
#
# @param text [String] string to modify
# @return [String, nil] text, or nil when nothing matched (see String#gsub!)
def replace_codes!(text)
  text.gsub!(/_(\h{4}+)_/i) do |encoded|
    # `encoded` is the complete match including both underscores. It is taken
    # from the block argument because String#scan below overwrites the
    # method's last match.
    codes = Regexp.last_match(1).upcase.scan(/.{4}/)
    characters = codes.map { |code| UNICODE_REPLACEMENTS[code] }

    characters.include?(nil) ? encoded : characters.join
  end
end
# Converts HTML to Markdown. Internal links are rewritten first; afterwards
# quote tags are put on lines of their own and "[View:http...]" macros are
# reduced to their URL. Blank input is returned unchanged.
def html_to_markdown(html)
  return html if html.blank?

  markdown = HtmlToMarkdown.new(fix_internal_links(html)).to_markdown

  markdown =
    markdown
      .gsub(/\[quote.*?\]/, "\n\\0\n")
      .gsub(%r{(?<!^)\[/quote\]}, "\n[/quote]\n")
      .gsub(%r{\[/quote\](?!$)}, "\n[/quote]\n")
      .gsub(/\[View:(http.*?)[:\d\s]*?(?:\]|\z)/i, '\1')

  markdown.strip
end
# Rewrites links to old forum topics and replies so that they point to the
# imported post. Links whose target was not imported are left unchanged.
def fix_internal_links(html)
  html.gsub(INTERNAL_LINK_REGEX) do
    link = Regexp.last_match
    topic_id = link[:topic_id]

    imported_id = topic_id.present? ? import_topic_id(topic_id) : link[:post_id]
    post = imported_id ? topic_lookup_from_imported_post_id(imported_id) : nil

    if post
      %Q| href="#{Discourse.base_url}#{post[:url]}"|
    else
      link[0]
    end
  end
end
# Splits a serialized profile into a hash of property name => value.
#
# `names` is a sequence of "name:S:start:length:" entries; each entry selects
# `length` characters of `values` beginning at index `start`.
#
# @param names [String, nil] the property index
# @param values [String, nil] the concatenated property values
# @return [Hash{String => String}] empty when either argument is blank
def parse_properties(names, values)
  properties = {}
  return properties if names.blank? || values.blank?

  names.scan(PROPERTY_NAMES_REGEX) do |name, start, length|
    # Slice by start and length. An end index of start + length - 1 is -1 for
    # an empty property at offset 0, and -1 as range end means "up to the end
    # of the string", which returns all values instead of "".
    properties[name] = values[start.to_i, length.to_i]
  end

  properties
end
# Stores the accepted answer of every topic: for each post flagged with the
# 'is_accepted_answer' custom field an 'accepted_answer_post_id' topic custom
# field is inserted, unless the topic already has one.
def mark_topics_as_solved
  puts "", "Marking topics as solved..."

  insert_accepted_answers = <<~SQL
    INSERT INTO topic_custom_fields (name, value, topic_id, created_at, updated_at)
    SELECT 'accepted_answer_post_id', pcf.post_id, p.topic_id, p.created_at, p.created_at
    FROM post_custom_fields pcf
    JOIN posts p ON p.id = pcf.post_id
    WHERE pcf.name = 'is_accepted_answer' AND pcf.value = 'true'
    AND NOT EXISTS (
    SELECT 1
    FROM topic_custom_fields x
    WHERE x.topic_id = p.topic_id AND x.name = 'accepted_answer_post_id'
    )
  SQL

  DB.exec(insert_accepted_answers)
end
# Adds the category and topic link normalizations to the site's
# '|'-separated permalink normalizations, keeping the existing ones.
def add_permalink_normalizations
  current_setting = SiteSetting.permalink_normalizations
  normalizations = current_setting.blank? ? [] : current_setting.split("|")

  [CATEGORY_LINK_NORMALIZATION, TOPIC_LINK_NORMALIZATION].each do |normalization|
    add_normalization(normalizations, normalization)
  end

  SiteSetting.permalink_normalizations = normalizations.join("|")
end
# Appends normalization to the list unless it is already part of it.
# Returns the list when it was changed, nil otherwise.
def add_normalization(normalizations, normalization)
  return if normalizations.include?(normalization)

  normalizations << normalization
end
# Calls the parent class's `batches` with this script's BATCH_SIZE.
# The caller's block is passed along implicitly by `super`.
def batches
super(BATCH_SIZE)
end
# Executes sql on the source database and returns all rows as an array.
def query(sql)
  result = @client.execute(sql)
  result.to_a
end
# Runs a query that selects a single "count" column and returns its value
# from the first row.
def count(sql)
  first_row = query(sql).first
  first_row["count"]
end
end
# Runs the import when this file is executed. `perform` is not defined in
# this file; presumably it comes from ImportScripts::Base and calls `execute`.
ImportScripts::Telligent.new.perform