2015-06-29 07:45:52 -05:00
require " mysql2 "
require File . expand_path ( File . dirname ( __FILE__ ) + " /base.rb " )
require 'htmlentities'
class ImportScripts :: VanillaSQL < ImportScripts :: Base
# Name of the MySQL database the Vanilla data is read from.
VANILLA_DB = "vanilla_mysql".freeze

# Prefix put in front of every Vanilla table name (User, Discussion, Comment, ...).
TABLE_PREFIX = "GDN_".freeze

# Avatars are only imported when this points at an existing directory.
ATTACHMENTS_BASE_DIR = nil # "/absolute/path/to/attachments" set the absolute path if you have attachments

# Number of rows fetched per query by the batched importers.
BATCH_SIZE = 1000

# When false, clean_up replaces chevrons outside inline code with HTML entities.
CONVERT_HTML = true
2015-06-29 07:45:52 -05:00
# Sets up the HTML entity decoder and the MySQL connection, then decides
# whether tags will be imported.
def initialize
  super

  @htmlentities = HTMLEntities.new
  @client = Mysql2::Client.new(
    host: "localhost",
    username: "root",
    password: "pa$$word",
    database: VANILLA_DB
  )

  # Tags are imported only when the Tag table can be queried and reports at
  # least one tag with countdiscussions > 0; any query error disables them.
  @import_tags =
    begin
      tag_count_sql = "select count(*) count from #{TABLE_PREFIX}Tag where countdiscussions > 0"
      @client.query(tag_count_sql).first["count"].to_i > 0
    rescue => e
      puts "Tags won't be imported. #{e.message}"
      false
    end
end
# Runs the import steps in order. Tagging is switched on in the site
# settings first when initialize found tags to import.
def execute
  if @import_tags
    SiteSetting.tagging_enabled = true
    SiteSetting.max_tags_per_topic = 10
  end

  # Order matters: topics and posts look up the users and categories
  # created by the earlier steps.
  %i[
    import_users
    import_avatars
    import_categories
    import_topics
    import_posts
    update_tl0
  ].each { |step| send(step) }

  create_permalinks
end
# Imports the rows of the Vanilla User table in batches of BATCH_SIZE.
# Rows without an email or name, and rows already imported, are skipped.
def import_users
  puts '', "creating users"

  @user_is_deleted = false
  @last_deleted_username = nil

  total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}User;").first['count']

  batches(BATCH_SIZE) do |offset|
    rows = mysql_query(
      "SELECT UserID, Name, Title, Location, About, Email,
              DateInserted, DateLastActive, InsertIPAddress, Admin
       FROM #{TABLE_PREFIX}User
       ORDER BY UserID ASC
       LIMIT #{BATCH_SIZE}
       OFFSET #{offset};")

    break if rows.size < 1

    imported_ids = rows.map { |row| row['UserID'].to_i }
    next if all_records_exist?(:users, imported_ids)

    create_users(rows, total: total_count, offset: offset) do |user|
      next if user['Email'].blank? || user['Name'].blank?
      next if @lookup.user_id_from_imported_user_id(user['UserID'])

      # EVERY deleted user record in Vanilla has the same username: [Deleted User]
      # Save our UserNameSuggester some pain by starting from the username
      # that was handed to the previous deleted user.
      @user_is_deleted = (user['Name'] == '[Deleted User]')
      username = @user_is_deleted ? (@last_deleted_username || user['Name']) : user['Name']

      inserted_at = user['DateInserted']
      last_active_at = user['DateLastActive']

      {
        id: user['UserID'],
        email: user['Email'],
        username: username,
        name: user['Name'],
        created_at: inserted_at.nil? ? 0 : Time.zone.at(inserted_at),
        bio_raw: user['About'],
        registration_ip_address: user['InsertIPAddress'],
        last_seen_at: last_active_at.nil? ? 0 : Time.zone.at(last_active_at),
        location: user['Location'],
        admin: user['Admin'] == 1,
        # Remember the username given to a deleted user for the next one.
        post_create_action: proc { |created| @last_deleted_username = created.username if @user_is_deleted }
      }
    end
  end
end
2016-04-01 14:57:20 -05:00
# Imports user avatars from ATTACHMENTS_BASE_DIR.
#
# Does nothing unless ATTACHMENTS_BASE_DIR is set and exists on disk. For each
# user carrying an "import_id" custom field, the photo column of the Vanilla
# User table is resolved to a file, uploaded, and set as the user's avatar.
# Users whose photo is missing, in an unknown format, or not found on disk
# are skipped with a message.
def import_avatars
  # File.exist? is used because File.exists? was removed in Ruby 3.2.
  return unless ATTACHMENTS_BASE_DIR && File.exist?(ATTACHMENTS_BASE_DIR)

  puts "", "importing user avatars"

  User.find_each do |u|
    import_id = u.custom_fields["import_id"]
    next unless import_id

    r = mysql_query("SELECT photo FROM #{TABLE_PREFIX}User WHERE UserID = #{import_id};").first
    next if r.nil?

    photo = r["photo"]
    next unless photo.present?

    # Possible encoded values:
    # 1. cf://uploads/userpics/820/Y0AFUQYYM6QN.jpg
    # 2. ~cf/userpics2/cf566487133f1f538e02da96f9a16b18.jpg
    # 3. ~cf/userpics/txkt8kw1wozn.jpg
    parts = photo.squeeze("/").split("/")
    if parts[0] == "cf:"
      # drop the "cf:" scheme and the first path segment, keep the directories
      photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[2..-2].join('/')}".squeeze("/")
    elsif parts[0] == "~cf"
      photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[1..-2].join('/')}".squeeze("/")
    else
      puts "UNKNOWN FORMAT: #{photo}"
      next
    end

    unless File.exist?(photo_path)
      puts "Path to avatar file not found! Skipping. #{photo_path}"
      next
    end

    photo_real_filename = find_photo_file(photo_path, parts.last)
    if photo_real_filename.nil?
      puts "Couldn't find file for #{photo}. Skipping."
      next
    end

    print "."

    upload = create_upload(u.id, photo_real_filename, File.basename(photo_real_filename))
    if upload.persisted?
      # import_mode is switched off only around create_user_avatar
      u.import_mode = false
      u.create_user_avatar
      u.import_mode = true
      u.user_avatar.update(custom_upload_id: upload.id)
      u.update(uploaded_avatar_id: upload.id)
    else
      puts "Error: Upload did not persist for #{u.username} #{photo_real_filename}!"
    end
  end
end
# Locates the avatar file for base_filename inside the directory path.
#
# An exact match is preferred. Otherwise the file exists with a one-letter
# prefix; "p" seems to be the full file, so it is tried before "t" and "n".
#
# Returns the full path of the first candidate that exists, or nil.
def find_photo_file(path, base_filename)
  candidate_names = [base_filename] + %w[p t n].map { |prefix| "#{prefix}#{base_filename}" }

  # File.exist? is used because File.exists? was removed in Ruby 3.2.
  candidate_names
    .map { |name| File.join(path, name) }
    .find { |full_path| File.exist?(full_path) }
end
2015-06-29 07:45:52 -05:00
# Imports every row of the Vanilla Category table as a category.
# Names and descriptions are HTML-unescaped; a NULL description is passed
# through as nil instead of raising inside CGI.unescapeHTML.
def import_categories
  puts "", "importing categories..."

  categories = mysql_query("
    SELECT CategoryID, Name, Description
    FROM #{TABLE_PREFIX}Category
    ORDER BY CategoryID ASC
  ").to_a

  create_categories(categories) do |category|
    description = category['Description']

    {
      id: category['CategoryID'],
      name: CGI.unescapeHTML(category['Name']),
      description: description && CGI.unescapeHTML(description)
    }
  end
end
# Imports the rows of the Vanilla Discussion table as topics, in batches of
# BATCH_SIZE. When tag import is enabled, each created topic is tagged with
# the non-empty tag names linked to its discussion.
def import_topics
  puts "", "importing topics..."

  # Uses TABLE_PREFIX like every other query in this importer; the
  # {discussionid} placeholder is substituted per discussion below.
  tag_names_sql = "select t.name as tag_name from #{TABLE_PREFIX}Tag t, #{TABLE_PREFIX}TagDiscussion td where t.tagid = td.tagid and td.discussionid = {discussionid} and t.name != '';"

  total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}Discussion;").first['count']

  batches(BATCH_SIZE) do |offset|
    discussions = mysql_query(
      "SELECT DiscussionID, CategoryID, Name, Body,
              DateInserted, InsertUserID
       FROM #{TABLE_PREFIX}Discussion
       ORDER BY DiscussionID ASC
       LIMIT #{BATCH_SIZE}
       OFFSET #{offset};")

    break if discussions.size < 1
    next if all_records_exist?(:posts, discussions.map { |t| "discussion#" + t['DiscussionID'].to_s })

    create_posts(discussions, total: total_count, offset: offset) do |discussion|
      discussion_id = discussion['DiscussionID'].to_s

      {
        id: "discussion#" + discussion_id,
        # fall back to the system user when the author was not imported
        user_id: user_id_from_imported_user_id(discussion['InsertUserID']) || Discourse::SYSTEM_USER_ID,
        title: discussion['Name'],
        category: category_id_from_imported_category_id(discussion['CategoryID']),
        raw: clean_up(discussion['Body']),
        created_at: Time.zone.at(discussion['DateInserted']),
        post_create_action: proc do |post|
          if @import_tags
            tag_names = mysql_query(tag_names_sql.gsub('{discussionid}', discussion_id)).map { |row| row['tag_name'] }
            DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names)
          end
        end
      }
    end
  end
end
# Imports the rows of the Vanilla Comment table as replies, in batches of
# BATCH_SIZE. Comments whose discussion was not imported, or whose body is
# blank, are skipped.
def import_posts
  puts "", "importing posts..."

  total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}Comment;").first['count']

  batches(BATCH_SIZE) do |offset|
    rows = mysql_query(
      "SELECT CommentID, DiscussionID, Body,
              DateInserted, InsertUserID
       FROM #{TABLE_PREFIX}Comment
       ORDER BY CommentID ASC
       LIMIT #{BATCH_SIZE}
       OFFSET #{offset};")

    break if rows.size < 1

    import_ids = rows.map { |row| "comment##{row['CommentID']}" }
    next if all_records_exist?(:posts, import_ids)

    create_posts(rows, total: total_count, offset: offset) do |comment|
      parent = topic_lookup_from_imported_post_id("discussion##{comment['DiscussionID']}")
      next unless parent
      next if comment['Body'].blank?

      author_id = user_id_from_imported_user_id(comment['InsertUserID'])

      {
        id: "comment##{comment['CommentID']}",
        # fall back to the system user when the author was not imported
        user_id: author_id || Discourse::SYSTEM_USER_ID,
        topic_id: parent[:topic_id],
        raw: clean_up(comment['Body']),
        created_at: Time.zone.at(comment['DateInserted'])
      }
    end
  end
end
# Converts a Vanilla post body to the markup stored in Discourse.
#
# HTML entities are decoded, escaped whitespace sequences are expanded, and
# the supported BBCode tags are rewritten (code fences, links, quotes,
# YouTube links, sized images) or stripped (img, url, mp3, color, attach).
#
# raw - the post body; may be nil or blank.
#
# Returns the converted String, or "" when raw is blank.
def clean_up(raw)
  return "" if raw.blank?

  # decode HTML entities
  raw = @htmlentities.decode(raw)

  # fix whitespaces (literal backslash sequences left in the data)
  raw = raw.gsub(/(\\r)?\\n/, "\n")
           .gsub("\\t", "\t")

  # [HTML]...[/HTML]
  raw = raw.gsub(/\[html\]/i, "\n```html\n")
           .gsub(/\[\/html\]/i, "\n```\n")

  # [PHP]...[/PHP]
  raw = raw.gsub(/\[php\]/i, "\n```php\n")
           .gsub(/\[\/php\]/i, "\n```\n")

  # [HIGHLIGHT="..."]
  raw = raw.gsub(/\[highlight="?(\w+)"?\]/i) { "\n```#{$1.downcase}\n" }

  # [CODE]...[/CODE]
  # [HIGHLIGHT]...[/HIGHLIGHT]
  raw = raw.gsub(/\[\/?code\]/i, "\n```\n")
           .gsub(/\[\/?highlight\]/i, "\n```\n")

  # [SAMP]...[/SAMP]
  raw.gsub!(/\[\/?samp\]/i, "`")

  unless CONVERT_HTML
    # replace all chevrons with HTML entities
    # NOTE: must be done
    #  - AFTER all the "code" processing
    #  - BEFORE the "quote" processing
    raw = escape_chevron_outside_inline_code(raw, "<", "&lt;")
    raw = escape_chevron_outside_inline_code(raw, ">", "&gt;")
  end

  # [URL=...]...[/URL]
  # Non-greedy so that several links on one line stay separate links.
  raw.gsub!(/\[url="?(.+?)"?\](.+?)\[\/url\]/i) { "[#{$2}](#{$1})" }

  # Convert image bbcode: [img=width,height]src[/img]
  # Must run before the bare [IMG] tags are stripped below, otherwise the
  # closing [/img] is already gone and this pattern can never match.
  raw.gsub!(/\[img=(\d+),(\d+)\]([^\]]*)\[\/img\]/i, '<img width="\1" height="\2" src="\3">')

  # [IMG]...[/IMG]
  raw.gsub!(/\[\/?img\]/i, "")

  # [URL]...[/URL]
  # [MP3]...[/MP3]
  raw = raw.gsub(/\[\/?url\]/i, "")
           .gsub(/\[\/?mp3\]/i, "")

  # [QUOTE]...[/QUOTE]
  raw.gsub!(/\[quote\](.+?)\[\/quote\]/im) { "\n> #{$1}\n" }

  # [YOUTUBE]<id>[/YOUTUBE]
  raw.gsub!(/\[youtube\](.+?)\[\/youtube\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }

  # [youtube=425,350]id[/youtube]
  raw.gsub!(/\[youtube="?(.+?)"?\](.+?)\[\/youtube\]/i) { "\nhttps://www.youtube.com/watch?v=#{$2}\n" }

  # [MEDIA=youtube]id[/MEDIA]
  raw.gsub!(/\[MEDIA=youtube\](.+?)\[\/MEDIA\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }

  # [VIDEO=youtube;<id>]...[/VIDEO]
  raw.gsub!(/\[video=youtube;([^\]]+)\].*?\[\/video\]/i) { "\nhttps://www.youtube.com/watch?v=#{$1}\n" }

  # Remove the color tag
  raw.gsub!(/\[color=[#a-z0-9]+\]/i, "")
  raw.gsub!(/\[\/color\]/i, "")

  # remove attachments
  raw.gsub!(/\[attach[^\]]*\]\d+\[\/attach\]/i, "")

  # sanitize img tags
  # This regexp removes everything between the first and last img tag. The .* is too much.
  # If it's needed, it needs to be fixed.
  # raw.gsub!(/\<img.*src\="([^\"]+)\".*\>/i) {"\n<img src='#{$1}'>\n"}

  raw
end

# Replaces every chevron in text with entity, except inside `inline code`.
# Chevrons inside backticks are parked as a snowman (U+2603) while the rest
# of the text is rewritten, then restored.
def escape_chevron_outside_inline_code(text, chevron, entity)
  text.gsub(/`([^`]+)`/im) { "`" + $1.gsub(chevron, "\u2603") + "`" }
      .gsub(chevron, entity)
      .gsub("\u2603", chevron)
end
2016-06-27 15:17:00 -05:00
# Returns a Guardian built for Discourse.system_user, created on first use
# and reused afterwards.
def staff_guardian
  @_staff_guardian = Guardian.new(Discourse.system_user) unless @_staff_guardian
  @_staff_guardian
end
2015-06-29 07:45:52 -05:00
# Runs sql on the MySQL connection opened in initialize and returns the
# result of @client.query.
def mysql_query(sql)
  # Deliberately called without `cache_rows: false`:
  # @client.query(sql, cache_rows: false) #segfault: cache_rows: false causes segmentation fault
  @client.query(sql)
end
2016-05-17 10:16:41 -05:00
# Creates Permalink records that redirect old Vanilla URLs to the imported
# users, topics and posts.
#
# - profile/<import_id>/<import_username>  -> /users/<username>
# - discussion/<id>/<slug>                 -> the topic (first posts only)
# - discussion/comment/<id>                -> the post (all other posts)
#
# Records without an import_id custom field are ignored. A first post whose
# topic cannot be loaded is skipped instead of raising on topic.title.
def create_permalinks
  puts '', 'Creating redirects...', ''

  User.find_each do |u|
    ucf = u.custom_fields
    next unless ucf && ucf["import_id"] && ucf["import_username"]

    create_permalink_ignoring_errors(
      url: "profile/#{ucf['import_id']}/#{ucf['import_username']}",
      external_url: "/users/#{u.username}"
    )
    print '.'
  end

  Post.find_each do |post|
    pcf = post.custom_fields
    next unless pcf && pcf["import_id"]

    # import ids look like "discussion#<id>" or "comment#<id>"
    id = pcf["import_id"].split('#').last

    if post.post_number == 1
      topic = post.topic
      next unless topic

      slug = Slug.for(topic.title) # probably matches what vanilla would do...
      create_permalink_ignoring_errors(url: "discussion/#{id}/#{slug}", topic_id: topic.id)
    else
      create_permalink_ignoring_errors(url: "discussion/comment/#{id}", post_id: post.id)
    end
    print '.'
  end
end

# Best-effort Permalink creation: any StandardError raised by
# Permalink.create is swallowed so one failing record does not stop the run.
def create_permalink_ignoring_errors(attributes)
  Permalink.create(attributes)
rescue StandardError
  nil
end
2015-06-29 07:45:52 -05:00
end
2016-05-17 10:16:41 -05:00
2015-06-29 07:45:52 -05:00
# Entry point: build the importer and call perform on it as soon as this file is loaded.
ImportScripts :: VanillaSQL . new . perform