FEATURE: Add import script for Friends+Me Google+ Exporter JSON archives (#7334)
This script has been used to import over 50,000 Google+ posts and over 300,000 comments from 29 communities into a single Discourse instance, as well as for at least three other imports.

Google+ has closed to the public, but it is still available at this time for GSuite customers. If GSuite customers decide to migrate from Google+ to Discourse, or if Google "sunsets" Google+ for GSuite customers, this importer may be useful:
https://www.reddit.com/r/FMGE_Support/comments/b8sa5h/fmge_for_gsuite/

Development and use of this script have been discussed in detail:
https://meta.discourse.org/t/bounty-google-private-communities-export-screenscraper-importer/108029
This commit is contained in:
parent 46f628aa7c
commit 9fc3de01bb

script/import_scripts/friendsmegplus.rb | 684 lines | new file
@ -0,0 +1,684 @@
require File.expand_path(File.dirname(__FILE__) + "/base.rb")

require 'csv'

# Importer for Friends+Me Google+ Exporter (F+MG+E) output.
#
# Takes the full path (absolute or relative) to
# * each of the F+MG+E JSON export files you want to import,
# * the F+MG+E google-plus-image-list.csv file,
# * a categories.json file you write to describe how the Google+
#   categories map to Discourse categories, subcategories, and tags.
#
# You can provide all the F+MG+E JSON export files in a single import
# run. This will be the fastest way to do the entire import if you
# have enough memory and disk space. It will work just as well to
# import each F+MG+E JSON export file separately. This might be
# valuable if you have memory or space limitations, as the memory to
# hold all the data from the F+MG+E JSON export files is one of the
# key resources used by this script.
#
# Create an initial empty ("{}") categories.json file, and the import
# script will write a .new file for you to fill in the details.
# You will probably want to use jq to reformat the .new file before
# trying to edit it: `jq . categories.json.new > categories.json`
#
# Provide a filename that ends with "upload-paths.txt" and the names
# of each of the files uploaded will be written to that file.
#
# Edit the values at the top of the script to fit your preferences.
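#
# A typical invocation might look like this (all file names below are
# illustrative; only categories.json is required, and adding --dry-run
# parses everything without creating anything):
#
#   bundle exec ruby script/import_scripts/friendsmegplus.rb \
#     categories.json usermap.json blacklist.json \
#     google-plus-image-list.csv community.json \
#     gplus-upload-paths.txt --first-date=2015-01-01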

class ImportScripts::FMGP < ImportScripts::Base

  def initialize
    super

    # Set this to the base URL for the site; required for importing videos
    # (typically just 'https:' in production)
    @site_base_url = 'http://localhost:3000'
    @system_user = Discourse.system_user
    SiteSetting.max_image_size_kb = 40960
    SiteSetting.max_attachment_size_kb = 40960
    # handle the same video extensions as the rest of Discourse
    SiteSetting.authorized_extensions = (SiteSetting.authorized_extensions.split("|") + ['mp4', 'mov', 'webm', 'ogv']).uniq.join("|")
    @invalid_bounce_score = 5.0
    @min_title_words = 3
    @max_title_words = 14
    @min_title_characters = 12
    @min_post_raw_characters = 12
    # Set to true to create the categories named in categories.json. Does
    # not honor parent relationships; expects categories to be
    # rearranged after import.
    @create_categories = false

    # JSON files produced by F+MG+E as an export of a community
    @feeds = []

    # CSV maps URLs to downloaded images and/or videos (exported separately)
    @images = {}

    # map from Google ID to local system users where necessary
    # {
    #   "128465039243871098234": "handle"
    # }
    # Google ID 128465039243871098234 will show up as @handle
    @usermap = {}

    # G+ user IDs to filter out (spam, abuse); no topics or posts are
    # imported for them, and they are silenced (and optionally suspended)
    # when created.
    # Loaded from blacklist.json as an array of Google IDs: `[ 92310293874, 12378491235293 ]`
    @blacklist = Set[]

    # G+ user IDs whose posts are useful; if this is set, include only
    # posts (and non-blacklisted comments) authored by these IDs
    @whitelist = nil

    # Tags to apply to every topic; use an empty Array for no global tags
    @globaltags = [ "gplus" ]

    @imagefiles = nil

    # The categories.json file is a map:
    # "google-category-uuid": {
    #   "name": 'google+ category name',
    #   "category": 'category name',
    #   "parent": 'parent name', # optional
    #   "create": true, # optional
    #   "tags": ['list', 'of', 'tags'] # optional
    # }
    # Start with '{}', let the script generate categories.json.new once, then edit and re-run
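    # A filled-in entry might look like this (the UUID and names here
    # are invented for illustration):
    # "f1e2d3c4-b5a6-4788-9102-030405060708": {
    #   "name": "Show & Tell",
    #   "category": "Show and Tell",
    #   "parent": "Google+ Imports",
    #   "tags": ["show-and-tell"]
    # }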
    @categories = {}

    # keep track of the filename in case we need to write a .new file
    @categories_filename = nil
    # dry run parses but doesn't create anything
    @dryrun = false
    # @last_date cuts off at a certain date, for late-spammed abandoned communities
    @last_date = nil
    # @first_date starts at a certain date, for early-spammed rescued communities
    @first_date = nil
    # every argument is a filename; do the right thing based on the file name
    ARGV.each do |arg|
      if arg.end_with?('.csv')
        # CSV files produced by F+MG+E have "URL";"IsDownloaded";"FileName";"FilePath";"FileSize"
        CSV.foreach(arg, headers: true, col_sep: ';') do |row|
          @images[row[0]] = {
            filename: row[2],
            filepath: row[3],
            filesize: row[4]
          }
        end
      elsif arg.end_with?("upload-paths.txt")
        @imagefiles = File.open(arg, "w")
      elsif arg.end_with?('categories.json')
        @categories_filename = arg
        @categories = load_fmgp_json(arg)
      elsif arg.end_with?("usermap.json")
        @usermap = load_fmgp_json(arg)
      elsif arg.end_with?('blacklist.json')
        @blacklist = load_fmgp_json(arg).map { |i| i.to_s }.to_set
      elsif arg.end_with?('whitelist.json')
        @whitelist = load_fmgp_json(arg).map { |i| i.to_s }.to_set
      elsif arg.end_with?('.json')
        @feeds << load_fmgp_json(arg)
      elsif arg == '--dry-run'
        @dryrun = true
      elsif arg.start_with?("--last-date=")
        @last_date = Time.zone.parse(arg.gsub(/.*=/, ''))
      elsif arg.start_with?("--first-date=")
        @first_date = Time.zone.parse(arg.gsub(/.*=/, ''))
      else
        raise RuntimeError.new("unknown argument #{arg}")
      end
    end

    raise RuntimeError.new("Must provide a categories.json file") if @categories_filename.nil?

    # store the actual category objects looked up in the database
    @cats = {}
    # remember google auth DB lookup results
    @emails = {}
    @newusers = {}
    @users = {}
    # remember uploaded images
    @uploaded = {}
    # counters for post progress
    @topics_imported = 0
    @posts_imported = 0
    @topics_skipped = 0
    @posts_skipped = 0
    @topics_blacklisted = 0
    @posts_blacklisted = 0
    # count uploaded file size
    @totalsize = 0
  end

  def execute
    puts "", "Importing from Friends+Me Google+ Exporter..."

    read_categories
    check_categories
    map_categories

    import_users
    import_posts

    # There is no need to set trust level 0 for any imported users unless
    # F+MG+E gains the ability to export +1 data; in that case, users who
    # have only done a +1 and neither posted nor commented should be TL0,
    # and this should be called after all other processing is done:
    # update_tl0

    @imagefiles.close() if !@imagefiles.nil?
    puts "", "Uploaded #{@totalsize} bytes of image files"
    puts "", "Done"
  end

  def load_fmgp_json(filename)
    raise RuntimeError.new("File #{filename} not found") if !File.exist?(filename)
    JSON.parse(File.read(filename))
  end

  def read_categories
    @feeds.each do |feed|
      feed["accounts"].each do |account|
        account["communities"].each do |community|
          community["categories"].each do |category|
            if !@categories[category["id"]].present?
              # Create empty entries to write and fill in manually
              @categories[category["id"]] = {
                "name" => category["name"],
                "community" => community["name"],
                "category" => "",
                "parent" => nil,
                "tags" => [],
              }
            elsif !@categories[category["id"]]["community"].present?
              @categories[category["id"]]["community"] = community["name"]
            end
          end
        end
      end
    end
  end

  def check_categories
    # raise a useful exception if the necessary data is not found in categories.json
    incomplete_categories = []
    @categories.each do |id, c|
      if !c["category"].present?
        # written in JSON without a "category" key at all
        c["category"] = ""
      end
      if c["category"].empty?
        # found in read_categories or not yet filled out in categories.json
        incomplete_categories << c["name"]
      end
    end
    if !incomplete_categories.empty?
      categories_new = "#{@categories_filename}.new"
      File.open(categories_new, "w") do |f|
        f.write(@categories.to_json)
        raise RuntimeError.new("Category file missing categories for #{incomplete_categories}, edit #{categories_new} and rename it to #{@categories_filename} before running the same import")
      end
    end
  end

  def map_categories
    puts "", "Mapping categories from Google+ to Discourse..."

    @categories.each do |id, cat|
      if cat["parent"].present? && !cat["parent"].empty?
        # Two separate sub-categories can have the same name, so they need to be identified by parent
        Category.where(name: cat["category"]).each do |category|
          parent = Category.where(id: category.parent_category_id).first
          @cats[id] = category if parent.name == cat["parent"]
        end
      else
        if category = Category.where(name: cat["category"]).first
          @cats[id] = category
        elsif @create_categories
          params = {}
          params[:name] = cat['category']
          params[:id] = id
          puts "Creating #{cat['category']}"
          category = create_category(params, id)
          @cats[id] = category
        end
      end
      raise RuntimeError.new("Could not find category #{cat["category"]} for #{cat}") if @cats[id].nil?
    end
  end

  def import_users
    puts '', "Importing Google+ post and comment author users..."

    # collect authors of both posts and comments
    @feeds.each do |feed|
      feed["accounts"].each do |account|
        account["communities"].each do |community|
          community["categories"].each do |category|
            category["posts"].each do |post|
              import_author_user(post["author"])
              if post["message"].present?
                import_message_users(post["message"])
              end
              post["comments"].each do |comment|
                import_author_user(comment["author"])
                if comment["message"].present?
                  import_message_users(comment["message"])
                end
              end
            end
          end
        end
      end
    end

    return if @dryrun

    # now create them all
    create_users(@newusers) do |id, u|
      {
        id: id,
        email: u[:email],
        name: u[:name],
        post_create_action: u[:post_create_action]
      }
    end
  end

  def import_author_user(author)
    id = author["id"]
    name = author["name"]
    import_google_user(id, name)
  end

  def import_message_users(message)
    message.each do |fragment|
      if fragment[0] == 3 && !fragment[2].nil?
        # deleted G+ users show up with a null ID
        import_google_user(fragment[2], fragment[1])
      end
    end
  end

  def import_google_user(id, name)
    if !@emails[id].present?
      google_user_info = UserAssociatedAccount.find_by(provider_name: 'google_oauth2', provider_uid: id.to_i)
      if google_user_info.nil?
        # create a new Google user on the system; expect this user to merge
        # when they later log in with Google authentication.
        # Note that because the email address is not included in G+ data, we
        # don't know whether they already have another account not yet
        # associated with Google oauth2. If they never log in, they'll keep
        # an @gplus.invalid address associated with their account.
        email = "#{id}@gplus.invalid"
        @newusers[id] = {
          email: email,
          name: name,
          post_create_action: proc do |newuser|
            newuser.approved = true
            newuser.approved_by_id = @system_user.id
            newuser.approved_at = newuser.created_at
            if @blacklist.include?(id.to_s)
              now = DateTime.now
              forever = 1000.years.from_now
              # you can suspend as well if you want your blacklist to
              # be hard to recover from
              #newuser.suspended_at = now
              #newuser.suspended_till = forever
              newuser.silenced_till = forever
            end
            newuser.save
            @users[id] = newuser
            UserAssociatedAccount.create(provider_name: 'google_oauth2', user_id: newuser.id, provider_uid: id)
            # Do not send email to the invalid email addresses;
            # this can be removed after merging with #7162
            s = UserStat.where(user_id: newuser.id).first
            s.bounce_score = @invalid_bounce_score
            s.reset_bounce_score_after = 1000.years.from_now
            s.save
          end
        }
      else
        # user already exists on the system
        u = User.find(google_user_info.user_id)
        if u.silenced? || u.suspended?
          @blacklist.add(id)
        end
        @users[id] = u
        email = u.email
      end
      @emails[id] = email
    end
  end

  def import_posts
    # "post" is confusing:
    # - a Google+ post is a Discourse topic
    # - a Google+ comment is a Discourse post

    puts '', "Importing Google+ posts and comments..."

    @feeds.each do |feed|
      feed["accounts"].each do |account|
        account["communities"].each do |community|
          community["categories"].each do |category|
            category["posts"].each do |post|
              # G+ post / Discourse topic
              import_topic(post, category)
              print("\r#{@topics_imported}/#{@posts_imported} topics/posts (skipped: #{@topics_skipped}/#{@posts_skipped} blacklisted: #{@topics_blacklisted}/#{@posts_blacklisted}) ")
            end
          end
        end
      end
    end

    puts ''
  end

  def import_topic(post, category)
    # Discourse topics (G+ posts) have no parent
    if topic_id = post_id_from_imported_post_id(post["id"])
      # topic already imported; might still need to attach more comments/posts
      p = Post.find_by(id: topic_id)
      @topics_skipped += 1
    else
      # new topic
      if !@whitelist.nil? && !@whitelist.include?(post["author"]["id"])
        # ignore non-whitelisted authors only if a whitelist is defined
        return
      end
      postmap = make_postmap(post, category, nil)
      if postmap.nil?
        @topics_blacklisted += 1
        return
      end
      p = create_post(postmap, postmap[:id]) if !@dryrun
      @topics_imported += 1
    end
    # iterate over the comments in the post
    post["comments"].each do |comment|
      # category is nil for comments
      if post_id_from_imported_post_id(comment["id"])
        @posts_skipped += 1
      else
        commentmap = make_postmap(comment, nil, p)
        if commentmap.nil?
          @posts_blacklisted += 1
        else
          @posts_imported += 1
          new_comment = create_post(commentmap, commentmap[:id]) if !@dryrun
        end
      end
    end
  end

  def make_postmap(post, category, parent)
    post_author_id = post["author"]["id"]
    return nil if @blacklist.include?(post_author_id.to_s)

    raw = formatted_message(post)
    # if there is no message, image, or images, it's just empty
    return nil if raw.length < @min_post_raw_characters

    created_at = Time.zone.parse(post["createdAt"])
    return nil if !@last_date.nil? && created_at > @last_date
    return nil if !@first_date.nil? && created_at < @first_date

    user_id = user_id_from_imported_user_id(post_author_id)
    if user_id.nil?
      user_id = @users[post["author"]["id"]].id
    end

    mapped = {
      id: post["id"],
      user_id: user_id,
      created_at: created_at,
      raw: raw,
      cook_method: Post.cook_methods[:regular],
    }

    # category is nil for comments and set for posts, so post-only attributes go here
    if !category.nil?
      cat_id = category["id"]
      mapped[:title] = parse_title(post, created_at)
      mapped[:category] = @cats[cat_id].id
      mapped[:tags] = Array.new(@globaltags)
      if @categories[cat_id]["tags"].present?
        mapped[:tags].append(@categories[cat_id]["tags"]).flatten!
      end
    else
      mapped[:topic_id] = parent.topic_id if !@dryrun
    end
    # FIXME: import G+ "+1" as "like" if the F+MG+E feature request is implemented

    return mapped
  end

  def parse_title(post, created_at)
    # G+ has no titles, so we have to make something up
    if post["message"].present?
      title_text(post, created_at)
    else
      # probably just posted an image and/or album
      untitled(post["author"]["name"], created_at)
    end
  end

  def title_text(post, created_at)
    words = message_text(post["message"])
    if words.empty? || words.join("").length < @min_title_characters || words.length < @min_title_words
      # the database has a minimum title length, and in practice
      # short posts do not work well as titles most of the time
      return untitled(post["author"]["name"], created_at)
    end

    words = words[0..(@max_title_words - 1)]
    lastword = nil

    (@min_title_words..(words.length - 1)).each do |i|
      # prefer a full stop
      if words[i].end_with?(".")
        lastword = i
      end
    end

    if lastword.nil?
      # fall back on other punctuation
      (@min_title_words..(words.length - 1)).each do |i|
        if words[i].end_with?(',', ';', ':', '?')
          lastword = i
        end
      end
    end

    if !lastword.nil?
      # found a logical terminating word
      words = words[0..lastword]
    end

    # the database has a maximum title length, which is longer than a good display shows anyway
    title = words.join(" ").scan(/.{1,254}/)[0]
  end

  def untitled(name, created_at)
    "Google+ post by #{name} on #{created_at}"
  end

  def message_text(message)
    # only words, no markup
    words = []
    text_types = [0, 3]
    message.each do |fragment|
      if text_types.include?(fragment[0])
        fragment[1].split().each do |word|
          words << word
        end
      elsif fragment[0] == 2
        # use the display text of a link
        words << fragment[1]
      end
    end
    return words
  end

  def formatted_message(post)
    lines = []
    urls_seen = Set[]
    if post["message"].present?
      post["message"].each do |fragment|
        lines << formatted_message_fragment(fragment, post, urls_seen)
      end
    end
    # yes, both "image" and "images"; "video" and "videos" :(
    if post["video"].present?
      lines << "\n#{formatted_link(post["video"]["proxy"])}\n"
    elsif post["image"].present?
      # if there are both an image and a video, the image is a cover image for the video
      lines << "\n#{formatted_link(post["image"]["proxy"])}\n"
    end
    if post["images"].present?
      post["images"].each do |image|
        lines << "\n#{formatted_link(image["proxy"])}\n"
      end
    end
    if post["videos"].present?
      post["videos"].each do |video|
        lines << "\n#{formatted_link(video["proxy"])}\n"
      end
    end
    if post["link"].present? && post["link"]["url"].present?
      url = post["link"]["url"]
      if !urls_seen.include?(url)
        # add the URL only if it wasn't already referenced, because
        # it is often redundant
        lines << "\n#{post["link"]["url"]}\n"
        urls_seen.add(url)
      end
    end
    lines.join("")
  end
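
  # A message, as produced by F+MG+E, is an array of fragments; each
  # fragment is an array whose first element is a numeric type code.
  # Summarizing how the code below consumes them:
  #   0: plain text [0, text, formatting-flags-hash-or-nil]
  #   1: line break
  #   2: link [2, display_text, url]
  #   3: user reference [3, name, google_id] (google_id is nil for deleted users)
  #   4: hashtag (the "#" is included in the text)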

  def formatted_message_fragment(fragment, post, urls_seen)
    # Markdown does not nest reliably the same way as either G+'s markup or
    # what users intended in G+, so generate HTML tags instead.
    # This method uses explicit returns to make sure it doesn't fall through accidentally.
    if fragment[0] == 0
      # Random zero-width join characters break the output; in particular, they are
      # common after plus-references and break @name recognition. Just get rid of them.
      # Also deal with 0x80 (really‽) and non-breaking spaces.
      text = fragment[1].gsub(/(\u200d|\u0080)/, "").gsub(/\u00a0/, " ")
      if fragment[2].nil?
        return text
      else
        if fragment[2]["italic"].present?
          text = "<i>#{text}</i>"
        end
        if fragment[2]["bold"].present?
          text = "<b>#{text}</b>"
        end
        if fragment[2]["strikethrough"].present?
          # <s> is more likely than <del> to represent user intent?
          text = "<s>#{text}</s>"
        end
        return text
      end
    elsif fragment[0] == 1
      return "\n"
    elsif fragment[0] == 2
      urls_seen.add(fragment[2])
      return formatted_link_text(fragment[2], fragment[1])
    elsif fragment[0] == 3
      # reference to a user
      if @usermap.include?(fragment[2].to_s)
        return "@#{@usermap[fragment[2].to_s]}"
      end
      if fragment[2].nil?
        # deleted G+ users show up with a null ID
        return "<b>+#{fragment[1]}</b>"
      end
      # G+ occasionally doesn't put proper spaces after users, hence the trailing spaces below
      if user = find_user_by_import_id(fragment[2])
        # user was among this import's authors
        return "@#{user.username} "
      else
        if google_user_info = UserAssociatedAccount.find_by(provider_name: 'google_oauth2', provider_uid: fragment[2])
          # user was not in this import, but has logged in or been imported otherwise
          user = User.find(google_user_info.user_id)
          return "@#{user.username} "
        else
          raise RuntimeError.new("Google user #{fragment[1]} (id #{fragment[2]}) not imported") if !@dryrun
          # if you want to fall back to their G+ name, just remove the raise above,
          # but this should not happen
          return "<b>+#{fragment[1]}</b>"
        end
      end
    elsif fragment[0] == 4
      # hashtag; the octothorpe is included
      return fragment[1]
    else
      raise RuntimeError.new("message code #{fragment[0]} not recognized!")
    end
  end

  def formatted_link(url)
    formatted_link_text(url, url)
  end

  def embedded_image_md(upload)
    # like embedded_image_html, but without its unnecessary size logic
    upload_name = upload.short_url || upload.url
    if upload_name =~ /\.(mov|mp4|webm|ogv)$/i
      @site_base_url + upload.url
    else
      "![#{upload.original_filename}](#{upload_name})"
    end
  end
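
  # For images this yields ordinary Markdown, e.g. (illustrative):
  #   ![photo.jpg](upload://abc123def456.jpeg)
  # For videos it yields a full URL to the uploaded file instead.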

  def formatted_link_text(url, text)
    # There are two ways to present images attached to posts; you may want to edit this for preference:
    # - display: embedded_image_html(upload)
    # - download links: attachment_html(upload, text)
    # You might even want to make it depend on the file name.
    if @images[text].present?
      # F+MG+E provides the URL it downloaded in the text slot;
      # we won't use the G+ URL at all since it will disappear anyway
      url = text
    end
    if @uploaded[url].present?
      upload = @uploaded[url]
      return "\n#{embedded_image_md(upload)}"
    elsif @images[url].present?
      missing = "<i>missing/deleted image from Google+</i>"
      return missing if !Pathname.new(@images[url][:filepath]).exist?
      @imagefiles.write("#{@images[url][:filepath]}\n") if !@imagefiles.nil?
      upload = create_upload(@system_user.id, @images[url][:filepath], @images[url][:filename])
      if upload.nil? || upload.id.nil?
        # upload can be nil if the image conversion fails;
        # upload.id can be nil for at least videos, and possibly deleted images
        return missing
      end
      upload.save
      @totalsize += @images[url][:filesize].to_i
      @uploaded[url] = upload
      return "\n#{embedded_image_md(upload)}"
    end
    if text == url
      # leave the URL bare and Discourse will do the right thing
      return url
    else
      # It turns out that in the only place we get here, Google has done its own
      # text interpolation that doesn't look good on Discourse, so while it looks
      # like this should be:
      #   return "[#{text}](#{url})"
      # it actually looks better to throw away the Google-provided text:
      return url
    end
  end
end

if __FILE__ == $0
  ImportScripts::FMGP.new.perform
end