From 68e87bb58e40a9975f026e17d83ca19e57e72889 Mon Sep 17 00:00:00 2001
From: Kane York
Date: Tue, 6 Oct 2020 15:51:53 -0700
Subject: [PATCH] User export: profile as json, export auth token logs (#10819)

* FEATURE: Export the entire user profile as json, not just bio/website

* FEATURE: Add session log information to user export

Even though the columns are named 'auth_token' etc, the content is not
actually usable to log into the forum with. Despite all that, it is still
truncated for export, to avoid any 'token hash cracking' situations.
---
 app/jobs/regular/export_user_archive.rb | 77 +++++++++++++++++++++++--
 spec/jobs/export_user_archive_spec.rb   | 56 ++++++++++++++++--
 2 files changed, 122 insertions(+), 11 deletions(-)

diff --git a/app/jobs/regular/export_user_archive.rb b/app/jobs/regular/export_user_archive.rb
index f2e77b00883..fe5d0ea7c0f 100644
--- a/app/jobs/regular/export_user_archive.rb
+++ b/app/jobs/regular/export_user_archive.rb
@@ -12,7 +12,9 @@ module Jobs
 
     COMPONENTS ||= %w(
       user_archive
-      user_archive_profile
+      preferences
+      auth_tokens
+      auth_token_logs
       badges
       bookmarks
       category_preferences
@@ -22,6 +24,8 @@ module Jobs
     HEADER_ATTRS_FOR ||= HashWithIndifferentAccess.new(
       user_archive: ['topic_title', 'categories', 'is_pm', 'post', 'like_count', 'reply_count', 'url', 'created_at'],
       user_archive_profile: ['location', 'website', 'bio', 'views'],
+      auth_tokens: ['id', 'auth_token_hash', 'prev_auth_token_hash', 'auth_token_seen', 'client_ip', 'user_agent', 'seen_at', 'rotated_at', 'created_at', 'updated_at'],
+      auth_token_logs: ['id', 'action', 'user_auth_token_id', 'client_ip', 'auth_token_hash', 'created_at', 'path', 'user_agent'],
       badges: ['badge_id', 'badge_name', 'granted_at', 'post_id', 'seq', 'granted_manually', 'notification_id', 'featured_rank'],
       bookmarks: ['post_id', 'topic_id', 'post_number', 'link', 'name', 'created_at', 'updated_at', 'reminder_type', 'reminder_at', 'reminder_last_sent_at', 'reminder_set_at', 'auto_delete_preference'],
       category_preferences: ['category_id', 'category_names', 'notification_level', 'dismiss_new_timestamp'],
@@ -38,12 +42,15 @@ module Jobs
       COMPONENTS.each do |name|
         h = { name: name, method: :"#{name}_export" }
         h[:filetype] = :csv
-        filename_method = :"#{name}_filename"
-        if respond_to? filename_method
-          h[:filename] = public_send(filename_method)
-        else
-          h[:filename] = name
+        filetype_method = :"#{name}_filetype"
+        if respond_to? filetype_method
+          h[:filetype] = public_send(filetype_method)
         end
+        condition_method = :"include_#{name}?"
+        if respond_to? condition_method
+          h[:skip] = !public_send(condition_method)
+        end
+        h[:filename] = name
         components.push(h)
       end
 
@@ -61,12 +68,17 @@ module Jobs
       zip_filename = nil
       begin
         components.each do |component|
+          next if component[:skip]
           case component[:filetype]
           when :csv
             CSV.open("#{dirname}/#{component[:filename]}.csv", "w") do |csv|
               csv << get_header(component[:name])
               public_send(component[:method]) { |d| csv << d }
             end
+          when :json
+            File.open("#{dirname}/#{component[:filename]}.json", "w") do |file|
+              file.write MultiJson.dump(public_send(component[:method]), indent: 4)
+            end
           else
             raise 'unknown export filetype'
           end
@@ -132,6 +144,59 @@ module Jobs
       end
     end
 
+    def preferences_export
+      UserSerializer.new(@current_user, scope: guardian)
+    end
+
+    def preferences_filetype
+      :json
+    end
+
+    def auth_tokens_export
+      return enum_for(:auth_tokens_export) unless block_given?
+
+      UserAuthToken
+        .where(user_id: @current_user.id)
+        .each do |token|
+        yield [
+          token.id,
+          token.auth_token.to_s[0..4] + "...", # hashed and truncated
+          token.prev_auth_token.to_s[0..4] + "...", # hashed and truncated
+          token.auth_token_seen,
+          token.client_ip,
+          token.user_agent,
+          token.seen_at,
+          token.rotated_at,
+          token.created_at,
+          token.updated_at,
+        ]
+      end
+    end
+
+    def include_auth_token_logs?
+      # skip the file when the user has no log rows (presumably tied to SiteSetting.verbose_auth_token_logging - confirm)
+      UserAuthTokenLog.where(user_id: @current_user.id).exists?
+    end
+
+    def auth_token_logs_export
+      return enum_for(:auth_token_logs_export) unless block_given?
+
+      UserAuthTokenLog
+        .where(user_id: @current_user.id)
+        .each do |log|
+        yield [
+          log.id,
+          log.action,
+          log.user_auth_token_id,
+          log.client_ip,
+          log.auth_token.to_s[0..4] + "...", # hashed and truncated
+          log.created_at,
+          log.path,
+          log.user_agent,
+        ]
+      end
+    end
+
     def badges_export
       return enum_for(:badges_export) unless block_given?
 
diff --git a/spec/jobs/export_user_archive_spec.rb b/spec/jobs/export_user_archive_spec.rb
index 4d305d7d058..7327429c029 100644
--- a/spec/jobs/export_user_archive_spec.rb
+++ b/spec/jobs/export_user_archive_spec.rb
@@ -26,6 +26,10 @@ describe Jobs::ExportUserArchive do
     [data_rows, csv_out]
   end
 
+  def make_component_json
+    JSON.parse(MultiJson.dump(job.public_send(:"#{component}_export")))
+  end
+
   context '#execute' do
     let(:post) { Fabricate(:post, user: user) }
 
@@ -33,6 +37,11 @@ describe Jobs::ExportUserArchive do
       _ = post
       user.user_profile.website = 'https://doe.example.com/john'
       user.user_profile.save
+      # force a UserAuthTokenLog entry
+      Discourse.current_user_provider.new({
+        'HTTP_USER_AGENT' => 'MyWebBrowser',
+        'REQUEST_PATH' => '/some_path/456852',
+      }).log_on_user(user, {}, {})
     end
 
     after do
@@ -143,20 +152,57 @@ describe Jobs::ExportUserArchive do
     end
   end
 
-  context 'user_archive_profile' do
-    let(:component) { 'user_archive_profile' }
+  context 'preferences' do
+    let(:component) { 'preferences' }
 
     before do
       user.user_profile.website = 'https://doe.example.com/john'
       user.user_profile.bio_raw = "I am John Doe\n\nHere I am"
       user.user_profile.save
+      user.user_option.text_size = :smaller
+      user.user_option.automatically_unpin_topics = false
+      user.user_option.save
     end
 
     it 'properly includes the profile fields' do
-      _, csv_out = make_component_csv
+      serializer = job.preferences_export
+      # puts MultiJson.dump(serializer, indent: 4)
+      output = make_component_json
+      payload = output['user']
 
-      expect(csv_out).to match('doe.example.com')
-      expect(csv_out).to match("Doe\n\nHere")
+      expect(payload['website']).to match('doe.example.com')
+      expect(payload['bio_raw']).to match("Doe\n\nHere")
+      expect(payload['user_option']['automatically_unpin_topics']).to eq(false)
+      expect(payload['user_option']['text_size']).to eq('smaller')
+    end
+  end
+
+  context 'auth tokens' do
+    let(:component) { 'auth_tokens' }
+
+    before do
+      Discourse.current_user_provider.new({
+        'HTTP_USER_AGENT' => 'MyWebBrowser',
+        'REQUEST_PATH' => '/some_path/456852',
+      }).log_on_user(user, {}, {})
+    end
+
+    it 'properly includes session records' do
+      data, csv_out = make_component_csv
+      expect(data.length).to eq(1)
+
+      expect(data[0]['user_agent']).to eq('MyWebBrowser')
+    end
+
+    context 'auth token logs' do
+      let(:component) { 'auth_token_logs' }
+      it 'includes details such as the path' do
+        data, csv_out = make_component_csv
+        expect(data.length).to eq(1)
+
+        expect(data[0]['action']).to eq('generate')
+        expect(data[0]['path']).to eq('/some_path/456852')
+      end
     end
   end
 