From ed1e5ef6cc92ecdcef860c88b86aac02ab385c53 Mon Sep 17 00:00:00 2001 From: Gerhard Schlager Date: Wed, 18 Sep 2019 18:11:52 +0200 Subject: [PATCH] FIX: By default, don't abort Google Groups crawling on error --- script/import_scripts/google_groups.rb | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/script/import_scripts/google_groups.rb b/script/import_scripts/google_groups.rb index 15f152a329e..6d627db25b0 100755 --- a/script/import_scripts/google_groups.rb +++ b/script/import_scripts/google_groups.rb @@ -100,7 +100,7 @@ def crawl_topic(url) @scraped_topic_urls << url rescue puts "Failed to scrape topic at #{url}".red - raise + raise if @abort_on_error end def crawl_message(url, might_be_deleted) @@ -122,11 +122,15 @@ def crawl_message(url, might_be_deleted) File.write(filename, content) rescue Selenium::WebDriver::Error::NoSuchElementError - raise unless might_be_deleted - puts "Message might be deleted. Skipping #{url}" + if might_be_deleted + puts "Message might be deleted. Skipping #{url}" + else + puts "Failed to scrape message at #{url}".red + raise if @abort_on_error + end rescue puts "Failed to scrape message at #{url}".red - raise + raise if @abort_on_error end def login @@ -195,7 +199,10 @@ end def parse_arguments puts "" + # default values @force_import = false + @abort_on_error = false + @cookies = DEFAULT_COOKIES_TXT if File.exist?(DEFAULT_COOKIES_TXT) parser = OptionParser.new do |opts| opts.banner = "Usage: google_groups.rb [options]" @@ -204,6 +211,7 @@ def parse_arguments opts.on("-c", "--cookies PATH", "path to cookies.txt") { |v| @cookies = v } opts.on("--path PATH", "output path for emails") { |v| @path = v } opts.on("-f", "--force", "force import when user isn't allowed to see email addresses") { @force_import = true } + opts.on("-a", "--abort-on-error", "abort crawl on error instead of skipping message") { @abort_on_error = true } opts.on("-h", "--help") do puts opts exit @@ -216,15 +224,13 @@ def parse_arguments exit_with_error(e.message, "", parser) end - @cookies = DEFAULT_COOKIES_TXT if @cookies.nil? && File.exist?(DEFAULT_COOKIES_TXT) - @path = File.join(DEFAULT_OUTPUT_PATH, @groupname) if @path.nil? - mandatory = [:groupname, :cookies] missing = mandatory.select { |name| instance_variable_get("@#{name}").nil? } exit_with_error("Missing arguments: #{missing.join(', ')}".red.bold, "", parser, "") if missing.any? exit_with_error("cookies.txt not found at #{@cookies}".red.bold, "") if !File.exist?(@cookies) + @path = File.join(DEFAULT_OUTPUT_PATH, @groupname) if @path.nil? FileUtils.mkpath(@path) end