FEATURE: Rate limit how often we'll crawl a destination IP

This commit is contained in:
Robin Ward
2017-05-23 15:03:04 -04:00
parent 36e477750c
commit b81e7be9a1
2 changed files with 20 additions and 18 deletions

View File

@@ -1,6 +1,7 @@
require "socket"
require "ipaddr"
require 'excon'
require 'rate_limiter'
# Determine the final endpoint for a Web URI, following redirects
class FinalDestination
@@ -76,10 +77,6 @@ class FinalDestination
end
# Predicate for whether the current destination is acceptable.
# Delegates entirely to is_public? and returns its result unchanged.
def is_dest_valid?
is_public?
end
def is_public?
return false unless @uri && @uri.host
address_s = @opts[:lookup_ip].call(@uri.hostname)
@@ -92,7 +89,12 @@ class FinalDestination
return false
end
# Rate limit how often this IP can be crawled
RateLimiter.new(nil, "crawl-destination-ip:#{address_s}", 100, 1.hour).performed!
true
rescue RateLimiter::LimitExceeded
false
end
def private_ranges