# frozen_string_literal: true

class WebCrawlerRequest < ActiveRecord::Base
  # Tracks daily request counts per crawler user agent. Counts are buffered
  # in-process by CachedCounting and flushed to the database via write_cache!.
  include CachedCounting

  cattr_accessor :max_record_age, :max_records_per_day

  # only keep the top records based on request count
  self.max_records_per_day = 200

  # delete records older than this
  self.max_record_age = 30.days

  # Buffer an increment for the given user agent; CachedCounting flushes the
  # accumulated count later through write_cache!.
  def self.increment!(user_agent)
    perform_increment!(user_agent)
  end

  # Flush a buffered count into the row for (date, user_agent), creating the
  # row on demand. The atomic SQL "count = count + ?" avoids read-modify-write
  # races between concurrent flushers.
  def self.write_cache!(user_agent, count, date)
    where(id: request_id(date: date, user_agent: user_agent))
      .update_all(["count = count + ?", count])
  end

  # NOTE(review): a bare `protected` marker only affects instance methods, not
  # singleton methods like `self.request_id` below, so this is effectively a
  # documentation hint rather than real visibility — confirm before relying on it.
  protected

  # Returns the row id for (date, user_agent), creating the row with count 0
  # when missing. A concurrent insert can race us between the lookup and
  # create!; on a unique-index violation, retry the lookup exactly once, then
  # re-raise.
  def self.request_id(date:, user_agent:, retries: 0)
    id = where(date: date, user_agent: user_agent).pluck_first(:id)
    id ||= create!(date: date, user_agent: user_agent, count: 0).id
  rescue ActiveRecord::RecordNotUnique # (date, user_agent) unique index violation
    if retries == 0
      request_id(date: date, user_agent: user_agent, retries: 1)
    else
      raise
    end
  end
end
|
# == Schema Information
#
# Table name: web_crawler_requests
#
#  id         :bigint           not null, primary key
#  date       :date             not null
#  user_agent :string           not null
#  count      :integer          default(0), not null
#
# Indexes
#
#  index_web_crawler_requests_on_date_and_user_agent  (date,user_agent) UNIQUE
#