discourse/script/cache_critical_dns

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

183 lines
3.6 KiB
Plaintext
Raw Normal View History

#!/usr/bin/env ruby
# frozen_string_literal: true
require 'ipaddr'
require 'resolv'
require 'time'
require 'socket'
REFRESH_SECONDS = 30
HOSTS_PATH = "/etc/hosts"
CRITICAL_HOST_ENV_VARS = %w{
DISCOURSE_DB_HOST
DISCOURSE_DB_REPLICA_HOST
DISCOURSE_REDIS_HOST
DISCOURSE_REDIS_SLAVE_HOST
}
def log(msg)
STDERR.puts "#{Time.now.iso8601}: #{msg}"
end
def error(msg)
log(msg)
end
def swap_address(hosts, name, ips)
new_file = []
hosts.split("\n").each do |line|
line.strip!
if line[0] != '#'
2018-11-22 21:48:02 -06:00
_, hostname = line.split(/\s+/)
next if hostname == name
end
new_file << line
new_file << "\n"
end
ips.each do |ip|
2018-11-22 22:43:27 -06:00
new_file << "#{ip} #{name} # AUTO GENERATED: #{Time.now.iso8601}\n"
end
2018-11-22 22:43:27 -06:00
new_file.join
end
def hosts_entries(dns, name)
host = ENV[name]
results = dns.getresources(host, Resolv::DNS::Resource::IN::A)
results.concat dns.getresources(host, Resolv::DNS::Resource::IN::AAAA)
results.map do |result|
"#{result.address}"
end
end
def send_counter(name, description, labels, value)
host = "localhost"
port = ENV.fetch("DISCOURSE_PROMETHEUS_COLLECTOR_PORT", 9405).to_i
if labels
labels = labels.map do |k, v|
"\"#{k}\": \"#{v}\""
end.join(",")
else
labels = ""
end
json = <<~JSON
{
"_type": "Custom",
"type": "Counter",
"name": "#{name}",
"description": "#{description}",
"labels": { #{labels} },
"value": #{value}
}
JSON
payload = +"POST /send-metrics HTTP/1.1\r\n"
payload << "Host: #{host}\r\n"
payload << "Connection: Close\r\n"
payload << "Content-Type: application/json\r\n"
payload << "Content-Length: #{json.bytesize}\r\n"
payload << "\r\n"
payload << json
socket = TCPSocket.new host, port
socket.write payload
socket.flush
result = socket.read
first_line = result.split("\n")[0]
if first_line.strip != "HTTP/1.1 200 OK"
error("Failed to report metric #{result}")
end
socket.close
rescue => e
error("Failed to send metric to Prometheus #{e}")
end
def report_success
send_counter('critical_dns_successes_total', 'critical DNS resolution success', nil, 1)
end
def report_failure(errors)
errors.each do |host, count|
send_counter('critical_dns_failures_total', 'critical DNS resolution failures', host ? { host: host } : nil, count)
end
end
@vars = CRITICAL_HOST_ENV_VARS.map do |name|
begin
host = ENV[name]
next if !host || host.length == 0
IPAddr.new(ENV[name])
nil
rescue IPAddr::InvalidAddressError, IPAddr::AddressFamilyError
name
end
end.compact
def loop
errors = Hash.new(0)
Resolv::DNS.open do |dns|
dns.timeouts = 2
resolved = {}
hosts = @vars.each do |var|
host = ENV[var]
begin
entries = hosts_entries(dns, var)
rescue => e
error("Failed to resolve DNS for #{name} - #{e}")
errors[host] += 1
end
if entries&.length > 0
resolved[host] = entries
else
error("Failed to find any DNS entry for #{var} : #{ENV[var]}")
errors[host] += 1
end
end
hosts_content = File.read(HOSTS_PATH)
hosts = Resolv::Hosts.new(HOSTS_PATH)
changed = false
resolved.each do |name, ips|
if hosts.getaddresses(name).map(&:to_s).sort != ips.sort
log("IP addresses for #{name} changed to #{ips}")
hosts_content = swap_address(hosts_content, name, ips)
changed = true
end
end
if changed
File.write(HOSTS_PATH, hosts_content)
end
end
rescue => e
error("Failed to access DNS - #{e}")
errors[nil] = 1
ensure
if errors == {}
report_success
else
report_failure(errors)
end
end
while true
loop
sleep REFRESH_SECONDS
end