Auto-retry failed certmonger requests

During parallel replica installation, a request sometimes fails with
CA_REJECTED or CA_UNREACHABLE. The error occurs when the master is
either busy or some information hasn't been replicated yet. Even
a stuck request can be recovered, e.g. once permission and group
information has been replicated.

A new function request_and_retry_cert() automatically resubmits failing
requests until it times out.

Fixes: https://pagure.io/freeipa/issue/7623
Signed-off-by: Christian Heimes <cheimes@redhat.com>
Reviewed-By: Stanislav Laznicka <slaznick@redhat.com>
This commit is contained in:
Christian Heimes 2018-07-08 11:53:58 +02:00
parent fcb2a06931
commit 1fa2a7cd41
6 changed files with 64 additions and 19 deletions

View File

@ -305,20 +305,56 @@ def add_subject(request_id, subject):
def request_and_wait_for_cert(
certpath, subject, principal, nickname=None, passwd_fname=None,
dns=None, ca='IPA', profile=None,
pre_command=None, post_command=None, storage='NSSDB', perms=None):
"""
Execute certmonger to request a server certificate.
pre_command=None, post_command=None, storage='NSSDB', perms=None,
resubmit_timeout=0):
"""Request certificate, wait and possibly resubmit failing requests
The method also waits for the certificate to be available.
Submit a cert request to certmonger and wait until the request has
finished.
With timeout, a failed request is resubmitted. During parallel replica
installation, a request sometimes fails with CA_REJECTED or
CA_UNREACHABLE. The error occurs when the master is either busy or some
information haven't been replicated yet. Even a stuck request can be
recovered, e.g. when permission and group information have been
replicated.
"""
reqId = request_cert(certpath, subject, principal, nickname,
passwd_fname, dns, ca, profile,
pre_command, post_command, storage, perms)
state = wait_for_request(reqId, api.env.startup_timeout)
ca_error = get_request_value(reqId, 'ca-error')
if state != 'MONITORING' or ca_error:
raise RuntimeError("Certificate issuance failed ({})".format(state))
return reqId
req_id = request_cert(
certpath, subject, principal, nickname, passwd_fname, dns, ca,
profile, pre_command, post_command, storage, perms
)
deadline = time.time() + resubmit_timeout
while True: # until success, timeout, or error
state = wait_for_request(req_id, api.env.replication_wait_timeout)
ca_error = get_request_value(req_id, 'ca-error')
if state == 'MONITORING' and ca_error is None:
# we got a winner, exiting
logger.debug("Cert request %s was successful", req_id)
return req_id
logger.debug(
"Cert request %s failed: %s (%s)", req_id, state, ca_error
)
if state not in {'CA_REJECTED', 'CA_UNREACHABLE'}:
# probably unrecoverable error
logger.debug("Giving up on cert request %s", req_id)
break
elif not resubmit_timeout:
# no resubmit
break
elif time.time() > deadline:
logger.debug("Request %s reached resubmit dead line", req_id)
break
else:
# sleep and resubmit
logger.debug("Sleep and resubmit cert request %s", req_id)
time.sleep(10)
resubmit_request(req_id)
raise RuntimeError(
"Certificate issuance failed ({}: {})".format(state, ca_error)
)
def request_cert(

View File

@ -926,7 +926,9 @@ class CAInstance(DogtagInstance):
profile='caServerCert',
pre_command='renew_ra_cert_pre',
post_command='renew_ra_cert',
storage="FILE")
storage="FILE",
resubmit_timeout=api.env.replication_wait_timeout
)
self.__set_ra_cert_perms()
self.requestId = str(reqId)

View File

@ -658,14 +658,18 @@ class CertDB(object):
def export_pem_cert(self, nickname, location):
return self.nssdb.export_pem_cert(nickname, location)
def request_service_cert(self, nickname, principal, host):
certmonger.request_and_wait_for_cert(
def request_service_cert(self, nickname, principal, host,
resubmit_timeout=None):
if resubmit_timeout is None:
resubmit_timeout = api.env.replication_wait_timeout
return certmonger.request_and_wait_for_cert(
certpath=self.secdir,
storage='NSSDB',
nickname=nickname,
principal=principal,
subject=host,
passwd_fname=self.passwd_fname
passwd_fname=self.passwd_fname,
resubmit_timeout=resubmit_timeout
)
def is_ipa_issued_cert(self, api, nickname):

View File

@ -852,7 +852,8 @@ class DsInstance(service.Service):
ca='IPA',
profile=dogtag.DEFAULT_PROFILE,
dns=[self.fqdn],
post_command=cmd
post_command=cmd,
resubmit_timeout=api.env.replication_wait_timeout
)
finally:
if prev_helper is not None:

View File

@ -378,7 +378,8 @@ class HTTPInstance(service.Service):
dns=[self.fqdn],
post_command='restart_httpd',
storage='FILE',
passwd_fname=key_passwd_file
passwd_fname=key_passwd_file,
resubmit_timeout=api.env.replication_wait_timeout
)
finally:
if prev_helper is not None:

View File

@ -456,7 +456,8 @@ class KrbInstance(service.Service):
storage='FILE',
profile=KDC_PROFILE,
post_command='renew_kdc_cert',
perms=(0o644, 0o600)
perms=(0o644, 0o600),
resubmit_timeout=api.env.replication_wait_timeout
)
except dbus.DBusException as e:
# if the certificate is already tracked, ignore the error