diff --git a/ipalib/install/certmonger.py b/ipalib/install/certmonger.py index ebf5d0889..03caff964 100644 --- a/ipalib/install/certmonger.py +++ b/ipalib/install/certmonger.py @@ -305,20 +305,56 @@ def add_subject(request_id, subject): def request_and_wait_for_cert( certpath, subject, principal, nickname=None, passwd_fname=None, dns=None, ca='IPA', profile=None, - pre_command=None, post_command=None, storage='NSSDB', perms=None): - """ - Execute certmonger to request a server certificate. + pre_command=None, post_command=None, storage='NSSDB', perms=None, + resubmit_timeout=0): + """Request certificate, wait and possibly resubmit failing requests - The method also waits for the certificate to be available. + Submit a cert request to certmonger and wait until the request has + finished. + + With timeout, a failed request is resubmitted. During parallel replica + installation, a request sometimes fails with CA_REJECTED or + CA_UNREACHABLE. The error occurs when the master is either busy or some + information hasn't been replicated yet. Even a stuck request can be + recovered, e.g. when permission and group information has been + replicated.
""" - reqId = request_cert(certpath, subject, principal, nickname, - passwd_fname, dns, ca, profile, - pre_command, post_command, storage, perms) - state = wait_for_request(reqId, api.env.startup_timeout) - ca_error = get_request_value(reqId, 'ca-error') - if state != 'MONITORING' or ca_error: - raise RuntimeError("Certificate issuance failed ({})".format(state)) - return reqId + req_id = request_cert( + certpath, subject, principal, nickname, passwd_fname, dns, ca, + profile, pre_command, post_command, storage, perms + ) + + deadline = time.time() + resubmit_timeout + while True: # until success, timeout, or error + state = wait_for_request(req_id, api.env.replication_wait_timeout) + ca_error = get_request_value(req_id, 'ca-error') + if state == 'MONITORING' and ca_error is None: + # we got a winner, exiting + logger.debug("Cert request %s was successful", req_id) + return req_id + + logger.debug( + "Cert request %s failed: %s (%s)", req_id, state, ca_error + ) + if state not in {'CA_REJECTED', 'CA_UNREACHABLE'}: + # probably unrecoverable error + logger.debug("Giving up on cert request %s", req_id) + break + elif not resubmit_timeout: + # no resubmit + break + elif time.time() > deadline: + logger.debug("Request %s reached resubmit dead line", req_id) + break + else: + # sleep and resubmit + logger.debug("Sleep and resubmit cert request %s", req_id) + time.sleep(10) + resubmit_request(req_id) + + raise RuntimeError( + "Certificate issuance failed ({}: {})".format(state, ca_error) + ) def request_cert( diff --git a/ipaserver/install/cainstance.py b/ipaserver/install/cainstance.py index 258a18237..c295108c1 100644 --- a/ipaserver/install/cainstance.py +++ b/ipaserver/install/cainstance.py @@ -926,7 +926,9 @@ class CAInstance(DogtagInstance): profile='caServerCert', pre_command='renew_ra_cert_pre', post_command='renew_ra_cert', - storage="FILE") + storage="FILE", + resubmit_timeout=api.env.replication_wait_timeout + ) self.__set_ra_cert_perms() self.requestId = 
str(reqId) diff --git a/ipaserver/install/certs.py b/ipaserver/install/certs.py index 2ee8da4df..1466570fa 100644 --- a/ipaserver/install/certs.py +++ b/ipaserver/install/certs.py @@ -658,14 +658,18 @@ class CertDB(object): def export_pem_cert(self, nickname, location): return self.nssdb.export_pem_cert(nickname, location) - def request_service_cert(self, nickname, principal, host): - certmonger.request_and_wait_for_cert( + def request_service_cert(self, nickname, principal, host, + resubmit_timeout=None): + if resubmit_timeout is None: + resubmit_timeout = api.env.replication_wait_timeout + return certmonger.request_and_wait_for_cert( certpath=self.secdir, storage='NSSDB', nickname=nickname, principal=principal, subject=host, - passwd_fname=self.passwd_fname + passwd_fname=self.passwd_fname, + resubmit_timeout=resubmit_timeout ) def is_ipa_issued_cert(self, api, nickname): diff --git a/ipaserver/install/dsinstance.py b/ipaserver/install/dsinstance.py index 492bc6747..926a67792 100644 --- a/ipaserver/install/dsinstance.py +++ b/ipaserver/install/dsinstance.py @@ -852,7 +852,8 @@ class DsInstance(service.Service): ca='IPA', profile=dogtag.DEFAULT_PROFILE, dns=[self.fqdn], - post_command=cmd + post_command=cmd, + resubmit_timeout=api.env.replication_wait_timeout ) finally: if prev_helper is not None: diff --git a/ipaserver/install/httpinstance.py b/ipaserver/install/httpinstance.py index 84095c1dd..62b563fb2 100644 --- a/ipaserver/install/httpinstance.py +++ b/ipaserver/install/httpinstance.py @@ -378,7 +378,8 @@ class HTTPInstance(service.Service): dns=[self.fqdn], post_command='restart_httpd', storage='FILE', - passwd_fname=key_passwd_file + passwd_fname=key_passwd_file, + resubmit_timeout=api.env.replication_wait_timeout ) finally: if prev_helper is not None: diff --git a/ipaserver/install/krbinstance.py b/ipaserver/install/krbinstance.py index 22f8ac2a0..1c7878875 100644 --- a/ipaserver/install/krbinstance.py +++ b/ipaserver/install/krbinstance.py @@ -456,7 
+456,8 @@ class KrbInstance(service.Service): storage='FILE', profile=KDC_PROFILE, post_command='renew_kdc_cert', - perms=(0o644, 0o600) + perms=(0o644, 0o600), + resubmit_timeout=api.env.replication_wait_timeout ) except dbus.DBusException as e: # if the certificate is already tracked, ignore the error