Make replica install more robust
Under certain circumstances, replica installation may fail in the "enable GSSAPI for replication" step when it cannot sync LDAP service principals. There is often not much we can do, as the Directory Server may be in an unrecoverable state, but we should at least wait longer before giving up. The function checking replication status was also fixed to give more accurate results by properly comparing the start/end times of the replication process and by returning an error message to the calling function. This error message is then returned to the user if we do not manage to get the LDAP service principals, to give them a pointer to the actual issue.

https://fedorahosted.org/freeipa/ticket/2950
parent 5bcbe1df37
commit be8a9e6ddb
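The shape of the fix, as a minimal sketch rather than the actual ReplicationManager code (the function name and the callables it takes are hypothetical): keep retrying the lookup, remember the last replication error message reported while waiting for a forced sync, and attach that message to the exception raised when we finally give up.

    def find_service_principals(lookup_a, lookup_b, force_sync_and_wait, retries=100):
        # Hypothetical helper: retry two LDAP lookups, forcing a sync and
        # waiting between attempts, and keep the last replication error
        # message so the final failure points at the underlying problem.
        error_message = ''
        a_entry = b_entry = None
        while retries > 0:
            a_entry = a_entry or lookup_a()
            b_entry = b_entry or lookup_b()
            if a_entry and b_entry:
                return a_entry, b_entry
            # force_sync_and_wait() is assumed to return (haserror, message),
            # mirroring the new wait_for_repl_update() return value.
            _haserror, message = force_sync_and_wait()
            if message:
                error_message = message
            retries -= 1
        error = 'One of the ldap service principals is missing. ' \
                'Replication agreement cannot be converted.'
        if error_message:
            error += '\nReplication error message: %s' % error_message
        raise RuntimeError(error)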
@@ -543,6 +543,7 @@ class ReplicationManager(object):
 
         a_entry = None
         b_entry = None
+        error_message = ''
 
         while (retries > 0 ):
             root_logger.info('Getting ldap service principals for conversion: %s and %s' % (filter_a, filter_b))
@@ -566,19 +567,23 @@ class ReplicationManager(object):
                                   % (filter_a, str(b)))
                 self.force_sync(a, b.host)
                 cn, dn = self.agreement_dn(b.host)
-                self.wait_for_repl_update(a, dn, 30)
+                haserror, error_message = self.wait_for_repl_update(a, dn, 60)
 
             if not b_entry:
                 root_logger.debug('Unable to find entry for %s on %s'
                                   % (filter_b, str(a)))
                 self.force_sync(b, a.host)
                 cn, dn = self.agreement_dn(a.host)
-                self.wait_for_repl_update(b, dn, 30)
+                haserror, error_message = self.wait_for_repl_update(b, dn, 60)
 
             retries -= 1
 
         if not a_entry or not b_entry:
-            raise RuntimeError('One of the ldap service principals is missing. Replication agreement cannot be converted')
+            error = 'One of the ldap service principals is missing. ' \
+                    'Replication agreement cannot be converted.'
+            if error_message:
+                error += '\nReplication error message: %s' % error_message
+            raise RuntimeError(error)
 
         return (a_entry[0].dn, b_entry[0].dn)
 
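With the additional context, a conversion that never finds the principals now fails with a message of roughly this shape (the second line only appears when the server reported a status, and its wording here is illustrative, not captured from a real deployment):

    RuntimeError: One of the ldap service principals is missing. Replication agreement cannot be converted.
    Replication error message: Unable to acquire replica: error: permission denied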
@@ -592,7 +597,7 @@ class ReplicationManager(object):
 
         rep_dn = self.replica_dn()
         assert isinstance(rep_dn, DN)
-        (a_dn, b_dn) = self.get_replica_principal_dns(a, b, retries=10)
+        (a_dn, b_dn) = self.get_replica_principal_dns(a, b, retries=100)
         assert isinstance(a_dn, DN)
         assert isinstance(b_dn, DN)
 
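In rough numbers, and ignoring the time the LDAP searches themselves take: each missing principal now triggers a wait_for_repl_update() call that polls once per second for up to 60 attempts instead of 30, and the outer retry loop runs up to 100 times instead of 10, so if the principals never sync the worst-case wait grows from about 10 minutes to a few hours before the install gives up.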
@@ -689,6 +694,7 @@ class ReplicationManager(object):
     def check_repl_update(self, conn, agmtdn):
         done = False
         hasError = 0
+        error_message = ''
         attrlist = ['cn', 'nsds5replicaUpdateInProgress',
                     'nsds5ReplicaLastUpdateStatus', 'nsds5ReplicaLastUpdateStart',
                     'nsds5ReplicaLastUpdateEnd']
@@ -699,21 +705,28 @@ class ReplicationManager(object):
         else:
             inprogress = entry.getValue('nsds5replicaUpdateInProgress')
             status = entry.getValue('nsds5ReplicaLastUpdateStatus')
-            start = entry.getValue('nsds5ReplicaLastUpdateStart')
-            end = entry.getValue('nsds5ReplicaLastUpdateEnd')
+            try:
+                start = int(entry.getValue('nsds5ReplicaLastUpdateStart'))
+            except (ValueError, TypeError):
+                start = 0
+            try:
+                end = int(entry.getValue('nsds5ReplicaLastUpdateEnd'))
+            except (ValueError, TypeError):
+                end = 0
             # incremental update is done if inprogress is false and end >= start
-            done = inprogress and inprogress.lower() == 'false' and start and end and (start <= end)
-            root_logger.info("Replication Update in progress: %s: status: %s: start: %s: end: %s" %
+            done = inprogress and inprogress.lower() == 'false' and start <= end
+            root_logger.info("Replication Update in progress: %s: status: %s: start: %d: end: %d" %
                 (inprogress, status, start, end))
-            if not done and status: # check for errors
+            if status: # always check for errors
                 # status will usually be a number followed by a string
                 # number != 0 means error
                 rc, msg = status.split(' ', 1)
                 if rc != '0':
                     hasError = 1
+                    error_message = msg
                     done = True
 
-        return done, hasError
+        return done, hasError, error_message
 
     def wait_for_repl_init(self, conn, agmtdn):
         done = False
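The error reporting above leans on the 389 DS convention that nsds5ReplicaLastUpdateStatus starts with a numeric code followed by a human-readable message, where a non-zero code means the last update failed. A minimal sketch of that parsing, with illustrative status strings (not captured from a real server):

    def parse_update_status(status):
        # "<code> <message>" -- a non-zero code means the last update failed.
        rc, msg = status.split(' ', 1)
        return rc != '0', msg

    parse_update_status('0 Replica acquired successfully: Incremental update succeeded')
    # -> (False, 'Replica acquired successfully: Incremental update succeeded')
    parse_update_status('-1 Unable to acquire replica: error: permission denied')
    # -> (True, 'Unable to acquire replica: error: permission denied')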
@@ -726,14 +739,15 @@ class ReplicationManager(object):
     def wait_for_repl_update(self, conn, agmtdn, maxtries=600):
         done = False
         haserror = 0
+        error_message = ''
         while not done and not haserror and maxtries > 0:
             time.sleep(1) # give it a few seconds to get going
-            done, haserror = self.check_repl_update(conn, agmtdn)
+            done, haserror, error_message = self.check_repl_update(conn, agmtdn)
             maxtries -= 1
         if maxtries == 0: # too many tries
             print "Error: timeout: could not determine agreement status: please check your directory server logs for possible errors"
             haserror = 1
-        return haserror
+        return haserror, error_message
 
     def start_replication(self, conn, hostname=None, master=None):
         print "Starting replication, please wait until this has completed."