Run the CLEANALLRUV task when deleting a replication agreement.

This adds two new commands to ipa-replica-manage: list-ruv & clean-ruv

list-ruv can be used to list the update vectors the master has
configured

clean-ruv can be used to fire off the CLEANALLRUV task to remove a
replication vector. It should be used with caution.

https://fedorahosted.org/freeipa/ticket/2303
This commit is contained in:
Rob Crittenden
2012-09-17 17:45:42 +02:00
committed by Martin Kosek
parent c0630950a1
commit c9c55a2845
7 changed files with 343 additions and 30 deletions

View File

@@ -22,6 +22,7 @@ import os
import ldap, re, krbV
import traceback
from urllib2 import urlparse
from ipapython import ipautil
from ipaserver.install import replication, dsinstance, installutils
@@ -38,6 +39,7 @@ CACERT = "/etc/ipa/ca.crt"
# dict of command name and tuples of min/max num of args needed
commands = {
"list":(0, 1, "[master fqdn]", ""),
"list-ruv":(0, 0, "", ""),
"connect":(1, 2, "<master fqdn> [other master fqdn]",
"must provide the name of the servers to connect"),
"disconnect":(1, 2, "<master fqdn> [other master fqdn]",
@@ -45,9 +47,23 @@ commands = {
"del":(1, 1, "<master fqdn>",
"must provide hostname of master to delete"),
"re-initialize":(0, 0, "", ""),
"force-sync":(0, 0, "", "")
"force-sync":(0, 0, "", ""),
"clean-ruv":(1, 1, "Replica ID of to clean", "must provide replica ID to clean"),
"abort-clean-ruv":(1, 1, "Replica ID to abort cleaning", "must provide replica ID to abort cleaning"),
"list-clean-ruv":(0, 0, "", ""),
}
def convert_error(exc):
    """
    Return a human-readable message for an exception.

    LDAP exceptions normally carry a dictionary as their first argument,
    with a 'desc' key and an optional 'info' key; those are combined into
    a single "desc info" string. Any other exception, and any LDAPError
    whose first argument is not such a dictionary, is rendered with str().

    @exc: the exception to format
    """
    if isinstance(exc, ldap.LDAPError):
        # Guard against LDAPError instances raised without the usual
        # dictionary payload so formatting an error never raises itself.
        detail = exc.args[0] if exc.args else None
        if isinstance(detail, dict) and 'desc' in detail:
            desc = detail['desc'].strip()
            info = detail.get('info', '').strip()
            # Skip empty parts so a missing 'info' leaves no trailing space.
            return ' '.join(part for part in (desc, info) if part)
    return str(exc)
def parse_options():
parser = IPAOptionParser(version=version.VERSION)
parser.add_option("-H", "--host", dest="host", help="starting host")
@@ -132,7 +148,7 @@ def list_replicas(realm, host, replica, dirman_passwd, verbose):
try:
entries = conn.getList(dn, ldap.SCOPE_ONELEVEL)
except:
print "Failed read master data from '%s': %s" % (host, str(e))
print "Failed to read master data from '%s': %s" % (host, str(e))
return
else:
for ent in entries:
@@ -177,7 +193,7 @@ def list_replicas(realm, host, replica, dirman_passwd, verbose):
entries = repl.find_replication_agreements()
ent_type = 'replica'
except Exception, e:
print "Failed to get data from '%s': %s" % (replica, str(e))
print "Failed to get data from '%s': %s" % (replica, convert_error(e))
return
for entry in entries:
@@ -190,6 +206,15 @@ def list_replicas(realm, host, replica, dirman_passwd, verbose):
print " last update ended: %s" % str(ipautil.parse_generalized_time(entry.getValue('nsds5replicalastupdateend')))
def del_link(realm, replica1, replica2, dirman_passwd, force=False):
"""
Delete a replication agreement from host A to host B.
@realm: the Kerberos realm
@replica1: the hostname of master A
@replica2: the hostname of master B
@dirman_passwd: the Directory Manager password
@force: force deletion even if one server is down
"""
repl2 = None
@@ -202,14 +227,14 @@ def del_link(realm, replica1, replica2, dirman_passwd, force=False):
if not force and len(repl_list) <= 1 and type1 == replication.IPA_REPLICA:
print "Cannot remove the last replication link of '%s'" % replica1
print "Please use the 'del' command to remove it from the domain"
return
return False
except (ldap.NO_SUCH_OBJECT, errors.NotFound):
print "'%s' has no replication agreement for '%s'" % (replica1, replica2)
return
return False
except Exception, e:
print "Failed to get data from '%s': %s" % (replica1, str(e))
return
print "Failed to determine agreement type for '%s': %s" % (replica1, convert_error(e))
return False
if type1 == replication.IPA_REPLICA:
try:
@@ -219,36 +244,41 @@ def del_link(realm, replica1, replica2, dirman_passwd, force=False):
if not force and len(repl_list) <= 1:
print "Cannot remove the last replication link of '%s'" % replica2
print "Please use the 'del' command to remove it from the domain"
return
return False
except (ldap.NO_SUCH_OBJECT, errors.NotFound):
print "'%s' has no replication agreement for '%s'" % (replica2, replica1)
if not force:
return
return False
except Exception, e:
print "Failed to get data from '%s': %s" % (replica2, str(e))
print "Failed to get list of agreements from '%s': %s" % (replica2, convert_error(e))
if not force:
return
return False
if repl2 and type1 == replication.IPA_REPLICA:
failed = False
try:
repl2.set_readonly(readonly=True)
repl2.force_sync(repl2.conn, replica1)
cn, dn = repl2.agreement_dn(repl1.conn.host)
repl2.wait_for_repl_update(repl2.conn, dn, 30)
repl2.delete_agreement(replica1)
repl2.delete_referral(replica1)
repl2.set_readonly(readonly=False)
except ldap.LDAPError, e:
desc = e.args[0]['desc'].strip()
info = e.args[0].get('info', '').strip()
print "Unable to remove agreement on %s: %s: %s" % (replica2, desc, info)
failed = True
except Exception, e:
print "Unable to remove agreement on %s: %s" % (replica2, str(e))
print "Unable to remove agreement on %s: %s" % (replica2, convert_error(e))
failed = True
if failed:
if force:
print "Forcing removal on '%s'" % replica1
else:
return
return False
if not repl2 and force:
print "Forcing removal on '%s'" % replica1
@@ -268,10 +298,171 @@ def del_link(realm, replica1, replica2, dirman_passwd, force=False):
for dn in dns:
repl1.conn.deleteEntry(dn)
except Exception, e:
print "Error deleting winsync replica shared info: %s" % str(e)
print "Error deleting winsync replica shared info: %s" % convert_error(e)
print "Deleted replication agreement from '%s' to '%s'" % (replica1, replica2)
return True
def get_ruv(realm, host, dirman_passwd):
    """
    Return the RUV entries as a list of tuples: (hostname, rid)

    The first element of each tuple is the network location (host:port)
    taken from the LDAP URL in the RUV value; the second is the replica ID
    as a string.

    @realm: the Kerberos realm
    @host: the hostname of the master to query
    @dirman_passwd: the Directory Manager password

    Exits with status 1 if the connection to the server fails and with
    status 0 if no RUV records are found.
    """
    try:
        thisrepl = replication.ReplicationManager(realm, host, dirman_passwd)
    except Exception as e:
        print("Failed to connect to server %s: %s" % (host, convert_error(e)))
        sys.exit(1)

    search_filter = '(&(nsuniqueid=ffffffff-ffffffff-ffffffff-ffffffff)(objectclass=nstombstone))'
    try:
        entries = thisrepl.conn.search_s(api.env.basedn, ldap.SCOPE_ONELEVEL,
                                         search_filter, ['nsds50ruv'])
    except ldap.NO_SUCH_OBJECT:
        entries = []

    # A successful search may still return no entries; treat that the same
    # as a missing base object instead of failing on entries[0] below.
    if not entries:
        print("No RUV records found.")
        sys.exit(0)

    ruv_pattern = re.compile(
        r'\{replica (\d+) (ldap://.*:\d+)\}(\s+\w+\s+\w*){0,1}')

    servers = []
    for ruv in entries[0][1]['nsds50ruv']:
        # The replica generation value carries no replica ID; skip it.
        if ruv.startswith('{replicageneration'):
            continue
        data = ruv_pattern.match(ruv)
        if data:
            rid = data.group(1)
            netloc = urlparse.urlparse(data.group(2)).netloc
            servers.append((netloc, rid))
        else:
            print("unable to decode: %s" % ruv)

    return servers
def list_ruv(realm, host, dirman_passwd, verbose):
    """
    Print one "location: replica ID" line for every Replica Update Vector
    entry reported by get_ruv() for this host.

    @realm: the Kerberos realm
    @host: the hostname of the master to query
    @dirman_passwd: the Directory Manager password
    @verbose: accepted for a uniform command signature; not used here
    """
    for location, replica_id in get_ruv(realm, host, dirman_passwd):
        print("%s: %s" % (location, replica_id))
def get_rid_by_host(realm, sourcehost, host, dirman_passwd):
    """
    Look up the replica ID of a host in the RUV of another master.

    @realm: the Kerberos realm
    @sourcehost: the hostname of the master whose RUV is read
    @host: the hostname whose replica ID is wanted
    @dirman_passwd: the Directory Manager password

    Returns the replica ID as an integer, or None when no RUV entry
    matches "<host>:389".
    """
    ruv_entries = get_ruv(realm, sourcehost, dirman_passwd)
    for location, replica_id in ruv_entries:
        # RUV locations are compared as host:port with the port fixed at 389.
        if location == '%s:389' % host:
            return int(replica_id)
    return None
def clean_ruv(realm, ruv, options):
    """
    Create a CLEANALLRUV task for the given replica ID.

    @realm: the Kerberos realm
    @ruv: the replica ID to clean (an integer or a numeric string)
    @options: parsed command-line options; host, dirman_passwd and force
              are read

    Exits via sys.exit() when the replica ID is not an integer, is not
    present in the RUV, or the user declines the confirmation prompt.
    """
    try:
        ruv = int(ruv)
    except ValueError:
        sys.exit("Replica ID must be an integer: %s" % ruv)

    # Find the RUV entry for this replica ID; the else branch runs only
    # when the loop finishes without a match.
    for (netloc, rid) in get_ruv(realm, options.host, options.dirman_passwd):
        if int(rid) == ruv:
            hostname = netloc
            break
    else:
        sys.exit("Replica ID %s not found" % ruv)

    print("Clean the Replication Update Vector for %s" % hostname)
    print("")
    warning_lines = (
        "Cleaning the wrong replica ID will cause that server to no",
        "longer replicate so it may miss updates while the process",
        "is running. It would need to be re-initialized to maintain",
        "consistency. Be very careful.",
    )
    for line in warning_lines:
        print(line)

    # The prompt is only shown when --force was not given.
    if not (options.force or ipautil.user_input("Continue to clean?", False)):
        sys.exit("Aborted")

    manager = replication.ReplicationManager(realm, options.host,
                                             options.dirman_passwd)
    manager.cleanallruv(ruv)
    print("Cleanup task created")
def abort_clean_ruv(realm, ruv, options):
    """
    Given an RID abort a CLEANALLRUV task.

    @realm: the Kerberos realm
    @ruv: the replica ID whose clean task should be aborted (an integer
          or a numeric string)
    @options: parsed command-line options; host and dirman_passwd are read

    Exits via sys.exit() when the replica ID is not an integer or is not
    present in the RUV.
    """
    try:
        ruv = int(ruv)
    except ValueError:
        sys.exit("Replica ID must be an integer: %s" % ruv)

    # One RUV lookup is sufficient; repeating it only costs another
    # connection and search against the server.
    servers = get_ruv(realm, options.host, options.dirman_passwd)
    hostname = None
    for (netloc, rid) in servers:
        if ruv == int(rid):
            hostname = netloc
            break

    if hostname is None:
        sys.exit("Replica ID %s not found" % ruv)

    print("Aborting the clean Replication Update Vector task for %s" % hostname)
    print("")
    thisrepl = replication.ReplicationManager(realm, options.host,
                                              options.dirman_passwd)
    thisrepl.abortcleanallruv(ruv)

    print("Cleanup task stopped")
def _show_ruv_tasks(conn, container, cn_prefix, heading, none_message, verbose):
    """
    Print the task entries found one level below cn=<container>,cn=tasks,cn=config.
    """
    dn = DN(('cn', container), ('cn', 'tasks'), ('cn', 'config'))
    try:
        entries = conn.getList(dn, ldap.SCOPE_ONELEVEL)
    except errors.NotFound:
        print(none_message)
        return

    print(heading)
    for entry in entries:
        # The task cn carries a fixed prefix; strip it to leave the RID.
        name = entry.getValue('cn').replace(cn_prefix, '')
        status = entry.getValue('nsTaskStatus')
        print("RID %s: %s" % (name, status))
        if verbose:
            print(str(dn))
            print(entry.getValue('nstasklog'))


def list_clean_ruv(realm, host, dirman_passwd, verbose):
    """
    List the CLEANALLRUV tasks and the abort CLEANALLRUV tasks on a host.

    @realm: the Kerberos realm
    @host: the hostname of the master to query
    @dirman_passwd: the Directory Manager password
    @verbose: when true, also print the task container DN and the task log
              for each task
    """
    repl = replication.ReplicationManager(realm, host, dirman_passwd)

    _show_ruv_tasks(repl.conn, 'cleanallruv', 'clean ',
                    "CLEANALLRUV tasks",
                    "No CLEANALLRUV tasks running", verbose)

    print("")

    _show_ruv_tasks(repl.conn, 'abort cleanallruv', 'abort ',
                    "Abort CLEANALLRUV tasks",
                    "No abort CLEANALLRUV tasks running", verbose)
def del_master(realm, hostname, options):
force_del = False
@@ -281,7 +472,7 @@ def del_master(realm, hostname, options):
thisrepl = replication.ReplicationManager(realm, options.host,
options.dirman_passwd)
except Exception, e:
print "Failed to connect to server %s: %s" % (options.host, str(e))
print "Failed to connect to server %s: %s" % (options.host, convert_error(e))
sys.exit(1)
# 2. Ensure we have an agreement with the master
@@ -297,7 +488,7 @@ def del_master(realm, hostname, options):
delrepl = replication.ReplicationManager(realm, hostname, options.dirman_passwd)
except Exception, e:
if not options.force:
print "Unable to delete replica %s: %s" % (hostname, str(e))
print "Unable to delete replica %s: %s" % (hostname, convert_error(e))
sys.exit(1)
else:
print "Unable to connect to replica %s, forcing removal" % hostname
@@ -325,21 +516,35 @@ def del_master(realm, hostname, options):
if not ipautil.user_input("Continue to delete?", False):
sys.exit("Deletion aborted")
# Save the RID value before we start deleting
if repltype == replication.IPA_REPLICA:
rid = get_rid_by_host(realm, options.host, hostname, options.dirman_passwd)
# 4. Remove each agreement
print "Deleting replication agreements between %s and %s" % (hostname, ', '.join(replica_names))
for r in replica_names:
try:
del_link(realm, r, hostname, options.dirman_passwd, force=True)
if not del_link(realm, r, hostname, options.dirman_passwd, force=True):
print "Unable to remove replication agreement for %s from %s." % (hostname, r)
except Exception, e:
print "There were issues removing a connection: %s" % str(e)
print "There were issues removing a connection: %s" % convert_error(e)
# 5. Finally clean up the removed replica common entries.
# 5. Clean RUV for the deleted master
if repltype == replication.IPA_REPLICA:
try:
thisrepl.cleanallruv(rid)
except KeyboardInterrupt:
print "Wait for task interrupted. It will continue to run in the background"
# 6. Finally clean up the removed replica common entries.
try:
thisrepl.replica_cleanup(hostname, realm, force=True)
except Exception, e:
print "Failed to cleanup %s entries: %s" % (hostname, str(e))
print "Failed to cleanup %s entries: %s" % (hostname, convert_error(e))
print "You may need to manually remove them from the tree"
# 6. And clean up the removed replica DNS entries if any.
# 7. And clean up the removed replica DNS entries if any.
try:
if bindinstance.dns_container_exists(options.host, thisrepl.suffix,
dm_password=options.dirman_passwd):
@@ -352,7 +557,7 @@ def del_master(realm, hostname, options):
bind = bindinstance.BindInstance()
bind.remove_master_dns_records(hostname, realm, realm.lower())
except Exception, e:
print "Failed to cleanup %s DNS entries: %s" % (hostname, str(e))
print "Failed to cleanup %s DNS entries: %s" % (hostname, convert_error(e))
print "You may need to manually remove them from the tree"
def add_link(realm, replica1, replica2, dirman_passwd, options):
@@ -391,12 +596,11 @@ def add_link(realm, replica1, replica2, dirman_passwd, options):
# the directory server and kill the connection
try:
repl1 = replication.ReplicationManager(realm, replica1, dirman_passwd)
except (ldap.NO_SUCH_OBJECT, errors.NotFound):
print "Cannot find replica '%s'" % replica1
return
except Exception, e:
print "Failed to get data from '%s': %s" % (replica1, str(e))
print "Failed to connect to '%s': %s" % (replica1, convert_error(e))
return
if options.winsync:
@@ -513,6 +717,8 @@ def main():
if len(args) == 2:
replica = args[1]
list_replicas(realm, host, replica, dirman_passwd, options.verbose)
elif args[0] == "list-ruv":
list_ruv(realm, host, dirman_passwd, options.verbose)
elif args[0] == "del":
del_master(realm, args[1], options)
elif args[0] == "re-initialize":
@@ -541,6 +747,12 @@ def main():
replica1 = host
replica2 = args[1]
del_link(realm, replica1, replica2, dirman_passwd)
elif args[0] == "clean-ruv":
clean_ruv(realm, args[1], options)
elif args[0] == "abort-clean-ruv":
abort_clean_ruv(realm, args[1], options)
elif args[0] == "list-clean-ruv":
list_clean_ruv(realm, host, dirman_passwd, options.verbose)
try:
main()