Please review new Replication Diff Tool

Mark Reynolds <mareynol@xxxxxxxxxx> · Wed, 12 Apr 2017 17:02:45 -0400

    Hello,

    This is a beta version of a replication diff tool written in
    Python.

    Design page (this needs updating - I hope to get that done tonight)

    http://www.port389.org/docs/389ds/design/repl-diff-tool-design.html

    Current usage:

      -v, --verbose         Verbose output

      -o FILE, --outfile=FILE

                            The output file

      -D BINDDN, --binddn=BINDDN

                            The Bind DN (REQUIRED)

      -w BINDPW, --bindpw=BINDPW

                            The Bind password (REQUIRED)

      -h MHOST, --master_host=MHOST

                            The Master host (default localhost)

      -p MPORT, --master_port=MPORT

                            The Master port (default 389)

      -H RHOST, --replica_host=RHOST

                            The Replica host (REQUIRED)

      -P RPORT, --replica_port=RPORT

                            The Replica port (REQUIRED)

      -b SUFFIX, --basedn=SUFFIX

                            Replicated suffix (REQUIRED)

      -l LAG, --lagtime=LAG

                            The amount of time to ignore inconsistencies
                            (default 300 seconds)

      -Z CERTDIR, --certdir=CERTDIR

                            The certificate database directory for startTLS
                            connections

      -i IGNORE, --ignore=IGNORE

                            Comma separated list of attributes to ignore

      -M MLDIF, --mldif=MLDIF

                            Master LDIF file (offline mode)

      -R RLDIF, --rldif=RLDIF

                            Replica LDIF file (offline mode)

Examples:

python repl-diff.py -D "cn=directory manager" -w PASSWORD -h localhost -p 389 -H remotehost -P 5555 -b "dc=example,dc=com"
python repl-diff.py -D "cn=directory manager" -w PASSWORD -h localhost -p 389 -H remotehost -P 5555 -b "dc=example,dc=com" -Z /etc/dirsrv/slapd-localhost
python repl-diff.py -M /tmp/master.ldif -R /tmp/replica.ldif 

    How long the tool takes to run depends on the number of entries per
    database.  See the performance numbers below:

    Entries per Replica     Time

    ---------------------------------
100k                    40 seconds
500k                    3m 30secs
1 million               7m 30secs
2 million               14 minutes
10 million              ~70 minutes

    I'd be very interested in feedback, RFEs, and bug reports.

    Thanks,

    Mark

# --- BEGIN COPYRIGHT BLOCK ---
# Copyright (C) 2017 Red Hat, Inc.
# All rights reserved.
#
# License: GPL (version 3 or any later version).
# See LICENSE for details.
# --- END COPYRIGHT BLOCK ---
#
import re
import time
import ldap
import optparse
from ldap.ldapobject import SimpleLDAPObject
from ldap.cidict import cidict
from ldap.controls import SimplePagedResultsControl

# Version string embedded in the tool description shown by the option parser
VERSION = "1.0"
# Search filter used to fetch the entry that carries the database RUV
# (the nsds50ruv attribute is read from the entry it matches)
RUV_FILTER = '(&(nsuniqueid=ffffffff-ffffffff-ffffffff-ffffffff)(objectclass=nstombstone))'
# Each pattern captures the hexadecimal CSN that follows a ";vucsn-",
# ";vdcsn-", ";mdcsn-" or ";adcsn-" marker inside a state info value
vucsn_pattern = re.compile(';vucsn-([A-Fa-f0-9]+)')
vdcsn_pattern = re.compile(';vdcsn-([A-Fa-f0-9]+)')
mdcsn_pattern = re.compile(';mdcsn-([A-Fa-f0-9]+)')
adcsn_pattern = re.compile(';adcsn-([A-Fa-f0-9]+)')

class Entry(object):
    ''' This is a stripped down version of Entry from python-lib389.
    Once python-lib389 is released on RHEL this class will go away.

    "dn" holds the entry's DN and "data" maps attribute names to lists of
    values.  Any other name is treated as an attribute name: both
    entry['cn'] and entry.cn return the first value of "cn", or None when
    the entry does not have that attribute.
    '''
    def __init__(self, entrydata):
        ''' entrydata is a (dn, attributes) pair; a false value creates an
        Entry with neither "dn" nor "data" set (both then read as None).
        '''
        if entrydata:
            self.dn = entrydata[0]
            self.data = cidict(entrydata[1])

    def __getitem__(self, name):
        ''' Dictionary style access: entry['dn'], entry['data'], entry['cn']
        '''
        if name == 'dn' or name == 'data':
            return self.__dict__.get(name, None)
        return self.getValue(name)

    def __getattr__(self, name):
        ''' Only invoked when the normal attribute lookup fails
        '''
        if name == 'dn' or name == 'data':
            return self.__dict__.get(name, None)
        if name.startswith('__'):
            # Let protocol probes (copy, pickle, ...) see a missing attribute
            # instead of a bogus None
            raise AttributeError(name)
        return self.getValue(name)

    def getValue(self, name):
        ''' Return the first value of the attribute "name", or None if the
        entry has no data or no such attribute.

        This method was referenced by __getattr__ but never defined, which
        turned every attribute lookup into an endless recursion.
        '''
        data = self.__dict__.get('data', None)
        if not data:
            return None
        values = data.get(name)
        if not values:
            return None
        return values[0]

def get_entry(entries, dn):
    ''' Return the first entry whose dn equals the requested dn, or None
    when no entry matches
    '''
    matches = (candidate for candidate in entries if candidate.dn == dn)
    return next(matches, None)

def remove_entry(rentries, dn):
    ''' Delete, in place, the first entry of the list whose dn equals the
    requested dn.  The list is left untouched when nothing matches.
    '''
    for position, candidate in enumerate(rentries):
        if candidate.dn == dn:
            del rentries[position]
            return

def extract_time(stateinfo):
    ''' Find the newest timestamp among the csns (vucsn, vdcsn, mdcsn, adcsn)
    embedded in a nscpEntryWSI value.

    Only the first occurrence of each csn type is examined, and the leading
    8 hex digits of the csn are taken as its timestamp.

    Return the timestamp as a decimal integer, 0 if no csn was found
    '''
    csn_patterns = (vucsn_pattern, vdcsn_pattern, mdcsn_pattern, adcsn_pattern)
    hits = [csn_re.search(stateinfo) for csn_re in csn_patterns]
    stamps = [int(hit.group(1)[:8], 16) for hit in hits if hit]

    # The leading 0 is the "nothing found" result
    return max([0] + stamps)

def convert_timestamp(timestamp):
    ''' Convert a createtimestamp (UTC, "YYYYMMDDhhmmssZ") to the ctime()
    representation of that instant in the local timezone, e.g. with a local
    timezone of UTC: 20170405184656Z -> Wed Apr  5 18:46:56 2017

    The "Z" timestamp is UTC, so it is turned into epoch seconds with
    calendar.timegm().  Feeding it to time.mktime() with isdst=0 treated it
    as local standard time, which shifted the result by the UTC offset and
    by an extra hour during daylight saving time.
    '''
    import calendar

    utc_fields = (int(timestamp[:4]), int(timestamp[4:6]), int(timestamp[6:8]),
                  int(timestamp[8:10]), int(timestamp[10:12]), int(timestamp[12:14]),
                  0, 0, 0)
    secs = calendar.timegm(utc_fields)
    return time.ctime(secs)

def convert_entries(entries):
    '''Wrap every raw search result in an Entry whose dn and attribute
    names are lower-cased, and return the new list
    '''
    def normalize(raw):
        wrapped = Entry(raw)
        lowered = {}
        for name, values in list(wrapped.data.items()):
            lowered[name.lower()] = values
        wrapped.data = lowered
        wrapped.dn = wrapped.dn.lower()
        return wrapped

    return [normalize(raw) for raw in entries]

def report_conflict(entry, attr, opts):
    ''' Decide whether a difference found on "attr" is old enough to be
    reported.

    The state info values (nscpentrywsi) of the attribute are inspected: if
    the newest csn time of any of them lies within opts['lag'] seconds of
    opts['starttime'], the difference is ignored.  A lag of 0 disables the
    check, and entries without state info are always reported.

    return True - if the conflict should be reported
    return False - if it should be ignored
    '''
    if opts['lag'] == 0:
        return True

    if 'nscpentrywsi' not in entry.data:
        return True

    prefix = attr + ';'
    too_recent = [val for val in entry.data['nscpentrywsi']
                  if val.lower().startswith(prefix) and
                  (opts['starttime'] - extract_time(val)) <= opts['lag']]

    return not too_recent

def format_diff(diff):
    ''' Render a diff map as text: the dn, an underline of the same length,
    then the "missing" lines followed by the "diff" lines, one per line
    '''
    dn = diff['dn']
    lines = [dn, "-" * len(dn)]
    lines.extend(diff['missing'])
    lines.extend(diff['diff'])

    return "".join(["%s\n" % (line) for line in lines])

def get_ruv_report(opts):
    '''Build the RUV section of the report.

    The master and replica RUV lists stored in opts are sorted in place,
    then listed one element per line under their own heading.
    '''
    master_ruv = opts['master_ruv']
    master_ruv.sort()
    replica_ruv = opts['replica_ruv']
    replica_ruv.sort()

    pieces = ["Master RUV:\n"]
    pieces.extend(["  %s\n" % (element) for element in master_ruv])
    pieces.append("\nReplica RUV:\n")
    pieces.extend(["  %s\n" % (element) for element in replica_ruv])
    pieces.append("\n\n")

    return "".join(pieces)

#
# Offline mode helper functions
#
def ldif_search(LDIF, dn):
    ''' Search ldif by DN

    Lines are read from the current position of LDIF until the entry whose
    "dn: " line matches is found and fully read (up to the blank line that
    ends it), or until the end of the file.

    dn is expected lower-cased and stripped.  It is compared with the first
    physical line of the "dn: " line only, so a DN that is folded over
    several lines is matched by its first line.

    Return (Entry, number of lines read) when the entry is found, where the
    entry data maps lower-cased attribute names to lists of values.
    Return (None, 0) when it is not found.
    '''
    data = {}
    attr = None  # attribute that a continuation line would extend
    found = False
    count = 0
    for line in LDIF:
        count += 1
        # Only drop the line terminator: whitespace next to a fold is data
        line = line.rstrip('\r\n')

        if not found:
            # Only a real "dn: " line may start the entry.  Comparing
            # line[4:] alone also matched lines such as "ou: <dn>".
            if line[:4].lower() == 'dn: ' and line[4:].lower().strip() == dn:
                found = True
            continue

        if line.strip() == "":
            # End of entry
            break

        if line[0] == ' ':
            # continuation line: the fold is undone by removing exactly one
            # leading space.  Continuations of the dn line itself (no
            # attribute read yet) are skipped instead of crashing.
            if attr is not None:
                data[attr][-1] += line[1:]
            continue

        if ':' not in line:
            # Not an "attr: value" line (e.g. a comment), nothing to record
            attr = None
            continue

        name, value = line.split(":", 1)
        attr = name.lower()
        data.setdefault(attr, []).append(value.lstrip())

    if found:
        return Entry([dn, data]), count
    else:
        return None, 0

def get_dns(LDIF, opts):
    ''' Get all the DN's

    Every line starting with "dn: " contributes its lower-cased, stripped
    DN to the returned list.  The one exception is the RUV entry: its DN is
    stored in opts['ruv_dn'] instead of being added to the list.

    The RUV check is done on the lower-cased DN, so "nsUniqueId=FFFF..." is
    recognised just like "nsuniqueid=ffff...".
    '''
    ruv_prefix = 'nsuniqueid=ffffffff-ffffffff-ffffffff-ffffffff'
    dns = []
    for line in LDIF:
        if not line.startswith('dn: '):
            continue
        dn = line[4:].lower().strip()
        if dn.startswith(ruv_prefix):
            opts['ruv_dn'] = dn
        else:
            dns.append(dn)

    return dns

def get_ldif_ruv(LDIF, opts):
    ''' Search the ldif and get the ruv entry

    LDIF is rewound first, then searched for the DN stored in
    opts['ruv_dn'].  Return the nsds50ruv values of that entry, or an empty
    list when no RUV DN is known, the entry is not in this LDIF, or it has
    no nsds50ruv attribute (these cases used to raise).
    '''
    LDIF.seek(0)
    if 'ruv_dn' not in opts:
        return []

    ruv_entry, idx = ldif_search(LDIF, opts['ruv_dn'])
    if ruv_entry is None or 'nsds50ruv' not in ruv_entry.data:
        return []
    return ruv_entry.data['nsds50ruv']

def _attr_state_info(entry, attr):
    ''' Return the state info (nscpentrywsi) values of entry that belong to attr
    '''
    if 'nscpentrywsi' not in entry.data:
        return []
    prefix = attr + ';'
    return [val for val in entry.data['nscpentrywsi'] if val.lower().startswith(prefix)]


def _report_missing_attr(lines, entry, attr, missing_on, present_on):
    ''' Append the report lines for an attribute that only "present_on" has
    '''
    lines.append(" - %s missing attribute: \"%s\"" % (missing_on, attr))
    if 'nscpentrywsi' not in entry.data:
        # No state info
        lines.append("")
        return

    state_info = _attr_state_info(entry, attr)
    if state_info:
        lines.append("")
    for val in state_info:
        lines.append(" - %s's State Info: %s" % (present_on, val))
        lines.append(" - Date: %s\n" % (time.ctime(extract_time(val))))


def _report_attr_side(lines, label, entry, attr):
    ''' Append one side (Master or Replica) of a differing attribute: its
    state info if there is any, otherwise its plain values
    '''
    state_info = _attr_state_info(entry, attr)
    if state_info:
        lines.append("      %s:" % (label))
        for val in state_info:
            lines.append("        - State Info: %s" % (val))
            lines.append("        - Date:       %s\n" % (time.ctime(extract_time(val))))
    else:
        lines.append("      %s: " % (label))
        for val in entry.data[attr]:
            lines.append("        - Origin value: %s" % (val))
        lines.append("")


def cmp_entry(mentry, rentry, repl_opts):
    ''' Compare the two entries, and return a diff map

    Attributes listed in repl_opts['ignore'] are skipped, and a difference
    is only counted when report_conflict() says it is old enough.

    Return None when nothing is to be reported, otherwise a map with:
      'dn'      - the dn of the master entry
      'missing' - report lines for attributes present on one side only
      'diff'    - report lines for attributes whose values differ
      'count'   - the number of differences, as a string
    '''
    diff = {}
    diff['dn'] = mentry['dn']
    diff['missing'] = []
    diff['diff'] = []
    diff_count = 0

    rlist = list(rentry.data.keys())
    mlist = list(mentry.data.keys())
    rattrs = set(rlist)
    mattrs = set(mlist)

    #
    # Check master
    #
    for mattr in mlist:
        if mattr in repl_opts['ignore']:
            continue

        if mattr not in rattrs:
            # Replica is missing the attribute.  Display the state info
            if report_conflict(mentry, mattr, repl_opts):
                _report_missing_attr(diff['missing'], mentry, mattr, 'Replica', 'Master')
                diff_count += 1
            continue

        # The values of an LDAP attribute have no defined order, so the same
        # values returned in a different order are not a difference
        if sorted(mentry.data[mattr]) == sorted(rentry.data[mattr]):
            continue

        # Replica's attr value is different
        if report_conflict(rentry, mattr, repl_opts) and report_conflict(mentry, mattr, repl_opts):
            diff['diff'].append(" - Attribute '%s' is different:" % mattr)
            if 'nscpentrywsi' in mentry.data:
                # The replica side falls back to its plain values when it
                # carries no state info (this used to raise a KeyError)
                _report_attr_side(diff['diff'], 'Master', mentry, mattr)
                _report_attr_side(diff['diff'], 'Replica', rentry, mattr)
            else:
                # no state info
                diff['diff'].append("      Master: ")
                for val in mentry.data[mattr]:
                    diff['diff'].append("        - %s: %s" % (mattr, val))
                diff['diff'].append("      Replica: ")
                for val in rentry.data[mattr]:
                    diff['diff'].append("        - %s: %s\n" % (mattr, val))

            diff_count += 1

    #
    # Check replica (only need to check for missing attributes)
    #
    for rattr in rlist:
        if rattr in repl_opts['ignore']:
            continue

        if rattr not in mattrs:
            # Master is missing the attribute
            if report_conflict(rentry, rattr, repl_opts):
                _report_missing_attr(diff['missing'], rentry, rattr, 'Master', 'Replica')
                diff_count += 1

    if diff_count > 0:
        diff['count'] = str(diff_count)
        return diff
    else:
        return None

def _ldif_lookup(LDIF, dn):
    ''' Find dn in LDIF: first from the current position, then once more
    from the top.  The file is left rewound when the entry does not exist.
    '''
    entry, idx = ldif_search(LDIF, dn)
    if entry is None:
        # missing entry - restart search from beginning
        LDIF.seek(0)
        entry, idx = ldif_search(LDIF, dn)
        if entry is None:
            LDIF.seek(0)
    return entry


def _missing_entry_line(dn, entry, owner):
    ''' Format the report line of an entry that only "owner" has
    '''
    if 'createtimestamp' in entry.data:
        return ('  - %s  (%s\'s creation date:  %s)\n' %
                (dn, owner, convert_timestamp(entry.data['createtimestamp'][0])))
    return '  - %s\n' % (dn)


def _compare_ldifs(MLDIF, RLDIF, opts):
    ''' Compare two open LDIF files.  The RUVs are stored in opts, and
    (master count, replica count, missing entries text, list of formatted
    entry diffs) is returned.
    '''
    missing_report = ""
    diff_report = []

    # Get all the dn's, and entry counts
    print ("Gathering all the DN's...")
    master_dns = get_dns(MLDIF, opts)
    replica_dns = get_dns(RLDIF, opts)

    # Get DB RUV
    print ("Gathering the database RUV's...")
    opts['master_ruv'] = get_ldif_ruv(MLDIF, opts)
    opts['replica_ruv'] = get_ldif_ruv(RLDIF, opts)

    # Reset the cursors
    MLDIF.seek(0)
    RLDIF.seek(0)

    # Compare the master entries with the replica's
    print ("Comparing Master to Replica...")
    missing_lines = []
    for dn in master_dns:
        mentry, idx = ldif_search(MLDIF, dn)
        rentry = _ldif_lookup(RLDIF, dn)
        if rentry is None:
            missing_lines.append(_missing_entry_line(dn, mentry, 'Master'))
            continue

        # Compare the entries
        diff = cmp_entry(mentry, rentry, opts)
        if diff:
            # We have a diff, report the result
            diff_report.append(format_diff(diff))
    if missing_lines:
        missing_report += 'Replica is missing entries:\n' + ''.join(missing_lines) + '\n'

    # Search Replica, and look for missing entries only
    print ("Comparing Replica to Master...")
    MLDIF.seek(0)
    RLDIF.seek(0)
    missing_lines = []
    for dn in replica_dns:
        rentry, idx = ldif_search(RLDIF, dn)
        mentry = _ldif_lookup(MLDIF, dn)
        if mentry is None:
            missing_lines.append(_missing_entry_line(dn, rentry, 'Replica'))
    if missing_lines:
        missing_report += 'Master is missing entries:\n' + ''.join(missing_lines) + '\n'

    return len(master_dns), len(replica_dns), missing_report, diff_report


def do_offline_report(opts, output_file=None):
    ''' Check for inconsistencies between two ldifs

    opts['mldif'] and opts['rldif'] name the master and replica LDIF files.
    The report is written to output_file when one is given (None is then
    returned), otherwise the report text is returned.  None is also returned,
    after printing a message, when one of the files can not be opened.
    '''
    # Open LDIF files
    try:
        MLDIF = open(opts['mldif'], "r")
    except Exception as e:
        print('Failed to open Master LDIF: ' + str(e))
        return None

    # The nested try/finally blocks make sure both files are closed again,
    # whatever happens while they are compared
    try:
        try:
            RLDIF = open(opts['rldif'], "r")
        except Exception as e:
            print('Failed to open Replica LDIF: ' + str(e))
            return None

        try:
            m_count, r_count, missing_report, diff_report = _compare_ldifs(MLDIF, RLDIF, opts)
        finally:
            RLDIF.close()
    finally:
        MLDIF.close()

    print ("Preparing report...\n")

    # Build final report
    final_report = ('=' * 80 + '\n')
    final_report += ('         Replication Synchronization Report  (%s)\n' %
                     time.ctime())
    final_report += ('=' * 80 + '\n\n\n')
    final_report += ('Database RUV\'s\n')
    final_report += ('=====================================================\n\n')
    final_report += get_ruv_report(opts)
    final_report += ('Entry Counts\n')
    final_report += ('=====================================================\n\n')
    final_report += ('Master:  %d\n' % (m_count))
    final_report += ('Replica: %d\n\n' % (r_count))
    if missing_report != "":
        final_report += ('\nMissing Entries\n')
        final_report += ('=====================================================\n\n')
        final_report += ('%s\n' % (missing_report))
    if len(diff_report) > 0:
        final_report += ('\nEntry Inconsistencies\n')
        final_report += ('=====================================================\n\n')
    for diff in diff_report:
        final_report += ('%s\n' % (diff))
    if missing_report == "" and len(diff_report) == 0 and m_count == r_count:
        final_report += ('No differences between Master and Replica\n')

    if output_file:
        output_file.write(final_report)
    else:
        return final_report

def check_for_diffs(mentries, rentries, report, opts):
    ''' Check for diffs, return the updated report

    mentries and rentries are the entries of the current result page of the
    master and of the replica.  Entries that found no partner in earlier
    pages (report['r_missing'] / report['m_missing']) are added to them, so
    that they get another chance to match.  Both lists are modified in place;
    afterwards rentries only holds the replica entries that stayed unmatched.

    The report is updated with the entry counts of the page, the formatted
    diffs, and the new lists of unmatched entries.
    '''
    diff_report = []
    r_missing = []

    report['m_count'] += len(mentries)
    report['r_count'] += len(rentries)

    # Add the stragglers
    if len(report['r_missing']) > 0:
        mentries += report['r_missing']
    if len(report['m_missing']) > 0:
        rentries += report['m_missing']

    # Index the replica entries by dn: one dictionary lookup per master
    # entry instead of a scan over the whole list
    waiting = {}
    for rentry in rentries:
        waiting.setdefault(rentry.dn, []).append(rentry)

    matched = set()
    for mentry in mentries:
        candidates = waiting.get(mentry.dn)
        if candidates:
            # First entry with that dn wins, it can only be matched once
            rentry = candidates.pop(0)
            matched.add(id(rentry))
            diff = cmp_entry(mentry, rentry, opts)
            if diff:
                diff_report.append(format_diff(diff))
        else:
            # Add missing entry in Replica
            r_missing.append(mentry)

    # We should not have any entries left if we are in sync
    m_missing = [rentry for rentry in rentries if id(rentry) not in matched]
    rentries[:] = m_missing

    if len(diff_report) > 0:
        report['diff'] += diff_report

    # Reset the missing entries
    report['m_missing'] = m_missing
    report['r_missing'] = r_missing

    return report

def _start_tls(conn, name, certdir):
    ''' Negotiate startTLS on conn, exit with an error message on failure
    '''
    conn.set_option(ldap.OPT_X_TLS_CACERTDIR, certdir)
    conn.set_option(ldap.OPT_X_TLS_REQUIRE_CERT, ldap.OPT_X_TLS_HARD)
    try:
        conn.start_tls_s()
    except ldap.LDAPError as e:
        print('TLS negotiation failed on %s: %s' % (name, str(e)))
        exit(1)


def _bind(conn, uri, name, opts):
    ''' Bind with the configured DN and password, exit on failure
    '''
    try:
        conn.simple_bind_s(opts['binddn'], opts['bindpw'])
    except ldap.SERVER_DOWN as e:
        print("Cannot connect to %r" % uri)
        exit(1)
    except ldap.LDAPError as e:
        print("Error: Failed to authenticate to %s: %s" % (name, str(e)))
        exit(1)


def _get_ruv(conn, name, suffix):
    ''' Return the nsds50ruv values of the suffix, exit on failure
    '''
    try:
        ruv = conn.search_s(suffix, ldap.SCOPE_SUBTREE, RUV_FILTER, ['nsds50ruv'])
        return ruv[0][1]['nsds50ruv']
    except ldap.LDAPError as e:
        print("Error: Failed to get %s RUV entry: %s" % (name, str(e)))
        exit(1)
    except (IndexError, KeyError):
        # The search succeeded but returned no RUV
        print("Error: Failed to get %s RUV entry: no RUV found under %s" % (name, suffix))
        exit(1)


def connect_to_replicas(opts):
    ''' Connect and bind to the Master and to the Replica, and read the
    database RUV of both.

    startTLS is negotiated first when opts['certdir'] is set.  The RUVs are
    stored in opts['master_ruv'] and opts['replica_ruv'].  Any failure
    prints a message and exits with status 1.

    Return (master connection, replica connection, opts)
    '''
    print('Connecting to servers...')
    # Each server keeps its own uri, so that a connection error names the
    # server that actually failed
    master_uri = "ldap://%s:%s/" % (opts['mhost'], opts['mport'])
    master = SimpleLDAPObject(master_uri)

    replica_uri = "ldap://%s:%s/" % (opts['rhost'], opts['rport'])
    replica = SimpleLDAPObject(replica_uri)

    # Setup startTLS
    if opts['certdir'] is not None:
        _start_tls(master, 'Master', opts['certdir'])
        _start_tls(replica, 'Replica', opts['certdir'])

    # Open connection to master
    _bind(master, master_uri, 'Master', opts)

    # Open connection to replica
    _bind(replica, replica_uri, 'Replica', opts)

    # Get the RUVs
    print ("Gathering Master's RUV...")
    opts['master_ruv'] = _get_ruv(master, 'Master', opts['suffix'])

    print ("Gathering Replica's RUV...")
    opts['replica_ruv'] = _get_ruv(replica, 'Replica', opts['suffix'])

    return (master, replica, opts)

def _missing_entries_lines(entries, created_on):
    ''' Format one report line per missing entry, with the creation date
    when the entry carries a createtimestamp
    '''
    lines = ''
    for entry in entries:
        if 'createtimestamp' in entry.data:
            lines += ('   - %s  (Created on %s at: %s)\n' %
                      (entry.dn, created_on, convert_timestamp(entry.data['createtimestamp'][0])))
        else:
            lines += ('   - %s\n' % (entry.dn))
    return lines


def print_online_report(report, opts, output_file):
    ''' Print the online report

    The report is assembled from the RUVs stored in opts and from the
    counts, conflict text, missing entries and diffs gathered in "report".
    It is written to output_file when one is given, otherwise printed.
    '''
    print ('Display final report...\n')
    m_missing = len(report['m_missing'])
    r_missing = len(report['r_missing'])
    final_report = ('=' * 80 + '\n')
    final_report += ('         Replication Synchronization Report  (%s)\n' %
                     time.ctime())
    final_report += ('=' * 80 + '\n\n\n')
    final_report += ('Database RUV\'s\n')
    final_report += ('=====================================================\n\n')
    final_report += get_ruv_report(opts)
    final_report += ('Entry Counts\n')
    final_report += ('=====================================================\n\n')
    final_report += ('Master:  %d\n' % (report['m_count']))
    final_report += ('Replica: %d\n\n' % (report['r_count']))
    final_report += report['conflict']
    missing = False
    if r_missing > 0 or m_missing > 0:
        missing = True
        final_report += ('\nMissing Entries\n')
        final_report += ('=====================================================\n\n')
        if m_missing > 0:
            final_report += ('  Entries missing on Master:\n')
            final_report += _missing_entries_lines(report['m_missing'], 'Replica')

        if r_missing > 0:
            if m_missing > 0:
                final_report += ('\n')
            final_report += ('  Entries missing on Replica:\n')
            final_report += _missing_entries_lines(report['r_missing'], 'Master')

    if len(report['diff']) > 0:
        final_report += ('\n\nEntry Inconsistencies\n')
        final_report += ('=====================================================\n\n')
        for diff in report['diff']:
            final_report += ('%s\n' % (diff))

    if not missing and len(report['diff']) == 0 and report['m_count'] == report['r_count']:
        final_report += ('No differences between Master and Replica\n')

    if output_file:
        output_file.write(final_report)
    else:
        # Function call form: the print statement is a syntax error on
        # Python 3, and the rest of the file already uses print()
        print(final_report)

def _gather_conflicts(conn, name, opts):
    ''' Run a paged search for the conflict entries of one server.

    Return a list of {'dn': ..., 'date': ...} maps, where date is the first
    createtimestamp value of the entry (None if it has none).  A failing
    search prints a message and exits with status 1.
    '''
    conflicts = []
    paged_ctrl = SimplePagedResultsControl(True, size=100, cookie='')
    controls = [paged_ctrl]
    try:
        while True:
            # Every page has to be requested with the same search; only the
            # cookie of the paging control changes
            msgid = conn.search_ext(opts['suffix'], ldap.SCOPE_SUBTREE, "nsds5ReplConflict=*",
                                    ['nsds5ReplConflict', 'createtimestamp'], serverctrls=controls)
            rtype, rdata, rmsgid, rctrls = conn.result3(msgid)

            for entry in convert_entries(rdata):
                dates = entry.data.get('createtimestamp')
                conflicts.append({'dn': entry.dn, 'date': dates[0] if dates else None})

            pctrls = [
                c
                for c in rctrls
                if c.controlType == SimplePagedResultsControl.controlType
                ]
            if not pctrls or not pctrls[0].cookie:
                break  # No more pages available

            # Copy cookie from response control to request control
            paged_ctrl.cookie = pctrls[0].cookie
    except ldap.LDAPError as e:
        print("Error: Failed to get %s conflicts: %s" % (name, str(e)))
        exit(1)

    return conflicts


def _format_conflicts(conflicts):
    ''' Format one report line per conflict entry
    '''
    lines = ''
    for entry in conflicts:
        if entry['date'] is None:
            lines += ('  - %s\n' % (entry['dn']))
        else:
            lines += ('  - %s  (Created on %s)\n' % (entry['dn'], convert_timestamp(entry['date'])))
    return lines


def get_conflict_report(master, replica, opts):
    ''' Gather the conflict entry dn's for each replica

    Return the "Replication Conflict Entries" section of the report, or an
    empty string when neither server has conflict entries.
    '''
    print ('Start gathering conflict entries...')
    m_conflicts = _gather_conflicts(master, 'Master', opts)
    r_conflicts = _gather_conflicts(replica, 'Replica', opts)

    if len(m_conflicts) == 0 and len(r_conflicts) == 0:
        return ""

    report = "\n\nReplication Conflict Entries\n"
    report += "============================\n\n"
    if len(m_conflicts) > 0:
        report += ('  Master Conflict Entries (%d)\n' % (len(m_conflicts)))
        report += _format_conflicts(m_conflicts)

    if len(r_conflicts) > 0:
        if len(m_conflicts) > 0:
            report += "\n"  # spacer
        report += ('  Replica Conflict Entries (%d)\n' % (len(r_conflicts)))
        report += _format_conflicts(r_conflicts)
    report += "\n"

    return report

def _search_next_page(conn, opts, paged_ctrl):
    ''' Request the next page of the full subtree search, return the message id
    '''
    return conn.search_ext(opts['suffix'], ldap.SCOPE_SUBTREE, "objectclass=*",
                           ['*', 'createtimestamp', 'nscpentrywsi'], serverctrls=[paged_ctrl])


def _next_page_cookie(rctrls):
    ''' Return the paging cookie found in the response controls, or None
    when there are no more pages
    '''
    pctrls = [
        c
        for c in rctrls
        if c.controlType == SimplePagedResultsControl.controlType
        ]
    if pctrls and pctrls[0].cookie:
        return pctrls[0].cookie
    return None


def do_online_report(opts, output_file=None):
    ''' Check for differences between two replicas

    Both servers are searched page by page; the pages are compared as they
    arrive, and entries without a partner are carried over to the next
    page.  Once both searches are done the conflict entries are gathered
    and the final report is written to output_file, or printed if there is
    none.  The connections are always unbound before returning.
    '''
    m_done = False
    r_done = False
    report = {}
    report['diff'] = []
    report['m_missing'] = []
    report['r_missing'] = []
    report['m_count'] = 0
    report['r_count'] = 0

    master, replica, opts = connect_to_replicas(opts)

    try:
        # Fire off paged searches on Master and Replica.  Each server gets
        # its own paging control, as each one hands out its own cookie.
        print ('Start searching and comparing...')
        m_ctrl = SimplePagedResultsControl(True, size=500, cookie='')
        r_ctrl = SimplePagedResultsControl(True, size=500, cookie='')
        try:
            master_msgid = _search_next_page(master, opts, m_ctrl)
        except ldap.LDAPError as e:
            print("Error: Failed to get Master entries: %s" % str(e))
            exit(1)
        try:
            replica_msgid = _search_next_page(replica, opts, r_ctrl)
        except ldap.LDAPError as e:
            print("Error: Failed to get Replica entries: %s" % str(e))
            exit(1)

        # Read the results and start comparing
        while not m_done or not r_done:
            # A server that has no more pages contributes an empty page
            if not m_done:
                m_rtype, m_rdata, m_rmsgid, m_rctrls = master.result3(master_msgid)
            else:
                m_rdata = []

            if not r_done:
                r_rtype, r_rdata, r_rmsgid, r_rctrls = replica.result3(replica_msgid)
            else:
                r_rdata = []

            # Convert entries
            mentries = convert_entries(m_rdata)
            rentries = convert_entries(r_rdata)

            # Check for diffs
            report = check_for_diffs(mentries, rentries, report, opts)

            if not m_done:
                # Master
                cookie = _next_page_cookie(m_rctrls)
                if cookie:
                    # Copy cookie from response control to request control
                    m_ctrl.cookie = cookie
                    master_msgid = _search_next_page(master, opts, m_ctrl)
                else:
                    m_done = True  # No more pages available

            if not r_done:
                # Replica
                cookie = _next_page_cookie(r_rctrls)
                if cookie:
                    # Copy cookie from response control to request control
                    r_ctrl.cookie = cookie
                    replica_msgid = _search_next_page(replica, opts, r_ctrl)
                else:
                    r_done = True  # No more pages available

        # Get conflicts
        report['conflict'] = get_conflict_report(master, replica, opts)

        # Do the final report
        print_online_report(report, opts, output_file)
    finally:
        # unbind, also when the comparison failed or exited early
        try:
            master.unbind_s()
        finally:
            replica.unbind_s()

def _build_option_parser():
    """Create the optparse parser that declares every command line option."""
    desc = ("""Replication Comparison Tool (v""" + VERSION + """).  This script """ +
            """ can be used to compare two replicas to see if they are in sync.""")

    # '-h' is used for the master host, so optparse's automatic -h/--help
    # option is disabled and a long-only '--help' is registered instead.
    parser = optparse.OptionParser(description=desc, add_help_option=False)
    parser.add_option('--help', action='help', help='Show this help message and exit')

    # General options
    parser.add_option('-v', '--verbose', help='Verbose output', action='store_true', default=False, dest='verbose')
    parser.add_option('-o', '--outfile', help='The output file', dest='file', default=None)
    parser.add_option('-D', '--binddn', help='The Bind DN (REQUIRED)', dest='binddn', default=None)
    parser.add_option('-w', '--bindpw', help='The Bind password (REQUIRED)', dest='bindpw', default=None)
    parser.add_option('-h', '--master_host', help='The Master host (default localhost)', dest='mhost',
                      default='localhost')
    parser.add_option('-p', '--master_port', help='The Master port (default 389)', dest='mport', default='389')
    parser.add_option('-H', '--replica_host', help='The Replica host (REQUIRED)', dest='rhost', default=None)
    parser.add_option('-P', '--replica_port', help='The Replica port (REQUIRED)', dest='rport', default=None)
    parser.add_option('-b', '--basedn', help='Replicated suffix (REQUIRED)', dest='suffix', default=None)
    parser.add_option('-l', '--lagtime', help='The amount of time to ignore inconsistencies (default 300 seconds)',
                      dest='lag', default='300')
    parser.add_option('-Z', '--certdir', help='The certificate database directory for startTLS connections',
                      dest='certdir', default=None)
    parser.add_option('-i', '--ignore', help='Comma separated list of attributes to ignore',
                      dest='ignore', default=None)

    # Offline mode
    parser.add_option('-M', '--mldif', help='Master LDIF file (offline mode)',
                      dest='mldif', default=None)
    parser.add_option('-R', '--rldif', help='Replica LDIF file (offline mode)',
                      dest='rldif', default=None)
    return parser


def _build_repl_opts(options, lag):
    """Translate the parsed command line *options* into the report options dict.

    *lag* is the already validated lag time in seconds.
    """
    # These two attributes are always skipped; user supplied names are appended.
    ignore_attrs = ['createtimestamp', 'nscpentrywsi']
    if options.ignore:
        # Tolerate "a, b" style lists: strip blanks and drop empty items,
        # since a name with surrounding whitespace could never match.
        stripped = [attr.strip() for attr in options.ignore.split(',')]
        ignore_attrs.extend(attr for attr in stripped if attr)

    return {
        'binddn': options.binddn,
        'bindpw': options.bindpw,
        'mhost': options.mhost,
        'mport': options.mport,
        'rhost': options.rhost,
        'rport': options.rport,
        'suffix': options.suffix,
        'certdir': options.certdir,
        'starttime': int(time.time()),
        'verbose': options.verbose,
        'mldif': options.mldif,
        'rldif': options.rldif,
        'ignore': ignore_attrs,
        'lag': lag,
    }


def main():
    """Parse the command line and run the offline or online replication report.

    Offline mode is selected when both LDIF files (-M and -R) are given;
    otherwise the online report is run, which requires the bind DN, bind
    password, suffix, replica host and replica port.

    Exits with status 1 when required options are missing, when the lag time
    is not an integer, or when the output file cannot be opened.
    """
    # Function-local import: this block cannot rely on 'sys' being imported at
    # the top of the file, and sys.exit() is preferred over the site-provided
    # exit() helper, which is missing under "python -S" and in frozen builds.
    import sys

    parser = _build_option_parser()

    # optparse returns (option values, leftover positional arguments);
    # positional arguments are not used by this tool.
    (options, _positional) = parser.parse_args()

    # Check for required options
    if (options.mldif is None) != (options.rldif is None):
        # Only one of the two LDIF files was supplied
        print("\n-------> Missing required options for offline mode!\n")
        parser.print_help()
        sys.exit(1)

    offline = options.mldif is not None and options.rldif is not None
    online_required = (options.binddn, options.bindpw, options.suffix,
                       options.rhost, options.rport)
    if not offline and any(value is None for value in online_required):
        print("\n-------> Missing required options for online mode!\n")
        parser.print_help()
        sys.exit(1)

    if offline:
        # We're offline - "lag" only applies to online mode
        lag = 0
    else:
        try:
            lag = int(options.lag)
        except ValueError:
            print("\n-------> Invalid lag time (must be an integer): " + str(options.lag) + "\n")
            sys.exit(1)

    # Initialize the options
    repl_opts = _build_repl_opts(options, lag)

    output_file = None
    if options.file:
        # Write report to the file
        try:
            output_file = open(options.file, "w")
        except IOError:
            print("Can't open file: " + options.file)
            sys.exit(1)

    try:
        if offline:
            print ("Performing offline report...")
            do_offline_report(repl_opts, output_file)
        else:
            print ("Performing online report...")
            do_online_report(repl_opts, output_file)
    finally:
        # Close the report file even when the report raises part way through
        if output_file is not None:
            output_file.close()

# Run the tool only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
_______________________________________________
389-devel mailing list -- 389-devel@xxxxxxxxxxxxxxxxxxxxxxx
To unsubscribe send an email to 389-devel-leave@xxxxxxxxxxxxxxxxxxxxxxx