Hi, This is along the lines of "tools for sysadmins". I plan on using these algorithms for puppet-gluster, but will try to maintain them separately as a standalone tool. The problem: Given a set of bricks and servers, if they have a logical naming convention, can an algorithm decide the ideal order. This could allow parameters such as replica count, and chained=true/false/offset#. The second problem: Given a set of bricks in a volume, if someone adds X bricks and removes Y bricks, is this valid, and what is the valid sequence of add/remove brick commands. I've written some code with test cases to try and figure this all out. I've left out a lot of corner cases, but the boilerplate is there to make it happen. Hopefully it's self explanatory. (gluster.py) Read and run it. Once this all works, the puppet-gluster use case is magic. It will be able to take care of these operations for you (if you want). For non puppet users, this will give admins the confidence to know what commands they should _probably_ run in what order. I say probably because we assume that if there's an error, they'll stop and inspect first. I haven't yet tried to implement the chained cases, or anything involving striping. There are also some corner cases with some of the current code. Once you add chaining and striping, etc, I realized it was time to step back and ask for help :) I hope this all makes sense. Comments, code, test cases are appreciated! Cheers, James @purpleidea (irc/twitter) https://ttboj.wordpress.com/
#!/usr/bin/python # -*- coding: utf-8 -*- # Copyright (C) 2012-2013+ James Shubin # Written by James Shubin <james@xxxxxxxxx> """This is brick logic for GlusterFS.""" import random import unittest def brick_str_to_dict(s): a = s.split(':') assert len(a) == 2 p = a[1] p = p if p.endswith('/') else p+'/' return {'host': a[0], 'path': p} def brick_dict_to_str(d): return str(d['host'])+':'+str(d['path']) def get_version_from_path(path): pass # rindex = path.rindex('/') # if rindex.nil? # # TODO: error, brick needs a / character... # end # base = path[rindex+1, path.size-rindex] # findv = base.rindex('#v') # if findv.nil? # return 0 # version 0 (non-existant) # else # version = base[findv+2, base.size-findv] # if version.size < 1 # # TODO: error, version string is missing... # # TODO: assume version 0 ? # return -1 # end # return version.to_i # end #end def get_versions(group): versions = [] for x in group: v = get_version_from_path(x['path']) if not v in versions: versions.append(v) return sorted(versions) # should be all int's def filter_version(group, version=0): # TODO: empty version is 0 or None ? result = [] for x in group: v = get_version_from_path(x['path']) if v == version: result.append(x) return result def natural_brick_order(bricks): """Put bricks in logical ordering.""" # XXX: technically we should specify the replica to this function, but it might not be required. maybe it's a good idea as a checksum type thing... vfinal = [] # versioned final versions = get_versions(bricks) # list of available versions... for version in versions: subset = filter_version(bricks, version) collect = {} for x in subset: key = x['host'] val = x['path'] if not key in collect.keys(): collect[key] = [] # initialize collect[key].append(val) # save in array # TODO: ensure this array is always sorted (we could also do this after # or always insert elements in the correct sorted order too :P) collect[key] = sorted(collect[key]) # we also could do this sort here... for x in collect.keys(): collect[key] = sorted(collect[key]) final = [] # final order... while len(collect) > 0: for x in sorted(collect.keys()): # NOTE: this array should already be sorted! p = collect[x].pop(0) # assume an array of at least 1 element final.append({'host': x, 'path': p}) # save if len(collect[x]) == 0: # maybe the array is empty now del collect[x] # remove that empty list's key vfinal = vfinal + final # concat array on... return vfinal def brick_delta(a, b): ai = 0 bi = 0 add = [] rem = [] while ((len(a) - ai) > 0) and ((len(b) - bi) > 0): # same element, keep going if a[ai] == b[bi]: ai = ai + 1 bi = bi + 1 continue # the elements must differ # if the element in a, doesn't exist in b... if not a[ai] in b: # then it must be a delete operation of a[ai] rem.append(a[ai]) # push onto delete queue... ai = ai + 1 continue # if the element in b, doesn't exist in a... if not b[bi] in a: # then it must be an add operation of b[bi] add.append(b[bi]) # push onto add queue... bi = bi + 1 continue # XXX: i think if either of the below conditions are true, then # we have an out of sync problem between the a and b sets. i do # think that they're probably either both true or neither true! # OR: #if a.include?(b[bi]) # # then ??? # bi = bi + 1 # ??? # # out of sync ? #end # #if b.include?(a[ai]) # # then ??? # ai = ai + 1 # ??? # # out of sync ? #end while (len(a) - ai) > 0: # if there is left over a at the end... rem.append(a[ai]) # push onto delete queue... ai = ai + 1 while (len(b) - bi) > 0: # if there is left over b at the end... add.append(b[bi]) # push onto add queue... bi = bi + 1 return {'add': add, 'del': rem} def brick_transform(a, b, XXX): pass def brick_transform_commands(a, b, volume, replica, debug=False): # XXX: can we do this all in one function, with the delta precursor ? # XXX: list of bricks 'b' can be in ANY order... test this... cmds = [] h = brick_delta(a, b) add = h['add'] rem = h['del'] if debug: cmds.append('# add:') # step through by mod <replica>, pulling out <replica> values at a time #(0..add.length - 1).step(replica).each do |i| for i in range(0, len(add)-1, replica): #sliced = add[i, replica] # slice sliced = add[i:i+replica] # slice if len(sliced) == replica: #sliced.each do |x| # print x['host'] + ':' + x['path'] #end # NOTE: the: + volume is the volume folder on the brick #bricks = sliced.map {|x| x['host'] + ':' + x['path'] + volume} bricks = [brick_dict_to_str(x)+volume for x in sliced] if debug: # include a debug comment cmds.append("# on volume: %(volume)s, adding brick(s): %(bricks)s" % {'volume': volume, 'bricks': ' '.join(bricks)}) cmds.append("gluster volume add-brick %(volume)s %(bricks)s" % {'volume': volume, 'bricks': ' '.join(bricks)}) else: # TODO: warning? pass # we have extras, we can add them next time, it's ok... # volume needs to be online for the remove-brick to work... at least if # you want the rebalance to work. future use cases might allow force... #started = `gluster volume status %(volume)s` #if $?.exitstatus == 0 if True: # assume started... if debug: cmds.append('# del:') #(0..rem.length - 1).step(replica).each do |i| for i in range(0, len(rem)-1, replica): sliced = rem[i:i+replica] # slice if len(sliced) == replica: #print sliced.join(',') #bricks = sliced.map {|x| x['host'] + ':' + x['path'] + volume} bricks = [brick_dict_to_str(x)+volume for x in sliced] if debug: # include a debug comment cmds.append("# on volume: %(volume)s, removing brick(s): %(bricks)s" % {'volume': volume, 'bricks': ' '.join(bricks)}) remove = "gluster volume remove-brick %(volume)s %(bricks)s" % {'volume': volume, 'bricks': ' '.join(bricks)} status = "gluster volume rebalance %(volume)s status --xml | xml.py rebalance --name %(volume)s --status" % {'volume': volume} cmds.append("%s start" % remove) # start # while status of rebalance == 1; (in progress) cmds.append("while /usr/bin/test (%s) -eq 1; do /usr/bin/sleep 5s; done" % status) # try to finish (yay!) # TODO: can gluster take a --yes argument instead? # check that status ended in success, not fail! # FIXME: use the HELP flag to warn the human... cmds.append("if (%(status)s); then (/usr/bin/yes | %(remove)s commit); else /bin/false; fi" % {'status': status, 'remove': remove}) else: # FIXME: warning or error? pass # we have extras, we can't remove them, it's a problem! return cmds def brick_commands(a, b): # XXX: i postulate that a brick_delta + brick_transform produces this... # but maybe I'm wrong... can the delta always be the first step in figuring # out the end result of commands ? pass # # Tests... # class TestConversion(unittest.TestCase): def setUp(self): # called before the start of _each_ test x = [] a = 'annex1.example.com:/data/brick1/' b = {'host': 'annex1.example.com', 'path': '/data/brick1/'} x.append({'a': a, 'b': b}) a = 'annex2.example.com:/data/brick2/' b = {'host': 'annex2.example.com', 'path': '/data/brick2/'} x.append({'a': a, 'b': b}) a = 'annex3.example.com:/data/brick3/' b = {'host': 'annex3.example.com', 'path': '/data/brick3/'} x.append({'a': a, 'b': b}) self.pairs = x def test_brick_str_to_dict(self): #a = 'annex1.example.com:/data/brick1' #b = {'host': 'annex1.example.com', 'path': '/data/brick1/'} for x in self.pairs: a = x['a'] b = x['b'] self.assertEqual(brick_str_to_dict(a), b) def test_brick_dict_to_str(self): #a = 'annex1.example.com:/data/brick1' #b = {'host': 'annex1.example.com', 'path': '/data/brick1/'} for x in self.pairs: a = x['a'] b = x['b'] self.assertEqual(a, brick_dict_to_str(b)) class TestNaturalOrder(unittest.TestCase): def setUp(self): # called before the start of _each_ test self.iterate = 3 # run this test N times for safety... def test_one(self): # put together brick list in correct order, shuffle will test... bricks = [] bricks.append({'host': 'annex1.example.com', 'path': '/data/brick1/'}) bricks.append({'host': 'annex2.example.com', 'path': '/data/brick1/'}) bricks.append({'host': 'annex1.example.com', 'path': '/data/brick2/'}) bricks.append({'host': 'annex2.example.com', 'path': '/data/brick2/'}) bricks.append({'host': 'annex1.example.com', 'path': '/data/brick3/'}) bricks.append({'host': 'annex2.example.com', 'path': '/data/brick3/'}) for i in range(self.iterate): # random.sample returns a different sample each time... rbricks = random.sample(bricks, len(bricks)) self.assertEqual(bricks, natural_brick_order(rbricks)) def test_two(self): bricks = [] bricks.append({'host': 'annex1.example.com', 'path': '/data/brick1/'}) bricks.append({'host': 'annex2.example.com', 'path': '/data/brick1/'}) bricks.append({'host': 'annex3.example.com', 'path': '/data/brick2/'}) bricks.append({'host': 'annex4.example.com', 'path': '/data/brick2/'}) bricks.append({'host': 'annex1.example.com', 'path': '/data/brick3/'}) bricks.append({'host': 'annex2.example.com', 'path': '/data/brick3/'}) bricks.append({'host': 'annex3.example.com', 'path': '/data/brick4/'}) bricks.append({'host': 'annex4.example.com', 'path': '/data/brick4/'}) for i in range(self.iterate): # random.sample returns a different sample each time... rbricks = random.sample(bricks, len(bricks)) self.assertEqual(bricks, natural_brick_order(rbricks)) def test_three(self): bricks = [] bricks.append({'host': 'annex1.example.com', 'path': '/data/brick1'}) bricks.append({'host': 'annex2.example.com', 'path': '/data/brick1'}) bricks.append({'host': 'annex3.example.com', 'path': '/data/brick2'}) bricks.append({'host': 'annex4.example.com', 'path': '/data/brick2'}) bricks.append({'host': 'annex5.example.com', 'path': '/data/brick3'}) bricks.append({'host': 'annex6.example.com', 'path': '/data/brick3'}) bricks.append({'host': 'annex1.example.com', 'path': '/data/brick4'}) bricks.append({'host': 'annex2.example.com', 'path': '/data/brick4'}) bricks.append({'host': 'annex3.example.com', 'path': '/data/brick5'}) bricks.append({'host': 'annex4.example.com', 'path': '/data/brick5'}) bricks.append({'host': 'annex5.example.com', 'path': '/data/brick6'}) bricks.append({'host': 'annex6.example.com', 'path': '/data/brick6'}) for i in range(self.iterate): # random.sample returns a different sample each time... rbricks = random.sample(bricks, len(bricks)) self.assertEqual(bricks, natural_brick_order(rbricks)) def test_four(self): # replica 3 (without specifying replica 3) # XXX: each host doesn't have a linear brick sequence number... bricks = [] bricks.append({'host': 'annex1.example.com', 'path': '/data/brick1'}) bricks.append({'host': 'annex2.example.com', 'path': '/data/brick1'}) bricks.append({'host': 'annex3.example.com', 'path': '/data/brick1'}) bricks.append({'host': 'annex4.example.com', 'path': '/data/brick2'}) bricks.append({'host': 'annex5.example.com', 'path': '/data/brick2'}) bricks.append({'host': 'annex6.example.com', 'path': '/data/brick2'}) bricks.append({'host': 'annex1.example.com', 'path': '/data/brick3'}) bricks.append({'host': 'annex2.example.com', 'path': '/data/brick3'}) bricks.append({'host': 'annex3.example.com', 'path': '/data/brick3'}) bricks.append({'host': 'annex4.example.com', 'path': '/data/brick4'}) bricks.append({'host': 'annex5.example.com', 'path': '/data/brick4'}) bricks.append({'host': 'annex6.example.com', 'path': '/data/brick4'}) for i in range(self.iterate): # random.sample returns a different sample each time... rbricks = random.sample(bricks, len(bricks)) self.assertEqual(bricks, natural_brick_order(rbricks)) def test_five(self): # replica 3 (without specifying replica 3) # XXX: brick1 which is not the same on all hosts has different data... bricks = [] bricks.append({'host': 'annex1.example.com', 'path': '/data/brick1'}) bricks.append({'host': 'annex2.example.com', 'path': '/data/brick1'}) bricks.append({'host': 'annex3.example.com', 'path': '/data/brick1'}) bricks.append({'host': 'annex4.example.com', 'path': '/data/brick1'}) bricks.append({'host': 'annex5.example.com', 'path': '/data/brick1'}) bricks.append({'host': 'annex6.example.com', 'path': '/data/brick1'}) bricks.append({'host': 'annex1.example.com', 'path': '/data/brick2'}) bricks.append({'host': 'annex2.example.com', 'path': '/data/brick2'}) bricks.append({'host': 'annex3.example.com', 'path': '/data/brick2'}) bricks.append({'host': 'annex4.example.com', 'path': '/data/brick2'}) bricks.append({'host': 'annex5.example.com', 'path': '/data/brick2'}) bricks.append({'host': 'annex6.example.com', 'path': '/data/brick2'}) for i in range(self.iterate): # random.sample returns a different sample each time... rbricks = random.sample(bricks, len(bricks)) self.assertEqual(bricks, natural_brick_order(rbricks)) class TestTransformCommands(unittest.TestCase): def setUp(self): # called before the start of _each_ test self.volume = 'foo' def test_one(self): v = self.volume r = 2 # replica a = [] # current list of bricks in volume order a.append({'host': 'annex1.example.com', 'path': '/data/brick1/'}) a.append({'host': 'annex2.example.com', 'path': '/data/brick1/'}) a.append({'host': 'annex1.example.com', 'path': '/data/brick2/'}) a.append({'host': 'annex2.example.com', 'path': '/data/brick2/'}) b = [] # future available bricks in any order b.append({'host': 'annex1.example.com', 'path': '/data/brick1/'}) b.append({'host': 'annex2.example.com', 'path': '/data/brick1/'}) b.append({'host': 'annex1.example.com', 'path': '/data/brick2/'}) b.append({'host': 'annex2.example.com', 'path': '/data/brick2/'}) c = [] self.assertEqual(brick_transform_commands(a, b, v, r), c) def test_two(self): v = self.volume r = 2 # replica a = [] # current list of bricks in volume order a.append({'host': 'annex1.example.com', 'path': '/data/brick1/'}) a.append({'host': 'annex2.example.com', 'path': '/data/brick1/'}) a.append({'host': 'annex1.example.com', 'path': '/data/brick2/'}) a.append({'host': 'annex2.example.com', 'path': '/data/brick2/'}) b = [] # future available bricks in any order b = b + a # start with original, and add these on: b.append({'host': 'annex1.example.com', 'path': '/data/brick3/'}) b.append({'host': 'annex2.example.com', 'path': '/data/brick3/'}) c = [] # commands c.append('gluster volume add-brick foo annex1.example.com:/data/brick3/foo annex2.example.com:/data/brick3/foo') self.assertEqual(brick_transform_commands(a, b, v, r), c) def test_three(self): v = self.volume r = 2 # replica a = [] # current list of bricks in volume order a.append({'host': 'annex1.example.com', 'path': '/data/brick1/'}) a.append({'host': 'annex2.example.com', 'path': '/data/brick1/'}) a.append({'host': 'annex1.example.com', 'path': '/data/brick2/'}) a.append({'host': 'annex2.example.com', 'path': '/data/brick2/'}) b = [] # future available bricks in any order b = b + a # start with original, and add these on: b.append({'host': 'annex1.example.com', 'path': '/data/brick3/'}) b.append({'host': 'annex2.example.com', 'path': '/data/brick3/'}) b.append({'host': 'annex1.example.com', 'path': '/data/brick4/'}) b.append({'host': 'annex2.example.com', 'path': '/data/brick4/'}) c = [] # commands c.append('gluster volume add-brick foo annex1.example.com:/data/brick3/foo annex2.example.com:/data/brick3/foo') c.append('gluster volume add-brick foo annex1.example.com:/data/brick4/foo annex2.example.com:/data/brick4/foo') self.assertEqual(brick_transform_commands(a, b, v, r), c) if __name__ == '__main__': unittest.main()