Mechanisms for automatic management of Gluster

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

This is along the lines of "tools for sysadmins". I plan on using
these algorithms for puppet-gluster, but will try to maintain them
separately as a standalone tool.

The problem: Given a set of bricks and servers, if they have a logical
naming convention, can an algorithm decide the ideal order. This could
allow parameters such as replica count, and
chained=true/false/offset#.

The second problem: Given a set of bricks in a volume, if someone adds
X bricks and removes Y bricks, is this valid, and what is the valid
sequence of add/remove brick commands.

I've written some code with test cases to try and figure this all out.
I've left out a lot of corner cases, but the boilerplate is there to
make it happen. Hopefully it's self explanatory. (gluster.py) Read and
run it.

Once this all works, the puppet-gluster use case is magic. It will be
able to take care of these operations for you (if you want).

For non puppet users, this will give admins the confidence to know
what commands they should _probably_ run in what order. I say probably
because we assume that if there's an error, they'll stop and inspect
first.

I haven't yet tried to implement the chained cases, or anything
involving striping. There are also some corner cases with some of the
current code. Once you add chaining and striping, etc, I realized it
was time to step back and ask for help :)

I hope this all makes sense. Comments, code, test cases are appreciated!

Cheers,

James
@purpleidea (irc/twitter)
https://ttboj.wordpress.com/
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (C) 2012-2013+ James Shubin
# Written by James Shubin <james@xxxxxxxxx>

"""This is brick logic for GlusterFS."""

import random
import unittest

def brick_str_to_dict(s):
	a = s.split(':')
	assert len(a) == 2

	p = a[1]
	p = p if p.endswith('/') else p+'/'

	return {'host': a[0], 'path': p}

def brick_dict_to_str(d):
	return str(d['host'])+':'+str(d['path'])

def get_version_from_path(path):
	pass
#	rindex = path.rindex('/')
#	if rindex.nil?
#		# TODO: error, brick needs a / character...
#	end

#	base = path[rindex+1, path.size-rindex]
#	findv = base.rindex('#v')
#	if findv.nil?
#		return 0	# version 0 (non-existant)
#	else
#		version = base[findv+2, base.size-findv]
#		if version.size < 1
#			# TODO: error, version string is missing...
#			# TODO: assume version 0 ?
#			return -1
#		end

#		return version.to_i
#	end
#end

def get_versions(group):
	versions = []
	for x in group:
		v = get_version_from_path(x['path'])
		if not v in versions:
			versions.append(v)

	return sorted(versions)		# should be all int's


def filter_version(group, version=0):	# TODO: empty version is 0 or None ?

	result = []
	for x in group:
		v = get_version_from_path(x['path'])
		if v == version:
			result.append(x)

	return result

def natural_brick_order(bricks):
	"""Put bricks in logical ordering."""

	# XXX: technically we should specify the replica to this function, but it might not be required. maybe it's a good idea as a checksum type thing...

	vfinal = []	# versioned final
	versions = get_versions(bricks)	# list of available versions...
	for version in versions:

		subset = filter_version(bricks, version)

		collect = {}

		for x in subset:
			key = x['host']
			val = x['path']

			if not key in collect.keys():
				collect[key] = []	# initialize

			collect[key].append(val)	# save in array
			# TODO: ensure this array is always sorted (we could also do this after
			# or always insert elements in the correct sorted order too :P)
			collect[key] = sorted(collect[key])


		# we also could do this sort here...
		for x in collect.keys():
			collect[key] = sorted(collect[key])


		final = []	# final order...
		while len(collect) > 0:
			for x in sorted(collect.keys()):

				# NOTE: this array should already be sorted!
				p = collect[x].pop(0)	# assume an array of at least 1 element
				final.append({'host': x, 'path': p})	# save

				if len(collect[x]) == 0:	# maybe the array is empty now
					del collect[x]		# remove that empty list's key


		vfinal = vfinal + final	# concat array on...

	return vfinal


def brick_delta(a, b):

	ai = 0
	bi = 0

	add = []
	rem = []
	while ((len(a) - ai) > 0) and ((len(b) - bi) > 0):
		# same element, keep going
		if a[ai] == b[bi]:
			ai = ai + 1
			bi = bi + 1
			continue

		# the elements must differ

		# if the element in a, doesn't exist in b...
		if not a[ai] in b:
			# then it must be a delete operation of a[ai]
			rem.append(a[ai])	# push onto delete queue...
			ai = ai + 1
			continue

		# if the element in b, doesn't exist in a...
		if not b[bi] in a:
			# then it must be an add operation of b[bi]
			add.append(b[bi])	# push onto add queue...
			bi = bi + 1
			continue

		# XXX: i think if either of the below conditions are true, then
		# we have an out of sync problem between the a and b sets. i do
		# think that they're probably either both true or neither true!
		# OR:
		#if a.include?(b[bi])
		#	# then ???
		#	bi = bi + 1	# ???
		#	# out of sync ?
		#end
		#
		#if b.include?(a[ai])
		#	# then ???
		#	ai = ai + 1	# ???
		#	# out of sync ?
		#end

	while (len(a) - ai) > 0:	# if there is left over a at the end...
		rem.append(a[ai])	# push onto delete queue...
		ai = ai + 1

	while (len(b) - bi) > 0:	# if there is left over b at the end...
		add.append(b[bi])	# push onto add queue...
		bi = bi + 1

	return {'add': add, 'del': rem}


def brick_transform(a, b, XXX):
	pass

def brick_transform_commands(a, b, volume, replica, debug=False):
	# XXX: can we do this all in one function, with the delta precursor ?

	# XXX: list of bricks 'b' can be in ANY order... test this...

	cmds = []
	h = brick_delta(a, b)
	add = h['add']
	rem = h['del']

	if debug:
		cmds.append('# add:')

	# step through by mod <replica>, pulling out <replica> values at a time
	#(0..add.length - 1).step(replica).each do |i|
	for i in range(0, len(add)-1, replica):

		#sliced = add[i, replica]	# slice
		sliced = add[i:i+replica]	# slice

		if len(sliced) == replica:
			#sliced.each do |x|
			#	print x['host'] + ':' + x['path']
			#end
			# NOTE: the: + volume is the volume folder on the brick
			#bricks = sliced.map {|x| x['host'] + ':' + x['path'] + volume}
			bricks = [brick_dict_to_str(x)+volume for x in sliced]

			if debug:	# include a debug comment
				cmds.append("# on volume: %(volume)s, adding brick(s): %(bricks)s" % {'volume': volume, 'bricks': ' '.join(bricks)})

			cmds.append("gluster volume add-brick %(volume)s %(bricks)s" % {'volume': volume, 'bricks': ' '.join(bricks)})
		else:
			# TODO: warning?
			pass
			# we have extras, we can add them next time, it's ok...

	# volume needs to be online for the remove-brick to work... at least if
	# you want the rebalance to work. future use cases might allow force...
	#started = `gluster volume status %(volume)s`
	#if $?.exitstatus == 0
	if True:	# assume started...
		if debug:
			cmds.append('# del:')

		#(0..rem.length - 1).step(replica).each do |i|
		for i in range(0, len(rem)-1, replica):
			sliced = rem[i:i+replica]	# slice
			if len(sliced) == replica:
				#print sliced.join(',')
				#bricks = sliced.map {|x| x['host'] + ':' + x['path'] + volume}
				bricks = [brick_dict_to_str(x)+volume for x in sliced]

				if debug:	# include a debug comment
					cmds.append("# on volume: %(volume)s, removing brick(s): %(bricks)s" % {'volume': volume, 'bricks': ' '.join(bricks)})

				remove = "gluster volume remove-brick %(volume)s %(bricks)s" % {'volume': volume, 'bricks': ' '.join(bricks)}
				status = "gluster volume rebalance %(volume)s status --xml | xml.py rebalance --name %(volume)s --status" % {'volume': volume}

				cmds.append("%s start" % remove)	# start

				# while status of rebalance == 1; (in progress)
				cmds.append("while /usr/bin/test (%s) -eq 1; do /usr/bin/sleep 5s; done" % status)

				# try to finish (yay!)
				# TODO: can gluster take a --yes argument instead?
				# check that status ended in success, not fail!
				# FIXME: use the HELP flag to warn the human...
				cmds.append("if (%(status)s); then (/usr/bin/yes | %(remove)s commit); else /bin/false; fi" % {'status': status, 'remove': remove})
			else:
				# FIXME: warning or error?
				pass
				# we have extras, we can't remove them, it's a problem!

	return cmds


def brick_commands(a, b):
	# XXX: i postulate that a brick_delta + brick_transform produces this...
	# but maybe I'm wrong... can the delta always be the first step in figuring
	# out the end result of commands ?
	pass




#
#	Tests...
#
class TestConversion(unittest.TestCase):

	def setUp(self):	# called before the start of _each_ test
		x = []
		a = 'annex1.example.com:/data/brick1/'
		b = {'host': 'annex1.example.com', 'path': '/data/brick1/'}
		x.append({'a': a, 'b': b})
		a = 'annex2.example.com:/data/brick2/'
		b = {'host': 'annex2.example.com', 'path': '/data/brick2/'}
		x.append({'a': a, 'b': b})
		a = 'annex3.example.com:/data/brick3/'
		b = {'host': 'annex3.example.com', 'path': '/data/brick3/'}
		x.append({'a': a, 'b': b})

		self.pairs = x

	def test_brick_str_to_dict(self):

		#a = 'annex1.example.com:/data/brick1'
		#b = {'host': 'annex1.example.com', 'path': '/data/brick1/'}
		for x in self.pairs:
			a = x['a']
			b = x['b']
			self.assertEqual(brick_str_to_dict(a), b)

	def test_brick_dict_to_str(self):

		#a = 'annex1.example.com:/data/brick1'
		#b = {'host': 'annex1.example.com', 'path': '/data/brick1/'}
		for x in self.pairs:
			a = x['a']
			b = x['b']
			self.assertEqual(a, brick_dict_to_str(b))


class TestNaturalOrder(unittest.TestCase):

	def setUp(self):	# called before the start of _each_ test
		self.iterate = 3	# run this test N times for safety...

	def test_one(self):
		# put together brick list in correct order, shuffle will test...
		bricks = []
		bricks.append({'host': 'annex1.example.com', 'path': '/data/brick1/'})
		bricks.append({'host': 'annex2.example.com', 'path': '/data/brick1/'})
		bricks.append({'host': 'annex1.example.com', 'path': '/data/brick2/'})
		bricks.append({'host': 'annex2.example.com', 'path': '/data/brick2/'})
		bricks.append({'host': 'annex1.example.com', 'path': '/data/brick3/'})
		bricks.append({'host': 'annex2.example.com', 'path': '/data/brick3/'})

		for i in range(self.iterate):
			# random.sample returns a different sample each time...
			rbricks = random.sample(bricks, len(bricks))
			self.assertEqual(bricks, natural_brick_order(rbricks))


	def test_two(self):

		bricks = []
		bricks.append({'host': 'annex1.example.com', 'path': '/data/brick1/'})
		bricks.append({'host': 'annex2.example.com', 'path': '/data/brick1/'})
		bricks.append({'host': 'annex3.example.com', 'path': '/data/brick2/'})
		bricks.append({'host': 'annex4.example.com', 'path': '/data/brick2/'})
		bricks.append({'host': 'annex1.example.com', 'path': '/data/brick3/'})
		bricks.append({'host': 'annex2.example.com', 'path': '/data/brick3/'})
		bricks.append({'host': 'annex3.example.com', 'path': '/data/brick4/'})
		bricks.append({'host': 'annex4.example.com', 'path': '/data/brick4/'})

		for i in range(self.iterate):
			# random.sample returns a different sample each time...
			rbricks = random.sample(bricks, len(bricks))
			self.assertEqual(bricks, natural_brick_order(rbricks))


	def test_three(self):
		bricks = []
		bricks.append({'host': 'annex1.example.com', 'path': '/data/brick1'})
		bricks.append({'host': 'annex2.example.com', 'path': '/data/brick1'})
		bricks.append({'host': 'annex3.example.com', 'path': '/data/brick2'})
		bricks.append({'host': 'annex4.example.com', 'path': '/data/brick2'})
		bricks.append({'host': 'annex5.example.com', 'path': '/data/brick3'})
		bricks.append({'host': 'annex6.example.com', 'path': '/data/brick3'})
		bricks.append({'host': 'annex1.example.com', 'path': '/data/brick4'})
		bricks.append({'host': 'annex2.example.com', 'path': '/data/brick4'})
		bricks.append({'host': 'annex3.example.com', 'path': '/data/brick5'})
		bricks.append({'host': 'annex4.example.com', 'path': '/data/brick5'})
		bricks.append({'host': 'annex5.example.com', 'path': '/data/brick6'})
		bricks.append({'host': 'annex6.example.com', 'path': '/data/brick6'})

		for i in range(self.iterate):
			# random.sample returns a different sample each time...
			rbricks = random.sample(bricks, len(bricks))
			self.assertEqual(bricks, natural_brick_order(rbricks))


	def test_four(self):
		# replica 3 (without specifying replica 3)
		# XXX: each host doesn't have a linear brick sequence number...
		bricks = []
		bricks.append({'host': 'annex1.example.com', 'path': '/data/brick1'})
		bricks.append({'host': 'annex2.example.com', 'path': '/data/brick1'})
		bricks.append({'host': 'annex3.example.com', 'path': '/data/brick1'})
		bricks.append({'host': 'annex4.example.com', 'path': '/data/brick2'})
		bricks.append({'host': 'annex5.example.com', 'path': '/data/brick2'})
		bricks.append({'host': 'annex6.example.com', 'path': '/data/brick2'})
		bricks.append({'host': 'annex1.example.com', 'path': '/data/brick3'})
		bricks.append({'host': 'annex2.example.com', 'path': '/data/brick3'})
		bricks.append({'host': 'annex3.example.com', 'path': '/data/brick3'})
		bricks.append({'host': 'annex4.example.com', 'path': '/data/brick4'})
		bricks.append({'host': 'annex5.example.com', 'path': '/data/brick4'})
		bricks.append({'host': 'annex6.example.com', 'path': '/data/brick4'})


		for i in range(self.iterate):
			# random.sample returns a different sample each time...
			rbricks = random.sample(bricks, len(bricks))
			self.assertEqual(bricks, natural_brick_order(rbricks))

	def test_five(self):
		# replica 3 (without specifying replica 3)
		# XXX: brick1 which is not the same on all hosts has different data...
		bricks = []
		bricks.append({'host': 'annex1.example.com', 'path': '/data/brick1'})
		bricks.append({'host': 'annex2.example.com', 'path': '/data/brick1'})
		bricks.append({'host': 'annex3.example.com', 'path': '/data/brick1'})
		bricks.append({'host': 'annex4.example.com', 'path': '/data/brick1'})
		bricks.append({'host': 'annex5.example.com', 'path': '/data/brick1'})
		bricks.append({'host': 'annex6.example.com', 'path': '/data/brick1'})
		bricks.append({'host': 'annex1.example.com', 'path': '/data/brick2'})
		bricks.append({'host': 'annex2.example.com', 'path': '/data/brick2'})
		bricks.append({'host': 'annex3.example.com', 'path': '/data/brick2'})
		bricks.append({'host': 'annex4.example.com', 'path': '/data/brick2'})
		bricks.append({'host': 'annex5.example.com', 'path': '/data/brick2'})
		bricks.append({'host': 'annex6.example.com', 'path': '/data/brick2'})

		for i in range(self.iterate):
			# random.sample returns a different sample each time...
			rbricks = random.sample(bricks, len(bricks))
			self.assertEqual(bricks, natural_brick_order(rbricks))

class TestTransformCommands(unittest.TestCase):

	def setUp(self):	# called before the start of _each_ test
		self.volume = 'foo'

	def test_one(self):

		v = self.volume
		r = 2	# replica

		a = []	# current list of bricks in volume order
		a.append({'host': 'annex1.example.com', 'path': '/data/brick1/'})
		a.append({'host': 'annex2.example.com', 'path': '/data/brick1/'})
		a.append({'host': 'annex1.example.com', 'path': '/data/brick2/'})
		a.append({'host': 'annex2.example.com', 'path': '/data/brick2/'})

		b = []	# future available bricks in any order
		b.append({'host': 'annex1.example.com', 'path': '/data/brick1/'})
		b.append({'host': 'annex2.example.com', 'path': '/data/brick1/'})
		b.append({'host': 'annex1.example.com', 'path': '/data/brick2/'})
		b.append({'host': 'annex2.example.com', 'path': '/data/brick2/'})

		c = []

		self.assertEqual(brick_transform_commands(a, b, v, r), c)

	def test_two(self):

		v = self.volume
		r = 2	# replica

		a = []	# current list of bricks in volume order
		a.append({'host': 'annex1.example.com', 'path': '/data/brick1/'})
		a.append({'host': 'annex2.example.com', 'path': '/data/brick1/'})
		a.append({'host': 'annex1.example.com', 'path': '/data/brick2/'})
		a.append({'host': 'annex2.example.com', 'path': '/data/brick2/'})

		b = []	# future available bricks in any order
		b = b + a	# start with original, and add these on:
		b.append({'host': 'annex1.example.com', 'path': '/data/brick3/'})
		b.append({'host': 'annex2.example.com', 'path': '/data/brick3/'})

		c = []	# commands
		c.append('gluster volume add-brick foo annex1.example.com:/data/brick3/foo annex2.example.com:/data/brick3/foo')

		self.assertEqual(brick_transform_commands(a, b, v, r), c)

	def test_three(self):

		v = self.volume
		r = 2	# replica

		a = []	# current list of bricks in volume order
		a.append({'host': 'annex1.example.com', 'path': '/data/brick1/'})
		a.append({'host': 'annex2.example.com', 'path': '/data/brick1/'})
		a.append({'host': 'annex1.example.com', 'path': '/data/brick2/'})
		a.append({'host': 'annex2.example.com', 'path': '/data/brick2/'})

		b = []	# future available bricks in any order
		b = b + a	# start with original, and add these on:
		b.append({'host': 'annex1.example.com', 'path': '/data/brick3/'})
		b.append({'host': 'annex2.example.com', 'path': '/data/brick3/'})
		b.append({'host': 'annex1.example.com', 'path': '/data/brick4/'})
		b.append({'host': 'annex2.example.com', 'path': '/data/brick4/'})
		c = []	# commands
		c.append('gluster volume add-brick foo annex1.example.com:/data/brick3/foo annex2.example.com:/data/brick3/foo')
		c.append('gluster volume add-brick foo annex1.example.com:/data/brick4/foo annex2.example.com:/data/brick4/foo')

		self.assertEqual(brick_transform_commands(a, b, v, r), c)


if __name__ == '__main__':
	unittest.main()


[Index of Archives]     [Gluster Users]     [Ceph Users]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Security]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux