Hi,
I've been using GlusterFS for a while now in different scenarios, and now
I'm facing a really strange problem, or at least it seems so to me.
The setup is a GlusterFS volume with six nodes and several clients
(Xen servers) mounting the shared filesystem. The idea is to have one
big shared storage pool for all the VMs I've got.
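The clients mount it with the plain glusterfs client, roughly like this
(spec file path and mount point are just examples):
***********
# on each Xen server; paths are illustrative
glusterfs -f /etc/glusterfs/client.vol /mnt/gluster
***********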
When I try to start a VM created with Xen 3.2 whose disk image lives on
the Gluster mount, it won't run.
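The domU config points at the image with a file-backed vbd, something
like this (device 2049 decodes to sda1 and the path matches the log
below; the rest is illustrative):
***********
# fragment of the domU config file (illustrative)
disk = [ 'file:/mnt/gluster/domains/xen_testdedicat1/disk.img,sda1,w' ]
***********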
Xen says:
Error: Device 2049 (vbd) could not be connected. Backend device not found.
The nodes don't log any error or warning at all, and the Xen server that
is trying to start the VM logs this:
2008-07-07 17:43:07 E [afr.c:2391:afr_writev_cbk] grup1: (path=/domains/xen_testdedicat1/disk.img child=espai2) op_ret=-1 op_errno=22
2008-07-07 17:43:07 E [fuse-bridge.c:1645:fuse_writev_cbk] glusterfs-fuse: 656: WRITE => -1 (22)
2008-07-07 17:43:07 E [afr.c:2699:afr_flush] grup1: afrfdp->fdstate[] is 0, returning ENOTCONN
2008-07-07 17:43:07 E [fuse-bridge.c:945:fuse_err_cbk] glusterfs-fuse: 657: (16) ERR => -1 (107)
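If I read the errno values right, 22 is EINVAL on the write and 107 is
ENOTCONN on the flush, so the write itself is being rejected rather than
timing out.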
I'm using these software versions:
Debian Etch, kernel 2.6.18-6-xen-amd64
fuse-2.7.3glfs10
glusterfs--mainline--2.5--patch-788
The thing is, however, that if I move that same Xen 3.2 virtual machine
from the shared Gluster mount point to a local path, it runs without any
problem at all.
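To take Xen out of the picture, the write path can be exercised with dd,
comparing buffered writes against O_DIRECT ones (which, as far as I
know, is what loop/blktap disk backends tend to issue); paths and sizes
are just examples:
***********
# buffered write to the Gluster mount (path is an example)
dd if=/dev/zero of=/mnt/gluster/ddtest bs=1M count=16
# the same write with O_DIRECT
dd if=/dev/zero of=/mnt/gluster/ddtest bs=1M count=16 oflag=direct
***********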
Is it that Gluster is not fast enough to provide Xen with the data
needed to launch the VM? Is there anything I could do to fix that?
These are the spec files:
Node side:
***********
volume espa
  type storage/posix
  option directory /mnt/compartit
end-volume

volume spai
  type performance/io-threads
  option thread-count 4
  option cache-size 32MB
  subvolumes espa
end-volume

volume espai
  type performance/write-behind
  option aggregate-size 1MB
  option flush-behind on
  subvolumes spai
end-volume

volume ultim
  type protocol/server
  subvolumes espai
  option transport-type tcp/server
  option auth.ip.espai.allow *
end-volume
***********
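(The two namespace nodes, 192.168.1.204 and 192.168.1.161, additionally
export the 'nm' volume the clients reference; it looks roughly like
this, with the directory being illustrative:)
***********
volume nm
  type storage/posix
  option directory /mnt/namespace
end-volume
# plus nm added to the protocol/server subvolumes and an auth.ip.nm.allow line
***********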
Client/Xen side:
***********
volume espai1
  type protocol/client
  option transport-type tcp/client
  option remote-host 192.168.1.204
  option remote-subvolume espai
end-volume

volume namespace1
  type protocol/client
  option transport-type tcp/client
  option remote-host 192.168.1.204
  option remote-subvolume nm
end-volume

volume espai2
  type protocol/client
  option transport-type tcp/client
  option remote-host 192.168.1.206
  option remote-subvolume espai
end-volume

volume espai3
  type protocol/client
  option transport-type tcp/client
  option remote-host 192.168.1.213
  option remote-subvolume espai
end-volume

volume espai4
  type protocol/client
  option transport-type tcp/client
  option remote-host 192.168.1.161
  option remote-subvolume espai
end-volume

volume namespace2
  type protocol/client
  option transport-type tcp/client
  option remote-host 192.168.1.161
  option remote-subvolume nm
end-volume

volume espai5
  type protocol/client
  option transport-type tcp/client
  option remote-host 192.168.1.162
  option remote-subvolume espai
end-volume

volume espai6
  type protocol/client
  option transport-type tcp/client
  option remote-host 192.168.1.163
  option remote-subvolume espai
end-volume

volume grup1
  type cluster/afr
  subvolumes espai1 espai2
end-volume

volume grup2
  type cluster/afr
  subvolumes espai3 espai4
end-volume

volume grup3
  type cluster/afr
  subvolumes espai5 espai6
end-volume

volume nm1
  type cluster/afr
  subvolumes namespace1 namespace2
end-volume

volume ultim
  type cluster/unify
  subvolumes grup1 grup2 grup3
  option scheduler rr
  option namespace nm1
end-volume

volume iot
  type performance/io-threads
  option thread-count 4
  option cache-size 32MB
  subvolumes ultim
end-volume

volume ioc
  type performance/io-cache
  option cache-size 64MB
  option page-size 1MB
  option force-revalidate-timeout 2
  subvolumes iot
end-volume
***********
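For what it's worth, the clients are mounted with the default options;
if I understand the client's help output right, there is a
--disable-direct-io-mode switch that is supposed to matter for
loop-mounted images, which I haven't tried yet:
***********
# untested here; flag as offered by the 1.3-era glusterfs client
glusterfs --disable-direct-io-mode -f /etc/glusterfs/client.vol /mnt/gluster
***********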
I've already spent a while playing with the spec files, adding and
removing write and read buffers, but the final result is the same: the
VM won't run.
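The variants were along these lines, e.g. a read-ahead translator on the
client stack (illustrative; the exact translators and values varied):
***********
# one variant: read-ahead on top of the io-threads volume
volume ra
  type performance/read-ahead
  option page-size 1MB
  option page-count 4
  subvolumes iot
end-volume
***********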
Any ideas?
Thank you.