I set up 3 logical volumes, lv1, lv2, and lv3. lv1 was mounted on a host named eightoften as /var/spool/mqueue, and lv2 was mounted on a host named nineoften as /var/spool/mqueue. lv3 was mounted on both eightoften and nineoften as /var/spool/mail.
The goal is to have one mailbox for each user, reachable from either host, and have a load balancer distribute the incoming mail between the 2 hosts. Under light loads, things work well, but under heavy loads sendmail has some locking problems:
Oct 1 15:57:00 nineoften sendmail[29243]: NOQUEUE: SYSERR(root): cannot lockf(qfi91LpkrM029041, fd=4, type=5, omode=0, euid=0): Operation not permitted Oct 1 15:57:00 nineoften sendmail[29243]: NOQUEUE: 4: fl=0x0, mode=100600: dev=253/3, ino=290, nlink=1, u/gid=0/55, size=1188 Oct 1 15:57:00 nineoften sendmail[29243]: NOQUEUE: SYSERR(root): cannot lockf(qfi91Lpl88029050, fd=4, type=5, omode=0, euid=0): Operation not permitted Oct 1 15:57:00 nineoften sendmail[29243]: NOQUEUE: 4: fl=0x0, mode=100600: dev=253/3, ino=193, nlink=1, u/gid=0/55, size=1187 Oct 1 15:57:00 nineoften sendmail[29243]: NOQUEUE: SYSERR(root): cannot lockf(qfi91Lpik7029006, fd=4, type=5, omode=0, euid=0): Operation not permitted Oct 1 15:57:00 nineoften sendmail[29243]: NOQUEUE: 4: fl=0x0, mode=100600: dev=253/3, ino=297, nlink=1, u/gid=0/55, size=1187
Other host:
Oct 1 15:56:52 eightoften sendmail[7288]: NOQUEUE: SYSERR(root): cannot lockf(qfi91LpiRJ006964, fd=4, type=5, omode=0, euid=0): Operation not permitted Oct 1 15:56:52 eightoften sendmail[7288]: NOQUEUE: 4: fl=0x0, mode=100600: dev=253/1, ino=156, nlink=1, u/gid=0/55, size=1188 Oct 1 15:56:52 eightoften sendmail[7288]: NOQUEUE: SYSERR(root): cannot lockf(qfi91Lpkw1006986, fd=4, type=5, omode=0, euid=0): Operation not permitted Oct 1 15:56:52 eightoften sendmail[7288]: NOQUEUE: 4: fl=0x0, mode=100600: dev=253/1, ino=203, nlink=1, u/gid=0/55, size=1203 Oct 1 15:56:52 eightoften sendmail[7288]: NOQUEUE: SYSERR(root): cannot lockf(qfi91Lpt0k007183, fd=4, type=5, omode=0, euid=0): Operation not permitted
I wrote a couple of test programs that call fcntl to get a read lock or a write lock. The test programs work fine on ext2 or reiser file systems, but return "Operation not permitted" on a GFS file system in some cases:
If you call fcntl with F_SETLKW, things appear to work; calling fcntl with F_SETLK returns -1 and sets errno to 1 (Operation not permitted).
readlock and writelock were compiled to call fcntl with F_SETLK:
First, I ran writelock on nineoften:
[mbrookov@nineoften locktest]$ ./writelock /var/spool/mail/test/afile Have write lock, hit return to free write lock on /var/spool/mail/test/afile and exit
Then ran readlock on eightoften:
[mbrookov@eightoften locktest]$ ./readlock /var/spool/mail/test/afile Could not get read lock on /var/spool/mail/test/afile errno=1:Operation not permitted [mbrookov@eightoften locktest]$
Similar problems happen if you run readlock and writelock on the same host.
Out of 500 mail messages sent, only 216 arrived in the mail box. I have a load balancer set up to distribute the mail messages between the 2 hosts.
Any ideas?
thanks
Matt
Academic Computing and Networking
Colorado School of Mines
mbrookov@xxxxxxxxx
303-273-3436
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <time.h>
#include <errno.h>

/*
 * Lock test: take a shared (read) lock on all of `filename` with
 * non-blocking fcntl(F_SETLK), wait for the user to press return, then
 * release the lock.
 *
 * The file is created if it does not exist (mode 0640 before umask).
 *
 * Returns 0 once the lock has been taken and released.  Any failure to
 * open, lock or unlock the file prints a diagnostic to stderr and
 * terminates the process with exit status 1.
 */
int readlock(char *filename)
{
    int fd;
    int saved_errno;
    struct flock mylock;

    fd = open(filename, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP);
    if (fd == -1) {
        /* Capture errno first: fprintf() is allowed to overwrite it. */
        saved_errno = errno;
        fprintf(stderr, "Could not open %s:%s\n", filename, strerror(saved_errno));
        exit(1);
    }

    memset(&mylock, 0, sizeof mylock);
    mylock.l_type = F_RDLCK;
    mylock.l_whence = SEEK_SET;
    mylock.l_start = 0;
    mylock.l_len = 0;               /* 0 == lock the entire file */

    /*
    ** F_SETLKW seems to work on GFS under light load
    ** looping over F_SETLK will fail with errno operation not permitted
    */
    while (fcntl(fd, F_SETLK, &mylock) == -1) {
        saved_errno = errno;
        /*
         * POSIX lets F_SETLK report a conflicting lock as either EAGAIN
         * or EACCES, so both mean "held by someone else, retry".  Any
         * other errno (e.g. the EPERM seen on GFS) is a hard failure.
         */
        if (saved_errno != EAGAIN && saved_errno != EACCES) {
            fprintf(stderr, "Could not get read lock on %s errno=%d:%s\n",
                    filename, saved_errno, strerror(saved_errno));
            exit(1);
        }
        printf("%s locked, trying again\n", filename);
    }

    printf("Have read lock, hit return to free read lock on %s and exit\n", filename);
    fgetc(stdin);

    memset(&mylock, 0, sizeof mylock);
    mylock.l_type = F_UNLCK;
    mylock.l_whence = SEEK_SET;
    mylock.l_start = 0;
    mylock.l_len = 0;               /* 0 == unlock the entire file */

    if (fcntl(fd, F_SETLK, &mylock) == -1) {
        saved_errno = errno;
        fprintf(stderr, "Could not unlock %s:%s\n", filename, strerror(saved_errno));
        exit(1);
    }

    close(fd);
    return 0;
}

/* Usage: readlock <file> */
int main(int argc, char *argv[])
{
    if (argc < 2) {
        fprintf(stderr, "usage: %s <file>\n",
                (argc > 0 && argv[0] != NULL) ? argv[0] : "readlock");
        return 2;
    }
    return readlock(argv[1]);
}
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <time.h>
#include <errno.h>

/*
 * Lock test: take an exclusive (write) lock on all of `filename` with
 * non-blocking fcntl(F_SETLK), wait for the user to press return, then
 * release the lock.
 *
 * The file is created if it does not exist (mode 0640 before umask).
 *
 * Returns 0 once the lock has been taken and released.  Any failure to
 * open, lock or unlock the file prints a diagnostic to stderr and
 * terminates the process with exit status 1.
 */
int writelock(char *filename)
{
    int fd;
    int saved_errno;
    struct flock mylock;

    fd = open(filename, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR | S_IRGRP);
    if (fd == -1) {
        /* Capture errno first: fprintf() is allowed to overwrite it. */
        saved_errno = errno;
        fprintf(stderr, "Could not open %s:%s\n", filename, strerror(saved_errno));
        exit(1);
    }

    memset(&mylock, 0, sizeof mylock);
    mylock.l_type = F_WRLCK;
    mylock.l_whence = SEEK_SET;
    mylock.l_start = 0;
    mylock.l_len = 0;               /* 0 == lock the entire file */

    /*
    ** F_SETLKW seems to work on GFS under light load
    ** looping over F_SETLK will fail with errno operation not permitted
    */
    while (fcntl(fd, F_SETLK, &mylock) == -1) {
        saved_errno = errno;
        /*
         * POSIX lets F_SETLK report a conflicting lock as either EAGAIN
         * or EACCES, so both mean "held by someone else, retry".  Any
         * other errno (e.g. the EPERM seen on GFS) is a hard failure.
         */
        if (saved_errno != EAGAIN && saved_errno != EACCES) {
            fprintf(stderr, "Could not get write lock on %s errno=%d:%s\n",
                    filename, saved_errno, strerror(saved_errno));
            exit(1);
        }
        printf("%s locked, trying again\n", filename);
    }

    printf("Have write lock, hit return to free write lock on %s and exit\n", filename);
    fgetc(stdin);

    memset(&mylock, 0, sizeof mylock);
    mylock.l_type = F_UNLCK;
    mylock.l_whence = SEEK_SET;
    mylock.l_start = 0;
    mylock.l_len = 0;               /* 0 == unlock the entire file */

    if (fcntl(fd, F_SETLK, &mylock) == -1) {
        saved_errno = errno;
        fprintf(stderr, "Could not unlock %s:%s\n", filename, strerror(saved_errno));
        exit(1);
    }

    close(fd);
    return 0;
}

/* Usage: writelock <file> */
int main(int argc, char *argv[])
{
    if (argc < 2) {
        fprintf(stderr, "usage: %s <file>\n",
                (argc > 0 && argv[0] != NULL) ? argv[0] : "writelock");
        return 2;
    }
    return writelock(argv[1]);
}