Hi, Thanks Jeff! On Wed, Jul 5, 2023 at 8:34 PM Jeff Layton <jlayton@xxxxxxxxxx> wrote: > > On Fri, 2023-06-30 at 14:40 +0500, Stas Sergeev wrote: > > Currently IPC_RMID was attempted on a semid returned after failed > > semget() with flags=IPC_CREAT|IPC_EXCL. So nothing was actually removed. > > To get a proper semid, this patch retries semget() without IPC_EXCL. > > > > Nice catch. > > > This opens up a race: if the lock-tester grabbed the old sem before > > lock-setter used that magic to remove it, then the lock-tester will > > remain with removed sem. > > > > Additionally locker was waiting for sem_otime on sem0 to became > > non-zero after incrementing sem0 himself. So sem_otime was never > > 0 at the time of checking it. > > > > So basically the code was all wrong. > > This patch: > > - fixes RMID after IPC_EXCL to actually remove the sem > > - moves the increment of sem1 to the lock-tester site, and the > > lock-setter waits for that event > > - replaces the wait loop on sem_otime with GETVAL, adding a small sleep. > > - lock-tester during the init sequence checks for removed sem, and > > if it is - retries the init from semget() > > > > Signed-off-by: Stas Sergeev <stsp2@xxxxxxxxx> > > --- > > src/t_ofd_locks.c | 73 ++++++++++++++++++++++++++++++----------------- > > 1 file changed, 47 insertions(+), 26 deletions(-) > > > > diff --git a/src/t_ofd_locks.c b/src/t_ofd_locks.c > > index e77f2659..daa6f96c 100644 > > --- a/src/t_ofd_locks.c > > +++ b/src/t_ofd_locks.c > > @@ -297,6 +297,7 @@ int main(int argc, char **argv) > > semid = semget(semkey, 2, IPC_CREAT|IPC_EXCL); > > if (semid < 0 && errno == EEXIST) { > > /* remove sem set after one round of test */ > > + semid = semget(semkey, 2, IPC_CREAT); > > if (semctl(semid, 2, IPC_RMID, semu) == -1) Good catch. This RMID is useless unless we have got the existing semaphore. According to SEMGET(2), seems should be: semid = semget(semkey, 2, 0); to obtain an existing semaphore? The while loop makes sure we get the semaphore before continuing the test. It's been some time, I'm not sure but now I really can't see this really hurts. Thanks, Murphy > > err_exit("rmid 0", errno); > > retry++; > > @@ -315,32 +316,29 @@ int main(int argc, char **argv) > > semu.array = vals; > > if (semctl(semid, 2, SETALL, semu) == -1) > > err_exit("init sem", errno); > > - /* Inc both new sem to 2 */ > > - sop.sem_num = 0; > > - sop.sem_op = 1; > > - sop.sem_flg = 0; > > - ts.tv_sec = 5; > > - ts.tv_nsec = 0; > > - if (semtimedop(semid, &sop, 1, &ts) == -1) > > - err_exit("inc sem0 2", errno); > > - sop.sem_num = 1; > > - sop.sem_op = 1; > > - sop.sem_flg = 0; > > - ts.tv_sec = 5; > > - ts.tv_nsec = 0; > > - if (semtimedop(semid, &sop, 1, &ts) == -1) > > - err_exit("inc sem1 2", errno); > > > > /* > > - * Wait initialization complete. semctl(2) only update > > - * sem_ctime, semop(2) will update sem_otime. > > + * Wait initialization complete. > > */ > > ret = -1; > > do { > > + if (ret != -1) > > + usleep(100000); > > memset(&sem_ds, 0, sizeof(sem_ds)); > > semu.buf = &sem_ds; > > - ret = semctl(semid, 0, IPC_STAT, semu); > > - } while (!(ret == 0 && sem_ds.sem_otime != 0)); > > + ret = semctl(semid, 1, GETVAL, semu); > > + if (ret == -1) > > + err_exit("wait sem1 2", errno); > > + } while (ret != 2); > > + > > + /* Inc sem0 to 2 */ > > + sop.sem_num = 0; > > + sop.sem_op = 1; > > + sop.sem_flg = 0; > > + ts.tv_sec = 5; > > + ts.tv_nsec = 0; > > + if (semtimedop(semid, &sop, 1, &ts) == -1) > > + err_exit("inc sem0 2", errno); > > > > /* place the lock */ > > if (fcntl(fd, setlk_macro, &flk) < 0) > > @@ -393,10 +391,26 @@ int main(int argc, char **argv) > > /* getlck */ > > if (lock_cmd == 0) { > > /* wait sem created and initialized */ > > +again: > > retry = 5; > > do { > > semid = semget(semkey, 2, 0); > > - if (semid != -1) > > + ret = -1; > > + if (semid != -1) do { > > + if (ret != -1) > > + usleep(100000); > > + memset(&sem_ds, 0, sizeof(sem_ds)); > > + semu.buf = &sem_ds; > > + ret = semctl(semid, 0, GETVAL, semu); > > + if (ret == -1 && (errno == EINVAL > > + || errno == EIDRM)) { > > + /* sem removed */ > > + goto again; > > + } > > + if (ret == -1) > > + break; > > + } while (ret != 1); > > + if (ret == 1) > > break; > > if (errno == ENOENT && retry) { > > sleep(1); > > @@ -406,11 +420,15 @@ int main(int argc, char **argv) > > err_exit("getlk_semget", errno); > > } > > } while (1); > > - do { > > - memset(&sem_ds, 0, sizeof(sem_ds)); > > - semu.buf = &sem_ds; > > - ret = semctl(semid, 0, IPC_STAT, semu); > > - } while (!(ret == 0 && sem_ds.sem_otime != 0)); > > + > > + /* inc sem1 to 2 (initialization completed) */ > > + sop.sem_num = 1; > > + sop.sem_op = 1; > > + sop.sem_flg = 0; > > + ts.tv_sec = 5; > > + ts.tv_nsec = 0; > > + if (semtimedop(semid, &sop, 1, &ts) == -1) > > + err_exit("inc sem1 2", errno); > > > > /* wait sem0 == 0 (setlk and close fd done) */ > > sop.sem_num = 0; > > @@ -418,8 +436,11 @@ int main(int argc, char **argv) > > sop.sem_flg = 0; > > ts.tv_sec = 5; > > ts.tv_nsec = 0; > > - if (semtimedop(semid, &sop, 1, &ts) == -1) > > + if (semtimedop(semid, &sop, 1, &ts) == -1) { > > + if (errno == EIDRM || errno == EINVAL) > > + goto again; > > err_exit("wait sem0 0", errno); > > + } > > > > if (fcntl(fd, getlk_macro, &flk) < 0) > > err_exit("getlk", errno); > > (cc'ing Murphy) > > The patch looks reasonable to me at first glance, though I confess I'm > no expert in semaphore handling. > > Acked-by: Jeff Layton <jlayton@xxxxxxxxxx> >