On Fri, 2023-06-30 at 14:40 +0500, Stas Sergeev wrote: > Currently IPC_RMID was attempted on a semid returned after failed > semget() with flags=IPC_CREAT|IPC_EXCL. So nothing was actually removed. > To get a proper semid, this patch retries semget() without IPC_EXCL. > Nice catch. > This opens up a race: if the lock-tester grabbed the old sem before > lock-setter used that magic to remove it, then the lock-tester will > remain with removed sem. > > Additionally locker was waiting for sem_otime on sem0 to became > non-zero after incrementing sem0 himself. So sem_otime was never > 0 at the time of checking it. > > So basically the code was all wrong. > This patch: > - fixes RMID after IPC_EXCL to actually remove the sem > - moves the increment of sem1 to the lock-tester site, and the > lock-setter waits for that event > - replaces the wait loop on sem_otime with GETVAL, adding a small sleep. > - lock-tester during the init sequence checks for removed sem, and > if it is - retries the init from semget() > > Signed-off-by: Stas Sergeev <stsp2@xxxxxxxxx> > --- > src/t_ofd_locks.c | 73 ++++++++++++++++++++++++++++++----------------- > 1 file changed, 47 insertions(+), 26 deletions(-) > > diff --git a/src/t_ofd_locks.c b/src/t_ofd_locks.c > index e77f2659..daa6f96c 100644 > --- a/src/t_ofd_locks.c > +++ b/src/t_ofd_locks.c > @@ -297,6 +297,7 @@ int main(int argc, char **argv) > semid = semget(semkey, 2, IPC_CREAT|IPC_EXCL); > if (semid < 0 && errno == EEXIST) { > /* remove sem set after one round of test */ > + semid = semget(semkey, 2, IPC_CREAT); > if (semctl(semid, 2, IPC_RMID, semu) == -1) > err_exit("rmid 0", errno); > retry++; > @@ -315,32 +316,29 @@ int main(int argc, char **argv) > semu.array = vals; > if (semctl(semid, 2, SETALL, semu) == -1) > err_exit("init sem", errno); > - /* Inc both new sem to 2 */ > - sop.sem_num = 0; > - sop.sem_op = 1; > - sop.sem_flg = 0; > - ts.tv_sec = 5; > - ts.tv_nsec = 0; > - if (semtimedop(semid, &sop, 1, &ts) == -1) > - err_exit("inc sem0 2", errno); > - sop.sem_num = 1; > - sop.sem_op = 1; > - sop.sem_flg = 0; > - ts.tv_sec = 5; > - ts.tv_nsec = 0; > - if (semtimedop(semid, &sop, 1, &ts) == -1) > - err_exit("inc sem1 2", errno); > > /* > - * Wait initialization complete. semctl(2) only update > - * sem_ctime, semop(2) will update sem_otime. > + * Wait initialization complete. > */ > ret = -1; > do { > + if (ret != -1) > + usleep(100000); > memset(&sem_ds, 0, sizeof(sem_ds)); > semu.buf = &sem_ds; > - ret = semctl(semid, 0, IPC_STAT, semu); > - } while (!(ret == 0 && sem_ds.sem_otime != 0)); > + ret = semctl(semid, 1, GETVAL, semu); > + if (ret == -1) > + err_exit("wait sem1 2", errno); > + } while (ret != 2); > + > + /* Inc sem0 to 2 */ > + sop.sem_num = 0; > + sop.sem_op = 1; > + sop.sem_flg = 0; > + ts.tv_sec = 5; > + ts.tv_nsec = 0; > + if (semtimedop(semid, &sop, 1, &ts) == -1) > + err_exit("inc sem0 2", errno); > > /* place the lock */ > if (fcntl(fd, setlk_macro, &flk) < 0) > @@ -393,10 +391,26 @@ int main(int argc, char **argv) > /* getlck */ > if (lock_cmd == 0) { > /* wait sem created and initialized */ > +again: > retry = 5; > do { > semid = semget(semkey, 2, 0); > - if (semid != -1) > + ret = -1; > + if (semid != -1) do { > + if (ret != -1) > + usleep(100000); > + memset(&sem_ds, 0, sizeof(sem_ds)); > + semu.buf = &sem_ds; > + ret = semctl(semid, 0, GETVAL, semu); > + if (ret == -1 && (errno == EINVAL > + || errno == EIDRM)) { > + /* sem removed */ > + goto again; > + } > + if (ret == -1) > + break; > + } while (ret != 1); > + if (ret == 1) > break; > if (errno == ENOENT && retry) { > sleep(1); > @@ -406,11 +420,15 @@ int main(int argc, char **argv) > err_exit("getlk_semget", errno); > } > } while (1); > - do { > - memset(&sem_ds, 0, sizeof(sem_ds)); > - semu.buf = &sem_ds; > - ret = semctl(semid, 0, IPC_STAT, semu); > - } while (!(ret == 0 && sem_ds.sem_otime != 0)); > + > + /* inc sem1 to 2 (initialization completed) */ > + sop.sem_num = 1; > + sop.sem_op = 1; > + sop.sem_flg = 0; > + ts.tv_sec = 5; > + ts.tv_nsec = 0; > + if (semtimedop(semid, &sop, 1, &ts) == -1) > + err_exit("inc sem1 2", errno); > > /* wait sem0 == 0 (setlk and close fd done) */ > sop.sem_num = 0; > @@ -418,8 +436,11 @@ int main(int argc, char **argv) > sop.sem_flg = 0; > ts.tv_sec = 5; > ts.tv_nsec = 0; > - if (semtimedop(semid, &sop, 1, &ts) == -1) > + if (semtimedop(semid, &sop, 1, &ts) == -1) { > + if (errno == EIDRM || errno == EINVAL) > + goto again; > err_exit("wait sem0 0", errno); > + } > > if (fcntl(fd, getlk_macro, &flk) < 0) > err_exit("getlk", errno); (cc'ing Murphy) The patch looks reasonable to me at first glance, though I confess I'm no expert in semaphore handling. Acked-by: Jeff Layton <jlayton@xxxxxxxxxx>