This patch implements dlm plock F_SETLKW interruption feature. If the pending plock operation is not sent to user space yet it can simple be dropped out of the send_list. In case it's already being sent we need to try to remove the waiters in dlm user space tool. If it was successful a reply with DLM_PLOCK_OP_CANCEL optype instead of DLM_PLOCK_OP_LOCK comes back (flag DLM_PLOCK_FL_NO_REPLY was then being cleared in user space) to signal the cancellation was successful. If a result with optype DLM_PLOCK_OP_LOCK came back then the cancellation was not successful. Signed-off-by: Alexander Aring <aahringo@xxxxxxxxxx> --- fs/dlm/plock.c | 91 ++++++++++++++++++++++++---------- include/uapi/linux/dlm_plock.h | 1 + 2 files changed, 66 insertions(+), 26 deletions(-) diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index 1fa5b04d0298..0781a80c25f7 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -29,6 +29,9 @@ struct plock_async_data { struct plock_op { struct list_head list; +#define DLM_PLOCK_OP_FLAG_SENT 0 +#define DLM_PLOCK_OP_FLAG_INTERRUPTED 1 + unsigned long flags; int done; struct dlm_plock_info info; /* if set indicates async handling */ @@ -74,30 +77,25 @@ static void send_op(struct plock_op *op) wake_up(&send_wq); } -/* If a process was killed while waiting for the only plock on a file, - locks_remove_posix will not see any lock on the file so it won't - send an unlock-close to us to pass on to userspace to clean up the - abandoned waiter. So, we have to insert the unlock-close when the - lock call is interrupted. */ - -static void do_unlock_close(const struct dlm_plock_info *info) +static int do_lock_cancel(const struct plock_op *orig_op) { struct plock_op *op; op = kzalloc(sizeof(*op), GFP_NOFS); if (!op) - return; + return -ENOMEM; + + op->info = orig_op->info; + op->info.optype = DLM_PLOCK_OP_CANCEL; + op->info.flags = DLM_PLOCK_FL_NO_REPLY; - op->info.optype = DLM_PLOCK_OP_UNLOCK; - op->info.pid = info->pid; - op->info.fsid = info->fsid; - op->info.number = info->number; - op->info.start = 0; - op->info.end = OFFSET_MAX; - op->info.owner = info->owner; - - op->info.flags |= (DLM_PLOCK_FL_CLOSE | DLM_PLOCK_FL_NO_REPLY); send_op(op); + wait_event(recv_wq, (orig_op->done != 0)); + + if (orig_op->info.optype == DLM_PLOCK_OP_CANCEL) + return 0; + + return 1; } int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, @@ -156,7 +154,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, send_op(op); if (op->info.wait) { - rv = wait_event_killable(recv_wq, (op->done != 0)); + rv = wait_event_interruptible(recv_wq, (op->done != 0)); if (rv == -ERESTARTSYS) { spin_lock(&ops_lock); /* recheck under ops_lock if we got a done != 0, @@ -166,17 +164,37 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, spin_unlock(&ops_lock); goto do_lock_wait; } - list_del(&op->list); - spin_unlock(&ops_lock); + + if (!test_bit(DLM_PLOCK_OP_FLAG_SENT, &op->flags)) { + /* part of send_list, user never saw the op */ + list_del(&op->list); + spin_unlock(&ops_lock); + rv = -EINTR; + } else { + set_bit(DLM_PLOCK_OP_FLAG_INTERRUPTED, &op->flags); + spin_unlock(&ops_lock); + rv = do_lock_cancel(op); + switch (rv) { + case 0: + rv = -EINTR; + break; + case 1: + /* cancellation wasn't successful but op is done */ + goto do_lock_wait; + default: + /* internal error doing cancel we need to wait */ + goto wait; + } + } log_debug(ls, "%s: wait interrupted %x %llx pid %d", __func__, ls->ls_global_id, (unsigned long long)number, op->info.pid); - do_unlock_close(&op->info); dlm_release_plock_op(op); goto out; } } else { +wait: wait_event(recv_wq, (op->done != 0)); } @@ -392,10 +410,12 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count, spin_lock(&ops_lock); if (!list_empty(&send_list)) { op = list_first_entry(&send_list, struct plock_op, list); - if (op->info.flags & DLM_PLOCK_FL_NO_REPLY) + if (op->info.flags & DLM_PLOCK_FL_NO_REPLY) { list_del(&op->list); - else + } else { list_move_tail(&op->list, &recv_list); + set_bit(DLM_PLOCK_OP_FLAG_SENT, &op->flags); + } memcpy(&info, &op->info, sizeof(info)); } spin_unlock(&ops_lock); @@ -454,6 +474,27 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count, spin_lock(&ops_lock); if (info.wait) { list_for_each_entry(iter, &recv_list, list) { + /* A very specific posix lock case allows two + * lock request with the same meaning by using + * threads. It makes no sense from the application + * to do such request, however it is possible. + * We need to check the state for cancellation because + * we need to know the instance which is interrupted + * if two or more of the "same" lock requests are + * in waiting state and got interrupted. + * + * TODO we should move to a instance reference from + * request and reply and not go over lock states, but + * it seems going over lock states and get the instance + * does not make any problems (at least there were no + * issues found yet) but it's much cleaner to not think + * about all possible special cases and states to instance + * has no 1:1 mapping anymore. + */ + if (info.optype == DLM_PLOCK_OP_CANCEL && + !test_bit(DLM_PLOCK_OP_FLAG_INTERRUPTED, &iter->flags)) + continue; + if (iter->info.fsid == info.fsid && iter->info.number == info.number && iter->info.owner == info.owner && @@ -477,9 +518,7 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count, if (op) { /* Sanity check that op and info match. */ - if (info.wait) - WARN_ON(op->info.optype != DLM_PLOCK_OP_LOCK); - else + if (!info.wait) WARN_ON(op->info.fsid != info.fsid || op->info.number != info.number || op->info.owner != info.owner || diff --git a/include/uapi/linux/dlm_plock.h b/include/uapi/linux/dlm_plock.h index 8dfa272c929a..9c4c083c824a 100644 --- a/include/uapi/linux/dlm_plock.h +++ b/include/uapi/linux/dlm_plock.h @@ -22,6 +22,7 @@ enum { DLM_PLOCK_OP_LOCK = 1, DLM_PLOCK_OP_UNLOCK, DLM_PLOCK_OP_GET, + DLM_PLOCK_OP_CANCEL, }; #define DLM_PLOCK_FL_CLOSE 1 -- 2.31.1