On 2020-03-18 2:43 p.m., Thomas Gleixner wrote: > From: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx> > > The poll callback is using the completion wait queue and sticks it into > poll_wait() to wake up pollers after a command has completed. > > This works to some extent, but cannot provide EPOLLEXCLUSIVE support > because the waker side uses complete_all() which unconditionally wakes up > all waiters. complete_all() is required because completions internally use > exclusive wait and complete() only wakes up one waiter by default. > > This mixes conceptually different mechanisms and relies on internal > implementation details of completions, which in turn puts constraints on > changing the internal implementation of completions. > > Replace it with a regular wait queue and store the state in struct > switchtec_user. > > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx> > Acked-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> While I've been against open coding the completion in this driver for a while, I'm convinced by the EPOLLEXCLUSIVE argument for this change. I've reviewed and lightly tested the change with hardware: Reviewed-by: Logan Gunthorpe <logang@xxxxxxxxxxxx> Thanks, Logan > Cc: Kurt Schwemmer <kurt.schwemmer@xxxxxxxxxxxxx> > Cc: Logan Gunthorpe <logang@xxxxxxxxxxxx> > Cc: Bjorn Helgaas <bhelgaas@xxxxxxxxxx> > Cc: linux-pci@xxxxxxxxxxxxxxx > --- > V2: Reworded changelog. 
> --- > drivers/pci/switch/switchtec.c | 22 +++++++++++++--------- > 1 file changed, 13 insertions(+), 9 deletions(-) > > --- a/drivers/pci/switch/switchtec.c > +++ b/drivers/pci/switch/switchtec.c > @@ -52,10 +52,11 @@ struct switchtec_user { > > enum mrpc_state state; > > - struct completion comp; > + wait_queue_head_t cmd_comp; > struct kref kref; > struct list_head list; > > + bool cmd_done; > u32 cmd; > u32 status; > u32 return_code; > @@ -77,7 +78,7 @@ static struct switchtec_user *stuser_cre > stuser->stdev = stdev; > kref_init(&stuser->kref); > INIT_LIST_HEAD(&stuser->list); > - init_completion(&stuser->comp); > + init_waitqueue_head(&stuser->cmd_comp); > stuser->event_cnt = atomic_read(&stdev->event_cnt); > > dev_dbg(&stdev->dev, "%s: %p\n", __func__, stuser); > @@ -175,7 +176,7 @@ static int mrpc_queue_cmd(struct switcht > kref_get(&stuser->kref); > stuser->read_len = sizeof(stuser->data); > stuser_set_state(stuser, MRPC_QUEUED); > - reinit_completion(&stuser->comp); > + stuser->cmd_done = false; > list_add_tail(&stuser->list, &stdev->mrpc_queue); > > mrpc_cmd_submit(stdev); > @@ -222,7 +223,8 @@ static void mrpc_complete_cmd(struct swi > memcpy_fromio(stuser->data, &stdev->mmio_mrpc->output_data, > stuser->read_len); > out: > - complete_all(&stuser->comp); > + stuser->cmd_done = true; > + wake_up_interruptible(&stuser->cmd_comp); > list_del_init(&stuser->list); > stuser_put(stuser); > stdev->mrpc_busy = 0; > @@ -529,10 +531,11 @@ static ssize_t switchtec_dev_read(struct > mutex_unlock(&stdev->mrpc_mutex); > > if (filp->f_flags & O_NONBLOCK) { > - if (!try_wait_for_completion(&stuser->comp)) > + if (!stuser->cmd_done) > return -EAGAIN; > } else { > - rc = wait_for_completion_interruptible(&stuser->comp); > + rc = wait_event_interruptible(stuser->cmd_comp, > + stuser->cmd_done); > if (rc < 0) > return rc; > } > @@ -580,7 +583,7 @@ static __poll_t switchtec_dev_poll(struc > struct switchtec_dev *stdev = stuser->stdev; > __poll_t ret = 0; > > - 
poll_wait(filp, &stuser->comp.wait, wait); > + poll_wait(filp, &stuser->cmd_comp, wait); > poll_wait(filp, &stdev->event_wq, wait); > > if (lock_mutex_and_test_alive(stdev)) > @@ -588,7 +591,7 @@ static __poll_t switchtec_dev_poll(struc > > mutex_unlock(&stdev->mrpc_mutex); > > - if (try_wait_for_completion(&stuser->comp)) > + if (stuser->cmd_done) > ret |= EPOLLIN | EPOLLRDNORM; > > if (stuser->event_cnt != atomic_read(&stdev->event_cnt)) > @@ -1272,7 +1275,8 @@ static void stdev_kill(struct switchtec_ > > /* Wake up and kill any users waiting on an MRPC request */ > list_for_each_entry_safe(stuser, tmpuser, &stdev->mrpc_queue, list) { > - complete_all(&stuser->comp); > + stuser->cmd_done = true; > + wake_up_interruptible(&stuser->cmd_comp); > list_del_init(&stuser->list); > stuser_put(stuser); > } >