Hi Xenia, I'm Ccing both Dan and Mathias, since your patch has triggered my "holy crap so many race conditions" reaction, and I would like their eyes on this too. On Mon, Oct 07, 2013 at 06:52:38PM +0300, Xenia Ragiadakou wrote: > If a Reset Endpoint command is issued for an endpoint that is not in > the Halted state, xHC does nothing. Since the current implementation of > xhci_endpoint_reset() aborts any resets that address not halted endpoints, > the synchronization between the host-side and device-side toggle or sequence > number is lost. > This patch attempts to fix this issue by dropping and readding the endpoint > in order to enforce xHC to reset its internal toggle/sequence number. > > Four new functions have been introduced to prevent xhci_endpoint_reset() from > becoming huge. > > xhci_unlink_urbs(xhci, ring): > unlinks the urbs from the endpoint's rings > > xhci_stop_endpoint_for_config(xhci, udev, ep index): > brings the endpoint to the Stopped state and calls xhci_unlink_urbs > to unlink any queued urbs > > xhci_reset_ep0(xhci, udev, ep): > calls xhci_stop_endpoint_for_config to stop the endpoint and then > reinitializes the ring and input context for the default control > endpoint and issues an Evaluate Context command to reset xHC toggle > > xhci_reset_not_halted_ep(xhci, udev, ep): > calls xhci_stop_endpoint_for_config and if the endpoint to be reset is > the endpoint 0 calls xhci_reset_ep0, otherwise reinitializes the > endpoint rings and input context and then drops and readds the endpoint > issueing a Configure Endpoint > > Also, this patch introduces a new trace event for debugging output contexts > during the process of resetting the endpoint. At this early stage, it is > useful for debugging the xhci_reset_not_halted_ep() and it may be removed > later when further testing and revisioning show that the function works as > expected. > > Signed-off-by: Xenia Ragiadakou <burzalodowa@xxxxxxxxx> > --- > > This patch needs to be tested further because I did not find a satisfying > way to test it. I would appreciate if the device driver writers that had > problems with the xhci reset endpoint in the past could test it and let me > know if it fixes the problem or not. > > drivers/usb/host/xhci-ring.c | 17 +++- > drivers/usb/host/xhci-trace.h | 6 ++ > drivers/usb/host/xhci.c | 210 +++++++++++++++++++++++++++++++++++++++++- > drivers/usb/host/xhci.h | 3 + > 4 files changed, 233 insertions(+), 3 deletions(-) > > diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c > index faff6fc..f62131b 100644 > --- a/drivers/usb/host/xhci-ring.c > +++ b/drivers/usb/host/xhci-ring.c > @@ -433,6 +433,10 @@ static void ring_doorbell_for_active_rings(struct xhci_hcd *xhci, > > ep = &xhci->devs[slot_id]->eps[ep_index]; > > + /* If there is a pending configuration, don't ring the doorbell yet */ > + if (ep->ep_state & EP_CONFIG_PENDING) > + return; > + > /* A ring has pending URBs if its TD list is not empty */ > if (!(ep->ep_state & EP_HAS_STREAMS)) { > if (ep->ring && !(list_empty(&ep->ring->td_list))) > @@ -715,8 +719,8 @@ static void xhci_stop_watchdog_timer_in_irq(struct xhci_hcd *xhci, > } > > /* Must be called with xhci->lock held in interrupt context */ > -static void xhci_giveback_urb_in_irq(struct xhci_hcd *xhci, > - struct xhci_td *cur_td, int status) > +void xhci_giveback_urb_in_irq(struct xhci_hcd *xhci, struct xhci_td *cur_td, > + int status) > { > struct usb_hcd *hcd; > struct urb *urb; > @@ -788,6 +792,8 @@ static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id, > xhci_stop_watchdog_timer_in_irq(xhci, ep); > ep->stopped_td = NULL; > ep->stopped_trb = NULL; > + if (ep->ep_state & EP_CONFIG_PENDING) > + goto signal_completion; > ring_doorbell_for_active_rings(xhci, slot_id, ep_index); > return; > } > @@ -881,6 +887,13 @@ remove_finished_td: > return; > } while (cur_td != last_unlinked_td); > > +signal_completion: > + /* signal the completion for the Stop Endpoint command */ > + if (ep->ep_state & EP_CONFIG_PENDING) { > + virt_dev = xhci->devs[slot_id]; > + handle_cmd_in_cmd_wait_list(xhci, virt_dev, event); > + } > + > /* Return to the event handler with xhci->lock re-acquired */ > } > > diff --git a/drivers/usb/host/xhci-trace.h b/drivers/usb/host/xhci-trace.h > index 132abda..1f8b3fb 100644 > --- a/drivers/usb/host/xhci-trace.h > +++ b/drivers/usb/host/xhci-trace.h > @@ -113,6 +113,12 @@ DEFINE_EVENT(xhci_log_ctx, xhci_address_ctx, > TP_ARGS(xhci, ctx, ep_num) > ); > > +DEFINE_EVENT(xhci_log_ctx, xhci_resetep_ctx, > + TP_PROTO(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx, > + unsigned int ep_num), > + TP_ARGS(xhci, ctx, ep_num) > +); > + > DECLARE_EVENT_CLASS(xhci_log_event, > TP_PROTO(void *trb_va, struct xhci_generic_trb *ev), > TP_ARGS(trb_va, ev), > diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c > index 9f22ddf..e6300b5 100644 > --- a/drivers/usb/host/xhci.c > +++ b/drivers/usb/host/xhci.c > @@ -2873,6 +2873,210 @@ void xhci_cleanup_stalled_ring(struct xhci_hcd *xhci, > } > } > > +static void xhci_unlink_urbs(struct xhci_hcd *xhci, struct xhci_ring *ring) > +{ > + unsigned long flags; > + struct xhci_td *cur_td; > + > + spin_lock_irqsave(&xhci->lock, flags); > + while (!list_empty(&ring->td_list)) { > + cur_td = list_first_entry(&ring->td_list, struct xhci_td, > + td_list); > + cur_td->urb->transfer_buffer_length = 0; > + xhci_giveback_urb_in_irq(xhci, cur_td, -EPROTO); > + list_del_init(&cur_td->td_list); You need to delete the TD from the cur_td list before calling into xhci_giveback_urb_in_irq() here. Since that function drops the lock, other functions may try to manipulate the cur_td list. > + } > + spin_unlock_irqrestore(&xhci->lock, flags); > +} > + > +static int xhci_stop_endpoint_for_config(struct xhci_hcd *xhci, > + struct usb_device *udev, unsigned int ep_index) > +{ > + struct xhci_virt_device *vdev; > + struct xhci_virt_ep *virt_ep; > + struct xhci_ep_ctx *ep_ctx_out; > + unsigned int ep_state; > + struct xhci_command *stop_ep_cmd; > + unsigned long flags; > + int ret, timeleft; > + > + vdev = xhci->devs[udev->slot_id]; > + virt_ep = &vdev->eps[ep_index]; > + > + ep_ctx_out = xhci_get_ep_ctx(xhci, vdev->out_ctx, ep_index); > + ep_state = le32_to_cpu(ep_ctx_out->ep_info) & EP_STATE_MASK; > + if (ep_state == EP_STATE_STOPPED) > + return 0; > + > + /* set EP_CONFIG_PENDING flag to prevent xhci from ringing the > + * doorbell for this endpoint's rings, when it handles the Stop > + * Endpoint command. */ Please put the close comment on the next line. > + spin_lock_irqsave(&xhci->lock, flags); > + virt_ep->ep_state |= EP_CONFIG_PENDING; > + spin_unlock_irqrestore(&xhci->lock, flags); I think there's some nasty race conditions here. There's several different structures this code and other functions are manipulating: - the endpoint ring contents - the endpoint ring dequeue pointer - the endpoint's cur_td and cancelled_td lists The other functions you need to look at are xhci_urb_enqueue, xhci_urb_dequeue, and xhci_handle_cmd_stop_ep. Arguably, we should be doing something about drivers attempting to change alternate interface settings at the same time they're resetting endpoints, but I think the solution should be that drivers just shouldn't do that. However, we do need to handle the case where the reset endpoint races with URB cancellation and enqueue. So, what is the high-level overview of what happens with this new code to reset the endpoint, and URB enqueue and dequeue? Resetting the endpoint currently looks something like: R1. check if endpoint is running, if so (lock) xhci->lock set EP_CONFIG_PENDING flag for the endpoint queue stop endpoint command (unlock) xhci->lock wait for command to complete (which will clear EP_CONFIG_PENDING) R2. give back URBs in cur_td list: (lock) xhci->lock pull one TD off cur_td list (unlock) xhci->lock giveback that URB (lock) xhci->lock delete TD off cur_td list (unlock) xhci->lock R3. set up Configure Endpoint or Evaluate Context command set the add and drop flags for the endpoint (lock) xhci->lock queue command (unlock) xhci->lock wait for command to finish, sometime later: hardware drops the endpoint re-adds it with a new dequeue pointer C1. The URB dequeue function: (lock) xhci->lock adds the TD to the cancelled_td_list issues a Stop endpoint command if EP_HALTED is not set (unlock) xhci->lock C2. later, when that command completes: (lock) xhci->lock cancel URBs from ring remove URBs from the cur_td list possibly issue a Set TR Dequeue command or ring the doorbells C3. give back URBs in cancelled_td_list: (unlock) xhci->lock giveback URB in cancelled_td_list (lock) xhci->lock C4. if EP_CONFIG_PENDING is set, complete the command in the waitlist It will probably help you to lay the two high-level functions side by side, and play around with when different functions can run, based on when xhci->lock is unlocked. For instance, what happens if you manage to check the output context running state at R1 just as a stop endpoint command for a cancellation completes, but before the command handler is called in C2? In that case, you never set the EP_CONFIG_PENDING flag. You then assume the endpoint will remain halted while you give back URBs and manipulate the endpoint context with the Configure Endpoint or Evaluate Context command. However, there's nothing to stop the code at C2 (in xhci_handle_cmd_stop_ep) from issuing a Set TR Dequeue command or ringing the doorbells, which will restart the endpoint ring, because the EP_CONFIG_PENDING flag isn't set. Instead, you should use the EP_CONFIG_PENDING flag to protect the whole operation of removing URBs and reinitializing the rings. You need to unconditionally take the xhci->lock, set the EP_CONFIG_PENDING flag, and then check whether the endpoint is stopped while still holding the lock. The flag should only be cleared once you are done with the Configure Endpoint or Evaluate Context command. You're also going to have to make xhci_handle_cmd_stop_ep skip the Set TR dequeue pointer if the EP_CONFIG_PENDING flag is set. You'll need to clear the new dequeue pointer and new deq_seg as well. So now the sequence looks like: R1. reset endpoint (lock) xhci->lock set EP_CONFIG_PENDING flag for the endpoint check if endpoint is running, if so queue stop endpoint command (unlock) xhci->lock wait for command to complete R2. give back URBs in cur_td list: (lock) xhci->lock pull one TD off cur_td list (unlock) xhci->lock giveback that URB (lock) xhci->lock delete TD off cur_td list (unlock) xhci->lock R3. set up Configure Endpoint or Evaluate Context command set the add and drop flags for the endpoint (lock) xhci->lock queue command (unlock) xhci->lock wait for command to finish, sometime later: hardware drops the endpoint re-adds it with a new dequeue pointer clear EP_CONFIG_PENDING C1. The URB dequeue function: (lock) xhci->lock adds the TD to the cancelled_td_list issues a Stop endpoint command if EP_HALTED is not set (unlock) xhci->lock C2. later, when that command completes: (lock) xhci->lock cancel URBs from ring remove URBs from the cur_td list if EP_CONFIG_PENDING isn't set issue a Set TR Dequeue command or ring the doorbells C3. give back URBs in cancelled_td_list: (unlock) xhci->lock giveback URB in cancelled_td_list (lock) xhci->lock C4. if EP_CONFIG_PENDING is set, complete the command in the waitlist Next, what happens if the URB gets canceled, and the code reaches C3 before we can set EP_CONFIG_PENDING? In that case, there might be a Set TR dequeue command pending, but the endpoint may or may not be halted. Let's say the endpoint is still stopped. In that case, we don't issue a Stop Endpoint command, we give back some URBs, and move onto issuing the Configure Endpoint or Evaluate Context command. So what does the Set TR Dequeue command completion handler do? TR1. Handle Set TR dequeue command: (lock) xhci->lock (prior to calling xhci_handle_cmd_set_deq) update the number of free TRBs update the endpoint ring dequeue pointer and dequeue segment ring the doorbells (unlock) xhci->lock What happens if the Set TR dequeue command completes while we're in the middle of R3? It could complete after we drop the endpoint, after we re-initialize the endpoint when it's re-added, or even finish just before we queue the Configure Endpoint or Evaluate context command to the command ring. In that case, both the xHCI internal state of the number of free TRBs and the dequeue pointer and dequeue segment will get set to something completely different than we want it to be set to. So, you should change xhci_handle_cmd_set_deq to return immediately if EP_CONFIG_PENDING is set. There may be additional race conditions there, but at this point, my brain is pretty fried on thinking about race conditions. Finally, you need to make sure xhci_urb_enqueue returns an error if an URB is enqueued while EP_CONFIG_PENDING is set. Drivers shouldn't be attempting to enqueue URBs while a reset endpoint is in progress, because it's going to be manipulating the cur_td list. > + > + stop_ep_cmd = xhci_alloc_command(xhci, false, true, GFP_NOIO); > + if (!stop_ep_cmd) { > + xhci_dbg_trace(xhci, trace_xhci_dbg_reset_ep, > + "%s: Failed to allocate stop ep cmd", __func__); > + ret = -ENOMEM; > + goto out; > + } > + > + spin_lock_irqsave(&xhci->lock, flags); > + stop_ep_cmd->command_trb = xhci->cmd_ring->enqueue; > + ret = xhci_queue_stop_endpoint(xhci, udev->slot_id, ep_index, 0); > + if (ret) { > + spin_unlock_irqrestore(&xhci->lock, flags); > + xhci_dbg_trace(xhci, trace_xhci_dbg_reset_ep, > + "%s: Failed to queue stop ep cmd", __func__); > + goto cleanup_cmd; > + } > + list_add_tail(&stop_ep_cmd->cmd_list, &vdev->cmd_list); > + xhci_ring_cmd_db(xhci); > + spin_unlock_irqrestore(&xhci->lock, flags); > + > + timeleft = wait_for_completion_interruptible_timeout( > + stop_ep_cmd->completion, XHCI_CMD_DEFAULT_TIMEOUT); > + if (timeleft <= 0) { > + ep_state = le32_to_cpu(ep_ctx_out->ep_info) & EP_STATE_MASK; > + if (ep_state != EP_STATE_STOPPED) { > + spin_lock_irqsave(&xhci->lock, flags); > + if (stop_ep_cmd->cmd_list.next != LIST_POISON1) > + list_del(&stop_ep_cmd->cmd_list); > + spin_unlock_irqrestore(&xhci->lock, flags); > + ret = -ETIME; > + } > + } > + > + /* unlink urbs queued to the endpoint */ > + if (!(virt_ep->ep_state & EP_HAS_STREAMS)) { > + xhci_unlink_urbs(xhci, virt_ep->ring); > + } else { > + struct xhci_stream_info *stream_info = virt_ep->stream_info; > + unsigned int stream; > + > + for (stream = 1; stream < stream_info->num_streams; ++stream) { > + xhci_unlink_urbs(xhci, > + stream_info->stream_rings[stream]); > + } > + } > + > +cleanup_cmd: > + xhci_free_command(xhci, stop_ep_cmd); > + > +out: > + spin_lock_irqsave(&xhci->lock, flags); > + virt_ep->ep_state &= ~EP_CONFIG_PENDING; > + spin_unlock_irqrestore(&xhci->lock, flags); > + > + return ret; > +} > + > +static int xhci_reset_ep0(struct xhci_hcd *xhci, struct usb_device *udev, > + struct usb_host_endpoint *ep) > +{ > + struct xhci_virt_device *vdev; > + struct xhci_input_control_ctx *ctrl_ctx; > + struct xhci_slot_ctx *slot_ctx_out; > + int ret; > + > + vdev = xhci->devs[udev->slot_id]; > + > + /* reinitialize endpoint's ring and setup its input context based > + * on its descriptors */ > + if (xhci_endpoint_init(xhci, vdev, udev, ep, GFP_NOIO) < 0) { > + xhci_dbg_trace(xhci, trace_xhci_dbg_reset_ep, > + "%s: failed to initialize ep 0", __func__); > + return -ENOMEM; > + } > + > + /* issue an evaluate context command */ > + ctrl_ctx = xhci_get_input_control_ctx(xhci, vdev->in_ctx); > + if (!ctrl_ctx) { > + xhci_warn(xhci, "%s: Could not get input context, bad type.\n", > + __func__); > + > + return -EINVAL; > + } > + ctrl_ctx->add_flags = cpu_to_le32(EP0_FLAG | SLOT_FLAG); > + ctrl_ctx->drop_flags = 0; > + > + ret = xhci_configure_endpoint(xhci, udev, NULL, true, false); > + > + xhci_zero_in_ctx(xhci, vdev); > + > + slot_ctx_out = xhci_get_slot_ctx(xhci, vdev->out_ctx); > + trace_xhci_resetep_ctx(xhci, vdev->out_ctx, Could you change this from trace_xhci_resetep_ctx to trace_xhci_reset_ep_ctx? > + LAST_CTX_TO_EP_NUM(le32_to_cpu(slot_ctx_out->dev_info)) + 1); > + > + return ret; > +} > + > + > +/* The Reset Endpoint command cannot be issued for an endpoint that is not in > + * the Halted state. This function brings the endpoint in Stopped state, > + * unlinks any urbs queued to this endpoint and issues a Configure Endpoint > + * command to drop and re-add the endpoint. > + * By dropping and readding the endpoint, xHC is forced to reset its USB2 > + * data toggle or USB3 sequence number and the synchronization between the > + * host-side and the device-side toggle bit/sequence number is maintained. > + * Endpoint 0 does not apply to the Configure Endpoint command and will be > + * ignored by the xHC. Hence, for endpoint 0 we need to issue an Evaluate > + * Context command instead. > + */ > +static int xhci_reset_not_halted_ep(struct xhci_hcd *xhci, > + struct usb_device *udev, struct usb_host_endpoint *ep) > +{ > + struct usb_hcd *hcd = bus_to_hcd(udev->bus); > + struct xhci_virt_device *vdev; > + struct xhci_slot_ctx *slot_ctx_out; > + unsigned int ep_index; > + int ret; > + > + vdev = xhci->devs[udev->slot_id]; > + ep_index = xhci_get_endpoint_index(&ep->desc); > + > + /* bring endpoint in Stopped state and unlink urbs from its rings */ > + xhci_stop_endpoint_for_config(xhci, udev, ep_index); > + > + slot_ctx_out = xhci_get_slot_ctx(xhci, vdev->out_ctx); > + trace_xhci_resetep_ctx(xhci, vdev->out_ctx, > + LAST_CTX_TO_EP_NUM(le32_to_cpu(slot_ctx_out->dev_info)) + 1); > + > + /* for endpoint 0 we need to issue an Evaluate Context command */ > + if (ep_index == 0) > + return xhci_reset_ep0(xhci, udev, ep); > + > + /* setup input control flags to drop and re-add endpoint */ > + ret = xhci_drop_endpoint(hcd, udev, ep); > + if (ret < 0) { > + xhci_dbg_trace(xhci, trace_xhci_dbg_reset_ep, > + "%s: Failed to drop ep %d", __func__, ep_index); > + return ret; > + } > + ret = xhci_add_endpoint(hcd, udev, ep); > + if (ret < 0) { > + xhci_dbg_trace(xhci, trace_xhci_dbg_reset_ep, > + "%s: Failed to add ep %d", __func__, ep_index); > + goto reset_bw; > + } > + > + /* issue a Configure Endpoint command */ > + ret = xhci_check_bandwidth(hcd, udev); > + > +reset_bw: > + if (ret < 0) > + xhci_reset_bandwidth(hcd, udev); > + > + slot_ctx_out = xhci_get_slot_ctx(xhci, vdev->out_ctx); > + trace_xhci_resetep_ctx(xhci, vdev->out_ctx, > + LAST_CTX_TO_EP_NUM(le32_to_cpu(slot_ctx_out->dev_info)) + 1); > + > + return ret; > +} > + > + > /* Deal with stalled endpoints. The core should have sent the control message > * to clear the halt condition. However, we need to make the xHCI hardware > * reset its sequence number, since a device will expect a sequence number of > @@ -2900,8 +3104,12 @@ void xhci_endpoint_reset(struct usb_hcd *hcd, > virt_ep = &xhci->devs[udev->slot_id]->eps[ep_index]; > if (!virt_ep->stopped_td) { > xhci_dbg_trace(xhci, trace_xhci_dbg_reset_ep, > - "Endpoint 0x%x not halted, refusing to reset.", > + "reset not halted endpoint 0x%x", Please capitalize this sentence. > ep->desc.bEndpointAddress); > + ret = xhci_reset_not_halted_ep(xhci, udev, ep); > + if (ret < 0) > + xhci_dbg_trace(xhci, trace_xhci_dbg_reset_ep, > + "reset for not halted ep failed"); Same here. > return; > } > if (usb_endpoint_xfer_control(&ep->desc)) { > diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h > index 9575088..eaaa0af 100644 > --- a/drivers/usb/host/xhci.h > +++ b/drivers/usb/host/xhci.h > @@ -861,6 +861,7 @@ struct xhci_virt_ep { > #define EP_HAS_STREAMS (1 << 4) > /* Transitioning the endpoint to not using streams, don't enqueue URBs */ > #define EP_GETTING_NO_STREAMS (1 << 5) > +#define EP_CONFIG_PENDING (1 << 6) > /* ---- Related to URB cancellation ---- */ > struct list_head cancelled_td_list; > /* The TRB that was last reported in a stopped endpoint ring */ > @@ -1840,6 +1841,8 @@ int xhci_cancel_cmd(struct xhci_hcd *xhci, struct xhci_command *command, > union xhci_trb *cmd_trb); > void xhci_ring_ep_doorbell(struct xhci_hcd *xhci, unsigned int slot_id, > unsigned int ep_index, unsigned int stream_id); > +void xhci_giveback_urb_in_irq(struct xhci_hcd *xhci, struct xhci_td *cur_td, > + int status); > > /* xHCI roothub code */ > void xhci_set_link_state(struct xhci_hcd *xhci, __le32 __iomem **port_array, Ok, that's all the code review of this patch I can handle right now. Try to make the changes I suggested, and send me a second revision so I can think through the race conditions more. Sarah Sharp -- To unsubscribe from this list: send the line "unsubscribe linux-usb" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html