[RFC v2 4/7] xhci: Add memory allocation for USB3 bulk streams.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Add support for allocating streams for USB 3.0 bulk endpoints.  See
Documentation/usb/bulk-streams.txt for more information about how and why
you would use streams.

When an endpoint has streams enabled, instead of having one ring where all
transfers are enqueued to the hardware, it has several rings.  The ring
dequeue pointer in the endpoint context is changed to point to a "Stream
Context Array".  This is basically an array of pointers to transfer rings,
one for each stream ID that the driver wants to use.

The Stream Context Array size must be a power of two, and host controllers
can place a limit on the size of the array (4 to 2^16 entries).  These
two facts make calculating the size of the Stream Context Array and the
number of entries actually used by the driver a bit tricky.

Besides the Stream Context Array and rings for all the stream IDs, we need
one more data structure.  The xHCI hardware will not tell us which stream
ID a transfer event was for, but it will give us the slot ID, endpoint
index, and physical address for the TRB that caused the event.  For every
endpoint on a device, add a radix tree to map physical TRB addresses to
virtual segments within a stream ring.

Keep track of whether an endpoint is transitioning to using streams, and
don't enqueue any URBs while that's taking place.  Refuse to transition an
endpoint to streams if there are already URBs enqueued for that endpoint.

Signed-off-by: Sarah Sharp <sarah.a.sharp@xxxxxxxxxxxxxxx>
---
 drivers/usb/host/xhci-hcd.c  |  357 +++++++++++++++++++++++++++++++++++++++++-
 drivers/usb/host/xhci-mem.c  |  358 +++++++++++++++++++++++++++++++++++++++++-
 drivers/usb/host/xhci-ring.c |   10 +-
 drivers/usb/host/xhci.h      |   84 +++++++++-
 4 files changed, 795 insertions(+), 14 deletions(-)

diff --git a/drivers/usb/host/xhci-hcd.c b/drivers/usb/host/xhci-hcd.c
index 6c823c2..7d4f463 100644
--- a/drivers/usb/host/xhci-hcd.c
+++ b/drivers/usb/host/xhci-hcd.c
@@ -21,6 +21,7 @@
  */
 
 #include <linux/irq.h>
+#include <linux/log2.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 
@@ -726,8 +727,15 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
 		spin_unlock_irqrestore(&xhci->lock, flags);
 	} else if (usb_endpoint_xfer_bulk(&urb->ep->desc)) {
 		spin_lock_irqsave(&xhci->lock, flags);
-		ret = xhci_queue_bulk_tx(xhci, GFP_ATOMIC, urb,
-				slot_id, ep_index);
+		if (xhci->devs[slot_id]->eps[ep_index].ep_state &
+				EP_GETTING_STREAMS) {
+			xhci_warn(xhci, "WARN: Can't enqueue URB while bulk ep "
+					"is transitioning to using streams.\n");
+			ret = -EINVAL;
+		} else {
+			ret = xhci_queue_bulk_tx(xhci, GFP_ATOMIC, urb,
+					slot_id, ep_index);
+		}
 		spin_unlock_irqrestore(&xhci->lock, flags);
 	} else {
 		ret = -EINVAL;
@@ -1396,6 +1404,351 @@ void xhci_endpoint_reset(struct usb_hcd *hcd,
 		xhci_warn(xhci, "FIXME allocate a new ring segment\n");
 }
 
+/*
+ * Check whether one endpoint may be switched over to using streams.
+ *
+ * Returns 0 if the endpoint is eligible, or -EINVAL if:
+ *  - @ep is NULL or xhci_check_args() does not report success,
+ *  - the endpoint has no SuperSpeed Endpoint Companion descriptor,
+ *  - the endpoint already has streams or is in the middle of getting them,
+ *  - the endpoint's ring still has TDs on its td_list.
+ *
+ * This only reads state; no data structure is modified.
+ */
+static int xhci_check_streams_endpoint(struct xhci_hcd *xhci,
+		struct usb_device *udev, struct usb_host_endpoint *ep,
+		unsigned int slot_id)
+{
+	int ret;
+	unsigned int ep_index;
+	unsigned int ep_state;
+
+	if (!ep)
+		return -EINVAL;
+	ret = xhci_check_args(xhci_to_hcd(xhci), udev, ep, 1, __func__);
+	/*
+	 * Only a positive result lets us continue.  Testing "ret >= 0" here
+	 * would bail out on every successful check and let errors through.
+	 * NOTE(review): xhci_check_args() is defined outside this patch;
+	 * confirm that zero should also be treated as "do not proceed".
+	 */
+	if (ret <= 0)
+		return -EINVAL;
+	if (!ep->ss_ep_comp) {
+		xhci_warn(xhci, "WARN: No SuperSpeed Endpoint Companion"
+				" descriptor for ep 0x%x\n",
+				ep->desc.bEndpointAddress);
+		return -EINVAL;
+	}
+	ep_index = xhci_get_endpoint_index(&ep->desc);
+	ep_state = xhci->devs[slot_id]->eps[ep_index].ep_state;
+	if (ep_state & EP_HAS_STREAMS ||
+			ep_state & EP_GETTING_STREAMS) {
+		xhci_warn(xhci, "WARN: SuperSpeed bulk endpoint 0x%x "
+				"already has streams set up.\n",
+				ep->desc.bEndpointAddress);
+		xhci_warn(xhci, "Send email to xHCI maintainer and ask for "
+				"dynamic stream context array reallocation.\n");
+		return -EINVAL;
+	}
+	/* Refuse to switch while transfers are still queued on the ring. */
+	if (!list_empty(&xhci->devs[slot_id]->eps[ep_index].ring->td_list)) {
+		xhci_warn(xhci, "Cannot setup streams for SuperSpeed bulk "
+				"endpoint 0x%x; URBs are pending.\n",
+				ep->desc.bEndpointAddress);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * Derive the stream context array size from the requested stream count.
+ *
+ * On entry *num_streams is the number of streams wanted.  On return
+ * *num_stream_ctxs is that count rounded up to a power of two.  If the
+ * rounded value exceeds the host controller's primary stream array limit,
+ * both *num_stream_ctxs and *num_streams are clamped to that limit.
+ */
+static void xhci_calculate_streams_entries(struct xhci_hcd *xhci,
+		unsigned int *num_streams, unsigned int *num_stream_ctxs)
+{
+	unsigned int wanted_ctxs;
+	unsigned int hc_limit;
+
+	/* The array size has to be a power of two. */
+	wanted_ctxs = roundup_pow_of_two(*num_streams);
+	*num_stream_ctxs = wanted_ctxs;
+
+	/*
+	 * Only primary stream arrays are used for now.  Secondary stream
+	 * arrays (similar to 2nd level page entries) are an optional xHCI
+	 * feature that may be used later.  xHCs must support at least 4
+	 * stream IDs.
+	 */
+	hc_limit = HCC_MAX_PSA(xhci->hcc_params);
+	if (wanted_ctxs <= hc_limit)
+		return;
+
+	xhci_dbg(xhci, "xHCI HW only supports %u stream ctx entries.\n",
+			hc_limit);
+	*num_stream_ctxs = hc_limit;
+	*num_streams = hc_limit;
+}
+
+/*
+ * Validate every endpoint in @eps for streams and gather information.
+ *
+ * Lowers *num_streams to the smallest per-endpoint stream limit found and
+ * ORs each endpoint's flag into *changed_ep_bitmask.  Returns 0 on success,
+ * the error from xhci_check_streams_endpoint() if an endpoint is not
+ * eligible, or -EINVAL if the same endpoint is listed twice.
+ *
+ * No driver data structure is changed here; only the two output parameters
+ * are written.
+ */
+static int xhci_calculate_streams_and_bitmask(struct xhci_hcd *xhci,
+		struct usb_device *udev,
+		struct usb_host_endpoint **eps, unsigned int num_eps,
+		unsigned int *num_streams, u32 *changed_ep_bitmask)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < num_eps; idx++) {
+		struct usb_host_endpoint *ep = eps[idx];
+		unsigned int ep_limit;
+		unsigned int ep_flag;
+		int status;
+
+		status = xhci_check_streams_endpoint(xhci, udev,
+				ep, udev->slot_id);
+		if (status < 0)
+			return status;
+
+		/* Never hand out more streams than this endpoint supports. */
+		ep_limit = USB_SS_MAX_STREAMS(
+				ep->ss_ep_comp->desc.bmAttributes);
+		if (*num_streams > ep_limit) {
+			xhci_dbg(xhci, "Ep 0x%x only supports %u stream IDs.\n",
+					ep->desc.bEndpointAddress,
+					ep_limit);
+			*num_streams = ep_limit;
+		}
+
+		/* A flag that is already set means a duplicate endpoint. */
+		ep_flag = xhci_get_endpoint_flag(&ep->desc);
+		if (*changed_ep_bitmask & ep_flag)
+			return -EINVAL;
+		*changed_ep_bitmask |= ep_flag;
+	}
+	return 0;
+}
+
+/*
+ * Build the bitmask of endpoints whose streams are about to be removed.
+ *
+ * Returns the OR of the endpoint flags for every entry in @eps, or 0 if the
+ * slot has no virtual device or if any listed endpoint does not currently
+ * have EP_HAS_STREAMS set.
+ */
+static u32 xhci_calculate_no_streams_bitmask(struct xhci_hcd *xhci,
+		struct usb_device *udev,
+		struct usb_host_endpoint **eps, unsigned int num_eps)
+{
+	u32 changed_ep_bitmask = 0;
+	unsigned int slot_id;
+	unsigned int ep_index;
+	unsigned int ep_state;
+	int i;
+
+	slot_id = udev->slot_id;
+	if (!xhci->devs[slot_id])
+		return 0;
+
+	for (i = 0; i < num_eps; i++) {
+		ep_index = xhci_get_endpoint_index(&eps[i]->desc);
+		ep_state = xhci->devs[slot_id]->eps[ep_index].ep_state;
+		if (!(ep_state & EP_HAS_STREAMS)) {
+			/* Keep the whole message on one log line. */
+			xhci_warn(xhci, "WARN Can't disable streams for "
+					"endpoint 0x%x, "
+					"streams are already disabled!\n",
+					eps[i]->desc.bEndpointAddress);
+			xhci_warn(xhci, "WARN xhci_free_streams() called "
+					"with non-streams endpoint\n");
+			return 0;
+		}
+		changed_ep_bitmask |= xhci_get_endpoint_flag(&eps[i]->desc);
+	}
+	return changed_ep_bitmask;
+}
+
+/*
+ * The USB device drivers use this function (through the HCD interface in USB
+ * core) to prepare a set of bulk endpoints to use streams.  Streams are used to
+ * coordinate mass storage command queueing across multiple endpoints (basically
+ * a stream ID == a task ID).
+ *
+ * Setting up streams involves allocating the same size stream context array
+ * for each endpoint and issuing a configure endpoint command for all endpoints.
+ *
+ * Don't allow the call to succeed if one endpoint only supports one stream
+ * (which means it doesn't support streams at all).
+ *
+ * Drivers may get fewer stream IDs than they asked for, if the host controller
+ * hardware or endpoints claim they can't support the number of requested
+ * stream IDs.
+ *
+ * Returns the number of stream IDs the driver may use (stream 0 is not
+ * counted) on success, or a negative errno on failure.
+ * NOTE(review): the failure path returns -ENOMEM even when it was the
+ * configure endpoint command that failed, not a memory allocation.
+ */
+int xhci_alloc_streams(struct usb_hcd *hcd, struct usb_device *udev,
+		struct usb_host_endpoint **eps, unsigned int num_eps,
+		unsigned int num_streams, gfp_t mem_flags)
+{
+	int i, ret;
+	struct xhci_hcd *xhci;
+	struct xhci_virt_device *vdev;
+	unsigned int ep_index;
+	unsigned int num_stream_ctxs;
+	unsigned long flags;
+	u32 changed_ep_bitmask = 0;
+
+	if (!eps)
+		return -EINVAL;
+
+	/* Add one to the number of streams requested to account for
+	 * stream 0 that is reserved for xHCI usage.
+	 */
+	num_streams += 1;
+	xhci = hcd_to_xhci(hcd);
+	xhci_dbg(xhci, "Driver wants %u stream IDs (including stream 0).\n",
+			num_streams);
+
+	/* Check to make sure all endpoints are not already configured for
+	 * streams.  While we're at it, find the maximum number of streams that
+	 * all the endpoints will support and check for duplicate endpoints.
+	 */
+	spin_lock_irqsave(&xhci->lock, flags);
+	ret = xhci_calculate_streams_and_bitmask(xhci, udev, eps,
+			num_eps, &num_streams, &changed_ep_bitmask);
+	if (ret < 0) {
+		spin_unlock_irqrestore(&xhci->lock, flags);
+		return ret;
+	}
+	if (num_streams <= 1) {
+		xhci_warn(xhci, "WARN: endpoints can't handle "
+				"more than one stream.\n");
+		spin_unlock_irqrestore(&xhci->lock, flags);
+		return -EINVAL;
+	}
+	vdev = xhci->devs[udev->slot_id];
+	/* Mark each endpoint as being in transition, so
+	 * xhci_urb_enqueue() will reject all URBs.
+	 */
+	for (i = 0; i < num_eps; i++) {
+		ep_index = xhci_get_endpoint_index(&eps[i]->desc);
+		vdev->eps[ep_index].ep_state |= EP_GETTING_STREAMS;
+	}
+	spin_unlock_irqrestore(&xhci->lock, flags);
+
+	/* Setup internal data structures and allocate HW data structures for
+	 * streams (but don't install the HW structures in the input context
+	 * until we're sure all memory allocation succeeded).
+	 */
+	xhci_calculate_streams_entries(xhci, &num_streams, &num_stream_ctxs);
+	xhci_dbg(xhci, "Need %u stream ctx entries for %u stream IDs.\n",
+			num_stream_ctxs, num_streams);
+
+	for (i = 0; i < num_eps; i++) {
+		ep_index = xhci_get_endpoint_index(&eps[i]->desc);
+		vdev->eps[ep_index].stream_info = xhci_alloc_stream_info(xhci,
+				num_stream_ctxs,
+				num_streams, mem_flags);
+		if (!vdev->eps[ep_index].stream_info)
+			goto cleanup;
+		/* Set maxPstreams in endpoint context and update deq ptr to
+		 * point to stream context array. FIXME
+		 */
+	}
+
+	/* XXX No locking against something else using the input context.  It's
+	 * possible another endpoint on the device stalled, and the input
+	 * context is being used.  Alt settings could also change.  For now,
+	 * don't think about it.  If a race condition occurs later, make this
+	 * command allocate memory (stall condition should use the pre-allocated
+	 * input context so that it doesn't have to block on memory allocation).
+	 */
+	/* Set up the input context for a configure endpoint command. */
+	for (i = 0; i < num_eps; i++) {
+		struct xhci_ep_ctx *ep_ctx;
+
+		ep_index = xhci_get_endpoint_index(&eps[i]->desc);
+		ep_ctx = xhci_get_ep_ctx(xhci, vdev->in_ctx, ep_index);
+
+		xhci_endpoint_copy(xhci, vdev, ep_index);
+		xhci_setup_streams_ep_input_ctx(xhci, ep_ctx,
+				vdev->eps[ep_index].stream_info);
+	}
+	/* Tell the HW to drop its old copy of the endpoint context info
+	 * and add the updated copy from the input context.
+	 */
+	xhci_setup_input_ctx_for_config_ep(xhci, udev->slot_id,
+			changed_ep_bitmask, changed_ep_bitmask);
+
+	/* Issue and wait for the configure endpoint command */
+	ret = xhci_configure_endpoint(xhci, udev, vdev, false);
+
+	xhci_zero_in_ctx(xhci, vdev);
+	/* xHC rejected the configure endpoint command for some reason, so we
+	 * leave the old ring intact and free our internal streams data
+	 * structure.
+	 */
+	if (ret < 0)
+		goto cleanup;
+
+	/* The command succeeded: every endpoint now really has streams.
+	 * (ret cannot be negative here, so the flag is set unconditionally.)
+	 */
+	spin_lock_irqsave(&xhci->lock, flags);
+	for (i = 0; i < num_eps; i++) {
+		ep_index = xhci_get_endpoint_index(&eps[i]->desc);
+		vdev->eps[ep_index].ep_state &= ~EP_GETTING_STREAMS;
+		xhci_dbg(xhci, "Slot %u ep ctx %u now has streams.\n",
+				udev->slot_id, ep_index);
+		vdev->eps[ep_index].ep_state |= EP_HAS_STREAMS;
+	}
+	spin_unlock_irqrestore(&xhci->lock, flags);
+
+	/* Subtract 1 for stream 0, which drivers can't use */
+	return num_streams - 1;
+
+cleanup:
+	/* If it didn't work, free the streams! */
+	for (i = 0; i < num_eps; i++) {
+		ep_index = xhci_get_endpoint_index(&eps[i]->desc);
+		xhci_free_stream_info(xhci, vdev->eps[ep_index].stream_info);
+		/* Clear the pointer so xhci_free_virt_device() doesn't free
+		 * the same stream info a second time.
+		 */
+		vdev->eps[ep_index].stream_info = NULL;
+		/* FIXME Unset maxPstreams in endpoint context and
+		 * update deq ptr to point to normal transfer ring.
+		 */
+		vdev->eps[ep_index].ep_state &= ~EP_GETTING_STREAMS;
+		vdev->eps[ep_index].ep_state &= ~EP_HAS_STREAMS;
+		xhci_endpoint_zero(xhci, vdev, eps[i]);
+	}
+	return -ENOMEM;
+}
+
+/* Transition the endpoint from using streams to being a "normal" endpoint
+ * without streams.
+ *
+ * Modify the endpoint context state, submit a configure endpoint command,
+ * and free all endpoint rings for streams if that completes successfully.
+ *
+ * Returns 0 on success, -EINVAL if no endpoint bitmask could be built (see
+ * xhci_calculate_no_streams_bitmask()), or the negative error returned by
+ * the configure endpoint command, in which case the stream rings are kept.
+ */
+int xhci_free_streams(struct usb_hcd *hcd, struct usb_device *udev,
+		struct usb_host_endpoint **eps, unsigned int num_eps,
+		gfp_t mem_flags)
+{
+	int i, ret;
+	struct xhci_hcd *xhci;
+	struct xhci_virt_device *vdev;
+	unsigned int ep_index;
+	unsigned long flags;
+	u32 changed_ep_bitmask;
+
+	xhci = hcd_to_xhci(hcd);
+	vdev = xhci->devs[udev->slot_id];
+
+	/* Set up a configure endpoint command to remove the streams rings */
+	changed_ep_bitmask = xhci_calculate_no_streams_bitmask(xhci,
+			udev, eps, num_eps);
+	if (changed_ep_bitmask == 0)
+		return -EINVAL;
+
+	for (i = 0; i < num_eps; i++) {
+		struct xhci_ep_ctx *ep_ctx;
+
+		ep_index = xhci_get_endpoint_index(&eps[i]->desc);
+		ep_ctx = xhci_get_ep_ctx(xhci, vdev->in_ctx, ep_index);
+
+		xhci_endpoint_copy(xhci, vdev, ep_index);
+		xhci_setup_no_streams_ep_input_ctx(xhci, ep_ctx,
+				&vdev->eps[ep_index]);
+	}
+	xhci_setup_input_ctx_for_config_ep(xhci, udev->slot_id,
+			changed_ep_bitmask, changed_ep_bitmask);
+
+	/* Issue and wait for the configure endpoint command */
+	ret = xhci_configure_endpoint(xhci, udev, vdev, false);
+
+	xhci_zero_in_ctx(xhci, vdev);
+	/* xHC rejected the configure endpoint command for some reason, so we
+	 * leave the streams rings intact.
+	 */
+	if (ret < 0)
+		return ret;
+
+	spin_lock_irqsave(&xhci->lock, flags);
+	for (i = 0; i < num_eps; i++) {
+		ep_index = xhci_get_endpoint_index(&eps[i]->desc);
+		xhci_free_stream_info(xhci, vdev->eps[ep_index].stream_info);
+		/* Clear the pointer so xhci_free_virt_device() doesn't free
+		 * the same stream info a second time.
+		 */
+		vdev->eps[ep_index].stream_info = NULL;
+		/* FIXME Unset maxPstreams in endpoint context and
+		 * update deq ptr to point to normal transfer ring.
+		 */
+		vdev->eps[ep_index].ep_state &= ~EP_GETTING_STREAMS;
+		vdev->eps[ep_index].ep_state &= ~EP_HAS_STREAMS;
+	}
+	spin_unlock_irqrestore(&xhci->lock, flags);
+
+	return 0;
+}
+
 /*
  * At this point, the struct usb_device is about to go away, the device has
  * disconnected, and all traffic has been stopped and the endpoints have been
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 6e6797a..ccefe8d 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -248,6 +248,329 @@ struct xhci_ep_ctx *xhci_get_ep_ctx(struct xhci_hcd *xhci,
 		(ctx->bytes + (ep_index * CTX_SIZE(xhci->hcc_params)));
 }
 
+
+/***************** Streams structures manipulation *************************/
+
+/*
+ * Release a stream context array obtained from xhci_alloc_stream_ctx().
+ *
+ * @num_stream_ctxs must be the entry count the array was allocated with.
+ * The byte size derived from it selects the same backing allocator (small
+ * pool, medium pool, or pci_free_consistent()) that the allocation used.
+ */
+void xhci_free_stream_ctx(struct xhci_hcd *xhci,
+		unsigned int num_stream_ctxs,
+		struct xhci_stream_ctx *stream_ctx, dma_addr_t dma)
+{
+	struct pci_dev *pdev = to_pci_dev(xhci_to_hcd(xhci)->self.controller);
+	size_t size = sizeof(struct xhci_stream_ctx) * num_stream_ctxs;
+
+	if (size > MEDIUM_STREAM_ARRAY_SIZE)
+		pci_free_consistent(pdev, size, stream_ctx, dma);
+	else if (size <= SMALL_STREAM_ARRAY_SIZE)
+		dma_pool_free(xhci->small_streams_pool,
+				stream_ctx, dma);
+	else
+		dma_pool_free(xhci->medium_streams_pool,
+				stream_ctx, dma);
+}
+
+/*
+ * The stream context array for each endpoint with bulk streams enabled can
+ * vary in size, based on:
+ *  - how many streams the endpoint supports,
+ *  - the maximum primary stream array size the host controller supports,
+ *  - and how many streams the device driver asks for.
+ *
+ * The stream context array must be a power of 2, and can be as small as
+ * 64 bytes or as large as 1MB.
+ *
+ * The small and medium DMA pools are created with element sizes of
+ * SMALL_STREAM_ARRAY_SIZE and MEDIUM_STREAM_ARRAY_SIZE *bytes*, so the
+ * allocator is chosen by the array's size in bytes, not by its entry count.
+ * Anything bigger than the medium pool element comes from
+ * pci_alloc_consistent().
+ *
+ * Returns the array's virtual address and stores its DMA address in *dma,
+ * or returns NULL if the allocation failed.
+ */
+struct xhci_stream_ctx *xhci_alloc_stream_ctx(struct xhci_hcd *xhci,
+		unsigned int num_stream_ctxs, dma_addr_t *dma,
+		gfp_t mem_flags)
+{
+	struct pci_dev *pdev = to_pci_dev(xhci_to_hcd(xhci)->self.controller);
+	size_t size = sizeof(struct xhci_stream_ctx) * num_stream_ctxs;
+
+	if (size > MEDIUM_STREAM_ARRAY_SIZE)
+		return pci_alloc_consistent(pdev, size, dma);
+	else if (size <= SMALL_STREAM_ARRAY_SIZE)
+		return dma_pool_alloc(xhci->small_streams_pool,
+				mem_flags, dma);
+	else
+		return dma_pool_alloc(xhci->medium_streams_pool,
+				mem_flags, dma);
+}
+
+/*
+ * Find the stream ring registered for the segment containing @address.
+ *
+ * The DMA address is shifted right by SEGMENT_SHIFT to form the radix tree
+ * key.  Returns whatever radix_tree_lookup() yields for that key.
+ */
+struct xhci_ring *dma_to_stream_ring(
+		struct xhci_stream_info *stream_info,
+		u64 address)
+{
+	unsigned long key = address >> SEGMENT_SHIFT;
+
+	return radix_tree_lookup(&stream_info->trb_address_map, key);
+}
+
+/*
+ * Self-test for the TRB address radix tree of @stream_info.
+ *
+ * For each stream ring from ID 1 up to (but not including) @num_streams,
+ * check that every TRB address in the ring's first segment maps back to that
+ * ring, and that the addresses one TRB past the end and one TRB before the
+ * start do not.  Returns 0 if all lookups behave, -EINVAL on the first
+ * mismatch (after logging a warning).
+ */
+static int xhci_test_radix_tree(struct xhci_hcd *xhci,
+		unsigned int num_streams,
+		struct xhci_stream_info *stream_info)
+{
+	const int trb_size = sizeof(union xhci_trb);
+	u32 id;
+
+	for (id = 1; id < num_streams; id++) {
+		struct xhci_ring *ring = stream_info->stream_rings[id];
+		struct xhci_ring *found;
+		u64 addr = ring->first_seg->dma;
+
+		/* Walk every TRB slot inside the first segment. */
+		while (addr < ring->first_seg->dma + SEGMENT_SIZE) {
+			found = dma_to_stream_ring(stream_info, addr);
+			if (found != ring) {
+				xhci_warn(xhci, "WARN: DMA address 0x%08llx "
+						"didn't map to stream ID %u; "
+						"mapped to ring %p\n",
+						(unsigned long long) addr,
+						id,
+						found);
+				return -EINVAL;
+			}
+			addr += trb_size;
+		}
+
+		/* addr is now one TRB past the segment.  It must not resolve
+		 * to this ring (it may belong to a different ring).  If that
+		 * holds, probe one TRB before the segment as well.
+		 */
+		found = dma_to_stream_ring(stream_info, addr);
+		if (found != ring) {
+			addr = ring->first_seg->dma - trb_size;
+			found = dma_to_stream_ring(stream_info, addr);
+		}
+		if (found == ring) {
+			xhci_warn(xhci, "WARN: Bad DMA address 0x%08llx "
+					"mapped to valid stream ID %u; "
+					"mapped ring = %p\n",
+					(unsigned long long) addr,
+					id,
+					found);
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Change an endpoint's internal structure so it supports stream IDs.  The
+ * number of requested streams includes stream 0, which cannot be used by device
+ * drivers.
+ *
+ * The number of stream contexts in the stream context array may be bigger than
+ * the number of streams the driver wants to use.  This is because the number of
+ * stream context array entries must be a power of two.
+ *
+ * We need a radix tree for mapping physical addresses of TRBs to which stream
+ * ID they belong to.  We need to do this because the host controller won't tell
+ * us which stream ring the TRB came from.  We could store the stream ID in an
+ * event data TRB, but that doesn't help us for the cancellation case, since the
+ * endpoint may stop before it reaches that event data TRB.
+ *
+ * The radix tree maps the upper portion of the TRB DMA address to a ring
+ * segment that has the same upper portion of DMA addresses.  For example, say I
+ * have segments of size 1KB, that are always 64-byte aligned.  A segment may
+ * start at 0x10c91000 and end at 0x10c913f0.  If I shift the address right by
+ * 10 bits, the key to the stream ID is 0x43244.  I can use the DMA address of
+ * the TRB to pass the radix tree a key to get the right stream ID:
+ *
+ * 	0x10c90fff >> 10 = 0x43243
+ * 	0x10c912c0 >> 10 = 0x43244
+ * 	0x10c91400 >> 10 = 0x43245
+ *
+ * Obviously, only those TRBs with DMA addresses that are within the segment
+ * will make the radix tree return the stream ID for that ring.
+ *
+ * Caveats for the radix tree:
+ *
+ * The radix tree uses an unsigned long as a key pair.  On 32-bit systems, an
+ * unsigned long will be 32-bits; on a 64-bit system an unsigned long will be
+ * 64-bits.  Since we only request 32-bit DMA addresses, we can use that as the
+ * key on 32-bit or 64-bit systems (it would also be fine if we asked for 64-bit
+ * PCI DMA addresses on a 64-bit system).  There might be a problem on 32-bit
+ * extended systems (where the DMA address can be bigger than 32-bits),
+ * if we allow the PCI dma mask to be bigger than 32-bits.  So don't do that.
+ *
+ * Returns the new stream info, or NULL if any allocation, radix tree
+ * insertion, or (with XHCI_DEBUG) the radix tree self-test failed.  On
+ * failure everything allocated here is released again.
+ */
+struct xhci_stream_info *xhci_alloc_stream_info(struct xhci_hcd *xhci,
+		unsigned int num_stream_ctxs,
+		unsigned int num_streams, gfp_t mem_flags)
+{
+	struct xhci_stream_info *stream_info;
+	u32 cur_stream;
+	struct xhci_ring *cur_ring;
+	unsigned long key;
+	u64 addr;
+	int ret;
+
+	xhci_dbg(xhci, "Allocating %u streams and %u "
+			"stream context array entries.\n",
+			num_streams, num_stream_ctxs);
+	stream_info = kzalloc(sizeof(struct xhci_stream_info), mem_flags);
+	if (!stream_info)
+		return NULL;
+
+	stream_info->num_streams = num_streams;
+	stream_info->num_stream_ctxs = num_stream_ctxs;
+
+	/* Initialize the array of virtual pointers to stream rings. */
+	stream_info->stream_rings = kzalloc(
+			sizeof(struct xhci_ring *)*num_streams,
+			mem_flags);
+	if (!stream_info->stream_rings)
+		goto cleanup_info;
+
+	/* Initialize the array of DMA addresses for stream rings for the HW. */
+	stream_info->stream_ctx_array = xhci_alloc_stream_ctx(xhci,
+			num_stream_ctxs, &stream_info->ctx_array_dma,
+			mem_flags);
+	if (!stream_info->stream_ctx_array)
+		goto cleanup_stream_rings;
+	memset(stream_info->stream_ctx_array, 0,
+			sizeof(struct xhci_stream_ctx)*num_stream_ctxs);
+
+	INIT_RADIX_TREE(&stream_info->trb_address_map, GFP_ATOMIC);
+
+	/* Allocate rings for all the streams that the driver will use,
+	 * and add their segment DMA addresses to the radix tree.
+	 * Stream 0 is reserved.
+	 */
+	for (cur_stream = 1; cur_stream < num_streams; cur_stream++) {
+		stream_info->stream_rings[cur_stream] =
+			xhci_ring_alloc(xhci, 1, true, mem_flags);
+		cur_ring = stream_info->stream_rings[cur_stream];
+		if (!cur_ring)
+			goto cleanup_rings;
+		/* Set deq ptr, cycle bit, and stream context type */
+		addr = cur_ring->first_seg->dma |
+			SCT_FOR_CTX(SCT_PRI_TR) |
+			cur_ring->cycle_state;
+		stream_info->stream_ctx_array[cur_stream].stream_ring = addr;
+		xhci_dbg(xhci, "Setting stream %d ring ptr to 0x%08llx\n",
+				cur_stream, (unsigned long long) addr);
+
+		key = (unsigned long)
+			(cur_ring->first_seg->dma >> SEGMENT_SHIFT);
+		ret = radix_tree_insert(&stream_info->trb_address_map,
+				key, cur_ring);
+		/* radix_tree_insert() returns 0 on success, so only a
+		 * non-zero result is a failure.  This ring never made it
+		 * into the tree, so free it here and hide it from the
+		 * generic ring cleanup below.
+		 */
+		if (ret) {
+			xhci_ring_free(xhci, cur_ring);
+			stream_info->stream_rings[cur_stream] = NULL;
+			goto cleanup_rings;
+		}
+	}
+	/* Leave the other unused stream ring pointers in the stream context
+	 * array initialized to zero.  This will cause the xHC to give us an
+	 * error if the device asks for a stream ID we don't have setup (if it
+	 * was any other way, the host controller would assume the ring is
+	 * "empty" and wait forever for data to be queued to that stream ID).
+	 */
+#if XHCI_DEBUG
+	/* Do a little test on the radix tree to make sure it returns the
+	 * correct values.  The test returns 0 when everything checks out.
+	 */
+	if (xhci_test_radix_tree(xhci, num_streams, stream_info))
+		goto cleanup_rings;
+#endif
+
+	return stream_info;
+
+cleanup_rings:
+	for (cur_stream = 1; cur_stream < num_streams; cur_stream++) {
+		cur_ring = stream_info->stream_rings[cur_stream];
+		if (cur_ring) {
+			addr = cur_ring->first_seg->dma;
+			radix_tree_delete(&stream_info->trb_address_map,
+					addr >> SEGMENT_SHIFT);
+			xhci_ring_free(xhci, cur_ring);
+			stream_info->stream_rings[cur_stream] = NULL;
+		}
+	}
+	/* The stream context array was allocated by this point; don't leak it. */
+	xhci_free_stream_ctx(xhci,
+			stream_info->num_stream_ctxs,
+			stream_info->stream_ctx_array,
+			stream_info->ctx_array_dma);
+cleanup_stream_rings:
+	kfree(stream_info->stream_rings);
+cleanup_info:
+	kfree(stream_info);
+	return NULL;
+}
+/*
+ * Program an input endpoint context for streams: write the MaxPStreams
+ * field, set the Linear Stream Array bit, and point the dequeue pointer at
+ * the stream context array of @stream_info.
+ */
+void xhci_setup_streams_ep_input_ctx(struct xhci_hcd *xhci,
+		struct xhci_ep_ctx *ep_ctx,
+		struct xhci_stream_info *stream_info)
+{
+	/* MaxPStreams describes the size of the stream context array (not how
+	 * many entries are actually in use), encoded so that the array holds
+	 * 2^(MaxPStreams + 1) entries.  fls() gives the 1-based position of
+	 * the highest set bit, hence the "- 2".
+	 */
+	u32 max_primary_streams = fls(stream_info->num_stream_ctxs) - 2;
+
+	xhci_dbg(xhci, "Setting number of stream ctx array entries to %u\n",
+			1 << (max_primary_streams + 1));
+	ep_ctx->ep_info = (ep_ctx->ep_info & ~EP_MAXPSTREAMS_MASK) |
+		EP_MAXPSTREAMS(max_primary_streams) |
+		EP_HAS_LSA;
+	ep_ctx->deq = stream_info->ctx_array_dma;
+}
+
+/*
+ * Program an input endpoint context for operation without streams: clear
+ * the MaxPStreams field and the Linear Stream Array bit, and point the
+ * dequeue pointer back at the endpoint's own ring, at the ring's current
+ * dequeue position and with its current cycle state.
+ */
+void xhci_setup_no_streams_ep_input_ctx(struct xhci_hcd *xhci,
+		struct xhci_ep_ctx *ep_ctx,
+		struct xhci_virt_ep *ep)
+{
+	dma_addr_t deq_dma;
+
+	deq_dma = xhci_trb_virt_to_dma(ep->ring->deq_seg, ep->ring->dequeue);
+
+	ep_ctx->ep_info &= ~(EP_MAXPSTREAMS_MASK | EP_HAS_LSA);
+	ep_ctx->deq = deq_dma | ep->ring->cycle_state;
+}
+
+/*
+ * Tear down a stream info structure: remove every stream ring from the TRB
+ * address radix tree and free it, free the stream context array, then free
+ * the ring pointer array and the structure itself.  A NULL @stream_info is
+ * ignored.
+ *
+ * Caller should fix the endpoint context streams fields.
+ */
+void xhci_free_stream_info(struct xhci_hcd *xhci,
+		struct xhci_stream_info *stream_info)
+{
+	unsigned int id;
+
+	if (!stream_info)
+		return;
+
+	/* Stream 0 never has a ring, so start at 1. */
+	for (id = 1; id < stream_info->num_streams; id++) {
+		struct xhci_ring *ring = stream_info->stream_rings[id];
+		dma_addr_t seg_dma;
+
+		if (!ring)
+			continue;
+
+		seg_dma = ring->first_seg->dma;
+		radix_tree_delete(&stream_info->trb_address_map,
+				seg_dma >> SEGMENT_SHIFT);
+		xhci_ring_free(xhci, ring);
+		stream_info->stream_rings[id] = NULL;
+	}
+
+	if (stream_info->stream_ctx_array)
+		xhci_free_stream_ctx(xhci,
+				stream_info->num_stream_ctxs,
+				stream_info->stream_ctx_array,
+				stream_info->ctx_array_dma);
+
+	kfree(stream_info->stream_rings);
+	kfree(stream_info);
+}
+
+
+/***************** Device context manipulation *************************/
+
 /* All the xhci_tds in the ring's TD list should be freed at this point */
 void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id)
 {
@@ -263,9 +586,13 @@ void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id)
 	if (!dev)
 		return;
 
-	for (i = 0; i < 31; ++i)
+	for (i = 0; i < 31; ++i) {
 		if (dev->eps[i].ring)
 			xhci_ring_free(xhci, dev->eps[i].ring);
+		if (dev->eps[i].stream_info)
+			xhci_free_stream_info(xhci,
+					dev->eps[i].stream_info);
+	}
 
 	if (dev->in_ctx)
 		xhci_free_container_ctx(xhci, dev->in_ctx);
@@ -528,6 +855,9 @@ static inline u32 xhci_get_endpoint_type(struct usb_device *udev,
 	return type;
 }
 
+/* Set up an endpoint with one ring segment.  Do not allocate stream rings.
+ * Drivers will have to call usb_alloc_streams() to do that.
+ */
 int xhci_endpoint_init(struct xhci_hcd *xhci,
 		struct xhci_virt_device *virt_dev,
 		struct usb_device *udev,
@@ -837,6 +1167,16 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci)
 	xhci->device_pool = NULL;
 	xhci_dbg(xhci, "Freed device context pool\n");
 
+	if (xhci->small_streams_pool)
+		dma_pool_destroy(xhci->small_streams_pool);
+	xhci->small_streams_pool = NULL;
+	xhci_dbg(xhci, "Freed small stream array pool\n");
+
+	if (xhci->medium_streams_pool)
+		dma_pool_destroy(xhci->medium_streams_pool);
+	xhci->medium_streams_pool = NULL;
+	xhci_dbg(xhci, "Freed medium stream array pool\n");
+
 	xhci_write_64(xhci, 0, &xhci->op_regs->dcbaa_ptr);
 	if (xhci->dcbaa)
 		pci_free_consistent(pdev, sizeof(*xhci->dcbaa),
@@ -916,6 +1256,22 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
 	if (!xhci->segment_pool || !xhci->device_pool)
 		goto fail;
 
+	/* Linear stream context arrays don't have any boundary restrictions,
+	 * and only need to be 16-byte aligned.
+	 */
+	xhci->small_streams_pool =
+		dma_pool_create("xHCI 256 byte stream ctx arrays",
+			dev, SMALL_STREAM_ARRAY_SIZE, 16, 0);
+	xhci->medium_streams_pool =
+		dma_pool_create("xHCI 1KB stream ctx arrays",
+			dev, MEDIUM_STREAM_ARRAY_SIZE, 16, 0);
+	/* Any stream context array bigger than MEDIUM_STREAM_ARRAY_SIZE
+	 * will be allocated with pci_alloc_consistent()
+	 */
+
+	if (!xhci->small_streams_pool || !xhci->medium_streams_pool)
+		goto fail;
+
 	/* Set up the command ring to have one segments for now. */
 	xhci->cmd_ring = xhci_ring_alloc(xhci, 1, true, flags);
 	if (!xhci->cmd_ring)
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 5cc3f9b..5798275 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -305,6 +305,10 @@ static void ring_ep_doorbell(struct xhci_hcd *xhci,
 	ep_state = ep->ep_state;
 	/* Don't ring the doorbell for this endpoint if there are pending
 	 * cancellations because the we don't want to interrupt processing.
+	 * We don't want to restart any stream rings if there's a set dequeue
+	 * pointer command pending because the device can choose to start any
+	 * stream once the endpoint is on the HW schedule.
+	 * FIXME - check all the stream rings for pending cancellations.
 	 */
 	if (!ep->cancels_pending && !(ep_state & SET_DEQ_PENDING)
 			&& !(ep_state & EP_HALTED)) {
@@ -746,8 +750,9 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
 		 * Configure endpoint commands can come from the USB core
 		 * configuration or alt setting changes, or because the HW
 		 * needed an extra configure endpoint command after a reset
-		 * endpoint command.  In the latter case, the xHCI driver is
-		 * not waiting on the configure endpoint command.
+		 * endpoint command or streams were being configured.
+		 * If the command was for a halted endpoint, the xHCI driver
+		 * is not waiting on the configure endpoint command.
 		 */
 		ctrl_ctx = xhci_get_input_control_ctx(xhci,
 				virt_dev->in_ctx);
@@ -771,6 +776,7 @@ static void handle_cmd_completion(struct xhci_hcd *xhci,
 				~EP_HALTED;
 			ring_ep_doorbell(xhci, slot_id, ep_index);
 		} else {
+			/* Must have been for streams configuration. */
 			xhci->devs[slot_id]->cmd_status =
 				GET_COMP_CODE(event->status);
 			complete(&xhci->devs[slot_id]->cmd_completion);
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index e801d26..f24a997 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -117,7 +117,7 @@ struct xhci_cap_regs {
 /* true: no secondary Stream ID Support */
 #define HCC_NSS(p)		((p) & (1 << 7))
 /* Max size for Primary Stream Arrays - 2^(n+1), where n is bits 12:15 */
-#define HCC_MAX_PSA		(1 << ((((p) >> 12) & 0xf) + 1))
+#define HCC_MAX_PSA(p)		(1 << ((((p) >> 12) & 0xf) + 1))
 /* Extended Capabilities pointer from PCI base - section 5.3.6 */
 #define HCC_EXT_CAPS(p)		XHCI_HCC_EXT_CAPS(p)
 
@@ -581,6 +581,10 @@ struct xhci_ep_ctx {
 /* bit 15 is Linear Stream Array */
 /* Interval - period between requests to an endpoint - 125u increments. */
 #define EP_INTERVAL(p)		((p & 0xff) << 16)
+#define EP_MAXPSTREAMS_MASK	(0x1f << 10)
+#define EP_MAXPSTREAMS(p)	(((p) << 10) & EP_MAXPSTREAMS_MASK)
+/* Endpoint is set up with a Linear Stream Array (vs. Secondary Stream Array) */
+#define	EP_HAS_LSA		(1 << 15)
 
 /* ep_info2 bitmasks */
 /*
@@ -640,8 +644,49 @@ struct xhci_command {
 /* add context bitmasks */
 #define	ADD_EP(x)	(0x1 << x)
 
+struct xhci_stream_ctx {
+	/* 64-bit stream ring address, cycle state, and stream type */
+	u64	stream_ring;
+	/* offset 0x08 - 0x0f reserved for HC internal use */
+	u32	reserved[2];
+};
+
+/* Stream Context Types (section 6.4.1) - bits 3:1 of stream ctx deq ptr */
+#define	SCT_FOR_CTX(p)		(((p) & 0x7) << 1)
+/* Secondary stream array type, dequeue pointer is to a transfer ring */
+#define	SCT_SEC_TR		0
+/* Primary stream array type, dequeue pointer is to a transfer ring */
+#define	SCT_PRI_TR		1
+/* Dequeue pointer is for a secondary stream array (SSA) with 8 entries */
+#define SCT_SSA_8		2
+#define SCT_SSA_16		3
+#define SCT_SSA_32		4
+#define SCT_SSA_64		5
+#define SCT_SSA_128		6
+#define SCT_SSA_256		7
+
+/* Assume no secondary streams for now */
+struct xhci_stream_info {
+	struct xhci_ring		**stream_rings;
+	/* Number of streams, including stream 0 (which drivers can't use) */
+	unsigned int			num_streams;
+	/* The stream context array may be bigger than
+	 * the number of streams the driver asked for
+	 */
+	struct xhci_stream_ctx		*stream_ctx_array;
+	unsigned int			num_stream_ctxs;
+	dma_addr_t			ctx_array_dma;
+	/* For mapping physical TRB addresses to segments in stream rings */
+	struct radix_tree_root		trb_address_map;
+};
+
+#define	SMALL_STREAM_ARRAY_SIZE		256
+#define	MEDIUM_STREAM_ARRAY_SIZE	1024
+
 struct xhci_virt_ep {
 	struct xhci_ring		*ring;
+	/* Related to endpoints that are configured to use stream IDs only */
+	struct xhci_stream_info		*stream_info;
 	/* Temporary storage in case the configure endpoint command fails and we
 	 * have to restore the device state to the previous state
 	 */
@@ -649,6 +694,9 @@ struct xhci_virt_ep {
 	unsigned int			ep_state;
 #define SET_DEQ_PENDING		(1 << 0)
 #define EP_HALTED		(1 << 1)
+/* Transitioning the endpoint to using streams, don't enqueue URBs */
+#define EP_GETTING_STREAMS	(1 << 2)
+#define EP_HAS_STREAMS		(1 << 3)
 	/* ----  Related to URB cancellation ---- */
 	struct list_head	cancelled_td_list;
 	unsigned int		cancels_pending;
@@ -694,14 +742,6 @@ struct xhci_device_context_array {
  */
 
 
-struct xhci_stream_ctx {
-	/* 64-bit stream ring address, cycle state, and stream type */
-	u64	stream_ring;
-	/* offset 0x14 - 0x1f reserved for HC internal use */
-	u32	reserved[2];
-};
-
-
 struct xhci_transfer_event {
 	/* 64-bit buffer address, or immediate data */
 	u64	buffer;
@@ -939,6 +979,10 @@ union xhci_trb {
 /* Allow two commands + a link TRB, along with any reserved command TRBs */
 #define MAX_RSVD_CMD_TRBS	(TRBS_PER_SEGMENT - 3)
 #define SEGMENT_SIZE		(TRBS_PER_SEGMENT*16)
+/* SEGMENT_SHIFT should be log2(SEGMENT_SIZE).
+ * Change this if you change TRBS_PER_SEGMENT!
+ */
+#define SEGMENT_SHIFT		10
 /* TRB buffer pointers can't cross 64KB boundaries */
 #define TRB_MAX_BUFF_SHIFT		16
 #define TRB_MAX_BUFF_SIZE	(1 << TRB_MAX_BUFF_SHIFT)
@@ -1073,6 +1117,8 @@ struct xhci_hcd {
 	/* DMA pools */
 	struct dma_pool	*device_pool;
 	struct dma_pool	*segment_pool;
+	struct dma_pool	*small_streams_pool;
+	struct dma_pool	*medium_streams_pool;
 
 #ifdef CONFIG_USB_XHCI_HCD_DEBUGGING
 	/* Poll the rings - for debugging */
@@ -1207,6 +1253,20 @@ int xhci_endpoint_init(struct xhci_hcd *xhci, struct xhci_virt_device *virt_dev,
 		struct usb_device *udev, struct usb_host_endpoint *ep,
 		gfp_t mem_flags);
 void xhci_ring_free(struct xhci_hcd *xhci, struct xhci_ring *ring);
+struct xhci_stream_info *xhci_alloc_stream_info(struct xhci_hcd *xhci,
+		unsigned int num_stream_ctxs,
+		unsigned int num_streams, gfp_t flags);
+void xhci_free_stream_info(struct xhci_hcd *xhci,
+		struct xhci_stream_info *stream_info);
+void xhci_setup_streams_ep_input_ctx(struct xhci_hcd *xhci,
+		struct xhci_ep_ctx *ep_ctx,
+		struct xhci_stream_info *stream_info);
+void xhci_setup_no_streams_ep_input_ctx(struct xhci_hcd *xhci,
+		struct xhci_ep_ctx *ep_ctx,
+		struct xhci_virt_ep *ep);
+struct xhci_ring *dma_to_stream_ring(
+		struct xhci_stream_info *stream_info,
+		u64 address);
 struct xhci_command *xhci_alloc_command(struct xhci_hcd *xhci,
 		bool allocate_completion, gfp_t mem_flags);
 void xhci_free_command(struct xhci_hcd *xhci,
@@ -1229,6 +1289,12 @@ int xhci_get_frame(struct usb_hcd *hcd);
 irqreturn_t xhci_irq(struct usb_hcd *hcd);
 int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev);
 void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev);
+int xhci_alloc_streams(struct usb_hcd *hcd, struct usb_device *udev,
+		struct usb_host_endpoint **eps, unsigned int num_eps,
+		unsigned int num_streams, gfp_t mem_flags);
+int xhci_free_streams(struct usb_hcd *hcd, struct usb_device *udev,
+		struct usb_host_endpoint **eps, unsigned int num_eps,
+		gfp_t mem_flags);
 int xhci_address_device(struct usb_hcd *hcd, struct usb_device *udev);
 int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags);
 int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status);
-- 
1.6.0.4

--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Media]     [Linux Input]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [Old Linux USB Devel Archive]

  Powered by Linux