[RFC PATCH 19/20] xhci: v1.0 scatterlist enqueue support (td-fragment rework)

v1.0 hosts require that TD-fragments (portions of a TD that do not end
on an MBP boundary) not cross a TRB segment boundary.  This constraint
is in addition to the constraint that a TRB may not specify a transfer
that crosses a 64K boundary.  Handling this permits the driver to
accept scatterlists of nearly any geometry.  "Nearly" because there is
one unlikely remaining degenerate case of a driver submitting a
transfer that consumes all the TRBs in a segment before hitting an MBP
boundary.  That case is trapped and the transfer is rejected.
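
As an aside, the 64K rule amounts to trimming each TRB's length so the
buffer it describes never spans a 64K boundary.  An illustrative sketch
(not lifted verbatim from the patch; see the TRB_MAX_BUFF_SIZE handling
in try_queue_sg_ent() below for the real thing):

	/* keep a single TRB from spanning a 64K boundary */
	offset = dma_addr % TRB_MAX_BUFF_SIZE;
	if (offset + len > TRB_MAX_BUFF_SIZE)
		len = TRB_MAX_BUFF_SIZE - offset;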

Given the multi-dimensional constraints of queuing TRBs from a
scatterlist, this implementation does not attempt to pre-calculate the
number of TRBs in a TD.  Instead it attempts a dry run of enqueuing the
TRBs to the ring.  If it discovers a TD-fragment straddling a segment
boundary it backs up to the last MBP boundary, inserts a link TRB at
that boundary, and restarts enqueuing in the next segment.  A side
effect of not pre-calculating the number of required TRBs is that the
ring is now expanded as the scatterlist is walked, rather than in
prepare_ring().
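
For reference, the resulting flow in queue_bulk_sg_tx_v1() below is a
two-pass walk of the scatterlist (abridged):

	/* pass 1: dry run; place/invalidate mid-segment links and
	 * expand the ring as needed
	 */
	ret = parse_sg(&q, 1);
	if (ret)
		return ret;	/* e.g. td-fragment larger than a segment */
	...
	/* pass 2: actually enqueue trbs, honoring the pass-1 links */
	ret = parse_sg(&q, 2);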

To simplify the math and forgo the need to track (union xhci_trb *) and
(struct xhci_segment *) pointers, modulo-power-of-2 ring indexes are
used.  A small portion of the patch adds the infrastructure to convert
from a (struct xhci_ring_pointer *) to an integer index.
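
For example, abridged from the helpers added to xhci.h, converting a
ring pointer to an index and wrapping it reduces to:

	idx = rp->seg->segid * TRBS_PER_SEGMENT + (rp->ptr - rp->seg->trbs);
	idx &= xhci_ring_size(ring) - 1;	/* wrap: ring size is a power of 2 */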

Glossary of acronyms:
TRB: Transfer Request Block, a 16-byte xhci-hardware scatterlist entry

TD: Transfer Descriptor, the set of TRBs that comprise a transfer

TRB segment: A contiguous allocation of TRBs.  Segments are of size
  PAGE_SIZE in the xhci driver.  Each segment normally ends with a link
  TRB pointing to the next segment, but a link TRB may appear at any
  TRB boundary in the segment.

Ring: A linked list of segments.

MBP: Max Burst Packet, the minimum amount of data the hardware expects
  to transfer before hitting the end of a segment (assuming the TD
  spans a segment boundary).
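  For example, the xhci_get_ep_ctx_mbp() helper added below derives the
  MBP from the endpoint context as:

	mbp = max_packet * (max_burst + 1)

  so a SuperSpeed bulk endpoint with a 1024-byte max packet and a max
  burst of 15 (illustrative values, not taken from this patch) has an
  MBP of 16K.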

Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>
---
 drivers/usb/host/xhci-mem.c  |   17 +
 drivers/usb/host/xhci-ring.c |  620 +++++++++++++++++++++++++++++++++++++++++-
 drivers/usb/host/xhci.h      |   75 +++++
 3 files changed, 695 insertions(+), 17 deletions(-)

diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index edaa49798172..1fc38ec60c25 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -103,7 +103,8 @@ static void xhci_link_rings(struct xhci_hcd *xhci, struct xhci_ring *ring,
 		struct list_head *segments, unsigned int num_segs)
 {
 	struct xhci_segment *insert_head, *insert_next, *new_head, *new_tail;
-	struct xhci_segment *last_seg = xhci_ring_last_seg(ring);
+	struct xhci_segment *last_seg = xhci_ring_last_seg(ring), *seg;
+	int i;
 
 	new_tail = list_last_entry(segments, typeof(*new_tail), list);
 	new_head = list_first_entry(segments, typeof(*new_head), list);
@@ -124,6 +125,11 @@ static void xhci_link_rings(struct xhci_hcd *xhci, struct xhci_ring *ring,
 		last_seg->link->link.control &= ~cpu_to_le32(LINK_TOGGLE);
 		new_tail->link->link.control |= cpu_to_le32(LINK_TOGGLE);
 	}
+
+	i = insert_head->segid + 1;
+	seg = insert_head;
+	list_for_each_entry_continue(seg, &ring->segments, list)
+		seg->segid = i++;
 }
 
 /*
@@ -257,8 +263,9 @@ void xhci_ring_free(struct xhci_ring *ring)
 static void xhci_initialize_ring_info(struct xhci_ring *ring,
 					unsigned int cycle_state)
 {
-	struct xhci_segment *first_seg = xhci_ring_first_seg(ring);
+	struct xhci_segment *first_seg = xhci_ring_first_seg(ring), *seg;
 	struct xhci_ring_pointer enq = { first_seg, first_seg->trbs };
+	int i;
 
 	/* The ring is empty, so the enqueue pointer == dequeue pointer */
 	xhci_ring_set_enqueue(ring, &enq);
@@ -280,7 +287,11 @@ static void xhci_initialize_ring_info(struct xhci_ring *ring,
 	 * Each segment has a link TRB, and leave an extra TRB for SW
 	 * accounting purpose
 	 */
-	ring->num_trbs_free = (1 << ring->order) * (TRBS_PER_SEGMENT - 1) - 1;
+	ring->num_trbs_free = xhci_ring_size(ring) - xhci_ring_num_segs(ring) - 1;
+
+	i = 0;
+	list_for_each_entry(seg, &ring->segments, list)
+		seg->segid = i++;
 }
 
 /* Allocate segments and link them for a ring */
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index ef9d58039666..82a24ce58c3e 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -168,6 +168,40 @@ static void ep_inc_deq(struct xhci_ring *ring)
 	} while (ring->ops->last_trb(ring, &ring->deq));
 }
 
+static void v1_inc_deq(struct xhci_ring *ring)
+{
+	ring->deq_updates++;
+
+	if (!ring->ops->last_trb(ring, &ring->deq))
+		ring->num_trbs_free++;
+
+	/*
+	 * ep_inc_deq() lets the dequeue-pointer (deq/tail) wrap the
+	 * enqueue-pointer (enq/head)!  However, since room_on_ring() looks at
+	 * ->num_trbs_free instead of the position of the ring pointers, it
+	 * never causes a problem as enq gets back in line with deq at the next
+	 * submission.
+	 *
+	 * In the case of v1+ rings, conditional_expand() is sensitive to this
+	 * wrap and prematurely expands the ring.  Prevent that condition by
+	 * stopping once deq == enq.  Eventually, ->num_trbs_free should be
+	 * deprecated entirely in favor of just comparing the ring pointers.
+	 * For now, for legacy compatibility, we leave well enough alone and
+	 * limit this to xhci-v1+ implementations.
+	 */
+	do {
+		if (xhci_ring_dequeue(ring) == xhci_ring_enqueue(ring))
+			break;
+
+		/* Update the dequeue pointer further if that was a link TRB */
+		if (ring->ops->last_trb(ring, &ring->deq))
+			xhci_ring_pointer_advance_seg(ring, &ring->deq);
+		else
+			xhci_ring_pointer_advance(&ring->deq);
+	} while (ring->ops->last_trb(ring, &ring->deq));
+
+}
+
 /*
  * Don't make a ring full of link TRBs.  That would be dumb and this
  * would loop.
@@ -289,7 +323,7 @@ static u32 common_link_segments(struct xhci_segment *prev,
 
 	if (!prev || !next)
 		return 0;
-	prev->link = &prev->trbs[TRBS_PER_SEGMENT-1];
+	prev->link = &prev->trbs[TRBS_PER_SEGMENT - 1];
 	prev->link->link.segment_ptr = cpu_to_le64(next->dma);
 
 	/* Set the last TRB in the segment to have a TRB type ID of Link TRB */
@@ -324,6 +358,30 @@ static void chain_quirk_link_segments(struct xhci_segment *prev,
 	prev->link->link.control = cpu_to_le32(val);
 }
 
+static unsigned int xhci_ring_num_trbs_free(struct xhci_ring *ring)
+{
+	unsigned int enq_idx, deq_idx, num_trbs, num_segs;
+
+	enq_idx = xhci_ring_pointer_to_index(&ring->enq);
+	deq_idx = xhci_ring_pointer_to_index(&ring->deq);
+
+	num_trbs = to_xhci_ring_index(ring, deq_idx - (enq_idx + 1));
+	num_segs = (enq_idx % TRBS_PER_SEGMENT + num_trbs) / TRBS_PER_SEGMENT;
+
+	/* free trbs minus link trbs */
+	return num_trbs - num_segs;
+}
+
+static void v1_reap_td(struct xhci_ring *ring)
+{
+	/*
+	 * hack to fix up num_trbs_free for v1 rings where the presence of
+	 * mid-segment links means that increment num_trbs_free once per
+	 * mid-segment links means that incrementing num_trbs_free once per
+	 */
+	ring->num_trbs_free = xhci_ring_num_trbs_free(ring);
+}
+
 static const struct xhci_ring_ops event_ring_ops = {
 	.last_trb = event_last_trb,
 	.last_trb_ring = event_last_trb_ring,
@@ -332,12 +390,20 @@ static const struct xhci_ring_ops event_ring_ops = {
 	.link_segments = event_link_segments,
 };
 
+static int queue_bulk_sg_tx(struct xhci_hcd *xhci, struct xhci_ring *ring,
+		gfp_t mem_flags, struct urb *urb, struct scatterlist *sgl,
+		int num_sgs, int slot_id, unsigned int ep_index);
+static int queue_bulk_sg_tx_v1(struct xhci_hcd *xhci, struct xhci_ring *ring,
+		gfp_t mem_flags, struct urb *urb, struct scatterlist *sgl,
+		int num_sgs, int slot_id, unsigned int ep_index);
+
 static const struct xhci_ring_ops ep_ring_ops = {
 	.last_trb = ep_last_trb,
 	.last_trb_ring = ep_last_trb_ring,
 	.inc_enq = ep_inc_enq,
 	.inc_deq = ep_inc_deq,
 	.link_segments = ep_link_segments,
+	.queue_bulk_sg_tx = queue_bulk_sg_tx,
 };
 
 static const struct xhci_ring_ops chain_quirk_ring_ops = {
@@ -346,6 +412,17 @@ static const struct xhci_ring_ops chain_quirk_ring_ops = {
 	.inc_enq = chain_quirk_inc_enq,
 	.inc_deq = ep_inc_deq,
 	.link_segments = chain_quirk_link_segments,
+	.queue_bulk_sg_tx = queue_bulk_sg_tx,
+};
+
+static const struct xhci_ring_ops ep_ring_ops_v1 = {
+	.last_trb = ep_last_trb,
+	.last_trb_ring = ep_last_trb_ring,
+	.inc_enq = ep_inc_enq,
+	.inc_deq = v1_inc_deq,
+	.link_segments = ep_link_segments,
+	.queue_bulk_sg_tx = queue_bulk_sg_tx_v1,
+	.reap_td = v1_reap_td,
 };
 
 bool xhci_is_event_ring(struct xhci_ring *ring)
@@ -372,8 +449,10 @@ static const struct xhci_ring_ops *xhci_ring_ops(struct xhci_hcd *xhci,
 	case TYPE_COMMAND:
 		if (chain_quirk)
 			ops = &chain_quirk_ring_ops;
-		else
+		else if (xhci->hci_version < 0x100)
 			ops = &ep_ring_ops;
+		else
+			ops = &ep_ring_ops_v1;
 		break;
 	default:
 		ops = NULL;
@@ -1967,6 +2046,8 @@ static void xhci_ring_reap_td(struct xhci_ring *ep_ring, struct xhci_td *td)
 	while (xhci_ring_dequeue(ep_ring) != td->last_trb)
 		xhci_ring_inc_deq(ep_ring);
 	xhci_ring_inc_deq(ep_ring);
+	if (ep_ring->ops->reap_td)
+		ep_ring->ops->reap_td(ep_ring);
 }
 
 /*
@@ -3196,11 +3277,10 @@ static u32 xhci_v1_0_td_remainder(int running_total, int trb_buff_len,
 	return (total_packet_count - packets_transferred) << 17;
 }
 
-static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
-		struct urb *urb, struct scatterlist *sgl, int num_sgs,
-		int slot_id, unsigned int ep_index)
+static int queue_bulk_sg_tx(struct xhci_hcd *xhci, struct xhci_ring *ep_ring,
+		gfp_t mem_flags, struct urb *urb, struct scatterlist *sgl,
+		int num_sgs, int slot_id, unsigned int ep_index)
 {
-	struct xhci_ring *ep_ring;
 	unsigned int num_trbs;
 	struct urb_priv *urb_priv;
 	struct xhci_td *td;
@@ -3213,10 +3293,6 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 	union xhci_trb *start_trb;
 	int start_cycle;
 
-	ep_ring = xhci_urb_to_transfer_ring(xhci, urb);
-	if (!ep_ring)
-		return -EINVAL;
-
 	num_trbs = count_sg_trbs_needed(xhci, urb, sgl, num_sgs);
 	total_packet_count = DIV_ROUND_UP(urb->transfer_buffer_length,
 			usb_endpoint_maxp(&urb->ep->desc));
@@ -3346,12 +3422,528 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 	return 0;
 }
 
+struct queue_bulk_sg_context {
+	const u32 mbp;
+	const unsigned int tx_len;
+	unsigned int start_idx, enq_idx, final_enq_idx, sg_idx;
+	unsigned int len, mbp_len, running_total, total_packet_count;
+	unsigned int total_links, links, num_sgs;
+	struct scatterlist *sgl, *sg;
+	struct xhci_ring *ring;
+	struct xhci_hcd *xhci;
+	struct xhci_td *td;
+	u32 start_cycle;
+	struct urb *urb;
+	gfp_t flags;
+	int pass;
+};
+
+/*
+ * Helper for queue_bulk_sg_tx_v1 that returns the expected cycle
+ * relative to the passed index, but is careful to maintain the cycle
+ * (maintain software control) of the first trb in a td.
+ */
+static u32 to_enq_cycle(struct queue_bulk_sg_context *q, bool link)
+{
+	unsigned int idx = to_xhci_ring_index(q->ring, q->enq_idx);
+	u32 cycle;
+
+	if (idx <= q->start_idx)
+		cycle = q->start_cycle ^ 1;
+	else
+		cycle = q->start_cycle;
+
+	/*
+	 * gross hack alert: for legacy reasons inc_enq wants to do the
+	 * toggling for link trbs
+	 */
+	if (idx != q->start_idx && link)
+		cycle ^= 1;
+
+	return cycle;
+}
+
+static int conditional_expand(struct queue_bulk_sg_context *q,
+		unsigned int num_trbs)
+{
+	unsigned int enq_to_deq, deq_segid, next_segid;
+	unsigned int deq_idx, next_idx;
+	bool cross_seg;
+
+	/* are we advancing into the deq segment? */
+	next_idx = to_xhci_ring_index(q->ring, q->enq_idx + num_trbs);
+	deq_idx = xhci_ring_pointer_to_index(&q->ring->deq);
+	enq_to_deq = to_xhci_ring_index(q->ring, deq_idx - q->enq_idx);
+	next_segid = next_idx / TRBS_PER_SEGMENT;
+	deq_segid = deq_idx / TRBS_PER_SEGMENT;
+	cross_seg = q->enq_idx % TRBS_PER_SEGMENT + num_trbs > TRBS_PER_SEGMENT;
+	if ((enq_to_deq && num_trbs >= enq_to_deq)
+			|| (cross_seg && next_segid == deq_segid)) {
+		/*
+		 * An assumption has been violated if we are trying to
+		 * expand the ring on pass-2
+		 */
+		if (WARN_ON_ONCE(q->pass == 2))
+			return -EINVAL;
+
+		return xhci_ring_expansion(q->xhci, q->ring,
+				xhci_ring_size(q->ring), q->flags);
+	}
+	return 0;
+}
+
+static bool check_mid_segment_link(struct queue_bulk_sg_context *q)
+{
+	bool was_mid_seg_link = false;
+	union xhci_trb *trb;
+	u32 field;
+
+	trb = to_xhci_ring_trb(q->ring, q->ring->enq.seg, q->enq_idx);
+	if (TRB_TYPE_LINK_LE32(trb->link.control)
+			&& !is_last_xhci_segment_index(q->enq_idx)) {
+		if (q->links) {
+			/*
+			 * We inserted a link previously to avoid a
+			 * td-fragment-segment boundary, skip ahead...
+			 */
+			q->links--;
+			q->enq_idx = xhci_ring_advance_seg(q->ring, q->enq_idx);
+			WARN_ON_ONCE(q->mbp_len);
+			trb = to_xhci_ring_trb(q->ring, q->ring->enq.seg,
+					q->enq_idx);
+			WARN_ON_ONCE(TRB_TYPE_LINK_LE32(trb->link.control));
+			was_mid_seg_link = true;
+		} else {
+			WARN_ON_ONCE(q->pass == 2);
+			/* invalidate this mid-segment link */
+			field = to_enq_cycle(q, false);
+			field |= TRB_TYPE(TRB_TR_NOOP);
+			trb->generic.field[3] = __cpu_to_le32(field);
+		}
+	}
+
+	return was_mid_seg_link;
+}
+
+/*
+ * When a mid-segment-link is invalidated ensure the remainder of the
+ * segment has no cycle-valid or chained trbs
+ */
+static void sync_seg_cycle(struct xhci_ring *ring, struct xhci_segment *seg,
+		unsigned int start_idx, u32 cycle)
+{
+	unsigned int i, num_trbs;
+
+	num_trbs = ALIGN(start_idx, TRBS_PER_SEGMENT) - start_idx;
+	for (i = 0; i < num_trbs; i++) {
+		unsigned int idx = to_xhci_ring_index(ring, start_idx + i);
+		union xhci_trb *trb = to_xhci_ring_trb(ring, seg, idx);
+		u32 val = __le32_to_cpu(trb->generic.field[3]);
+
+		val &= ~(TRB_CYCLE | TRB_CHAIN);
+		val |= cycle;
+		trb->generic.field[3] = __cpu_to_le32(val);
+	}
+}
+
+static int set_mid_segment_link(struct queue_bulk_sg_context *q)
+{
+	union xhci_trb *trb, *last_trb;
+	struct xhci_segment *seg;
+	unsigned int next_idx;
+	u32 val, cycle, chain;
+	int ret, num_trbs;
+
+	/*
+	 * We may have already placed a link here on a previous attempt
+	 * and are now continuing after a truncation.
+	 */
+	if (check_mid_segment_link(q))
+		return 0;
+
+	/*
+	 * If the start of this mbp is the start of a segment, that
+	 * implies the td-fragment needs more than TRBS_PER_SEGMENT
+	 * trbs.  Outside of recompiling the driver with a larger
+	 * TRBS_PER_SEGMENT constant we're stuck, so complain.
+	 */
+	if (q->enq_idx % TRBS_PER_SEGMENT == 0) {
+		struct device *dev = &q->urb->dev->dev;
+
+		xhci_warn(q->xhci,
+				"%s %s: scatterlist required too many trbs\n",
+				dev_driver_string(dev), dev_name(dev));
+		return -EINVAL;
+	}
+	next_idx = xhci_ring_advance_seg(q->ring, q->enq_idx);
+	num_trbs = to_xhci_ring_index(q->ring, next_idx - q->enq_idx);
+	ret = conditional_expand(q, num_trbs);
+	if (ret)
+		return ret;
+	/*
+	 * copy the end of segment link to this position, maintaining
+	 * the toggle bit and updating chain and cycle
+	 */
+	seg = to_xhci_ring_segment(q->ring, q->ring->enq.seg, q->enq_idx);
+	trb = to_xhci_ring_trb(q->ring, seg, q->enq_idx);
+	last_trb = &seg->trbs[TRBS_PER_SEGMENT - 1];
+
+	val = le32_to_cpu(last_trb->link.control);
+	val &= ~(TRB_CHAIN | TRB_CYCLE);
+	cycle = to_enq_cycle(q, true);
+	if (q->enq_idx == q->start_idx)
+		chain = 0;
+	else
+		chain = TRB_CHAIN;
+	val |= chain | cycle;
+	trb->link.segment_ptr = last_trb->link.segment_ptr;
+	trb->link.control = cpu_to_le32(val);
+	seg->link = trb;
+
+	/*
+	 * be careful, see the comment in to_enq_cycle(), the cycle we
+	 * have here is flipped since it was obtained for a link trb
+	 */
+	sync_seg_cycle(q->ring, seg, q->enq_idx + 1, cycle ^ 1);
+
+	q->enq_idx = xhci_ring_advance_seg(q->ring, q->enq_idx);
+	q->links++;
+	q->total_links++;
+	return 0;
+}
+
+static unsigned int do_enq_trb(struct queue_bulk_sg_context *q, dma_addr_t dma,
+		unsigned int len)
+{
+	u32 field, length_field, remainder;
+	unsigned int num_trbs, next_idx;
+	bool more_trbs_coming;
+
+	num_trbs = to_xhci_ring_index(q->ring, q->final_enq_idx - q->enq_idx);
+	next_idx = to_xhci_ring_index(q->ring, q->enq_idx + 1);
+
+	/*
+	 * Set cycle being careful not to toggle the cycle of the first
+	 * trb, yet
+	 */
+	field = to_enq_cycle(q, false);
+
+	/*
+	 * Chain all the TRBs together; clear the chain bit in the last
+	 * TRB to indicate it's the last TRB in the chain.
+	 */
+	if (next_idx != q->final_enq_idx) {
+		union xhci_trb *trb;
+
+		/*
+		 * truncate this trb to end on a mbp boundary if we are
+		 * crossing a link with the chain still open
+		 */
+		trb = to_xhci_ring_trb(q->ring, q->ring->enq.seg, next_idx);
+		if (TRB_TYPE_LINK_LE32(trb->link.control)) {
+			unsigned int end;
+
+			end = rounddown(q->len + len, q->mbp);
+			if (WARN_ON_ONCE(end <= q->len))
+				return -EINVAL;
+			len = end - q->len;
+		}
+		field |= TRB_CHAIN;
+	} else {
+		/* FIXME - add check for ZERO_PACKET flag before this */
+		q->td->last_trb = xhci_ring_enqueue(q->ring);
+		field |= TRB_IOC;
+	}
+
+	/* Only set interrupt on short packet for IN endpoints */
+	if (usb_urb_dir_in(q->urb))
+		field |= TRB_ISP;
+
+	remainder = xhci_v1_0_td_remainder(q->running_total, len,
+			q->total_packet_count, q->urb, num_trbs - 1);
+
+	length_field = TRB_LEN(len) | remainder | TRB_INTR_TARGET(0);
+
+	if (num_trbs > 1)
+		more_trbs_coming = true;
+	else
+		more_trbs_coming = false;
+
+	queue_trb(q->ring, more_trbs_coming, lower_32_bits(dma),
+			upper_32_bits(dma), length_field,
+			field | TRB_TYPE(TRB_NORMAL));
+
+	q->running_total += len;
+	return len;
+}
+
+struct truncate_mark {
+	unsigned int truncate_pos;
+	struct scatterlist *sg;
+	unsigned int ring_idx;
+	unsigned int mbp_len;
+	unsigned int len;
+	bool do_truncate;
+	int sg_idx;
+};
+
+static int try_queue_sg_ent(struct queue_bulk_sg_context *q,
+		struct truncate_mark *mark, const unsigned int sg_len)
+{
+	int ret;
+	unsigned int queued_len = 0;
+	unsigned int sg_enq_idx = q->enq_idx;
+
+	do {
+		unsigned int offset, len = sg_len - queued_len;
+		bool do_set_link = false;
+
+		/* check if we hit the end of the current segment */
+		if (is_last_xhci_segment_index(q->enq_idx)) {
+			if (q->mbp_len % q->mbp != 0) {
+				/*
+				 * Hmm, we hit a segment boundary, but we've
+				 * already queued some data for this mbp
+				 * fragment.  Back up to the last trb to cross a
+				 * mbp, truncate it and then set a mid-segment
+				 * link so that the next mbp can start in a
+				 * fresh segment.
+				 */
+				mark->do_truncate = true;
+				if (WARN_ON_ONCE(q->pass == 2))
+					return -EINVAL;
+				return -EAGAIN;
+			}
+
+			ret = conditional_expand(q, 1);
+			if (ret)
+				return ret;
+			q->enq_idx = xhci_ring_advance_seg(q->ring, q->enq_idx);
+		}
+
+		/*
+		 * how much of this sg can we queue in this trb? I.e. check 64k
+		 * and mbp boundaries
+		 */
+		offset = (sg_dma_address(q->sg) + queued_len)
+			% TRB_MAX_BUFF_SIZE;
+		if ((offset + len) > TRB_MAX_BUFF_SIZE) {
+			dma_addr_t start = sg_dma_address(q->sg) + queued_len;
+			dma_addr_t end, dma_len;
+
+			end = round_down(start + len, TRB_MAX_BUFF_SIZE);
+
+			dma_len = end - start;
+			xhci_dbg(q->xhci, "trim64: %#4x -> %pad\n", len,
+					&dma_len);
+			len = end - start;
+		}
+
+		/*
+		 * Check if we are servicing a truncation and limit len
+		 * to end on a mbp boundary. There are 2 truncation cases to
+		 * consider:
+		 * 1/ Never hit an mbp before hitting the end of the
+		 *    segment, the first data trb in the td needs to be
+		 *    placed in the next segment.
+		 *    (mark->truncate_pos == 0)
+		 * 2/ One of the trbs we queued crossed a mbp.  Find
+		 *    that boundary, trim the length to end on a mbp
+		 *    boundary and set a mid segment link, unless we
+		 *    are already at the end of the segment after
+		 *    submitting the trimmed trb.
+		 */
+		if (q->pass == 1 && mark->do_truncate
+				&& (q->len + len >= mark->truncate_pos)) {
+			mark->do_truncate = false;
+			if (mark->truncate_pos == 0) {
+				ret = set_mid_segment_link(q);
+				if (ret)
+					return ret;
+				WARN_ON_ONCE(q->mbp_len);
+				continue;
+			} else {
+				len = mark->truncate_pos - q->len;
+				do_set_link = true;
+			}
+		}
+
+		/* write this trb and advance the actual enqueue pointer */
+		if (q->pass == 2)
+			len = do_enq_trb(q, sg_dma_address(q->sg) + queued_len, len);
+
+		/* advance index tracker to next portion of the transfer */
+		q->enq_idx = to_xhci_ring_index(q->ring, q->enq_idx + 1);
+
+		/* mark that we crossed a mbp boundary */
+		if (q->len % q->mbp + len >= q->mbp) {
+			/* where to set the link after restart */
+			q->len += len;
+			mark->truncate_pos = rounddown(q->len, q->mbp);
+
+			/* where we were at the start of this sg */
+			mark->sg = q->sg;
+			mark->len = q->len - queued_len - len;
+			mark->sg_idx = q->sg_idx;
+			mark->ring_idx = sg_enq_idx;
+		} else {
+			q->len += len;
+		}
+
+		/*
+		 * track how far into a mbp we are for determining when
+		 * to trigger truncation
+		 */
+		q->mbp_len = (q->mbp_len + len) % q->mbp;
+
+		/* check if enq has advanced to a mid-segment link trb */
+		if (do_set_link && !is_last_xhci_segment_index(q->enq_idx)) {
+			WARN_ON_ONCE(q->mbp_len);
+			ret = set_mid_segment_link(q);
+			if (ret)
+				return ret;
+		} else
+			check_mid_segment_link(q);
+		queued_len += len;
+	} while (sg_len - queued_len);
+
+	return 0;
+}
+
+#define for_each_sg_continue(sg, nr, __i)	\
+	for (; __i < (nr); __i++, sg = sg_next(sg))
+
+static int parse_sg(struct queue_bulk_sg_context *q, int pass)
+{
+	struct truncate_mark mark = {
+		.ring_idx = q->start_idx,
+		.do_truncate = false,
+		.truncate_pos = 0,
+		.sg = q->sgl,
+		.sg_idx = 0,
+		.len = 0,
+	};
+
+	q->pass = pass;
+	q->links = q->total_links;
+ restart:
+	q->sg_idx = mark.sg_idx;
+	q->len = mark.len;
+	q->sg = mark.sg;
+	q->mbp_len = q->len % q->mbp;
+	q->enq_idx = mark.ring_idx;
+
+	for_each_sg_continue(q->sg, q->num_sgs, q->sg_idx) {
+		unsigned int len = sg_dma_len(q->sg);
+		int ret;
+
+		/* check if enq has advanced to a mid-segment link trb */
+		check_mid_segment_link(q);
+
+		/* check if we've mapped more than is set to be transferred */
+		if (len + q->len > q->tx_len)
+			len = q->tx_len - q->len;
+		if (len == 0)
+			break;
+		/* ok, we have some data to enqueue at this index */
+		ret = try_queue_sg_ent(q, &mark, len);
+		if (ret == -EAGAIN)
+			goto restart;
+		else if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+static int queue_bulk_sg_tx_v1(struct xhci_hcd *xhci, struct xhci_ring *ring,
+		gfp_t mem_flags, struct urb *urb, struct scatterlist *sgl,
+		int num_sgs, int slot_id, unsigned int ep_index)
+{
+	int ret;
+	struct urb_priv *urb_priv;
+	union xhci_trb *start_trb;
+	unsigned int final_enq_idx;
+	struct xhci_virt_device *xdev = xhci->devs[slot_id];
+	struct xhci_ep_ctx *ep_ctx = xhci_get_ep_ctx(xhci, xdev->out_ctx, 0);
+	struct queue_bulk_sg_context q = { .mbp = xhci_get_ep_ctx_mbp(ep_ctx),
+		.tx_len = urb->transfer_buffer_length, .urb = urb,
+		.flags = mem_flags, .xhci = xhci, .sgl = sgl,
+		.num_sgs = num_sgs, .ring = ring, };
+
+	ret = check_ep_submit_state(xhci, ep_ctx);
+	if (ret)
+		return ret;
+
+	ret = prepare_td(q.ring, urb, 0);
+	if (ret)
+		return ret;
+
+	urb_priv = urb->hcpriv;
+	q.td = urb_priv->td[0];
+
+	/*
+	 * Don't give the first TRB to the hardware (by toggling the cycle bit)
+	 * until we've finished creating all the other TRBs.
+	 */
+	start_trb = xhci_ring_enqueue(q.ring);
+	q.start_cycle = q.ring->cycle_state;
+	q.start_idx = xhci_ring_pointer_to_index(&q.ring->enq);
+
+	/*
+	 * Pass 1 walk the sg list to:
+	 * 1/ invalidate current mid-segment links (if present)
+	 * 2/ determine the td fragment boundaries
+	 * 3/ place mid-segment links where necessary
+	 * 4/ increase the size of the ring to accommodate the full td
+	 *
+	 * The scatterlist walk restarts if we find a td-fragment that will not
+	 * fit within a partial segment.  If we find a td-fragment that will not
+	 * fit in a full segment then we fail the request entirely.
+	 */
+	q.total_links = 0;
+	ret = parse_sg(&q, 1);
+	if (ret)
+		return ret;
+
+	if (enqueue_is_link_trb(ring))
+		advance_enq(ring, 0, do_carry_chain(xhci, ring));
+
+	q.final_enq_idx = q.enq_idx;
+	q.total_packet_count = DIV_ROUND_UP(urb->transfer_buffer_length,
+			usb_endpoint_maxp(&urb->ep->desc));
+
+	/* Pass 2 enqueue trbs and honor the established mid-segment links */
+	ret = parse_sg(&q, 2);
+	if (ret)
+		return ret;
+
+	/*
+	 * standard ->inc_enq() gets num_trbs_accounting wrong, see
+	 * standard ->inc_enq() gets the num_trbs_free accounting wrong, see
+	 */
+	q.ring->num_trbs_free = xhci_ring_num_trbs_free(q.ring);
+
+	/* validate that enq.ptr reached final_enq_idx */
+	final_enq_idx = xhci_ring_pointer_to_index(&q.ring->enq);
+	check_trb_math(urb, final_enq_idx - q.final_enq_idx,
+			q.running_total);
+	giveback_first_trb(xhci, slot_id, ep_index, urb->stream_id,
+			q.start_cycle, start_trb);
+	return 0;
+}
+
 int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 		struct urb *urb, int slot_id, unsigned int ep_index)
 {
+	struct xhci_ring *ring = xhci_urb_to_transfer_ring(xhci, urb);
+
+	if (!ring)
+		return -EINVAL;
+
 	if (urb->num_sgs)
-		return queue_bulk_sg_tx(xhci, mem_flags, urb, urb->sg,
-				urb->num_mapped_sgs, slot_id, ep_index);
+		return ring->ops->queue_bulk_sg_tx(xhci, ring, mem_flags, urb,
+				urb->sg, urb->num_mapped_sgs, slot_id,
+				ep_index);
 	else {
 		struct scatterlist scatter, *sg = &scatter;
 
@@ -3361,8 +3953,8 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 		sg->dma_address = urb->transfer_dma;
 		sg_dma_len(sg) = sg->length;
 
-		return queue_bulk_sg_tx(xhci, mem_flags, urb, sg, 1, slot_id,
-				ep_index);
+		return ring->ops->queue_bulk_sg_tx(xhci, ring, mem_flags, urb,
+				sg, 1, slot_id, ep_index);
 	}
 }
 
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 94c5d74e35b8..1d8f1a5cf8e7 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -688,6 +688,7 @@ struct xhci_ep_ctx {
 /* bit 7 is Host Initiate Disable - for disabling stream selection */
 #define MAX_BURST(p)	(((p)&0xff) << 8)
 #define CTX_TO_MAX_BURST(p)	(((p) >> 8) & 0xff)
+#define CTX_TO_MAX_PACKET(p)	(((p) >> 16) & 0xffff)
 #define MAX_PACKET(p)	(((p)&0xffff) << 16)
 #define MAX_PACKET_MASK		(0xffff << 16)
 #define MAX_PACKET_DECODED(p)	(((p) >> 16) & 0xffff)
@@ -1286,6 +1287,7 @@ struct xhci_segment {
 	/* private to HCD */
 	union xhci_trb		*link;
 	struct list_head	list;
+	int			segid;
 	dma_addr_t		dma;
 	struct device		*dev;
 	struct work_struct	work; /* for dma_free_coherent constraints */
@@ -1332,6 +1334,10 @@ struct xhci_ring_ops {
 	void (*inc_deq)(struct xhci_ring *ring);
 	void (*link_segments)(struct xhci_segment *prev,
 			struct xhci_segment *next);
+	int (*queue_bulk_sg_tx)(struct xhci_hcd *xhci, struct xhci_ring *ring,
+		gfp_t mem_flags, struct urb *urb, struct scatterlist *sgl,
+		int num_sgs, int slot_id, unsigned int ep_index);
+	void (*reap_td)(struct xhci_ring *ring);
 };
 
 struct xhci_ring {
@@ -1362,6 +1368,59 @@ static inline unsigned int xhci_ring_num_segs(struct xhci_ring *ring)
 	return 1 << ring->order;
 }
 
+static inline unsigned int xhci_ring_size(struct xhci_ring *ring)
+{
+	return xhci_ring_num_segs(ring) * TRBS_PER_SEGMENT;
+}
+
+static inline unsigned int xhci_ring_last_index(struct xhci_ring *ring)
+{
+	return xhci_ring_size(ring) - 1;
+}
+
+static inline unsigned int to_xhci_ring_index(struct xhci_ring *ring,
+		unsigned int index)
+{
+	return index & xhci_ring_last_index(ring);
+}
+
+static inline bool is_last_xhci_segment_index(unsigned int index)
+{
+	return index % TRBS_PER_SEGMENT == TRBS_PER_SEGMENT - 1;
+}
+
+static inline struct xhci_segment *to_xhci_ring_segment(struct xhci_ring *ring,
+	struct xhci_segment *seg, unsigned int idx)
+{
+	unsigned int segid = idx / TRBS_PER_SEGMENT;
+	unsigned int advance;
+
+	advance = (segid - seg->segid) & (xhci_ring_num_segs(ring) - 1);
+	while (advance--) {
+		seg = list_next_entry(seg, list);
+		if (&seg->list == &ring->segments)
+			seg = list_next_entry(seg, list);
+	}
+	WARN_ON_ONCE(seg->segid != segid);
+
+	return seg;
+}
+
+static inline unsigned int xhci_ring_pointer_to_index(
+		struct xhci_ring_pointer *rp)
+{
+	unsigned int offset = rp->ptr - rp->seg->trbs;
+
+	return rp->seg->segid * TRBS_PER_SEGMENT + offset;
+}
+
+static inline union xhci_trb *to_xhci_ring_trb(struct xhci_ring *ring,
+		struct xhci_segment *seg, unsigned int idx)
+{
+	seg = to_xhci_ring_segment(ring, seg, to_xhci_ring_index(ring, idx));
+	return &seg->trbs[idx % TRBS_PER_SEGMENT];
+}
+
 static inline union xhci_trb *xhci_ring_enqueue(struct xhci_ring *ring)
 {
 	return ring->enq.ptr;
@@ -1410,6 +1469,13 @@ static inline struct xhci_segment *xhci_segment_next(struct xhci_ring *ring,
 		return list_next_entry(seg, list);
 }
 
+static inline unsigned int xhci_ring_advance_seg(struct xhci_ring *ring,
+	unsigned int idx)
+{
+	return to_xhci_ring_index(ring, idx + TRBS_PER_SEGMENT
+			- (idx % TRBS_PER_SEGMENT));
+}
+
 static inline void xhci_ring_pointer_advance_seg(struct xhci_ring *ring,
 	struct xhci_ring_pointer *rp)
 {
@@ -1976,6 +2042,15 @@ struct xhci_input_control_ctx *xhci_get_input_control_ctx(struct xhci_hcd *xhci,
 struct xhci_slot_ctx *xhci_get_slot_ctx(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx);
 struct xhci_ep_ctx *xhci_get_ep_ctx(struct xhci_hcd *xhci, struct xhci_container_ctx *ctx, unsigned int ep_index);
 
+static inline u32 xhci_get_ep_ctx_mbp(struct xhci_ep_ctx *ctx)
+{
+	u32 ep_info2 = __le32_to_cpu(ctx->ep_info2);
+	u32 max_packet = CTX_TO_MAX_PACKET(ep_info2);
+	u32 max_burst = CTX_TO_MAX_BURST(ep_info2);
+
+	return (max_packet * (max_burst + 1));
+}
+
 /* xHCI quirks */
 bool xhci_compliance_mode_recovery_timer_quirk_check(void);
 
