TRB segments are allocated relatively infrequently and would benefit
from being larger (to reduce the probability of overrunning a segment
in a TD fragment).  We already burn a page satisfying a single segment
allocation, so there is little reason not to allocate in page-sized
chunks in the first instance.

In support of freeing segments from irq context, struct xhci_segment
grows ->dev and ->work fields so that the actual free can be deferred
to a workqueue.  As a result there is no longer a need to pass an
'xhci' parameter down to xhci_segment_free().

Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>
---
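Note for reviewers: dma_free_coherent() must not be called from
interrupt context, which is why xhci_segment_free() now bounces the
actual free to a workqueue and holds a device reference until the work
runs.  Stripped of the xhci specifics, the pattern reduces to the
minimal sketch below ('struct foo' and its functions are hypothetical
stand-ins for struct xhci_segment, not code from this patch):

  #include <linux/device.h>
  #include <linux/dma-mapping.h>
  #include <linux/slab.h>
  #include <linux/workqueue.h>

  struct foo {				/* stand-in for struct xhci_segment */
  	struct device *dev;
  	void *buf;			/* coherent DMA buffer */
  	dma_addr_t dma;
  	struct work_struct work;
  };

  static void foo_free_work(struct work_struct *w)
  {
  	struct foo *f = container_of(w, struct foo, work);

  	/* runs in process context, where dma_free_coherent() is safe */
  	dma_free_coherent(f->dev, PAGE_SIZE, f->buf, f->dma);
  	put_device(f->dev);		/* drop the reference taken in foo_free() */
  	kfree(f);
  }

  static void foo_free(struct foo *f)	/* callable from irq context */
  {
  	INIT_WORK(&f->work, foo_free_work);
  	get_device(f->dev);		/* pin the device until the work runs */
  	schedule_work(&f->work);
  }

A side effect worth noting: with TRB_SEGMENT_SIZE now PAGE_SIZE, a
4096-byte page yields TRBS_PER_SEGMENT = 4096 / 16 = 256 TRBs per
segment, up from the previous fixed 64.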
 drivers/usb/host/xhci-mem.c |   70 ++++++++++++++++++++-----------------------
 drivers/usb/host/xhci.c    |    2 +-
 drivers/usb/host/xhci.h    |   19 +++++++-----
 3 files changed, 45 insertions(+), 46 deletions(-)

diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index fcae0ce47daa..1d05dc9e1928 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -40,14 +40,15 @@ static struct xhci_segment *xhci_segment_alloc(struct xhci_hcd *xhci,
 					unsigned int cycle_state, gfp_t flags)
 {
 	struct xhci_segment *seg;
-	dma_addr_t dma;
 	int i;
 
 	seg = kzalloc(sizeof *seg, flags);
 	if (!seg)
 		return NULL;
 
-	seg->trbs = dma_pool_alloc(xhci->segment_pool, flags, &dma);
+	seg->dev = xhci_to_dev(xhci);
+	seg->trbs = dma_alloc_coherent(seg->dev, TRB_SEGMENT_SIZE,
+				       &seg->dma, flags);
 	if (!seg->trbs) {
 		kfree(seg);
 		return NULL;
@@ -59,33 +60,43 @@ static struct xhci_segment *xhci_segment_alloc(struct xhci_hcd *xhci,
 		for (i = 0; i < TRBS_PER_SEGMENT; i++)
 			seg->trbs[i].link.control |= cpu_to_le32(TRB_CYCLE);
 	}
-	seg->dma = dma;
 	seg->next = NULL;
 
 	return seg;
 }
 
-static void xhci_segment_free(struct xhci_hcd *xhci, struct xhci_segment *seg)
+static void xhci_segment_free_work(struct work_struct *w)
 {
+	struct xhci_segment *seg = container_of(w, typeof(*seg), work);
+
 	if (seg->trbs) {
-		dma_pool_free(xhci->segment_pool, seg->trbs, seg->dma);
+		dma_free_coherent(seg->dev, TRB_SEGMENT_SIZE, seg->trbs,
+				  seg->dma);
 		seg->trbs = NULL;
 	}
+	put_device(seg->dev);
 	kfree(seg);
 }
 
-static void xhci_free_segments_for_ring(struct xhci_hcd *xhci,
-				struct xhci_segment *first)
+static void xhci_segment_free(struct xhci_segment *seg)
+{
+	INIT_WORK(&seg->work, xhci_segment_free_work);
+	get_device(seg->dev);
+	schedule_work(&seg->work);
+}
+
+static void xhci_free_segments_for_ring(struct xhci_segment *first)
 {
 	struct xhci_segment *seg;
 
 	seg = first->next;
 	while (seg != first) {
 		struct xhci_segment *next = seg->next;
-		xhci_segment_free(xhci, seg);
+
+		xhci_segment_free(seg);
 		seg = next;
 	}
-	xhci_segment_free(xhci, first);
+	xhci_segment_free(first);
 }
 
 /*
@@ -273,7 +284,7 @@ static int xhci_update_stream_mapping(struct xhci_ring *ring, gfp_t mem_flags)
 }
 
 /* XXX: Do we need the hcd structure in all these functions? */
-void xhci_ring_free(struct xhci_hcd *xhci, struct xhci_ring *ring)
+void xhci_ring_free(struct xhci_ring *ring)
 {
 	if (!ring)
 		return;
@@ -281,7 +292,7 @@ void xhci_ring_free(struct xhci_hcd *xhci, struct xhci_ring *ring)
 	if (ring->first_seg) {
 		if (ring->type == TYPE_STREAM)
 			xhci_remove_stream_mapping(ring);
-		xhci_free_segments_for_ring(xhci, ring->first_seg);
+		xhci_free_segments_for_ring(ring->first_seg);
 	}
 
 	kfree(ring);
@@ -336,7 +347,7 @@ static int xhci_alloc_segments_for_ring(struct xhci_hcd *xhci,
 		prev = *first;
 		while (prev) {
 			next = prev->next;
-			xhci_segment_free(xhci, prev);
+			xhci_segment_free(prev);
 			prev = next;
 		}
 		return -ENOMEM;
@@ -411,7 +422,7 @@ void xhci_free_or_cache_endpoint_ring(struct xhci_hcd *xhci,
 				virt_dev->num_rings_cached,
 				(virt_dev->num_rings_cached > 1) ? "s" : "");
 	} else {
-		xhci_ring_free(xhci, virt_dev->eps[ep_index].ring);
+		xhci_ring_free(virt_dev->eps[ep_index].ring);
 		xhci_dbg(xhci, "Ring cache full (%d rings), "
 				"freeing ring\n",
 				virt_dev->num_rings_cached);
@@ -482,7 +493,7 @@ int xhci_ring_expansion(struct xhci_hcd *xhci, struct xhci_ring *ring,
 		struct xhci_segment *next;
 		do {
 			next = first->next;
-			xhci_segment_free(xhci, first);
+			xhci_segment_free(first);
 			if (first == last)
 				break;
 			first = next;
@@ -723,7 +734,7 @@ struct xhci_stream_info *xhci_alloc_stream_info(struct xhci_hcd *xhci,
 
 		ret = xhci_update_stream_mapping(cur_ring, mem_flags);
 		if (ret) {
-			xhci_ring_free(xhci, cur_ring);
+			xhci_ring_free(cur_ring);
 			stream_info->stream_rings[cur_stream] = NULL;
 			goto cleanup_rings;
 		}
@@ -741,7 +752,7 @@ cleanup_rings:
 	for (cur_stream = 1; cur_stream < num_streams; cur_stream++) {
 		cur_ring = stream_info->stream_rings[cur_stream];
 		if (cur_ring) {
-			xhci_ring_free(xhci, cur_ring);
+			xhci_ring_free(cur_ring);
 			stream_info->stream_rings[cur_stream] = NULL;
 		}
 	}
@@ -809,7 +820,7 @@ void xhci_free_stream_info(struct xhci_hcd *xhci,
 			cur_stream++) {
 		cur_ring = stream_info->stream_rings[cur_stream];
 		if (cur_ring) {
-			xhci_ring_free(xhci, cur_ring);
+			xhci_ring_free(cur_ring);
 			stream_info->stream_rings[cur_stream] = NULL;
 		}
 	}
@@ -930,7 +941,7 @@ void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id)
 
 	for (i = 0; i < 31; ++i) {
 		if (dev->eps[i].ring)
-			xhci_ring_free(xhci, dev->eps[i].ring);
+			xhci_ring_free(dev->eps[i].ring);
 		if (dev->eps[i].stream_info)
 			xhci_free_stream_info(xhci,
 					dev->eps[i].stream_info);
@@ -951,7 +962,7 @@ void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id)
 
 	if (dev->ring_cache) {
 		for (i = 0; i < dev->num_rings_cached; i++)
-			xhci_ring_free(xhci, dev->ring_cache[i]);
+			xhci_ring_free(dev->ring_cache[i]);
 		kfree(dev->ring_cache);
 	}
 
@@ -1801,14 +1812,14 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci)
 	xhci->erst.entries = NULL;
 	xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Freed ERST");
 	if (xhci->event_ring)
-		xhci_ring_free(xhci, xhci->event_ring);
+		xhci_ring_free(xhci->event_ring);
 	xhci->event_ring = NULL;
 	xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Freed event ring");
 
 	if (xhci->lpm_command)
 		xhci_free_command(xhci, xhci->lpm_command);
 	if (xhci->cmd_ring)
-		xhci_ring_free(xhci, xhci->cmd_ring);
+		xhci_ring_free(xhci->cmd_ring);
 	xhci->cmd_ring = NULL;
 	xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Freed command ring");
 	xhci_cleanup_command_queue(xhci);
@@ -1826,11 +1837,6 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci)
 	for (i = 1; i < MAX_HC_SLOTS; ++i)
 		xhci_free_virt_device(xhci, i);
 
-	if (xhci->segment_pool)
-		dma_pool_destroy(xhci->segment_pool);
-	xhci->segment_pool = NULL;
-	xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Freed segment pool");
-
 	if (xhci->device_pool)
 		dma_pool_destroy(xhci->device_pool);
 	xhci->device_pool = NULL;
@@ -2363,20 +2369,10 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags)
 			(unsigned long long)xhci->dcbaa->dma, xhci->dcbaa);
 	xhci_write_64(xhci, dma, &xhci->op_regs->dcbaa_ptr);
 
-	/*
-	 * Initialize the ring segment pool.  The ring must be a contiguous
-	 * structure comprised of TRBs.  The TRBs must be 16 byte aligned,
-	 * however, the command ring segment needs 64-byte aligned segments
-	 * and our use of dma addresses in the trb_address_map radix tree needs
-	 * TRB_SEGMENT_SIZE alignment, so we pick the greater alignment need.
-	 */
-	xhci->segment_pool = dma_pool_create("xHCI ring segments", dev,
-			TRB_SEGMENT_SIZE, TRB_SEGMENT_SIZE, xhci->page_size);
-
 	/* See Table 46 and Note on Figure 55 */
 	xhci->device_pool = dma_pool_create("xHCI input/output contexts", dev,
 			2112, 64, xhci->page_size);
-	if (!xhci->segment_pool || !xhci->device_pool)
+	if (!xhci->device_pool)
 		goto fail;
 
 	/* Linear stream context arrays don't have any boundary restrictions,
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 0370a3e76cd2..1d3dc014b477 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -2799,7 +2799,7 @@ void xhci_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev)
 	/* Free any rings allocated for added endpoints */
 	for (i = 0; i < 31; ++i) {
 		if (virt_dev->eps[i].new_ring) {
-			xhci_ring_free(xhci, virt_dev->eps[i].new_ring);
+			xhci_ring_free(virt_dev->eps[i].new_ring);
 			virt_dev->eps[i].new_ring = NULL;
 		}
 	}
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 90957720adec..1a60e8498a55 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1261,15 +1261,17 @@ union xhci_trb {
 #define NEC_FW_MAJOR(p)		(((p) >> 8) & 0xff)
 
 /*
- * TRBS_PER_SEGMENT must be a multiple of 4,
- * since the command ring is 64-byte aligned.
- * It must also be greater than 16.
+ * The ring segment must be a contiguous structure comprised of TRBs
+ * (16-byte data structures).  The TRBs must be 16-byte aligned;
+ * however, the command ring segment needs 64-byte aligned segments and
+ * our use of dma addresses in the trb_address_map radix tree needs
+ * TRB_SEGMENT_SIZE alignment.
  */
-#define TRBS_PER_SEGMENT	64
+#define TRB_SEGMENT_SIZE	PAGE_SIZE
+#define TRB_SEGMENT_SHIFT	PAGE_SHIFT
+#define TRBS_PER_SEGMENT	(TRB_SEGMENT_SIZE/16)
 /* Allow two commands + a link TRB, along with any reserved command TRBs */
 #define MAX_RSVD_CMD_TRBS	(TRBS_PER_SEGMENT - 3)
-#define TRB_SEGMENT_SIZE	(TRBS_PER_SEGMENT*16)
-#define TRB_SEGMENT_SHIFT	(ilog2(TRB_SEGMENT_SIZE))
 /* TRB buffer pointers can't cross 64KB boundaries */
 #define TRB_MAX_BUFF_SHIFT	16
 #define TRB_MAX_BUFF_SIZE	(1 << TRB_MAX_BUFF_SHIFT)
@@ -1279,6 +1281,8 @@ struct xhci_segment {
 	/* private to HCD */
 	struct xhci_segment	*next;
 	dma_addr_t		dma;
+	struct device		*dev;
+	struct work_struct	work; /* for dma_free_coherent constraints */
 };
 
 struct xhci_td {
@@ -1504,7 +1508,6 @@ struct xhci_hcd {
 
 	/* DMA pools */
 	struct dma_pool	*device_pool;
-	struct dma_pool	*segment_pool;
 	struct dma_pool	*small_streams_pool;
 	struct dma_pool	*medium_streams_pool;
 
@@ -1701,7 +1704,7 @@ void xhci_slot_copy(struct xhci_hcd *xhci,
 int xhci_endpoint_init(struct xhci_hcd *xhci, struct xhci_virt_device *virt_dev,
 		struct usb_device *udev, struct usb_host_endpoint *ep,
 		gfp_t mem_flags);
-void xhci_ring_free(struct xhci_hcd *xhci, struct xhci_ring *ring);
+void xhci_ring_free(struct xhci_ring *ring);
 int xhci_ring_expansion(struct xhci_hcd *xhci, struct xhci_ring *ring,
 		unsigned int num_trbs, gfp_t flags);
 void xhci_free_or_cache_endpoint_ring(struct xhci_hcd *xhci,
--