This is only for experimental use, and it is only lightly tested.

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 0099667..32d2920 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1403,7 +1403,6 @@ static void iommu_detach_domain(struct dmar_domain *domain,
 }
 
 static struct iova_domain reserved_iova_list;
-static struct lock_class_key reserved_rbtree_key;
 
 static int dmar_init_reserved_ranges(void)
 {
@@ -1413,9 +1412,6 @@ static int dmar_init_reserved_ranges(void)
 
         init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
 
-        lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
-                &reserved_rbtree_key);
-
         /* IOAPIC ranges shouldn't be accessed by DMA */
         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
                 IOVA_PFN(IOAPIC_RANGE_END));
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 67da6cff..efd5518 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -18,182 +18,113 @@
  */
 
 #include <linux/iova.h>
+#include <linux/random.h>
 
+#define IOVA_PENDING_INIT ((unsigned long) -1)
 void
 init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit)
 {
-        spin_lock_init(&iovad->iova_rbtree_lock);
-        iovad->rbroot = RB_ROOT;
-        iovad->cached32_node = NULL;
+        spin_lock_init(&iovad->del_skiplist_lock);
+        spin_lock_init(&iovad->skiplist_lock);
+        sl_init_list(&iovad->skiplist, GFP_ATOMIC);
         iovad->dma_32bit_pfn = pfn_32bit;
 }
 
-static struct rb_node *
-__get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn)
+static unsigned long
+__get_cached_addr(struct iova_domain *iovad, unsigned long limit_pfn)
 {
-        if ((*limit_pfn != iovad->dma_32bit_pfn) ||
-                (iovad->cached32_node == NULL))
-                return rb_last(&iovad->rbroot);
-        else {
-                struct rb_node *prev_node = rb_prev(iovad->cached32_node);
-                struct iova *curr_iova =
-                        container_of(iovad->cached32_node, struct iova, node);
-                *limit_pfn = curr_iova->pfn_lo - 1;
-                return prev_node;
-        }
-}
-
-static void
-__cached_rbnode_insert_update(struct iova_domain *iovad,
-        unsigned long limit_pfn, struct iova *new)
-{
-        if (limit_pfn != iovad->dma_32bit_pfn)
-                return;
-        iovad->cached32_node = &new->node;
-}
+        unsigned long guess;
 
-static void
-__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
-{
-        struct iova *cached_iova;
-        struct rb_node *curr;
+        prandom_bytes(&guess, sizeof(guess));
 
-        if (!iovad->cached32_node)
-                return;
-        curr = iovad->cached32_node;
-        cached_iova = container_of(curr, struct iova, node);
-
-        if (free->pfn_lo >= cached_iova->pfn_lo) {
-                struct rb_node *node = rb_next(&free->node);
-                struct iova *iova = container_of(node, struct iova, node);
-
-                /* only cache if it's below 32bit pfn */
-                if (node && iova->pfn_lo < iovad->dma_32bit_pfn)
-                        iovad->cached32_node = node;
-                else
-                        iovad->cached32_node = NULL;
+        /* the skiplist code is fastest when we spread out the
+         * key range as much as possible.  Instead of caching the
+         * last freed or allocated number, return random guesses
+         * in hopes of fanning out our locking attempts.
+         */
+        if (limit_pfn == iovad->dma_32bit_pfn) {
+                guess = guess % iovad->dma_32bit_pfn;
+                guess = max_t(unsigned long, guess, IOVA_START_PFN);
+                return guess;
+        } else {
+                guess = max_t(unsigned long, guess, IOVA_START_PFN);
+                guess = min_t(unsigned long, guess, (~0UL) >> 1);
+                return guess;
         }
 }
 
-/* Computes the padding size required, to make the
- * the start address naturally aligned on its size
- */
-static int
-iova_get_pad_size(int size, unsigned int limit_pfn)
-{
-        unsigned int pad_size = 0;
-        unsigned int order = ilog2(size);
-
-        if (order)
-                pad_size = (limit_pfn + 1) % (1 << order);
-
-        return pad_size;
-}
-
 static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
                 unsigned long size, unsigned long limit_pfn,
                         struct iova *new, bool size_aligned)
 {
-        struct rb_node *prev, *curr = NULL;
         unsigned long flags;
-        unsigned long saved_pfn;
-        unsigned int pad_size = 0;
-
-        /* Walk the tree backwards */
-        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
-        saved_pfn = limit_pfn;
-        curr = __get_cached_rbnode(iovad, &limit_pfn);
-        prev = curr;
-        while (curr) {
-                struct iova *curr_iova = container_of(curr, struct iova, node);
-
-                if (limit_pfn < curr_iova->pfn_lo)
-                        goto move_left;
-                else if (limit_pfn < curr_iova->pfn_hi)
-                        goto adjust_limit_pfn;
-                else {
-                        if (size_aligned)
-                                pad_size = iova_get_pad_size(size, limit_pfn);
-                        if ((curr_iova->pfn_hi + size + pad_size) <= limit_pfn)
-                                break;  /* found a free slot */
-                }
-adjust_limit_pfn:
-                limit_pfn = curr_iova->pfn_lo - 1;
-move_left:
-                prev = curr;
-                curr = rb_prev(curr);
-        }
+        unsigned long align = 1;
+        unsigned long hint;
 
-        if (!curr) {
-                if (size_aligned)
-                        pad_size = iova_get_pad_size(size, limit_pfn);
-                if ((IOVA_START_PFN + size + pad_size) > limit_pfn) {
-                        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-                        return -ENOMEM;
-                }
-        }
+        int ret;
 
-        /* pfn_lo will point to size aligned address if size_aligned is set */
-        new->pfn_lo = limit_pfn - (size + pad_size) + 1;
-        new->pfn_hi = new->pfn_lo + size - 1;
-
-        /* Insert the new_iova into domain rbtree by holding writer lock */
-        /* Add new node and rebalance tree. */
-        {
-                struct rb_node **entry, *parent = NULL;
-
-                /* If we have 'prev', it's a valid place to start the
-                   insertion. Otherwise, start from the root. */
-                if (prev)
-                        entry = &prev;
-                else
-                        entry = &iovad->rbroot.rb_node;
-
-                /* Figure out where to put new node */
-                while (*entry) {
-                        struct iova *this = container_of(*entry,
-                                                        struct iova, node);
-                        parent = *entry;
-
-                        if (new->pfn_lo < this->pfn_lo)
-                                entry = &((*entry)->rb_left);
-                        else if (new->pfn_lo > this->pfn_lo)
-                                entry = &((*entry)->rb_right);
-                        else
-                                BUG(); /* this should not happen */
-                }
+        if (size_aligned)
+                align = size;
 
-                /* Add new node and rebalance tree. */
-                rb_link_node(&new->node, parent, entry);
-                rb_insert_color(&new->node, &iovad->rbroot);
+        /*
+         * make sure that a lockless search into the tree
+         * understands we're still doing setup on this iova
+         */
+        new->pfn_lo = IOVA_PENDING_INIT;
+        new->pfn_hi = IOVA_PENDING_INIT;
+        smp_wmb();
+again:
+        local_irq_save(flags);
+        hint = __get_cached_addr(iovad, limit_pfn);
+        ret = skiplist_insert_hole(&iovad->skiplist,
+                                   hint,
                                   limit_pfn, size, align,
+                                   &new->slot, GFP_ATOMIC);
+        /*
+         * insert hole returns -eagain when it found a good
+         * spot but someone raced in and stole it.  If
+         * that happens pick a new hint and try again
+         */
+        if (ret == -EAGAIN) {
+                local_irq_restore(flags);
+                goto again;
         }
-        __cached_rbnode_insert_update(iovad, saved_pfn, new);
 
-        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+        /* we're fully inserted, set our lo/hi */
+        new->pfn_lo = new->slot.key;
+        new->pfn_hi = new->slot.key + new->slot.size - 1;
+        smp_wmb();
+        local_irq_restore(flags);
 
+        if (ret)
+                return -ENOMEM;
         return 0;
 }
 
-static void
-iova_insert_rbtree(struct rb_root *root, struct iova *iova)
+static int
+iova_insert_skiplist(struct sl_list *skiplist, struct iova *iova)
 {
-        struct rb_node **new = &(root->rb_node), *parent = NULL;
-        /* Figure out where to put new node */
-        while (*new) {
-                struct iova *this = container_of(*new, struct iova, node);
-                parent = *new;
-
-                if (iova->pfn_lo < this->pfn_lo)
-                        new = &((*new)->rb_left);
-                else if (iova->pfn_lo > this->pfn_lo)
-                        new = &((*new)->rb_right);
-                else
-                        BUG(); /* this should not happen */
+        int ret;
+        int preload_token;
+        unsigned long flags;
+
+        local_irq_save(flags);
+        preload_token = skiplist_preload(skiplist, GFP_ATOMIC);
+        if (preload_token < 0) {
+                ret = preload_token;
+                local_irq_restore(flags);
+                goto out;
         }
-        /* Add new node and rebalance tree. */
-        rb_link_node(&iova->node, parent, new);
-        rb_insert_color(&iova->node, root);
+
+        iova->slot.key = iova->pfn_lo;
+        iova->slot.size = iova->pfn_hi - iova->pfn_lo + 1;
+
+        ret = skiplist_insert(skiplist, &iova->slot, preload_token);
+        local_irq_restore(flags);
+        preempt_enable();
+out:
+        return ret;
 }
 
 /**
@@ -245,53 +176,24 @@ alloc_iova(struct iova_domain *iovad, unsigned long size,
  */
 struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
 {
+        struct sl_slot *slot;
         unsigned long flags;
-        struct rb_node *node;
-
-        /* Take the lock so that no other thread is manipulating the rbtree */
-        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
-        node = iovad->rbroot.rb_node;
-        while (node) {
-                struct iova *iova = container_of(node, struct iova, node);
-
-                /* If pfn falls within iova's range, return iova */
-                if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) {
-                        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-                        /* We are not holding the lock while this iova
-                         * is referenced by the caller as the same thread
-                         * which called this function also calls __free_iova()
-                         * and it is by design that only one thread can possibly
-                         * reference a particular iova and hence no conflict.
-                         */
-                        return iova;
-                }
+        struct iova *iova;
 
-                if (pfn < iova->pfn_lo)
-                        node = node->rb_left;
-                else if (pfn > iova->pfn_lo)
-                        node = node->rb_right;
+        local_irq_save(flags);
+        slot = skiplist_lookup(&iovad->skiplist, pfn, 1);
+        if (!slot) {
+                local_irq_restore(flags);
+                return NULL;
         }
-
-        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-        return NULL;
-}
-
-/**
- * __free_iova - frees the given iova
- * @iovad: iova domain in question.
- * @iova: iova in question.
- * Frees the given iova belonging to the giving domain
- */
-void
-__free_iova(struct iova_domain *iovad, struct iova *iova)
-{
-        unsigned long flags;
-
-        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
-        __cached_rbnode_delete_update(iovad, iova);
-        rb_erase(&iova->node, &iovad->rbroot);
-        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-        free_iova_mem(iova);
+        iova = sl_slot_entry(slot, struct iova, slot);
+        while (iova->pfn_lo == IOVA_PENDING_INIT ||
+               iova->pfn_hi == IOVA_PENDING_INIT) {
+                cpu_relax();
+                smp_rmb();
+        }
+        local_irq_restore(flags);
+        return iova;
 }
 
 /**
@@ -304,12 +206,29 @@ __free_iova(struct iova_domain *iovad, struct iova *iova)
 void
 free_iova(struct iova_domain *iovad, unsigned long pfn)
 {
-        struct iova *iova = find_iova(iovad, pfn);
-
-        if (iova)
-                __free_iova(iovad, iova);
+        struct iova *iova;
+        struct sl_slot *slot;
+        unsigned long flags;
+
+        local_irq_save(flags);
+        slot = skiplist_delete(&iovad->skiplist, pfn, 1);
+        local_irq_restore(flags);
+
+        if (!slot)
+                return;
+        iova = sl_slot_entry(slot, struct iova, slot);
+        free_iova_mem(iova);
 }
 
+void
+__free_iova(struct iova_domain *iovad, struct iova *iova)
+{
+        unsigned long pfn = iova->pfn_lo;
+
+        free_iova(iovad, pfn);
+}
+
+
 /**
  * put_iova_domain - destroys the iova doamin
  * @iovad: - iova domain in question.
@@ -317,29 +236,37 @@ free_iova(struct iova_domain *iovad, unsigned long pfn)
  */
 void put_iova_domain(struct iova_domain *iovad)
 {
-        struct rb_node *node;
+        struct sl_list *skiplist = &iovad->skiplist;
+        struct sl_node *p;
+        struct sl_leaf *leaf;
         unsigned long flags;
+        struct iova *iova;
+        struct sl_slot *slot;
+        int i;
 
-        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
-        node = rb_first(&iovad->rbroot);
-        while (node) {
-                struct iova *iova = container_of(node, struct iova, node);
-                rb_erase(node, &iovad->rbroot);
-                free_iova_mem(iova);
-                node = rb_first(&iovad->rbroot);
+        /*
+         * the skiplist code needs some helpers for iteration.  For now
+         * roll our own
+         */
+        local_irq_save(flags);
+        spin_lock(&iovad->skiplist_lock);
+        sl_lock_node(skiplist->head);
+        p = skiplist->head->ptrs[0].next;
+        while (p) {
+                leaf = sl_entry(p);
+                for (i = 0; i < leaf->nr; i++) {
+                        slot = leaf->ptrs[i];
+                        iova = sl_slot_entry(slot, struct iova, slot);
+                        free_iova_mem(iova);
+                }
+                p = leaf->node.ptrs[0].next;
+                sl_free_leaf(leaf);
         }
-        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-}
-
-static int
-__is_range_overlap(struct rb_node *node,
-        unsigned long pfn_lo, unsigned long pfn_hi)
-{
-        struct iova *iova = container_of(node, struct iova, node);
-
-        if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
-                return 1;
-        return 0;
+        /* FIXME call a helper here */
+        memset(skiplist->head->ptrs, 0, sl_node_size(SKIP_MAXLEVEL));
+        sl_unlock_node(skiplist->head);
+        spin_unlock(&iovad->skiplist_lock);
+        local_irq_restore(flags);
 }
 
 static struct iova *
@@ -347,6 +274,7 @@ __insert_new_range(struct iova_domain *iovad,
         unsigned long pfn_lo, unsigned long pfn_hi)
 {
         struct iova *iova;
+        int ret;
 
         iova = alloc_iova_mem();
         if (!iova)
@@ -354,18 +282,16 @@ __insert_new_range(struct iova_domain *iovad,
 
         iova->pfn_hi = pfn_hi;
         iova->pfn_lo = pfn_lo;
-        iova_insert_rbtree(&iovad->rbroot, iova);
-        return iova;
-}
+        ret = iova_insert_skiplist(&iovad->skiplist, iova);
 
-static void
-__adjust_overlap_range(struct iova *iova,
-        unsigned long *pfn_lo, unsigned long *pfn_hi)
-{
-        if (*pfn_lo < iova->pfn_lo)
-                iova->pfn_lo = *pfn_lo;
-        if (*pfn_hi > iova->pfn_hi)
-                *pfn_lo = iova->pfn_hi + 1;
+        if (ret == -ENOMEM) {
+                free_iova_mem(iova);
+                return NULL;
+        } else {
+                BUG_ON(ret);
+        }
+
+        return iova;
 }
 
 /**
@@ -380,32 +306,42 @@ struct iova *
 reserve_iova(struct iova_domain *iovad,
         unsigned long pfn_lo, unsigned long pfn_hi)
 {
-        struct rb_node *node;
+        struct sl_slot *slot;
         unsigned long flags;
-        struct iova *iova;
-        unsigned int overlap = 0;
-
-        spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
-        for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
-                if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
-                        iova = container_of(node, struct iova, node);
-                        __adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
-                        if ((pfn_lo >= iova->pfn_lo) &&
-                                (pfn_hi <= iova->pfn_hi))
-                                goto finish;
-                        overlap = 1;
-
-                } else if (overlap)
-                                break;
-        }
-
-        /* We are here either because this is the first reserver node
-         * or need to insert remaining non overlap addr range
+        struct iova *iova = NULL;
+        struct iova *found = NULL;
+        unsigned long size = pfn_hi - pfn_lo + 1;
+        unsigned long min_pfn = pfn_lo;
+        unsigned long max_pfn = pfn_hi;
+
+        /*
+         * this is not locking safe.  It only happens while there are no
+         * concurrent IO requests (I hope!)
         */
-        iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
-finish:
-
-        spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+        local_irq_save(flags);
+        spin_lock(&iovad->skiplist_lock);
+        while (1) {
+                /*
+                 * really ugly, just delete anything overlapping and
+                 * reinsert the new full range
+                 */
+                slot = skiplist_delete(&iovad->skiplist, pfn_lo, size);
+                if (!slot)
+                        break;
+
+                found = sl_slot_entry(slot, struct iova, slot);
+                while (found->pfn_lo == IOVA_PENDING_INIT ||
+                       found->pfn_hi == IOVA_PENDING_INIT) {
+                        cpu_relax();
+                        smp_rmb();
+                }
+                min_pfn = min(found->pfn_lo, min_pfn);
+                max_pfn = max(found->pfn_hi, max_pfn);
+                free_iova_mem(found);
+        }
+        iova = __insert_new_range(iovad, min_pfn, max_pfn);
+        spin_unlock(&iovad->skiplist_lock);
+        local_irq_restore(flags);
         return iova;
 }
 
@@ -419,17 +355,33 @@ finish:
 void
 copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
 {
+        struct sl_node *p;
+        struct sl_leaf *leaf;
         unsigned long flags;
-        struct rb_node *node;
-
-        spin_lock_irqsave(&from->iova_rbtree_lock, flags);
-        for (node = rb_first(&from->rbroot); node; node = rb_next(node)) {
-                struct iova *iova = container_of(node, struct iova, node);
-                struct iova *new_iova;
-                new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
-                if (!new_iova)
-                        printk(KERN_ERR "Reserve iova range %lx@%lx failed\n",
-                                iova->pfn_lo, iova->pfn_lo);
+        struct iova *iova;
+        struct iova *new_iova;
+        struct sl_slot *slot;
+        int i;
+
+        /*
+         * this is not locking safe.  It only happens while there are no
+         * concurrent IO requests (I hope!)
+         */
+        local_irq_save(flags);
+        sl_lock_node(from->skiplist.head);
+        p = from->skiplist.head->ptrs[0].next;
+        while (p) {
+                leaf = sl_entry(p);
+                for (i = 0; i < leaf->nr; i++) {
+                        slot = leaf->ptrs[i];
+                        iova = sl_slot_entry(slot, struct iova, slot);
+                        new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
+                        if (!new_iova)
+                                printk(KERN_ERR "Reserve iova range %lx@%lx failed\n",
+                                        iova->pfn_lo, iova->pfn_lo);
+                }
+                p = leaf->node.ptrs[0].next;
         }
-        spin_unlock_irqrestore(&from->iova_rbtree_lock, flags);
+        sl_unlock_node(from->skiplist.head);
+        local_irq_restore(flags);
 }
diff --git a/include/linux/iova.h b/include/linux/iova.h
index 76a0759..b514c18 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -13,7 +13,7 @@
 
 #include <linux/types.h>
 #include <linux/kernel.h>
-#include <linux/rbtree.h>
+#include <linux/skiplist.h>
 #include <linux/dma-mapping.h>
 
 /* IO virtual address start page frame number */
@@ -21,16 +21,16 @@
 
 /* iova structure */
 struct iova {
-        struct rb_node  node;
+        struct sl_slot  slot;
         unsigned long   pfn_hi; /* IOMMU dish out addr hi */
         unsigned long   pfn_lo; /* IOMMU dish out addr lo */
 };
 
 /* holds all the iova translations for a domain */
 struct iova_domain {
-        spinlock_t      iova_rbtree_lock; /* Lock to protect update of rbtree */
-        struct rb_root  rbroot;         /* iova domain rbtree root */
-        struct rb_node  *cached32_node; /* Save last alloced node */
+        spinlock_t      skiplist_lock;
+        spinlock_t      del_skiplist_lock;
+        struct sl_list  skiplist;       /* iova domain skiplist */
         unsigned long   dma_32bit_pfn;
 };
-- 
1.8.2
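
A note on the IOVA_PENDING_INIT handshake the patch relies on: skiplist_insert_hole() makes the slot reachable by lockless lookups before pfn_lo/pfn_hi hold their final values, so the writer stores the sentinel first and fills in the real range after the insert, while readers such as find_iova() spin with cpu_relax()/smp_rmb() until the sentinel is gone. The standalone userspace sketch below models that publish-then-initialize pattern; it is illustration only, not part of the patch, with made-up names (struct range, PENDING_INIT) and C11 acquire/release atomics standing in for smp_wmb()/smp_rmb().

/*
 * Userspace analogue (illustration only, not part of the patch) of the
 * IOVA_PENDING_INIT publish/wait handshake.  C11 acquire/release atomics
 * stand in for the kernel's smp_wmb()/smp_rmb(); the names are invented.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define PENDING_INIT ((unsigned long)-1)

struct range {
        _Atomic unsigned long lo;
        _Atomic unsigned long hi;
};

/* already "linked into the tree": reachable by the reader from the start */
static struct range slot = { .lo = PENDING_INIT, .hi = PENDING_INIT };

static void *writer(void *arg)
{
        (void)arg;
        /* fill in the real values, then release them to lockless readers */
        atomic_store_explicit(&slot.hi, 4096, memory_order_relaxed);
        atomic_store_explicit(&slot.lo, 2048, memory_order_release);
        return NULL;
}

static void *reader(void *arg)
{
        unsigned long lo, hi;

        (void)arg;
        /* spin until setup is finished, like the cpu_relax()/smp_rmb()
         * loop in find_iova() */
        do {
                lo = atomic_load_explicit(&slot.lo, memory_order_acquire);
                hi = atomic_load_explicit(&slot.hi, memory_order_relaxed);
        } while (lo == PENDING_INIT || hi == PENDING_INIT);

        printf("saw range [%lu, %lu]\n", lo, hi);
        return NULL;
}

int main(void)
{
        pthread_t r, w;

        pthread_create(&r, NULL, reader, NULL);
        pthread_create(&w, NULL, writer, NULL);
        pthread_join(w, NULL);
        pthread_join(r, NULL);
        return 0;
}

Built with "cc -pthread", the reader should always print the fully initialized range rather than a half-set one, because the acquire load of lo pairs with the writer's release store, ordering the earlier store to hi before it.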