On Thu, 2008-05-29 at 22:20 +1000, npiggin@xxxxxxx wrote: > +int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) > +{ > + struct mm_struct *mm = current->mm; > + unsigned long end = start + (nr_pages << PAGE_SHIFT); > + unsigned long addr = start; > + unsigned long next; > + pgd_t *pgdp; > + int nr = 0; > + > + if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, > + start, nr_pages*PAGE_SIZE))) > + goto slow_irqon; > + > + /* > + * XXX: batch / limit 'nr', to avoid large irq off latency > + * needs some instrumenting to determine the common sizes used by > + * important workloads (eg. DB2), and whether limiting the batch size > + * will decrease performance. > + * > + * It seems like we're in the clear for the moment. Direct-IO is > + * the main guy that batches up lots of get_user_pages, and even > + * they are limited to 64-at-a-time which is not so many. > + */ > + /* > + * This doesn't prevent pagetable teardown, but does prevent > + * the pagetables and pages from being freed on x86. > + * > + * So long as we atomically load page table pointers versus teardown > + * (which we do on x86, with the above PAE exception), we can follow the > + * address down to the page and take a ref on it. 
> + */ > + local_irq_disable(); > + pgdp = pgd_offset(mm, addr); > + do { > + pgd_t pgd = *pgdp; > + > + next = pgd_addr_end(addr, end); > + if (pgd_none(pgd)) > + goto slow; > + if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) > + goto slow; > + } while (pgdp++, addr = next, addr != end); > + local_irq_enable(); > + > + VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); > + return nr; > + > + { > + int i, ret; > + > +slow: > + local_irq_enable(); > +slow_irqon: > + /* Try to get the remaining pages with get_user_pages */ > + start += nr << PAGE_SHIFT; > + pgaes += nr; Typo: s/pgaes/pages/ > + > + down_read(&mm->mmap_sem); > + ret = get_user_pages(current, mm, start, > + (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); > + up_read(&mm->mmap_sem); > + > + /* Have to be a bit careful with return values */ > + if (nr > 0) { > + if (ret < 0) > + ret = nr; > + else > + ret += nr; > + } > + > + return ret; > + } > +} -- David Kleikamp IBM Linux Technology Center -- To unsubscribe from this list: send the line "unsubscribe linux-arch" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html