HV-style KVM requires large regions of physically contiguous memory
for the guest hashed page table (HPT) and, on PPC970 processors, for
the real mode area (RMA).  Currently we allocate, at boot time, some
number of HPTs of a single size and some number of RMAs of a single
size, and use them later to run guests.  However, the desired size of
the HPT depends on how much RAM the guest is given, and the optimal
size of the RMA may also differ between guests.

This patch therefore changes the boot-time allocation to allocate a
single contiguous area of memory rather than many separate areas, and
adds a simple bitmap-based allocator which can find aligned or
unaligned free regions of any power-of-2 size within the area.  Each
bit of the bitmap represents a "chunk", a 256kB-aligned piece of the
area; a set bit means that the chunk is in use.

Signed-off-by: Paul Mackerras <paulus@xxxxxxxxx>
---
 arch/powerpc/kvm/book3s_hv_builtin.c | 388 ++++++++++++++++++++++++++++------
 1 file changed, 321 insertions(+), 67 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index ec0a9e5..f0c51c5 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -18,11 +18,14 @@
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
 
+/* Use a chunk size of 256kB, since this is the smallest allowed HPT size */
+#define KVM_CHUNK_ORDER	18
+#define KVM_CHUNK_SIZE	(1ul << KVM_CHUNK_ORDER)
+
 #define KVM_LINEAR_RMA		0
 #define KVM_LINEAR_HPT		1
 
-static void __init kvm_linear_init_one(ulong size, int count, int type);
-static struct kvmppc_linear_info *kvm_alloc_linear(int type);
+static struct kvmppc_linear_info *kvm_alloc_linear(int type, int order);
 static void kvm_release_linear(struct kvmppc_linear_info *ri);
 
 int kvm_hpt_order = KVM_DEFAULT_HPT_ORDER;
@@ -38,7 +41,8 @@ EXPORT_SYMBOL_GPL(kvm_hpt_order);
  * much physically contiguous memory after the system is up and running,
  * we preallocate a set of RMAs in early boot for KVM to use.
  */
-static unsigned long kvm_rma_size = 64 << 20;	/* 64MB */
+#define DEFAULT_RMA_SIZE	(64 << 20)	/* 64MB */
+static unsigned long kvm_rma_size = DEFAULT_RMA_SIZE;
 static unsigned long kvm_rma_count;
 
 /* Work out RMLS (real mode limit selector) field value for a given RMA size.
@@ -91,7 +95,7 @@ early_param("kvm_rma_count", early_parse_rma_count);
 
 struct kvmppc_linear_info *kvm_alloc_rma(void)
 {
-	return kvm_alloc_linear(KVM_LINEAR_RMA);
+	return kvm_alloc_linear(KVM_LINEAR_RMA, __ilog2(kvm_rma_size));
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_rma);
 
@@ -124,7 +128,7 @@ early_param("kvm_hpt_count", early_parse_hpt_count);
 
 struct kvmppc_linear_info *kvm_alloc_hpt(void)
 {
-	return kvm_alloc_linear(KVM_LINEAR_HPT);
+	return kvm_alloc_linear(KVM_LINEAR_HPT, kvm_hpt_order);
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
 
@@ -134,73 +138,295 @@ void kvm_release_hpt(struct kvmppc_linear_info *li)
 }
 EXPORT_SYMBOL_GPL(kvm_release_hpt);
 
-/*************** generic *************/
+/* Bitmap allocator functions */
 
-static LIST_HEAD(free_linears);
-static DEFINE_SPINLOCK(linear_lock);
+/*
+ * Set `nbits' bits in a bitmap starting at bit `start', checking that
+ * the bits are 0 to begin with.  If they aren't, stop and return the
+ * number of bits remaining (not set to 1).
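+ *
+ * For example, on an all-zero bitmap, set_zero_bits(bitmap, 2, 3)
+ * sets bits 2-4 (making the first word 0x1c) and returns 0.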
+ */
+static long set_zero_bits(unsigned long *bitmap, unsigned long start,
+			  unsigned long nbits)
+{
+	unsigned long mask;
+	unsigned long offset;
+	unsigned long *p;
+
+	offset = start & (BITS_PER_LONG - 1);
+	mask = ~0UL << offset;
+	nbits += offset;
+	p = bitmap + start / BITS_PER_LONG;
+	while (nbits >= BITS_PER_LONG) {
+		if (*p & mask)
+			return nbits;
+		*p++ |= mask;
+		mask = ~0UL;
+		nbits -= BITS_PER_LONG;
+	}
+	if (nbits) {
+		mask &= ~(~0UL << nbits);
+		if (*p & mask)
+			return nbits;
+		*p |= mask;
+	}
+	return 0;
+}
 
-static void __init kvm_linear_init_one(ulong size, int count, int type)
+/*
+ * Clear `nbits' bits in a bitmap starting at bit `start', checking that
+ * the bits are 1 to begin with.  If they aren't, stop and return the
+ * number of bits remaining (not set to 0).
+ */
+static long clear_one_bits(unsigned long *bitmap, unsigned long start,
+			   unsigned long nbits)
 {
-	unsigned long i;
-	unsigned long j, npages;
-	void *linear;
-	struct page *pg;
-	const char *typestr;
-	struct kvmppc_linear_info *linear_info;
+	unsigned long mask;
+	unsigned long offset;
+	unsigned long *p;
+
+	offset = start & (BITS_PER_LONG - 1);
+	mask = ~0UL << offset;
+	nbits += offset;
+	p = bitmap + start / BITS_PER_LONG;
+	while (nbits >= BITS_PER_LONG) {
+		if (~*p & mask)
+			return nbits;
+		*p++ &= ~mask;
+		mask = ~0UL;
+		nbits -= BITS_PER_LONG;
+	}
+	if (nbits) {
+		mask &= ~(~0UL << nbits);
+		if (~*p & mask)
+			return nbits;
+		*p &= ~mask;
+	}
+	return 0;
+}
 
-	if (!count)
-		return;
+static void bitmap_free(unsigned long *bitmap, unsigned long start,
+			unsigned long n)
+{
+	long nr;
+
+	nr = clear_one_bits(bitmap, start, n);
+	if (nr) {
+		pr_err("KVM: oops, freeing not-in-use linear memory\n");
+		if (nr < n)
+			set_zero_bits(bitmap, start, n - nr);
+	}
+}
+
+static unsigned long lsbs[6] = {
+	~0UL,			/* LSBs of aligned 1-bit fields */
+	~0UL / 3,		/* LSBs of aligned 2-bit fields */
+	~0UL / 0xf,		/* LSBs of aligned 4-bit fields */
+	~0UL / 0xff,		/* LSBs of aligned 8-bit fields */
+	~0UL / 0xffff,		/* LSBs of aligned 16-bit fields */
+	~0UL / 0xffffffff,	/* LSBs of aligned 32-bit fields */
+};
+
+static inline int find_ls_one(unsigned long n)
+{
+	return __ffs(n);
+}
+
+static inline int find_ms_one(unsigned long n)
+{
+	return __fls(n);
+}
 
-	typestr = (type == KVM_LINEAR_RMA) ? "RMA" : "HPT";
-
-	npages = size >> PAGE_SHIFT;
-	linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info));
-	for (i = 0; i < count; ++i) {
-		linear = alloc_bootmem_align(size, size);
-		pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
-			 size >> 20);
-		linear_info[i].base_virt = linear;
-		linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT;
-		linear_info[i].npages = npages;
-		linear_info[i].type = type;
-		list_add_tail(&linear_info[i].list, &free_linears);
-		atomic_set(&linear_info[i].use_count, 0);
-
-		pg = pfn_to_page(linear_info[i].base_pfn);
-		for (j = 0; j < npages; ++j) {
-			atomic_inc(&pg->_count);
-			++pg;
+/*
+ * Allocate a contiguous region of 1 << order bits in a bitmap.
+ * If aligned != 0, require it to be aligned on a multiple of its size.
+ * Returns -1 if no suitable free region could be found.
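+ *
+ * For example, bitmap_alloc(bitmap, map_length, 3, 1) looks for a run
+ * of 8 zero bits starting at a multiple of 8, marks them in use and
+ * returns the bit number of the start of the run.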
+ */
+static int bitmap_alloc(unsigned long *bitmap, unsigned long map_length,
+			int order, int aligned)
+{
+	unsigned long n = 1ul << order;
+	int aorder = order;
+	unsigned long map, map2, *p;
+	unsigned long ls, ms;
+	unsigned long i, j, t;
+	unsigned long b, nb, nw, delta;
+
+	if (!order)
+		aligned = 1;
+	else if (!aligned)
+		--aorder;
+
+	/*
+	 * If we are allocating 32 bits or fewer, we can use arithmetic
+	 * tricks to find aligned free regions of 2^n bits.
+	 * If we need an aligned allocation, we just look for aligned
+	 * free regions of 2^order bits.
+	 * If we don't need an aligned allocation, we check if each word
+	 * has any aligned free regions of 2^(order - 1) bits, and if it
+	 * does, we find any free regions of n bits by smearing out the
+	 * one (in-use) bits in the word over the following n-1 bits,
+	 * and then looking for any remaining zero bits.
+	 */
+	p = bitmap;
+	if (n < BITS_PER_LONG) {
+		ls = lsbs[aorder];
+		ms = ls << ((1 << aorder) - 1);
+		j = map_length;
+		for (i = 0; i < map_length; i += BITS_PER_LONG) {
+			map = *p++;
+			t = (map - ls) & ~map & ms;
+			if (!t)
+				continue;
+			/* found aligned free space of size 1<<aorder */
+			if (aligned) {
+				j = i + (find_ls_one(t) & ~(n - 1));
+				break;
+			}
+			/* look for unaligned free space */
+			map2 = 0;
+			if (i < map_length - BITS_PER_LONG)
+				map2 = *p;
+			for (b = 1; b < n; b <<= 1) {
+				map |= map >> b;
+				map |= map2 << (BITS_PER_LONG - b);
+				map2 |= map2 >> b;
+			}
+			if (map != ~0UL) {
+				j = i + find_ls_one(~map);
+				break;
+			}
 		}
+	} else if (aligned) {
+		/*
+		 * Aligned allocations of 64 bits or more: we just
+		 * have to find `nw' consecutive 0 words in the bitmap
+		 * starting at a multiple of `nw'.
+		 */
+		nw = n / BITS_PER_LONG;
+		j = 0;
+		for (i = 0; i < map_length; i += BITS_PER_LONG) {
+			map = *p++;
+			if (map) {
+				delta = (n - BITS_PER_LONG) & ~i;
+				i += delta;
+				j = i + BITS_PER_LONG;
+				p += delta / BITS_PER_LONG;
+				nw = n / BITS_PER_LONG;
+			} else if (--nw == 0)
+				break;
+		}
+		if (nw)
+			j = map_length;
+	} else {
+		/*
+		 * Unaligned allocations of 64 bits or more: we only
+		 * need to consider groups of consecutive zeros at
+		 * either end of a word, and words that are all zeros.
+		 */
+		nb = 0;
+		j = 0;
+		for (i = 0; i < map_length; i += BITS_PER_LONG) {
+			map = *p++;
+			if (map == 0) {
+				nb += BITS_PER_LONG;
+				if (nb >= n)
+					break;
+				continue;
+			}
+			nb += find_ls_one(map);
+			if (nb >= n)
+				break;
+			j = find_ms_one(map) + 1;
+			nb = BITS_PER_LONG - j;
+			j += i;
+		}
+		if (nb < n)
+			j = map_length;
 	}
+
+	if (j >= map_length)
+		return -1;
+
+	nb = set_zero_bits(bitmap, j, n);
+	if (nb) {
+		pr_err("KVM: Bug in bitmap_alloc\n");
+		if (nb < n)
+			clear_one_bits(bitmap, j, n - nb);
+		return -1;
+	}
+
+	return j;
 }
 
-static struct kvmppc_linear_info *kvm_alloc_linear(int type)
+/*************** generic *************/
+
+static unsigned long *linear_bitmap;
+static unsigned long linear_bitmap_len;
+static char *linear_mem;
+static DEFINE_SPINLOCK(linear_lock);
+
+static struct kvmppc_linear_info *kvm_alloc_linear(int type, int order)
 {
-	struct kvmppc_linear_info *ri, *ret;
+	struct kvmppc_linear_info *ri;
+	int aligned = 1;
+	int index;
+
+	/* Do we have any preallocated memory at all? */
+	if (!linear_bitmap_len)
+		return NULL;
+
+	/* Convert log2(bytes) to log2(chunks) */
+	order -= KVM_CHUNK_ORDER;
+	if (order < 0)
+		order = 0;
+
+	/*
+	 * Assume arch 2.06 means POWER7, which doesn't require
+	 * the HPT to be aligned on a multiple of its size,
+	 * but only on a 256kB boundary.
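+	 * For example, a 16MB HPT can then be placed on any 256kB
+	 * chunk boundary rather than only on a 16MB boundary.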
+	 */
+	if (type == KVM_LINEAR_HPT && cpu_has_feature(CPU_FTR_ARCH_206))
+		aligned = 0;
+
+	ri = kzalloc(sizeof(*ri), GFP_KERNEL);
+	if (!ri)
+		return NULL;
 
-	ret = NULL;
 	spin_lock(&linear_lock);
-	list_for_each_entry(ri, &free_linears, list) {
-		if (ri->type != type)
-			continue;
-
-		list_del(&ri->list);
-		atomic_inc(&ri->use_count);
-		memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT);
-		ret = ri;
-		break;
-	}
+	index = bitmap_alloc(linear_bitmap, linear_bitmap_len, order, aligned);
 	spin_unlock(&linear_lock);
 
-	return ret;
+	if (index < 0) {
+		kfree(ri);
+		return NULL;
+	}
+
+	ri->base_virt = linear_mem + index * KVM_CHUNK_SIZE;
+	ri->base_pfn = __pa(ri->base_virt) >> PAGE_SHIFT;
+	ri->npages = (KVM_CHUNK_SIZE >> PAGE_SHIFT) << order;
+	ri->type = type;
+	atomic_set(&ri->use_count, 1);
+
+	memset(ri->base_virt, 0, ri->npages << PAGE_SHIFT);
+
+	return ri;
 }
 
 static void kvm_release_linear(struct kvmppc_linear_info *ri)
 {
-	if (atomic_dec_and_test(&ri->use_count)) {
-		spin_lock(&linear_lock);
-		list_add_tail(&ri->list, &free_linears);
-		spin_unlock(&linear_lock);
+	unsigned long index, n;
+	if (atomic_dec_and_test(&ri->use_count)) {
+		index = ((char *)ri->base_virt - linear_mem) / KVM_CHUNK_SIZE;
+		n = ri->npages / (KVM_CHUNK_SIZE >> PAGE_SHIFT);
+		if (index < linear_bitmap_len &&
+		    index + n <= linear_bitmap_len) {
+			spin_lock(&linear_lock);
+			bitmap_free(linear_bitmap, index, n);
+			spin_unlock(&linear_lock);
+		} else {
+			pr_err("KVM: oops, corrupted linear_info %p\n", ri);
+		}
+		kfree(ri);
 	}
 }
@@ -211,23 +437,51 @@ static void kvm_release_linear(struct kvmppc_linear_info *ri)
  */
 void __init kvm_linear_init(void)
 {
-	/* HPT */
-	kvm_linear_init_one(1 << kvm_hpt_order, kvm_hpt_count, KVM_LINEAR_HPT);
+	unsigned long total, nchunks, align;
+	struct page *pg;
+	unsigned long j, npages;
 
-	/* RMA */
-	/* Only do this on PPC970 in HV mode */
-	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
-	    !cpu_has_feature(CPU_FTR_ARCH_201))
+	/* only do this if HV KVM is possible on this cpu */
+	if (!cpu_has_feature(CPU_FTR_HVMODE))
 		return;
 
-	if (!kvm_rma_size || !kvm_rma_count)
-		return;
+	/* HPT */
+	total = kvm_hpt_count << kvm_hpt_order;
+
+	/* On PPC970 in HV mode, allow space for RMAs */
+	if (cpu_has_feature(CPU_FTR_ARCH_201)) {
+		if (lpcr_rmls(kvm_rma_size) < 0) {
+			pr_err("RMA size of 0x%lx not supported, using 0x%x\n",
+			       kvm_rma_size, DEFAULT_RMA_SIZE);
+			kvm_rma_size = DEFAULT_RMA_SIZE;
+		}
+		total += kvm_rma_count * kvm_rma_size;
+	}
 
-	/* Check that the requested size is one supported in hardware */
-	if (lpcr_rmls(kvm_rma_size) < 0) {
-		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
+	if (!total)
 		return;
-	}
 
-	kvm_linear_init_one(kvm_rma_size, kvm_rma_count, KVM_LINEAR_RMA);
+	/* round up to a multiple of the amount covered by a bitmap word (16MB) */
+	total = (total + KVM_CHUNK_SIZE * BITS_PER_LONG - 1) &
+		~(KVM_CHUNK_SIZE * BITS_PER_LONG - 1);
+
+	/* allocate bitmap of in-use chunks */
+	nchunks = total / KVM_CHUNK_SIZE;
+	linear_bitmap = alloc_bootmem(nchunks / BITS_PER_BYTE);
+	memset(linear_bitmap, 0, nchunks / BITS_PER_BYTE);
+	linear_bitmap_len = nchunks;
+
+	/* ask for maximum useful alignment, i.e. max power of 2 <= total */
+	align = 1ul << __ilog2(total);
+
+	linear_mem = alloc_bootmem_align(total, align);
+	pr_info("Allocated KVM memory at %p (%ld MB)\n", linear_mem,
+		total >> 20);
+
+	npages = total >> PAGE_SHIFT;
+	pg = virt_to_page(linear_mem);
+	for (j = 0; j < npages; ++j) {
+		atomic_inc(&pg->_count);
+		++pg;
+	}
 }
-- 
1.7.10.rc3.219.g53414
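
P.S. For anyone who wants to poke at the field-search trick used in
bitmap_alloc() -- the (map - ls) & ~map & ms expression -- outside the
kernel, here is a minimal stand-alone sketch.  It is illustrative only:
it uses fixed-width uint64_t and GCC's __builtin_ctzll() in place of the
kernel's unsigned long and __ffs(), and hard-codes a sample bitmap word.

#include <stdio.h>
#include <stdint.h>

/* LSBs of aligned 2^aorder-bit fields, mirroring the patch's lsbs[] table */
static const uint64_t lsbs[6] = {
	~(uint64_t)0,
	~(uint64_t)0 / 3,
	~(uint64_t)0 / 0xf,
	~(uint64_t)0 / 0xff,
	~(uint64_t)0 / 0xffff,
	~(uint64_t)0 / 0xffffffff,
};

int main(void)
{
	int aorder = 2;			/* look for an aligned 4-bit field */
	uint64_t map = 0x30f;		/* chunks 0-3 and 8-9 in use */
	uint64_t ls = lsbs[aorder];
	uint64_t ms = ls << ((1 << aorder) - 1);

	/* subtracting ls borrows across every field that contains a 1 bit;
	 * ~map & ms keeps only fields whose own top bit is free */
	uint64_t t = (map - ls) & ~map & ms;

	if (t) {
		/* round the lowest marked bit down to the start of its field */
		int bit = __builtin_ctzll(t) & ~((1 << aorder) - 1);
		/* prints: t = 0x8888888888888080, free field at bit 4 */
		printf("t = %#llx, free field at bit %d\n",
		       (unsigned long long)t, bit);
	}
	return 0;
}

As in the patch, only the least-significant set bit of t is trusted:
the borrow can leave spurious marks in fields above the lowest free
one, but the lowest marked field is always genuinely free.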