On Wed, Feb 25, 2009 at 01:52:35PM +0900, Tejun Heo wrote: > Commit c132937556f56ee4b831ef4b23f1846e05fde102 tried to clean up > bootmem arch wrapper but it wasn't quite correct. Before the commit, > the followings were broken. > > * Low level interface functions prefixed with __ ignored arch > preference. > > * reserve_bootmem(...) can't be mapped into > reserve_bootmem_node(NODE_DATA(0)->bdata, ...) because the node is > not preference here. The region specified MUST fall into the > specified region; otherwise, it will panic. > > After the commit, > > * If allocation fails for the arch preferred node, it should fallback > to whatever is available. Instead, it simply failed allocation. > > There are too many internal details to allow generic wrapping and > still keep things simple for archs. Plus, all that arch wants is a > way to prefer certain node over another. > > This patch drops the generic wrapping around alloc_bootmem_core() and > add alloc_bootmem_core() instead. If necessary, arch can define > bootmem_arch_referred_node() macro or function which takes all > allocation information and returns the preferred node. bootmem > generic code will always try the preferred node first and then > fallback to other nodes as usual. > > Signed-off-by: Tejun Heo <tj@xxxxxxxxxx> > Cc: Johannes Weiner <hannes@xxxxxxxxxxx> > --- > It turns out reserve_bootmem() shouldn't be wrapped in the first > place. I hope I got it right this time. Can you please review this > one? And I didn't think your comment was rude at all, no worries. You are right! Sorry, I missed that. Yes, reserve_bootmem() operates on ranges disregarding nodes. > arch/x86/include/asm/mmzone_32.h | 8 +----- > mm/bootmem.c | 45 ++++++++++++++++++++++++++------------- > 2 files changed, 32 insertions(+), 21 deletions(-) > > diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h > index eeacf67..ede6998 100644 > --- a/arch/x86/include/asm/mmzone_32.h > +++ b/arch/x86/include/asm/mmzone_32.h > @@ -92,12 +92,8 @@ static inline int pfn_valid(int pfn) > > #ifdef CONFIG_NEED_MULTIPLE_NODES > /* always use node 0 for bootmem on this numa platform */ > -#define alloc_bootmem_core(__bdata, size, align, goal, limit) \ > -({ \ > - bootmem_data_t __maybe_unused * __abm_bdata_dummy = (__bdata); \ > - __alloc_bootmem_core(NODE_DATA(0)->bdata, \ > - (size), (align), (goal), (limit)); \ > -}) > +#define bootmem_arch_preferred_node(__bdata, size, align, goal, limit) \ > + (NODE_DATA(0)->bdata) > #endif /* CONFIG_NEED_MULTIPLE_NODES */ > > #endif /* _ASM_X86_MMZONE_32_H */ > diff --git a/mm/bootmem.c b/mm/bootmem.c > index d7140c0..daf9271 100644 > --- a/mm/bootmem.c > +++ b/mm/bootmem.c > @@ -37,16 +37,6 @@ static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list); > > static int bootmem_debug; > > -/* > - * If an arch needs to apply workarounds to bootmem allocation, it can > - * set CONFIG_HAVE_ARCH_BOOTMEM and define a wrapper around > - * __alloc_bootmem_core(). > - */ > -#ifndef CONFIG_HAVE_ARCH_BOOTMEM > -#define alloc_bootmem_core(bdata, size, align, goal, limit) \ > - __alloc_bootmem_core((bdata), (size), (align), (goal), (limit)) > -#endif > - > static int __init bootmem_debug_setup(char *buf) > { > bootmem_debug = 1; > @@ -436,9 +426,9 @@ static unsigned long align_off(struct bootmem_data *bdata, unsigned long off, > return ALIGN(base + off, align) - base; > } > > -static void * __init __alloc_bootmem_core(struct bootmem_data *bdata, > - unsigned long size, unsigned long align, > - unsigned long goal, unsigned long limit) > +static void * __init alloc_bootmem_core(struct bootmem_data *bdata, > + unsigned long size, unsigned long align, > + unsigned long goal, unsigned long limit) > { > unsigned long fallback = 0; > unsigned long min, max, start, sidx, midx, step; > @@ -538,17 +528,34 @@ find_block: > return NULL; > } > > +static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata, > + unsigned long size, unsigned long align, > + unsigned long goal, unsigned long limit) > +{ > +#ifdef CONFIG_HAVE_ARCH_BOOTMEM > + bootmem_data_t *p_bdata; > + > + p_bdata = bootmem_arch_preferred_node(bdata, size, align, goal, limit); > + if (p_bdata) > + return alloc_bootmem_core(p_bdata, size, align, goal, limit); > +#endif > + return NULL; > +} > + > static void * __init ___alloc_bootmem_nopanic(unsigned long size, > unsigned long align, > unsigned long goal, > unsigned long limit) > { > bootmem_data_t *bdata; > + void *region; > > restart: > - list_for_each_entry(bdata, &bdata_list, list) { > - void *region; > + region = alloc_arch_preferred_bootmem(NULL, size, align, goal, limit); > + if (region) > + return region; > > + list_for_each_entry(bdata, &bdata_list, list) { > if (goal && bdata->node_low_pfn <= PFN_DOWN(goal)) > continue; > if (limit && bdata->node_min_pfn >= PFN_DOWN(limit)) > @@ -626,6 +633,10 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, > { > void *ptr; > > + ptr = alloc_arch_preferred_bootmem(bdata, size, align, goal, limit); > + if (ptr) > + return ptr; > + > ptr = alloc_bootmem_core(bdata, size, align, goal, limit); > if (ptr) > return ptr; > @@ -682,6 +693,10 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, > { > void *ptr; > > + ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); > + if (ptr) > + return ptr; > + > ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); > if (ptr) > return ptr; Well, okay. It is a bugfix for preferring the arch node even with the __interface. But it brings ifdeffery/special casing to bootmem that was before in a header file and is only for a very rare hardware setup, so I don't think it is a cleanup, rather a necessity. For now, let's just go with it. Acked-by: Johannes Weiner <hannes@xxxxxxxxxxx> -- To unsubscribe from this list: send the line "unsubscribe linux-tip-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html