On 2023/07/28 0:10, Sebastian Andrzej Siewior wrote:
> On 2023-06-28 21:14:16 [+0900], Tetsuo Handa wrote:
>>> Anyway, please do not do this change only because of printk().
>>> IMHO, the current ordering is more logical and the printk() problem
>>> should be solved another way.
>>
>> Then, since [PATCH 1/2] cannot be applied, [PATCH 2/2] is automatically
>> rejected.
>
> My understanding is that this patch gets applied and your objection will
> be noted.

My preference is that zonelist_update_seq is not checked by
!__GFP_DIRECT_RECLAIM allocations, which is low-hanging fruit towards the
GFP_LOCKLESS idea mentioned at
https://lkml.kernel.org/r/ZG3+l4qcCWTPtSMD@xxxxxxxxxxxxxx and
https://lkml.kernel.org/r/ZJWWpGZMJIADQvRS@xxxxxxxxxxxxxx .

Maybe we can defer checking zonelist_update_seq until the retry check, as
below, since this is really an infrequent event.

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7d3460c7a480..2f7b82af2590 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3642,22 +3642,27 @@ EXPORT_SYMBOL_GPL(fs_reclaim_release);
  * retries the allocation if zonelist changes. Writer side is protected by the
  * embedded spin_lock.
  */
-static DEFINE_SEQLOCK(zonelist_update_seq);
+static unsigned int zonelist_update_seq;
 
 static unsigned int zonelist_iter_begin(void)
 {
 	if (IS_ENABLED(CONFIG_MEMORY_HOTREMOVE))
-		return read_seqbegin(&zonelist_update_seq);
+		return data_race(READ_ONCE(zonelist_update_seq));
 
 	return 0;
 }
 
-static unsigned int check_retry_zonelist(unsigned int seq)
+static unsigned int check_retry_zonelist(gfp_t gfp, unsigned int seq)
 {
-	if (IS_ENABLED(CONFIG_MEMORY_HOTREMOVE))
-		return read_seqretry(&zonelist_update_seq, seq);
+	if (IS_ENABLED(CONFIG_MEMORY_HOTREMOVE) && (gfp & __GFP_DIRECT_RECLAIM)) {
+		unsigned int seq2;
+
+		smp_rmb();
+		seq2 = data_race(READ_ONCE(zonelist_update_seq));
+		return unlikely(seq != seq2 || (seq2 & 1));
+	}
 
-	return seq;
+	return 0;
 }
 
 /* Perform direct synchronous page reclaim */
@@ -4146,7 +4151,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	 * a unnecessary OOM kill.
 	 */
 	if (check_retry_cpuset(cpuset_mems_cookie, ac) ||
-	    check_retry_zonelist(zonelist_iter_cookie))
+	    check_retry_zonelist(gfp_mask, zonelist_iter_cookie))
 		goto restart;
 
 	/* Reclaim has failed us, start killing things */
@@ -4172,7 +4177,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	 * a unnecessary OOM kill.
 	 */
 	if (check_retry_cpuset(cpuset_mems_cookie, ac) ||
-	    check_retry_zonelist(zonelist_iter_cookie))
+	    check_retry_zonelist(gfp_mask, zonelist_iter_cookie))
 		goto restart;
 
 	/*
@@ -5136,22 +5141,12 @@ static void __build_all_zonelists(void *data)
 	int nid;
 	int __maybe_unused cpu;
 	pg_data_t *self = data;
+	static DEFINE_SPINLOCK(lock);
 	unsigned long flags;
 
-	/*
-	 * Explicitly disable this CPU's interrupts before taking seqlock
-	 * to prevent any IRQ handler from calling into the page allocator
-	 * (e.g. GFP_ATOMIC) that could hit zonelist_iter_begin and livelock.
-	 */
-	local_irq_save(flags);
-	/*
-	 * Explicitly disable this CPU's synchronous printk() before taking
-	 * seqlock to prevent any printk() from trying to hold port->lock, for
-	 * tty_insert_flip_string_and_push_buffer() on other CPU might be
-	 * calling kmalloc(GFP_ATOMIC | __GFP_NOWARN) with port->lock held.
-	 */
-	printk_deferred_enter();
-	write_seqlock(&zonelist_update_seq);
+	spin_lock_irqsave(&lock, flags);
+	data_race(zonelist_update_seq++);
+	smp_wmb();
 
 #ifdef CONFIG_NUMA
 	memset(node_load, 0, sizeof(node_load));
@@ -5188,9 +5183,9 @@ static void __build_all_zonelists(void *data)
 #endif
 	}
 
-	write_sequnlock(&zonelist_update_seq);
-	printk_deferred_exit();
-	local_irq_restore(flags);
+	smp_wmb();
+	data_race(zonelist_update_seq++);
+	spin_unlock_irqrestore(&lock, flags);
 }
 
 static noinline void __init
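
For reference, what the patch open-codes is the usual seqcount protocol:
the writer keeps the counter odd while an update is in flight, and a
reader retries if the counter changed or is odd, except that here the
check is only paid by allocations that can retry anyway (the
__GFP_DIRECT_RECLAIM case). Below is a minimal userspace sketch of that
protocol; this is my illustration, not kernel code: C11 atomics and a
pthread mutex stand in for READ_ONCE()/data_race(), smp_rmb()/smp_wmb()
and the spinlock, and all names are made up.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_uint update_seq;	/* plays the role of zonelist_update_seq */
static atomic_int shared_data;	/* plays the role of the zonelists; relaxed
				   accesses stand in for data_race() */
static pthread_mutex_t update_lock = PTHREAD_MUTEX_INITIALIZER;

/* reader: analogue of zonelist_iter_begin(); note it never spins */
static unsigned int iter_begin(void)
{
	return atomic_load_explicit(&update_seq, memory_order_acquire);
}

/* reader: analogue of check_retry_zonelist(); the acquire fence plays
 * the role of smp_rmb(), ordering the data reads before the re-read */
static int check_retry(unsigned int seq)
{
	unsigned int seq2;

	atomic_thread_fence(memory_order_acquire);
	seq2 = atomic_load_explicit(&update_seq, memory_order_relaxed);
	return seq != seq2 || (seq2 & 1);
}

/* writer: analogue of __build_all_zonelists(); the mutex serializes
 * writers and the release fences play the role of smp_wmb() */
static void writer_update(int value)
{
	pthread_mutex_lock(&update_lock);
	atomic_fetch_add_explicit(&update_seq, 1, memory_order_relaxed); /* odd */
	atomic_thread_fence(memory_order_release);
	atomic_store_explicit(&shared_data, value, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);
	atomic_fetch_add_explicit(&update_seq, 1, memory_order_relaxed); /* even */
	pthread_mutex_unlock(&update_lock);
}

int main(void)
{
	unsigned int seq;
	int snapshot;

	writer_update(42);	/* reader and writer would normally run on
				   different threads */
	do {
		seq = iter_begin();
		snapshot = atomic_load_explicit(&shared_data,
						memory_order_relaxed);
	} while (check_retry(seq));
	printf("consistent snapshot: %d\n", snapshot);
	return 0;
}

Unlike read_seqbegin(), iter_begin() above never waits for an odd counter;
an allocation that overlaps an update simply retries at the far end (or,
for !__GFP_DIRECT_RECLAIM, skips the check entirely), which is why the
writer side should no longer need local_irq_save() and
printk_deferred_enter() to avoid the livelock and port->lock deadlock
described in the removed comments.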