On Wed, Jun 22, 2022 at 3:16 AM Qi Zheng <zhengqi.arch@xxxxxxxxxxxxx> wrote: > > +static ssize_t lru_gen_seq_write(struct file *file, const char __user *src, > > + size_t len, loff_t *pos) > > +{ > > + void *buf; > > + char *cur, *next; > > + unsigned int flags; > > + struct blk_plug plug; > > + int err = -EINVAL; > > + struct scan_control sc = { > > + .may_writepage = true, > > + .may_unmap = true, > > + .may_swap = true, > > + .reclaim_idx = MAX_NR_ZONES - 1, > > + .gfp_mask = GFP_KERNEL, > > + }; > > + > > + buf = kvmalloc(len + 1, GFP_KERNEL); > > + if (!buf) > > + return -ENOMEM; > > + > > + if (copy_from_user(buf, src, len)) { > > + kvfree(buf); > > + return -EFAULT; > > + } > > + > > + if (!set_mm_walk(NULL)) { > > The current->reclaim_state will be dereferenced in set_mm_walk(), so > calling set_mm_walk() before set_task_reclaim_state(current, > &sc.reclaim_state) will cause panic: > > [ 1861.154916] BUG: kernel NULL pointer dereference, address: > 0000000000000008 Thanks. Apparently I shot myself in the foot by one of the nits between v11 and v12. > > + kvfree(buf); > > + return -ENOMEM; > > + } > > + > > + set_task_reclaim_state(current, &sc.reclaim_state); > > + flags = memalloc_noreclaim_save(); > > + blk_start_plug(&plug); > > + > > + next = buf; > > + next[len] = '\0'; > > + > > + while ((cur = strsep(&next, ",;\n"))) { > > + int n; > > + int end; > > + char cmd; > > + unsigned int memcg_id; > > + unsigned int nid; > > + unsigned long seq; > > + unsigned int swappiness = -1; > > + unsigned long opt = -1; > > + > > + cur = skip_spaces(cur); > > + if (!*cur) > > + continue; > > + > > + n = sscanf(cur, "%c %u %u %lu %n %u %n %lu %n", &cmd, &memcg_id, &nid, > > + &seq, &end, &swappiness, &end, &opt, &end); > > + if (n < 4 || cur[end]) { > > + err = -EINVAL; > > + break; > > + } > > + > > + err = run_cmd(cmd, memcg_id, nid, seq, &sc, swappiness, opt); > > + if (err) > > + break; > > + } > > + > > + blk_finish_plug(&plug); > > + memalloc_noreclaim_restore(flags); > > + set_task_reclaim_state(current, NULL); > > + > > + clear_mm_walk(); > > Ditto, we can't call clear_mm_walk() after > set_task_reclaim_state(current, NULL). > > Maybe it can be modified as follows: > > diff --git a/mm/vmscan.c b/mm/vmscan.c > index 2422edc786eb..552e6ae5243e 100644 > --- a/mm/vmscan.c > +++ b/mm/vmscan.c > @@ -5569,12 +5569,12 @@ static ssize_t lru_gen_seq_write(struct file > *file, const char __user *src, > return -EFAULT; > } > > + set_task_reclaim_state(current, &sc.reclaim_state); > if (!set_mm_walk(NULL)) { > kvfree(buf); > return -ENOMEM; > } > > - set_task_reclaim_state(current, &sc.reclaim_state); We need a `goto` because otherwise we leave a dangling `current->reclaim_state`. (I swear I had one.)