Re: + pid-optional-first-fit-pid-allocation.patch added to mm-nonmm-unstable branch

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Feb 21, 2025 at 04:21:56PM -0800, Andrew Morton wrote:
> 
> The patch titled
>      Subject: pid: Optional first-fit pid allocation
> has been added to the -mm mm-nonmm-unstable branch.  Its filename is
>      pid-optional-first-fit-pid-allocation.patch
> 
> This patch will shortly appear at
>      https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/pid-optional-first-fit-pid-allocation.patch
> 
> This patch will later appear in the mm-nonmm-unstable branch at
>     git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
> 
> Before you just go and hit "reply", please:
>    a) Consider who else should be cc'ed
>    b) Prefer to cc a suitable mailing list as well
>    c) Ideally: find the original patch on the mailing list and do a
>       reply-to-all to that, adding suitable additional cc's
> 
> *** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
> 
> The -mm tree is included into linux-next via the mm-everything
> branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
> and is updated there every 2-3 working days
> 
> ------------------------------------------------------
> From: =?UTF-8?q?Michal=20Koutn=C3=BD?= <mkoutny@xxxxxxxx>
> Subject: pid: Optional first-fit pid allocation
> Date: Fri, 21 Feb 2025 18:02:49 +0100
> 
> Noone would need to use this allocation strategy (it's slower, pid
> numbers collide sooner). Its primary purpose are pid namespaces in
> conjunction with pids.max cgroup limit which keeps (virtual) pid numbers
> below the given limit. This is for 32-bit userspace programs that may
> not work well with pid numbers above 65536.
> 
> Link: https://lore.kernel.org/r/20241122132459.135120-1-aleksandr.mikhalitsyn@xxxxxxxxxxxxx/
> Link: https://lkml.kernel.org/r/20250221170249.890014-3-mkoutny@xxxxxxxx
> Signed-off-by: Michal Koutný <mkoutny@xxxxxxxx>
> Cc: Alexander Mikhalitsyn <alexander@xxxxxxxxxxxxx>
> Cc: Christian Brauner <brauner@xxxxxxxxxx>
> Cc: Eric Biederman <ebiederm@xxxxxxxxxxxx>
> Cc: Jonathan Corbet <corbet@xxxxxxx>
> Cc: Kees Cook <kees@xxxxxxxxxx>
> Cc: Oleg Nesterov <oleg@xxxxxxxxxx>
> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> ---

Hm, I've never received this patch yet it reverts something and then
substitutes another approach and has no Acks? Don't get me wrong I'm not
objecting to it per se but I did not have any chance to actually look at
it.

> 
>  Documentation/admin-guide/sysctl/kernel.rst |    2 +
>  include/linux/pid_namespace.h               |    3 +
>  kernel/pid.c                                |   12 ++++++-
>  kernel/pid_namespace.c                      |   28 +++++++++++++-----
>  4 files changed, 36 insertions(+), 9 deletions(-)
> 
> --- a/Documentation/admin-guide/sysctl/kernel.rst~pid-optional-first-fit-pid-allocation
> +++ a/Documentation/admin-guide/sysctl/kernel.rst
> @@ -1043,6 +1043,8 @@ The last pid allocated in the current (t
>  lives in) pid namespace. When selecting a pid for a next task on fork
>  kernel tries to allocate a number starting from this one.
>  
> +When set to -1, first-fit pid numbering is used instead of the next-fit.
> +
>  
>  powersave-nap (PPC only)
>  ========================
> --- a/include/linux/pid_namespace.h~pid-optional-first-fit-pid-allocation
> +++ a/include/linux/pid_namespace.h
> @@ -41,6 +41,9 @@ struct pid_namespace {
>  #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
>  	int memfd_noexec_scope;
>  #endif
> +#ifdef CONFIG_IA32_EMULATION
> +	bool pid_noncyclic;
> +#endif
>  } __randomize_layout;
>  
>  extern struct pid_namespace init_pid_ns;
> --- a/kernel/pid.c~pid-optional-first-fit-pid-allocation
> +++ a/kernel/pid.c
> @@ -191,6 +191,10 @@ struct pid *alloc_pid(struct pid_namespa
>  
>  	for (i = ns->level; i >= 0; i--) {
>  		int tid = 0;
> +		bool pid_noncyclic = 0;
> +#ifdef CONFIG_IA32_EMULATION
> +		pid_noncyclic = READ_ONCE(tmp->pid_noncyclic);
> +#endif
>  
>  		if (set_tid_size) {
>  			tid = set_tid[ns->level - i];
> @@ -235,8 +239,12 @@ struct pid *alloc_pid(struct pid_namespa
>  			 * Store a null pointer so find_pid_ns does not find
>  			 * a partially initialized PID (see below).
>  			 */
> -			nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min,
> -					      pid_max, GFP_ATOMIC);
> +			if (likely(!pid_noncyclic))
> +				nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min,
> +						      pid_max, GFP_ATOMIC);
> +			else
> +				nr = idr_alloc(&tmp->idr, NULL, pid_min,
> +						      pid_max, GFP_ATOMIC);
>  		}
>  		spin_unlock_irq(&pidmap_lock);
>  		idr_preload_end();
> --- a/kernel/pid_namespace.c~pid-optional-first-fit-pid-allocation
> +++ a/kernel/pid_namespace.c
> @@ -114,6 +114,9 @@ static struct pid_namespace *create_pid_
>  #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
>  	ns->memfd_noexec_scope = pidns_memfd_noexec_scope(parent_pid_ns);
>  #endif
> +#ifdef CONFIG_IA32_EMULATION
> +	ns->pid_noncyclic = READ_ONCE(parent_pid_ns->pid_noncyclic);
> +#endif
>  	return ns;
>  
>  out_free_idr:
> @@ -260,7 +263,7 @@ void zap_pid_ns_processes(struct pid_nam
>  	return;
>  }
>  
> -#ifdef CONFIG_CHECKPOINT_RESTORE
> +#if defined(CONFIG_CHECKPOINT_RESTORE) || defined(CONFIG_IA32_EMULATION)
>  static int pid_ns_ctl_handler(const struct ctl_table *table, int write,
>  		void *buffer, size_t *lenp, loff_t *ppos)
>  {
> @@ -271,12 +274,23 @@ static int pid_ns_ctl_handler(const stru
>  	if (write && !checkpoint_restore_ns_capable(pid_ns->user_ns))
>  		return -EPERM;
>  
> -	next = idr_get_cursor(&pid_ns->idr) - 1;
> +	next = -1;
> +#ifdef CONFIG_IA32_EMULATION
> +	if (!pid_ns->pid_noncyclic)
> +#endif
> +		next += idr_get_cursor(&pid_ns->idr);
>  
>  	tmp.data = &next;
>  	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
> -	if (!ret && write)
> -		idr_set_cursor(&pid_ns->idr, next + 1);
> +	if (!ret && write) {
> +		if (next > -1)
> +			idr_set_cursor(&pid_ns->idr, next + 1);
> +		else if (!IS_ENABLED(CONFIG_IA32_EMULATION))
> +			ret = -EINVAL;
> +#ifdef CONFIG_IA32_EMULATION
> +		WRITE_ONCE(pid_ns->pid_noncyclic, next == -1);
> +#endif
> +	}
>  
>  	return ret;
>  }
> @@ -288,11 +302,11 @@ static const struct ctl_table pid_ns_ctl
>  		.maxlen = sizeof(int),
>  		.mode = 0666, /* permissions are checked in the handler */
>  		.proc_handler = pid_ns_ctl_handler,
> -		.extra1 = SYSCTL_ZERO,
> +		.extra1 = SYSCTL_NEG_ONE,
>  		.extra2 = &pid_max,
>  	},
>  };
> -#endif	/* CONFIG_CHECKPOINT_RESTORE */
> +#endif	/* CONFIG_CHECKPOINT_RESTORE || CONFIG_IA32_EMULATION */
>  
>  int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
>  {
> @@ -449,7 +463,7 @@ static __init int pid_namespaces_init(vo
>  {
>  	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC | SLAB_ACCOUNT);
>  
> -#ifdef CONFIG_CHECKPOINT_RESTORE
> +#if defined(CONFIG_CHECKPOINT_RESTORE) || defined(CONFIG_IA32_EMULATION)
>  	register_sysctl_init("kernel", pid_ns_ctl_table);
>  #endif
>  
> _
> 
> Patches currently in -mm which might be from mkoutny@xxxxxxxx are
> 
> revert-pid-allow-pid_max-to-be-set-per-pid-namespace.patch
> pid-optional-first-fit-pid-allocation.patch
> 




[Index of Archives]     [Kernel Archive]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux