Re: [PATCH net-next v4 1/3] connector/cn_proc: Add hash table for threads

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 10/16, Anjali Kulkarni wrote:
> Add a new type PROC_CN_MCAST_NOTIFY to proc connector API, which allows a
> thread to notify the kernel that is going to exit with a non-zero exit
> code and specify the exit code in it. When thread exits in the kernel,
> it will send this exit code as a proc filter notification to any
> listening process.
> Exiting thread can call this either when it wants to call pthread_exit()
> with non-zero value or from signal handler.
> 
> Add a new file cn_hash.c which implements a hash table storing the exit
> codes of abnormally exiting threads, received by the system call above.
> The key used for the hash table is the pid of the thread, so when the
> thread actually exits, we lookup it's pid in the hash table and retrieve
> the exit code sent by user. If the exit code in struct task is 0, we
> then replace it with the user supplied non-zero exit code.
> 
> cn_hash.c implements the hash table add, delete, lookup operations.
> mutex_lock() and mutex_unlock() operations are used to safeguard the
> integrity of the hash table while adding or deleting elements.
> connector.c has the API calls, called from cn_proc.c, as well as calls
> to allocate, initialize and free the hash table.
> 
> Add a new flag in PF_* flags of task_struct - EXIT_NOTIFY. This flag is
> set when user sends the exit code via PROC_CN_MCAST_NOTIFY. While
> exiting, this flag is checked and the hash table add or delete calls
> are only made if this flag is set.
> 
> A refcount field hrefcnt is added in struct cn_hash_dev, to keep track
> of number of threads which have added an entry in hash table. Before
> freeing the struct cn_hash_dev, this value must be 0.
> This refcnt check is added in case CONFIG_CONNECTOR is compiled as a
> module. In that case, when unloading the module, we need to make sure
> no hash entries are still present in the hdev table.
> 
> Copy the task's name (task->comm) into the exit event notification.
> This will allow applications to filter on the name further using
> userspace filtering like ebpf.
> 
> Signed-off-by: Anjali Kulkarni <anjali.k.kulkarni@xxxxxxxxxx>
> ---
>  drivers/connector/Makefile    |   2 +-
>  drivers/connector/cn_hash.c   | 181 ++++++++++++++++++++++++++++++++++
>  drivers/connector/cn_proc.c   |  62 +++++++++++-
>  drivers/connector/connector.c |  63 +++++++++++-
>  include/linux/connector.h     |  31 ++++++
>  include/linux/sched.h         |   2 +-
>  include/uapi/linux/cn_proc.h  |   5 +-
>  7 files changed, 338 insertions(+), 8 deletions(-)
>  create mode 100644 drivers/connector/cn_hash.c
> 
> diff --git a/drivers/connector/Makefile b/drivers/connector/Makefile
> index 1bf67d3df97d..cb1dcdf067ad 100644
> --- a/drivers/connector/Makefile
> +++ b/drivers/connector/Makefile
> @@ -2,4 +2,4 @@
>  obj-$(CONFIG_CONNECTOR)		+= cn.o
>  obj-$(CONFIG_PROC_EVENTS)	+= cn_proc.o
>  
> -cn-y				+= cn_queue.o connector.o
> +cn-y				+= cn_hash.o cn_queue.o connector.o
> diff --git a/drivers/connector/cn_hash.c b/drivers/connector/cn_hash.c
> new file mode 100644
> index 000000000000..a079e9bcea6d
> --- /dev/null
> +++ b/drivers/connector/cn_hash.c
> @@ -0,0 +1,181 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Author: Anjali Kulkarni <anjali.k.kulkarni@xxxxxxxxxx>
> + *
> + * Copyright (c) 2024 Oracle and/or its affiliates.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/init.h>
> +#include <linux/connector.h>
> +#include <linux/mutex.h>
> +#include <linux/pid_namespace.h>
> +
> +#include <linux/cn_proc.h>
> +
> +struct cn_hash_dev *cn_hash_alloc_dev(const char *name)
> +{
> +	struct cn_hash_dev *hdev;
> +
> +	hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
> +	if (!hdev)
> +		return NULL;
> +
> +	snprintf(hdev->name, sizeof(hdev->name), "%s", name);
> +	atomic_set(&hdev->hrefcnt, 0);
> +	mutex_init(&hdev->uexit_hash_lock);
> +	hash_init(hdev->uexit_pid_htable);
> +	return hdev;
> +}
> +
> +void cn_hash_free_dev(struct cn_hash_dev *hdev)
> +{
> +	struct uexit_pid_hnode *hnode;
> +	struct hlist_node *tmp;
> +	int bucket;
> +
> +	pr_debug("%s: Freeing entire hdev %p\n", __func__, hdev);
> +
> +	mutex_lock(&hdev->uexit_hash_lock);
> +	hash_for_each_safe(hdev->uexit_pid_htable, bucket, tmp,
> +			hnode, uexit_pid_hlist) {
> +		hash_del(&hnode->uexit_pid_hlist);
> +		pr_debug("%s: Freeing node for pid %d\n",
> +				__func__, hnode->pid);
> +		kfree(hnode);
> +	}
> +
> +	mutex_unlock(&hdev->uexit_hash_lock);
> +	mutex_destroy(&hdev->uexit_hash_lock);
> +
> +	/*
> +	 * This refcnt check is added in case CONFIG_CONNECTOR is
> +	 * compiled with =m as a module. In that case, when unloading
> +	 * the module, we need to make sure no hash entries are still
> +	 * present in the hdev table.
> +	 */
> +	while (atomic_read(&hdev->hrefcnt)) {
> +		pr_info("Waiting for %s to become free: refcnt=%d\n",
> +				hdev->name, atomic_read(&hdev->hrefcnt));
> +		msleep(1000);
> +	}
> +
> +	kfree(hdev);
> +	hdev = NULL;
> +}
> +
> +static struct uexit_pid_hnode *cn_hash_alloc_elem(__u32 uexit_code, pid_t pid)
> +{
> +	struct uexit_pid_hnode *elem;
> +
> +	elem = kzalloc(sizeof(*elem), GFP_KERNEL);
> +	if (!elem)
> +		return NULL;
> +
> +	INIT_HLIST_NODE(&elem->uexit_pid_hlist);
> +	elem->uexit_code = uexit_code;
> +	elem->pid = pid;
> +	return elem;
> +}
> +
> +static inline void cn_hash_free_elem(struct uexit_pid_hnode *elem)
> +{
> +	kfree(elem);
> +}
> +
> +int cn_hash_add_elem(struct cn_hash_dev *hdev, __u32 uexit_code, pid_t pid)
> +{
> +	struct uexit_pid_hnode *elem, *hnode;
> +
> +	elem = cn_hash_alloc_elem(uexit_code, pid);
> +	if (!elem) {
> +		pr_err("%s: cn_hash_alloc_elem() returned NULL pid %d\n",
> +				__func__, pid);
> +		return -ENOMEM;
> +	}
> +
> +	mutex_lock(&hdev->uexit_hash_lock);
> +	/*
> +	 * Check if an entry for the same pid already exists
> +	 */
> +	hash_for_each_possible(hdev->uexit_pid_htable,
> +				hnode, uexit_pid_hlist, pid) {
> +		if (hnode->pid == pid) {
> +			mutex_unlock(&hdev->uexit_hash_lock);
> +			cn_hash_free_elem(elem);
> +			pr_debug("%s: pid %d already exists in hash table\n",
> +				__func__, pid);
> +			return -EEXIST;
> +		}
> +	}
> +
> +	hash_add(hdev->uexit_pid_htable, &elem->uexit_pid_hlist, pid);
> +	mutex_unlock(&hdev->uexit_hash_lock);
> +
> +	atomic_inc(&hdev->hrefcnt);
> +
> +	pr_debug("%s: After hash_add of pid %d elem %p hrefcnt %d\n",
> +			__func__, pid, elem, atomic_read(&hdev->hrefcnt));
> +	return 0;
> +}
> +
> +int cn_hash_del_get_exval(struct cn_hash_dev *hdev, pid_t pid)
> +{
> +	struct uexit_pid_hnode *hnode;
> +	struct hlist_node *tmp;
> +	int excde;
> +
> +	mutex_lock(&hdev->uexit_hash_lock);
> +	hash_for_each_possible_safe(hdev->uexit_pid_htable,
> +				hnode, tmp, uexit_pid_hlist, pid) {
> +		if (hnode->pid == pid) {
> +			excde = hnode->uexit_code;
> +			hash_del(&hnode->uexit_pid_hlist);
> +			mutex_unlock(&hdev->uexit_hash_lock);
> +			kfree(hnode);
> +			atomic_dec(&hdev->hrefcnt);
> +			pr_debug("%s: After hash_del of pid %d, found exit code %u hrefcnt %d\n",
> +					__func__, pid, excde,
> +					atomic_read(&hdev->hrefcnt));
> +			return excde;
> +		}
> +	}
> +
> +	mutex_unlock(&hdev->uexit_hash_lock);
> +	pr_err("%s: pid %d not found in hash table\n",
> +			__func__, pid);
> +	return -EINVAL;
> +}
> +
> +int cn_hash_get_exval(struct cn_hash_dev *hdev, pid_t pid)
> +{
> +	struct uexit_pid_hnode *hnode;
> +	__u32 excde;
> +
> +	mutex_lock(&hdev->uexit_hash_lock);
> +	hash_for_each_possible(hdev->uexit_pid_htable,
> +				hnode, uexit_pid_hlist, pid) {
> +		if (hnode->pid == pid) {
> +			excde = hnode->uexit_code;
> +			mutex_unlock(&hdev->uexit_hash_lock);
> +			pr_debug("%s: Found exit code %u for pid %d\n",
> +					__func__, excde, pid);
> +			return excde;
> +		}
> +	}
> +
> +	mutex_unlock(&hdev->uexit_hash_lock);
> +	pr_debug("%s: pid %d not found in hash table\n",
> +			__func__, pid);
> +	return -EINVAL;
> +}
> +
> +bool cn_hash_table_empty(struct cn_hash_dev *hdev)
> +{
> +	bool is_empty;
> +
> +	is_empty = hash_empty(hdev->uexit_pid_htable);
> +	pr_debug("Hash table is %s\n", (is_empty ? "empty" : "not empty"));
> +
> +	return is_empty;
> +}
> diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
> index 44b19e696176..0632a70a89a0 100644
> --- a/drivers/connector/cn_proc.c
> +++ b/drivers/connector/cn_proc.c
> @@ -69,6 +69,8 @@ static int cn_filter(struct sock *dsk, struct sk_buff *skb, void *data)
>  	if ((__u32)val == PROC_EVENT_ALL)
>  		return 0;
>  
> +	pr_debug("%s: val %lx, what %x\n", __func__, val, what);
> +
>  	/*
>  	 * Drop packet if we have to report only non-zero exit status
>  	 * (PROC_EVENT_NONZERO_EXIT) and exit status is 0
> @@ -326,9 +328,15 @@ void proc_exit_connector(struct task_struct *task)
>  	struct proc_event *ev;
>  	struct task_struct *parent;
>  	__u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
> +	int uexit_code;
>  
> -	if (atomic_read(&proc_event_num_listeners) < 1)
> +	if (atomic_read(&proc_event_num_listeners) < 1) {
> +		if (likely(!(task->flags & PF_EXIT_NOTIFY)))
> +			return;
> +
> +		cn_del_get_exval(task->pid);
>  		return;
> +	}
>  
>  	msg = buffer_to_cn_msg(buffer);
>  	ev = (struct proc_event *)msg->data;
> @@ -337,7 +345,26 @@ void proc_exit_connector(struct task_struct *task)
>  	ev->what = PROC_EVENT_EXIT;
>  	ev->event_data.exit.process_pid = task->pid;
>  	ev->event_data.exit.process_tgid = task->tgid;
> -	ev->event_data.exit.exit_code = task->exit_code;
> +	if (unlikely(task->flags & PF_EXIT_NOTIFY)) {
> +		task->flags &= ~PF_EXIT_NOTIFY;
> +
> +		uexit_code = cn_del_get_exval(task->pid);
> +		if (uexit_code <= 0) {
> +			pr_debug("%s: err %d returning task's exit code %u\n",
> +					uexit_code, __func__,
> +					task->exit_code);

(I might have commented on the older revision, but same issue here)

In file included from ./include/linux/kernel.h:31,
                 from drivers/connector/cn_proc.c:11:
drivers/connector/cn_proc.c: In function ‘proc_exit_connector’:
drivers/connector/cn_proc.c:353:34: error: format ‘%s’ expects argument of type ‘char *’, but argument 3 has type ‘int’ [-Werror=format=]
  353 |                         pr_debug("%s: err %d returning task's exit code %u\n",
      |                                  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

---
pw-bot: cr




[Index of Archives]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Share Photos]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Samba]     [Device Mapper]

  Powered by Linux