* Anjali Kulkarni <anjali.k.kulkarni@xxxxxxxxxx> [241015 13:30]: > Add a new type PROC_CN_MCAST_NOTIFY to proc connector API, which allows a > thread to notify the kernel that it is going to exit with a non-zero exit > code and specify the exit code in it. When thread exits in the kernel, > it will send this exit code as a proc filter notification to any > listening process. > Exiting thread can call this either when it wants to call pthread_exit() > with non-zero value or from signal handler. > > Add a new file cn_hash.c which implements a hash table storing the exit > codes of abnormally exiting threads, received by the system call above. > The key used for the hash table is the pid of the thread, so when the > thread actually exits, we look up its pid in the hash table and retrieve > the exit code sent by user. If the exit code in struct task is 0, we > then replace it with the user supplied non-zero exit code. > > cn_hash.c implements the hash table add, delete, lookup operations. > mutex_lock() and mutex_unlock() operations are used to safeguard the > integrity of the hash table while adding or deleting elements. > connector.c has the API calls, called from cn_proc.c, as well as calls > to allocate, initialize and free the hash table. > > Add a new flag in PF_* flags of task_struct - EXIT_NOTIFY. This flag is > set when user sends the exit code via PROC_CN_MCAST_NOTIFY. While > exiting, this flag is checked and the hash table add or delete calls > are only made if this flag is set. > > A refcount field hrefcnt is added in struct cn_hash_dev, to keep track > of number of threads which have added an entry in hash table. Before > freeing the struct cn_hash_dev, this value must be 0. 
> > Signed-off-by: Anjali Kulkarni <anjali.k.kulkarni@xxxxxxxxxx> > --- > drivers/connector/Makefile | 2 +- > drivers/connector/cn_hash.c | 195 ++++++++++++++++++++++++++++++++++ > drivers/connector/cn_proc.c | 55 +++++++++- > drivers/connector/connector.c | 83 ++++++++++++++- > include/linux/connector.h | 43 ++++++++ > include/linux/sched.h | 2 +- > include/uapi/linux/cn_proc.h | 4 +- > 7 files changed, 375 insertions(+), 9 deletions(-) > create mode 100644 drivers/connector/cn_hash.c > > diff --git a/drivers/connector/Makefile b/drivers/connector/Makefile > index 1bf67d3df97d..cb1dcdf067ad 100644 > --- a/drivers/connector/Makefile > +++ b/drivers/connector/Makefile > @@ -2,4 +2,4 @@ > obj-$(CONFIG_CONNECTOR) += cn.o > obj-$(CONFIG_PROC_EVENTS) += cn_proc.o > > -cn-y += cn_queue.o connector.o > +cn-y += cn_hash.o cn_queue.o connector.o > diff --git a/drivers/connector/cn_hash.c b/drivers/connector/cn_hash.c > new file mode 100644 > index 000000000000..a0211cd99132 > --- /dev/null > +++ b/drivers/connector/cn_hash.c > @@ -0,0 +1,195 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* > + * Author: Anjali Kulkarni <anjali.k.kulkarni@xxxxxxxxxx> > + * > + * Copyright (c) 2024 Oracle and/or its affiliates. 
> + */ > + > +#include <linux/kernel.h> > +#include <linux/init.h> > +#include <linux/connector.h> > +#include <linux/mutex.h> > +#include <linux/pid_namespace.h> > + > +#include <linux/cn_proc.h> > + > +struct cn_hash_dev *cn_hash_alloc_dev(const char *name) > +{ > + struct cn_hash_dev *hdev; > + > + hdev = kzalloc(sizeof(*hdev), GFP_KERNEL); > + if (!hdev) > + return NULL; > + > + snprintf(hdev->name, sizeof(hdev->name), "%s", name); > + atomic_set(&hdev->hrefcnt, 0); > + mutex_init(&hdev->uexit_hash_lock); > + hash_init(hdev->uexit_pid_htable); > + return hdev; > +} > + > +void cn_hash_free_dev(struct cn_hash_dev *hdev) > +{ > + struct uexit_pid_hnode *hnode; > + struct hlist_node *tmp; > + int bucket; > + > + pr_debug("%s: Freeing entire hdev %p\n", __func__, hdev); > + > + mutex_lock(&hdev->uexit_hash_lock); > + hash_for_each_safe(hdev->uexit_pid_htable, bucket, tmp, > + hnode, uexit_pid_hlist) { > + hash_del(&hnode->uexit_pid_hlist); > + pr_debug("%s: Freeing node for pid %d\n", > + __func__, hnode->pid); > + kfree(hnode); > + } > + > + mutex_unlock(&hdev->uexit_hash_lock); > + mutex_destroy(&hdev->uexit_hash_lock); > + > + while (atomic_read(&hdev->hrefcnt)) { > + pr_info("Waiting for %s to become free: refcnt=%d\n", > + hdev->name, atomic_read(&hdev->hrefcnt)); > + msleep(1000); > + } > + > + kfree(hdev); It might be a good idea to set hdev = NULL here, like cn_queue_free_dev() does on free since hdev is passed into the function. Although, I cannot find a reason for this or a comment as to why it was done - so maybe it's not useful. 
> +} > + > +static struct uexit_pid_hnode *cn_hash_alloc_elem(__u32 uexit_code, pid_t pid) > +{ > + struct uexit_pid_hnode *elem; > + > + elem = kzalloc(sizeof(*elem), GFP_KERNEL); > + if (!elem) > + return NULL; > + > + INIT_HLIST_NODE(&elem->uexit_pid_hlist); > + elem->uexit_code = uexit_code; > + elem->pid = pid; > + return elem; > +} > + > +void cn_hash_free_elem(struct uexit_pid_hnode *elem) > +{ > + kfree(elem); > +} > + > +int cn_hash_add_elem(struct cn_hash_dev *hdev, __u32 uexit_code, pid_t pid) > +{ > + struct uexit_pid_hnode *elem, *hnode; > + > + elem = cn_hash_alloc_elem(uexit_code, pid); > + if (!elem) { > + pr_err("%s: cn_hash_alloc_elem() returned NULL pid %d\n", > + __func__, pid); > + return -ENOMEM; > + } > + > + mutex_lock(&hdev->uexit_hash_lock); > + /* > + * Check if an entry for the same pid already exists > + */ > + hash_for_each_possible(hdev->uexit_pid_htable, > + hnode, uexit_pid_hlist, pid) { > + if (hnode->pid == pid) { > + mutex_unlock(&hdev->uexit_hash_lock); > + cn_hash_free_elem(elem); > + pr_debug("%s: pid %d already exists in hash table\n", > + __func__, pid); > + return -EEXIST; > + } > + } > + > + hash_add(hdev->uexit_pid_htable, &elem->uexit_pid_hlist, pid); > + mutex_unlock(&hdev->uexit_hash_lock); > + > + atomic_inc(&hdev->hrefcnt); > + > + pr_debug("%s: After hash_add of pid %d elem %p hrefcnt %d\n", > + __func__, pid, elem, atomic_read(&hdev->hrefcnt)); > + return 0; > +} > + > +int cn_hash_del_elem(struct cn_hash_dev *hdev, pid_t pid) > +{ > + struct uexit_pid_hnode *hnode; > + struct hlist_node *tmp; > + > + mutex_lock(&hdev->uexit_hash_lock); > + hash_for_each_possible_safe(hdev->uexit_pid_htable, > + hnode, tmp, uexit_pid_hlist, pid) { > + if (hnode && hnode->pid == pid) { > + hash_del(&hnode->uexit_pid_hlist); > + mutex_unlock(&hdev->uexit_hash_lock); > + kfree(hnode); > + atomic_dec(&hdev->hrefcnt); > + pr_debug("%s: After hash_del of pid %d, hrefcnt %d\n", > + __func__, pid, > + atomic_read(&hdev->hrefcnt)); > + 
return 0; > + } > + } > + > + mutex_unlock(&hdev->uexit_hash_lock); > + pr_err("%s: pid %d not found in hash table\n", > + __func__, pid); > + return -EINVAL; > +} > + > +__u32 cn_hash_del_get_exval(struct cn_hash_dev *hdev, pid_t pid) > +{ > + struct uexit_pid_hnode *hnode; > + struct hlist_node *tmp; > + __u32 excde; > + > + mutex_lock(&hdev->uexit_hash_lock); > + hash_for_each_possible_safe(hdev->uexit_pid_htable, > + hnode, tmp, uexit_pid_hlist, pid) { > + if (hnode->pid == pid) { > + excde = hnode->uexit_code; > + hash_del(&hnode->uexit_pid_hlist); > + mutex_unlock(&hdev->uexit_hash_lock); > + kfree(hnode); > + atomic_dec(&hdev->hrefcnt); > + pr_debug("%s: After hash_del of pid %d, found exit code %u hrefcnt %d\n", > + __func__, pid, excde, > + atomic_read(&hdev->hrefcnt)); > + return excde; > + } > + } > + > + mutex_unlock(&hdev->uexit_hash_lock); > + pr_err("%s: pid %d not found in hash table\n", > + __func__, pid); > + return 0; > +} > + > +__u32 cn_hash_get_exval(struct cn_hash_dev *hdev, pid_t pid) > +{ > + struct uexit_pid_hnode *hnode; > + __u32 excde; > + > + mutex_lock(&hdev->uexit_hash_lock); > + hash_for_each_possible(hdev->uexit_pid_htable, > + hnode, uexit_pid_hlist, pid) { > + if (hnode->pid == pid) { > + excde = hnode->uexit_code; > + mutex_unlock(&hdev->uexit_hash_lock); > + pr_debug("%s: Found exit code %u for pid %d\n", > + __func__, excde, pid); > + return excde; > + } > + } > + > + mutex_unlock(&hdev->uexit_hash_lock); > + pr_debug("%s: pid %d not found in hash table\n", > + __func__, pid); > + return -EINVAL; > +} > + > +bool cn_hash_table_empty(struct cn_hash_dev *hdev) > +{ > + return hash_empty(hdev->uexit_pid_htable); > +} > diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c > index 44b19e696176..9205498fcf0f 100644 > --- a/drivers/connector/cn_proc.c > +++ b/drivers/connector/cn_proc.c > @@ -69,6 +69,8 @@ static int cn_filter(struct sock *dsk, struct sk_buff *skb, void *data) > if ((__u32)val == PROC_EVENT_ALL) > 
return 0; > > + pr_debug("%s: val %lx, what %x\n", __func__, val, what); > + > /* > * Drop packet if we have to report only non-zero exit status > * (PROC_EVENT_NONZERO_EXIT) and exit status is 0 > @@ -326,9 +328,15 @@ void proc_exit_connector(struct task_struct *task) > struct proc_event *ev; > struct task_struct *parent; > __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8); > + __u32 uexit_code; > > - if (atomic_read(&proc_event_num_listeners) < 1) > + if (atomic_read(&proc_event_num_listeners) < 1) { > + if (likely(!(task->flags & PF_EXIT_NOTIFY))) > + return; > + > + cn_del_elem(task->pid); > return; > + } > > msg = buffer_to_cn_msg(buffer); > ev = (struct proc_event *)msg->data; > @@ -337,7 +345,25 @@ void proc_exit_connector(struct task_struct *task) > ev->what = PROC_EVENT_EXIT; > ev->event_data.exit.process_pid = task->pid; > ev->event_data.exit.process_tgid = task->tgid; > - ev->event_data.exit.exit_code = task->exit_code; > + if (unlikely(task->flags & PF_EXIT_NOTIFY)) { > + task->flags &= ~PF_EXIT_NOTIFY; > + nit: extra blank line > + uexit_code = cn_del_get_exval(task->pid); > + if (uexit_code == 0) { > + pr_debug("%s: Returning with task's exit code %u\n", > + __func__, task->exit_code); > + ev->event_data.exit.exit_code = task->exit_code; > + } else { > + ev->event_data.exit.exit_code = uexit_code; > + pr_debug("%s: Reset PF_EXIT_NOTIFY & retrieved exit code %u from hash table, pid %d\n", > + __func__, > + ev->event_data.exit.exit_code, > + task->pid); > + } > + } else { > + ev->event_data.exit.exit_code = task->exit_code; > + } > + > ev->event_data.exit.exit_signal = task->exit_signal; > > rcu_read_lock(); > @@ -413,6 +439,13 @@ static void cn_proc_mcast_ctl(struct cn_msg *msg, > if (msg->len == sizeof(*pinput)) { > pinput = (struct proc_input *)msg->data; > mc_op = pinput->mcast_op; > + if (mc_op == PROC_CN_MCAST_NOTIFY) { > + pr_debug("%s: Received PROC_CN_MCAST_NOTIFY, pid %d\n", > + __func__, current->pid); > + current->flags |= PF_EXIT_NOTIFY; > + err = 
cn_add_elem(pinput->uexit_code, current->pid); > + return; > + } > ev_type = pinput->event_type; > } else if (msg->len == sizeof(mc_op)) { > mc_op = *((enum proc_cn_mcast_op *)msg->data); > @@ -432,6 +465,8 @@ static void cn_proc_mcast_ctl(struct cn_msg *msg, > sk->sk_user_data = kzalloc(sizeof(struct proc_input), > GFP_KERNEL); > if (sk->sk_user_data == NULL) { > + pr_err("%s: ENOMEM for sk_user_data, pid %d\n", > + __func__, current->pid); > err = ENOMEM; > goto out; > } > @@ -442,21 +477,33 @@ static void cn_proc_mcast_ctl(struct cn_msg *msg, > } > ((struct proc_input *)(sk->sk_user_data))->event_type = > ev_type; > + pr_debug("%s: sk: %p pid: %d event_type: %x\n", > + __func__, sk, current->pid, ev_type); > ((struct proc_input *)(sk->sk_user_data))->mcast_op = mc_op; > } > > switch (mc_op) { > case PROC_CN_MCAST_LISTEN: > - if (initial || (prev_mc_op != PROC_CN_MCAST_LISTEN)) > + if (initial || (prev_mc_op != PROC_CN_MCAST_LISTEN)) { > atomic_inc(&proc_event_num_listeners); > + pr_debug("%s: PROC_CN_MCAST_LISTEN pid %d: Incremented listeners to %d\n", > + __func__, current->pid, > + atomic_read(&proc_event_num_listeners)); > + } > break; > case PROC_CN_MCAST_IGNORE: > - if (!initial && (prev_mc_op != PROC_CN_MCAST_IGNORE)) > + if (!initial && (prev_mc_op != PROC_CN_MCAST_IGNORE)) { > atomic_dec(&proc_event_num_listeners); > + pr_debug("%s: PROC_CN_MCAST_IGNORE pid %d: Decremented listeners to %d\n", > + __func__, current->pid, > + atomic_read(&proc_event_num_listeners)); > + } > ((struct proc_input *)(sk->sk_user_data))->event_type = > PROC_EVENT_NONE; > break; > default: > + pr_warn("%s: Invalid value for mc_op %d\n", > + __func__, mc_op); nit: Does this need to be split to two lines? 
> err = EINVAL; > break; > } > diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c > index 4028e8eeba82..506e3cbedf85 100644 > --- a/drivers/connector/connector.c > +++ b/drivers/connector/connector.c > @@ -271,6 +271,67 @@ static int __maybe_unused cn_proc_show(struct seq_file *m, void *v) > return 0; > } > > +__u32 cn_del_get_exval(pid_t pid) > +{ > + struct cn_dev *dev = &cdev; > + __u32 exval; > + > + if (!cn_already_initialized) > + return 0; > + > + exval = cn_hash_del_get_exval(dev->hdev, pid); > + return exval; You can remove exval from this function by returning the result directly. > +} > +EXPORT_SYMBOL_GPL(cn_del_get_exval); > + > +int cn_del_elem(pid_t pid) > +{ > + struct cn_dev *dev = &cdev; > + int ret; > + > + if (!cn_already_initialized) > + return 0; > + > + ret = cn_hash_del_elem(dev->hdev, pid); > + return ret; You can remove ret from this function by returning the result directly. > +} > +EXPORT_SYMBOL_GPL(cn_del_elem); > + > +int cn_add_elem(__u32 uexit_code, pid_t pid) > +{ > + struct cn_dev *dev = &cdev; > + > + if (!cn_already_initialized) > + return 0; > + > + return cn_hash_add_elem(dev->hdev, uexit_code, pid); > +} > +EXPORT_SYMBOL_GPL(cn_add_elem); > + > +__u32 cn_get_exval(pid_t pid) > +{ > + struct cn_dev *dev = &cdev; > + __u32 exval; > + > + if (!cn_already_initialized) > + return 0; > + > + exval = cn_hash_get_exval(dev->hdev, pid); > + return exval; You can remove exval from this function by returning the result directly. 
> +} > +EXPORT_SYMBOL_GPL(cn_get_exval); > + > +bool cn_table_empty(void) > +{ > + struct cn_dev *dev = &cdev; > + > + if (!cn_already_initialized) > + return 0; > + > + return cn_hash_table_empty(dev->hdev); > +} > +EXPORT_SYMBOL_GPL(cn_table_empty); > + > static int cn_init(void) > { > struct cn_dev *dev = &cdev; > @@ -283,18 +344,35 @@ static int cn_init(void) > }; > > dev->nls = netlink_kernel_create(&init_net, NETLINK_CONNECTOR, &cfg); > - if (!dev->nls) > + if (!dev->nls) { > + pr_err("%s: netlink_kernel_create failed, connector not initialized\n", > + __func__); > return -EIO; > + } > > dev->cbdev = cn_queue_alloc_dev("cqueue", dev->nls); > if (!dev->cbdev) { > + pr_err("%s: Allocation of dev->cbdev failed, connector not initialized\n", > + __func__); > netlink_kernel_release(dev->nls); > return -EINVAL; > } > > + dev->hdev = cn_hash_alloc_dev("pid hash table"); > + if (!dev->hdev) { > + pr_err("%s: Allocation of dev->hdev failed, connector not initialized\n", > + __func__); > + netlink_kernel_release(dev->nls); > + cn_queue_free_dev(dev->cbdev); > + return -ENOMEM; > + } > + > + pr_debug("Connector initialized, allocated hdev %p\n", dev->hdev); > + > cn_already_initialized = 1; > > - proc_create_single("connector", S_IRUGO, init_net.proc_net, cn_proc_show); > + proc_create_single("connector", S_IRUGO, init_net.proc_net, > + cn_proc_show); Unnecessary change here. 
> > return 0; > } > @@ -308,6 +386,7 @@ static void cn_fini(void) > remove_proc_entry("connector", init_net.proc_net); > > cn_queue_free_dev(dev->cbdev); > + cn_hash_free_dev(dev->hdev); > netlink_kernel_release(dev->nls); > } > > diff --git a/include/linux/connector.h b/include/linux/connector.h > index 70bc1160f3d8..094e1730a4f6 100644 > --- a/include/linux/connector.h > +++ b/include/linux/connector.h > @@ -18,6 +18,8 @@ > #include <uapi/linux/connector.h> > > #define CN_CBQ_NAMELEN 32 > +#define HASHT_NAMELEN 32 > +#define PID_HASH_TABLE_BITS 10 > > struct cn_queue_dev { > atomic_t refcnt; > @@ -45,6 +47,19 @@ struct cn_callback_entry { > u32 seq, group; > }; > > +struct uexit_pid_hnode { > + __u32 uexit_code; > + pid_t pid; > + struct hlist_node uexit_pid_hlist; > +}; > + > +struct cn_hash_dev { > + atomic_t hrefcnt; > + unsigned char name[HASHT_NAMELEN]; > + struct mutex uexit_hash_lock; > + DECLARE_HASHTABLE(uexit_pid_htable, PID_HASH_TABLE_BITS); > +}; > + > struct cn_dev { > struct cb_id id; > > @@ -52,6 +67,7 @@ struct cn_dev { > struct sock *nls; > > struct cn_queue_dev *cbdev; > + struct cn_hash_dev *hdev; > }; > > /** > @@ -137,4 +153,31 @@ void cn_queue_free_dev(struct cn_queue_dev *dev); > > int cn_cb_equal(const struct cb_id *, const struct cb_id *); > > +struct cn_hash_dev *cn_hash_alloc_dev(const char *name); > +void cn_hash_free_dev(struct cn_hash_dev *hdev); > +struct uexit_pid_hnode *cn_hash_find_pid_node(struct cn_hash_dev *hdev, > + pid_t pid); > +void cn_hash_free_elem(struct uexit_pid_hnode *elem); > +int cn_hash_add_elem(struct cn_hash_dev *hdev, __u32 uexit_code, pid_t pid); > +int cn_hash_del_elem(struct cn_hash_dev *hdev, pid_t pid); > +__u32 cn_hash_del_get_exval(struct cn_hash_dev *hdev, pid_t pid); > + > +int cn_add_elem(__u32 uexit_code, pid_t pid); > +int cn_del_elem(pid_t pid); > +__u32 cn_del_get_exval(pid_t pid); > +__u32 cn_get_exval(pid_t pid); > + > +struct cn_hash_dev *cn_hash_alloc_dev(const char *name); > +void 
cn_hash_free_dev(struct cn_hash_dev *hdev); > +struct uexit_pid_hnode *cn_hash_find_pid_node(struct cn_hash_dev *hdev, > + pid_t pid); > +void cn_hash_free_elem(struct uexit_pid_hnode *elem); > +int cn_hash_add_elem(struct cn_hash_dev *hdev, __u32 uexit_code, pid_t pid); > +int cn_hash_del_elem(struct cn_hash_dev *hdev, pid_t pid); > +__u32 cn_hash_del_get_exval(struct cn_hash_dev *hdev, pid_t pid); > +__u32 cn_hash_get_exval(struct cn_hash_dev *hdev, pid_t pid); > + Why are these here twice? Am I missing something? It also seems like a lot of these can be static inline and removed completely from the header as they are not used externally. > +bool cn_table_empty(void); > +bool cn_hash_table_empty(struct cn_hash_dev *hdev); > + > #endif /* __CONNECTOR_H */ > diff --git a/include/linux/sched.h b/include/linux/sched.h > index e6ee4258169a..a2339ae6208b 100644 > --- a/include/linux/sched.h > +++ b/include/linux/sched.h > @@ -1673,7 +1673,7 @@ extern struct pid *cad_pid; > #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ > #define PF_USER_WORKER 0x00004000 /* Kernel thread cloned from userspace thread */ > #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ > -#define PF__HOLE__00010000 0x00010000 > +#define PF_EXIT_NOTIFY 0x00010000 /* This thread has sent an exit value to be sent as a notification to listening processes */ > #define PF_KSWAPD 0x00020000 /* I am kswapd */ > #define PF_MEMALLOC_NOFS 0x00040000 /* All allocations inherit GFP_NOFS. See memalloc_nfs_save() */ > #define PF_MEMALLOC_NOIO 0x00080000 /* All allocations inherit GFP_NOIO. 
See memalloc_noio_save() */ > diff --git a/include/uapi/linux/cn_proc.h b/include/uapi/linux/cn_proc.h > index 18e3745b86cd..2b12a24e4651 100644 > --- a/include/uapi/linux/cn_proc.h > +++ b/include/uapi/linux/cn_proc.h > @@ -27,7 +27,8 @@ > */ > enum proc_cn_mcast_op { > PROC_CN_MCAST_LISTEN = 1, > - PROC_CN_MCAST_IGNORE = 2 > + PROC_CN_MCAST_IGNORE = 2, > + PROC_CN_MCAST_NOTIFY = 3 > }; > > #define PROC_EVENT_ALL (PROC_EVENT_FORK | PROC_EVENT_EXEC | PROC_EVENT_UID | \ > @@ -65,6 +66,7 @@ enum proc_cn_event { > struct proc_input { > enum proc_cn_mcast_op mcast_op; > enum proc_cn_event event_type; > + __u32 uexit_code; > }; > > static inline enum proc_cn_event valid_event(enum proc_cn_event ev_type) > -- > 2.46.0 >