> On Oct 15, 2024, at 12:27 PM, Liam Howlett <liam.howlett@xxxxxxxxxx> wrote: > > * Anjali Kulkarni <anjali.k.kulkarni@xxxxxxxxxx> [241015 13:30]: >> Test to check if setting PROC_CN_MCAST_NOTIFY in proc connector API, allows >> a thread's non-zero exit status to be returned to proc_filter. >> >> The threads.c program creates 2 child threads. 1st thread handles signal >> SIGSEGV, and 2nd thread needs to indicate some error condition (value 1) >> to the kernel, instead of using pthread_exit() with 1. >> >> In both cases, child sends notify_netlink_thread_exit(exit_code) to kernel, >> to let kernel know it has exited abnormally with exit_code. >> >> Compile: >> make thread >> make proc_filter >> To see non-zero exit notifications, run: >> ./proc_filter -f >> Run threads code in another window: >> ./threads >> Note the 2 child thread IDs reported above >> Send SIGSEGV signal to the child handling SIGSEGV: >> kill -11 <child1-tid> >> Watch the child 1 tid being notified with exit code 11 to proc_filter >> Watch child 2 tid being notified with exit code 1 (value defined in code) >> to proc_filter >> >> Signed-off-by: Anjali Kulkarni <anjali.k.kulkarni@xxxxxxxxxx> >> --- >> tools/testing/selftests/connector/Makefile | 23 +++- >> .../testing/selftests/connector/proc_filter.c | 5 + >> tools/testing/selftests/connector/thread.c | 116 ++++++++++++++++++ >> .../selftests/connector/thread_filter.c | 96 +++++++++++++++ >> 4 files changed, 239 insertions(+), 1 deletion(-) >> create mode 100644 tools/testing/selftests/connector/thread.c >> create mode 100644 tools/testing/selftests/connector/thread_filter.c >> >> diff --git a/tools/testing/selftests/connector/Makefile b/tools/testing/selftests/connector/Makefile >> index 92188b9bac5c..bf335826bc3b 100644 >> --- a/tools/testing/selftests/connector/Makefile >> +++ b/tools/testing/selftests/connector/Makefile >> @@ -1,5 +1,26 @@ >> # SPDX-License-Identifier: GPL-2.0 >> -CFLAGS += -Wall $(KHDR_INCLUDES) >> +KERNEL="../../../.." 
>> + >> +CFLAGS += -Wall $(KHDR_INCLUDES) -I $(KERNEL)/include/uapi -I $(KERNEL)/include >> + >> +proc_filter: proc_filter.o >> + cc proc_filter.o -o proc_filter >> + >> +proc_filter.o: proc_filter.c >> + cc -c proc_filter.c -o proc_filter.o $(CFLAGS) >> + >> +thread: thread.o thread_filter.o >> + cc thread.o thread_filter.o -o thread >> + >> +thread.o: thread.c $(DEPS) >> + cc -c thread.c -o thread.o $(CFLAGS) >> + >> +thread_filter.o: thread_filter.c >> + cc -c thread_filter.c -o thread_filter.o $(CFLAGS) >> + >> +define EXTRA_CLEAN >> + rm *.o thread >> +endef >> >> TEST_GEN_PROGS = proc_filter >> >> diff --git a/tools/testing/selftests/connector/proc_filter.c b/tools/testing/selftests/connector/proc_filter.c >> index 4a825b997666..6fb4842894f8 100644 >> --- a/tools/testing/selftests/connector/proc_filter.c >> +++ b/tools/testing/selftests/connector/proc_filter.c >> @@ -1,4 +1,9 @@ >> // SPDX-License-Identifier: GPL-2.0-only >> +/* >> + * Author: Anjali Kulkarni <anjali.k.kulkarni@xxxxxxxxxx> >> + * >> + * Copyright (c) 2024 Oracle and/or its affiliates. >> + */ >> >> #include <sys/types.h> >> #include <sys/epoll.h> >> diff --git a/tools/testing/selftests/connector/thread.c b/tools/testing/selftests/connector/thread.c >> new file mode 100644 >> index 000000000000..8c9abf6115d6 >> --- /dev/null >> +++ b/tools/testing/selftests/connector/thread.c >> @@ -0,0 +1,116 @@ >> +// SPDX-License-Identifier: GPL-2.0-only >> +/* >> + * Author: Anjali Kulkarni <anjali.k.kulkarni@xxxxxxxxxx> >> + * >> + * Copyright (c) 2024 Oracle and/or its affiliates. >> + */ >> + >> +#include <pthread.h> >> +#include <stdio.h> >> +#include <unistd.h> >> +#include <stdlib.h> >> +#include <signal.h> >> + >> +/* >> + * This code tests a thread exit notification when thread exits abnormally. >> + * Normally, when a thread exits abnormally, the kernel is not aware of the >> + * exit code. This is usually only conveyed from child to parent via the >> + * pthread_exit() and pthread_join() calls. 
Sometimes, however, a parent >> + * process cannot monitor all child processes via pthread_join(), particularly >> + * when there is a huge amount of child processes. In this case, the parent >> + * has created the child with PTHREAD_CREATE_DETACHED attribute. >> + * To fix this problem, either when child wants to convey non-zero exit via >> + * pthread_exit() or in a signal handler, the child can notify the kernel's >> + * connector module it's exit status via a netlink call with new type >> + * PROC_CN_MCAST_NOTIFY. (Implemented in the thread_filter.c file). >> + * This will send the exit code from the child to the kernel, which the kernel >> + * can later return to proc_filter program when the child actually exits. >> + * To test this usecase: >> + * Compile: >> + * make thread >> + * make proc_filter >> + * To see non-zero exit notifications, run: >> + * ./proc_filter -f >> + * Start the threads code, creating 2 threads, in another window: >> + * ./threads >> + * Note the 2 child thread IDs reported above >> + * Send SIGSEGV signal to the child handling SIGSEGV: >> + * kill -11 <child1-tid> >> + * Watch the event being notified with exit code 11 to proc_filter >> + * Watch child 2 tid being notified with exit code 1 (value defined in code) >> + * to proc_filter >> + */ >> + >> +extern int notify_netlink_thread_exit(unsigned int exit_code); >> + >> +static void sigsegvh(int sig) >> +{ >> + unsigned int exit_code = (unsigned int) sig; >> + /* >> + * Send any non-zero value to get a notification. 
Here we are >> + * sending the signal number for SIGSEGV which is 11 >> + */ >> + notify_netlink_thread_exit(exit_code); >> +} >> + >> +void *threadc1(void *ptr) >> +{ >> + signal(SIGSEGV, sigsegvh); >> + printf("Child 1 thread id %d, handling SIGSEGV\n", gettid()); >> + sleep(20); >> + pthread_exit(NULL); >> +} >> + >> +void *threadc2(void *ptr) >> +{ >> + int exit_val = 1; >> + >> + printf("Child 2 thread id %d, wants to exit with value %d\n", >> + gettid(), exit_val); >> + sleep(2); >> + notify_netlink_thread_exit(exit_val); >> + pthread_exit(NULL); >> +} >> + >> +int main(int argc, char **argv) >> +{ >> + pthread_t thread1, thread2; >> + pthread_attr_t attr1, attr2; >> + int ret; >> + >> + ret = pthread_attr_init(&attr1); >> + if (ret != 0) { >> + perror("pthread_attr_init failed"); >> + exit(ret); >> + } >> + ret = pthread_attr_setdetachstate(&attr1, PTHREAD_CREATE_DETACHED); >> + if (ret != 0) { >> + perror("pthread_attr_setdetachstate failed"); >> + exit(ret); >> + } >> + ret = pthread_create(&thread1, &attr1, *threadc1, NULL); >> + if (ret != 0) { >> + perror("pthread_create failed"); >> + exit(ret); >> + } >> + >> + ret = pthread_attr_init(&attr2); >> + if (ret != 0) { >> + perror("pthread_attr_init failed"); >> + exit(ret); >> + } >> + ret = pthread_attr_setdetachstate(&attr2, PTHREAD_CREATE_DETACHED); >> + if (ret != 0) { >> + perror("pthread_attr_setdetachstate failed"); >> + exit(ret); >> + } >> + ret = pthread_create(&thread2, &attr2, *threadc2, NULL); >> + if (ret != 0) { >> + perror("pthread_create failed"); >> + exit(ret); >> + } > > I expected the test to check for the correct value to be returned? > Could you use pthread_join() and verify the same value is returned > through the new mechanism, or will they not match? This is a manual test; you need to check visually that proc_filter is returning the correct values for the 2 cases.
I can make this an automated test, reading the output of proc_filter directly from a file and checking the values are as expected. There is also another test program which does an automated test of the values returned by proc_filter for scale (100k or so), which I will send out later. (The co-author of that patch, who wrote the original code for processes, is away - @peili.io@xxxxxxxxxx) pthread_join() cannot return a value to the kernel, so we cannot use it to verify the new mechanism. > >> + >> + /* Wait for children to exit or be killed */ >> + sleep(30); >> + exit(0); >> +} >> diff --git a/tools/testing/selftests/connector/thread_filter.c b/tools/testing/selftests/connector/thread_filter.c >> new file mode 100644 >> index 000000000000..3da740aa7537 >> --- /dev/null >> +++ b/tools/testing/selftests/connector/thread_filter.c >> @@ -0,0 +1,96 @@ >> +// SPDX-License-Identifier: GPL-2.0-only >> +/* >> + * Author: Anjali Kulkarni <anjali.k.kulkarni@xxxxxxxxxx> >> + * >> + * Copyright (c) 2024 Oracle and/or its affiliates. >> + */ >> + >> +#include <sys/types.h> >> +#include <sys/epoll.h> >> +#include <sys/socket.h> >> +#include <linux/netlink.h> >> +#include <linux/connector.h> >> +#include <linux/cn_proc.h> >> + >> +#include <stddef.h> >> +#include <stdio.h> >> +#include <stdlib.h> >> +#include <unistd.h> >> +#include <strings.h> >> +#include <errno.h> >> +#include <signal.h> >> +#include <string.h> >> + >> +#define NL_MESSAGE_SIZE (sizeof(struct nlmsghdr) + sizeof(struct cn_msg) + \ >> + sizeof(struct proc_input)) >> + >> +/* >> + * Send PROC_CN_MCAST_NOTIFY type notification to the connector code in kernel. 
>> + * This will send the exit_code specified by user to the connector layer, so >> + * it can send a notification for that event to any listening process >> + */ >> +int send_message(int nl_sock, unsigned int exit_code) >> +{ >> + char buff[NL_MESSAGE_SIZE]; >> + struct nlmsghdr *hdr; >> + struct cn_msg *msg; >> + >> + hdr = (struct nlmsghdr *)buff; >> + hdr->nlmsg_len = NL_MESSAGE_SIZE; >> + hdr->nlmsg_type = NLMSG_DONE; >> + hdr->nlmsg_flags = 0; >> + hdr->nlmsg_seq = 0; >> + hdr->nlmsg_pid = getpid(); >> + >> + msg = (struct cn_msg *)NLMSG_DATA(hdr); >> + msg->id.idx = CN_IDX_PROC; >> + msg->id.val = CN_VAL_PROC; >> + msg->seq = 0; >> + msg->ack = 0; >> + msg->flags = 0; >> + >> + msg->len = sizeof(struct proc_input); >> + ((struct proc_input *)msg->data)->mcast_op = >> + PROC_CN_MCAST_NOTIFY; >> + ((struct proc_input *)msg->data)->uexit_code = exit_code; >> + >> + if (send(nl_sock, hdr, hdr->nlmsg_len, 0) == -1) { >> + perror("send failed"); >> + return -errno; >> + } >> + return 0; >> +} >> + >> +int notify_netlink_thread_exit(unsigned int exit_code) >> +{ >> + struct sockaddr_nl sa_nl; >> + int err = 0; >> + int nl_sock; >> + >> + nl_sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR); >> + >> + if (nl_sock == -1) { >> + perror("socket failed"); >> + return -errno; >> + } >> + >> + bzero(&sa_nl, sizeof(sa_nl)); >> + sa_nl.nl_family = AF_NETLINK; >> + sa_nl.nl_groups = CN_IDX_PROC; >> + sa_nl.nl_pid = gettid(); >> + >> + if (bind(nl_sock, (struct sockaddr *)&sa_nl, sizeof(sa_nl)) == -1) { >> + perror("bind failed"); >> + close(nl_sock); >> + return -errno; >> + } >> + >> + err = send_message(nl_sock, exit_code); >> + >> + close(nl_sock); >> + >> + if (err < 0) >> + return err; >> + >> + return 0; >> +} >> -- >> 2.46.0