[PATCH] Improve behaviour of Netlink Sockets

Hi Davem,

I've observed a problem when a user space program requests information from the kernel via netlink sockets. If you send a well-formed message by means of sendmsg() from user space, netlink_sendmsg() calls sk->sk_data_ready(), which in turn calls the input (callback) function implemented in another module.

The module then sends the requested information back to the socket via netlink_unicast()/netlink_broadcast(). At this point we are still inside sendmsg(), so the user space program is still waiting for sendmsg() to return.
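To make the call path clearer, here is a rough sketch of what such a module looks like (MY_NETLINK_PROTO and build_reply() are made up here, this is only to show where the blocking unicast happens):

#include <linux/module.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <net/sock.h>

#define MY_NETLINK_PROTO 31	/* placeholder unit number */

static struct sock *nl_sk;

struct sk_buff *build_reply(struct nlmsghdr *nlh);	/* made-up helper */

/* Called from netlink_sendmsg() via sk->sk_data_ready(), i.e. in the
 * context of the user process that called sendmsg() */
static void my_nl_input(struct sock *sk, int len)
{
	struct sk_buff *skb, *reply;
	struct nlmsghdr *nlh;
	u32 pid;

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		nlh = (struct nlmsghdr *)skb->data;
		pid = NETLINK_CB(skb).pid;

		reply = build_reply(nlh);

		/* Blocking unicast back to the requester: this is the
		 * call that can end up sleeping in netlink_attachskb() */
		netlink_unicast(sk, reply, pid, 0);
		kfree_skb(skb);
	}
}

static int __init my_init(void)
{
	nl_sk = netlink_kernel_create(MY_NETLINK_PROTO, my_nl_input);
	return nl_sk ? 0 : -ENODEV;
}
module_init(my_init);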

The problem happens if the module sends more information than there is space available in the socket receive buffer; in that case:

---- snipped from netlink_attachskb ----

               __set_current_state(TASK_INTERRUPTIBLE);
               add_wait_queue(&nlk->wait, &wait);

               if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
                    test_bit(0, &nlk->state)) &&
                   !sock_flag(sk, SOCK_DEAD))
                       timeo = schedule_timeout(timeo);

               __set_current_state(TASK_RUNNING);
               remove_wait_queue(&nlk->wait, &wait);
               sock_put(sk);

---- end of snip ----

This task goes to sleep (to let the user space program read from the queue), but that can never happen because the user space program is still blocked waiting for sendmsg() to return... so both end up deadlocked.
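Seen from user space, the blocked sequence is basically this (very stripped down, MY_NETLINK_PROTO and the request payload are only placeholders and error checks are dropped):

#include <sys/socket.h>
#include <sys/uio.h>
#include <linux/netlink.h>
#include <string.h>
#include <unistd.h>

#define MY_NETLINK_PROTO 31	/* placeholder unit number */

int main(void)
{
	struct sockaddr_nl local, kernel;
	struct {
		struct nlmsghdr nlh;
		char payload[64];
	} req;
	struct iovec iov;
	struct msghdr msg;
	int sock;

	sock = socket(AF_NETLINK, SOCK_RAW, MY_NETLINK_PROTO);

	memset(&local, 0, sizeof(local));
	local.nl_family = AF_NETLINK;
	local.nl_pid = getpid();
	bind(sock, (struct sockaddr *)&local, sizeof(local));

	memset(&kernel, 0, sizeof(kernel));
	kernel.nl_family = AF_NETLINK;	/* nl_pid = 0 -> the kernel */

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = sizeof(req);
	req.nlh.nlmsg_flags = NLM_F_REQUEST;
	req.nlh.nlmsg_pid = getpid();

	iov.iov_base = &req;
	iov.iov_len = sizeof(req);
	memset(&msg, 0, sizeof(msg));
	msg.msg_name = &kernel;
	msg.msg_namelen = sizeof(kernel);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	/* The module callback runs inside this call; if its reply does
	 * not fit into our receive buffer, it sleeps waiting for us to
	 * read it, but we can only read after sendmsg() returns. */
	sendmsg(sock, &msg, 0);

	/* never reached while the callback is stuck above */
	recvmsg(sock, &msg, 0);
	return 0;
}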

So I made a patch which uses a kernel thread to fix this problem. In sendmsg(), instead of calling sk->sk_data_ready() directly, we enqueue the requests and let a kernel thread process them.

I've also added this:

+               if (nlk->pid) {
+                       /* Kernel is sending information to user space
+                        * and socket buffer is full: Wake up user
+                        * process */
+                       client = find_task_by_pid(nlk->pid);
+                       if (!client) {
+                               sock_put(sk);
+                               kfree_skb(skb);
+                               return -EAGAIN;
+                       }
+                       wake_up_process(client);
+               }

With unicast sockets we know the pid, so waking up the user space process directly improves netlink socket performance. This is interesting for applications like netfilter's ip_queue.

BTW, this patch applies only to unicast sockets, but it's easy to make broadcast sockets use the kernel thread too; I have no problem modifying the patch if you want.

If I'm missing something, please let me know.

regards,
Pablo
diff -u -r1.1.1.1 af_netlink.c
--- a/net/netlink/af_netlink.c	19 Aug 2004 14:52:01 -0000	1.1.1.1
+++ b/net/netlink/af_netlink.c	25 Aug 2004 19:53:46 -0000
@@ -46,6 +46,8 @@
 #include <linux/security.h>
 #include <net/sock.h>
 #include <net/scm.h>
+#include <linux/list.h>
+#include <linux/kthread.h>
 
 #define Nprintk(a...)
 
@@ -53,6 +55,11 @@
 #define NL_EMULATE_DEV
 #endif
 
+/* Maximum number of requests in queue for the netlink kthread:
+ * a queue entry is 16 bytes on 32-bit x86, so the queue will
+ * take up to about 6KB. */
+#define NETLINK_RCV_QUEUE_MAX 400
+
 struct netlink_opt
 {
 	u32			pid;
@@ -69,8 +76,18 @@
 
 #define nlk_sk(__sk) ((struct netlink_opt *)(__sk)->sk_protinfo)
 
+struct netlink_rcv_queue
+{
+	struct list_head 	head;
+	struct sock 		*sk;
+	int 			len;
+};
+
+static atomic_t nl_rcv_queue_size = ATOMIC_INIT(0);
+static LIST_HEAD(nl_rcv_queue);
 static struct hlist_head nl_table[MAX_LINKS];
 static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
+static DECLARE_WAIT_QUEUE_HEAD(nl_rcv_kthread);
 static unsigned nl_nonroot[MAX_LINKS];
 
 #ifdef NL_EMULATE_DEV
@@ -451,6 +468,26 @@
 	return sock;
 }
 
+inline int netlink_rcv_queue_tail(struct sock *sk, int len)
+{
+	struct netlink_rcv_queue *nlrcv;
+
+	nlrcv = kmalloc(sizeof(struct netlink_rcv_queue), GFP_KERNEL);
+	if (!nlrcv) {
+		sock_put(sk);
+		return 0;
+	}
+
+	nlrcv->sk = sk;
+	nlrcv->len = len;
+	list_add_tail(&nlrcv->head, &nl_rcv_queue);
+	atomic_inc(&nl_rcv_queue_size);
+	if (atomic_read(&nl_rcv_queue_size) > NETLINK_RCV_QUEUE_MAX) 
+		wake_up_interruptible_all(&nl_rcv_kthread);
+
+	return 1;
+}
+
 /*
  * Attach a skb to a netlink socket.
  * The caller must hold a reference to the destination socket. On error, the
@@ -474,6 +511,8 @@
 	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
 	    test_bit(0, &nlk->state)) {
 		DECLARE_WAITQUEUE(wait, current);
+		task_t *client;
+
 		if (!timeo) {
 			if (!nlk->pid)
 				netlink_overrun(sk);
@@ -482,6 +521,19 @@
 			return -EAGAIN;
 		}
 
+		if (nlk->pid) {
+			/* Kernel is sending information to user space
+			 * and socket buffer is full: Wake up user
+			 * process */
+			client = find_task_by_pid(nlk->pid);
+			if (!client) {
+				sock_put(sk);
+				kfree_skb(skb);
+				return -EAGAIN;
+			}
+			wake_up_process(client);
+		}
+
 		__set_current_state(TASK_INTERRUPTIBLE);
 		add_wait_queue(&nlk->wait, &wait);
 
@@ -521,8 +573,19 @@
 #endif
 
 	skb_queue_tail(&sk->sk_receive_queue, skb);
-	sk->sk_data_ready(sk, len);
-	sock_put(sk);
+	if (!nlk->pid) {
+		/* Adding a skb to a sk whose pid is zero: We are in 
+		 * user context, so feed the thread */
+		if (!netlink_rcv_queue_tail(sk, len)) {
+			sock_put(sk);
+			return len;
+		}
+		wake_up(&nl_rcv_kthread);
+	} else {
+		sk->sk_data_ready(sk, len);
+		sock_put(sk);
+	}
+
 	return len;
 }
 
@@ -987,6 +1050,43 @@
 	netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
 }
 
+/*
+ * netlink kthread to serve requests
+ */
+
+static void __netlink_rcv_thread(void)
+{
+	struct list_head *cur, *tmp;
+	struct netlink_rcv_queue *request;
+	DECLARE_WAITQUEUE(wait, current);
+	
+	for(;;) {
+		list_for_each_safe(cur, tmp, &nl_rcv_queue) {
+			request = list_entry(cur, struct netlink_rcv_queue, head);
+			request->sk->sk_data_ready(request->sk, request->len);
+			sock_put(request->sk);
+			list_del(cur);
+			atomic_dec(&nl_rcv_queue_size);
+			kfree(request);
+		}
+
+		if (!list_empty(&nl_rcv_queue))
+			continue;
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		add_wait_queue(&nl_rcv_kthread, &wait);
+		schedule();
+		set_current_state(TASK_RUNNING);
+		remove_wait_queue(&nl_rcv_kthread, &wait);
+	}
+}
+
+static int netlink_rcv_thread(void *dummy) 
+{
+	set_user_nice(current, 0);
+	__netlink_rcv_thread();
+	return 0;
+}
 
 #ifdef NL_EMULATE_DEV
 
@@ -1198,6 +1298,9 @@
 #endif
 	/* The netlink device handler may be needed early. */ 
 	rtnetlink_init();
+	
+	/* Create a kernel thread to handle callbacks to modules */
+	kthread_run(netlink_rcv_thread, NULL, "netlink");
 	return 0;
 }
 
