Re: [PATCH net-next v2 11/12] net: homa: create homa_plumbing.c homa_utils.c

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 11/12/24 7:40 AM, John Ousterhout wrote:
homa_plumbing.c contains functions that connect Homa to the rest of
the Linux kernel, such as dispatch tables used by Linux and the
top-level functions that Linux invokes from those dispatch tables.

homa_utils.c contains a few odds and ends, such as code to initialize
and destroy struct homa's.

Signed-off-by: John Ousterhout <ouster@xxxxxxxxxxxxxxx>
---
  net/homa/homa_plumbing.c | 965 +++++++++++++++++++++++++++++++++++++++
  net/homa/homa_utils.c    | 150 ++++++
  2 files changed, 1115 insertions(+)
  create mode 100644 net/homa/homa_plumbing.c
  create mode 100644 net/homa/homa_utils.c

diff --git a/net/homa/homa_plumbing.c b/net/homa/homa_plumbing.c
new file mode 100644
index 000000000000..afd3a9cc97ba
--- /dev/null
+++ b/net/homa/homa_plumbing.c
@@ -0,0 +1,965 @@
+// SPDX-License-Identifier: BSD-2-Clause
+
+/* This file consists mostly of "glue" that hooks Homa into the rest of
+ * the Linux kernel. The guts of the protocol are in other files.
+ */
+
+#include "homa_impl.h"
+#include "homa_peer.h"
+#include "homa_pool.h"
+
+MODULE_LICENSE("Dual MIT/GPL");
+MODULE_AUTHOR("John Ousterhout");
+MODULE_DESCRIPTION("Homa transport protocol");
+MODULE_VERSION("0.01");
+
+/* Not yet sure what these variables are for */
+static long sysctl_homa_mem[3] __read_mostly;
+static int sysctl_homa_rmem_min __read_mostly;
+static int sysctl_homa_wmem_min __read_mostly;
+
+/* Global data for Homa. Never reference homa_data directory. Always use
+ * the homa variable instead; this allows overriding during unit tests.
+ */
+static struct homa homa_data;
+struct homa *homa = &homa_data;
+
+/* True means that the Homa module is in the process of unloading itself,
+ * so everyone should clean up.
+ */
+static bool exiting;
+
+/* Thread that runs timer code to detect lost packets and crashed peers. */
+static struct task_struct *timer_kthread;
+
+/* This structure defines functions that handle various operations on
+ * Homa sockets. These functions are relatively generic: they are called
+ * to implement top-level system calls. Many of these operations can
+ * be implemented by PF_INET6 functions that are independent of the
+ * Homa protocol.
+ */
+static const struct proto_ops homa_proto_ops = {
+	.family		   = PF_INET,
+	.owner		   = THIS_MODULE,
+	.release	   = inet_release,
+	.bind		   = homa_bind,
+	.connect	   = inet_dgram_connect,
+	.socketpair	   = sock_no_socketpair,
+	.accept		   = sock_no_accept,
+	.getname	   = inet_getname,
+	.poll		   = homa_poll,
+	.ioctl		   = inet_ioctl,
+	.listen		   = sock_no_listen,
+	.shutdown	   = homa_shutdown,
+	.setsockopt	   = sock_common_setsockopt,
+	.getsockopt	   = sock_common_getsockopt,
+	.sendmsg	   = inet_sendmsg,
+	.recvmsg	   = inet_recvmsg,
+	.mmap		   = sock_no_mmap,
+	.set_peek_off	   = sk_set_peek_off,
+};
+
+static const struct proto_ops homav6_proto_ops = {
+	.family		   = PF_INET6,
+	.owner		   = THIS_MODULE,
+	.release	   = inet6_release,
+	.bind		   = homa_bind,
+	.connect	   = inet_dgram_connect,
+	.socketpair	   = sock_no_socketpair,
+	.accept		   = sock_no_accept,
+	.getname	   = inet6_getname,
+	.poll		   = homa_poll,
+	.ioctl		   = inet6_ioctl,
+	.listen		   = sock_no_listen,
+	.shutdown	   = homa_shutdown,
+	.setsockopt	   = sock_common_setsockopt,
+	.getsockopt	   = sock_common_getsockopt,
+	.sendmsg	   = inet_sendmsg,
+	.recvmsg	   = inet_recvmsg,
+	.mmap		   = sock_no_mmap,
+	.set_peek_off	   = sk_set_peek_off,
+};
+
+/* This structure also defines functions that handle various operations
+ * on Homa sockets. However, these functions are lower-level than those
+ * in homa_proto_ops: they are specific to the PF_INET or PF_INET6
+ * protocol family, and in many cases they are invoked by functions in
+ * homa_proto_ops. Most of these functions have Homa-specific implementations.
+ */
+static struct proto homa_prot = {
+	.name		   = "HOMA",
+	.owner		   = THIS_MODULE,
+	.close		   = homa_close,
+	.connect	   = ip4_datagram_connect,
+	.disconnect	   = homa_disconnect,
+	.ioctl		   = homa_ioctl,
+	.init		   = homa_socket,
+	.destroy	   = NULL,
+	.setsockopt	   = homa_setsockopt,
+	.getsockopt	   = homa_getsockopt,
+	.sendmsg	   = homa_sendmsg,
+	.recvmsg	   = homa_recvmsg,
+	.backlog_rcv       = homa_backlog_rcv,
+	.hash		   = homa_hash,
+	.unhash		   = homa_unhash,
+	.get_port	   = homa_get_port,
+	.sysctl_mem	   = sysctl_homa_mem,
+	.sysctl_wmem	   = &sysctl_homa_wmem_min,
+	.sysctl_rmem	   = &sysctl_homa_rmem_min,
+	.obj_size	   = sizeof(struct homa_sock),
+	.no_autobind       = 1,
+};
+
+static struct proto homav6_prot = {
+	.name		   = "HOMAv6",
+	.owner		   = THIS_MODULE,
+	.close		   = homa_close,
+	.connect	   = ip6_datagram_connect,
+	.disconnect	   = homa_disconnect,
+	.ioctl		   = homa_ioctl,
+	.init		   = homa_socket,
+	.destroy	   = NULL,
+	.setsockopt	   = homa_setsockopt,
+	.getsockopt	   = homa_getsockopt,
+	.sendmsg	   = homa_sendmsg,
+	.recvmsg	   = homa_recvmsg,
+	.backlog_rcv       = homa_backlog_rcv,
+	.hash		   = homa_hash,
+	.unhash		   = homa_unhash,
+	.get_port	   = homa_get_port,
+	.sysctl_mem	   = sysctl_homa_mem,
+	.sysctl_wmem	   = &sysctl_homa_wmem_min,
+	.sysctl_rmem	   = &sysctl_homa_rmem_min,
+
+	/* IPv6 data comes *after* Homa's data, and isn't included in
+	 * struct homa_sock.
+	 */
+	.obj_size	   = sizeof(struct homa_sock) + sizeof(struct ipv6_pinfo),


The implementation of inet6_sk_generic() has already changed, you should set
.ipv6_pinfo_offset.


+	.no_autobind       = 1,
+};
+
+/* Top-level structure describing the Homa protocol. */
+static struct inet_protosw homa_protosw = {
+	.type              = SOCK_DGRAM,
+	.protocol          = IPPROTO_HOMA,
+	.prot              = &homa_prot,
+	.ops               = &homa_proto_ops,
+	.flags             = INET_PROTOSW_REUSE,
+};
+
+static struct inet_protosw homav6_protosw = {
+	.type              = SOCK_DGRAM,
+	.protocol          = IPPROTO_HOMA,
+	.prot              = &homav6_prot,
+	.ops               = &homav6_proto_ops,
+	.flags             = INET_PROTOSW_REUSE,
+};
+
+/* This structure is used by IP to deliver incoming Homa packets to us. */
+static struct net_protocol homa_protocol = {
+	.handler =	homa_softirq,
+	.err_handler =	homa_err_handler_v4,
+	.no_policy =     1,
+};
+
+static struct inet6_protocol homav6_protocol = {
+	.handler =	homa_softirq,
+	.err_handler =	homa_err_handler_v6,
+	.flags =        INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
+};
+
+/* Sizes of the headers for each Homa packet type, in bytes. */
+static __u16 header_lengths[] = {
+	sizeof32(struct data_header),
+	0,
+	sizeof32(struct resend_header),
+	sizeof32(struct unknown_header),
+	sizeof32(struct busy_header),
+	0,
+	sizeof32(struct common_header),
+	sizeof32(struct need_ack_header),
+	sizeof32(struct ack_header)
+};
+
+static DECLARE_COMPLETION(timer_thread_done);
+
+/**
+ * homa_load() - invoked when this module is loaded into the Linux kernel
+ * Return: 0 on success, otherwise a negative errno.
+ */
+static int __init homa_load(void)
+{
+	int status;
+
+	pr_notice("Homa module loading\n");
+	pr_notice("Homa structure sizes: data_header %u, seg_header %u, ack %u, peer %u, ip_hdr %u flowi %u ipv6_hdr %u, flowi6 %u tcp_sock %u homa_rpc %u sk_buff %u rcvmsg_control %u union sockaddr_in_union %u HOMA_MAX_BPAGES %u NR_CPUS %u nr_cpu_ids %u, MAX_NUMNODES %d\n",
+		  sizeof32(struct data_header),
+		  sizeof32(struct seg_header),
+		  sizeof32(struct homa_ack),
+		  sizeof32(struct homa_peer),
+		  sizeof32(struct iphdr),
+		  sizeof32(struct flowi),
+		  sizeof32(struct ipv6hdr),
+		  sizeof32(struct flowi6),
+		  sizeof32(struct tcp_sock),
+		  sizeof32(struct homa_rpc),
+		  sizeof32(struct sk_buff),
+		  sizeof32(struct homa_recvmsg_args),
+		  sizeof32(union sockaddr_in_union),
+		  HOMA_MAX_BPAGES,
+		  NR_CPUS,
+		  nr_cpu_ids,
+		  MAX_NUMNODES);
+	status = proto_register(&homa_prot, 1);
+	if (status != 0) {
+		pr_err("proto_register failed for homa_prot: %d\n", status);
+		goto out;
+	}
+	status = proto_register(&homav6_prot, 1);
+	if (status != 0) {
+		pr_err("proto_register failed for homav6_prot: %d\n", status);
+		goto out;
+	}
+	inet_register_protosw(&homa_protosw);
+	inet6_register_protosw(&homav6_protosw);


better to check the retval of inet6_register_protosw().


+	status = inet_add_protocol(&homa_protocol, IPPROTO_HOMA);
+	if (status != 0) {
+		pr_err("inet_add_protocol failed in %s: %d\n", __func__,
+		       status);
+		goto out_cleanup;
+	}
+	status = inet6_add_protocol(&homav6_protocol, IPPROTO_HOMA);
+	if (status != 0) {
+		pr_err("inet6_add_protocol failed in %s: %d\n",  __func__,
+		       status);
+		goto out_cleanup;
+	}
+
+	status = homa_init(homa);
+	if (status)
+		goto out_cleanup;
+
+	timer_kthread = kthread_run(homa_timer_main, homa, "homa_timer");
+	if (IS_ERR(timer_kthread)) {
+		status = PTR_ERR(timer_kthread);
+		pr_err("couldn't create homa pacer thread: error %d\n",
+		       status);
+		timer_kthread = NULL;
+		goto out_cleanup;
+	}
+
+	return 0;
+
+out_cleanup:
+	homa_destroy(homa);
+	inet_del_protocol(&homa_protocol, IPPROTO_HOMA);
+	inet_unregister_protosw(&homa_protosw);
+	inet6_del_protocol(&homav6_protocol, IPPROTO_HOMA);
+	inet6_unregister_protosw(&homav6_protosw);
+	proto_unregister(&homa_prot);
+	proto_unregister(&homav6_prot);


It's a bit strange for me that this relies on a premise: that every reverse operation can correctly identify whether the corresponding forward operation has been executed. Currently, perhaps every function includes this capability. It's up to you, I don't insist.



+out:
+	return status;
+}
+
+/**
+ * homa_unload() - invoked when this module is unloaded from the Linux kernel.
+ */
+static void __exit homa_unload(void)
+{
+	pr_notice("Homa module unloading\n");
+	exiting = true;
+
+	if (timer_kthread)
+		wake_up_process(timer_kthread);
+	wait_for_completion(&timer_thread_done);
+	homa_destroy(homa);
+	inet_del_protocol(&homa_protocol, IPPROTO_HOMA);
+	inet_unregister_protosw(&homa_protosw);
+	inet6_del_protocol(&homav6_protocol, IPPROTO_HOMA);
+	inet6_unregister_protosw(&homav6_protosw);
+	proto_unregister(&homa_prot);
+	proto_unregister(&homav6_prot);
+}
+
+module_init(homa_load);
+module_exit(homa_unload);

Perhaps you can try adding MODULE_ALIAS_NET_PF_PROTO_TYPE so that the kernel will automatically load the module when creating IPPROTO_HOMA socket. A functional suggestion, It's up to you.

+
+/**
+ * homa_bind() - Implements the bind system call for Homa sockets: associates
+ * a well-known service port with a socket. Unlike other AF_INET6 protocols,
+ * there is no need to invoke this system call for sockets that are only
+ * used as clients.
+ * @sock:     Socket on which the system call was invoked.
+ * @addr:    Contains the desired port number.
+ * @addr_len: Number of bytes in uaddr.
+ * Return:    0 on success, otherwise a negative errno.
+ */
+int homa_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+{
+	struct homa_sock *hsk = homa_sk(sock->sk);
+	union sockaddr_in_union *addr_in = (union sockaddr_in_union *)addr;
+	int port = 0;
+
+	if (unlikely(addr->sa_family != sock->sk->sk_family))
+		return -EAFNOSUPPORT;
+	if (addr_in->in6.sin6_family == AF_INET6) {
+		if (addr_len < sizeof(struct sockaddr_in6))
+			return -EINVAL;
+		port = ntohs(addr_in->in4.sin_port);
+	} else if (addr_in->in4.sin_family == AF_INET) {
+		if (addr_len < sizeof(struct sockaddr_in))
+			return -EINVAL;
+		port = ntohs(addr_in->in6.sin6_port);
+	}
+	return homa_sock_bind(homa->port_map, hsk, port);
+}

Is binding multiple times legal? For example, bind 80 first and then bind 8080. If not, I think
you might need to check the inet_num.


+
+/**
+ * homa_close() - Invoked when close system call is invoked on a Homa socket.
+ * @sk:      Socket being closed
+ * @timeout: ??
+ */
+void homa_close(struct sock *sk, long timeout)
+{
+	struct homa_sock *hsk = homa_sk(sk);
+
+	homa_sock_destroy(hsk);
+	sk_common_release(sk);
+}
+
+/**
+ * homa_shutdown() - Implements the shutdown system call for Homa sockets.
+ * @sock:    Socket to shut down.
+ * @how:     Ignored: for other sockets, can independently shut down
+ *           sending and receiving, but for Homa any shutdown will
+ *           shut down everything.
+ *
+ * Return: 0 on success, otherwise a negative errno.
+ */
+int homa_shutdown(struct socket *sock, int how)
+{
+	homa_sock_shutdown(homa_sk(sock->sk));
+	return 0;
+}
+
+/**
+ * homa_disconnect() - Invoked when disconnect system call is invoked on a
+ * Homa socket.
+ * @sk:    Socket to disconnect
+ * @flags: ??
+ *
+ * Return: 0 on success, otherwise a negative errno.
+ */
+int homa_disconnect(struct sock *sk, int flags)
+{
+	pr_warn("unimplemented disconnect invoked on Homa socket\n");
+	return -EINVAL;
+}
+
+/**
+ * homa_ioctl() - Implements the ioctl system call for Homa sockets.
+ * @sk:    Socket on which the system call was invoked.
+ * @cmd:   Identifier for a particular ioctl operation.
+ * @karg:  Operation-specific argument; typically the address of a block
+ *         of data in user address space.
+ *
+ * Return: 0 on success, otherwise a negative errno.
+ */
+int homa_ioctl(struct sock *sk, int cmd, int *karg)
+{
+	return -EINVAL;
+}
+
+/**
+ * homa_socket() - Implements the socket(2) system call for sockets.
+ * @sk:    Socket on which the system call was invoked. The non-Homa
+ *         parts have already been initialized.
+ *
+ * Return: always 0 (success).
+ */
+int homa_socket(struct sock *sk)
+{
+	struct homa_sock *hsk = homa_sk(sk);
+
+	homa_sock_init(hsk, homa)
I noticed that homa_sock_init() contains a memory allocation action, perhaps you should add a return value check.


-> hsk->buffer_pool = kzalloc(sizeof(*hsk->buffer_pool), GFP_KERNEL);

+	return 0;
+}
+
+/**
+ * homa_setsockopt() - Implements the getsockopt system call for Homa sockets.
+ * @sk:      Socket on which the system call was invoked.
+ * @level:   Level at which the operation should be handled; will always
+ *           be IPPROTO_HOMA.
+ * @optname: Identifies a particular setsockopt operation.
+ * @optval:  Address in user space of information about the option.
+ * @optlen:  Number of bytes of data at @optval.
+ * Return:   0 on success, otherwise a negative errno.
+ */
+int homa_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
+		    unsigned int optlen)
+{
+	struct homa_sock *hsk = homa_sk(sk);
+	struct homa_set_buf_args args;
+	int ret;
+
+	if (level != IPPROTO_HOMA || optname != SO_HOMA_SET_BUF ||
+	    optlen != sizeof(struct homa_set_buf_args))
+		return -EINVAL;
+
+	if (copy_from_sockptr(&args, optval, optlen))
+		return -EFAULT;
+
+	/* Do a trivial test to make sure we can at least write the first
+	 * page of the region.
+	 */
+	if (copy_to_user((__force void __user *)args.start, &args, sizeof(args)))
+		return -EFAULT;
+
+	homa_sock_lock(hsk, "homa_setsockopt SO_HOMA_SET_BUF");
+	ret = homa_pool_init(hsk, (__force void __user *)args.start, args.length);
+	homa_sock_unlock(hsk);
+	return ret;
+}
+
+/**
+ * homa_getsockopt() - Implements the getsockopt system call for Homa sockets.
+ * @sk:      Socket on which the system call was invoked.
+ * @level:   ??
+ * @optname: Identifies a particular setsockopt operation.
+ * @optval:  Address in user space where the option's value should be stored.
+ * @option:  ??.
+ * Return:   0 on success, otherwise a negative errno.
+ */
+int homa_getsockopt(struct sock *sk, int level, int optname,
+		    char __user *optval, int __user *option)
+{
+	pr_warn("unimplemented getsockopt invoked on Homa socket: level %d, optname %d\n",
+		level, optname);
+	return -EINVAL;
+}
+
+/**
+ * homa_sendmsg() - Send a request or response message on a Homa socket.
+ * @sk:     Socket on which the system call was invoked.
+ * @msg:    Structure describing the message to send; the msg_control
+ *          field points to additional information.
+ * @length: Number of bytes of the message.
+ * Return: 0 on success, otherwise a negative errno.
+ */
+int homa_sendmsg(struct sock *sk, struct msghdr *msg, size_t length)
+{
+	struct homa_sock *hsk = homa_sk(sk);
+	struct homa_sendmsg_args args;
+	int result = 0;
+	struct homa_rpc *rpc = NULL;
+	union sockaddr_in_union *addr = (union sockaddr_in_union *)msg->msg_name;
+
+	if (unlikely(!msg->msg_control_is_user)) {
+		result = -EINVAL;
+		goto error;
+	}
+	if (unlikely(copy_from_user(&args,
+				    (__force void __user *)msg->msg_control,
+				    sizeof(args)))) {
+		result = -EFAULT;
+		goto error;
+	}
+	if (addr->in6.sin6_family != sk->sk_family) {
+		result = -EAFNOSUPPORT;
+		goto error;
+	}
+	if (msg->msg_namelen < sizeof(struct sockaddr_in) ||
+	    (msg->msg_namelen < sizeof(struct sockaddr_in6) &&
+	     addr->in6.sin6_family == AF_INET6)) {
+		result = -EINVAL;
+		goto error;
+	}
+
+	if (!args.id) {
+		/* This is a request message. */
+		rpc = homa_rpc_new_client(hsk, addr);
+		if (IS_ERR(rpc)) {
+			result = PTR_ERR(rpc);
+			rpc = NULL;
+			goto error;
+		}
+		rpc->completion_cookie = args.completion_cookie;
+		result = homa_message_out_fill(rpc, &msg->msg_iter, 1);
+		if (result)
+			goto error;
+		args.id = rpc->id;
+		homa_rpc_unlock(rpc);
+		rpc = NULL;
+
+		if (unlikely(copy_to_user((__force void __user *)msg->msg_control,
+					  &args, sizeof(args)))) {
+			rpc = homa_find_client_rpc(hsk, args.id);
+			result = -EFAULT;
+			goto error;
+		}
+	} else {
+		/* This is a response message. */
+		struct in6_addr canonical_dest;
+
+		if (args.completion_cookie != 0) {
+			result = -EINVAL;
+			goto error;
+		}
+		canonical_dest = canonical_ipv6_addr(addr);
+
+		rpc = homa_find_server_rpc(hsk, &canonical_dest,
+					   ntohs(addr->in6.sin6_port), args.id);
+		if (!rpc)
+			/* Return without an error if the RPC doesn't exist;
+			 * this could be totally valid (e.g. client is
+			 * no longer interested in it).
+			 */
+			return 0;
+		if (rpc->error) {
+			result = rpc->error;
+			goto error;
+		}
+		if (rpc->state != RPC_IN_SERVICE) {
+			homa_rpc_unlock(rpc);
+			rpc = NULL;
+			result = -EINVAL;
+			goto error;
+		}
+		rpc->state = RPC_OUTGOING;
+
+		result = homa_message_out_fill(rpc, &msg->msg_iter, 1);
+		if (result && rpc->state != RPC_DEAD)
+			goto error;
+		homa_rpc_unlock(rpc);
+	}
+	return 0;
+
+error:
+	if (rpc) {
+		homa_rpc_free(rpc);
+		homa_rpc_unlock(rpc);
+	}
+	return result;
+}
+
+/**
+ * homa_recvmsg() - Receive a message from a Homa socket.
+ * @sk:          Socket on which the system call was invoked.
+ * @msg:         Controlling information for the receive.
+ * @len:         Total bytes of space available in msg->msg_iov; not used.
+ * @flags:       Flags from system call; only MSG_DONTWAIT is used.
+ * @addr_len:    Store the length of the sender address here
+ * Return:       The length of the message on success, otherwise a negative
+ *               errno.
+ */
+int homa_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
+		 int *addr_len)
+{
+	struct homa_sock *hsk = homa_sk(sk);
+	struct homa_recvmsg_args control;
+	struct homa_rpc *rpc;
+	int result;
+
+	if (unlikely(!msg->msg_control)) {
+		/* This test isn't strictly necessary, but it provides a
+		 * hook for testing kernel call times.
+		 */
+		return -EINVAL;
+	}
+	if (msg->msg_controllen != sizeof(control)) {
+		result = -EINVAL;
+		goto done;
+	}
+	if (unlikely(copy_from_user(&control,
+				    (__force void __user *)msg->msg_control,
+				    sizeof(control)))) {
+		result = -EFAULT;
+		goto done;
+	}
+	control.completion_cookie = 0;
+
+	if (control.num_bpages > HOMA_MAX_BPAGES ||
+	    (control.flags & ~HOMA_RECVMSG_VALID_FLAGS)) {
+		result = -EINVAL;
+		goto done;
+	}
+	homa_pool_release_buffers(hsk->buffer_pool, control.num_bpages,
+				  control.bpage_offsets);
+	control.num_bpages = 0;
+
+	rpc = homa_wait_for_message(hsk, (flags & MSG_DONTWAIT)
+			? (control.flags | HOMA_RECVMSG_NONBLOCKING)
+			: control.flags, control.id);
+	if (IS_ERR(rpc)) {
+		/* If we get here, it means there was an error that prevented
+		 * us from finding an RPC to return. If there's an error in
+		 * the RPC itself we won't get here.
+		 */
+		result = PTR_ERR(rpc);
+		goto done;
+	}
+	result = rpc->error ? rpc->error : rpc->msgin.length;
+
+	/* Collect result information. */
+	control.id = rpc->id;
+	control.completion_cookie = rpc->completion_cookie;
+	if (likely(rpc->msgin.length >= 0)) {
+		control.num_bpages = rpc->msgin.num_bpages;
+		memcpy(control.bpage_offsets, rpc->msgin.bpage_offsets,
+		       sizeof(control.bpage_offsets));
+	}
+	if (sk->sk_family == AF_INET6) {
+		struct sockaddr_in6 *in6 = msg->msg_name;
+
+		in6->sin6_family = AF_INET6;
+		in6->sin6_port = htons(rpc->dport);
+		in6->sin6_addr = rpc->peer->addr;
+		*addr_len = sizeof(*in6);
+	} else {
+		struct sockaddr_in *in4 = msg->msg_name;
+
+		in4->sin_family = AF_INET;
+		in4->sin_port = htons(rpc->dport);
+		in4->sin_addr.s_addr = ipv6_to_ipv4(rpc->peer->addr);
+		*addr_len = sizeof(*in4);
+	}
+
+	/* This indicates that the application now owns the buffers, so
+	 * we won't free them in homa_rpc_free.
+	 */
+	rpc->msgin.num_bpages = 0;
+
+	/* Must release the RPC lock (and potentially free the RPC) before
+	 * copying the results back to user space.
+	 */
+	if (homa_is_client(rpc->id)) {
+		homa_peer_add_ack(rpc);
+		homa_rpc_free(rpc);
+	} else {
+		if (result < 0)
+			homa_rpc_free(rpc);
+		else
+			rpc->state = RPC_IN_SERVICE;
+	}
+	homa_rpc_unlock(rpc);
+
+done:
+	if (unlikely(copy_to_user((__force void __user *)msg->msg_control,
+		     &control, sizeof(control)))) {
+		/* Note: in this case the message's buffers will be leaked. */
+		pr_notice("%s couldn't copy back args\n", __func__);
+		result = -EFAULT;
+	}
+
+	return result;
+}
+
+/**
+ * homa_hash() - Not needed for Homa.
+ * @sk:    Socket for the operation
+ * Return: ??
+ */
+int homa_hash(struct sock *sk)
+{
+	return 0;
+}
+
+/**
+ * homa_unhash() - Not needed for Homa.
+ * @sk:    Socket for the operation
+ */
+void homa_unhash(struct sock *sk)
+{
+}
+
+/**
+ * homa_get_port() - It appears that this function is called to assign a
+ * default port for a socket.
+ * @sk:    Socket for the operation
+ * @snum:  Unclear what this is.
+ * Return: Zero for success, or a negative errno for an error.
+ */
+int homa_get_port(struct sock *sk, unsigned short snum)
+{
+	/* Homa always assigns ports immediately when a socket is created,
+	 * so there is nothing to do here.
+	 */
+	return 0;
+}
+
+/**
+ * homa_softirq() - This function is invoked at SoftIRQ level to handle
+ * incoming packets.
+ * @skb:   The incoming packet.
+ * Return: Always 0
+ */
+int homa_softirq(struct sk_buff *skb)
+{
+	struct sk_buff *packets, *other_pkts, *next;
+	struct sk_buff **prev_link, **other_link;
+	struct common_header *h;
+	int first_packet = 1;
+	int header_offset;
+	int pull_length;
+
+	/* skb may actually contain many distinct packets, linked through
+	 * skb_shinfo(skb)->frag_list by the Homa GRO mechanism. Make a
+	 * pass through the list to process all of the short packets,
+	 * leaving the longer packets in the list. Also, perform various
+	 * prep/cleanup/error checking functions.
+	 */
+	skb->next = skb_shinfo(skb)->frag_list;
+	skb_shinfo(skb)->frag_list = NULL;
+	packets = skb;
+	prev_link = &packets;
+	for (skb = packets; skb; skb = next) {
+		next = skb->next;
+
+		/* Make the header available at skb->data, even if the packet
+		 * is fragmented. One complication: it's possible that the IP
+		 * header hasn't yet been removed (this happens for GRO packets
+		 * on the frag_list, since they aren't handled explicitly by IP.
+		 */
+		header_offset = skb_transport_header(skb) - skb->data;
+		pull_length = HOMA_MAX_HEADER + header_offset;
+		if (pull_length > skb->len)
+			pull_length = skb->len;
+		if (!pskb_may_pull(skb, pull_length))
+			goto discard;
+		if (header_offset)
+			__skb_pull(skb, header_offset);
+
+		/* Reject packets that are too short or have bogus types. */
+		h = (struct common_header *)skb->data;
+		if (unlikely(skb->len < sizeof(struct common_header) ||
+			     h->type < DATA || h->type >= BOGUS ||
+			     skb->len < header_lengths[h->type - DATA]))
+			goto discard;
+
+		if (first_packet)
+			first_packet = 0;
+
+		/* Process the packet now if it is a control packet or
+		 * if it contains an entire short message.
+		 */
+		if (h->type != DATA || ntohl(((struct data_header *)h)
+				->message_length) < 1400) {
+			*prev_link = skb->next;
+			skb->next = NULL;
+			homa_dispatch_pkts(skb, homa);
+		} else {
+			prev_link = &skb->next;
+		}
+		continue;
+
+discard:
+		*prev_link = skb->next;
+		kfree_skb(skb);
+	}
+
+	/* Now process the longer packets. Each iteration of this loop
+	 * collects all of the packets for a particular RPC and dispatches
+	 * them.
+	 */
+	while (packets) {
+		struct in6_addr saddr, saddr2;
+		struct common_header *h2;
+		struct sk_buff *skb2;
+
+		skb = packets;
+		prev_link = &skb->next;
+		saddr = skb_canonical_ipv6_saddr(skb);
+		other_pkts = NULL;
+		other_link = &other_pkts;
+		h = (struct common_header *)skb->data;
+		for (skb2 = skb->next; skb2; skb2 = next) {
+			next = skb2->next;
+			h2 = (struct common_header *)skb2->data;
+			if (h2->sender_id == h->sender_id) {
+				saddr2 = skb_canonical_ipv6_saddr(skb2);
+				if (ipv6_addr_equal(&saddr, &saddr2)) {
+					*prev_link = skb2;
+					prev_link = &skb2->next;
+					continue;
+				}
+			}
+			*other_link = skb2;
+			other_link = &skb2->next;
+		}
+		*prev_link = NULL;
+		*other_link = NULL;
+		homa_dispatch_pkts(packets, homa);
+		packets = other_pkts;
+	}
+
+	return 0;
+}
+
+/**
+ * homa_backlog_rcv() - Invoked to handle packets saved on a socket's
+ * backlog because it was locked when the packets first arrived.
+ * @sk:     Homa socket that owns the packet's destination port.
+ * @skb:    The incoming packet. This function takes ownership of the packet
+ *          (we'll delete it).
+ *
+ * Return:  Always returns 0.
+ */
+int homa_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	pr_warn("unimplemented backlog_rcv invoked on Homa socket\n");
+	kfree_skb(skb);
+	return 0;
+}
+
+/**
+ * homa_err_handler_v4() - Invoked by IP to handle an incoming error
+ * packet, such as ICMP UNREACHABLE.
+ * @skb:   The incoming packet.
+ * @info:  Information about the error that occurred?
+ *
+ * Return: zero, or a negative errno if the error couldn't be handled here.
+ */
+int homa_err_handler_v4(struct sk_buff *skb, u32 info)
+{
+	const struct in6_addr saddr = skb_canonical_ipv6_saddr(skb);
+	const struct iphdr *iph = ip_hdr(skb);
+	int type = icmp_hdr(skb)->type;
+	int code = icmp_hdr(skb)->code;
+
+	if (type == ICMP_DEST_UNREACH && code == ICMP_PORT_UNREACH) {
+		char *icmp = (char *)icmp_hdr(skb);
+		struct common_header *h;
+
+		iph = (struct iphdr *)(icmp + sizeof(struct icmphdr));
+		h = (struct common_header *)(icmp + sizeof(struct icmphdr)
+				+ iph->ihl * 4);
+		homa_abort_rpcs(homa, &saddr, ntohs(h->dport), -ENOTCONN);
+	} else if (type == ICMP_DEST_UNREACH) {
+		int error;
+
+		if (code == ICMP_PROT_UNREACH)
+			error = -EPROTONOSUPPORT;
+		else
+			error = -EHOSTUNREACH;
+		homa_abort_rpcs(homa, &saddr, 0, error);
+	} else {
+		pr_notice("%s invoked with info %x, ICMP type %d, ICMP code %d\n",
+			  __func__, info, type, code);
+	}
+	return 0;
+}
+
+/**
+ * homa_err_handler_v6() - Invoked by IP to handle an incoming error
+ * packet, such as ICMP UNREACHABLE.
+ * @skb:    The incoming packet.
+ * @opt:    Not used.
+ * @type:   Type of ICMP packet.
+ * @code:   Additional information about the error.
+ * @offset: Not used.
+ * @info:   Information about the error that occurred?
+ *
+ * Return: zero, or a negative errno if the error couldn't be handled here.
+ */
+int homa_err_handler_v6(struct sk_buff *skb, struct inet6_skb_parm *opt,
+			u8 type,  u8 code,  int offset,  __be32 info)
+{
+	const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;
+
+	if (type == ICMPV6_DEST_UNREACH && code == ICMPV6_PORT_UNREACH) {
+		char *icmp = (char *)icmp_hdr(skb);
+		struct common_header *h;
+
+		iph = (struct ipv6hdr *)(icmp + sizeof(struct icmphdr));
+		h = (struct common_header *)(icmp + sizeof(struct icmphdr)
+				+ HOMA_IPV6_HEADER_LENGTH);
+		homa_abort_rpcs(homa, &iph->daddr, ntohs(h->dport), -ENOTCONN);
+	} else if (type == ICMPV6_DEST_UNREACH) {
+		int error;
+
+		if (code == ICMP_PROT_UNREACH)
+			error = -EPROTONOSUPPORT;
+		else
+			error = -EHOSTUNREACH;
+		homa_abort_rpcs(homa, &iph->daddr, 0, error);
+	}
+	return 0;
+}
+
+/**
+ * homa_poll() - Invoked by Linux as part of implementing select, poll,
+ * epoll, etc.
+ * @file:  Open file that is participating in a poll, select, etc.
+ * @sock:  A Homa socket, associated with @file.
+ * @wait:  This table will be registered with the socket, so that it
+ *         is notified when the socket's ready state changes.
+ *
+ * Return: A mask of bits such as EPOLLIN, which indicate the current
+ *         state of the socket.
+ */
+__poll_t homa_poll(struct file *file, struct socket *sock,
+		   struct poll_table_struct *wait)
+{
+	struct sock *sk = sock->sk;
+	__u32 mask;
+
+	/* It seems to be standard practice for poll functions *not* to
+	 * acquire the socket lock, so we don't do it here; not sure
+	 * why...
+	 */
+
+	sock_poll_wait(file, sock, wait);
+	mask = POLLOUT | POLLWRNORM;
+
+	if (!list_empty(&homa_sk(sk)->ready_requests) ||
+	    !list_empty(&homa_sk(sk)->ready_responses))
+		mask |= POLLIN | POLLRDNORM;
+	return (__force __poll_t)mask;
+}
+
+/**
+ * homa_hrtimer() - This function is invoked by the hrtimer mechanism to
+ * wake up the timer thread. Runs at IRQ level.
+ * @timer:   The timer that triggered; not used.
+ *
+ * Return:   Always HRTIMER_RESTART.
+ */
+enum hrtimer_restart homa_hrtimer(struct hrtimer *timer)
+{
+	wake_up_process(timer_kthread);
+	return HRTIMER_NORESTART;
+}
+
+/**
+ * homa_timer_main() - Top-level function for the timer thread.
+ * @transport:  Pointer to struct homa.
+ *
+ * Return:         Always 0.
+ */
+int homa_timer_main(void *transport)
+{
+	struct homa *homa = (struct homa *)transport;
+	struct hrtimer hrtimer;
+	ktime_t tick_interval;
+	u64 nsec;
+
+	hrtimer_init(&hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer.function = &homa_hrtimer;
+	nsec = 1000000;                   /* 1 ms */
+	tick_interval = ns_to_ktime(nsec);
+	while (1) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (!exiting) {
+			hrtimer_start(&hrtimer, tick_interval, HRTIMER_MODE_REL);
+			schedule();
+		}
+		__set_current_state(TASK_RUNNING);
+		if (exiting)
+			break;
+		homa_timer(homa);
+	}
+	hrtimer_cancel(&hrtimer);
+	kthread_complete_and_exit(&timer_thread_done, 0);
+	return 0;
+}
diff --git a/net/homa/homa_utils.c b/net/homa/homa_utils.c
new file mode 100644
index 000000000000..905d00c836bd
--- /dev/null
+++ b/net/homa/homa_utils.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: BSD-2-Clause
+
+/* This file contains miscellaneous utility functions for Homa, such
+ * as initializing and destroying homa structs.
+ */
+
+#include "homa_impl.h"
+#include "homa_peer.h"
+#include "homa_rpc.h"
+#include "homa_stub.h"
+
+struct completion homa_pacer_kthread_done;
+
+/**
+ * homa_init() - Constructor for homa objects.
+ * @homa:   Object to initialize.
+ *
+ * Return:  0 on success, or a negative errno if there was an error. Even
+ *          if an error occurs, it is safe (and necessary) to call
+ *          homa_destroy at some point.
+ */
+int homa_init(struct homa *homa)
+{
+	int err;
+
+	_Static_assert(HOMA_MAX_PRIORITIES >= 8,
+		       "homa_init assumes at least 8 priority levels");
+
+	homa->pacer_kthread = NULL;
+	init_completion(&homa_pacer_kthread_done);
+	atomic64_set(&homa->next_outgoing_id, 2);
+	atomic64_set(&homa->link_idle_time, sched_clock());
+	spin_lock_init(&homa->pacer_mutex);
+	homa->pacer_fifo_fraction = 50;
+	homa->pacer_fifo_count = 1;
+	homa->pacer_wake_time = 0;
+	spin_lock_init(&homa->throttle_lock);
+	INIT_LIST_HEAD_RCU(&homa->throttled_rpcs);
+	homa->throttle_add = 0;
+	homa->throttle_min_bytes = 200;
+	homa->next_client_port = HOMA_MIN_DEFAULT_PORT;
+	homa->port_map = kmalloc(sizeof(*homa->port_map), GFP_KERNEL);
+	if (!homa->port_map) {
+		pr_err("%s couldn't create port_map: kmalloc failure", __func__);
+		return -ENOMEM;
+	}
+	homa_socktab_init(homa->port_map);
+	homa->peers = kmalloc(sizeof(*homa->peers), GFP_KERNEL);
+	if (!homa->peers) {
+		pr_err("%s couldn't create peers: kmalloc failure", __func__);
+		return -ENOMEM;
+	}
+	err = homa_peertab_init(homa->peers);
+	if (err) {
+		pr_err("%s couldn't initialize peer table (errno %d)\n",
+		       __func__, -err);
+		return err;
+	}
+
+	/* Wild guesses to initialize configuration values... */
+	homa->unsched_bytes = 40000;
+	homa->window_param = 100000;
+	homa->link_mbps = 25000;
+	homa->fifo_grant_increment = 10000;
+	homa->grant_fifo_fraction = 50;
+	homa->max_overcommit = 8;
+	homa->max_incoming = 400000;
+	homa->max_rpcs_per_peer = 1;
+	homa->resend_ticks = 5;
+	homa->resend_interval = 5;
+	homa->timeout_ticks = 100;
+	homa->timeout_resends = 5;
+	homa->request_ack_ticks = 2;
+	homa->reap_limit = 10;
+	homa->dead_buffs_limit = 5000;
+	homa->max_dead_buffs = 0;
+	homa->pacer_kthread = kthread_run(homa_pacer_main, homa,
+					  "homa_pacer");
+	if (IS_ERR(homa->pacer_kthread)) {
+		err = PTR_ERR(homa->pacer_kthread);
+		homa->pacer_kthread = NULL;
+		pr_err("couldn't create homa pacer thread: error %d\n", err);
+		return err;
+	}
+	homa->pacer_exit = false;
+	homa->max_nic_queue_ns = 5000;
+	homa->ns_per_mbyte = 0;
+	homa->max_gso_size = 10000;
+	homa->gso_force_software = 0;
+	homa->max_gro_skbs = 20;
+	homa->gro_policy = HOMA_GRO_NORMAL;
+	homa->timer_ticks = 0;
+	homa->flags = 0;
+	homa->bpage_lease_usecs = 10000;
+	homa->next_id = 0;
+	homa_outgoing_sysctl_changed(homa);
+	homa_incoming_sysctl_changed(homa);
+	return 0;
+}
+
+/**
+ * homa_destroy() -  Destructor for homa objects.
+ * @homa:      Object to destroy.
+ */
+void homa_destroy(struct homa *homa)
+{
+	if (homa->pacer_kthread) {
+		homa_pacer_stop(homa);
+		wait_for_completion(&homa_pacer_kthread_done);
+	}
+
+	/* The order of the following statements matters! */
+	if (homa->port_map) {
+		homa_socktab_destroy(homa->port_map);
+		kfree(homa->port_map);
+		homa->port_map = NULL;
+	}
+	if (homa->peers) {
+		homa_peertab_destroy(homa->peers);
+		kfree(homa->peers);
+		homa->peers = NULL;
+	}
+}
+
+/**
+ * homa_spin() - Delay (without sleeping) for a given time interval.
+ * @ns:   How long to delay (in nanoseconds)
+ */
+void homa_spin(int ns)
+{
+	__u64 end;
+
+	end = sched_clock() + ns;
+	while (sched_clock() < end)
+		/* Empty loop body.*/
+		;
+}
+
+/**
+ * homa_throttle_lock_slow() - This function implements the slow path for
+ * acquiring the throttle lock. It is invoked when the lock isn't immediately
+ * available. It waits for the lock, but also records statistics about
+ * the waiting time.
+ * @homa:    Overall data about the Homa protocol implementation.
+ */
+void homa_throttle_lock_slow(struct homa *homa)
+	__acquires(&homa->throttle_lock)
+{
+	spin_lock_bh(&homa->throttle_lock);
+}




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux