This patch introduces TX HW offload. tls_main: contains generic logic that will be shared by both SW and HW implementations. tls_device: contains generic HW logic that is shared by all HW offload implementations. Signed-off-by: Boris Pismenny <borisp@xxxxxxxxxxxx> Signed-off-by: Ilya Lesokhin <ilyal@xxxxxxxxxxxx> Signed-off-by: Aviad Yehezkel <aviadye@xxxxxxxxxxxx> --- MAINTAINERS | 13 + include/net/tls.h | 184 ++++++++++++++ include/uapi/linux/Kbuild | 1 + include/uapi/linux/tls.h | 84 +++++++ net/Kconfig | 1 + net/Makefile | 1 + net/tls/Kconfig | 12 + net/tls/Makefile | 7 + net/tls/tls_device.c | 594 ++++++++++++++++++++++++++++++++++++++++++++++ net/tls/tls_main.c | 348 +++++++++++++++++++++++++++ 10 files changed, 1245 insertions(+) create mode 100644 include/net/tls.h create mode 100644 include/uapi/linux/tls.h create mode 100644 net/tls/Kconfig create mode 100644 net/tls/Makefile create mode 100644 net/tls/tls_device.c create mode 100644 net/tls/tls_main.c diff --git a/MAINTAINERS b/MAINTAINERS index b340ef6..e3b70c3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8486,6 +8486,19 @@ F: net/ipv6/ F: include/net/ip* F: arch/x86/net/* +NETWORKING [TLS] +M: Ilya Lesokhin <ilyal@xxxxxxxxxxxx> +M: Aviad Yehezkel <aviadye@xxxxxxxxxxxx> +M: Boris Pismenny <borisp@xxxxxxxxxxxx> +M: Haggai Eran <haggaie@xxxxxxxxxxxx> +L: netdev@xxxxxxxxxxxxxxx +T: git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git +S: Maintained +F: net/tls/* +F: include/uapi/linux/tls.h +F: include/net/tls.h + NETWORKING [IPSEC] M: Steffen Klassert <steffen.klassert@xxxxxxxxxxx> M: Herbert Xu <herbert@xxxxxxxxxxxxxxxxxxx> diff --git a/include/net/tls.h b/include/net/tls.h new file mode 100644 index 0000000..f7f0cde --- /dev/null +++ b/include/net/tls.h @@ -0,0 +1,184 @@ +/* Copyright (c) 2016-2017, Mellanox Technologies All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * - Neither the name of the Mellanox Technologies nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE + */ + +#ifndef _TLS_OFFLOAD_H +#define _TLS_OFFLOAD_H + +#include <linux/types.h> + +#include <uapi/linux/tls.h> + + +/* Maximum data size carried in a TLS record */ +#define TLS_MAX_PAYLOAD_SIZE ((size_t)1 << 14) + +#define TLS_HEADER_SIZE 5 +#define TLS_NONCE_OFFSET TLS_HEADER_SIZE + +#define TLS_CRYPTO_INFO_READY(info) ((info)->cipher_type) +#define TLS_IS_STATE_HW(info) ((info)->state == TLS_STATE_HW) + +#define TLS_RECORD_TYPE_DATA 0x17 + + +struct tls_record_info { + struct list_head list; + u32 end_seq; + int len; + int num_frags; + skb_frag_t frags[MAX_SKB_FRAGS]; +}; + +struct tls_offload_context { + struct list_head records_list; + struct tls_record_info *open_record; + struct tls_record_info *retransmit_hint; + u32 expectedSN; + spinlock_t lock; /* protects records list */ +}; + +struct tls_context { + union { + struct tls_crypto_info crypto_send; + struct tls_crypto_info_aes_gcm_128 crypto_send_aes_gcm_128; + }; + + void *priv_ctx; + + u16 prepand_size; + u16 tag_size; + u16 iv_size; + char *iv; + + /* TODO: change sw code to use below fields and push_frags function */ + skb_frag_t *pending_frags; + u16 num_pending_frags; + u16 pending_offset; + + void (*sk_write_space)(struct sock *sk); + void (*sk_destruct)(struct sock *sk); +}; + + +int tls_sk_query(struct sock *sk, int optname, char __user *optval, + int __user *optlen); +int tls_sk_attach(struct sock *sk, int optname, char __user *optval, + unsigned int optlen); 
+ +void tls_clear_device_offload(struct sock *sk, struct tls_context *ctx); +int tls_set_device_offload(struct sock *sk, struct tls_context *ctx); +int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); +int tls_device_sendpage(struct sock *sk, struct page *page, + int offset, size_t size, int flags); + +struct tls_record_info *tls_get_record(struct tls_offload_context *context, + u32 seq); + +void tls_sk_destruct(struct sock *sk, struct tls_context *ctx); +void tls_icsk_clean_acked(struct sock *sk); + +void tls_device_sk_destruct(struct sock *sk); + + +int tls_push_frags(struct sock *sk, struct tls_context *ctx, + skb_frag_t *frag, u16 num_frags, u16 first_offset, + int flags); +int tls_push_paritial_record(struct sock *sk, struct tls_context *ctx, + int flags); + +static inline bool tls_is_pending_open_record(struct tls_context *ctx) +{ + return !!ctx->num_pending_frags; +} + +static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk) +{ + return smp_load_acquire(&sk->sk_destruct) == + &tls_device_sk_destruct; +} + +static inline void tls_err_abort(struct sock *sk) +{ + xchg(&sk->sk_err, -EBADMSG); + sk->sk_error_report(sk); +} + +static inline void tls_increment_seqno(unsigned char *seq, struct sock *sk) +{ + int i; + + for (i = 7; i >= 0; i--) { + ++seq[i]; + if (seq[i] != 0) + break; + } + + if (i == -1) + tls_err_abort(sk); +} + +static inline void tls_fill_prepend(struct tls_context *ctx, + char *buf, + size_t plaintext_len, + unsigned char record_type) +{ + size_t pkt_len, iv_size = ctx->iv_size; + + pkt_len = plaintext_len + iv_size + ctx->tag_size; + + /* we cover nonce explicit here as well, so buf should be of + * size KTLS_DTLS_HEADER_SIZE + KTLS_DTLS_NONCE_EXPLICIT_SIZE + */ + buf[0] = record_type; + buf[1] = TLS_VERSION_MINOR(ctx->crypto_send.version); + buf[2] = TLS_VERSION_MAJOR(ctx->crypto_send.version); + /* we can use IV for nonce explicit according to spec */ + buf[3] = pkt_len >> 8; + buf[4] = pkt_len & 0xFF; + 
memcpy(buf + TLS_NONCE_OFFSET, ctx->iv, iv_size); +} + +static inline struct tls_context *tls_get_ctx(const struct sock *sk) +{ + return sk->sk_user_data; +} + +static inline struct tls_offload_context *tls_offload_ctx( + const struct tls_context *tls_ctx) +{ + return (struct tls_offload_context *)tls_ctx->priv_ctx; +} + + +#endif /* _TLS_OFFLOAD_H */ diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index cd2be1c..96ae5ca 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -406,6 +406,7 @@ header-y += sysinfo.h header-y += target_core_user.h header-y += taskstats.h header-y += tcp.h +header-y += tls.h header-y += tcp_metrics.h header-y += telephony.h header-y += termios.h diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h new file mode 100644 index 0000000..464621b --- /dev/null +++ b/include/uapi/linux/tls.h @@ -0,0 +1,84 @@ +/* Copyright (c) 2016-2017, Mellanox Technologies All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * - Neither the name of the Mellanox Technologies nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE + */ + +#ifndef _UAPI_LINUX_TLS_H +#define _UAPI_LINUX_TLS_H + +#include <linux/types.h> +#include <asm/byteorder.h> +#include <linux/socket.h> +#include <linux/tcp.h> + +/* Supported versions */ +#define TLS_VERSION_MINOR(ver) ((ver) & 0xFF) +#define TLS_VERSION_MAJOR(ver) (((ver) >> 8) & 0xFF) + +#define TLS_VERSION_NUMBER(id) ((((id##_VERSION_MAJOR) & 0xFF) << 8) | \ + ((id##_VERSION_MINOR) & 0xFF)) + +#define TLS_1_2_VERSION_MAJOR 0x3 +#define TLS_1_2_VERSION_MINOR 0x3 +#define TLS_1_2_VERSION TLS_VERSION_NUMBER(TLS_1_2) + +/* Supported ciphers */ +#define TLS_CIPHER_AES_GCM_128 51 +#define TLS_CIPHER_AES_GCM_128_IV_SIZE ((size_t)8) +#define TLS_CIPHER_AES_GCM_128_KEY_SIZE ((size_t)16) +#define TLS_CIPHER_AES_GCM_128_SALT_SIZE ((size_t)4) +#define TLS_CIPHER_AES_GCM_128_TAG_SIZE ((size_t)16) + +struct tls_ctrlmsg { + unsigned char type; + unsigned char data[0]; +} __attribute__((packed)); + +enum tls_state { + TLS_STATE_SW = 0x0, + TLS_STATE_HW = 0x1, +}; + +struct tls_crypto_info { + __u16 version; + __u16 cipher_type; + __u32 state; +}; + +struct tls_crypto_info_aes_gcm_128 { + struct tls_crypto_info info; + unsigned char iv[TLS_CIPHER_AES_GCM_128_IV_SIZE]; + unsigned char key[TLS_CIPHER_AES_GCM_128_KEY_SIZE]; + unsigned char salt[TLS_CIPHER_AES_GCM_128_SALT_SIZE]; +}; + +#endif /* _UAPI_LINUX_TLS_H */ diff --git a/net/Kconfig b/net/Kconfig index a100500..b50e899 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -55,6 +55,7 @@ 
menu "Networking options" source "net/packet/Kconfig" source "net/unix/Kconfig" +source "net/tls/Kconfig" source "net/xfrm/Kconfig" source "net/iucv/Kconfig" diff --git a/net/Makefile b/net/Makefile index 4cafaa2..23da6df 100644 --- a/net/Makefile +++ b/net/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_LLC) += llc/ obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_INET) += ipv4/ +obj-$(CONFIG_TLS) += tls/ obj-$(CONFIG_XFRM) += xfrm/ obj-$(CONFIG_UNIX) += unix/ obj-$(CONFIG_NET) += ipv6/ diff --git a/net/tls/Kconfig b/net/tls/Kconfig new file mode 100644 index 0000000..75bfb43 --- /dev/null +++ b/net/tls/Kconfig @@ -0,0 +1,12 @@ +# +# TLS configuration +# +config TLS + tristate "Transport Layer Security support" + depends on NET + default m + ---help--- + Enable kernel support for the TLS protocol. This allows processing + of the protocol in the kernel as well as offloading it to HW. + + If unsure, say N. diff --git a/net/tls/Makefile b/net/tls/Makefile new file mode 100644 index 0000000..65e5677 --- /dev/null +++ b/net/tls/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the TLS subsystem. +# + +obj-$(CONFIG_TLS) += tls.o + +tls-y := tls_main.o tls_device.o diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c new file mode 100644 index 0000000..77a4a59 --- /dev/null +++ b/net/tls/tls_device.c @@ -0,0 +1,594 @@ +/* Copyright (c) 2016-2017, Mellanox Technologies All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * - Neither the name of the Mellanox Technologies nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE + */ + +#include <linux/module.h> +#include <net/tcp.h> +#include <net/inet_common.h> +#include <linux/highmem.h> +#include <linux/netdevice.h> + +#include <net/tls.h> + +/* We assume that the socket is already connected */ +static struct net_device *get_netdev_for_sock(struct sock *sk) +{ + struct inet_sock *inet = inet_sk(sk); + struct net_device *netdev = NULL; + + pr_info("Using output interface 0x%x\n", inet->cork.fl.flowi_oif); + netdev = dev_get_by_index(sock_net(sk), inet->cork.fl.flowi_oif); + + return netdev; +} + +/* Ask the socket's output netdev to drop its TX offload state for @sk. + * The device reference obtained through get_netdev_for_sock() is released + * on every path that obtained one. + */ +static void detach_sock_from_netdev(struct sock *sk, struct tls_context *ctx) +{ + struct net_device *netdev; + + netdev = get_netdev_for_sock(sk); + if (!netdev) { + pr_err("got offloaded socket with no netdev\n"); + return; + } + + if (!netdev->tlsdev_ops) { + pr_err("detach_sock_from_netdev: netdev %s with no TLS offload\n", + netdev->name); + /* still drop the reference taken by the lookup above */ + goto out; + } + + netdev->tlsdev_ops->tls_dev_del(netdev, sk, TLS_OFFLOAD_CTX_DIR_TX); +out: + 
dev_put(netdev); +} + +/* Ask the socket's output netdev to set up TX offload for @sk and, on + * success, bind the socket to that device. + * + * Returns 0 on success, -EINVAL if no offload-capable netdev was found, or + * the error reported by the device's tls_dev_add() callback. + */ +static int attach_sock_to_netdev(struct sock *sk, struct tls_context *ctx) +{ + struct net_device *netdev = get_netdev_for_sock(sk); + int rc = -EINVAL; + + if (!netdev) { + pr_err("attach_sock_to_netdev: netdev not found\n"); + /* no reference was taken, so skip the dev_put() at out: */ + return rc; + } + + if (!netdev->tlsdev_ops) { + pr_err("attach_sock_to_netdev: netdev %s with no TLS offload\n", + netdev->name); + goto out; + } + + rc = netdev->tlsdev_ops->tls_dev_add( + netdev, + sk, + TLS_OFFLOAD_CTX_DIR_TX, + &ctx->crypto_send, + (struct tls_offload_context **)(&ctx->priv_ctx)); + if (rc) { + pr_err("The netdev has refused to offload this socket\n"); + goto out; + } + + sk->sk_bound_dev_if = netdev->ifindex; + sk_dst_reset(sk); + + rc = 0; +out: + dev_put(netdev); + return rc; +} + +static void destroy_record(struct tls_record_info *record) +{ + skb_frag_t *frag; + int nr_frags = record->num_frags; + + while (nr_frags > 0) { + frag = &record->frags[nr_frags - 1]; + __skb_frag_unref(frag); + --nr_frags; + } + kfree(record); +} + +static void delete_all_records(struct tls_offload_context *offload_ctx) +{ + struct tls_record_info *info, *temp; + + list_for_each_entry_safe(info, temp, &offload_ctx->records_list, list) { + list_del(&info->list); + destroy_record(info); + } +} + +void tls_clear_device_offload(struct sock *sk, struct tls_context *tls_ctx) +{ + struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx); + + if (!ctx) + return; + + if (ctx->open_record) + destroy_record(ctx->open_record); + + delete_all_records(ctx); + detach_sock_from_netdev(sk, tls_ctx); +} + +void tls_icsk_clean_acked(struct sock *sk) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_offload_context *ctx; + struct tcp_sock *tp = tcp_sk(sk); + struct tls_record_info *info, *temp; + unsigned long flags; + + if (!tls_ctx) + return; + + ctx = tls_offload_ctx(tls_ctx); + + spin_lock_irqsave(&ctx->lock, flags); + info = ctx->retransmit_hint; + if (info && !before(tp->snd_una, info->end_seq)) { + 
ctx->retransmit_hint = NULL; + list_del(&info->list); + destroy_record(info); + } + + list_for_each_entry_safe(info, temp, &ctx->records_list, list) { + if (before(tp->snd_una, info->end_seq)) + break; + list_del(&info->list); + + destroy_record(info); + } + + spin_unlock_irqrestore(&ctx->lock, flags); +} +EXPORT_SYMBOL(tls_icsk_clean_acked); + +void tls_device_sk_destruct(struct sock *sk) +{ + struct tls_context *ctx = tls_get_ctx(sk); + + tls_clear_device_offload(sk, ctx); + tls_sk_destruct(sk, ctx); +} +EXPORT_SYMBOL(tls_device_sk_destruct); + +static inline void tls_append_frag(struct tls_record_info *record, + struct page_frag *pfrag, + int size) +{ + skb_frag_t *frag; + + frag = &record->frags[record->num_frags - 1]; + if (frag->page.p == pfrag->page && + frag->page_offset + frag->size == pfrag->offset) { + frag->size += size; + } else { + ++frag; + frag->page.p = pfrag->page; + frag->page_offset = pfrag->offset; + frag->size = size; + ++record->num_frags; + get_page(pfrag->page); + } + + pfrag->offset += size; + record->len += size; +} + +static inline int tls_push_record(struct sock *sk, + struct tls_context *ctx, + struct tls_offload_context *offload_ctx, + struct tls_record_info *record, + struct page_frag *pfrag, + int flags, + unsigned char record_type) +{ + skb_frag_t *frag; + struct tcp_sock *tp = tcp_sk(sk); + struct page_frag fallback_frag; + struct page_frag *tag_pfrag = pfrag; + + /* fill prepand */ + frag = &record->frags[0]; + tls_fill_prepend(ctx, + skb_frag_address(frag), + record->len - ctx->prepand_size, + record_type); + + if (unlikely(!skb_page_frag_refill( + ctx->tag_size, + pfrag, GFP_KERNEL))) { + /* HW doesn't care about the data in the tag + * so in case pfrag has no room + * for a tag and we can't allocate a new pfrag + * just use the page in the first frag + * rather then write a complicated fall back code. 
+ */ + tag_pfrag = &fallback_frag; + tag_pfrag->page = skb_frag_page(frag); + tag_pfrag->offset = 0; + } + + tls_append_frag(record, tag_pfrag, ctx->tag_size); + record->end_seq = tp->write_seq + record->len; + spin_lock_irq(&offload_ctx->lock); + list_add_tail(&record->list, &offload_ctx->records_list); + spin_unlock_irq(&offload_ctx->lock); + + offload_ctx->open_record = NULL; + tls_increment_seqno(ctx->iv, sk); + + /* all ready, send */ + return tls_push_frags(sk, ctx, record->frags, + record->num_frags, 0, flags); + +} + +static inline int tls_get_new_record( + struct tls_offload_context *offload_ctx, + struct page_frag *pfrag, + size_t prepand_size) +{ + skb_frag_t *frag; + struct tls_record_info *record; + + /* TODO: do we want to use pfrag + * to store the record metadata? + * the lifetime of the data and + * metadata is the same and + * we can avoid kmalloc overhead. + */ + record = kmalloc(sizeof(*record), GFP_KERNEL); + if (!record) + return -ENOMEM; + + frag = &record->frags[0]; + __skb_frag_set_page(frag, pfrag->page); + frag->page_offset = pfrag->offset; + skb_frag_size_set(frag, prepand_size); + + get_page(pfrag->page); + pfrag->offset += prepand_size; + + record->num_frags = 1; + record->len = prepand_size; + offload_ctx->open_record = record; + return 0; +} + +static inline int tls_do_allocation( + struct sock *sk, + struct tls_offload_context *offload_ctx, + struct page_frag *pfrag, + size_t prepand_size) +{ + struct tls_record_info *record; + + if (!sk_page_frag_refill(sk, pfrag)) + return -ENOMEM; + + record = offload_ctx->open_record; + if (!record) { + tls_get_new_record(offload_ctx, pfrag, prepand_size); + record = offload_ctx->open_record; + if (!record) + return -ENOMEM; + } + + return 0; +} + +static int tls_push_data(struct sock *sk, + struct iov_iter *msg_iter, + size_t size, int flags, + unsigned char record_type) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx); + struct 
tls_record_info *record = ctx->open_record; + struct page_frag *pfrag; + int copy, rc = 0; + size_t orig_size = size; + u32 max_open_record_len; + long timeo; + int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE); + int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST; + bool last = false; + + if (sk->sk_err) + return sk->sk_err; + + /* Only one writer at a time is allowed */ + if (sk->sk_write_pending) + return -EBUSY; + timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + pfrag = sk_page_frag(sk); + + /* KTLS_TLS_HEADER_SIZE is not counted as part of the TLS record, and + * we need to leave room for an authentication tag. + */ + max_open_record_len = TLS_MAX_PAYLOAD_SIZE + + TLS_HEADER_SIZE - tls_ctx->tag_size; + + if (tls_is_pending_open_record(tls_ctx)) { + rc = tls_push_paritial_record(sk, tls_ctx, flags); + if (rc < 0) + return rc; + } + + do { + if (tls_do_allocation(sk, ctx, pfrag, + tls_ctx->prepand_size)) { + rc = sk_stream_wait_memory(sk, &timeo); + if (!rc) + continue; + + record = ctx->open_record; + if (!record) + break; +handle_error: + if (record_type != TLS_RECORD_TYPE_DATA) { + /* avoid sending partial + * record with type != + * application_data + */ + size = orig_size; + destroy_record(record); + ctx->open_record = NULL; + } else if (record->len > tls_ctx->prepand_size) { + goto last_record; + } + + break; + } + + record = ctx->open_record; + copy = min_t(size_t, size, (pfrag->size - pfrag->offset)); + copy = min_t(size_t, copy, (max_open_record_len - record->len)); + + if (copy_from_iter_nocache( + page_address(pfrag->page) + pfrag->offset, + copy, msg_iter) != copy) { + rc = -EFAULT; + goto handle_error; + } + tls_append_frag(record, pfrag, copy); + + size -= copy; + if (!size) { +last_record: + tls_push_record_flags = flags; + last = true; + } + + if ((last && !more) || + (record->len >= max_open_record_len) || + (record->num_frags >= MAX_SKB_FRAGS - 1)) { + rc = tls_push_record(sk, + tls_ctx, + ctx, + record, + pfrag, + 
tls_push_record_flags, + record_type); + if (rc < 0) + break; + } + } while (!last); + + if (orig_size - size > 0) { + rc = orig_size - size; + if (record_type != TLS_RECORD_TYPE_DATA) + rc++; + } + + return rc; +} + +static inline bool record_is_open(struct sock *sk) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_offload_context *ctx = tls_offload_ctx(tls_ctx); + struct tls_record_info *record = ctx->open_record; + + return record; +} + +int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) +{ + unsigned char record_type = TLS_RECORD_TYPE_DATA; + int rc = 0; + + lock_sock(sk); + + if (unlikely(msg->msg_flags & MSG_OOB)) { + if ((msg->msg_flags & MSG_MORE) || record_is_open(sk)) { + rc = -EINVAL; + goto out; + } + + if (copy_from_iter(&record_type, 1, &msg->msg_iter) != 1) { + rc = -EFAULT; + goto out; + } + + --size; + msg->msg_flags &= ~MSG_OOB; + } + + rc = tls_push_data(sk, &msg->msg_iter, size, + msg->msg_flags, + record_type); + +out: + release_sock(sk); + return rc; +} + +/* sendpage entry point for offloaded sockets: maps @page and pushes @size + * bytes starting at @offset through tls_push_data() as TLS application data. + * MSG_SENDPAGE_NOTLAST is treated as MSG_MORE; MSG_OOB is rejected with + * -EOPNOTSUPP. Returns whatever tls_push_data() returns otherwise. + */ +int tls_device_sendpage(struct sock *sk, struct page *page, + int offset, size_t size, int flags) +{ + struct iov_iter msg_iter; + struct kvec iov; + char *kaddr = kmap(page); + int rc = 0; + + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + + lock_sock(sk); + + if (flags & MSG_OOB) { + /* ENOTSUPP is kernel-internal and must not reach userspace */ + rc = -EOPNOTSUPP; + goto out; + } + + iov.iov_base = kaddr + offset; + iov.iov_len = size; + iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, &iov, 1, size); + rc = tls_push_data(sk, &msg_iter, size, + flags, + TLS_RECORD_TYPE_DATA); + +out: + /* the page was mapped before any early exit, so always unmap it */ + kunmap(page); + release_sock(sk); + return rc; +} + +struct tls_record_info *tls_get_record(struct tls_offload_context *context, + u32 seq) +{ + struct tls_record_info *info; + + info = context->retransmit_hint; + if (!info || + before(seq, info->end_seq - info->len)) + info = list_first_entry(&context->records_list, + struct tls_record_info, list); + + list_for_each_entry_from(info, &context->records_list, list) { + if 
(before(seq, info->end_seq)) { + if (!context->retransmit_hint || + after(info->end_seq, + context->retransmit_hint->end_seq)) + context->retransmit_hint = info; + return info; + } + } + + return NULL; +} +EXPORT_SYMBOL(tls_get_record); + +/* Switch @sk to device TX offload using the crypto parameters already stored + * in @ctx->crypto_send. Only TLS_CIPHER_AES_GCM_128 is accepted. + * + * On success the offload context's record list is seeded with an empty dummy + * record and tls_device_sk_destruct is installed as the socket destructor. + * Returns 0 on success or an error code (-EINVAL, -EEXIST, -ENOMEM, or the + * result of attach_sock_to_netdev()). + */ +int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) +{ + struct tls_crypto_info *crypto_info; + struct tls_offload_context *offload_ctx; + struct tls_record_info *dummy_record; + u16 nonece_size, tag_size, iv_size; + char *iv; + int rc; + + if (!ctx) { + rc = -EINVAL; + goto out; + } + + if (ctx->priv_ctx) { + rc = -EEXIST; + goto out; + } + + crypto_info = &ctx->crypto_send; + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + nonece_size = TLS_CIPHER_AES_GCM_128_IV_SIZE; + tag_size = TLS_CIPHER_AES_GCM_128_TAG_SIZE; + iv_size = TLS_CIPHER_AES_GCM_128_IV_SIZE; + iv = ((struct tls_crypto_info_aes_gcm_128 *)crypto_info)->iv; + break; + } + default: + rc = -EINVAL; + goto out; + } + + dummy_record = kmalloc(sizeof(*dummy_record), GFP_KERNEL); + if (!dummy_record) { + rc = -ENOMEM; + goto out; + } + + rc = attach_sock_to_netdev(sk, ctx); + if (rc) + goto err_dummy_record; + + ctx->prepand_size = TLS_HEADER_SIZE + nonece_size; + ctx->tag_size = tag_size; + ctx->iv_size = iv_size; + ctx->iv = kmalloc(iv_size, GFP_KERNEL); + if (!ctx->iv) { + /* must be negative like every other error returned here */ + rc = -ENOMEM; + goto detach_sock; + } + memcpy(ctx->iv, iv, iv_size); + + offload_ctx = ctx->priv_ctx; + dummy_record->end_seq = offload_ctx->expectedSN; + dummy_record->len = 0; + dummy_record->num_frags = 0; + + INIT_LIST_HEAD(&offload_ctx->records_list); + list_add_tail(&dummy_record->list, &offload_ctx->records_list); + spin_lock_init(&offload_ctx->lock); + + inet_csk(sk)->icsk_clean_acked = &tls_icsk_clean_acked; + + /* After this line the tx_handler might access the offload context */ + smp_store_release(&sk->sk_destruct, + &tls_device_sk_destruct); + goto out; + +detach_sock: + detach_sock_from_netdev(sk, ctx); +err_dummy_record: + kfree(dummy_record); 
+out: + return rc; +} diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c new file mode 100644 index 0000000..6a3df25 --- /dev/null +++ b/net/tls/tls_main.c @@ -0,0 +1,348 @@ +/* Copyright (c) 2016-2017, Mellanox Technologies All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * - Neither the name of the Mellanox Technologies nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE + */ + +#include <linux/module.h> + +#include <net/tcp.h> +#include <net/inet_common.h> +#include <linux/highmem.h> +#include <linux/netdevice.h> + +#include <net/tls.h> + +MODULE_AUTHOR("Mellanox Technologies"); +MODULE_DESCRIPTION("Transport Layer Security Support"); +MODULE_LICENSE("Dual BSD/GPL"); + +static struct proto tls_device_prot; + +int tls_push_frags(struct sock *sk, + struct tls_context *ctx, + skb_frag_t *frag, + u16 num_frags, + u16 first_offset, + int flags) +{ + int sendpage_flags = flags | MSG_SENDPAGE_NOTLAST; + int ret = 0; + size_t size; + int offset = first_offset; + + size = skb_frag_size(frag) - offset; + offset += frag->page_offset; + + while (1) { + if (!--num_frags) + sendpage_flags = flags; + + /* is sending application-limited? 
*/ + tcp_rate_check_app_limited(sk); +retry: + ret = do_tcp_sendpages(sk, + skb_frag_page(frag), + offset, + size, + sendpage_flags); + + if (ret != size) { + if (ret > 0) { + offset += ret; + size -= ret; + goto retry; + } + + offset -= frag->page_offset; + ctx->pending_offset = offset; + ctx->pending_frags = frag; + ctx->num_pending_frags = num_frags + 1; + return ret; + } + + if (!num_frags) + break; + + frag++; + offset = frag->page_offset; + size = skb_frag_size(frag); + } + + return 0; +} + +int tls_push_paritial_record(struct sock *sk, struct tls_context *ctx, + int flags) { + skb_frag_t *frag = ctx->pending_frags; + u16 offset = ctx->pending_offset; + u16 num_frags = ctx->num_pending_frags; + + ctx->num_pending_frags = 0; + + return tls_push_frags(sk, ctx, frag, + num_frags, offset, flags); +} + +static void tls_write_space(struct sock *sk) +{ + struct tls_context *ctx = tls_get_ctx(sk); + + if (tls_is_pending_open_record(ctx)) { + gfp_t sk_allocation = sk->sk_allocation; + int rc; + + sk->sk_allocation = GFP_ATOMIC; + rc = tls_push_paritial_record(sk, ctx, + MSG_DONTWAIT | MSG_NOSIGNAL); + sk->sk_allocation = sk_allocation; + + if (rc < 0) + return; + } + + ctx->sk_write_space(sk); +} + +/* Copy the TX crypto parameters of @sk to the user buffer @optval. + * + * The length read from @optlen selects what is copied: exactly + * sizeof(struct tls_crypto_info) yields only the generic header, otherwise + * it must equal the size of the cipher-specific structure. For sockets in + * HW state the current IV is refreshed from the context before copying. + * + * Returns 0 on success, -EFAULT if user memory cannot be accessed, -EINVAL + * for a bad buffer or length, -EBUSY if no crypto info has been set yet, or + * -ENOPROTOOPT for an unsupported @optname. + */ +int tls_sk_query(struct sock *sk, int optname, char __user *optval, + int __user *optlen) +{ + int rc = 0; + struct tls_context *ctx = tls_get_ctx(sk); + struct tls_crypto_info *crypto_info; + int len; + + if (get_user(len, optlen)) + return -EFAULT; + + if (!optval || (len < sizeof(*crypto_info))) { + rc = -EINVAL; + goto out; + } + + if (!ctx) { + rc = -EBUSY; + goto out; + } + + /* get user crypto info */ + switch (optname) { + case TCP_TLS_TX: { + crypto_info = &ctx->crypto_send; + break; + } + case TCP_TLS_RX: + /* fallthru since for now we don't support */ + default: { + rc = -ENOPROTOOPT; + goto out; + } + } + + if (!TLS_CRYPTO_INFO_READY(crypto_info)) { + rc = -EBUSY; + goto out; + } + + /* compare against the structure size, not the size of the pointer */ + if (len == sizeof(*crypto_info)) { + /* copy_to_user() returns the number of bytes left uncopied */ + rc = copy_to_user(optval, crypto_info, 
sizeof(*crypto_info)) ? -EFAULT : 0; + goto out; + } + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls_crypto_info_aes_gcm_128 *crypto_info_aes_gcm_128 = + container_of(crypto_info, + struct tls_crypto_info_aes_gcm_128, + info); + + if (len != sizeof(*crypto_info_aes_gcm_128)) { + rc = -EINVAL; + goto out; + } + if (TLS_IS_STATE_HW(crypto_info)) { + lock_sock(sk); + memcpy(crypto_info_aes_gcm_128->iv, + ctx->iv, + TLS_CIPHER_AES_GCM_128_IV_SIZE); + release_sock(sk); + } + /* copy_to_user() returns the number of bytes left uncopied */ + rc = copy_to_user(optval, + crypto_info_aes_gcm_128, + sizeof(*crypto_info_aes_gcm_128)) ? -EFAULT : 0; + break; + } + default: + rc = -EINVAL; + } + +out: + return rc; +} +EXPORT_SYMBOL(tls_sk_query); + +void tls_sk_destruct(struct sock *sk, struct tls_context *ctx) +{ + ctx->sk_destruct(sk); + kfree(ctx->iv); + kfree(ctx); + module_put(THIS_MODULE); +} + +int tls_sk_attach(struct sock *sk, int optname, char __user *optval, + unsigned int optlen) +{ + int rc = 0; + struct tls_context *ctx = tls_get_ctx(sk); + struct tls_crypto_info *crypto_info; + bool allocated_tls_ctx = false; + + if (!optval || (optlen < sizeof(*crypto_info))) { + rc = -EINVAL; + goto out; + } + + /* allocate tls context */ + if (!ctx) { + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) { + rc = -ENOMEM; + goto out; + } + sk->sk_user_data = ctx; + allocated_tls_ctx = true; + } + + /* get user crypto info */ + switch (optname) { + case TCP_TLS_TX: { + crypto_info = &ctx->crypto_send; + break; + } + case TCP_TLS_RX: + /* fallthru since for now we don't support */ + default: { + rc = -ENOPROTOOPT; + goto err_sk_user_data; + } + } + + /* Currently we don't support set crypto info more than one time */ + if (TLS_CRYPTO_INFO_READY(crypto_info)) { + rc = -EEXIST; + goto err_sk_user_data; + } + + rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info)); + if (rc) { + rc = -EFAULT; + goto err_sk_user_data; + } + + /* currently we support only HW offload */ + if (!TLS_IS_STATE_HW(crypto_info)) { + rc = -ENOPROTOOPT; + 
goto err_crypto_info; + } + + /* check version */ + if (crypto_info->version != TLS_1_2_VERSION) { + /* ENOTSUPP is kernel-internal and must not reach userspace */ + rc = -EOPNOTSUPP; + goto err_crypto_info; + } + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + if (optlen != sizeof(struct tls_crypto_info_aes_gcm_128)) { + rc = -EINVAL; + goto err_crypto_info; + } + rc = copy_from_user(crypto_info, + optval, + sizeof(struct tls_crypto_info_aes_gcm_128)); + break; + } + default: + rc = -EINVAL; + goto err_crypto_info; + } + + if (rc) { + rc = -EFAULT; + goto err_crypto_info; + } + + /* Pin the module first so that the device offload below is the last + * step that can fail. + */ + if (!try_module_get(THIS_MODULE)) { + rc = -EINVAL; + goto err_crypto_info; + } + + ctx->sk_write_space = sk->sk_write_space; + ctx->sk_destruct = sk->sk_destruct; + + /* On success tls_set_device_offload() installs tls_device_sk_destruct + * on the socket, so nothing after it may take an error path that + * frees the context. + */ + if (TLS_IS_STATE_HW(crypto_info)) { + rc = tls_set_device_offload(sk, ctx); + if (rc) + goto err_module_put; + } + + /* Hook the socket only once setup can no longer fail */ + /* TODO: add protection */ + sk->sk_write_space = tls_write_space; + sk->sk_prot = &tls_device_prot; + goto out; + +err_module_put: + module_put(THIS_MODULE); +err_crypto_info: + memset(crypto_info, 0, sizeof(*crypto_info)); +err_sk_user_data: + if (allocated_tls_ctx) { + /* don't leave the socket pointing at freed memory */ + sk->sk_user_data = NULL; + kfree(ctx); + } +out: + return rc; +} +EXPORT_SYMBOL(tls_sk_attach); + +static int __init tls_init(void) +{ + tls_device_prot = tcp_prot; + tls_device_prot.sendmsg = tls_device_sendmsg; + tls_device_prot.sendpage = tls_device_sendpage; + + return 0; +} + +static void __exit tls_exit(void) +{ +} + +module_init(tls_init); +module_exit(tls_exit); -- 2.7.4