[RFC PATCH bpf-next 1/2] xdp: add dev map multicast support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This is a prototype for xdp multicast support. In this implemention we
use map-in-map to store the multicast groups, because we may have both
include and exclude groups on one interface.

The include and exclude groups are seperated by a 32 bits map key.
the high 16 bits keys are used for include groups and low 16 bits
keys are for exclude groups.

The general data path is kept in net/core/filter.c. The native data
path is in kernel/bpf/devmap.c so we can use direct calls to
get better performace.

Signed-off-by: Hangbin Liu <liuhangbin@xxxxxxxxx>
---
 include/linux/bpf.h   |  29 +++++++++++
 include/net/xdp.h     |   1 +
 kernel/bpf/arraymap.c |   2 +-
 kernel/bpf/devmap.c   | 118 ++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/hashtab.c  |   2 +-
 kernel/bpf/verifier.c |  15 +++++-
 net/core/filter.c     |  69 +++++++++++++++++++++++-
 net/core/xdp.c        |  26 ++++++++++
 8 files changed, 256 insertions(+), 6 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index fd2b2322412d..72797667bca8 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1156,11 +1156,17 @@ struct sk_buff;
 
 struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
 struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key);
+void *array_of_map_lookup_elem(struct bpf_map *map, void *key);
+void *htab_of_map_lookup_elem(struct bpf_map *map, void *key);
 void __dev_flush(void);
 int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
+bool dev_in_exclude_map(struct bpf_dtab_netdev *obj, struct bpf_map *map);
+int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
+			  struct bpf_map *map, u32 index);
+
 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 			     struct bpf_prog *xdp_prog);
 
@@ -1276,6 +1282,16 @@ static inline struct net_device  *__dev_map_hash_lookup_elem(struct bpf_map *map
 	return NULL;
 }
 
+static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
+{
+
+}
+
+static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key)
+{
+
+}
+
 static inline void __dev_flush(void)
 {
 }
@@ -1297,6 +1313,19 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 	return 0;
 }
 
+static inline
+bool dev_in_exclude_map(struct bpf_dtab_netdev *obj, struct bpf_map *map)
+{
+	return true;
+}
+
+static inline
+int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
+			  struct bpf_map *map, u32 index)
+{
+	return 0;
+}
+
 struct sk_buff;
 
 static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 40c6d3398458..a214dce8579c 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -92,6 +92,7 @@ static inline void xdp_scrub_frame(struct xdp_frame *frame)
 }
 
 struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp);
+struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf);
 
 /* Convert xdp_buff to xdp_frame */
 static inline
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 95d77770353c..26ac66a05015 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -1031,7 +1031,7 @@ static void array_of_map_free(struct bpf_map *map)
 	fd_array_map_free(map);
 }
 
-static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
+void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
 {
 	struct bpf_map **inner_map = array_map_lookup_elem(map, key);
 
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 58bdca5d978a..3a60cb209ae1 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -85,6 +85,9 @@ static DEFINE_PER_CPU(struct list_head, dev_flush_list);
 static DEFINE_SPINLOCK(dev_map_lock);
 static LIST_HEAD(dev_map_list);
 
+static void *dev_map_lookup_elem(struct bpf_map *map, void *key);
+static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key);
+
 static struct hlist_head *dev_map_create_hash(unsigned int entries)
 {
 	int i;
@@ -456,6 +459,121 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 	return __xdp_enqueue(dev, xdp, dev_rx);
 }
 
+/* Use direct call in fast path instead of  map->ops->map_get_next_key() */
+static int devmap_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+
+	switch (map->map_type) {
+	case BPF_MAP_TYPE_DEVMAP:
+		return dev_map_get_next_key(map, key, next_key);
+	case BPF_MAP_TYPE_DEVMAP_HASH:
+		return dev_map_hash_get_next_key(map, key, next_key);
+	default:
+		break;
+	}
+
+	return -ENOENT;
+}
+
+bool dev_in_exclude_map(struct bpf_dtab_netdev *obj, struct bpf_map *map)
+{
+	struct bpf_dtab_netdev *in_obj = NULL;
+	u32 key, next_key;
+	int err;
+
+	devmap_get_next_key(map, NULL, &key);
+
+	for (;;) {
+		switch (map->map_type) {
+		case BPF_MAP_TYPE_DEVMAP:
+			in_obj = __dev_map_lookup_elem(map, key);
+			break;
+		case BPF_MAP_TYPE_DEVMAP_HASH:
+			in_obj = __dev_map_hash_lookup_elem(map, key);
+			break;
+		default:
+			break;
+		}
+
+		if (in_obj && in_obj->dev->ifindex == obj->dev->ifindex)
+			return true;
+
+		err = devmap_get_next_key(map, &key, &next_key);
+
+		if (err)
+			break;
+
+		key = next_key;
+	}
+
+	return false;
+}
+
+int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
+			  struct bpf_map *map, u32 index)
+{
+	struct bpf_dtab_netdev *obj = NULL;
+	struct bpf_map *in_map, *ex_map;
+	struct xdp_frame *xdpf, *nxdpf;
+	struct net_device *dev;
+	u32 in_index, ex_index;
+	u32 key, next_key;
+	int err;
+
+	in_index = index >> 16;
+	in_index = in_index << 16;
+	ex_index = in_index ^ index;
+
+	in_map = map->ops->map_lookup_elem(map, &in_index);
+	/* ex_map could be NULL */
+	ex_map = map->ops->map_lookup_elem(map, &ex_index);
+
+	devmap_get_next_key(in_map, NULL, &key);
+
+	xdpf = convert_to_xdp_frame(xdp);
+	if (unlikely(!xdpf))
+		return -EOVERFLOW;
+
+	for (;;) {
+		switch (in_map->map_type) {
+		case BPF_MAP_TYPE_DEVMAP:
+			obj = __dev_map_lookup_elem(in_map, key);
+			break;
+		case BPF_MAP_TYPE_DEVMAP_HASH:
+			obj = __dev_map_hash_lookup_elem(in_map, key);
+			break;
+		default:
+			break;
+		}
+		if (!obj)
+			goto find_next;
+
+		if (ex_map && !dev_in_exclude_map(obj, ex_map)) {
+			dev = obj->dev;
+
+			if (!dev->netdev_ops->ndo_xdp_xmit)
+				return -EOPNOTSUPP;
+
+			err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
+			if (unlikely(err))
+				return err;
+
+			nxdpf = xdpf_clone(xdpf);
+			if (unlikely(!nxdpf))
+				return -ENOMEM;
+
+			bq_enqueue(dev, nxdpf, dev_rx);
+		}
+find_next:
+		err = devmap_get_next_key(in_map, &key, &next_key);
+		if (err)
+			break;
+		key = next_key;
+	}
+
+	return 0;
+}
+
 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 			     struct bpf_prog *xdp_prog)
 {
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index d541c8486c95..4e0a2eebd38d 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1853,7 +1853,7 @@ static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr)
 	return map;
 }
 
-static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key)
+void *htab_of_map_lookup_elem(struct bpf_map *map, void *key)
 {
 	struct bpf_map **inner_map  = htab_map_lookup_elem(map, key);
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 04c6630cc18f..84d23418823a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3898,7 +3898,9 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		break;
 	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
 	case BPF_MAP_TYPE_HASH_OF_MAPS:
-		if (func_id != BPF_FUNC_map_lookup_elem)
+		/* Used by multicast redirect */
+		if (func_id != BPF_FUNC_redirect_map &&
+		    func_id != BPF_FUNC_map_lookup_elem)
 			goto error;
 		break;
 	case BPF_MAP_TYPE_SOCKMAP:
@@ -3968,8 +3970,17 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
 		    map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
 		    map->map_type != BPF_MAP_TYPE_CPUMAP &&
-		    map->map_type != BPF_MAP_TYPE_XSKMAP)
+		    map->map_type != BPF_MAP_TYPE_XSKMAP &&
+		    map->map_type != BPF_MAP_TYPE_ARRAY_OF_MAPS &&
+		    map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS)
 			goto error;
+		if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
+		    map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+			/* FIXME: Maybe we should also strict the key size here ?? */
+			if (map->inner_map_meta->map_type != BPF_MAP_TYPE_DEVMAP &&
+			    map->inner_map_meta->map_type != BPF_MAP_TYPE_DEVMAP_HASH)
+				goto error;
+		}
 		break;
 	case BPF_FUNC_sk_redirect_map:
 	case BPF_FUNC_msg_redirect_map:
diff --git a/net/core/filter.c b/net/core/filter.c
index 7628b947dbc3..7d2076f5b0a4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3473,12 +3473,17 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
 };
 
 static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
-			    struct bpf_map *map, struct xdp_buff *xdp)
+			    struct bpf_map *map, struct xdp_buff *xdp,
+			    u32 index)
 {
 	switch (map->map_type) {
 	case BPF_MAP_TYPE_DEVMAP:
+		/* fall through */
 	case BPF_MAP_TYPE_DEVMAP_HASH:
 		return dev_map_enqueue(fwd, xdp, dev_rx);
+	case BPF_MAP_TYPE_HASH_OF_MAPS:
+	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+		return dev_map_enqueue_multi(xdp, dev_rx, map, index);
 	case BPF_MAP_TYPE_CPUMAP:
 		return cpu_map_enqueue(fwd, xdp, dev_rx);
 	case BPF_MAP_TYPE_XSKMAP:
@@ -3508,6 +3513,10 @@ static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
 		return __cpu_map_lookup_elem(map, index);
 	case BPF_MAP_TYPE_XSKMAP:
 		return __xsk_map_lookup_elem(map, index);
+	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+		return array_of_map_lookup_elem(map, (index >> 16) << 16);
+	case BPF_MAP_TYPE_HASH_OF_MAPS:
+		return htab_of_map_lookup_elem(map, (index >> 16) << 16);
 	default:
 		return NULL;
 	}
@@ -3552,7 +3561,7 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 
 		err = dev_xdp_enqueue(fwd, xdp, dev);
 	} else {
-		err = __bpf_tx_xdp_map(dev, fwd, map, xdp);
+		err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index);
 	}
 
 	if (unlikely(err))
@@ -3566,6 +3575,55 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
 }
 EXPORT_SYMBOL_GPL(xdp_do_redirect);
 
+static int dev_map_redirect_multi(struct sk_buff *skb, struct bpf_prog *xdp_prog,
+				  struct bpf_map *map, u32 index)
+
+{
+	struct bpf_map *in_map, *ex_map;
+	struct bpf_dtab_netdev *dst;
+	u32 in_index, ex_index;
+	struct sk_buff *nskb;
+	u32 key, next_key;
+	int err;
+	void *fwd;
+
+	in_index = index >> 16;
+	in_index = in_index << 16;
+	ex_index = in_index ^ index;
+
+	in_map = map->ops->map_lookup_elem(map, &in_index);
+	/* ex_map could be NULL */
+	ex_map = map->ops->map_lookup_elem(map, &ex_index);
+
+	in_map->ops->map_get_next_key(in_map, NULL, &key);
+
+	for (;;) {
+		fwd = __xdp_map_lookup_elem(in_map, key);
+		if (fwd) {
+			dst = (struct bpf_dtab_netdev *)fwd;
+			if (ex_map && dev_in_exclude_map(dst, ex_map))
+				goto find_next;
+
+			nskb = skb_clone(skb, GFP_ATOMIC);
+			if (!nskb)
+				return -EOVERFLOW;
+
+			err = dev_map_generic_redirect(dst, nskb, xdp_prog);
+			if (unlikely(err))
+				return err;
+		}
+
+find_next:
+		err = in_map->ops->map_get_next_key(in_map, &key, &next_key);
+		if (err)
+			break;
+
+		key = next_key;
+	}
+
+	return 0;
+}
+
 static int xdp_do_generic_redirect_map(struct net_device *dev,
 				       struct sk_buff *skb,
 				       struct xdp_buff *xdp,
@@ -3588,6 +3646,13 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
 		err = dev_map_generic_redirect(dst, skb, xdp_prog);
 		if (unlikely(err))
 			goto err;
+	} else if (map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
+		   map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+		/* Do multicast redirecting */
+		err = dev_map_redirect_multi(skb, xdp_prog, map, index);
+		if (unlikely(err))
+			goto err;
+		consume_skb(skb);
 	} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
 		struct xdp_sock *xs = fwd;
 
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 4c7ea85486af..70dfb4910f84 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -496,3 +496,29 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
 	return xdpf;
 }
 EXPORT_SYMBOL_GPL(xdp_convert_zc_to_xdp_frame);
+
+struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf)
+{
+	unsigned int headroom, totalsize;
+	struct xdp_frame *nxdpf;
+	struct page *page;
+	void *addr;
+
+	headroom = xdpf->headroom + sizeof(*xdpf);
+	totalsize = headroom + xdpf->len;
+
+	if (unlikely(totalsize > PAGE_SIZE))
+		return NULL;
+	page = dev_alloc_page();
+	if (!page)
+		return NULL;
+	addr = page_to_virt(page);
+
+	memcpy(addr, xdpf, totalsize);
+
+	nxdpf = addr;
+	nxdpf->data = addr + headroom;
+
+	return nxdpf;
+}
+EXPORT_SYMBOL_GPL(xdpf_clone);
-- 
2.19.2




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux