On 10/26/23 08:26, Jiri Pirko wrote:
Wed, Oct 25, 2023 at 09:21:01PM CEST, razor@xxxxxxxxxxxxx wrote:
On 10/25/23 18:47, Jiri Pirko wrote:
Tue, Oct 24, 2023 at 11:48:58PM CEST, daniel@xxxxxxxxxxxxx wrote:
This work adds a new, minimal BPF-programmable device called "netkit"
[snip]
Couple of nitpicks below:
[..]
Hi,
Thanks for the review. I know about the nits below but decided against
changing them, more below each...
+static int netkit_check_policy(int policy, struct nlattr *tb,
+ struct netlink_ext_ack *extack)
+{
+ switch (policy) {
+ case NETKIT_PASS:
+ case NETKIT_DROP:
+ return 0;
+ default:
Isn't this job for netlink policy?
This cannot be handled by policies AFAIK, because only 2 sparse values from
more are allowed. We could potentially do it through validate() but
Perhaps good time to extend the netlink validation for list
of values allowed?
it's the same minus the explicit policy type info. IMO this approach is good.
+ NL_SET_ERR_MSG_ATTR(extack, tb,
+ "Provided default xmit policy not supported");
+ return -EINVAL;
+ }
+}
+
+static int netkit_check_mode(int mode, struct nlattr *tb,
+ struct netlink_ext_ack *extack)
+{
+ switch (mode) {
+ case NETKIT_L2:
+ case NETKIT_L3:
+ return 0;
+ default:
Isn't this job for netlink policy?
This one can be handled by policy indeed, but then we lose the nice user
error. Again can be done through validate(), but it's the same and we
lose explicit policy type information.
But the netlink validator setups the extack properly. What's wrong with
that?
"integer out of range" vs "Provided device mode can only be L2 or L3" ?
I like the second one better and it's more informative. The way to get
it is to use NLA_POLICY_VALIDATE_FN() as type and provide custom
validate() callback, which is identical to the current solution, I see
no added value in changing it.
+ NL_SET_ERR_MSG_ATTR(extack, tb,
+ "Provided device mode can only be L2 or L3");
+ return -EINVAL;
+ }
+}
+
+static int netkit_validate(struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *attr = tb[IFLA_ADDRESS];
+
+ if (!attr)
+ return 0;
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Setting Ethernet address is not supported");
+ return -EOPNOTSUPP;
+}
+
+static struct rtnl_link_ops netkit_link_ops;
+
+static int netkit_new_link(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *peer_tb[IFLA_MAX + 1], **tbp = tb, *attr;
+ enum netkit_action default_prim = NETKIT_PASS;
+ enum netkit_action default_peer = NETKIT_PASS;
+ enum netkit_mode mode = NETKIT_L3;
+ unsigned char ifname_assign_type;
+ struct ifinfomsg *ifmp = NULL;
+ struct net_device *peer;
+ char ifname[IFNAMSIZ];
+ struct netkit *nk;
+ struct net *net;
+ int err;
+
+ if (data) {
+ if (data[IFLA_NETKIT_MODE]) {
+ attr = data[IFLA_NETKIT_MODE];
+ mode = nla_get_u32(attr);
+ err = netkit_check_mode(mode, attr, extack);
+ if (err < 0)
+ return err;
+ }
+ if (data[IFLA_NETKIT_PEER_INFO]) {
+ attr = data[IFLA_NETKIT_PEER_INFO];
+ ifmp = nla_data(attr);
+ err = rtnl_nla_parse_ifinfomsg(peer_tb, attr, extack);
+ if (err < 0)
+ return err;
+ err = netkit_validate(peer_tb, NULL, extack);
+ if (err < 0)
+ return err;
+ tbp = peer_tb;
+ }
+ if (data[IFLA_NETKIT_POLICY]) {
+ attr = data[IFLA_NETKIT_POLICY];
+ default_prim = nla_get_u32(attr);
+ err = netkit_check_policy(default_prim, attr, extack);
+ if (err < 0)
+ return err;
+ }
+ if (data[IFLA_NETKIT_PEER_POLICY]) {
+ attr = data[IFLA_NETKIT_PEER_POLICY];
+ default_peer = nla_get_u32(attr);
+ err = netkit_check_policy(default_peer, attr, extack);
+ if (err < 0)
+ return err;
+ }
+ }
+
+ if (ifmp && tbp[IFLA_IFNAME]) {
+ nla_strscpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
+ ifname_assign_type = NET_NAME_USER;
+ } else {
+ strscpy(ifname, "nk%d", IFNAMSIZ);
+ ifname_assign_type = NET_NAME_ENUM;
+ }
+
+ net = rtnl_link_get_net(src_net, tbp);
+ if (IS_ERR(net))
+ return PTR_ERR(net);
+
+ peer = rtnl_create_link(net, ifname, ifname_assign_type,
+ &netkit_link_ops, tbp, extack);
+ if (IS_ERR(peer)) {
+ put_net(net);
+ return PTR_ERR(peer);
+ }
+
+ netif_inherit_tso_max(peer, dev);
+
+ if (mode == NETKIT_L2)
+ eth_hw_addr_random(peer);
+ if (ifmp && dev->ifindex)
+ peer->ifindex = ifmp->ifi_index;
+
+ nk = netkit_priv(peer);
+ nk->primary = false;
+ nk->policy = default_peer;
+ nk->mode = mode;
+ bpf_mprog_bundle_init(&nk->bundle);
+ RCU_INIT_POINTER(nk->active, NULL);
+ RCU_INIT_POINTER(nk->peer, NULL);
Aren't these already 0?
Yep, they are. Here decided in favor of explicit show of values, although
it's minor and I'm fine either way.
It is always confusing to see this. Reader might think this is necessary
as if the mem was not previuosly cleared. The general approach is to
rely on mem being zeroed, not sure why this is different.
Oh come on :) you really believe someone reading this code would start
inferring about netdev_alloc mem zeroing instead of getting a mental
picture of the expected state? Anyway, as I said I think this is way too
minor and it's fine either way, we can remove the explicit
initialization and rely on the implicit one.
+
+ err = register_netdevice(peer);
+ put_net(net);
+ if (err < 0)
+ goto err_register_peer;
+ netif_carrier_off(peer);
+ if (mode == NETKIT_L2)
+ dev_change_flags(peer, peer->flags & ~IFF_NOARP, NULL);
+
+ err = rtnl_configure_link(peer, NULL, 0, NULL);
+ if (err < 0)
+ goto err_configure_peer;
+
+ if (mode == NETKIT_L2)
+ eth_hw_addr_random(dev);
+ if (tb[IFLA_IFNAME])
+ nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
+ else
+ strscpy(dev->name, "nk%d", IFNAMSIZ);
+
+ nk = netkit_priv(dev);
+ nk->primary = true;
+ nk->policy = default_prim;
+ nk->mode = mode;
+ bpf_mprog_bundle_init(&nk->bundle);
+ RCU_INIT_POINTER(nk->active, NULL);
+ RCU_INIT_POINTER(nk->peer, NULL);
+
+ err = register_netdevice(dev);
+ if (err < 0)
+ goto err_configure_peer;
+ netif_carrier_off(dev);
+ if (mode == NETKIT_L2)
+ dev_change_flags(dev, dev->flags & ~IFF_NOARP, NULL);
+
+ rcu_assign_pointer(netkit_priv(dev)->peer, peer);
+ rcu_assign_pointer(netkit_priv(peer)->peer, dev);
+ return 0;
+err_configure_peer:
+ unregister_netdevice(peer);
+ return err;
+err_register_peer:
+ free_netdev(peer);
+ return err;
+}
+
[..]
Cheers,
Nik