On Wed, Jan 12, 2011 at 06:26:55PM +0100, Dragos Tatulea wrote: > Hi, > > Trying to stir up a year old conversation [1] about mac filtering. > The patch below is Alex Williamson's work updated for the current qemu > taking into account some comments. An extra check for multiple nics on > the same vlan has been added as well. Now, I know it's not ideal but > I'm looking for feedback here. > > The original thread contained some ideas that weren't discussed extensively: > > * Alex Williamson's patch that makes the nic call the associated tap > vlan client to set the mac filters. This is possible only for 1 nix x > 1 tap setup. It's a bit hackish (even more visible in the current > version) but it's straightforward. > > * Paul Brook considered that the nic shouldn't know anything about > what else is connected to the vlan. He proposed a model that is more > generic and would allow us to filter stuff from device to vlan and the > other way around. > > * Anthony Liguori proposed moving all the mac filtering to the vlan > side. This would be nice, but it would imply removing & rewriting all > the emulated nics. Don't know how acceptable this is since, I suppose, > the emulated behavior will change. > > The bottom line here is that neither of these 3 approaches is ok for > what we want to achieve: basically tap & macvtap filtering for virtio. > What do we want? The simple change for virtio tun tx fiiltering or > filtering MACs in both directions taking into account how nics are > connected and if hw filtering is possible? The latter being quite a > change. > > [1] - http://thread.gmane.org/gmane.comp.emulators.qemu/37714/focus=37719 > > -- Dragos Tatulea > > Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx> > Signed-off-by: Dragos Tatulea <dragos.tatulea@xxxxxxxxx> BTW the comments above are mostly addressed by using a peer and not a vlan. Here's the patch I used for testing. As I mentioned, this all needs to be optional to avoid filtering performance hit. So - don't apply this as is, but it does work. --> tap: use kernel filtering support Use kernel filtering for tap where available. Signed-off-by: Michael S. Tsirkin <mst@xxxxxxxxxx> --- diff --git a/hw/virtio-net.c b/hw/virtio-net.c index 1d61f19..7892ca9 100644 --- a/hw/virtio-net.c +++ b/hw/virtio-net.c @@ -343,6 +343,15 @@ static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd, n->mac_table.multi_overflow = 1; } } + if (n->nic->nc.peer && n->nic->nc.peer->info->set_rx_filter) { + n->nic->nc.peer->info->set_rx_filter(n->nic->nc.peer, + n->promisc, + n->allmulti, + n->nobcast, + n->mac, + n->mac_table.in_use, + n->mac_table.macs); + } return VIRTIO_NET_OK; } diff --git a/net.h b/net.h index 44c31a9..9e164b9 100644 --- a/net.h +++ b/net.h @@ -42,6 +42,8 @@ typedef ssize_t (NetReceive)(VLANClientState *, const uint8_t *, size_t); typedef ssize_t (NetReceiveIOV)(VLANClientState *, const struct iovec *, int); typedef void (NetCleanup) (VLANClientState *); typedef void (LinkStatusChanged)(VLANClientState *); +typedef int (NetRXFilter)(VLANClientState *, bool allmulti, bool promisc, + bool nobcast, uint8_t *, int , uint8_t *); typedef struct NetClientInfo { net_client_type type; @@ -50,6 +52,7 @@ typedef struct NetClientInfo { NetReceive *receive_raw; NetReceiveIOV *receive_iov; NetCanReceive *can_receive; + NetRXFilter *set_rx_filter; NetCleanup *cleanup; LinkStatusChanged *link_status_changed; NetPoll *poll; diff --git a/net/tap-aix.c b/net/tap-aix.c index e19aaba..4cf5ad9 100644 --- a/net/tap-aix.c +++ b/net/tap-aix.c @@ -59,3 +59,9 @@ void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo) { } + +int tap_fd_set_rx_filter(int fd, bool allmulti, bool promisc, bool nobcast, + uint8_t *mac, int count, uint8_t *macs) +{ + return -1; +} diff --git a/net/tap-bsd.c b/net/tap-bsd.c index efccfe0..d092c6f 100644 --- a/net/tap-bsd.c +++ b/net/tap-bsd.c @@ -125,3 +125,9 @@ void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo) { } + +int tap_fd_set_rx_filter(int fd, bool allmulti, bool promisc, bool nobcast, + uint8_t *mac, int count, uint8_t *macs) +{ + return -1; +} diff --git a/net/tap-haiku.c b/net/tap-haiku.c index 91dda8e..8117c03 100644 --- a/net/tap-haiku.c +++ b/net/tap-haiku.c @@ -59,3 +59,9 @@ void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo) { } + +int tap_fd_set_rx_filter(int fd, bool allmulti, bool promisc, bool nobcast, + uint8_t *mac, int count, uint8_t *macs) +{ + return -1; +} diff --git a/net/tap-linux.c b/net/tap-linux.c index f7aa904..19d05c6 100644 --- a/net/tap-linux.c +++ b/net/tap-linux.c @@ -188,3 +188,41 @@ void tap_fd_set_offload(int fd, int csum, int tso4, } } } + +int tap_fd_set_rx_filter(int fd, bool allmulti, bool promisc, bool nobcast, + uint8_t *mac, int count, uint8_t *list) +{ + struct tun_filter *filter; + int ret; + int len = TAP_LINUX_ETH_ALEN * (promisc ? 0 : (count + 2)); + + filter = qemu_mallocz(offsetof(struct tun_filter, addr) + len); + if (!filter) { + return -1; + } + + if (!promisc) { + memcpy(filter->addr, mac, TAP_LINUX_ETH_ALEN); + filter->count += 1; + + memcpy(&filter->addr[filter->count], list, count * TAP_LINUX_ETH_ALEN); + filter->count += count; + } + + if (!promisc && !allmulti && !nobcast) { + /* If enabled and not included implicitly by promisc or all multicast + * mode, add all ones - the broadcast address. */ + memset(&filter->addr[filter->count], 0xff, TAP_LINUX_ETH_ALEN); + filter->count += 1; + } + + if (allmulti) { + filter->flags |= TUN_FLT_ALLMULTI; + } + + ret = ioctl(fd, TUNSETTXFILTER, filter); + + qemu_free(filter); + + return ret; +} diff --git a/net/tap-linux.h b/net/tap-linux.h index 659e981..6e53562 100644 --- a/net/tap-linux.h +++ b/net/tap-linux.h @@ -25,6 +25,7 @@ #define TUNSETIFF _IOW('T', 202, int) #define TUNGETFEATURES _IOR('T', 207, unsigned int) #define TUNSETOFFLOAD _IOW('T', 208, unsigned int) +#define TUNSETTXFILTER _IOW('T', 209, unsigned int) #define TUNGETIFF _IOR('T', 210, unsigned int) #define TUNSETSNDBUF _IOW('T', 212, int) #define TUNGETVNETHDRSZ _IOR('T', 215, int) @@ -44,6 +45,24 @@ #define TUN_F_TSO_ECN 0x08 /* I can handle TSO with ECN bits. */ #define TUN_F_UFO 0x10 /* I can handle UFO packets */ +/* + * Filter spec (used for SETXXFILTER ioctls) + * This stuff is applicable only to the TAP (Ethernet) devices. + * If the count is zero the filter is disabled and the driver accepts + * all packets (promisc mode). + * If the filter is enabled in order to accept broadcast packets + * broadcast addr must be explicitly included in the addr list. + */ +#define TUN_FLT_ALLMULTI 0x0001 /* Accept all multicast packets */ + +#define TAP_LINUX_ETH_ALEN 6 + +struct tun_filter { + uint16_t flags; /* TUN_FLT_ flags see above */ + uint16_t count; /* Number of addresses */ + uint8_t addr[0][TAP_LINUX_ETH_ALEN]; +}; + struct virtio_net_hdr { uint8_t flags; diff --git a/net/tap-solaris.c b/net/tap-solaris.c index c216d28..2a1d065 100644 --- a/net/tap-solaris.c +++ b/net/tap-solaris.c @@ -225,3 +225,9 @@ void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo) { } + +int tap_fd_set_rx_filter(int fd, bool allmulti, bool promisc, bool nobcast, + uint8_t *mac, int count, uint8_t *macs) +{ + return -1; +} diff --git a/net/tap-win32.c b/net/tap-win32.c index 081904e..333846b 100644 --- a/net/tap-win32.c +++ b/net/tap-win32.c @@ -749,3 +749,9 @@ struct vhost_net *tap_get_vhost_net(VLANClientState *nc) { return NULL; } + +int tap_fd_set_rx_filter(int fd, bool allmulti, bool promisc, bool nobcast, + uint8_t *mac, int count, uint8_t *macs) +{ + return -1; +} diff --git a/net/tap.c b/net/tap.c index eada34a..7bc47f8 100644 --- a/net/tap.c +++ b/net/tap.c @@ -310,6 +310,13 @@ int tap_get_fd(VLANClientState *nc) return s->fd; } +static int tap_set_rx_filter(VLANClientState *nc, bool allmulti, bool promisc, + bool nobcast, uint8_t *mac, int count, uint8_t *list) +{ + TAPState *s = DO_UPCAST(TAPState, nc, nc); + return tap_fd_set_rx_filter(s->fd, allmulti, promisc, nobcast, mac, count, list); +} + /* fd support */ static NetClientInfo net_tap_info = { @@ -318,6 +325,7 @@ static NetClientInfo net_tap_info = { .receive = tap_receive, .receive_raw = tap_receive_raw, .receive_iov = tap_receive_iov, + .set_rx_filter = tap_set_rx_filter, .poll = tap_poll, .cleanup = tap_cleanup, }; diff --git a/net/tap.h b/net/tap.h index e44bd2b..0cf820c 100644 --- a/net/tap.h +++ b/net/tap.h @@ -51,6 +51,8 @@ int tap_probe_vnet_hdr_len(int fd, int len); int tap_probe_has_ufo(int fd); void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo); void tap_fd_set_vnet_hdr_len(int fd, int len); +int tap_fd_set_rx_filter(int fd, bool allmulti, bool promisc, bool nobcast, + uint8_t *, int , uint8_t *); int tap_get_fd(VLANClientState *vc); -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html