Re: [PATCH v8] af_packet: Handle outgoing VLAN packets without hardware offloading

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Mon, Jun 17, 2024 at 09:07:58PM CEST, willemdebruijn.kernel@xxxxxxxxx wrote:
>Jiri Pirko wrote:
>> Mon, Jun 17, 2024 at 07:45:14AM CEST, chengen.du@xxxxxxxxxxxxx wrote:
>> >The issue initially stems from libpcap. The ethertype will be overwritten
>> >as the VLAN TPID if the network interface lacks hardware VLAN offloading.
>> >In the outbound packet path, if hardware VLAN offloading is unavailable,
>> >the VLAN tag is inserted into the payload but then cleared from the sk_buff
>> >struct. Consequently, this can lead to a false negative when checking for
>> >the presence of a VLAN tag, causing the packet sniffing outcome to lack
>> >VLAN tag information (i.e., TCI-TPID). As a result, the packet capturing
>> >tool may be unable to parse packets as expected.
>> >
>> >The TCI-TPID is missing because the prb_fill_vlan_info() function does not
>> >modify the tp_vlan_tci/tp_vlan_tpid values, as the information is in the
>> >payload and not in the sk_buff struct. The skb_vlan_tag_present() function
>> >only checks vlan_all in the sk_buff struct. In cooked mode, the L2 header
>> >is stripped, preventing the packet capturing tool from determining the
>> >correct TCI-TPID value. Additionally, the protocol in SLL is incorrect,
>> >which means the packet capturing tool cannot parse the L3 header correctly.
>> >
>> >Link: https://github.com/the-tcpdump-group/libpcap/issues/1105
>> >Link: https://lore.kernel.org/netdev/20240520070348.26725-1-chengen.du@xxxxxxxxxxxxx/T/#u
>> >Fixes: 393e52e33c6c ("packet: deliver VLAN TCI to userspace")
>> >Cc: stable@xxxxxxxxxxxxxxx
>> >Signed-off-by: Chengen Du <chengen.du@xxxxxxxxxxxxx>
>> >---
>> > net/packet/af_packet.c | 86 +++++++++++++++++++++++++++++++++++++++++-
>> > 1 file changed, 84 insertions(+), 2 deletions(-)
>> >
>> >diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
>> >index ea3ebc160e25..84e8884a77e3 100644
>> >--- a/net/packet/af_packet.c
>> >+++ b/net/packet/af_packet.c
>> >@@ -538,6 +538,61 @@ static void *packet_current_frame(struct packet_sock *po,
>> > 	return packet_lookup_frame(po, rb, rb->head, status);
>> > }
>> > 
>> >+static u16 vlan_get_tci(struct sk_buff *skb, struct net_device *dev)
>> >+{
>> >+	struct vlan_hdr vhdr, *vh;
>> >+	u8 *skb_orig_data = skb->data;
>> >+	int skb_orig_len = skb->len;
>> >+	unsigned int header_len;
>> >+
>> >+	if (!dev)
>> >+		return 0;
>> >+
>> >+	/* In the SOCK_DGRAM scenario, skb data starts at the network
>> >+	 * protocol, which is after the VLAN headers. The outer VLAN
>> >+	 * header is at the hard_header_len offset in non-variable
>> >+	 * length link layer headers. If it's a VLAN device, the
>> >+	 * min_header_len should be used to exclude the VLAN header
>> >+	 * size.
>> >+	 */
>> >+	if (dev->min_header_len == dev->hard_header_len)
>> >+		header_len = dev->hard_header_len;
>> >+	else if (is_vlan_dev(dev))
>> >+		header_len = dev->min_header_len;
>> >+	else
>> >+		return 0;
>> >+
>> >+	skb_push(skb, skb->data - skb_mac_header(skb));
>> >+	vh = skb_header_pointer(skb, header_len, sizeof(vhdr), &vhdr);
>> >+	if (skb_orig_data != skb->data) {
>> >+		skb->data = skb_orig_data;
>> >+		skb->len = skb_orig_len;
>> >+	}
>> >+	if (unlikely(!vh))
>> >+		return 0;
>> >+
>> >+	return ntohs(vh->h_vlan_TCI);
>> >+}
>> >+
>> >+static __be16 vlan_get_protocol_dgram(struct sk_buff *skb)
>> >+{
>> >+	__be16 proto = skb->protocol;
>> >+
>> >+	if (unlikely(eth_type_vlan(proto))) {
>> >+		u8 *skb_orig_data = skb->data;
>> >+		int skb_orig_len = skb->len;
>> >+
>> >+		skb_push(skb, skb->data - skb_mac_header(skb));
>> >+		proto = __vlan_get_protocol(skb, proto, NULL);
>> >+		if (skb_orig_data != skb->data) {
>> >+			skb->data = skb_orig_data;
>> >+			skb->len = skb_orig_len;
>> >+		}
>> >+	}
>> >+
>> >+	return proto;
>> >+}
>> >+
>> > static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
>> > {
>> > 	del_timer_sync(&pkc->retire_blk_timer);
>> >@@ -1007,10 +1062,16 @@ static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
>> > static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
>> > 			struct tpacket3_hdr *ppd)
>> > {
>> >+	struct packet_sock *po = container_of(pkc, struct packet_sock, rx_ring.prb_bdqc);
>> >+
>> > 	if (skb_vlan_tag_present(pkc->skb)) {
>> > 		ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb);
>> > 		ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto);
>> > 		ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
>> >+	} else if (unlikely(po->sk.sk_type == SOCK_DGRAM && eth_type_vlan(pkc->skb->protocol))) {
>> >+		ppd->hv1.tp_vlan_tci = vlan_get_tci(pkc->skb, pkc->skb->dev);
>> >+		ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->protocol);
>> >+		ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
>> > 	} else {
>> > 		ppd->hv1.tp_vlan_tci = 0;
>> > 		ppd->hv1.tp_vlan_tpid = 0;
>> >@@ -2428,6 +2489,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
>> > 			h.h2->tp_vlan_tci = skb_vlan_tag_get(skb);
>> > 			h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto);
>> > 			status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
>> >+		} else if (unlikely(sk->sk_type == SOCK_DGRAM && eth_type_vlan(skb->protocol))) {
>> >+			h.h2->tp_vlan_tci = vlan_get_tci(skb, skb->dev);
>> >+			h.h2->tp_vlan_tpid = ntohs(skb->protocol);
>> >+			status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
>> > 		} else {
>> > 			h.h2->tp_vlan_tci = 0;
>> > 			h.h2->tp_vlan_tpid = 0;
>> >@@ -2457,7 +2522,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
>> > 	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
>> > 	sll->sll_family = AF_PACKET;
>> > 	sll->sll_hatype = dev->type;
>> >-	sll->sll_protocol = skb->protocol;
>> >+	sll->sll_protocol = (sk->sk_type == SOCK_DGRAM) ?
>> >+		vlan_get_protocol_dgram(skb) : skb->protocol;
>> > 	sll->sll_pkttype = skb->pkt_type;
>> > 	if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV)))
>> > 		sll->sll_ifindex = orig_dev->ifindex;
>> >@@ -3482,7 +3548,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
>> > 		/* Original length was stored in sockaddr_ll fields */
>> > 		origlen = PACKET_SKB_CB(skb)->sa.origlen;
>> > 		sll->sll_family = AF_PACKET;
>> >-		sll->sll_protocol = skb->protocol;
>> >+		sll->sll_protocol = (sock->type == SOCK_DGRAM) ?
>> >+			vlan_get_protocol_dgram(skb) : skb->protocol;
>> > 	}
>> > 
>> > 	sock_recv_cmsgs(msg, sk, skb);
>> >@@ -3539,6 +3606,21 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
>> > 			aux.tp_vlan_tci = skb_vlan_tag_get(skb);
>> > 			aux.tp_vlan_tpid = ntohs(skb->vlan_proto);
>> > 			aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
>> >+		} else if (unlikely(sock->type == SOCK_DGRAM && eth_type_vlan(skb->protocol))) {
>> 
>> I don't understand why this would be needed here. We spent quite a bit
>> of effort in the past to make sure vlan header is always stripped.
>> Could you fix that in tx path to fulfill the expectation?
>
>Doesn't that require NETIF_F_HW_VLAN_CTAG_TX?
>
>I also wondered whether we should just convert the skb for this case
>with skb_vlan_untag, to avoid needing new PF_PACKET logic to handle
>unstripped tags in the packet socket code. But it seems equally
>complex.

Correct. skb_vlan_untag() as a preparation of skb before this function
is called is exactly what I was suggesting.


>
>Aside from this conversation about whether we need to support this
>unstripped case at all, code LGTM.




[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux