Hi Willem, Thank you for your comments on patch v4. I have made some modifications and would like to discuss some of your suggestions in detail. On Tue, Jun 4, 2024 at 1:50 PM Chengen Du <chengen.du@xxxxxxxxxxxxx> wrote: > > The issue initially stems from libpcap. The ethertype will be overwritten > as the VLAN TPID if the network interface lacks hardware VLAN offloading. > In the outbound packet path, if hardware VLAN offloading is unavailable, > the VLAN tag is inserted into the payload but then cleared from the sk_buff > struct. Consequently, this can lead to a false negative when checking for > the presence of a VLAN tag, causing the packet sniffing outcome to lack > VLAN tag information (i.e., TCI-TPID). As a result, the packet capturing > tool may be unable to parse packets as expected. > > The TCI-TPID is missing because the prb_fill_vlan_info() function does not > modify the tp_vlan_tci/tp_vlan_tpid values, as the information is in the > payload and not in the sk_buff struct. The skb_vlan_tag_present() function > only checks vlan_all in the sk_buff struct. In cooked mode, the L2 header > is stripped, preventing the packet capturing tool from determining the > correct TCI-TPID value. Additionally, the protocol in SLL is incorrect, > which means the packet capturing tool cannot parse the L3 header correctly. > > Link: https://github.com/the-tcpdump-group/libpcap/issues/1105 > Link: https://lore.kernel.org/netdev/20240520070348.26725-1-chengen.du@xxxxxxxxxxxxx/T/#u > Fixes: 393e52e33c6c ("packet: deliver VLAN TCI to userspace") > Cc: stable@xxxxxxxxxxxxxxx > Signed-off-by: Chengen Du <chengen.du@xxxxxxxxxxxxx> > --- > net/packet/af_packet.c | 64 ++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 62 insertions(+), 2 deletions(-) > > diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c > index ea3ebc160e25..53d51ac87ac6 100644 > --- a/net/packet/af_packet.c > +++ b/net/packet/af_packet.c > @@ -538,6 +538,52 @@ static void *packet_current_frame(struct packet_sock *po, > return packet_lookup_frame(po, rb, rb->head, status); > } > > +static u16 vlan_get_tci(struct sk_buff *skb) > +{ > + unsigned int vlan_depth = skb->mac_len; > + struct vlan_hdr vhdr, *vh; > + u8 *skb_head = skb->data; > + int skb_len = skb->len; > + > + if (vlan_depth) { > + if (WARN_ON(vlan_depth < VLAN_HLEN)) > + return 0; > + vlan_depth -= VLAN_HLEN; > + } else { > + vlan_depth = ETH_HLEN; > + } > + > + skb_push(skb, skb->data - skb_mac_header(skb)); > + vh = skb_header_pointer(skb, vlan_depth, sizeof(vhdr), &vhdr); > + if (skb_head != skb->data) { > + skb->data = skb_head; > + skb->len = skb_len; > + } > + if (unlikely(!vh)) > + return 0; > + > + return ntohs(vh->h_vlan_TCI); > +} As you mentioned, we need the outermost VLAN tag to fit the protocol we referenced in tp_vlan_tpid. In if_vlan.h, __vlan_get_protocol() only provides the protocol and may affect other usage scenarios if we modify it to also provide TCI. There is a similar function called __vlan_get_tag(), which also aims to extract TCI from the L2 header, but it doesn't check the header size as __vlan_get_protocol() does. To prevent affecting other usage scenarios, I introduced a new function to achieve our purpose. Additionally, the starting point of skb->data differs in SOCK_RAW and SOCK_DGRAM cases. I accounted for this by using skb_push to ensure that skb->data starts from the L2 header. Would you recommend modifying __vlan_get_tag() to add the size check logic instead? If so, we would also need to consider how to integrate the size check logic into both __vlan_get_protocol() and __vlan_get_tag(). > + > +static __be16 vlan_get_protocol_dgram(struct sk_buff *skb) > +{ > + __be16 proto = skb->protocol; > + > + if (unlikely(eth_type_vlan(proto))) { > + u8 *skb_head = skb->data; > + int skb_len = skb->len; > + > + skb_push(skb, skb->data - skb_mac_header(skb)); > + proto = __vlan_get_protocol(skb, proto, NULL); > + if (skb_head != skb->data) { > + skb->data = skb_head; > + skb->len = skb_len; > + } > + } > + > + return proto; > +} > + > static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc) > { > del_timer_sync(&pkc->retire_blk_timer); > @@ -1011,6 +1057,10 @@ static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc, > ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb); > ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto); > ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; > + } else if (unlikely(eth_type_vlan(pkc->skb->protocol))) { > + ppd->hv1.tp_vlan_tci = vlan_get_tci(pkc->skb); > + ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->protocol); > + ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; > } else { > ppd->hv1.tp_vlan_tci = 0; > ppd->hv1.tp_vlan_tpid = 0; > @@ -2428,6 +2478,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, > h.h2->tp_vlan_tci = skb_vlan_tag_get(skb); > h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto); > status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; > + } else if (unlikely(eth_type_vlan(skb->protocol))) { > + h.h2->tp_vlan_tci = vlan_get_tci(skb); > + h.h2->tp_vlan_tpid = ntohs(skb->protocol); > + status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; > } else { > h.h2->tp_vlan_tci = 0; > h.h2->tp_vlan_tpid = 0; > @@ -2457,7 +2511,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, > sll->sll_halen = dev_parse_header(skb, sll->sll_addr); > sll->sll_family = AF_PACKET; > sll->sll_hatype = dev->type; > - sll->sll_protocol = skb->protocol; > + sll->sll_protocol = (sk->sk_type == SOCK_DGRAM) ? > + vlan_get_protocol_dgram(skb) : skb->protocol; > sll->sll_pkttype = skb->pkt_type; > if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) > sll->sll_ifindex = orig_dev->ifindex; > @@ -3482,7 +3537,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, > /* Original length was stored in sockaddr_ll fields */ > origlen = PACKET_SKB_CB(skb)->sa.origlen; > sll->sll_family = AF_PACKET; > - sll->sll_protocol = skb->protocol; > + sll->sll_protocol = (sock->type == SOCK_DGRAM) ? > + vlan_get_protocol_dgram(skb) : skb->protocol; > } > > sock_recv_cmsgs(msg, sk, skb); > @@ -3539,6 +3595,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, > aux.tp_vlan_tci = skb_vlan_tag_get(skb); > aux.tp_vlan_tpid = ntohs(skb->vlan_proto); > aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; > + } else if (unlikely(eth_type_vlan(skb->protocol))) { > + aux.tp_vlan_tci = vlan_get_tci(skb); > + aux.tp_vlan_tpid = ntohs(skb->protocol); > + aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; > } else { > aux.tp_vlan_tci = 0; > aux.tp_vlan_tpid = 0; > -- > 2.43.0 >