Bitterblue Smith <rtl8821cerfe2@xxxxxxxxx> wrote: > On 18/11/2024 11:16, Ping-Ke Shih wrote: > > Bitterblue Smith <rtl8821cerfe2@xxxxxxxxx> wrote: > >> "iperf3 -c 192.168.0.1 -R --udp -b 0" shows about 40% of datagrams > >> are lost. Many torrents don't download faster than 3 MiB/s, probably > >> because the Bittorrent protocol uses UDP. This is somehow related to > >> the use of skb_clone() in the RX path. > > > > Using skb_clone() can improve throughput is weird to me too. Do you check > > top with 100% cpu usage? > > > > I checked the CPU usage now and the results are interesting. In short, > patch 1/2 slightly raises the CPU usage, and patch 2/2 lowers it a lot. Originally I just wanted to know if throughput is a limit of CPU bound. Anyway good to know this patchset can improve CPU usage. > >> > >> Don't use skb_clone(). Instead allocate a new skb for each 802.11 frame > >> received and copy the data from the big (32768 byte) skb. > >> > >> With this patch, "iperf3 -c 192.168.0.1 -R --udp -b 0" shows only 1-2% > >> of datagrams are lost, and torrents can reach download speeds of 36 > >> MiB/s. > >> > >> Tested with RTL8812AU and RTL8822CU. > >> > >> Signed-off-by: Bitterblue Smith <rtl8821cerfe2@xxxxxxxxx> > >> --- > >> drivers/net/wireless/realtek/rtw88/usb.c | 52 ++++++++++++++---------- > >> 1 file changed, 31 insertions(+), 21 deletions(-) > >> > >> diff --git a/drivers/net/wireless/realtek/rtw88/usb.c b/drivers/net/wireless/realtek/rtw88/usb.c > >> index 93ac4837e1b5..727569d4ef4b 100644 > >> --- a/drivers/net/wireless/realtek/rtw88/usb.c > >> +++ b/drivers/net/wireless/realtek/rtw88/usb.c > >> @@ -7,6 +7,7 @@ > >> #include <linux/mutex.h> > >> #include "main.h" > >> #include "debug.h" > >> +#include "mac.h" > >> #include "reg.h" > >> #include "tx.h" > >> #include "rx.h" > >> @@ -546,49 +547,58 @@ static void rtw_usb_rx_handler(struct work_struct *work) > >> { > >> struct rtw_usb *rtwusb = container_of(work, struct rtw_usb, rx_work); > >> struct rtw_dev *rtwdev = rtwusb->rtwdev; > >> - const struct rtw_chip_info *chip = rtwdev->chip; > >> - u32 pkt_desc_sz = chip->rx_pkt_desc_sz; > >> struct ieee80211_rx_status rx_status; > >> - u32 pkt_offset, next_pkt, urb_len; > >> struct rtw_rx_pkt_stat pkt_stat; > >> - struct sk_buff *next_skb; > >> + struct sk_buff *rx_skb; > >> struct sk_buff *skb; > >> + u32 pkt_desc_sz = rtwdev->chip->rx_pkt_desc_sz; > >> + u32 max_skb_len = pkt_desc_sz + PHY_STATUS_SIZE * 8 + > >> + IEEE80211_MAX_MPDU_LEN_VHT_11454; > >> + u32 pkt_offset, next_pkt, skb_len; > >> u8 *rx_desc; > >> int limit; > >> > >> for (limit = 0; limit < 200; limit++) { > >> - skb = skb_dequeue(&rtwusb->rx_queue); > >> - if (!skb) > >> + rx_skb = skb_dequeue(&rtwusb->rx_queue); > >> + if (!rx_skb) > >> break; > >> > >> if (skb_queue_len(&rtwusb->rx_queue) >= RTW_USB_MAX_RXQ_LEN) { > >> dev_dbg_ratelimited(rtwdev->dev, "failed to get rx_queue, overflow\n"); > >> - dev_kfree_skb_any(skb); > >> + dev_kfree_skb_any(rx_skb); > >> continue; > >> } > >> > >> - urb_len = skb->len; > >> + rx_desc = rx_skb->data; > >> > >> do { > >> - rx_desc = skb->data; > >> rtw_rx_query_rx_desc(rtwdev, rx_desc, &pkt_stat, > >> &rx_status); > >> pkt_offset = pkt_desc_sz + pkt_stat.drv_info_sz + > >> pkt_stat.shift; > >> > >> - next_pkt = round_up(pkt_stat.pkt_len + pkt_offset, 8); > >> + skb_len = pkt_stat.pkt_len + pkt_offset; > >> + if (skb_len > max_skb_len) { > > > > This seems a new rule introduced by this patch. Do you really encounter this > > case? Maybe this is the cause of slow download throughput? > > > > No, I never saw this case. It just seemed like a good idea to check the > size passed to alloc_skb. Maybe it's not needed? I think it is fine. Asking some questions before, I just tried to find a cause why 40% datagram get lost as you mentioned in commit message, but I can't. > > >> + rtw_dbg(rtwdev, RTW_DBG_USB, > >> + "skipping too big packet: %u\n", > >> + skb_len); > >> + goto skip_packet; > >> + } > >> > >> - if (urb_len >= next_pkt + pkt_desc_sz) > >> - next_skb = skb_clone(skb, GFP_KERNEL); > >> - else > >> - next_skb = NULL; > >> + skb = alloc_skb(skb_len, GFP_KERNEL); > >> + if (!skb) { > >> + rtw_dbg(rtwdev, RTW_DBG_USB, > >> + "failed to allocate RX skb of size %u\n", > >> + skb_len); > >> + goto skip_packet; > >> + } > >> + > >> + skb_put_data(skb, rx_desc, skb_len); > >> > >> if (pkt_stat.is_c2h) { > >> - skb_trim(skb, pkt_stat.pkt_len + pkt_offset); > >> rtw_fw_c2h_cmd_rx_irqsafe(rtwdev, pkt_offset, skb); > >> } else { > >> skb_pull(skb, pkt_offset); > >> - skb_trim(skb, pkt_stat.pkt_len); > >> rtw_update_rx_freq_for_invalid(rtwdev, skb, > >> &rx_status, > >> &pkt_stat); > >> @@ -597,12 +607,12 @@ static void rtw_usb_rx_handler(struct work_struct *work) > >> ieee80211_rx_irqsafe(rtwdev->hw, skb); > >> } > >> > >> - skb = next_skb; > >> - if (skb) > >> - skb_pull(skb, next_pkt); > >> +skip_packet: > >> + next_pkt = round_up(skb_len, 8); > >> + rx_desc += next_pkt; > >> + } while (rx_desc + pkt_desc_sz < rx_skb->data + rx_skb->len); > >> > >> - urb_len -= next_pkt; > >> - } while (skb); > >> + dev_kfree_skb_any(rx_skb); > >> } > >> } > >> > >> -- > >> 2.47.0 > >