On 15/01/2024 02:50, Ping-Ke Shih wrote: > > >> -----Original Message----- >> From: Bitterblue Smith <rtl8821cerfe2@xxxxxxxxx> >> Sent: Saturday, January 13, 2024 3:51 AM >> To: linux-wireless@xxxxxxxxxxxxxxx >> Cc: Ping-Ke Shih <pkshih@xxxxxxxxxxx>; Larry Finger <Larry.Finger@xxxxxxxxxxxx> >> Subject: [PATCH] wifi: rtlwifi: Speed up firmware loading for USB >> >> Currently it takes almost 6 seconds to upload the firmware for RTL8192CU >> (and 11 seconds for RTL8192DU). That's because the firmware is uploaded >> one byte at a time. >> >> Also, after plugging the device, the firmware gets uploaded three times >> before a connection to the AP is established. >> >> Maybe this is fine for most users, but when testing changes to the >> driver it's really annoying to wait so long. >> >> Speed up the firmware upload by writing chunks of 64 bytes at a time. >> This way it takes about 110 ms for RTL8192CU (and about 210 ms for >> RTL8192DU). >> >> PCI devices could upload it in chunks of 4 bytes, but I don't have any >> to test and commit 89d32c9071aa ("rtlwifi: Download firmware as bytes >> rather than as dwords") decided otherwise anyway. >> >> Signed-off-by: Bitterblue Smith <rtl8821cerfe2@xxxxxxxxx> >> --- >> drivers/net/wireless/realtek/rtlwifi/efuse.c | 65 +++++++++++++++++-- >> drivers/net/wireless/realtek/rtlwifi/efuse.h | 4 +- >> .../wireless/realtek/rtlwifi/rtl8192cu/sw.c | 6 +- >> drivers/net/wireless/realtek/rtlwifi/usb.c | 9 +++ >> drivers/net/wireless/realtek/rtlwifi/wifi.h | 8 +++ >> 5 files changed, 82 insertions(+), 10 deletions(-) >> >> diff --git a/drivers/net/wireless/realtek/rtlwifi/efuse.c >> b/drivers/net/wireless/realtek/rtlwifi/efuse.c >> index 2e945554ed6d..870a276299f5 100644 >> --- a/drivers/net/wireless/realtek/rtlwifi/efuse.c >> +++ b/drivers/net/wireless/realtek/rtlwifi/efuse.c >> @@ -1287,18 +1287,73 @@ int rtl_get_hwinfo(struct ieee80211_hw *hw, struct rtl_priv *rtlpriv, >> } >> EXPORT_SYMBOL_GPL(rtl_get_hwinfo); >> >> -void rtl_fw_block_write(struct ieee80211_hw *hw, const u8 *buffer, u32 size) >> +static void _rtl_fw_block_write_usb(struct ieee80211_hw *hw, u8 *buffer, u32 size) >> +{ >> + struct rtl_priv *rtlpriv = rtl_priv(hw); >> + u32 blockcount, blockcount8, blockcount4; >> + u32 remain8 = 0, remain4 = 0, remain = 0; >> + const u32 blocksize = 64; >> + const u32 blocksize8 = 8; >> + const u32 blocksize4 = 4; >> + u32 i, offset; >> + >> + blockcount = size / blocksize; >> + remain8 = size % blocksize; >> + for (i = 0; i < blockcount; i++) { >> + offset = i * blocksize; >> + rtl_write_chunk(rtlpriv, >> + START_ADDRESS + offset, >> + blocksize, buffer + offset); >> + } >> + >> + if (remain8) { >> + offset = blockcount * blocksize; >> + blockcount8 = remain8 / blocksize8; >> + remain4 = remain8 % blocksize8; >> + >> + for (i = 0; i < blockcount8; i++) >> + rtl_write_chunk(rtlpriv, >> + START_ADDRESS + offset + i * blocksize8, >> + blocksize8, >> + buffer + offset + i * blocksize8); >> + } >> + >> + if (remain4) { >> + offset += blockcount8 * blocksize8; >> + blockcount4 = remain4 / blocksize4; >> + remain = remain8 % blocksize4; >> + >> + for (i = 0; i < blockcount4; i++) >> + rtl_write_dword(rtlpriv, >> + START_ADDRESS + offset + i * blocksize4, >> + cpu_to_le32(*(u32 *)(buffer + offset + i))); > > Here should be le32_to_cpu(). > Right. But now I realise that rtl_write_dword is called at most once here, so there is not much point using it. >> + } >> + >> + if (remain) { >> + offset += blockcount4 * blocksize4; >> + >> + for (i = 0; i < remain; i++) >> + rtl_write_byte(rtlpriv, START_ADDRESS + offset + i, >> + *(buffer + offset + i)); >> + } >> +} > > I think we can increase 'start' and 'buffer' addresses after writing, so > arithmetic can be simple. And, combine 64/8/4/1 block writing into single > loop. Pseudo code like > > static void _rtl_fw_block_write_usb(struct ieee80211_hw *hw, u8 *buffer, u32 size) > { > u32 start = START_ADDRESS; > u32 n; > > while (size > 0) { > if (size >= 64) { > n = 64; > rtl_write_chunk(rtlpriv, start, 64, buffer); > } else if (size >= 8) { > n = 8; > rtl_write_chunk(rtlpriv, start, 8, buffer); > } else if (size >= 4) { > n = 4; > rtl_write_dword(rtlpriv, start, le32_to_cpu(*(u32 *)buffer)); > } else { > n = 1; > rtl_write_byte(rtlpriv, start, *buffer); > } > > start += n; > buffer += n; > size -= n; > } > } > Why didn't I think of that? (Because I didn't think much, just copied the code from the RTL8192DU driver and cleaned it a bit.) >> + >> +void rtl_fw_block_write(struct ieee80211_hw *hw, u8 *buffer, u32 size) >> { >> struct rtl_priv *rtlpriv = rtl_priv(hw); >> - u8 *pu4byteptr = (u8 *)buffer; >> u32 i; >> >> - for (i = 0; i < size; i++) >> - rtl_write_byte(rtlpriv, (START_ADDRESS + i), *(pu4byteptr + i)); >> + if (rtlpriv->rtlhal.interface == INTF_PCI) { >> + for (i = 0; i < size; i++) >> + rtl_write_byte(rtlpriv, (START_ADDRESS + i), >> + *(buffer + i)); >> + } else if (rtlpriv->rtlhal.interface == INTF_USB) { >> + _rtl_fw_block_write_usb(hw, buffer, size); >> + } >> } >> EXPORT_SYMBOL_GPL(rtl_fw_block_write); >> >> -void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, const u8 *buffer, >> +void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, u8 *buffer, >> u32 size) >> { >> struct rtl_priv *rtlpriv = rtl_priv(hw); >> diff --git a/drivers/net/wireless/realtek/rtlwifi/efuse.h >> b/drivers/net/wireless/realtek/rtlwifi/efuse.h >> index 1ec59f439382..4821625ad1e5 100644 >> --- a/drivers/net/wireless/realtek/rtlwifi/efuse.h >> +++ b/drivers/net/wireless/realtek/rtlwifi/efuse.h >> @@ -91,8 +91,8 @@ void efuse_power_switch(struct ieee80211_hw *hw, u8 write, u8 pwrstate); >> int rtl_get_hwinfo(struct ieee80211_hw *hw, struct rtl_priv *rtlpriv, >> int max_size, u8 *hwinfo, int *params); >> void rtl_fill_dummy(u8 *pfwbuf, u32 *pfwlen); >> -void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, const u8 *buffer, >> +void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, u8 *buffer, >> u32 size); >> -void rtl_fw_block_write(struct ieee80211_hw *hw, const u8 *buffer, u32 size); >> +void rtl_fw_block_write(struct ieee80211_hw *hw, u8 *buffer, u32 size); >> void rtl_efuse_ops_init(struct ieee80211_hw *hw); >> #endif >> diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c >> b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c >> index 20b4aac69642..9f4cf09090d6 100644 >> --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c >> +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c >> @@ -40,7 +40,7 @@ static int rtl92cu_init_sw_vars(struct ieee80211_hw *hw) >> rtlpriv->dm.thermalvalue = 0; >> >> /* for firmware buf */ >> - rtlpriv->rtlhal.pfirmware = vzalloc(0x4000); >> + rtlpriv->rtlhal.pfirmware = kmalloc(0x4000, GFP_KERNEL); > > Why should we use kmalloc instead? I don't see any description about this in > commit message. > That's because usb_control_msg() can't use memory allocated by vmalloc: Jan 09 19:39:29 ideapad2 kernel: xhci_hcd 0000:03:00.3: rejecting DMA map of vmalloc memory I will mention this in the commit message. >> if (!rtlpriv->rtlhal.pfirmware) { >> pr_err("Can't alloc buffer for fw\n"); >> return 1; >> @@ -61,7 +61,7 @@ static int rtl92cu_init_sw_vars(struct ieee80211_hw *hw) >> fw_name, rtlpriv->io.dev, >> GFP_KERNEL, hw, rtl_fw_cb); >> if (err) { >> - vfree(rtlpriv->rtlhal.pfirmware); >> + kfree(rtlpriv->rtlhal.pfirmware); >> rtlpriv->rtlhal.pfirmware = NULL; >> } >> return err; >> @@ -72,7 +72,7 @@ static void rtl92cu_deinit_sw_vars(struct ieee80211_hw *hw) >> struct rtl_priv *rtlpriv = rtl_priv(hw); >> >> if (rtlpriv->rtlhal.pfirmware) { >> - vfree(rtlpriv->rtlhal.pfirmware); >> + kfree(rtlpriv->rtlhal.pfirmware); >> rtlpriv->rtlhal.pfirmware = NULL; >> } >> } > > [...] > >