> -----Original Message----- > From: Bitterblue Smith <rtl8821cerfe2@xxxxxxxxx> > Sent: Saturday, January 13, 2024 3:51 AM > To: linux-wireless@xxxxxxxxxxxxxxx > Cc: Ping-Ke Shih <pkshih@xxxxxxxxxxx>; Larry Finger <Larry.Finger@xxxxxxxxxxxx> > Subject: [PATCH] wifi: rtlwifi: Speed up firmware loading for USB > > Currently it takes almost 6 seconds to upload the firmware for RTL8192CU > (and 11 seconds for RTL8192DU). That's because the firmware is uploaded > one byte at a time. > > Also, after plugging the device, the firmware gets uploaded three times > before a connection to the AP is established. > > Maybe this is fine for most users, but when testing changes to the > driver it's really annoying to wait so long. > > Speed up the firmware upload by writing chunks of 64 bytes at a time. > This way it takes about 110 ms for RTL8192CU (and about 210 ms for > RTL8192DU). > > PCI devices could upload it in chunks of 4 bytes, but I don't have any > to test and commit 89d32c9071aa ("rtlwifi: Download firmware as bytes > rather than as dwords") decided otherwise anyway. > > Signed-off-by: Bitterblue Smith <rtl8821cerfe2@xxxxxxxxx> > --- > drivers/net/wireless/realtek/rtlwifi/efuse.c | 65 +++++++++++++++++-- > drivers/net/wireless/realtek/rtlwifi/efuse.h | 4 +- > .../wireless/realtek/rtlwifi/rtl8192cu/sw.c | 6 +- > drivers/net/wireless/realtek/rtlwifi/usb.c | 9 +++ > drivers/net/wireless/realtek/rtlwifi/wifi.h | 8 +++ > 5 files changed, 82 insertions(+), 10 deletions(-) > > diff --git a/drivers/net/wireless/realtek/rtlwifi/efuse.c > b/drivers/net/wireless/realtek/rtlwifi/efuse.c > index 2e945554ed6d..870a276299f5 100644 > --- a/drivers/net/wireless/realtek/rtlwifi/efuse.c > +++ b/drivers/net/wireless/realtek/rtlwifi/efuse.c > @@ -1287,18 +1287,73 @@ int rtl_get_hwinfo(struct ieee80211_hw *hw, struct rtl_priv *rtlpriv, > } > EXPORT_SYMBOL_GPL(rtl_get_hwinfo); > > -void rtl_fw_block_write(struct ieee80211_hw *hw, const u8 *buffer, u32 size) > +static void _rtl_fw_block_write_usb(struct ieee80211_hw *hw, u8 *buffer, u32 size) > +{ > + struct rtl_priv *rtlpriv = rtl_priv(hw); > + u32 blockcount, blockcount8, blockcount4; > + u32 remain8 = 0, remain4 = 0, remain = 0; > + const u32 blocksize = 64; > + const u32 blocksize8 = 8; > + const u32 blocksize4 = 4; > + u32 i, offset; > + > + blockcount = size / blocksize; > + remain8 = size % blocksize; > + for (i = 0; i < blockcount; i++) { > + offset = i * blocksize; > + rtl_write_chunk(rtlpriv, > + START_ADDRESS + offset, > + blocksize, buffer + offset); > + } > + > + if (remain8) { > + offset = blockcount * blocksize; > + blockcount8 = remain8 / blocksize8; > + remain4 = remain8 % blocksize8; > + > + for (i = 0; i < blockcount8; i++) > + rtl_write_chunk(rtlpriv, > + START_ADDRESS + offset + i * blocksize8, > + blocksize8, > + buffer + offset + i * blocksize8); > + } > + > + if (remain4) { > + offset += blockcount8 * blocksize8; > + blockcount4 = remain4 / blocksize4; > + remain = remain8 % blocksize4; > + > + for (i = 0; i < blockcount4; i++) > + rtl_write_dword(rtlpriv, > + START_ADDRESS + offset + i * blocksize4, > + cpu_to_le32(*(u32 *)(buffer + offset + i))); Here should be le32_to_cpu(). > + } > + > + if (remain) { > + offset += blockcount4 * blocksize4; > + > + for (i = 0; i < remain; i++) > + rtl_write_byte(rtlpriv, START_ADDRESS + offset + i, > + *(buffer + offset + i)); > + } > +} I think we can increase 'start' and 'buffer' addresses after writing, so arithmetic can be simple. And, combine 64/8/4/1 block writing into single loop. Pseudo code like static void _rtl_fw_block_write_usb(struct ieee80211_hw *hw, u8 *buffer, u32 size) { u32 start = START_ADDRESS; u32 n; while (size > 0) { if (size >= 64) { n = 64; rtl_write_chunk(rtlpriv, start, 64, buffer); } else if (size >= 8) { n = 8; rtl_write_chunk(rtlpriv, start, 8, buffer); } else if (size >= 4) { n = 4; rtl_write_dword(rtlpriv, start, le32_to_cpu(*(u32 *)buffer)); } else { n = 1; rtl_write_byte(rtlpriv, start, *buffer); } start += n; buffer += n; size -= n; } } > + > +void rtl_fw_block_write(struct ieee80211_hw *hw, u8 *buffer, u32 size) > { > struct rtl_priv *rtlpriv = rtl_priv(hw); > - u8 *pu4byteptr = (u8 *)buffer; > u32 i; > > - for (i = 0; i < size; i++) > - rtl_write_byte(rtlpriv, (START_ADDRESS + i), *(pu4byteptr + i)); > + if (rtlpriv->rtlhal.interface == INTF_PCI) { > + for (i = 0; i < size; i++) > + rtl_write_byte(rtlpriv, (START_ADDRESS + i), > + *(buffer + i)); > + } else if (rtlpriv->rtlhal.interface == INTF_USB) { > + _rtl_fw_block_write_usb(hw, buffer, size); > + } > } > EXPORT_SYMBOL_GPL(rtl_fw_block_write); > > -void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, const u8 *buffer, > +void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, u8 *buffer, > u32 size) > { > struct rtl_priv *rtlpriv = rtl_priv(hw); > diff --git a/drivers/net/wireless/realtek/rtlwifi/efuse.h > b/drivers/net/wireless/realtek/rtlwifi/efuse.h > index 1ec59f439382..4821625ad1e5 100644 > --- a/drivers/net/wireless/realtek/rtlwifi/efuse.h > +++ b/drivers/net/wireless/realtek/rtlwifi/efuse.h > @@ -91,8 +91,8 @@ void efuse_power_switch(struct ieee80211_hw *hw, u8 write, u8 pwrstate); > int rtl_get_hwinfo(struct ieee80211_hw *hw, struct rtl_priv *rtlpriv, > int max_size, u8 *hwinfo, int *params); > void rtl_fill_dummy(u8 *pfwbuf, u32 *pfwlen); > -void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, const u8 *buffer, > +void rtl_fw_page_write(struct ieee80211_hw *hw, u32 page, u8 *buffer, > u32 size); > -void rtl_fw_block_write(struct ieee80211_hw *hw, const u8 *buffer, u32 size); > +void rtl_fw_block_write(struct ieee80211_hw *hw, u8 *buffer, u32 size); > void rtl_efuse_ops_init(struct ieee80211_hw *hw); > #endif > diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c > b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c > index 20b4aac69642..9f4cf09090d6 100644 > --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c > +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c > @@ -40,7 +40,7 @@ static int rtl92cu_init_sw_vars(struct ieee80211_hw *hw) > rtlpriv->dm.thermalvalue = 0; > > /* for firmware buf */ > - rtlpriv->rtlhal.pfirmware = vzalloc(0x4000); > + rtlpriv->rtlhal.pfirmware = kmalloc(0x4000, GFP_KERNEL); Why should we use kmalloc instead? I don't see any description about this in commit message. > if (!rtlpriv->rtlhal.pfirmware) { > pr_err("Can't alloc buffer for fw\n"); > return 1; > @@ -61,7 +61,7 @@ static int rtl92cu_init_sw_vars(struct ieee80211_hw *hw) > fw_name, rtlpriv->io.dev, > GFP_KERNEL, hw, rtl_fw_cb); > if (err) { > - vfree(rtlpriv->rtlhal.pfirmware); > + kfree(rtlpriv->rtlhal.pfirmware); > rtlpriv->rtlhal.pfirmware = NULL; > } > return err; > @@ -72,7 +72,7 @@ static void rtl92cu_deinit_sw_vars(struct ieee80211_hw *hw) > struct rtl_priv *rtlpriv = rtl_priv(hw); > > if (rtlpriv->rtlhal.pfirmware) { > - vfree(rtlpriv->rtlhal.pfirmware); > + kfree(rtlpriv->rtlhal.pfirmware); > rtlpriv->rtlhal.pfirmware = NULL; > } > } [...]