Search Linux Wireless

Re: [PATCH 1/2] add mt7601u driver

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, 04 May 2015 12:15:11 +0200, Johannes Berg wrote:
> On Mon, 2015-05-04 at 12:04 +0200, Jakub Kiciński wrote:
> 
> > > Don't know how your buffers are set up, but if the DMA engine consumes
> > > pages you could consider using paged RX instead of the memcpy().
> > 
> > DMA engine can concatenate multiple frames into a single USB bulk
> > transfer to a large continuous buffer.  There is no way to request 
> > any alignment of the frames within that large buffer so I think paged 
> > RX is not an option.
> 
> You could probably still do it because the networking stack only
> requires the *headers* to be aligned. But if they headers aren't in the
> skb->data (skb->head) then they'll be pulled into that by the network
> stack, where they'll be aligned.

I *think* I got it.  Would you care to take a look?  I basically
allocate a compound page with dev_alloc_pages() and run get_page() for
every fragment.

------>8-----------------------------------

diff --git a/dma.c b/dma.c
index e865cf8384a5..a92993a64aeb 100644
--- a/dma.c
+++ b/dma.c
@@ -16,6 +16,9 @@
 #include "usb.h"
 #include "trace.h"
 
+static int mt7601u_submit_rx_buf(struct mt7601u_dev *dev,
+				 struct mt7601u_dma_buf_rx *e, gfp_t gfp);
+
 static unsigned int ieee80211_get_hdrlen_from_buf(const u8 *data, unsigned len)
 {
 	const struct ieee80211_hdr *hdr = (const struct ieee80211_hdr *)data;
@@ -60,7 +63,7 @@ mt7601u_rx_process_seg(struct mt7601u_dev *dev, u8 *data, u32 seg_len)
 {
 	struct sk_buff *skb;
 	struct mt7601u_rxwi *rxwi;
-	u32 fce_info;
+	u32 true_len, fce_info;
 
 	/* DMA_INFO field at the beginning of the segment contains only some of
 	 * the information, we need to read the FCE descriptor from the end.
@@ -86,11 +89,81 @@ mt7601u_rx_process_seg(struct mt7601u_dev *dev, u8 *data, u32 seg_len)
 	if (!skb)
 		return;
 
-	if (mt76_mac_process_rx(dev, skb, rxwi)) {
-		dev_kfree_skb(skb);
-		return;
+	true_len = mt76_mac_process_rx(dev, skb, skb->data, rxwi);
+	skb_trim(skb, true_len);
+
+	ieee80211_rx_ni(dev->hw, skb);
+}
+
+static void mt7601u_rx_add_frags(struct mt7601u_dev *dev, struct sk_buff *skb,
+				 void *data, u32 true_len, u32 truesize,
+				 struct page *p)
+{
+	unsigned long addr = (unsigned long) data;
+	u32 overpage = 0;
+	u32 p_off = (data - page_address(p)) >> PAGE_SHIFT;
+
+	if ((addr & ~PAGE_MASK) + true_len > PAGE_SIZE)
+		overpage = (addr & ~PAGE_MASK) + true_len - PAGE_SIZE;
+
+	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, p + p_off,
+			addr & ~PAGE_MASK, true_len - overpage, truesize);
+	get_page(p + p_off);
+
+	/* Do we really need to split the buffer if it crosses a page boundary?
+	 * Does networking code care?
+	 */
+	if (overpage) {
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, p + p_off + 1,
+				0, overpage, 0);
+		get_page(p + p_off + 1);
+	}
+}
+
+static void mt7601u_rx_process_seg_paged(struct mt7601u_dev *dev, u8 *data,
+					 u32 seg_len, struct page *p)
+{
+	struct sk_buff *skb;
+	struct mt7601u_rxwi *rxwi;
+	u32 true_len, fce_info, truesize = seg_len;
+
+	/* DMA_INFO field at the beginning of the segment contains only some of
+	 * the information, we need to read the FCE descriptor from the end.
+	 */
+	fce_info = get_unaligned_le32(data + seg_len - MT_FCE_INFO_LEN);
+	seg_len -= MT_FCE_INFO_LEN;
+
+	data += MT_DMA_HDR_LEN;
+	seg_len -= MT_DMA_HDR_LEN;
+
+	rxwi = (struct mt7601u_rxwi *) data;
+	data += sizeof(struct mt7601u_rxwi);
+	seg_len -= sizeof(struct mt7601u_rxwi);
+
+	if (unlikely(rxwi->zero[0] || rxwi->zero[1] || rxwi->zero[2]))
+		dev_err_once(dev->dev, "Error: RXWI zero fields are set\n");
+	if (unlikely(MT76_GET(MT_RXD_INFO_TYPE, fce_info)))
+		dev_err_once(dev->dev, "Error: RX path seen a non-pkt urb\n");
+
+	trace_mt_rx(dev, rxwi, fce_info);
+
+	if (rxwi->rxinfo & cpu_to_le32(MT_RXINFO_L2PAD)) {
+		int hdr_len = ieee80211_get_hdrlen_from_buf(data, seg_len);
+
+		memmove(data + 2, data, hdr_len);
+		seg_len -= 2;
+		data += 2;
 	}
 
+	skb = __netdev_alloc_skb(NULL, 256, GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	true_len = mt76_mac_process_rx(dev, skb, data, rxwi);
+	WARN_ON(true_len > seg_len || true_len < seg_len - 3);
+
+	mt7601u_rx_add_frags(dev, skb, data, true_len, truesize, p);
+
 	ieee80211_rx_ni(dev->hw, skb);
 }
 
@@ -110,17 +183,31 @@ static u16 mt7601u_rx_next_seg_len(u8 *data, u32 data_len)
 }
 
 static void
-mt7601u_rx_process_entry(struct mt7601u_dev *dev, struct mt7601u_dma_buf *e)
+mt7601u_rx_process_entry(struct mt7601u_dev *dev, struct mt7601u_dma_buf_rx *e)
 {
 	u32 seg_len, data_len = e->urb->actual_length;
-	u8 *data = e->buf;
+	u8 *data = page_address(e->p);
+	struct page *new_p = NULL;
+	bool paged = true;
 	int cnt = 0;
 
 	if (!test_bit(MT7601U_STATE_INITIALIZED, &dev->state))
 		return;
 
+	/* Copy if there is very little data in the buffer. */
+	if (data_len < 512) {
+		paged = false;
+	} else {
+		new_p = dev_alloc_pages(MT_RX_ORDER);
+		if (!new_p)
+			paged = false;
+	}
+
 	while ((seg_len = mt7601u_rx_next_seg_len(data, data_len))) {
-		mt7601u_rx_process_seg(dev, data, seg_len);
+		if (paged)
+			mt7601u_rx_process_seg_paged(dev, data, seg_len, e->p);
+		else
+			mt7601u_rx_process_seg(dev, data, seg_len);
 
 		data_len -= seg_len;
 		data += seg_len;
@@ -128,14 +215,21 @@ mt7601u_rx_process_entry(struct mt7601u_dev *dev, struct mt7601u_dma_buf *e)
 	}
 
 	if (cnt > 1)
-		trace_mt_rx_dma_aggr(dev, cnt);
+		trace_mt_rx_dma_aggr(dev, cnt, paged);
+
+	if (paged) {
+		/* we have one extra ref from the allcator */
+		put_page(e->p);
+
+		e->p = new_p;
+	}
 }
 
-static struct mt7601u_dma_buf *
+static struct mt7601u_dma_buf_rx *
 mt7601u_rx_get_pending_entry(struct mt7601u_dev *dev)
 {
 	struct mt7601u_rx_queue *q = &dev->rx_q;
-	struct mt7601u_dma_buf *buf = NULL;
+	struct mt7601u_dma_buf_rx *buf = NULL;
 	unsigned long flags;
 
 	spin_lock_irqsave(&dev->rx_lock, flags);
@@ -175,15 +269,14 @@ out:
 static void mt7601u_rx_tasklet(unsigned long data)
 {
 	struct mt7601u_dev *dev = (struct mt7601u_dev *) data;
-	struct mt7601u_dma_buf *e;
+	struct mt7601u_dma_buf_rx *e;
 
 	while ((e = mt7601u_rx_get_pending_entry(dev))) {
 		if (e->urb->status)
 			continue;
 
 		mt7601u_rx_process_entry(dev, e);
-		mt7601u_usb_submit_buf(dev, USB_DIR_IN, MT_EP_IN_PKT_RX, e,
-				       GFP_ATOMIC, mt7601u_complete_rx, dev);
+		mt7601u_submit_rx_buf(dev, e, GFP_ATOMIC);
 	}
 }
 
@@ -325,14 +418,33 @@ static void mt7601u_kill_rx(struct mt7601u_dev *dev)
 	spin_unlock_irqrestore(&dev->rx_lock, flags);
 }
 
+static int mt7601u_submit_rx_buf(struct mt7601u_dev *dev,
+				 struct mt7601u_dma_buf_rx *e, gfp_t gfp)
+{
+	struct usb_device *usb_dev = mt7601u_to_usb_dev(dev);
+	u8 *buf = page_address(e->p);
+	unsigned pipe;
+	int ret;
+
+	pipe = usb_rcvbulkpipe(usb_dev, dev->in_eps[MT_EP_IN_PKT_RX]);
+
+	usb_fill_bulk_urb(e->urb, usb_dev, pipe, buf, MT_RX_URB_SIZE,
+			  mt7601u_complete_rx, dev);
+
+	trace_mt_submit_urb(dev, e->urb);
+	ret = usb_submit_urb(e->urb, gfp);
+	if (ret)
+		dev_err(dev->dev, "Error: submit RX URB failed:%d\n", ret);
+
+	return ret;
+}
+
 static int mt7601u_submit_rx(struct mt7601u_dev *dev)
 {
 	int i, ret;
 
 	for (i = 0; i < dev->rx_q.entries; i++) {
-		ret = mt7601u_usb_submit_buf(dev, USB_DIR_IN, MT_EP_IN_PKT_RX,
-					     &dev->rx_q.e[i], GFP_KERNEL,
-					     mt7601u_complete_rx, dev);
+		ret = mt7601u_submit_rx_buf(dev, &dev->rx_q.e[i], GFP_KERNEL);
 		if (ret)
 			return ret;
 	}
@@ -344,8 +456,10 @@ static void mt7601u_free_rx(struct mt7601u_dev *dev)
 {
 	int i;
 
-	for (i = 0; i < dev->rx_q.entries; i++)
-		mt7601u_usb_free_buf(dev, &dev->rx_q.e[i]);
+	for (i = 0; i < dev->rx_q.entries; i++) {
+		__free_pages(dev->rx_q.e[i].p, MT_RX_ORDER);
+		usb_free_urb(dev->rx_q.e[i].urb);
+	}
 }
 
 static int mt7601u_alloc_rx(struct mt7601u_dev *dev)
@@ -356,9 +470,13 @@ static int mt7601u_alloc_rx(struct mt7601u_dev *dev)
 	dev->rx_q.dev = dev;
 	dev->rx_q.entries = N_RX_ENTRIES;
 
-	for (i = 0; i < N_RX_ENTRIES; i++)
-		if (mt7601u_usb_alloc_buf(dev, MT_RX_URB_SIZE, &dev->rx_q.e[i]))
+	for (i = 0; i < N_RX_ENTRIES; i++) {
+		dev->rx_q.e[i].urb = usb_alloc_urb(0, GFP_KERNEL);
+		dev->rx_q.e[i].p = dev_alloc_pages(MT_RX_ORDER);
+
+		if (!dev->rx_q.e[i].urb || !dev->rx_q.e[i].p)
 			return -ENOMEM;
+	}
 
 	return 0;
 }
diff --git a/mac.c b/mac.c
index 539751362b41..8802366f7181 100644
--- a/mac.c
+++ b/mac.c
@@ -441,30 +441,28 @@ mt7601u_rx_monitor_beacon(struct mt7601u_dev *dev, struct mt7601u_rxwi *rxwi,
 }
 
 static int
-mt7601u_rx_is_our_beacon(struct mt7601u_dev *dev, struct sk_buff *skb)
+mt7601u_rx_is_our_beacon(struct mt7601u_dev *dev, u8 *data)
 {
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)data;
 
 	return ieee80211_is_beacon(hdr->frame_control) &&
 		ether_addr_equal(hdr->addr2, dev->ap_bssid);
 }
 
-int mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb, void *rxi)
+u32 mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb,
+			u8 *data, void *rxi)
 {
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
 	struct mt7601u_rxwi *rxwi = rxi;
 	u32 ctl = le32_to_cpu(rxwi->ctl);
 	u16 rate = le16_to_cpu(rxwi->rate);
-	int len, rssi;
+	int rssi;
 
 	if (rxwi->rxinfo & cpu_to_le32(MT_RXINFO_DECRYPT)) {
 		status->flag |= RX_FLAG_DECRYPTED;
 		status->flag |= RX_FLAG_IV_STRIPPED | RX_FLAG_MMIC_STRIPPED;
 	}
 
-	len = MT76_GET(MT_RXWI_CTL_MPDU_LEN, ctl);
-	skb_trim(skb, len);
-
 	status->chains = BIT(0);
 	rssi = mt7601u_phy_get_rssi(dev, rxwi, rate);
 	status->chain_signal[0] = status->signal = rssi;
@@ -474,13 +472,13 @@ int mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb, void *rxi)
 	mt76_mac_process_rate(status, rate);
 
 	spin_lock_bh(&dev->con_mon_lock);
-	if (mt7601u_rx_is_our_beacon(dev, skb))
+	if (mt7601u_rx_is_our_beacon(dev, data))
 		mt7601u_rx_monitor_beacon(dev, rxwi, rate, rssi);
 	else if (rxwi->rxinfo & cpu_to_le32(MT_RXINFO_U2M))
 		dev->avg_rssi = (dev->avg_rssi * 15) / 16 + (rssi << 8);
 	spin_unlock_bh(&dev->con_mon_lock);
 
-	return 0;
+	return MT76_GET(MT_RXWI_CTL_MPDU_LEN, ctl);
 }
 
 static enum mt76_cipher_type
diff --git a/mac.h b/mac.h
index 90cf147f37e7..2c22d63c63a2 100644
--- a/mac.h
+++ b/mac.h
@@ -160,8 +160,8 @@ struct mt76_txwi {
 #define MT_TXWI_CTL_CHAN_CHECK_PKT	BIT(4)
 #define MT_TXWI_CTL_PIFS_REV		BIT(6)
 
-int
-mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb, void *rxwi);
+u32 mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb,
+			u8 *data, void *rxi);
 int mt76_mac_wcid_set_key(struct mt7601u_dev *dev, u8 idx,
 			  struct ieee80211_key_conf *key);
 void mt76_mac_wcid_set_rate(struct mt7601u_dev *dev, struct mt76_wcid *wcid,
diff --git a/mt7601u.h b/mt7601u.h
index f9957350a187..d6f83e03cb58 100644
--- a/mt7601u.h
+++ b/mt7601u.h
@@ -37,6 +37,7 @@
 #define MT_USB_AGGR_SIZE_LIMIT		21 /* * 1024B */
 #define MT_USB_AGGR_TIMEOUT		0x80 /* * 33ns */
 #define MT_RX_URB_SIZE			(24 * 1024)
+#define MT_RX_ORDER			3
 
 struct mt7601u_dma_buf {
 	struct urb *urb;
@@ -73,7 +74,10 @@ struct mac_stats {
 struct mt7601u_rx_queue {
 	struct mt7601u_dev *dev;
 
-	struct mt7601u_dma_buf e[N_RX_ENTRIES];
+	struct mt7601u_dma_buf_rx {
+		struct urb *urb;
+		struct page *p;
+	} e[N_RX_ENTRIES];
 
 	unsigned int start;
 	unsigned int end;
diff --git a/trace.h b/trace.h
index db86272401ff..289897300ef0 100644
--- a/trace.h
+++ b/trace.h
@@ -352,17 +352,20 @@ TRACE_EVENT(mt_tx_status,
 );
 
 TRACE_EVENT(mt_rx_dma_aggr,
-	TP_PROTO(struct mt7601u_dev *dev, int cnt),
-	TP_ARGS(dev, cnt),
+	TP_PROTO(struct mt7601u_dev *dev, int cnt, bool paged),
+	TP_ARGS(dev, cnt, paged),
 	TP_STRUCT__entry(
 		DEV_ENTRY
 		__field(u8, cnt)
+		__field(bool, paged)
 	),
 	TP_fast_assign(
 		DEV_ASSIGN;
 		__entry->cnt = cnt;
+		__entry->paged = paged;
 	),
-	TP_printk(DEV_PR_FMT "%d", DEV_PR_ARG, __entry->cnt)
+	TP_printk(DEV_PR_FMT "cnt:%d paged:%d",
+		  DEV_PR_ARG, __entry->cnt, __entry->paged)
 );
 
 DEFINE_EVENT(dev_simple_evt, set_key,

--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Host AP]     [ATH6KL]     [Linux Wireless Personal Area Network]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Linux Kernel]     [IDE]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite Hiking]     [MIPS Linux]     [ARM Linux]     [Linux RAID]

  Powered by Linux