On 2015/1/15 0:34, Eric Dumazet wrote: > On Wed, 2015-01-14 at 14:34 +0800, Ding Tianhong wrote: >> Support Hisilicon hip04 ethernet driver, including 100M / 1000M controller. >> The controller has no tx done interrupt, reclaim xmitted buffer in the poll. >> >> v13: Fix the problem of alignment parameters for function and checkpatch warming. >> >> v12: According Alex's suggestion, modify the changelog and add MODULE_DEVICE_TABLE >> for hip04 ethernet. >> >> v11: Add ethtool support for tx coalecse getting and setting, the xmit_more >> is not supported for this patch, but I think it could work for hip04, >> will support it later after some tests for performance better. >> >> Here are some performance test results by ping and iperf(add tx_coalesce_frames/users), >> it looks that the performance and latency is more better by tx_coalesce_frames/usecs. >> >> - Before: >> $ ping 192.168.1.1 ... >> === 192.168.1.1 ping statistics === >> 24 packets transmitted, 24 received, 0% packet loss, time 22999ms >> rtt min/avg/max/mdev = 0.180/0.202/0.403/0.043 ms >> >> $ iperf -c 192.168.1.1 ... >> [ ID] Interval Transfer Bandwidth >> [ 3] 0.0- 1.0 sec 115 MBytes 945 Mbits/sec >> >> - After: >> $ ping 192.168.1.1 ... >> === 192.168.1.1 ping statistics === >> 24 packets transmitted, 24 received, 0% packet loss, time 22999ms >> rtt min/avg/max/mdev = 0.178/0.190/0.380/0.041 ms >> >> $ iperf -c 192.168.1.1 ... >> [ ID] Interval Transfer Bandwidth >> [ 3] 0.0- 1.0 sec 115 MBytes 965 Mbits/sec >> >> v10: According David Miller and Arnd Bergmann's suggestion, add some modification >> for v9 version >> - drop the workqueue >> - batch cleanup based on tx_coalesce_frames/usecs for better throughput >> - use a reasonable default tx timeout (200us, could be shorted >> based on measurements) with a range timer >> - fix napi poll function return value >> - use a lockless queue for cleanup >> >> Signed-off-by: Zhangfei Gao <zhangfei.gao@xxxxxxxxxx> >> Signed-off-by: Arnd Bergmann <arnd@xxxxxxxx> >> Signed-off-by: Ding Tianhong <dingtianhong@xxxxxxxxxx> >> --- >> drivers/net/ethernet/hisilicon/Makefile | 2 +- >> drivers/net/ethernet/hisilicon/hip04_eth.c | 969 +++++++++++++++++++++++++++++ >> 2 files changed, 970 insertions(+), 1 deletion(-) >> create mode 100644 drivers/net/ethernet/hisilicon/hip04_eth.c >> >> diff --git a/drivers/net/ethernet/hisilicon/Makefile b/drivers/net/ethernet/hisilicon/Makefile >> index 40115a7..6c14540 100644 >> --- a/drivers/net/ethernet/hisilicon/Makefile >> +++ b/drivers/net/ethernet/hisilicon/Makefile >> @@ -3,4 +3,4 @@ >> # >> >> obj-$(CONFIG_HIX5HD2_GMAC) += hix5hd2_gmac.o >> -obj-$(CONFIG_HIP04_ETH) += hip04_mdio.o >> +obj-$(CONFIG_HIP04_ETH) += hip04_mdio.o hip04_eth.o >> diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c >> new file mode 100644 >> index 0000000..525214e >> --- /dev/null >> +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c >> @@ -0,0 +1,969 @@ >> + >> +/* Copyright (c) 2014 Linaro Ltd. >> + * Copyright (c) 2014 Hisilicon Limited. >> + * >> + * This program is free software; you can redistribute it and/or modify >> + * it under the terms of the GNU General Public License as published by >> + * the Free Software Foundation; either version 2 of the License, or >> + * (at your option) any later version. >> + */ >> + >> +#include <linux/module.h> >> +#include <linux/etherdevice.h> >> +#include <linux/platform_device.h> >> +#include <linux/interrupt.h> >> +#include <linux/ktime.h> >> +#include <linux/of_address.h> >> +#include <linux/phy.h> >> +#include <linux/of_mdio.h> >> +#include <linux/of_net.h> >> +#include <linux/mfd/syscon.h> >> +#include <linux/regmap.h> >> + >> +#define PPE_CFG_RX_ADDR 0x100 >> +#define PPE_CFG_POOL_GRP 0x300 >> +#define PPE_CFG_RX_BUF_SIZE 0x400 >> +#define PPE_CFG_RX_FIFO_SIZE 0x500 >> +#define PPE_CURR_BUF_CNT 0xa200 >> + >> +#define GE_DUPLEX_TYPE 0x08 >> +#define GE_MAX_FRM_SIZE_REG 0x3c >> +#define GE_PORT_MODE 0x40 >> +#define GE_PORT_EN 0x44 >> +#define GE_SHORT_RUNTS_THR_REG 0x50 >> +#define GE_TX_LOCAL_PAGE_REG 0x5c >> +#define GE_TRANSMIT_CONTROL_REG 0x60 >> +#define GE_CF_CRC_STRIP_REG 0x1b0 >> +#define GE_MODE_CHANGE_REG 0x1b4 >> +#define GE_RECV_CONTROL_REG 0x1e0 >> +#define GE_STATION_MAC_ADDRESS 0x210 >> +#define PPE_CFG_CPU_ADD_ADDR 0x580 >> +#define PPE_CFG_MAX_FRAME_LEN_REG 0x408 >> +#define PPE_CFG_BUS_CTRL_REG 0x424 >> +#define PPE_CFG_RX_CTRL_REG 0x428 >> +#define PPE_CFG_RX_PKT_MODE_REG 0x438 >> +#define PPE_CFG_QOS_VMID_GEN 0x500 >> +#define PPE_CFG_RX_PKT_INT 0x538 >> +#define PPE_INTEN 0x600 >> +#define PPE_INTSTS 0x608 >> +#define PPE_RINT 0x604 >> +#define PPE_CFG_STS_MODE 0x700 >> +#define PPE_HIS_RX_PKT_CNT 0x804 >> + >> +/* REG_INTERRUPT */ >> +#define RCV_INT BIT(10) >> +#define RCV_NOBUF BIT(8) >> +#define RCV_DROP BIT(7) >> +#define TX_DROP BIT(6) >> +#define DEF_INT_ERR (RCV_NOBUF | RCV_DROP | TX_DROP) >> +#define DEF_INT_MASK (RCV_INT | DEF_INT_ERR) >> + >> +/* TX descriptor config */ >> +#define TX_FREE_MEM BIT(0) >> +#define TX_READ_ALLOC_L3 BIT(1) >> +#define TX_FINISH_CACHE_INV BIT(2) >> +#define TX_CLEAR_WB BIT(4) >> +#define TX_L3_CHECKSUM BIT(5) >> +#define TX_LOOP_BACK BIT(11) >> + >> +/* RX error */ >> +#define RX_PKT_DROP BIT(0) >> +#define RX_L2_ERR BIT(1) >> +#define RX_PKT_ERR (RX_PKT_DROP | RX_L2_ERR) >> + >> +#define SGMII_SPEED_1000 0x08 >> +#define SGMII_SPEED_100 0x07 >> +#define SGMII_SPEED_10 0x06 >> +#define MII_SPEED_100 0x01 >> +#define MII_SPEED_10 0x00 >> + >> +#define GE_DUPLEX_FULL BIT(0) >> +#define GE_DUPLEX_HALF 0x00 >> +#define GE_MODE_CHANGE_EN BIT(0) >> + >> +#define GE_TX_AUTO_NEG BIT(5) >> +#define GE_TX_ADD_CRC BIT(6) >> +#define GE_TX_SHORT_PAD_THROUGH BIT(7) >> + >> +#define GE_RX_STRIP_CRC BIT(0) >> +#define GE_RX_STRIP_PAD BIT(3) >> +#define GE_RX_PAD_EN BIT(4) >> + >> +#define GE_AUTO_NEG_CTL BIT(0) >> + >> +#define GE_RX_INT_THRESHOLD BIT(6) >> +#define GE_RX_TIMEOUT 0x04 >> + >> +#define GE_RX_PORT_EN BIT(1) >> +#define GE_TX_PORT_EN BIT(2) >> + >> +#define PPE_CFG_STS_RX_PKT_CNT_RC BIT(12) >> + >> +#define PPE_CFG_RX_PKT_ALIGN BIT(18) >> +#define PPE_CFG_QOS_VMID_MODE BIT(14) >> +#define PPE_CFG_QOS_VMID_GRP_SHIFT 8 >> + >> +#define PPE_CFG_RX_FIFO_FSFU BIT(11) >> +#define PPE_CFG_RX_DEPTH_SHIFT 16 >> +#define PPE_CFG_RX_START_SHIFT 0 >> +#define PPE_CFG_RX_CTRL_ALIGN_SHIFT 11 >> + >> +#define PPE_CFG_BUS_LOCAL_REL BIT(14) >> +#define PPE_CFG_BUS_BIG_ENDIEN BIT(0) >> + >> +#define RX_DESC_NUM 128 >> +#define TX_DESC_NUM 256 >> +#define TX_NEXT(N) (((N) + 1) & (TX_DESC_NUM-1)) >> +#define RX_NEXT(N) (((N) + 1) & (RX_DESC_NUM-1)) >> + >> +#define GMAC_PPE_RX_PKT_MAX_LEN 379 >> +#define GMAC_MAX_PKT_LEN 1516 >> +#define GMAC_MIN_PKT_LEN 31 >> +#define RX_BUF_SIZE 1600 >> +#define RESET_TIMEOUT 1000 >> +#define TX_TIMEOUT (6 * HZ) >> + >> +#define DRV_NAME "hip04-ether" >> +#define DRV_VERSION "v1.0" >> + >> +#define HIP04_MAX_TX_COALESCE_USECS 200 >> +#define HIP04_MIN_TX_COALESCE_USECS 100 >> +#define HIP04_MAX_TX_COALESCE_FRAMES 200 >> +#define HIP04_MIN_TX_COALESCE_FRAMES 100 >> + >> +struct tx_desc { >> + u32 send_addr; > > __be32 send_adddr; ? > >> + u32 send_size; > > __be32 > >> + u32 next_addr; > __be32 > >> + u32 cfg; > __be32 > >> + u32 wb_addr; > __be32 wb_addr ? > >> +} __aligned(64); >> + >> +struct rx_desc { >> + u16 reserved_16; >> + u16 pkt_len; >> + u32 reserve1[3]; >> + u32 pkt_err; >> + u32 reserve2[4]; >> +}; >> + >> +struct hip04_priv { >> + void __iomem *base; >> + int phy_mode; >> + int chan; >> + unsigned int port; >> + unsigned int speed; >> + unsigned int duplex; >> + unsigned int reg_inten; >> + >> + struct napi_struct napi; >> + struct net_device *ndev; >> + >> + struct tx_desc *tx_desc; >> + dma_addr_t tx_desc_dma; >> + struct sk_buff *tx_skb[TX_DESC_NUM]; >> + dma_addr_t tx_phys[TX_DESC_NUM]; > > This is not an efficient way to store skb/phys, as for each skb, info > will be store in 2 separate cache lines. > > It would be better to use a > > struct hip04_tx_desc { > struct sk_buff *skb; > dma_addr_t phys; > } > >> + unsigned int tx_head; >> + >> + int tx_coalesce_frames; >> + int tx_coalesce_usecs; >> + struct hrtimer tx_coalesce_timer; >> + >> + unsigned char *rx_buf[RX_DESC_NUM]; >> + dma_addr_t rx_phys[RX_DESC_NUM]; > > Same thing here : Use a struct to get better data locality. > >> + unsigned int rx_head; >> + unsigned int rx_buf_size; >> + >> + struct device_node *phy_node; >> + struct phy_device *phy; >> + struct regmap *map; >> + struct work_struct tx_timeout_task; >> + >> + /* written only by tx cleanup */ >> + unsigned int tx_tail ____cacheline_aligned_in_smp; >> +}; >> + >> +static inline unsigned int tx_count(unsigned int head, unsigned int tail) >> +{ >> + return (head - tail) % (TX_DESC_NUM - 1); >> +} >> + >> +static void hip04_config_port(struct net_device *ndev, u32 speed, u32 duplex) >> +{ >> + struct hip04_priv *priv = netdev_priv(ndev); >> + u32 val; >> + >> + priv->speed = speed; >> + priv->duplex = duplex; >> + >> + switch (priv->phy_mode) { >> + case PHY_INTERFACE_MODE_SGMII: >> + if (speed == SPEED_1000) >> + val = SGMII_SPEED_1000; >> + else if (speed == SPEED_100) >> + val = SGMII_SPEED_100; >> + else >> + val = SGMII_SPEED_10; >> + break; >> + case PHY_INTERFACE_MODE_MII: >> + if (speed == SPEED_100) >> + val = MII_SPEED_100; >> + else >> + val = MII_SPEED_10; >> + break; >> + default: >> + netdev_warn(ndev, "not supported mode\n"); >> + val = MII_SPEED_10; >> + break; >> + } >> + writel_relaxed(val, priv->base + GE_PORT_MODE); >> + >> + val = duplex ? GE_DUPLEX_FULL : GE_DUPLEX_HALF; >> + writel_relaxed(val, priv->base + GE_DUPLEX_TYPE); >> + >> + val = GE_MODE_CHANGE_EN; >> + writel_relaxed(val, priv->base + GE_MODE_CHANGE_REG); >> +} >> + >> +static void hip04_reset_ppe(struct hip04_priv *priv) >> +{ >> + u32 val, tmp, timeout = 0; >> + >> + do { >> + regmap_read(priv->map, priv->port * 4 + PPE_CURR_BUF_CNT, &val); >> + regmap_read(priv->map, priv->port * 4 + PPE_CFG_RX_ADDR, &tmp); >> + if (timeout++ > RESET_TIMEOUT) >> + break; >> + } while (val & 0xfff); >> +} >> + >> +static void hip04_config_fifo(struct hip04_priv *priv) >> +{ >> + u32 val; >> + >> + val = readl_relaxed(priv->base + PPE_CFG_STS_MODE); >> + val |= PPE_CFG_STS_RX_PKT_CNT_RC; >> + writel_relaxed(val, priv->base + PPE_CFG_STS_MODE); >> + >> + val = BIT(priv->port); >> + regmap_write(priv->map, priv->port * 4 + PPE_CFG_POOL_GRP, val); >> + >> + val = priv->port << PPE_CFG_QOS_VMID_GRP_SHIFT; >> + val |= PPE_CFG_QOS_VMID_MODE; >> + writel_relaxed(val, priv->base + PPE_CFG_QOS_VMID_GEN); >> + >> + val = RX_BUF_SIZE; >> + regmap_write(priv->map, priv->port * 4 + PPE_CFG_RX_BUF_SIZE, val); >> + >> + val = RX_DESC_NUM << PPE_CFG_RX_DEPTH_SHIFT; >> + val |= PPE_CFG_RX_FIFO_FSFU; >> + val |= priv->chan << PPE_CFG_RX_START_SHIFT; >> + regmap_write(priv->map, priv->port * 4 + PPE_CFG_RX_FIFO_SIZE, val); >> + >> + val = NET_IP_ALIGN << PPE_CFG_RX_CTRL_ALIGN_SHIFT; >> + writel_relaxed(val, priv->base + PPE_CFG_RX_CTRL_REG); >> + >> + val = PPE_CFG_RX_PKT_ALIGN; >> + writel_relaxed(val, priv->base + PPE_CFG_RX_PKT_MODE_REG); >> + >> + val = PPE_CFG_BUS_LOCAL_REL | PPE_CFG_BUS_BIG_ENDIEN; >> + writel_relaxed(val, priv->base + PPE_CFG_BUS_CTRL_REG); >> + >> + val = GMAC_PPE_RX_PKT_MAX_LEN; >> + writel_relaxed(val, priv->base + PPE_CFG_MAX_FRAME_LEN_REG); >> + >> + val = GMAC_MAX_PKT_LEN; >> + writel_relaxed(val, priv->base + GE_MAX_FRM_SIZE_REG); >> + >> + val = GMAC_MIN_PKT_LEN; >> + writel_relaxed(val, priv->base + GE_SHORT_RUNTS_THR_REG); >> + >> + val = readl_relaxed(priv->base + GE_TRANSMIT_CONTROL_REG); >> + val |= GE_TX_AUTO_NEG | GE_TX_ADD_CRC | GE_TX_SHORT_PAD_THROUGH; >> + writel_relaxed(val, priv->base + GE_TRANSMIT_CONTROL_REG); >> + >> + val = GE_RX_STRIP_CRC; >> + writel_relaxed(val, priv->base + GE_CF_CRC_STRIP_REG); >> + >> + val = readl_relaxed(priv->base + GE_RECV_CONTROL_REG); >> + val |= GE_RX_STRIP_PAD | GE_RX_PAD_EN; >> + writel_relaxed(val, priv->base + GE_RECV_CONTROL_REG); >> + >> + val = GE_AUTO_NEG_CTL; >> + writel_relaxed(val, priv->base + GE_TX_LOCAL_PAGE_REG); >> +} >> + >> +static void hip04_mac_enable(struct net_device *ndev) >> +{ >> + struct hip04_priv *priv = netdev_priv(ndev); >> + u32 val; >> + >> + /* enable tx & rx */ >> + val = readl_relaxed(priv->base + GE_PORT_EN); >> + val |= GE_RX_PORT_EN | GE_TX_PORT_EN; >> + writel_relaxed(val, priv->base + GE_PORT_EN); >> + >> + /* clear rx int */ >> + val = RCV_INT; >> + writel_relaxed(val, priv->base + PPE_RINT); >> + >> + /* config recv int */ >> + val = GE_RX_INT_THRESHOLD | GE_RX_TIMEOUT; >> + writel_relaxed(val, priv->base + PPE_CFG_RX_PKT_INT); >> + >> + /* enable interrupt */ >> + priv->reg_inten = DEF_INT_MASK; >> + writel_relaxed(priv->reg_inten, priv->base + PPE_INTEN); >> +} >> + >> +static void hip04_mac_disable(struct net_device *ndev) >> +{ >> + struct hip04_priv *priv = netdev_priv(ndev); >> + u32 val; >> + >> + /* disable int */ >> + priv->reg_inten &= ~(DEF_INT_MASK); >> + writel_relaxed(priv->reg_inten, priv->base + PPE_INTEN); >> + >> + /* disable tx & rx */ >> + val = readl_relaxed(priv->base + GE_PORT_EN); >> + val &= ~(GE_RX_PORT_EN | GE_TX_PORT_EN); >> + writel_relaxed(val, priv->base + GE_PORT_EN); >> +} >> + >> +static void hip04_set_xmit_desc(struct hip04_priv *priv, dma_addr_t phys) >> +{ >> + writel(phys, priv->base + PPE_CFG_CPU_ADD_ADDR); >> +} >> + >> +static void hip04_set_recv_desc(struct hip04_priv *priv, dma_addr_t phys) >> +{ >> + regmap_write(priv->map, priv->port * 4 + PPE_CFG_RX_ADDR, phys); >> +} >> + >> +static u32 hip04_recv_cnt(struct hip04_priv *priv) >> +{ >> + return readl(priv->base + PPE_HIS_RX_PKT_CNT); >> +} >> + >> +static void hip04_update_mac_address(struct net_device *ndev) >> +{ >> + struct hip04_priv *priv = netdev_priv(ndev); >> + >> + writel_relaxed(((ndev->dev_addr[0] << 8) | (ndev->dev_addr[1])), >> + priv->base + GE_STATION_MAC_ADDRESS); >> + writel_relaxed(((ndev->dev_addr[2] << 24) | (ndev->dev_addr[3] << 16) | >> + (ndev->dev_addr[4] << 8) | (ndev->dev_addr[5])), >> + priv->base + GE_STATION_MAC_ADDRESS + 4); >> +} >> + >> +static int hip04_set_mac_address(struct net_device *ndev, void *addr) >> +{ >> + eth_mac_addr(ndev, addr); >> + hip04_update_mac_address(ndev); >> + return 0; >> +} >> + >> +static int hip04_tx_reclaim(struct net_device *ndev, bool force) >> +{ >> + struct hip04_priv *priv = netdev_priv(ndev); >> + unsigned tx_tail = priv->tx_tail; >> + struct tx_desc *desc; >> + unsigned int bytes_compl = 0, pkts_compl = 0; >> + unsigned int count; >> + >> + smp_rmb(); >> + count = tx_count(ACCESS_ONCE(priv->tx_head), tx_tail); >> + if (count == 0) >> + goto out; >> + >> + while (count) { >> + desc = &priv->tx_desc[tx_tail]; >> + if (desc->send_addr != 0) { >> + if (force) >> + desc->send_addr = 0; >> + else >> + break; >> + } >> + >> + if (priv->tx_phys[tx_tail]) { >> + dma_unmap_single(&ndev->dev, priv->tx_phys[tx_tail], >> + priv->tx_skb[tx_tail]->len, >> + DMA_TO_DEVICE); >> + priv->tx_phys[tx_tail] = 0; >> + } >> + pkts_compl++; >> + bytes_compl += priv->tx_skb[tx_tail]->len; >> + dev_kfree_skb(priv->tx_skb[tx_tail]); >> + priv->tx_skb[tx_tail] = NULL; >> + tx_tail = TX_NEXT(tx_tail); >> + count--; >> + } >> + >> + priv->tx_tail = tx_tail; >> + smp_wmb(); /* Ensure tx_tail visible to xmit */ >> + >> +out: >> + if (pkts_compl || bytes_compl) > > Testing bytes_compl should be enough : There is no way pkt_compl could > be 0 if bytes_compl is not 0. > >> + netdev_completed_queue(ndev, pkts_compl, bytes_compl); >> + >> + if (unlikely(netif_queue_stopped(ndev)) && (count < (TX_DESC_NUM - 1))) >> + netif_wake_queue(ndev); >> + >> + return count; >> +} >> + >> +static int hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev) >> +{ >> + struct hip04_priv *priv = netdev_priv(ndev); >> + struct net_device_stats *stats = &ndev->stats; >> + unsigned int tx_head = priv->tx_head, count; >> + struct tx_desc *desc = &priv->tx_desc[tx_head]; >> + dma_addr_t phys; >> + >> + smp_rmb(); >> + count = tx_count(tx_head, ACCESS_ONCE(priv->tx_tail)); >> + if (count == (TX_DESC_NUM - 1)) { >> + netif_stop_queue(ndev); >> + return NETDEV_TX_BUSY; >> + } >> + >> + phys = dma_map_single(&ndev->dev, skb->data, skb->len, DMA_TO_DEVICE); >> + if (dma_mapping_error(&ndev->dev, phys)) { >> + dev_kfree_skb(skb); >> + return NETDEV_TX_OK; >> + } >> + >> + priv->tx_skb[tx_head] = skb; >> + priv->tx_phys[tx_head] = phys; >> + desc->send_addr = cpu_to_be32(phys); >> + desc->send_size = cpu_to_be32(skb->len); >> + desc->cfg = cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV); >> + phys = priv->tx_desc_dma + tx_head * sizeof(struct tx_desc); >> + desc->wb_addr = cpu_to_be32(phys); >> + skb_tx_timestamp(skb); >> + >> + hip04_set_xmit_desc(priv, phys); >> + priv->tx_head = TX_NEXT(tx_head); >> + count++; > > Starting from this point, skb might already have been freed by TX > completion. > > Its racy to access skb->len > >> + netdev_sent_queue(ndev, skb->len); >> + >> + stats->tx_bytes += skb->len; >> + stats->tx_packets++; >> + >> + /* Ensure tx_head update visible to tx reclaim */ >> + smp_wmb(); >> + >> + /* queue is getting full, better start cleaning up now */ >> + if (count >= priv->tx_coalesce_frames) { >> + if (napi_schedule_prep(&priv->napi)) { >> + /* disable rx interrupt and timer */ >> + priv->reg_inten &= ~(RCV_INT); >> + writel_relaxed(DEF_INT_MASK & ~RCV_INT, >> + priv->base + PPE_INTEN); >> + hrtimer_cancel(&priv->tx_coalesce_timer); >> + __napi_schedule(&priv->napi); >> + } >> + } else if (!hrtimer_is_queued(&priv->tx_coalesce_timer)) { >> + /* cleanup not pending yet, start a new timer */ >> + hrtimer_start_expires(&priv->tx_coalesce_timer, >> + HRTIMER_MODE_REL); >> + } >> + >> + return NETDEV_TX_OK; >> +} >> + >> +static int hip04_rx_poll(struct napi_struct *napi, int budget) >> +{ >> + struct hip04_priv *priv = container_of(napi, struct hip04_priv, napi); >> + struct net_device *ndev = priv->ndev; >> + struct net_device_stats *stats = &ndev->stats; >> + unsigned int cnt = hip04_recv_cnt(priv); >> + struct rx_desc *desc; >> + struct sk_buff *skb; >> + unsigned char *buf; >> + bool last = false; >> + dma_addr_t phys; >> + int rx = 0; >> + int tx_remaining; >> + u16 len; >> + u32 err; >> + >> + while (cnt && !last) { >> + buf = priv->rx_buf[priv->rx_head]; >> + skb = build_skb(buf, priv->rx_buf_size); >> + if (unlikely(!skb)) >> + net_dbg_ratelimited("build_skb failed\n"); > > Well, is skb is NULL, you're crashing later... > You really have to address a memory allocation error much better than > that ! > >> + >> + dma_unmap_single(&ndev->dev, priv->rx_phys[priv->rx_head], >> + RX_BUF_SIZE, DMA_FROM_DEVICE); >> + priv->rx_phys[priv->rx_head] = 0; >> + >> + desc = (struct rx_desc *)skb->data; >> + len = be16_to_cpu(desc->pkt_len); >> + err = be32_to_cpu(desc->pkt_err); >> + >> + if (0 == len) { >> + dev_kfree_skb_any(skb); >> + last = true; >> + } else if ((err & RX_PKT_ERR) || (len >= GMAC_MAX_PKT_LEN)) { >> + dev_kfree_skb_any(skb); >> + stats->rx_dropped++; >> + stats->rx_errors++; >> + } else { >> + skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); >> + skb_put(skb, len); >> + skb->protocol = eth_type_trans(skb, ndev); >> + napi_gro_receive(&priv->napi, skb); >> + stats->rx_packets++; >> + stats->rx_bytes += len; >> + rx++; >> + } >> + >> + buf = netdev_alloc_frag(priv->rx_buf_size); >> + if (!buf) >> + goto done; > > Same problem here : In case of memory allocation error, your driver is > totally screwed. > >> + phys = dma_map_single(&ndev->dev, buf, >> + RX_BUF_SIZE, DMA_FROM_DEVICE); >> + if (dma_mapping_error(&ndev->dev, phys)) >> + goto done; > > Same problem here : You really have to recover properly. > >> + priv->rx_buf[priv->rx_head] = buf; >> + priv->rx_phys[priv->rx_head] = phys; >> + hip04_set_recv_desc(priv, phys); >> + >> + priv->rx_head = RX_NEXT(priv->rx_head); >> + if (rx >= budget) >> + goto done; >> + >> + if (--cnt == 0) >> + cnt = hip04_recv_cnt(priv); >> + } >> + >> + if (!(priv->reg_inten & RCV_INT)) { >> + /* enable rx interrupt */ >> + priv->reg_inten |= RCV_INT; >> + writel_relaxed(priv->reg_inten, priv->base + PPE_INTEN); >> + } >> + napi_complete(napi); >> +done: >> + /* clean up tx descriptors and start a new timer if necessary */ >> + tx_remaining = hip04_tx_reclaim(ndev, false); >> + if (rx < budget && tx_remaining) >> + hrtimer_start_expires(&priv->tx_coalesce_timer, HRTIMER_MODE_REL); >> + >> + return rx; >> +} >> + Yes, thanks, fix them later. Ding > > > . > -- To unsubscribe from this list: send the line "unsubscribe devicetree" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html