Hi Jakub,

thanks for the review.

Tue, 5 May 2020 at 20:04, Jakub Kicinski <kuba@xxxxxxxxxx> wrote:
>
> > +/* Represents the actual structure of descriptors used by the MAC. We can
> > + * reuse the same structure for both TX and RX - the layout is the same, only
> > + * the flags differ slightly.
> > + */
> > +struct mtk_mac_ring_desc {
> > +	/* Contains both the status flags as well as packet length. */
> > +	u32 status;
> > +	u32 data_ptr;
> > +	u32 vtag;
> > +	u32 reserved;
> > +} __aligned(4) __packed;
>
> It will be aligned to 4, because the members are all 4B. And there is
> no possibility of holes. You can safely remove those attrs.
>

I noticed that some other drivers whose descriptors are well aligned
define these attributes anyway, so I assumed it's a convention. I'll
drop them in v2.

>
> > +	status = desc->status;
> > +
> > +	if (!(status & MTK_MAC_DESC_BIT_COWN))
> > +		return -1;
> > +
> > +	desc_data->len = status & MTK_MAC_DESC_MSK_LEN;
> > +	desc_data->flags = status & ~MTK_MAC_DESC_MSK_LEN;
> > +	desc_data->dma_addr = desc->data_ptr;
> > +	desc_data->skb = ring->skbs[ring->tail];
> > +
> > +	desc->data_ptr = 0;
> > +	desc->status = MTK_MAC_DESC_BIT_COWN;
> > +	if (status & MTK_MAC_DESC_BIT_EOR)
> > +		desc->status |= MTK_MAC_DESC_BIT_EOR;
> > +
> > +	dma_wmb();
>
> What is this separating?

I'll add comments to the barriers in v2.

>
> > +/* All processing for TX and RX happens in the napi poll callback. */
> > +static irqreturn_t mtk_mac_handle_irq(int irq, void *data)
> > +{
> > +	struct mtk_mac_priv *priv;
> > +	struct net_device *ndev;
> > +	unsigned int status;
> > +
> > +	ndev = data;
> > +	priv = netdev_priv(ndev);
> > +
> > +	if (netif_running(ndev)) {
> > +		mtk_mac_intr_mask_all(priv);
> > +		status = mtk_mac_intr_read_and_clear(priv);
> > +
> > +		/* RX Complete */
> > +		if (status & MTK_MAC_BIT_INT_STS_FNRC)
> > +			napi_schedule(&priv->napi);
> > +
> > +		/* TX Complete */
> > +		if (status & MTK_MAC_BIT_INT_STS_TNTC)
> > +			schedule_work(&priv->tx_work);
> > +
> > +		/* One of the counter reached 0x8000000 */
> > +		if (status & MTK_MAC_REG_INT_STS_MIB_CNT_TH) {
> > +			mtk_mac_update_stats(priv);
> > +			mtk_mac_reset_counters(priv);
> > +		}
> > +
> > +		mtk_mac_intr_unmask_all(priv);
>
> Why do you unmask all IRQs here? The usual way to operate is to leave
> TX and RX IRQs masked until NAPI finishes.
>

I actually did it that way before, as the leftover comment above the
function says. Then I thought that this way we mask the interrupts for
a shorter period of time. I can go back to the previous approach - I've
put a rough sketch of what I have in mind a bit further down.

>
> > +	}
> > +
> > +	return IRQ_HANDLED;
> > +}
>
> > +static int mtk_mac_enable(struct net_device *ndev)
> > +{
> > +	/* Reset all counters */
> > +	mtk_mac_reset_counters(priv);
>
> This doesn't reset the counters to zero, right?
>

Yes, it does actually. I'll drop it in v2 - it's not necessary.
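
Going back to the interrupt masking question above: for v2 I'd move the
TX/RX handling back into the napi callback and keep the interrupts
masked until it's done. Roughly something like this (untested sketch -
mtk_mac_poll() and mtk_mac_process_rings() don't exist in this form in
the patch, and I've left out the interrupt status read/clear and the
stats counter handling):

static irqreturn_t mtk_mac_handle_irq(int irq, void *data)
{
	struct net_device *ndev = data;
	struct mtk_mac_priv *priv = netdev_priv(ndev);

	if (netif_running(ndev)) {
		/* Keep TX/RX interrupts masked until NAPI is done. */
		mtk_mac_intr_mask_all(priv);
		napi_schedule(&priv->napi);
	}

	return IRQ_HANDLED;
}

static int mtk_mac_poll(struct napi_struct *napi, int budget)
{
	struct mtk_mac_priv *priv = container_of(napi, struct mtk_mac_priv,
						 napi);
	int received;

	/* Clean up TX completions and receive up to 'budget' packets. */
	received = mtk_mac_process_rings(priv, budget);

	/* Only re-enable the interrupts once both rings are processed. */
	if (received < budget && napi_complete_done(napi, received))
		mtk_mac_intr_unmask_all(priv);

	return received;
}
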
>
> > +static void mtk_mac_tx_work(struct work_struct *work)
> > +{
> > +	struct mtk_mac_priv *priv;
> > +	struct mtk_mac_ring *ring;
> > +	struct net_device *ndev;
> > +	bool wake = false;
> > +	int ret;
> > +
> > +	priv = container_of(work, struct mtk_mac_priv, tx_work);
> > +	ndev = mtk_mac_get_netdev(priv);
> > +	ring = &priv->tx_ring;
> > +
> > +	for (;;) {
> > +		mtk_mac_lock(priv);
> > +
> > +		if (!mtk_mac_ring_descs_available(ring)) {
> > +			mtk_mac_unlock(priv);
> > +			break;
> > +		}
> > +
> > +		ret = mtk_mac_tx_complete(priv);
> > +		mtk_mac_unlock(priv);
> > +		if (ret)
> > +			break;
> > +
> > +		wake = true;
> > +	}
> > +
> > +	if (wake)
> > +		netif_wake_queue(ndev);
>
> This looks racy, if the TX path runs in parallel the queue may have
> already been filled up at the point you wake it up.
>
> > +}
>
> Why do you clean the TX ring from a work rather than from the NAPI
> context?
>

So this was unclear to me - that's why I went with a workqueue. The
budget argument in the napi poll callback is for RX. Should I put some
cap on the number of TX descriptors processed in the napi context?

>
> > +static int mtk_mac_receive_packet(struct mtk_mac_priv *priv)
> > +{
> > +	struct net_device *ndev = mtk_mac_get_netdev(priv);
> > +	struct mtk_mac_ring *ring = &priv->rx_ring;
> > +	struct device *dev = mtk_mac_get_dev(priv);
> > +	struct mtk_mac_ring_desc_data desc_data;
> > +	struct sk_buff *new_skb;
> > +	int ret;
> > +
> > +	mtk_mac_lock(priv);
> > +	ret = mtk_mac_ring_pop_tail(ring, &desc_data);
> > +	mtk_mac_unlock(priv);
> > +	if (ret)
> > +		return -1;
> > +
> > +	mtk_mac_dma_unmap_rx(priv, &desc_data);
> > +
> > +	if ((desc_data.flags & MTK_MAC_DESC_BIT_RX_CRCE) ||
> > +	    (desc_data.flags & MTK_MAC_DESC_BIT_RX_OSIZE)) {
> > +		/* Error packet -> drop and reuse skb. */
> > +		new_skb = desc_data.skb;
> > +		goto map_skb;
> > +	}
> > +
> > +	new_skb = mtk_mac_alloc_skb(ndev);
> > +	if (!new_skb) {
> > +		netdev_err(ndev, "out of memory for skb\n");
>
> No need for printing, kernel will complain loudly about oom.
>
> > +		ndev->stats.rx_dropped++;
> > +		new_skb = desc_data.skb;
> > +		goto map_skb;
> > +	}
> > +
> > +	skb_put(desc_data.skb, desc_data.len);
> > +	desc_data.skb->ip_summed = CHECKSUM_NONE;
> > +	desc_data.skb->protocol = eth_type_trans(desc_data.skb, ndev);
> > +	desc_data.skb->dev = ndev;
> > +	netif_receive_skb(desc_data.skb);
> > +
> > +map_skb:
> > +	desc_data.dma_addr = mtk_mac_dma_map_rx(priv, new_skb);
> > +	if (dma_mapping_error(dev, desc_data.dma_addr)) {
> > +		dev_kfree_skb(new_skb);
> > +		netdev_err(ndev, "DMA mapping error of RX descriptor\n");
> > +		return -ENOMEM;
>
> In this case nothing will ever replenish the RX ring right? If we hit
> this condition 128 times the ring will be empty?
>

Indeed. What should I do if this fails though? I've put one idea in the
PS at the end of this e-mail.

I'll address all other issues in v2.

Bart
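
PS: regarding the RX replenishing on DMA mapping errors above - one idea
(just a sketch of the middle of mtk_mac_receive_packet(), not tested;
it assumes an extra dma_addr_t local and a mtk_mac_ring_push_head_rx()
counterpart to mtk_mac_ring_pop_tail(), with locking and the error-packet
handling omitted) would be to allocate and map the new skb *before*
unmapping the old one, and drop the incoming packet when that fails, so
the old buffer with its still-valid mapping goes straight back onto the
ring and the ring can never run dry:

	/* Try to set up a replacement buffer first. */
	new_skb = mtk_mac_alloc_skb(ndev);
	if (new_skb) {
		new_dma_addr = mtk_mac_dma_map_rx(priv, new_skb);
		if (dma_mapping_error(dev, new_dma_addr)) {
			dev_kfree_skb(new_skb);
			new_skb = NULL;
		}
	}

	if (!new_skb) {
		/* No replacement - drop this packet and give the old,
		 * still-mapped buffer back to the hardware.
		 */
		ndev->stats.rx_dropped++;
		mtk_mac_ring_push_head_rx(ring, &desc_data);
		return 0;
	}

	/* Only now unmap the old buffer and pass the skb to the stack. */
	mtk_mac_dma_unmap_rx(priv, &desc_data);
	skb_put(desc_data.skb, desc_data.len);
	desc_data.skb->protocol = eth_type_trans(desc_data.skb, ndev);
	netif_receive_skb(desc_data.skb);

	/* Put the new buffer on the ring. */
	desc_data.dma_addr = new_dma_addr;
	desc_data.skb = new_skb;
	mtk_mac_ring_push_head_rx(ring, &desc_data);
	return 0;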