On Mon, Jan 06, 2014 at 09:25:55PM -0800, Michael Dalton wrote:
> Add initial support for debugfs to virtio-net. Each virtio-net network
> device will have a directory under /virtio-net in debugfs. The
> per-network device directory will contain one sub-directory per active,
> enabled receive queue. If mergeable receive buffers are enabled, each
> receive queue directory will contain a read-only file that returns the
> current packet buffer size for the receive queue.
>
> Signed-off-by: Michael Dalton <mwdalton@xxxxxxxxxx>

Thanks, I'll play with it.
Could you tell us meanwhile, what's the typical size that you see?

> ---
>  drivers/net/virtio_net.c | 314 ++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 296 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index f6e1ee0..5da18d6 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -27,6 +27,9 @@
>  #include <linux/slab.h>
>  #include <linux/cpu.h>
>  #include <linux/average.h>
> +#include <linux/seqlock.h>
> +#include <linux/kref.h>
> +#include <linux/debugfs.h>
>
>  static int napi_weight = NAPI_POLL_WEIGHT;
>  module_param(napi_weight, int, 0444);
> @@ -35,6 +38,9 @@ static bool csum = true, gso = true;
>  module_param(csum, bool, 0444);
>  module_param(gso, bool, 0444);
>
> +/* Debugfs root directory for all virtio-net devices. */
> +static struct dentry *virtnet_debugfs_root;
> +
>  /* FIXME: MTU in config. */
>  #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
>  #define GOOD_COPY_LEN 128
> @@ -102,9 +108,6 @@ struct receive_queue {
>  	/* Chain pages by the private ptr. */
>  	struct page *pages;
>
> -	/* Average packet length for mergeable receive buffers. */
> -	struct ewma mrg_avg_pkt_len;
> -
>  	/* Page frag for packet buffer allocation. */
>  	struct page_frag alloc_frag;
>
> @@ -115,6 +118,28 @@ struct receive_queue {
>  	char name[40];
>  };
>
> +/* Per-receive queue statistics exported via debugfs. */
> +struct receive_queue_stats {
> +	/* Average packet length of receive queue (for mergeable rx buffers). */
> +	struct ewma avg_pkt_len;
> +
> +	/* Per-receive queue stats debugfs directory. */
> +	struct dentry *dbg;
> +
> +	/* Reference count for the receive queue statistics, needed because
> +	 * an open debugfs file may outlive the receive queue and netdevice.
> +	 * Open files will remain in-use until all outstanding file descriptors
> +	 * are closed, even after the underlying file is unlinked.
> +	 */
> +	struct kref refcount;
> +
> +	/* Sequence counter to allow debugfs readers to safely access stats.
> +	 * Assumes a single virtio-net writer, which is enforced by virtio-net
> +	 * and NAPI.
> +	 */
> +	seqcount_t dbg_seq;
> +};
> +
>  struct virtnet_info {
>  	struct virtio_device *vdev;
>  	struct virtqueue *cvq;
> @@ -147,6 +172,15 @@ struct virtnet_info {
>  	/* Active statistics */
>  	struct virtnet_stats __percpu *stats;
>
> +	/* Per-receive queue statistics exported via debugfs. Stored in
> +	 * virtnet_info to survive freeze/restore -- a task may have a per-rq
> +	 * debugfs file open at the time of freeze.
> +	 */
> +	struct receive_queue_stats **rq_stats;
> +
> +	/* Per-netdevice debugfs directory. */
> +	struct dentry *dbg_dev_root;
> +
>  	/* Work struct for refilling if we run low on memory. */
>  	struct delayed_work refill;
>
> @@ -358,6 +392,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  					 unsigned int len)
>  {
>  	struct skb_vnet_hdr *hdr = ctx->buf;
> +	struct virtnet_info *vi = netdev_priv(dev);
> +	struct receive_queue_stats *rq_stats = vi->rq_stats[vq2rxq(rq->vq)];
>  	int num_buf = hdr->mhdr.num_buffers;
>  	struct page *page = virt_to_head_page(ctx->buf);
>  	int offset = ctx->buf - page_address(page);
> @@ -413,7 +449,9 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
>  		}
>  	}
>
> -	ewma_add(&rq->mrg_avg_pkt_len, head_skb->len);
> +	write_seqcount_begin(&rq_stats->dbg_seq);
> +	ewma_add(&rq_stats->avg_pkt_len, head_skb->len);
> +	write_seqcount_end(&rq_stats->dbg_seq);
>  	return head_skb;
>
>  err_skb:
> @@ -600,18 +638,30 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
>  	return err;
>  }
>
> +static unsigned int get_mergeable_buf_len(struct ewma *avg_pkt_len)
> +{
> +	const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
> +	unsigned int len;
> +
> +	len = hdr_len + clamp_t(unsigned int, ewma_read(avg_pkt_len),
> +				GOOD_PACKET_LEN, PAGE_SIZE - hdr_len);
> +	return ALIGN(len, L1_CACHE_BYTES);
> +}
> +
>  static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
>  {
>  	const unsigned int ring_size = rq->mrg_buf_ctx_size;
> -	const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
>  	struct page_frag *alloc_frag = &rq->alloc_frag;
> +	struct virtnet_info *vi = rq->vq->vdev->priv;
>  	struct mergeable_receive_buf_ctx *ctx;
>  	int err;
>  	unsigned int len, hole;
>
> -	len = hdr_len + clamp_t(unsigned int, ewma_read(&rq->mrg_avg_pkt_len),
> -				GOOD_PACKET_LEN, PAGE_SIZE - hdr_len);
> -	len = ALIGN(len, L1_CACHE_BYTES);
> +	/* avg_pkt_len is written only in NAPI rx softirq context. We may
> +	 * read avg_pkt_len without using the dbg_seq seqcount, as this code
> +	 * is called only in NAPI rx softirq context or when NAPI is disabled.
> +	 */
> +	len = get_mergeable_buf_len(&vi->rq_stats[vq2rxq(rq->vq)]->avg_pkt_len);
>  	if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp)))
>  		return -ENOMEM;
>
> @@ -1274,13 +1324,101 @@ static void virtnet_get_drvinfo(struct net_device *dev,
>
>  }
>
> +static ssize_t mergeable_rx_buffer_size_read(struct file *file,
> +					     char __user *userbuf,
> +					     size_t count,
> +					     loff_t *ppos)
> +{
> +	struct receive_queue_stats *rq_stats = file->private_data;
> +	char buf[32];
> +	struct ewma avg;
> +	unsigned int start, len;
> +
> +	/* Don't allow partial reads. */
> +	if (*ppos)
> +		return 0;
> +	do {
> +		start = read_seqcount_begin(&rq_stats->dbg_seq);
> +		avg = rq_stats->avg_pkt_len;
> +	} while (read_seqcount_retry(&rq_stats->dbg_seq, start));
> +	len = scnprintf(buf, sizeof(buf), "%u\n", get_mergeable_buf_len(&avg));
> +	return simple_read_from_buffer(userbuf, count, ppos, buf, len);
> +}
> +
> +void receive_queue_stats_free(struct kref *ref)
> +{
> +	struct receive_queue_stats *rq_stats;
> +
> +	rq_stats = container_of(ref, struct receive_queue_stats, refcount);
> +	kfree(rq_stats);
> +}
> +
> +static int receive_queue_stats_debugfs_open(struct inode *inode,
> +					    struct file *file)
> +{
> +	struct receive_queue_stats *rq_stats = inode->i_private;
> +	kref_get(&rq_stats->refcount);
> +	file->private_data = rq_stats;
> +	return 0;
> +}
> +
> +static int receive_queue_stats_debugfs_release(struct inode *inode,
> +					       struct file *file)
> +{
> +	struct receive_queue_stats *rq_stats = inode->i_private;
> +	kref_put(&rq_stats->refcount, receive_queue_stats_free);
> +	file->private_data = NULL;
> +	return 0;
> +}
> +
> +static const struct file_operations mergeable_rx_buffer_size_fops = {
> +	.owner = THIS_MODULE,
> +	.open = receive_queue_stats_debugfs_open,
> +	.read = mergeable_rx_buffer_size_read,
> +	.llseek = default_llseek,
> +	.release = receive_queue_stats_debugfs_release,
> +};
> +
> +static void receive_queue_debugfs_add(struct receive_queue *rq)
> +{
> +	struct virtnet_info *vi = rq->vq->vdev->priv;
> +	unsigned int rq_index = vq2rxq(rq->vq);
> +	struct receive_queue_stats *rq_stats = vi->rq_stats[rq_index];
> +	struct dentry *dentry;
> +	char name[32];
> +
> +	if (IS_ERR_OR_NULL(vi->dbg_dev_root))
> +		return;
> +	scnprintf(name, sizeof(name), "rx-%u", rq_index);
> +	dentry = debugfs_create_dir(name, vi->dbg_dev_root);
> +	if (IS_ERR_OR_NULL(dentry)) {
> +		pr_warn("%s: could not create %s rx queue debugfs dir\n",
> +			vi->dev->name, name);
> +		return;
> +	}
> +	rq_stats->dbg = dentry;
> +	if (vi->mergeable_rx_bufs)
> +		debugfs_create_file("mergeable_rx_buffer_size", S_IRUSR,
> +				    rq_stats->dbg, rq_stats,
> +				    &mergeable_rx_buffer_size_fops);
> +}
> +
> +static void receive_queue_debugfs_del(struct receive_queue *rq)
> +{
> +	struct virtnet_info *vi = rq->vq->vdev->priv;
> +	struct receive_queue_stats *rq_stats = vi->rq_stats[vq2rxq(rq->vq)];
> +	debugfs_remove_recursive(rq_stats->dbg);
> +	rq_stats->dbg = NULL;
> +}
> +
>  /* TODO: Eliminate OOO packets during switching */
>  static int virtnet_set_channels(struct net_device *dev,
>  				struct ethtool_channels *channels)
>  {
>  	struct virtnet_info *vi = netdev_priv(dev);
> -	u16 queue_pairs = channels->combined_count;
> -	int err;
> +	u16 new_queue_pairs = channels->combined_count;
> +	u16 old_queue_pairs = vi->curr_queue_pairs;
> +	int err, i;
>
>  	/* We don't support separate rx/tx channels.
>  	 * We don't allow setting 'other' channels.
> @@ -1288,14 +1426,21 @@ static int virtnet_set_channels(struct net_device *dev,
>  	if (channels->rx_count || channels->tx_count || channels->other_count)
>  		return -EINVAL;
>
> -	if (queue_pairs > vi->max_queue_pairs)
> +	if (new_queue_pairs > vi->max_queue_pairs)
>  		return -EINVAL;
>
>  	get_online_cpus();
> -	err = virtnet_set_queues(vi, queue_pairs);
> +	err = virtnet_set_queues(vi, new_queue_pairs);
>  	if (!err) {
> -		netif_set_real_num_tx_queues(dev, queue_pairs);
> -		netif_set_real_num_rx_queues(dev, queue_pairs);
> +		if (new_queue_pairs < old_queue_pairs) {
> +			for (i = new_queue_pairs; i < old_queue_pairs; i++)
> +				receive_queue_debugfs_del(&vi->rq[i]);
> +		} else {
> +			for (i = old_queue_pairs; i < new_queue_pairs; i++)
> +				receive_queue_debugfs_add(&vi->rq[i]);
> +		}
> +		netif_set_real_num_tx_queues(dev, new_queue_pairs);
> +		netif_set_real_num_rx_queues(dev, new_queue_pairs);
>
>  		virtnet_set_affinity(vi);
>  	}
> @@ -1336,7 +1481,44 @@ static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
>  	return 0;
>  }
>
> +/* Must be called only after the net_device name has been expanded. */
> +static void virtnet_debugfs_init(struct virtnet_info *vi)
> +{
> +	int i;
> +
> +	if (IS_ERR_OR_NULL(virtnet_debugfs_root))
> +		return;
> +	vi->dbg_dev_root = debugfs_create_dir(vi->dev->name,
> +					      virtnet_debugfs_root);
> +	if (IS_ERR_OR_NULL(vi->dbg_dev_root)) {
> +		pr_warn("%s: could not create netdevice debugfs dir\n",
> +			vi->dev->name);
> +		return;
> +	}
> +	for (i = 0; i < vi->curr_queue_pairs; i++)
> +		receive_queue_debugfs_add(&vi->rq[i]);
> +}
> +
> +static void virtnet_debugfs_cleanup(struct virtnet_info *vi)
> +{
> +	int i;
> +
> +	for (i = 0; i < vi->max_queue_pairs; i++)
> +		receive_queue_debugfs_del(&vi->rq[i]);
> +	debugfs_remove_recursive(vi->dbg_dev_root);
> +	vi->dbg_dev_root = NULL;
> +}
> +
> +static int virtnet_init(struct net_device *dev)
> +{
> +	struct virtnet_info *vi = netdev_priv(dev);
> +
> +	virtnet_debugfs_init(vi);
> +	return 0;
> +}
> +
>  static const struct net_device_ops virtnet_netdev = {
> +	.ndo_init = virtnet_init,
>  	.ndo_open = virtnet_open,
>  	.ndo_stop = virtnet_close,
>  	.ndo_start_xmit = start_xmit,
> @@ -1560,7 +1742,6 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
>  			       napi_weight);
>
>  		sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
> -		ewma_init(&vi->rq[i].mrg_avg_pkt_len, 1, RECEIVE_AVG_WEIGHT);
>  		sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
>  	}
>
> @@ -1614,6 +1795,39 @@ err:
>  	return ret;
>  }
>
> +static int virtnet_rename(struct notifier_block *this,
> +			  unsigned long event, void *ptr)
> +{
> +	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
> +	struct virtnet_info *vi;
> +
> +	if (event != NETDEV_CHANGENAME || dev->netdev_ops != &virtnet_netdev)
> +		return NOTIFY_DONE;
> +	vi = netdev_priv(dev);
> +	if (IS_ERR_OR_NULL(vi->dbg_dev_root))
> +		return NOTIFY_DONE;
> +	if (IS_ERR_OR_NULL(debugfs_rename(virtnet_debugfs_root,
> +					  vi->dbg_dev_root,
> +					  virtnet_debugfs_root, dev->name))) {
> +		pr_warn("%s: failed debugfs rename, removing old debugfs dir\n",
> +			dev->name);
> +		virtnet_debugfs_cleanup(vi);
> +	}
> +	return NOTIFY_DONE;
> +}
> +
> +static void virtnet_release_receive_queue_stats(struct virtnet_info *vi)
> +{
> +	int i;
> +
> +	for (i = 0; i < vi->max_queue_pairs; i++) {
> +		struct receive_queue_stats *rq_stats = vi->rq_stats[i];
> +		if (rq_stats)
> +			kref_put(&rq_stats->refcount, receive_queue_stats_free);
> +	}
> +	kfree(vi->rq_stats);
> +}
> +
>  static int virtnet_probe(struct virtio_device *vdev)
>  {
>  	int i, err;
> @@ -1723,10 +1937,24 @@ static int virtnet_probe(struct virtio_device *vdev)
>  	vi->curr_queue_pairs = 1;
>  	vi->max_queue_pairs = max_queue_pairs;
>
> +	vi->rq_stats = kzalloc(sizeof(vi->rq_stats[0]) *
> +			       vi->max_queue_pairs, GFP_KERNEL);
> +	if (!vi->rq_stats)
> +		goto free_dev_stats;
> +	for (i = 0; i < vi->max_queue_pairs; i++) {
> +		vi->rq_stats[i] = kzalloc(sizeof(*vi->rq_stats[0]), GFP_KERNEL);
> +		if (!vi->rq_stats[i])
> +			goto free_rq_stats;
> +		seqcount_init(&vi->rq_stats[i]->dbg_seq);
> +		kref_init(&vi->rq_stats[i]->refcount);
> +		ewma_init(&vi->rq_stats[i]->avg_pkt_len, 1,
> +			  RECEIVE_AVG_WEIGHT);
> +	}
> +
>  	/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
>  	err = init_vqs(vi);
>  	if (err)
> -		goto free_stats;
> +		goto free_rq_stats;
>
>  	netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
>  	netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
> @@ -1777,8 +2005,11 @@ free_recv_bufs:
>  free_vqs:
>  	cancel_delayed_work_sync(&vi->refill);
>  	free_receive_page_frags(vi);
> +	virtnet_debugfs_cleanup(vi);
>  	virtnet_del_vqs(vi);
> -free_stats:
> +free_rq_stats:
> +	virtnet_release_receive_queue_stats(vi);
> +free_dev_stats:
>  	free_percpu(vi->stats);
>  free:
>  	free_netdev(dev);
> @@ -1812,10 +2043,12 @@ static void virtnet_remove(struct virtio_device *vdev)
>
>  	unregister_netdev(vi->dev);
>
> +	virtnet_debugfs_cleanup(vi);
>  	remove_vq_common(vi);
>
>  	flush_work(&vi->config_work);
>
> +	virtnet_release_receive_queue_stats(vi);
>  	free_percpu(vi->stats);
>  	free_netdev(vi->dev);
>  }
> @@ -1884,6 +2117,19 @@ static int virtnet_restore(struct virtio_device *vdev)
>  }
>  #endif
>
> +static void virtnet_register_debugfs(void)
> +{
> +	virtnet_debugfs_root = debugfs_create_dir("virtio-net", NULL);
> +	if (IS_ERR_OR_NULL(virtnet_debugfs_root))
> +		pr_warn("Could not create virtio-net debugfs dir\n");
> +}
> +
> +static void virtnet_unregister_debugfs(void)
> +{
> +	debugfs_remove_recursive(virtnet_debugfs_root);
> +	virtnet_debugfs_root = NULL;
> +}
> +
>  static struct virtio_device_id id_table[] = {
>  	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
>  	{ 0 },
> @@ -1917,7 +2163,39 @@ static struct virtio_driver virtio_net_driver = {
>  #endif
>  };
>
> -module_virtio_driver(virtio_net_driver);
> +static struct notifier_block virtnet_rename_notifier = {
> +	.notifier_call = virtnet_rename,
> +};
> +
> +static int __init init(void)
> +{
> +	int err;
> +
> +	virtnet_register_debugfs();
> +	err = register_netdevice_notifier(&virtnet_rename_notifier);
> +	if (err)
> +		goto free_debugfs;
> +	err = register_virtio_driver(&virtio_net_driver);
> +	if (err)
> +		goto free_notifier;
> +	return 0;
> +
> +free_notifier:
> +	unregister_netdevice_notifier(&virtnet_rename_notifier);
> +free_debugfs:
> +	virtnet_unregister_debugfs();
> +	return err;
> +}
> +
> +static void __exit cleanup(void)
> +{
> +	unregister_virtio_driver(&virtio_net_driver);
> +	unregister_netdevice_notifier(&virtnet_rename_notifier);
> +	virtnet_unregister_debugfs();
> +}
> +
> +module_init(init);
> +module_exit(cleanup);
>
>  MODULE_DEVICE_TABLE(virtio, id_table);
>  MODULE_DESCRIPTION("Virtio network driver");
> --
> 1.8.5.1
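
For reference while testing: the value reported by mergeable_rx_buffer_size
is just the EWMA of the received packet length, clamped to the range
[GOOD_PACKET_LEN, PAGE_SIZE - hdr_len] and rounded up to a cache line,
exactly as get_mergeable_buf_len() above computes it. A quick user-space
sketch of that arithmetic (not part of the patch; it assumes PAGE_SIZE =
4096, 64-byte cache lines and a 12-byte virtio_net_hdr_mrg_rxbuf, i.e.
typical x86 values):

/* Sketch of the get_mergeable_buf_len() math, for sanity-checking the
 * values read back from debugfs.  The constants below are assumptions
 * for a typical x86 build, not taken from the patch.
 */
#include <stdio.h>

#define HDR_LEN		12u	/* sizeof(struct virtio_net_hdr_mrg_rxbuf) */
#define PAGE_SZ		4096u
#define CACHE_LINE	64u
#define GOOD_PACKET_LEN	1518u	/* ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN */

static unsigned int clamp_uint(unsigned int v, unsigned int lo, unsigned int hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

/* Mirrors get_mergeable_buf_len(): header + clamped average, cache aligned. */
static unsigned int mergeable_buf_len(unsigned int avg_pkt_len)
{
	unsigned int len;

	len = HDR_LEN + clamp_uint(avg_pkt_len, GOOD_PACKET_LEN,
				   PAGE_SZ - HDR_LEN);
	return (len + CACHE_LINE - 1) & ~(CACHE_LINE - 1);	/* ALIGN() */
}

int main(void)
{
	const unsigned int samples[] = { 256, 1500, 2048, 4096, 65535 };
	size_t i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		printf("avg %5u -> buffer %4u\n",
		       samples[i], mergeable_buf_len(samples[i]));
	return 0;
}

With MTU-sized traffic that works out to 1536 bytes per buffer, and it
saturates at PAGE_SIZE once the average approaches 4K.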
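
And in case it helps with collecting those numbers across queues, something
along these lines will dump every per-queue value (assuming debugfs is
mounted at /sys/kernel/debug; the directory layout follows the commit
message above):

/* Dump mergeable_rx_buffer_size for every virtio-net receive queue.
 * Assumes debugfs is mounted at /sys/kernel/debug.
 */
#include <glob.h>
#include <stdio.h>

int main(void)
{
	const char *pattern =
		"/sys/kernel/debug/virtio-net/*/rx-*/mergeable_rx_buffer_size";
	glob_t g;
	size_t i;

	if (glob(pattern, 0, NULL, &g) != 0) {
		fprintf(stderr, "no virtio-net debugfs entries found\n");
		return 1;
	}
	for (i = 0; i < g.gl_pathc; i++) {
		char buf[32] = "";
		FILE *f = fopen(g.gl_pathv[i], "r");

		if (!f)
			continue;
		if (fgets(buf, sizeof(buf), f))
			printf("%s: %s", g.gl_pathv[i], buf);	/* value includes '\n' */
		fclose(f);
	}
	globfree(&g);
	return 0;
}

A plain cat on the individual files works just as well, of course.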