Re: [PATCH] drivers: spi: spi.c: Convert statistics to per-cpu u64_stats_t

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, May 23, 2022 at 04:20:09PM +0200, David Jander wrote:
> This change gives a dramatic performance improvement in the hot path,
> since many costly spin_lock_irqsave() calls can be avoided.

It is normal to back up such claims with numbers. Please include your
numbers here.

> 
> Suggested-by: Andrew Lunn <andrew@xxxxxxx>
> Signed-off-by: David Jander <david@xxxxxxxxxxx>
> ---
>  drivers/spi/spi.c       | 95 ++++++++++++++++++++++++-----------------
>  include/linux/spi/spi.h | 80 ++++++++++++++++++++++++----------
>  2 files changed, 114 insertions(+), 61 deletions(-)
> 
> diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
> index c4dd1200fe99..edc290e67b92 100644
> --- a/drivers/spi/spi.c
> +++ b/drivers/spi/spi.c
> @@ -33,6 +33,7 @@
>  #include <linux/idr.h>
>  #include <linux/platform_data/x86/apple.h>
>  #include <linux/ptp_clock_kernel.h>
> +#include <linux/percpu.h>
>  
>  #define CREATE_TRACE_POINTS
>  #include <trace/events/spi.h>
> @@ -111,6 +112,25 @@ static ssize_t driver_override_show(struct device *dev,
>  }
>  static DEVICE_ATTR_RW(driver_override);
>  
> +#define spi_pcpu_stats_totalize(ret, in, field)				\
> +do {									\
> +	int i;								\
> +	ret = 0;							\
> +	for_each_possible_cpu(i) {					\
> +		const struct spi_statistics *pcpu_stats;		\
> +		u64 inc;						\
> +		unsigned int start;					\
> +		pcpu_stats = per_cpu_ptr(in, i);			\
> +		do {							\
> +			start = u64_stats_fetch_begin_irq(		\
> +					&pcpu_stats->syncp);		\
> +			inc = u64_stats_read(&pcpu_stats->field);	\
> +		} while (u64_stats_fetch_retry_irq(			\
> +					&pcpu_stats->syncp, start));	\
> +		ret += inc;						\
> +	}								\
> +} while (0)

Looking at this from the perspective of a network reviewer, that is a
pretty big macro. netdev would probably not allow this. But macro
magic does seem normal in this file.

What also makes a difference here is the API to user space. With
netdev, it is not so much the absolute values of the counters, but the
relationship with other counters that is interesting. So there is a
method to get all the counters in one atomic snapshot. For that to
work, the loop doing the addition adds up all the counters, not just
one specific counter. So there is no need for 'field' which is the
source of all this macro magic. But it seems like SPI only ever
exports individual counters.

>  #define SPI_STATISTICS_ATTRS(field, file)				\
>  static ssize_t spi_controller_##field##_show(struct device *dev,	\
>  					     struct device_attribute *attr, \
> @@ -118,7 +138,7 @@ static ssize_t spi_controller_##field##_show(struct device *dev,	\
>  {									\
>  	struct spi_controller *ctlr = container_of(dev,			\
>  					 struct spi_controller, dev);	\
> -	return spi_statistics_##field##_show(&ctlr->statistics, buf);	\
> +	return spi_statistics_##field##_show(ctlr->pcpu_statistics, buf); \
>  }									\
>  static struct device_attribute dev_attr_spi_controller_##field = {	\
>  	.attr = { .name = file, .mode = 0444 },				\
> @@ -129,7 +149,7 @@ static ssize_t spi_device_##field##_show(struct device *dev,		\
>  					char *buf)			\
>  {									\
>  	struct spi_device *spi = to_spi_device(dev);			\
> -	return spi_statistics_##field##_show(&spi->statistics, buf);	\
> +	return spi_statistics_##field##_show(spi->pcpu_statistics, buf); \
>  }									\
>  static struct device_attribute dev_attr_spi_device_##field = {		\
>  	.attr = { .name = file, .mode = 0444 },				\

Not a criticism of this patch, but this is the old way to do this. It
is better to use DEVICE_ATTR_RO(), but that is a bigger rework.

> @@ -140,11 +160,10 @@ static struct device_attribute dev_attr_spi_device_##field = {		\
>  static ssize_t spi_statistics_##name##_show(struct spi_statistics *stat, \
>  					    char *buf)			\
>  {									\
> -	unsigned long flags;						\
>  	ssize_t len;							\
> -	spin_lock_irqsave(&stat->lock, flags);				\
> -	len = sysfs_emit(buf, format_string "\n", stat->field);		\
> -	spin_unlock_irqrestore(&stat->lock, flags);			\
> +	u64 val;							\
> +	spi_pcpu_stats_totalize(val, stat, field);			\
> +	len = sysfs_emit(buf, format_string "\n", val);			\

If you have made all the statistics u64, you don't need format_string.

> @@ -543,7 +562,7 @@ struct spi_device *spi_alloc_device(struct spi_controller *ctlr)
>  	spi->dev.release = spidev_release;
>  	spi->mode = ctlr->buswidth_override_bits;
>  
> -	spin_lock_init(&spi->statistics.lock);
> +	spi->pcpu_statistics = spi_alloc_pcpu_stats(struct spi_statistics);

It looks like you are missing error checking here.

> @@ -3042,7 +3061,7 @@ int spi_register_controller(struct spi_controller *ctlr)
>  		}
>  	}
>  	/* add statistics */
> -	spin_lock_init(&ctlr->statistics.lock);
> +	ctlr->pcpu_statistics = devm_spi_alloc_pcpu_stats(dev, struct spi_statistics);

This could fail, so you should check for NULL pointer.

> +#define devm_spi_alloc_pcpu_stats(dev, type)				\
> +({									\
> +	typeof(type) __percpu *pcpu_stats = devm_alloc_percpu(dev, type);\
> +	if (pcpu_stats) {						\
> +		int __cpu;						\
> +		for_each_possible_cpu(__cpu) {				\
> +			typeof(type) *stat;				\
> +			stat = per_cpu_ptr(pcpu_stats, __cpu);		\
> +			u64_stats_init(&stat->syncp);			\
> +		}							\
> +	}								\
> +	pcpu_stats;							\
> +})
> +
> +#define spi_alloc_pcpu_stats(type)					\
> +({									\
> +	typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, GFP_KERNEL);\
> +	if (pcpu_stats) {						\
> +		int __cpu;						\
> +		for_each_possible_cpu(__cpu) {				\
> +			typeof(type) *stat;				\
> +			stat = per_cpu_ptr(pcpu_stats, __cpu);		\
> +			u64_stats_init(&stat->syncp);			\
> +		}							\
> +	}								\
> +	pcpu_stats;							\
> +})

Do these need to be macros? How many different types are used?

   Andrew



[Index of Archives]     [Linux Kernel]     [Linux ARM (vger)]     [Linux ARM MSM]     [Linux Omap]     [Linux Arm]     [Linux Tegra]     [Fedora ARM]     [Linux for Samsung SOC]     [eCos]     [Linux Fastboot]     [Gcc Help]     [Git]     [DCCP]     [IETF Announce]     [Security]     [Linux MIPS]     [Yosemite Campsites]

  Powered by Linux