Re: [PATCH 1/5] mean and variance: Promote to lib/math

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Jan 26, 2024 at 05:06:51PM -0500, Kent Overstreet wrote:
> Small statistics library, for taking in a series of value and computing
> mean, weighted mean, standard deviation and weighted deviation.
> 
> The main use case is for statistics on latency measurements.

Oh, heh, I didn't realize this was a patch and not a cover letter.

A few things I noticed while adding timestats to xfs -- pahole says this
about the weighted mean and variance object:

struct mean_and_variance_weighted {
        /* typedef bool */ _Bool                      init;                              /*     0     1 */
        /* typedef u8 -> __u8 */ unsigned char              weight;                      /*     1     1 */

        /* XXX 6 bytes hole, try to pack */

        /* typedef s64 -> __s64 */ long long int              mean;                      /*     8     8 */
        /* typedef u64 -> __u64 */ long long unsigned int     variance;                  /*    16     8 */

        /* size: 24, cachelines: 1, members: 4 */
        /* sum members: 18, holes: 1, sum holes: 6 */
        /* last cacheline: 24 bytes */
};

AFAICT the init flag isn't used, and the u8 wastes 6 bytes of space.
Two of these get plugged into struct time_stats, which means we waste 12
bytes of space on a ~464 byte structure.  Removing quantiles support
from that shrinks the struct down to 208 bytes.

Any chance we could pass in the weights as a function parameter so that
we could shrink this to 16 bytes?  If we do that and rearrange
time_stats, the whole thing goes down to 192 bytes, which means I can
spray in twice as many timestats.

I also noticed that struct time_stat_buffer is:

struct time_stat_buffer {
        unsigned int               nr;                                                   /*     0     4 */

        /* XXX 4 bytes hole, try to pack */

        struct time_stat_buffer_entry {
                /* typedef u64 -> __u64 */ long long unsigned int start;                 /*     8     8 */
                /* typedef u64 -> __u64 */ long long unsigned int end;                   /*    16     8 */
        } entries[32]; /*     8   512 */

        /* size: 520, cachelines: 9, members: 2 */
        /* sum members: 516, holes: 1, sum holes: 4 */
        /* last cacheline: 8 bytes */
};

I wonder, if entries[] shrank to 31 entries, this would align to 512b;
would that make for more efficient allocations?  I tried to follow
alloc_percpu_gfp and got caught in a twisty mess of macros.

--D

FWIW the full time_stats struct ended up like this after I turned off
quantiles and did all the lazy rearranging I could do without removing
init or weight:

struct time_stats {
        /* typedef spinlock_t */ struct spinlock {
                union {
                        struct raw_spinlock {
                                /* typedef arch_spinlock_t */ struct qspinlock {
                                        union {
                                                /* typedef atomic_t */ struct {
                                                        int                    counter;  /*     0     4 */
                                                } val; /*     0     4 */
                                                struct {
                                                        /* typedef u8 -> __u8 */ unsigned char          locked; /*     0     1 */
                                                        /* typedef u8 -> __u8 */ unsigned char          pending; /*     1     1 */
                                                };                                       /*     0     2 */
                                                struct {
                                                        /* typedef u16 -> __u16 */ short unsigned int     locked_pending; /*     0     2 */
                                                        /* typedef u16 -> __u16 */ short unsigned int     tail; /*     2     2 */
                                                };                                       /*     0     4 */
                                        };                                               /*     0     4 */
                                } raw_lock; /*     0     4 */
                                unsigned int magic;                                      /*     4     4 */
                                unsigned int owner_cpu;                                  /*     8     4 */

                                /* XXX 4 bytes hole, try to pack */

                                void * owner;                                            /*    16     8 */
                        }rlock; /*     0    24 */
                };                                                                       /*     0    24 */
        } lock; /*     0    24 */
        /* typedef u64 -> __u64 */ long long unsigned int     min_duration;              /*    24     8 */
        /* typedef u64 -> __u64 */ long long unsigned int     max_duration;              /*    32     8 */
        /* typedef u64 -> __u64 */ long long unsigned int     total_duration;            /*    40     8 */
        /* typedef u64 -> __u64 */ long long unsigned int     max_freq;                  /*    48     8 */
        /* typedef u64 -> __u64 */ long long unsigned int     min_freq;                  /*    56     8 */
        /* --- cacheline 1 boundary (64 bytes) --- */
        /* typedef u64 -> __u64 */ long long unsigned int     last_event;                /*    64     8 */
        /* typedef u64 -> __u64 */ long long unsigned int     last_event_start;          /*    72     8 */
        struct mean_and_variance {
                /* typedef s64 -> __s64 */ long long int      n;                         /*    80     8 */
                /* typedef s64 -> __s64 */ long long int      sum;                       /*    88     8 */
                /* typedef u128_u */ struct {
                        __int128 unsigned v;                                             /*    96    16 */
                } sum_squares __attribute__((__aligned__(16))) __attribute__((__aligned__(16))); /*    96    16 */
        } __attribute__((__aligned__(16)))duration_stats __attribute__((__aligned__(16))); /*    80    32 */
        struct mean_and_variance {
                /* typedef s64 -> __s64 */ long long int      n;                         /*   112     8 */
                /* typedef s64 -> __s64 */ long long int      sum;                       /*   120     8 */
                /* --- cacheline 2 boundary (128 bytes) --- */
                /* typedef u128_u */ struct {
                        __int128 unsigned v;                                             /*   128    16 */
                } sum_squares __attribute__((__aligned__(16))) __attribute__((__aligned__(16))); /*   128    16 */
        } __attribute__((__aligned__(16)))freq_stats __attribute__((__aligned__(16))); /*   112    32 */
        struct mean_and_variance_weighted {
                /* typedef bool */ _Bool              init;                              /*   144     1 */
                /* typedef u8 -> __u8 */ unsigned char      weight;                      /*   145     1 */

                /* XXX 6 bytes hole, try to pack */

                /* typedef s64 -> __s64 */ long long int      mean;                      /*   152     8 */
                /* typedef u64 -> __u64 */ long long unsigned int variance;              /*   160     8 */
        }duration_stats_weighted; /*   144    24 */
        struct mean_and_variance_weighted {
                /* typedef bool */ _Bool              init;                              /*   168     1 */
                /* typedef u8 -> __u8 */ unsigned char      weight;                      /*   169     1 */

                /* XXX 6 bytes hole, try to pack */

                /* typedef s64 -> __s64 */ long long int      mean;                      /*   176     8 */
                /* typedef u64 -> __u64 */ long long unsigned int variance;              /*   184     8 */
        }freq_stats_weighted; /*   168    24 */
        /* --- cacheline 3 boundary (192 bytes) --- */
        struct time_stat_buffer *  buffer;                                               /*   192     8 */

        /* size: 208, cachelines: 4, members: 13 */
        /* padding: 8 */
        /* forced alignments: 2 */
        /* last cacheline: 16 bytes */
} __attribute__((__aligned__(16)));


> Signed-off-by: Kent Overstreet <kent.overstreet@xxxxxxxxx>
> Cc: Daniel Hill <daniel@xxxxxxx>
> Cc: Darrick J. Wong <djwong@xxxxxxxxxx>
> ---
>  MAINTAINERS                                        |  9 +++++++++
>  fs/bcachefs/Kconfig                                | 10 +---------
>  fs/bcachefs/Makefile                               |  3 ---
>  fs/bcachefs/util.c                                 |  2 +-
>  fs/bcachefs/util.h                                 |  3 +--
>  {fs/bcachefs => include/linux}/mean_and_variance.h |  0
>  lib/Kconfig.debug                                  |  9 +++++++++
>  lib/math/Kconfig                                   |  3 +++
>  lib/math/Makefile                                  |  2 ++
>  {fs/bcachefs => lib/math}/mean_and_variance.c      |  3 +--
>  {fs/bcachefs => lib/math}/mean_and_variance_test.c |  3 +--
>  11 files changed, 28 insertions(+), 19 deletions(-)
>  rename {fs/bcachefs => include/linux}/mean_and_variance.h (100%)
>  rename {fs/bcachefs => lib/math}/mean_and_variance.c (99%)
>  rename {fs/bcachefs => lib/math}/mean_and_variance_test.c (99%)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 8d1052fa6a69..de635cfd354d 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -13379,6 +13379,15 @@ S:	Maintained
>  F:	drivers/net/mdio/mdio-regmap.c
>  F:	include/linux/mdio/mdio-regmap.h
>  
> +MEAN AND VARIANCE LIBRARY
> +M:	Daniel B. Hill <daniel@xxxxxxx>
> +M:	Kent Overstreet <kent.overstreet@xxxxxxxxx>
> +S:	Maintained
> +T:	git https://github.com/YellowOnion/linux/
> +F:	include/linux/mean_and_variance.h
> +F:	lib/math/mean_and_variance.c
> +F:	lib/math/mean_and_variance_test.c
> +
>  MEASUREMENT COMPUTING CIO-DAC IIO DRIVER
>  M:	William Breathitt Gray <william.gray@xxxxxxxxxx>
>  L:	linux-iio@xxxxxxxxxxxxxxx
> diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig
> index 5cdfef3b551a..72d1179262b3 100644
> --- a/fs/bcachefs/Kconfig
> +++ b/fs/bcachefs/Kconfig
> @@ -24,6 +24,7 @@ config BCACHEFS_FS
>  	select XXHASH
>  	select SRCU
>  	select SYMBOLIC_ERRNAME
> +	select MEAN_AND_VARIANCE
>  	help
>  	The bcachefs filesystem - a modern, copy on write filesystem, with
>  	support for multiple devices, compression, checksumming, etc.
> @@ -86,12 +87,3 @@ config BCACHEFS_SIX_OPTIMISTIC_SPIN
>  	Instead of immediately sleeping when attempting to take a six lock that
>  	is held by another thread, spin for a short while, as long as the
>  	thread owning the lock is running.
> -
> -config MEAN_AND_VARIANCE_UNIT_TEST
> -	tristate "mean_and_variance unit tests" if !KUNIT_ALL_TESTS
> -	depends on KUNIT
> -	depends on BCACHEFS_FS
> -	default KUNIT_ALL_TESTS
> -	help
> -	  This option enables the kunit tests for mean_and_variance module.
> -	  If unsure, say N.
> diff --git a/fs/bcachefs/Makefile b/fs/bcachefs/Makefile
> index 1a05cecda7cc..b11ba74b8ad4 100644
> --- a/fs/bcachefs/Makefile
> +++ b/fs/bcachefs/Makefile
> @@ -57,7 +57,6 @@ bcachefs-y		:=	\
>  	keylist.o		\
>  	logged_ops.o		\
>  	lru.o			\
> -	mean_and_variance.o	\
>  	migrate.o		\
>  	move.o			\
>  	movinggc.o		\
> @@ -88,5 +87,3 @@ bcachefs-y		:=	\
>  	util.o			\
>  	varint.o		\
>  	xattr.o
> -
> -obj-$(CONFIG_MEAN_AND_VARIANCE_UNIT_TEST)   += mean_and_variance_test.o
> diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
> index 56b815fd9fc6..d7ea95abb9df 100644
> --- a/fs/bcachefs/util.c
> +++ b/fs/bcachefs/util.c
> @@ -22,9 +22,9 @@
>  #include <linux/string.h>
>  #include <linux/types.h>
>  #include <linux/sched/clock.h>
> +#include <linux/mean_and_variance.h>
>  
>  #include "eytzinger.h"
> -#include "mean_and_variance.h"
>  #include "util.h"
>  
>  static const char si_units[] = "?kMGTPEZY";
> diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
> index b414736d59a5..0059481995ef 100644
> --- a/fs/bcachefs/util.h
> +++ b/fs/bcachefs/util.h
> @@ -17,8 +17,7 @@
>  #include <linux/slab.h>
>  #include <linux/vmalloc.h>
>  #include <linux/workqueue.h>
> -
> -#include "mean_and_variance.h"
> +#include <linux/mean_and_variance.h>
>  
>  #include "darray.h"
>  
> diff --git a/fs/bcachefs/mean_and_variance.h b/include/linux/mean_and_variance.h
> similarity index 100%
> rename from fs/bcachefs/mean_and_variance.h
> rename to include/linux/mean_and_variance.h
> diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
> index 975a07f9f1cc..817ddfe132cd 100644
> --- a/lib/Kconfig.debug
> +++ b/lib/Kconfig.debug
> @@ -2191,6 +2191,15 @@ config CPUMASK_KUNIT_TEST
>  
>  	  If unsure, say N.
>  
> +config MEAN_AND_VARIANCE_UNIT_TEST
> +	tristate "mean_and_variance unit tests" if !KUNIT_ALL_TESTS
> +	depends on KUNIT
> +	select MEAN_AND_VARIANCE
> +	default KUNIT_ALL_TESTS
> +	help
> +	  This option enables the kunit tests for mean_and_variance module.
> +	  If unsure, say N.
> +
>  config TEST_LIST_SORT
>  	tristate "Linked list sorting test" if !KUNIT_ALL_TESTS
>  	depends on KUNIT
> diff --git a/lib/math/Kconfig b/lib/math/Kconfig
> index 0634b428d0cb..7530ae9a3584 100644
> --- a/lib/math/Kconfig
> +++ b/lib/math/Kconfig
> @@ -15,3 +15,6 @@ config PRIME_NUMBERS
>  
>  config RATIONAL
>  	tristate
> +
> +config MEAN_AND_VARIANCE
> +	tristate
> diff --git a/lib/math/Makefile b/lib/math/Makefile
> index 91fcdb0c9efe..8cdfa13a67ce 100644
> --- a/lib/math/Makefile
> +++ b/lib/math/Makefile
> @@ -4,6 +4,8 @@ obj-y += div64.o gcd.o lcm.o int_log.o int_pow.o int_sqrt.o reciprocal_div.o
>  obj-$(CONFIG_CORDIC)		+= cordic.o
>  obj-$(CONFIG_PRIME_NUMBERS)	+= prime_numbers.o
>  obj-$(CONFIG_RATIONAL)		+= rational.o
> +obj-$(CONFIG_MEAN_AND_VARIANCE) += mean_and_variance.o
>  
>  obj-$(CONFIG_TEST_DIV64)	+= test_div64.o
>  obj-$(CONFIG_RATIONAL_KUNIT_TEST) += rational-test.o
> +obj-$(CONFIG_MEAN_AND_VARIANCE_UNIT_TEST)   += mean_and_variance_test.o
> diff --git a/fs/bcachefs/mean_and_variance.c b/lib/math/mean_and_variance.c
> similarity index 99%
> rename from fs/bcachefs/mean_and_variance.c
> rename to lib/math/mean_and_variance.c
> index bf0ef668fd38..ba90293204ba 100644
> --- a/fs/bcachefs/mean_and_variance.c
> +++ b/lib/math/mean_and_variance.c
> @@ -40,10 +40,9 @@
>  #include <linux/limits.h>
>  #include <linux/math.h>
>  #include <linux/math64.h>
> +#include <linux/mean_and_variance.h>
>  #include <linux/module.h>
>  
> -#include "mean_and_variance.h"
> -
>  u128_u u128_div(u128_u n, u64 d)
>  {
>  	u128_u r;
> diff --git a/fs/bcachefs/mean_and_variance_test.c b/lib/math/mean_and_variance_test.c
> similarity index 99%
> rename from fs/bcachefs/mean_and_variance_test.c
> rename to lib/math/mean_and_variance_test.c
> index 019583c3ca0e..f45591a169d8 100644
> --- a/fs/bcachefs/mean_and_variance_test.c
> +++ b/lib/math/mean_and_variance_test.c
> @@ -1,7 +1,6 @@
>  // SPDX-License-Identifier: GPL-2.0
>  #include <kunit/test.h>
> -
> -#include "mean_and_variance.h"
> +#include <linux/mean_and_variance.h>
>  
>  #define MAX_SQR (SQRT_U64_MAX*SQRT_U64_MAX)
>  
> -- 
> 2.43.0
> 
> 




[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Linux ARM Kernel]     [Linux Filesystem Development]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Security]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux