Re: [PATCH 01/10] repair: translation lookups limit scalability

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Feb 24, 2014 at 05:29:20PM +1100, Dave Chinner wrote:
> From: Dave Chinner <dchinner@xxxxxxxxxx>
> 
> A bit of perf magic showed that scalability was limits to 3-4
> concurrent threads due to contention on a lock inside in something
> called __dcigettext(). That some library somewhere that repair is
> linked against, and it turns out to be inside the translation
> infrastructure to support the _() string mechanism:
> 
> # Samples: 34K of event 'cs'
> # Event count (approx.): 495567
> #
> # Overhead        Command      Shared Object          Symbol
> # ........  .............  .................  ..............
> #
>     60.30%     xfs_repair  [kernel.kallsyms]  [k] __schedule
>                |
>                --- 0x63fffff9c
>                    process_bmbt_reclist_int
>                   |
>                   |--39.95%-- __dcigettext
>                   |          __lll_lock_wait
>                   |          system_call_fastpath
>                   |          SyS_futex
>                   |          do_futex
>                   |          futex_wait
>                   |          futex_wait_queue_me
>                   |          schedule
>                   |          __schedule
>                   |
>                   |--8.91%-- __lll_lock_wait
>                   |          system_call_fastpath
>                   |          SyS_futex
>                   |          do_futex
>                   |          futex_wait
>                   |          futex_wait_queue_me
>                   |          schedule
>                   |          __schedule
>                    --51.13%-- [...]
> 
> Fix this by initialising global variables that hold the translated
> strings at startup, hence avoiding the need to do repeated runtime
> translation of the same strings.
> 
> Runtime of an unpatched xfs_repair is roughly:
> 
>         XFS_REPAIR Summary    Fri Dec  6 11:15:50 2013
> 
> Phase           Start           End             Duration
> Phase 1:        12/06 10:56:21  12/06 10:56:21
> Phase 2:        12/06 10:56:21  12/06 10:56:23  2 seconds
> Phase 3:        12/06 10:56:23  12/06 11:01:31  5 minutes, 8 seconds
> Phase 4:        12/06 11:01:31  12/06 11:07:08  5 minutes, 37 seconds
> Phase 5:        12/06 11:07:08  12/06 11:07:09  1 second
> Phase 6:        12/06 11:07:09  12/06 11:15:49  8 minutes, 40 seconds
> Phase 7:        12/06 11:15:49  12/06 11:15:50  1 second
> 
> Total run time: 19 minutes, 29 seconds
> 
> Patched version:
> 
> Phase           Start           End             Duration
> Phase 1:        12/06 10:36:29  12/06 10:36:29
> Phase 2:        12/06 10:36:29  12/06 10:36:31  2 seconds
> Phase 3:        12/06 10:36:31  12/06 10:40:08  3 minutes, 37 seconds
> Phase 4:        12/06 10:40:08  12/06 10:43:42  3 minutes, 34 seconds
> Phase 5:        12/06 10:43:42  12/06 10:43:42
> Phase 6:        12/06 10:43:42  12/06 10:50:28  6 minutes, 46 seconds
> Phase 7:        12/06 10:50:28  12/06 10:50:29  1 second
> 
> Total run time: 14 minutes
> 
> Big win!
> 
> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
> ---
>  repair/dinode.c     | 49 +++++++++++++++++++++++++++++++++++--------------
>  repair/dinode.h     |  3 +++
>  repair/scan.c       |  7 +------
>  repair/xfs_repair.c |  2 ++
>  4 files changed, 41 insertions(+), 20 deletions(-)
> 
> diff --git a/repair/dinode.c b/repair/dinode.c
> index 3115bd0..4953a56 100644
> --- a/repair/dinode.c
> +++ b/repair/dinode.c
> @@ -32,6 +32,37 @@
>  #include "threads.h"
>  
>  /*
> + * gettext lookups for translations of strings use mutexes internally to
> + * the library. Hence when we come through here doing parallel scans in
> + * multiple AGs, then all do concurrent text conversions and serialise
Typo:		    they ?

Reviewed-by: Brian Foster <bfoster@xxxxxxxxxx>

> + * on the translation string lookups. Let's avoid doing repeated lookups
> + * by making them static variables and only assigning the translation
> + * once.
> + */
> +static char	*forkname_data;
> +static char	*forkname_attr;
> +static char	*ftype_real_time;
> +static char	*ftype_regular;
> +
> +void
> +dinode_bmbt_translation_init(void)
> +{
> +	forkname_data = _("data");
> +	forkname_attr = _("attr");
> +	ftype_real_time = _("real-time");
> +	ftype_regular = _("regular");
> +}
> +
> +char *
> +get_forkname(int whichfork)
> +{
> +
> +	if (whichfork == XFS_DATA_FORK)
> +		return forkname_data;
> +	return forkname_attr;
> +}
> +
> +/*
>   * inode clearing routines
>   */
>  
> @@ -542,7 +573,7 @@ process_bmbt_reclist_int(
>  	xfs_dfiloff_t		op = 0;		/* prev offset */
>  	xfs_dfsbno_t		b;
>  	char			*ftype;
> -	char			*forkname;
> +	char			*forkname = get_forkname(whichfork);
>  	int			i;
>  	int			state;
>  	xfs_agnumber_t		agno;
> @@ -552,15 +583,10 @@ process_bmbt_reclist_int(
>  	xfs_agnumber_t		locked_agno = -1;
>  	int			error = 1;
>  
> -	if (whichfork == XFS_DATA_FORK)
> -		forkname = _("data");
> -	else
> -		forkname = _("attr");
> -
>  	if (type == XR_INO_RTDATA)
> -		ftype = _("real-time");
> +		ftype = ftype_real_time;
>  	else
> -		ftype = _("regular");
> +		ftype = ftype_regular;
>  
>  	for (i = 0; i < *numrecs; i++) {
>  		libxfs_bmbt_disk_get_all(rp + i, &irec);
> @@ -1110,7 +1136,7 @@ process_btinode(
>  	xfs_ino_t		lino;
>  	xfs_bmbt_ptr_t		*pp;
>  	xfs_bmbt_key_t		*pkey;
> -	char			*forkname;
> +	char			*forkname = get_forkname(whichfork);
>  	int			i;
>  	int			level;
>  	int			numrecs;
> @@ -1122,11 +1148,6 @@ process_btinode(
>  	*tot = 0;
>  	*nex = 0;
>  
> -	if (whichfork == XFS_DATA_FORK)
> -		forkname = _("data");
> -	else
> -		forkname = _("attr");
> -
>  	magic = xfs_sb_version_hascrc(&mp->m_sb) ? XFS_BMAP_CRC_MAGIC
>  						 : XFS_BMAP_MAGIC;
>  
> diff --git a/repair/dinode.h b/repair/dinode.h
> index d9197c1..7521521 100644
> --- a/repair/dinode.h
> +++ b/repair/dinode.h
> @@ -127,4 +127,7 @@ get_bmapi(xfs_mount_t		*mp,
>  		xfs_dfiloff_t	bno,
>  		int             whichfork );
>  
> +void dinode_bmbt_translation_init(void);
> +char * get_forkname(int whichfork);
> +
>  #endif /* _XR_DINODE_H */
> diff --git a/repair/scan.c b/repair/scan.c
> index 73b4581..b12f48b 100644
> --- a/repair/scan.c
> +++ b/repair/scan.c
> @@ -171,17 +171,12 @@ scan_bmapbt(
>  	xfs_bmbt_rec_t		*rp;
>  	xfs_dfiloff_t		first_key;
>  	xfs_dfiloff_t		last_key;
> -	char			*forkname;
> +	char			*forkname = get_forkname(whichfork);
>  	int			numrecs;
>  	xfs_agnumber_t		agno;
>  	xfs_agblock_t		agbno;
>  	int			state;
>  
> -	if (whichfork == XFS_DATA_FORK)
> -		forkname = _("data");
> -	else
> -		forkname = _("attr");
> -
>  	/*
>  	 * unlike the ag freeblock btrees, if anything looks wrong
>  	 * in an inode bmap tree, just bail.  it's possible that
> diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> index 9e0502a..bac334f 100644
> --- a/repair/xfs_repair.c
> +++ b/repair/xfs_repair.c
> @@ -29,6 +29,7 @@
>  #include "prefetch.h"
>  #include "threads.h"
>  #include "progress.h"
> +#include "dinode.h"
>  
>  #define	rounddown(x, y)	(((x)/(y))*(y))
>  
> @@ -533,6 +534,7 @@ main(int argc, char **argv)
>  	setlocale(LC_ALL, "");
>  	bindtextdomain(PACKAGE, LOCALEDIR);
>  	textdomain(PACKAGE);
> +	dinode_bmbt_translation_init();
>  
>  	temp_mp = &xfs_m;
>  	setbuf(stdout, NULL);
> -- 
> 1.8.4.rc3
> 
> _______________________________________________
> xfs mailing list
> xfs@xxxxxxxxxxx
> http://oss.sgi.com/mailman/listinfo/xfs

_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs




[Index of Archives]     [Linux XFS Devel]     [Linux Filesystem Development]     [Filesystem Testing]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux