Re: [PATCH dwarves v3 7/8] dwarf_loader: multithreading with a job/worker model

Jiri Olsa <olsajiri@xxxxxxxxx> · Wed, 1 Jan 2025 17:56:16 +0100

On Sat, Dec 21, 2024 at 01:23:38AM +0000, Ihor Solodrai wrote:

SNIP

> diff --git a/btf_encoder.c b/btf_encoder.c
> index 90f1b9a..7e03ba4 100644
> --- a/btf_encoder.c
> +++ b/btf_encoder.c
> @@ -1326,7 +1326,7 @@ static void btf_encoder__delete_saved_funcs(struct btf_encoder *encoder)
>  	}
>  }
>  
> -int btf_encoder__add_saved_funcs(bool skip_encoding_inconsistent_proto)
> +static int btf_encoder__add_saved_funcs(bool skip_encoding_inconsistent_proto)
>  {
>  	struct btf_encoder_func_state **saved_fns, *s;
>  	struct btf_encoder *e = NULL;
> @@ -2134,7 +2134,6 @@ int btf_encoder__encode(struct btf_encoder *encoder, struct conf_load *conf)
>  	int err;
>  	size_t shndx;
>  
> -	/* for single-threaded case, saved funcs are added here */
>  	btf_encoder__add_saved_funcs(conf->skip_encoding_btf_inconsistent_proto);

should we check the return value in here? now it's the only caller

SNIP

> -struct dwarf_thread {
> -	struct dwarf_cus	*dcus;
> -	void			*data;
> +/* Multithreading is implemented using a job/worker model.
> + * cus_processing_queue represents a collection of jobs to be
> + * completed by workers.
> + * dwarf_loader__worker_thread is the worker loop, taking jobs from
> + * the queue, executing them and re-enqueuing new jobs as necessary.
> + * Implementation of this queue ensures two constraints:
> + *   * JOB_STEAL jobs for a CU are executed in the order of cu->id, as
> + *     a consequence JOB_STEAL jobs always run one at a time.
> + *   * Initial number of JOB_DECODE jobs in the queue is effectively a
> + *     limit on how many decoded CUs can be held in memory.
> + *     See dwarf_loader__decoded_cus_limit()
> + */
> +static struct {
> +	pthread_mutex_t mutex;
> +	pthread_cond_t job_added;
> +	/* next_cu_id determines the next CU ready to be stealed
> +	 * This enforces the order of CU stealing.
> +	 */
> +	uint32_t next_cu_id;
> +	struct list_head jobs;
> +} cus_processing_queue;
> +
> +enum job_type {
> +	JOB_NONE = 0,

nit, no need for JOB_NONE?

SNIP

> +static struct cu_processing_job *cus_queue__try_dequeue(void)
> +{
> +	struct cu_processing_job *job, *dequeued_job = NULL;
> +	struct list_head *pos, *tmp;
> +
> +	list_for_each_safe(pos, tmp, &cus_processing_queue.jobs) {
> +		job = list_entry(pos, struct cu_processing_job, node);
> +		if (job->type == JOB_STEAL && job->cu->id == cus_processing_queue.next_cu_id) {
> +			dequeued_job = job;
> +			break;
> +		}
> +		if (job->type == JOB_DECODE) {
> +			/* all JOB_STEALs are added to the head, so no viable JOB_STEAL available */
> +			dequeued_job = job;
> +			break;
> +		}
> +	}
> +	/* No jobs or only steals out of order */
> +	if (!dequeued_job)
> +		return NULL;
> +
> +	list_del(&dequeued_job->node);
> +	return job;

IIUC job == dequeued_job at this point, but I think we should return
dequeued_job to be clear

SNIP

> -static void *dwarf_cus__process_cu_thread(void *arg)
> +static struct cu *dwarf_loader__decode_next_cu(struct dwarf_cus *dcus)
>  {
> -	struct dwarf_thread *dthr = arg;
> -	struct dwarf_cus *dcus = dthr->dcus;
>  	uint8_t pointer_size, offset_size;
> +	struct dwarf_cu *dcu = NULL;
>  	Dwarf_Die die_mem, *cu_die;
> -	struct dwarf_cu *dcu;
> +	int err;
>  
> -	while (dwarf_cus__nextcu(dcus, &dcu, &die_mem, &cu_die, &pointer_size, &offset_size) == 0) {
> -		if (cu_die == NULL)
> +	err = dwarf_cus__nextcu(dcus, &dcu, &die_mem, &cu_die, &pointer_size, &offset_size);
> +
> +	if (err < 0)
> +		goto out_error;
> +	else if (err == 1) /* no more CUs */
> +		return NULL;
> +
> +	err = die__process_and_recode(cu_die, dcu->cu, dcus->conf);
> +	if (err)
> +		goto out_error;
> +	if (cu_die == NULL)
> +		return NULL;

should this check be placed before calling die__process_and_recode ?

SNIP

> -static int dwarf_cus__process_cus(struct dwarf_cus *dcus)
> -{
> -	if (dcus->conf->nr_jobs > 1)
> -		return dwarf_cus__threaded_process_cus(dcus);
> -
> -	return __dwarf_cus__process_cus(dcus);
> -}
> -
>  static int cus__merge_and_process_cu(struct cus *cus, struct conf_load *conf,
>  				     Dwfl_Module *mod, Dwarf *dw, Elf *elf,
>  				     const char *filename,
> @@ -3733,7 +3859,8 @@ static int cus__merge_and_process_cu(struct cus *cus, struct conf_load *conf,
>  	if (cu__resolve_func_ret_types_optimized(cu) != LSK__KEEPIT)
>  		goto out_abort;
>  
> -	if (cus__finalize(cus, cu, conf, NULL) == LSK__STOP_LOADING)
> +	cu__finalize(cu, cus, conf);
> +	if (cus__steal_now(cus, cu, conf) == LSK__STOP_LOADING)
>  		goto out_abort;
>  
>  	return 0;
> @@ -3765,7 +3892,8 @@ static int cus__load_module(struct cus *cus, struct conf_load *conf,
>  	}
>  
>  	if (type_cu != NULL) {
> -		type_lsk = cu__finalize(type_cu, cus, conf, NULL);
> +		cu__finalize(type_cu, cus, conf);
> +		type_lsk = cus__steal_now(cus, type_cu, conf);
>  		if (type_lsk == LSK__DELETE) {
>  			cus__remove(cus, type_cu);
>  		}
> @@ -3787,6 +3915,7 @@ static int cus__load_module(struct cus *cus, struct conf_load *conf,
>  			.type_dcu = type_cu ? &type_dcu : NULL,
>  			.build_id = build_id,
>  			.build_id_len = build_id_len,
> +			.nr_cus_created = 0,

should go to the previous patch? if needed at all..

thanks,
jirka

>  		};
>  		res = dwarf_cus__process_cus(&dcus);
>  	}
> diff --git a/dwarves.c b/dwarves.c
> index ae512b9..7c3e878 100644
> --- a/dwarves.c
> +++ b/dwarves.c
> @@ -530,48 +530,6 @@ void cus__unlock(struct cus *cus)
>  	pthread_mutex_unlock(&cus->mutex);
>  }
>  

SNIP