On Sat, Dec 21, 2024 at 01:23:38AM +0000, Ihor Solodrai wrote:

SNIP

> diff --git a/btf_encoder.c b/btf_encoder.c
> index 90f1b9a..7e03ba4 100644
> --- a/btf_encoder.c
> +++ b/btf_encoder.c
> @@ -1326,7 +1326,7 @@ static void btf_encoder__delete_saved_funcs(struct btf_encoder *encoder)
>  	}
>  }
>  
> -int btf_encoder__add_saved_funcs(bool skip_encoding_inconsistent_proto)
> +static int btf_encoder__add_saved_funcs(bool skip_encoding_inconsistent_proto)
>  {
>  	struct btf_encoder_func_state **saved_fns, *s;
>  	struct btf_encoder *e = NULL;
> @@ -2134,7 +2134,6 @@ int btf_encoder__encode(struct btf_encoder *encoder, struct conf_load *conf)
>  	int err;
>  	size_t shndx;
>  
> -	/* for single-threaded case, saved funcs are added here */
>  	btf_encoder__add_saved_funcs(conf->skip_encoding_btf_inconsistent_proto);

should we check the return value here? it's the only caller now
(a rough sketch of what I mean is below, after the queue comments)

SNIP

> -struct dwarf_thread {
> -	struct dwarf_cus *dcus;
> -	void *data;
> +/* Multithreading is implemented using a job/worker model.
> + * cus_processing_queue represents a collection of jobs to be
> + * completed by workers.
> + * dwarf_loader__worker_thread is the worker loop, taking jobs from
> + * the queue, executing them and re-enqueuing new jobs as necessary.
> + * Implementation of this queue ensures two constraints:
> + *   * JOB_STEAL jobs for a CU are executed in the order of cu->id, as
> + *     a consequence JOB_STEAL jobs always run one at a time.
> + *   * Initial number of JOB_DECODE jobs in the queue is effectively a
> + *     limit on how many decoded CUs can be held in memory.
> + *     See dwarf_loader__decoded_cus_limit()
> + */
> +static struct {
> +	pthread_mutex_t mutex;
> +	pthread_cond_t job_added;
> +	/* next_cu_id determines the next CU ready to be stealed
> +	 * This enforces the order of CU stealing.
> +	 */
> +	uint32_t next_cu_id;
> +	struct list_head jobs;
> +} cus_processing_queue;
> +
> +enum job_type {
> +	JOB_NONE = 0,

nit, no need for JOB_NONE?
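
i.e. unless something relies on a zero-initialized job carrying a
distinct "no job" type, the enum could start with the real job kinds
directly, something like (just a sketch):

	enum job_type {
		JOB_DECODE,
		JOB_STEAL,
	};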
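
also, to be concrete about the return value question on
btf_encoder__add_saved_funcs above, I was thinking of something like
this (untested, assuming btf_encoder__encode can simply propagate the
error):

	err = btf_encoder__add_saved_funcs(conf->skip_encoding_btf_inconsistent_proto);
	if (err)
		return err;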
SNIP

> +static struct cu_processing_job *cus_queue__try_dequeue(void)
> +{
> +	struct cu_processing_job *job, *dequeued_job = NULL;
> +	struct list_head *pos, *tmp;
> +
> +	list_for_each_safe(pos, tmp, &cus_processing_queue.jobs) {
> +		job = list_entry(pos, struct cu_processing_job, node);
> +		if (job->type == JOB_STEAL && job->cu->id == cus_processing_queue.next_cu_id) {
> +			dequeued_job = job;
> +			break;
> +		}
> +		if (job->type == JOB_DECODE) {
> +			/* all JOB_STEALs are added to the head, so no viable JOB_STEAL available */
> +			dequeued_job = job;
> +			break;
> +		}
> +	}
> +	/* No jobs or only steals out of order */
> +	if (!dequeued_job)
> +		return NULL;
> +
> +	list_del(&dequeued_job->node);
> +	return job;

IIUC job == dequeued_job at this point, but I think we should return dequeued_job to be clear

SNIP

> -static void *dwarf_cus__process_cu_thread(void *arg)
> +static struct cu *dwarf_loader__decode_next_cu(struct dwarf_cus *dcus)
>  {
> -	struct dwarf_thread *dthr = arg;
> -	struct dwarf_cus *dcus = dthr->dcus;
>  	uint8_t pointer_size, offset_size;
> +	struct dwarf_cu *dcu = NULL;
>  	Dwarf_Die die_mem, *cu_die;
> -	struct dwarf_cu *dcu;
> +	int err;
>  
> -	while (dwarf_cus__nextcu(dcus, &dcu, &die_mem, &cu_die, &pointer_size, &offset_size) == 0) {
> -		if (cu_die == NULL)
> +	err = dwarf_cus__nextcu(dcus, &dcu, &die_mem, &cu_die, &pointer_size, &offset_size);
> +
> +	if (err < 0)
> +		goto out_error;
> +	else if (err == 1) /* no more CUs */
> +		return NULL;
> +
> +	err = die__process_and_recode(cu_die, dcu->cu, dcus->conf);
> +	if (err)
> +		goto out_error;
> +	if (cu_die == NULL)
> +		return NULL;

should this check be placed before calling die__process_and_recode?
(see the sketch at the end)

SNIP

> -static int dwarf_cus__process_cus(struct dwarf_cus *dcus)
> -{
> -	if (dcus->conf->nr_jobs > 1)
> -		return dwarf_cus__threaded_process_cus(dcus);
> -
> -	return __dwarf_cus__process_cus(dcus);
> -}
> -
>  static int cus__merge_and_process_cu(struct cus *cus, struct conf_load *conf,
>  				     Dwfl_Module *mod, Dwarf *dw, Elf *elf,
>  				     const char *filename,
> @@ -3733,7 +3859,8 @@ static int cus__merge_and_process_cu(struct cus *cus, struct conf_load *conf,
>  	if (cu__resolve_func_ret_types_optimized(cu) != LSK__KEEPIT)
>  		goto out_abort;
>  
> -	if (cus__finalize(cus, cu, conf, NULL) == LSK__STOP_LOADING)
> +	cu__finalize(cu, cus, conf);
> +	if (cus__steal_now(cus, cu, conf) == LSK__STOP_LOADING)
>  		goto out_abort;
>  
>  	return 0;
> @@ -3765,7 +3892,8 @@ static int cus__load_module(struct cus *cus, struct conf_load *conf,
>  	}
>  
>  	if (type_cu != NULL) {
> -		type_lsk = cu__finalize(type_cu, cus, conf, NULL);
> +		cu__finalize(type_cu, cus, conf);
> +		type_lsk = cus__steal_now(cus, type_cu, conf);
>  		if (type_lsk == LSK__DELETE) {
>  			cus__remove(cus, type_cu);
>  		}
> @@ -3787,6 +3915,7 @@ static int cus__load_module(struct cus *cus, struct conf_load *conf,
>  		.type_dcu = type_cu ? &type_dcu : NULL,
>  		.build_id = build_id,
>  		.build_id_len = build_id_len,
> +		.nr_cus_created = 0,

should this go in the previous patch? if it's needed at all..

thanks,
jirka

>  	};
>  	res = dwarf_cus__process_cus(&dcus);
>  }
> diff --git a/dwarves.c b/dwarves.c
> index ae512b9..7c3e878 100644
> --- a/dwarves.c
> +++ b/dwarves.c
> @@ -530,48 +530,6 @@ void cus__unlock(struct cus *cus)
>  	pthread_mutex_unlock(&cus->mutex);
>  }
>  

SNIP
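
PS: to be concrete about the cu_die check in dwarf_loader__decode_next_cu
above, I meant moving it right before the die__process_and_recode() call,
roughly (untested sketch, same statements just reordered):

	err = dwarf_cus__nextcu(dcus, &dcu, &die_mem, &cu_die, &pointer_size, &offset_size);
	if (err < 0)
		goto out_error;
	else if (err == 1) /* no more CUs */
		return NULL;

	/* nothing to process, avoid passing a NULL die down */
	if (cu_die == NULL)
		return NULL;

	err = die__process_and_recode(cu_die, dcu->cu, dcus->conf);
	if (err)
		goto out_error;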