On Sun 29 Dec 21:00 PST 2019, Sibi Sankar wrote:

[..]
> diff --git a/drivers/soc/qcom/pdr_interface.c b/drivers/soc/qcom/pdr_interface.c
[..]
> +static int servreg_locator_new_server(struct qmi_handle *qmi,
> +                                      struct qmi_service *svc)
> +{
> +        struct pdr_handle *pdr = container_of(qmi, struct pdr_handle,
> +                                              servloc_client);
> +        struct pdr_service *pds, *tmp;
> +
> +        /* Create a Local client port for QMI communication */
> +        pdr->servloc_addr.sq_family = AF_QIPCRTR;
> +        pdr->servloc_addr.sq_node = svc->node;
> +        pdr->servloc_addr.sq_port = svc->port;
> +
> +        mutex_lock(&pdr->locator_lock);
> +        pdr->locator_available = true;
> +        mutex_unlock(&pdr->locator_lock);
> +
> +        /* Service pending lookup requests */
> +        mutex_lock(&pdr->list_lock);
> +        list_for_each_entry_safe(pds, tmp, &pdr->lookups, node) {

No need to make this _safe, as you're not modifying the list in the
loop.

> +                if (pds->need_servreg_lookup)
> +                        schedule_work(&pdr->servloc_work);
> +        }
> +        mutex_unlock(&pdr->list_lock);
> +
> +        return 0;
> +}

[..]

> +static void pdr_servreg_link_create(struct pdr_handle *pdr,
> +                                    struct pdr_service *pds)
> +{
> +        struct pdr_service *pds_iter, *tmp;
> +        bool link_exists = false;
> +
> +        /* Check if a QMI link to SERVREG instance already exists */
> +        mutex_lock(&pdr->list_lock);
> +        list_for_each_entry_safe(pds_iter, tmp, &pdr->lookups, node) {
> +                if (pds_iter->instance == pds->instance &&

Flip this condition around and continue if it's not a match, to save
indentation and to split the two expressions into two distinct checks.

> +                    strcmp(pds_iter->service_path, pds->service_path)) {

Isn't this just saying:

        if (pds_iter == pds)
                continue;

with the purpose of link_exists being !empty(set(lookups) - pds)?

But if I read pdr_add_lookup() correctly it's possible that a client
could call pdr_add_lookup() more than once before pdr_servloc_work()
is scheduled, in which case "set(lookups) - pds" isn't empty and as
such you won't add the lookup?

> +                        link_exists = true;
> +                        pds->service_connected = pds_iter->service_connected;
> +                        if (pds_iter->service_connected)
> +                                pds->need_servreg_register = true;
> +                        else
> +                                pds->need_servreg_remove = true;
> +                        queue_work(pdr->servreg_wq, &pdr->servreg_work);
> +                        break;
> +                }
> +        }

[..]

> +static void pdr_servloc_work(struct work_struct *work)
> +{
> +        struct pdr_handle *pdr = container_of(work, struct pdr_handle,
> +                                              servloc_work);
> +        struct pdr_service *pds, *tmp;
> +        int ret;
> +
> +        /* Bail out early if PD Mapper is not up */
> +        mutex_lock(&pdr->locator_lock);
> +        if (!pdr->locator_available) {
> +                mutex_unlock(&pdr->locator_lock);
> +                pr_warn("PDR: SERVICE LOCATOR service not available\n");
> +                return;
> +        }
> +        mutex_unlock(&pdr->locator_lock);
> +
> +        mutex_lock(&pdr->list_lock);
> +        list_for_each_entry_safe(pds, tmp, &pdr->lookups, node) {

As written right now you don't need _safe here, because in the only
case where you're modifying the list you end up exiting the loop.

> +                if (!pds->need_servreg_lookup)
> +                        continue;
> +
> +                pds->need_servreg_lookup = false;
> +                mutex_unlock(&pdr->list_lock);

You should probably just hold on to list_lock over this entire loop.

> +
> +                ret = pdr_locate_service(pdr, pds);
> +                if (ret < 0) {
> +                        if (ret == -ENXIO)
> +                                pds->state = SERVREG_LOCATOR_UNKNOWN_SERVICE;
> +                        else if (ret == -EAGAIN)
> +                                pds->state = SERVREG_LOCATOR_DB_UPDATED;

Isn't this something that we should recover from?
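Something along these lines perhaps (only a sketch, not tested;
__pdr_servreg_link_create() is a hypothetical variant of
pdr_servreg_link_create() that expects the caller to already hold
list_lock, since the version above grabs the lock itself):

        mutex_lock(&pdr->list_lock);
        list_for_each_entry_safe(pds, tmp, &pdr->lookups, node) {
                if (!pds->need_servreg_lookup)
                        continue;

                ret = pdr_locate_service(pdr, pds);
                if (ret == -EAGAIN) {
                        /*
                         * The locator DB was updated mid-lookup; keep
                         * need_servreg_lookup set and run the work once
                         * more, rather than failing the lookup
                         * permanently. (Some retry limit is probably
                         * wanted here.)
                         */
                        schedule_work(&pdr->servloc_work);
                        continue;
                }

                pds->need_servreg_lookup = false;

                if (ret < 0) {
                        pds->state = ret == -ENXIO ?
                                     SERVREG_LOCATOR_UNKNOWN_SERVICE :
                                     SERVREG_LOCATOR_ERR;
                        pr_err("PDR: service lookup for %s failed: %d\n",
                               pds->service_name, ret);

                        /* deleting and continuing is why _safe is needed */
                        list_del(&pds->node);

                        /* note: the client callback now runs under list_lock */
                        mutex_lock(&pdr->status_lock);
                        pdr->status(pdr, pds);
                        mutex_unlock(&pdr->status_lock);
                        kfree(pds);
                        continue;
                }

                __pdr_servreg_link_create(pdr, pds);
        }
        mutex_unlock(&pdr->list_lock);

That also keeps the walk going past the first entry, which ties in with
my comment on the return statement further down.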
> +                        else
> +                                pds->state = SERVREG_LOCATOR_ERR;
> +
> +                        pr_err("PDR: service lookup for %s failed: %d\n",
> +                               pds->service_name, ret);
> +
> +                        /* Remove from lookup list */
> +                        mutex_lock(&pdr->list_lock);
> +                        list_del(&pds->node);

What should I do in my driver when this happens?

> +                        mutex_unlock(&pdr->list_lock);
> +
> +                        /* Notify Lookup failed */
> +                        mutex_lock(&pdr->status_lock);
> +                        pdr->status(pdr, pds);
> +                        mutex_unlock(&pdr->status_lock);
> +                        kfree(pds);
> +                } else {
> +                        pdr_servreg_link_create(pdr, pds);
> +                }
> +
> +                return;

There might be more pds entries with need_servreg_lookup in the list,
shouldn't we allow this to continue? That would imply holding on to
the list_lock over the entire loop, though, which I think looks fine.

> +        }
> +        mutex_unlock(&pdr->list_lock);
> +}
> +
> +/**
> + * pdr_add_lookup() - register a tracking request for a PD
> + * @pdr: PDR client handle
> + * @service_name: service name of the tracking request
> + * @service_path: service path of the tracking request
> + *
> + * Registering a pdr lookup allows for tracking the life cycle of the PD.
> + *
> + * Return: 0 on success, negative errno on failure.
> + */
> +int pdr_add_lookup(struct pdr_handle *pdr, const char *service_name,
> +                   const char *service_path)
> +{
> +        struct pdr_service *pds, *pds_iter, *tmp;
> +        int ret;
> +
> +        if (!service_name || strlen(service_name) > SERVREG_NAME_LENGTH ||
> +            !service_path || strlen(service_path) > SERVREG_NAME_LENGTH)
> +                return -EINVAL;
> +
> +        pds = kzalloc(sizeof(*pds), GFP_KERNEL);
> +        if (!pds)
> +                return -ENOMEM;
> +
> +        pds->service = SERVREG_NOTIFIER_SERVICE;
> +        strcpy(pds->service_name, service_name);
> +        strcpy(pds->service_path, service_path);
> +        pds->need_servreg_lookup = true;
> +
> +        mutex_lock(&pdr->list_lock);
> +        list_for_each_entry_safe(pds_iter, tmp, &pdr->lookups, node) {

No _safe needed here either.

> +                if (!strcmp(pds_iter->service_path, service_path)) {
> +                        mutex_unlock(&pdr->list_lock);
> +                        ret = -EALREADY;
> +                        goto err;
> +                }
> +        }
> +
> +        list_add(&pds->node, &pdr->lookups);
> +        mutex_unlock(&pdr->list_lock);
> +
> +        schedule_work(&pdr->servloc_work);
> +
> +        return 0;
> +err:
> +        kfree(pds);
> +
> +        return ret;
> +}
> +EXPORT_SYMBOL(pdr_add_lookup);
> +
> +/**
> + * pdr_restart_pd() - restart PD
> + * @pdr: PDR client handle
> + * @service_path: service path of restart request
> + *
> + * Restarts the PD tracked by the PDR client handle for a given service path.
> + *
> + * Return: 0 on success, negative errno on failure.
> + */
> +int pdr_restart_pd(struct pdr_handle *pdr, const char *service_path)
> +{
> +        struct servreg_restart_pd_req req;
> +        struct servreg_restart_pd_resp resp;
> +        struct pdr_service *pds = NULL, *pds_iter, *tmp;
> +        struct qmi_txn txn;
> +        int ret;
> +
> +        if (!service_path || strlen(service_path) > SERVREG_NAME_LENGTH)
> +                return -EINVAL;
> +
> +        mutex_lock(&pdr->list_lock);
> +        list_for_each_entry_safe(pds_iter, tmp, &pdr->lookups, node) {
> +                if (!pds_iter->service_connected)
> +                        continue;
> +
> +                if (!strcmp(pds_iter->service_path, service_path)) {
> +                        pds = pds_iter;
> +                        break;
> +                }
> +        }
> +        mutex_unlock(&pdr->list_lock);
> +
> +        if (!pds)

Given that you may only call pdr_restart_pd() on something created by
first calling pdr_add_lookup(), how about returning the struct
pdr_service from pdr_add_lookup() instead and then having the client
pass that as an argument to this function?

Most clients don't care about pdr_restart_pd(), so they would only have
to IS_ERR(pdr_add_lookup()) anyway, and the ones that do care can carry
the returned pointer. Note that struct pdr_service doesn't have to be
defined in a way that clients can dereference it.
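To illustrate (hypothetical signatures, not what's in the patch; the
service name/path strings below are just examples):

        /* clients only ever see struct pdr_service as an opaque cookie */
        struct pdr_service;

        struct pdr_service *pdr_add_lookup(struct pdr_handle *pdr,
                                           const char *service_name,
                                           const char *service_path);
        int pdr_restart_pd(struct pdr_handle *pdr, struct pdr_service *pds);

A client would then do:

        pds = pdr_add_lookup(pdr, "avs/audio", "msm/adsp/audio_pd");
        if (IS_ERR(pds))
                return PTR_ERR(pds);

        ret = pdr_restart_pd(pdr, pds);

and you'd get rid of the list walk and strcmp() in pdr_restart_pd()
entirely.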
> +                return -EINVAL;
> +
> +        /* Prepare req message */
> +        strcpy(req.service_path, pds->service_path);
> +
> +        ret = qmi_txn_init(&pdr->servreg_client, &txn,
> +                           servreg_restart_pd_resp_ei,
> +                           &resp);
> +        if (ret < 0)
> +                return ret;
> +
> +        ret = qmi_send_request(&pdr->servreg_client, &pdr->servreg_addr,
> +                               &txn, SERVREG_RESTART_PD_REQ,
> +                               SERVREG_RESTART_PD_REQ_MAX_LEN,
> +                               servreg_restart_pd_req_ei, &req);
> +        if (ret < 0) {
> +                qmi_txn_cancel(&txn);
> +                return ret;
> +        }
> +
> +        ret = qmi_txn_wait(&txn, 5 * HZ);
> +        if (ret < 0) {
> +                pr_err("PDR: %s PD restart txn wait failed: %d\n",
> +                       pds->service_path, ret);
> +                return ret;
> +        }
> +
> +        /* Check response if PDR is disabled */
> +        if (resp.resp.result == QMI_RESULT_FAILURE_V01 &&
> +            resp.resp.error == QMI_ERR_DISABLED_V01) {
> +                pr_err("PDR: %s PD restart is disabled: 0x%x\n",
> +                       pds->service_path, resp.resp.error);
> +                return -EOPNOTSUPP;
> +        }
> +
> +        /* Check the response for other error case*/
> +        if (resp.resp.result != QMI_RESULT_SUCCESS_V01) {
> +                pr_err("PDR: %s request for PD restart failed: 0x%x\n",
> +                       pds->service_path, resp.resp.error);
> +                return -EREMOTEIO;
> +        }
> +
> +        return ret;
> +}
> +EXPORT_SYMBOL(pdr_restart_pd);

[..]

> +/**
> + * struct pdr_service - context to track lookups/restarts
> + * @service_name: name of the service running on the PD
> + * @service_path: service path of the PD
> + * @service_data_valid: indicates if service_data field has valid data
> + * @service_data: service data provided by servreg_locator service
> + * @need_servreg_lookup: state flag for tracking servreg lookup requests
> + * @need_servreg_register: state flag for tracking pending servreg register
> + * @need_servreg_remove: state flag for tracking pending servreg remove
> + * @service_connected: current state of servreg_notifier qmi service
> + * @state: current state of PD
> + * @service: servreg_notifer service type
> + * @instance: instance id of the @service
> + * @priv: handle for client's use
> + * @node: list_head for house keeping
> + */
> +struct pdr_service {

This is primarily internal bookkeeping, how about not exposing it to
the clients? This would imply that status() would have to be called
with pdr_service->priv and pdr_service->state as arguments instead
(see the sketch at the end of this mail).

> +        char service_name[SERVREG_NAME_LENGTH + 1];
> +        char service_path[SERVREG_NAME_LENGTH + 1];
> +
> +        u8 service_data_valid;
> +        u32 service_data;
> +
> +        bool need_servreg_lookup;
> +        bool need_servreg_register;
> +        bool need_servreg_remove;
> +        bool service_connected;
> +        int state;
> +
> +        unsigned int instance;
> +        unsigned int service;
> +
> +        void *priv;
> +        struct list_head node;
> +};
> +
[..]
> +        void (*status)(struct pdr_handle *pdr, struct pdr_service *pds);
> +};
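With struct pdr_service kept private, the callback could instead look
something like this (again, only a sketch of the suggested signature):

        /* clients get an opaque handle, never the definition */
        struct pdr_service;

        /* PD state and the client's own cookie are passed directly */
        void (*status)(struct pdr_handle *pdr, int state, void *priv);

Regards,
Bjorn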