Hello Muneendra, On Sun, 2022-01-23 at 15:13 -0800, Muneendra Kumar wrote: > This patch incorporates the functionality to handle > FPIN ELS events present as part of FCTransport daemon > (available in EPEL8) into the multipathd. This helps us to > reduce the response time to react and take the necessary actions > on receiving the FPIN events. > > This patch currently support FPIN-Li Events. > > It adds a new thread to listen for ELS frames from driver and on > receiving the frame payload, push the payload to a list and notify > the > fpin_els_li_consumer thread to process it.Once consumer thread is > notified, it returns to listen for more ELS frames from driver. > > The consumer thread process the ELS frames and moves the devices > paths > which are affected due to link integrity to marginal path groups. > This also sets the associated portstate to marginal. > The paths which are set to marginal path group will be unset > on receiving the RSCN events > > Signed-off-by: Muneendra Kumar <muneendra.kumar@xxxxxxxxxxxx> > Signed-off-by: Benjamin Marzinski <bmarzins@xxxxxxxxxx> > --- > > Notes: > V2: > Removed the newly added config option and added a new > value "fpin" to the marginal_pathgroups option. > > Added support to handle the multipathd reconfigure case. > > Addressed the review comments from Martin > > Makefile.inc | 13 + > libmultipath/Makefile | 5 + > libmultipath/dict.c | 56 ++- > libmultipath/libmultipath.version | 1 + > libmultipath/propsel.c | 47 ++- > libmultipath/structs.h | 7 + > multipath/multipath.conf.5 | 19 +- > multipathd/Makefile | 10 + > multipathd/fpin.h | 20 ++ > multipathd/fpin_handlers.c | 547 > ++++++++++++++++++++++++++++++ > multipathd/main.c | 43 ++- > 11 files changed, 752 insertions(+), 16 deletions(-) > create mode 100644 multipathd/fpin.h > create mode 100644 multipathd/fpin_handlers.c > This looks good. I only have a few minor remarks, see below. 
Martin > +/*set/unset the path state to marginal*/ > +static int fpin_set_pathstate(struct path *pp, bool set) > +{ > + const char *action = set ? "set" : "unset"; > + > + if (!pp || !pp->mpp || !pp->mpp->alias) > + return -1; > + > + condlog(3, "\n%s: %s marginal path %s (fpin)", > + action, pp->mpp->alias, pp->dev_t); > + if (set) > + pp->marginal = 1; > + else > + pp->marginal = 0; Nitpick: why not simply "pp->marginal = set;" instead of the if/else? > + > + pp->mpp->fpin_must_reload = true; > + return 0; > + > +} > + > +/* This will unset marginal state of a device*/ > +static void fpin_path_unsetmarginal(char *devname, struct vectors > *vecs) > +{ > + struct path *pp; > + > + pp = find_path_by_dev(vecs->pathvec, devname); > + if (!pp) > + pp = find_path_by_devt(vecs->pathvec, devname); > + > + fpin_set_pathstate(pp, 0); Nitpick: as you use bool for "set", you should be using "false" and "true" when calling it. > +} > + > +/*This will set the marginal state of a device*/ > +static int fpin_path_setmarginal(struct path *pp) > +{ > + return fpin_set_pathstate(pp, 1); > +} > + > +/* Unsets all the devices in the list from marginal state */ > +static void > +fpin_unset_marginal_dev(uint32_t host_num, struct vectors *vecs) > +{ > + struct marginal_dev_list *tmp_marg = NULL; > + struct list_head *current_node = NULL; > + struct list_head *temp = NULL; > + struct multipath *mpp; > + int ret = 0; > + int i; > + > + pthread_cleanup_push(cleanup_lock, &vecs->lock); > + lock(&vecs->lock); > + pthread_testcancel(); > + > + pthread_mutex_lock(&fpin_li_marginal_dev_mutex); > + pthread_cleanup_push(cleanup_mutex, > &fpin_li_marginal_dev_mutex); > + pthread_testcancel(); > + if (list_empty(&fpin_li_marginal_dev_list_head)) { > + condlog(3, "Marginal List is empty\n"); > + goto empty; > + } I don't think you need this. list_for_each_safe will be (almost) a noop in this case. Also, there is no need to log at level 3 that you aren't doing anything; use level 4. 
> + list_for_each_safe(current_node, temp, > &fpin_li_marginal_dev_list_head) { why not use list_for_each_entry_safe() here? > + tmp_marg = list_entry(current_node, > + struct marginal_dev_list, > + node); > + > + if (tmp_marg->host_num != host_num) > + continue; > + condlog(4, " unsetting marginal dev: is %s %d\n", > + tmp_marg->dev_t, tmp_marg->host_num); > + fpin_path_unsetmarginal(tmp_marg->dev_t, vecs); > + list_del(current_node); > + free(tmp_marg); > + } > +empty: > + pthread_cleanup_pop(1); > + > + vector_foreach_slot_backwards(vecs->mpvec, mpp, i) { Any special reason why you walk the vector backwards? > + if (mpp->fpin_must_reload) { > + ret = reload_and_sync_map(mpp, vecs, 0); > + if (ret == 2) > + condlog(2, "map removed during > reload"); > + else > + mpp->fpin_must_reload = false; > + } > + } > + pthread_cleanup_pop(1); > +} > + > +/* > + * On Receiving the frame from HBA driver, insert the frame into > link > + * integrity frame list which will be picked up later by consumer > thread for > + * processing. > + */ > +static int > +fpin_els_add_li_frame(struct fc_nl_event *fc_event) > +{ > + struct els_marginal_list *els_mrg = NULL; > + int ret = 0; > + > + if (fc_event->event_datalen > FC_PAYLOAD_MAXLEN) > + return -EINVAL; > + > + pthread_mutex_lock(&fpin_li_mutex); > + pthread_cleanup_push(cleanup_mutex, &fpin_li_mutex); > + pthread_testcancel(); > + els_mrg = calloc(sizeof(struct els_marginal_list), 1); It doesn't matter much, but the struct size is the 2nd arg to calloc. 
I recommend "calloc(1, sizeof(*els_mrg))" > + if (els_mrg != NULL) { > + els_mrg->host_num = fc_event->host_no; > + els_mrg->event_code = fc_event->event_code; > + els_mrg->length = fc_event->event_datalen; > + memcpy(els_mrg->payload, &(fc_event->event_data), > fc_event->event_datalen); > + list_add_tail(&els_mrg->node, > &els_marginal_list_head); > + pthread_cond_signal(&fpin_li_cond); > + } else > + ret = -ENOMEM; > + pthread_cleanup_pop(1); > + return ret; > + > +} > + > +/*Sets the rport port_state to marginal*/ > +static void fpin_set_rport_marginal(struct udev_device *rport_dev) > +{ > + sysfs_attr_set_value(rport_dev, "port_state", > + "Marginal", strlen("Marginal")); > +} > + > +/*Add the marginal devices info into the list*/ > +static void > +fpin_add_marginal_dev_info(uint32_t host_num, char *devname) > +{ > + struct marginal_dev_list *newdev = NULL; > + > + newdev = (struct marginal_dev_list *) calloc(1, > + sizeof(struct marginal_dev_list)); Nitpick: No need to cast here. Without the cast, the code fits on one line ;-) > + if (newdev != NULL) { > + newdev->host_num = host_num; > + strlcpy(newdev->dev_t, devname, BLK_DEV_SIZE); > + condlog(4, "\n%s hostno %d devname %s\n", __func__, > + host_num, newdev->dev_t); > + pthread_mutex_lock(&fpin_li_marginal_dev_mutex); > + list_add_tail(&(newdev->node), > + &fpin_li_marginal_dev_list_head); > + pthread_mutex_unlock(&fpin_li_marginal_dev_mutex); > + } > +} > + > +/* > + * This function goes through the vecs->pathvec, and for > + * each path, check that the host number, > + * the target WWPN associated with the path matches > + * with the els wwpn and sets the path and port state to > + * Marginal > + */ > +static int fpin_chk_wwn_setpath_marginal(uint16_t host_num, struct > vectors *vecs, > + uint64_t els_wwpn) > +{ > + struct path *pp; > + struct multipath *mpp; > + int i, k; > + char rport_id[42]; > + const char *value = NULL; > + struct udev_device *rport_dev = NULL; > + uint64_t wwpn; > + int ret = 0; > + 
> + pthread_cleanup_push(cleanup_lock, &vecs->lock); > + lock(&vecs->lock); > + pthread_testcancel(); > + > + vector_foreach_slot(vecs->pathvec, pp, k) { > + /* Checks the host number and also for the SCSI FCP > */ > + if (pp->sg_id.proto_id != SCSI_PROTOCOL_FCP || > host_num != pp->sg_id.host_no) > + continue; > + sprintf(rport_id, "rport-%d:%d-%d", > + pp->sg_id.host_no, pp->sg_id.channel, > pp->sg_id.transport_id); > + rport_dev = > udev_device_new_from_subsystem_sysname(udev, > + "fc_remote_ports", rport_id); > + if (!rport_dev) { > + condlog(2, "%s: No fc_remote_port device for > '%s'", pp->dev, > + rport_id); > + continue; > + } > + pthread_cleanup_push(_udev_device_unref, rport_dev); > + value = udev_device_get_sysattr_value(rport_dev, > "port_name"); > + if (!value) > + goto unref; > + > + if (value) > + wwpn = strtol(value, NULL, 16); > + /* > + * If the port wwpn matches sets the path and port > state > + * to marginal > + */ > + if (wwpn == els_wwpn) { > + ret = fpin_path_setmarginal(pp); > + if (ret < 0) > + goto unref; > + fpin_set_rport_marginal(rport_dev); > + fpin_add_marginal_dev_info(host_num, pp- > >dev); > + } > +unref: > + pthread_cleanup_pop(1); > + } > + > + vector_foreach_slot_backwards(vecs->mpvec, mpp, i) { Again, why backwards? > + if (mpp->fpin_must_reload) { > + ret = reload_and_sync_map(mpp, vecs, 0); > + if (ret == 2) > + condlog(2, "map removed during > reload"); > + else > + mpp->fpin_must_reload = false; > + } > + } > + pthread_cleanup_pop(1); > + return ret; > +} > + > +/* > + * This function loops around all the impacted wwns received as part > of els > + * frame and sets the associated path and port states to marginal. 
> + */ > +static int > +fpin_parse_li_els_setpath_marginal(uint16_t host_num, struct > fc_tlv_desc *tlv, > + struct vectors *vecs) > +{ > + uint32_t wwn_count = 0, iter = 0; > + uint64_t wwpn; > + struct fc_fn_li_desc *li_desc = (struct fc_fn_li_desc *)tlv; > + int count = 0; > + int ret = 0; > + > + /* Update the wwn to list */ > + wwn_count = be32_to_cpu(li_desc->pname_count); > + condlog(4, "Got wwn count as %d\n", wwn_count); > + > + for (iter = 0; iter < wwn_count; iter++) { > + wwpn = be64_to_cpu(li_desc->pname_list[iter]); > + ret = fpin_chk_wwn_setpath_marginal(host_num, vecs, > wwpn); > + if (ret < 0) > + condlog(2, "failed to set the path marginal > associated with wwpn: 0x%lx\n", wwpn); > + > + count++; > + } > + return count; > +} > + > +/* > + * This function process the ELS frame received from HBA driver, > + * and sets the path associated with the port wwn to marginal > + * and also set the port state to marginal. > + */ > +static int > +fpin_process_els_frame(uint16_t host_num, char *fc_payload, struct > vectors *vecs) > +{ > + > + int count = -1; > + struct fc_els_fpin *fpin = (struct fc_els_fpin *)fc_payload; > + struct fc_tlv_desc *tlv; > + > + tlv = (struct fc_tlv_desc *)&fpin->fpin_desc[0]; > + > + /* > + * Parse the els frame and set the affected paths and port > + * state to marginal > + */ > + count = fpin_parse_li_els_setpath_marginal(host_num, tlv, > vecs); > + if (count <= 0) > + condlog(4, "Could not find any WWNs, ret = %d\n", > + count); > + return count; > +} > + > +/* > + * This function process the FPIN ELS frame received from HBA > driver, > + * and push the frame to appropriate frame list. Currently we have > only FPIN > + * LI frame list. 
> + */ > +static int > +fpin_handle_els_frame(struct fc_nl_event *fc_event) > +{ > + int ret = -1; > + uint32_t els_cmd; > + struct fc_els_fpin *fpin = (struct fc_els_fpin *)&fc_event- > >event_data; > + struct fc_tlv_desc *tlv; > + uint32_t dtag; > + > + els_cmd = (uint32_t)fc_event->event_data; > + tlv = (struct fc_tlv_desc *)&fpin->fpin_desc[0]; > + dtag = be32_to_cpu(tlv->desc_tag); > + condlog(4, "Got CMD in add as 0x%x fpin_cmd 0x%x dtag > 0x%x\n", > + els_cmd, fpin->fpin_cmd, dtag); > + > + if ((fc_event->event_code == FCH_EVT_LINK_FPIN) || > + (fc_event->event_code == FCH_EVT_LINKUP) || > + (fc_event->event_code == FCH_EVT_RSCN)) { > + > + if (els_cmd == ELS_FPIN) { > + /* > + * Check the type of fpin by checking the tag > info > + * At present we are supporting only LI > events > + */ > + if (dtag == ELS_DTAG_LNK_INTEGRITY) { > + /*Push the Payload to FPIN frame > queue. */ > + ret = > fpin_els_add_li_frame(fc_event); > + if (ret != 0) > + condlog(0, "Failed to process > LI frame with error %d\n", > + ret); > + } else { > + condlog(4, "Unsupported FPIN received > 0x%x\n", dtag); > + return ret; > + } > + } else { > + /*Push the Payload to FPIN frame queue. 
*/ > + ret = fpin_els_add_li_frame(fc_event); > + if (ret != 0) > + condlog(0, "Failed to process > Linkup/RSCN event with error %d evnt %d\n", > + ret, fc_event- > >event_code); > + } > + } else > + condlog(4, "Invalid command received: 0x%x\n", > els_cmd); > + return ret; > +} > + > +/*cleans the global marginal dev list*/ > +void fpin_clean_marginal_dev_list(__attribute__((unused)) void *arg) > +{ > + struct marginal_dev_list *tmp_marg = NULL; > + > + pthread_mutex_lock(&fpin_li_marginal_dev_mutex); > + while (!list_empty(&fpin_li_marginal_dev_list_head)) { > + tmp_marg = > list_first_entry(&fpin_li_marginal_dev_list_head, > + struct marginal_dev_list, node); > + list_del(&tmp_marg->node); > + free(tmp_marg); > + } > + pthread_mutex_unlock(&fpin_li_marginal_dev_mutex); > +} > + > +/* Cleans the global els marginal list */ > +static void fpin_clean_els_marginal_list(void *arg) > +{ > + struct list_head *head = (struct list_head *)arg; > + struct els_marginal_list *els_marg; > + > + while (!list_empty(head)) { > + els_marg = list_first_entry(head, struct > els_marginal_list, > + node); > + list_del(&els_marg->node); > + free(els_marg); > + } > +} > + > +static void rcu_unregister(__attribute__((unused)) void *param) > +{ > + rcu_unregister_thread(); > +} > +/* > + * This is the FPIN ELS consumer thread. The thread sleeps on > pthread cond > + * variable unless notified by fpin_fabric_notification_receiver > thread. > + * This thread is only to process FPIN-LI ELS frames. A new thread > and frame > + * list will be added if any more ELS frames types are to be > supported. 
> + */ > +void *fpin_els_li_consumer(void *data) > +{ > + struct list_head marginal_list_head; > + int ret = 0; > + uint16_t host_num; > + struct els_marginal_list *els_marg; > + uint32_t event_code; > + struct vectors *vecs = (struct vectors *)data; > + > + pthread_cleanup_push(rcu_unregister, NULL); > + rcu_register_thread(); > + pthread_cleanup_push(fpin_clean_marginal_dev_list, NULL); > + INIT_LIST_HEAD(&marginal_list_head); > + pthread_cleanup_push(fpin_clean_els_marginal_list, > + (void *)&marginal_list_head); > + for ( ; ; ) { > + pthread_mutex_lock(&fpin_li_mutex); > + pthread_cleanup_push(cleanup_mutex, &fpin_li_mutex); > + pthread_testcancel(); > + if (list_empty(&els_marginal_list_head)) It is safer to use while (list_empty(...)) here, so that the condition is re-checked if pthread_cond_wait() returns while the list is still empty (e.g. on a spurious wakeup). > + pthread_cond_wait(&fpin_li_cond, > &fpin_li_mutex); > + > + if (!list_empty(&els_marginal_list_head)) { > + condlog(4, "Invoke List splice tail\n"); > + list_splice_tail_init(&els_marginal_list_head > , &marginal_list_head); > + } > + pthread_cleanup_pop(1); > + > + while (!list_empty(&marginal_list_head)) { > + els_marg = > list_first_entry(&marginal_list_head, > + struct > els_marginal_list, node); > + host_num = els_marg->host_num; > + event_code = els_marg->event_code; > + /* Now finally process FPIN LI ELS Frame */ > + condlog(4, "Got a new Payload buffer, > processing it\n"); > + if ((event_code == FCH_EVT_LINKUP) || > (event_code == FCH_EVT_RSCN)) > + fpin_unset_marginal_dev(host_num, > vecs); > + else { > + ret = > fpin_process_els_frame(host_num, els_marg->payload, vecs); > + if (ret <= 0) > + condlog(0, "ELS frame > processing failed with ret %d\n", ret); > + } > + list_del(&els_marg->node); > + free(els_marg); > + > + } > + } > + > + pthread_cleanup_pop(1); > + pthread_cleanup_pop(1); > + pthread_cleanup_pop(1); > + return NULL; > +} > + > +static void receiver_cleanup_list(__attribute__((unused)) void *arg) > +{ > + pthread_mutex_lock(&fpin_li_mutex); > + 
fpin_clean_els_marginal_list(&els_marginal_list_head); > + pthread_mutex_unlock(&fpin_li_mutex); > +} > + > +/* > + * Listen for ELS frames from driver. on receiving the frame > payload, > + * push the payload to a list, and notify the fpin_els_li_consumer > thread to > + * process it. Once consumer thread is notified, return to listen > for more ELS > + * frames from driver. > + */ > +void *fpin_fabric_notification_receiver(__attribute__((unused))void > *unused) > +{ > + int ret; > + long fd; > + uint32_t els_cmd; > + struct fc_nl_event *fc_event = NULL; > + struct sockaddr_nl fc_local; > + unsigned char buf[DEF_RX_BUF_SIZE] > __attribute__((aligned(sizeof(uint64_t)))); > + size_t plen = 0; > + > + pthread_cleanup_push(rcu_unregister, NULL); > + rcu_register_thread(); > + > + pthread_cleanup_push(receiver_cleanup_list, NULL); > + fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_SCSITRANSPORT); > + if (fd < 0) { > + condlog(0, "fc socket error %ld", fd); > + return NULL; > + } > + > + pthread_cleanup_push(close_fd, (void *)fd); > + memset(&fc_local, 0, sizeof(fc_local)); > + fc_local.nl_family = AF_NETLINK; > + fc_local.nl_groups = ~0; > + fc_local.nl_pid = getpid(); > + ret = bind(fd, (struct sockaddr *)&fc_local, > sizeof(fc_local)); > + if (ret == -1) { > + condlog(0, "fc socket bind error %d\n", ret); > + goto out; > + } > + for ( ; ; ) { > + condlog(4, "Waiting for ELS...\n"); > + ret = read(fd, buf, DEF_RX_BUF_SIZE); You should check the return value of read(). 
> + condlog(4, "Got a new request %d\n", ret); > + if (!NLMSG_OK((struct nlmsghdr *)buf, ret)) { > + condlog(0, "bad els frame read (%d)", ret); > + continue; > + } > + /* Push the frame to appropriate frame list */ > + plen = NLMSG_PAYLOAD((struct nlmsghdr *)buf, 0); > + fc_event = (struct fc_nl_event *)NLMSG_DATA(buf); > + if (plen < sizeof(*fc_event)) { > + condlog(0, "too short (%d) to be an FC > event", ret); > + continue; > + } > + els_cmd = (uint32_t)fc_event->event_data; > + condlog(4, "Got host no as %d, event 0x%x, len %d > evntnum %d evntcode %d\n", > + fc_event->host_no, els_cmd, fc_event- > >event_datalen, > + fc_event->event_num, fc_event- > >event_code); > + fpin_handle_els_frame(fc_event); > + } > +out: > + pthread_cleanup_pop(1); > + pthread_cleanup_pop(1); > + pthread_cleanup_pop(1); > + return NULL; > +} > diff --git a/multipathd/main.c b/multipathd/main.c > index 44ca5b12..6bc5178d 100644 > --- a/multipathd/main.c > +++ b/multipathd/main.c > @@ -16,6 +16,7 @@ > #include <linux/oom.h> > #include <libudev.h> > #include <urcu.h> > +#include "fpin.h" > #ifdef USE_SYSTEMD > #include <systemd/sd-daemon.h> > #endif > @@ -132,9 +133,11 @@ static bool __delayed_reconfig; > pid_t daemon_pid; > static pthread_mutex_t config_lock = PTHREAD_MUTEX_INITIALIZER; > static pthread_cond_t config_cond; > -static pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr, > dmevent_thr; > +static pthread_t check_thr, uevent_thr, uxlsnr_thr, uevq_thr, > dmevent_thr, > + fpin_thr, fpin_consumer_thr; > static bool check_thr_started, uevent_thr_started, > uxlsnr_thr_started, > - uevq_thr_started, dmevent_thr_started; > + uevq_thr_started, dmevent_thr_started, fpin_thr_started, > + fpin_consumer_thr_started; > static int pid_fd = -1; > > static inline enum daemon_status get_running_state(void) > @@ -2879,7 +2882,9 @@ reconfigure (struct vectors * vecs) > conf->sequence_nr = old->sequence_nr + 1; > rcu_assign_pointer(multipath_conf, conf); > call_rcu(&old->rcu, 
rcu_free_config); > - > +#ifdef FPIN_EVENT_HANDLER > + fpin_clean_marginal_dev_list(NULL); > +#endif > configure(vecs); > > > @@ -3098,6 +3103,11 @@ static void cleanup_threads(void) > pthread_cancel(uevq_thr); > if (dmevent_thr_started) > pthread_cancel(dmevent_thr); > + if (fpin_thr_started) > + pthread_cancel(fpin_thr); > + if (fpin_consumer_thr_started) > + pthread_cancel(fpin_consumer_thr); > + > > if (check_thr_started) > pthread_join(check_thr, NULL); > @@ -3109,6 +3119,11 @@ static void cleanup_threads(void) > pthread_join(uevq_thr, NULL); > if (dmevent_thr_started) > pthread_join(dmevent_thr, NULL); > + if (fpin_thr_started) > + pthread_join(fpin_thr, NULL); > + if (fpin_consumer_thr_started) > + pthread_join(fpin_consumer_thr, NULL); > + > > /* > * As all threads are joined now, and we're in > DAEMON_SHUTDOWN > @@ -3202,6 +3217,7 @@ child (__attribute__((unused)) void *param) > char *envp; > enum daemon_status state; > int exit_code = 1; > + int fpin_marginal_paths = 0; > > init_unwinder(); > mlockall(MCL_CURRENT | MCL_FUTURE); > @@ -3280,7 +3296,10 @@ child (__attribute__((unused)) void *param) > > setscheduler(); > set_oom_adj(); > - > +#ifdef FPIN_EVENT_HANDLER > + if (conf->marginal_pathgroups == MARGINAL_PATHGROUP_FPIN) > + fpin_marginal_paths = 1; > +#endif > /* > * Startup done, invalidate configuration > */ > @@ -3348,6 +3367,22 @@ child (__attribute__((unused)) void *param) > goto failed; > } else > uevq_thr_started = true; > + > + if (fpin_marginal_paths) { > + if ((rc = pthread_create(&fpin_thr, &misc_attr, > + fpin_fabric_notification_receiver, NULL))) { > + condlog(0, "failed to create the fpin > receiver thread: %d", rc); > + goto failed; > + } else > + fpin_thr_started = true; > + > + if ((rc = pthread_create(&fpin_consumer_thr, > + &misc_attr, fpin_els_li_consumer, vecs))) { > + condlog(0, "failed to create the fpin > consumer thread thread: %d", rc); > + goto failed; > + } else > + fpin_consumer_thr_started = true; > + } > 
pthread_attr_destroy(&misc_attr); > > while (1) { -- dm-devel mailing list dm-devel@xxxxxxxxxx https://listman.redhat.com/mailman/listinfo/dm-devel