在 2022/9/20 17:20, Wu Guanghao 写道: > > > 在 2022/9/16 2:17, Benjamin Marzinski 写道: >> On Thu, Sep 15, 2022 at 02:56:36PM +0800, Wu Guanghao wrote: >>> Sorry for the late feedback. >>> >>> The version we are currently testing is 0.8.4, so we only merge the >>> first 3 patches in this series of patches. Then after the actual test, >>> it was found that the effect improvement is not very obvious. >>> >>> Test environment: 1000 multipath devices, 16 paths per device >>> Test command: Aggregate multipath devices using multipathd add path >>> Time consuming (time for 16 paths to aggregate 1 multipath device): >>> 1. Original: >>> < 4s: 76.9%; >>> 4s~10s: 18.4%; >>> >10s: 4.7%; >>> 2. After using the patches: >>> < 4s: 79.1%; >>> 4s~10s: 18.2%; >>> >10s: 2.7%; >>> >>> >From the results, the time-consuming improvement of the patch is not obvious, >>> so we made a few changes to the patch and it worked as expected. The modification >>> of the patch is as follows: >>> >>> Paths_checked is changed to configurable, current setting n = 2. >>> Removed judgment on diff_time. >>> Sleep change from 10us to 5ms >> >> I worry that this is giving too much deference to the uevents. If there >> is a uevent storm, checking will stop for 5ms every 2 paths checked. I'm >> worried that this will make it take significantly longer for the the >> path checker to complete a cycle. Making paths_checked configurable, so >> that this doesn't trigger too often does help to avoid the issue that >> checking the time from chk_start_time was dealing with, and makes the >> mechanics of this simpler. But 5ms seems like a long time to have to >> wait, just to make sure that another process had the time to grab the >> vecs lock. Perhaps it would be better to sleep for a shorter length of >> time, but in a loop, where we can check to see if progress has been >> made, perhaps by checking some counter of events and user commands >> serviced. This way, we aren't sleeping too long for no good reason. >> I agree with this, we are also going to adjust the sleep time, and then > continue the test. We have tested the delay times of 10us, 100us and 1ms, but the results weren't very good. More than 30% of the luns aggregation time is greater than 4s. Whether the delay time can also be used as a configurable item and configured according to the situation. The following is the patch content after we have modified checker_loop(). Signed-off-by: wuguanghao <wuguanghao3@xxxxxxxxxx> --- libmultipath/config.c | 3 ++ libmultipath/config.h | 3 ++ libmultipath/defaults.h | 3 ++ libmultipath/dict.c | 58 +++++++++++++++++++++++++++ libmultipath/structs.h | 1 + multipathd/main.c | 87 +++++++++++++++++++++++++++++++++-------- 6 files changed, 138 insertions(+), 17 deletions(-) diff --git a/libmultipath/config.c b/libmultipath/config.c index 7d0d711..7658626 100644 --- a/libmultipath/config.c +++ b/libmultipath/config.c @@ -752,6 +752,9 @@ load_config (char * file) conf->remove_retries = 0; conf->ghost_delay = DEFAULT_GHOST_DELAY; conf->all_tg_pt = DEFAULT_ALL_TG_PT; + conf->check_delay_time = DEFAULT_CHECK_DELAY_TIME; + conf->check_path_num_per_splice = DEFAULT_CHECK_PATH_NUM_PER_SPLICE; + /* * preload default hwtable */ diff --git a/libmultipath/config.h b/libmultipath/config.h index ceecff2..a26dd99 100644 --- a/libmultipath/config.h +++ b/libmultipath/config.h @@ -229,6 +229,9 @@ struct config { vector elist_property; vector elist_protocol; char *enable_foreign; + + int check_delay_time; + int check_path_num_per_splice; }; extern struct udev * udev; diff --git a/libmultipath/defaults.h b/libmultipath/defaults.h index 52fe05b..b3fb1bc 100644 --- a/libmultipath/defaults.h +++ b/libmultipath/defaults.h @@ -50,6 +50,9 @@ #define DEFAULT_FIND_MULTIPATHS_TIMEOUT -10 #define DEFAULT_UNKNOWN_FIND_MULTIPATHS_TIMEOUT 1 #define DEFAULT_ALL_TG_PT ALL_TG_PT_OFF +#define DEFAULT_CHECK_DELAY_TIME 10 +#define DEFAULT_CHECK_PATH_NUM_PER_SPLICE 1 + /* Enable all foreign libraries by default */ #define DEFAULT_ENABLE_FOREIGN "" diff --git a/libmultipath/dict.c b/libmultipath/dict.c index 2c8ea10..2262caa 100644 --- a/libmultipath/dict.c +++ b/libmultipath/dict.c @@ -1407,6 +1407,46 @@ def_uxsock_timeout_handler(struct config *conf, vector strvec) free(buff); return 0; } +static int +def_check_delay_time_handler(struct config *conf, vector strvec) +{ + int local_check_delay_time; + char *buff; + + buff = set_value(strvec); + if (!buff) + return 1; + + if (sscanf(buff, "%u", &local_check_delay_time) == 1 && + local_check_delay_time > DEFAULT_CHECK_DELAY_TIME) + conf->check_delay_time = local_check_delay_time; + else + conf->check_delay_time = DEFAULT_CHECK_DELAY_TIME; + + free(buff); + return 0; +} + +static int +def_check_path_num_per_splice_handler(struct config *conf, vector strvec) +{ + int local_check_path_num_per_splice; + char *buff; + + buff = set_value(strvec); + if (!buff) + return 1; + + if (sscanf(buff, "%u", &local_check_path_num_per_splice) == 1 && + local_check_path_num_per_splice > DEFAULT_CHECK_PATH_NUM_PER_SPLICE) + conf->check_path_num_per_splice = local_check_path_num_per_splice; + else + conf->check_path_num_per_splice = DEFAULT_CHECK_PATH_NUM_PER_SPLICE; + + free(buff); + return 0; +} + static int hw_vpd_vendor_handler(struct config *conf, vector strvec) @@ -1546,6 +1586,21 @@ snprint_def_uxsock_timeout(struct config *conf, char * buff, int len, return snprintf(buff, len, "%u", conf->uxsock_timeout); } +static int +snprint_def_check_delay_time(struct config *conf, char * buff, int len, + const void *data) +{ + return snprintf(buff, len, "%u", conf->check_delay_time); +} + +static int +snprint_def_check_path_num_per_splice(struct config *conf, char * buff, int len, + const void *data) +{ + return snprintf(buff, len, "%u", conf->check_path_num_per_splice); +} + + static int snprint_ble_simple (struct config *conf, char * buff, int len, const void * data) @@ -1804,6 +1859,9 @@ init_keywords(vector keywords) install_keyword("enable_foreign", &def_enable_foreign_handler, &snprint_def_enable_foreign); install_keyword("marginal_pathgroups", &def_marginal_pathgroups_handler, &snprint_def_marginal_pathgroups); + install_keyword("check_delay_time", &def_check_delay_time_handler, &snprint_def_check_delay_time); + install_keyword("check_path_num_per_splice", &def_check_path_num_per_splice_handler, &snprint_def_check_path_num_per_splice); + __deprecated install_keyword("default_selector", &def_selector_handler, NULL); __deprecated install_keyword("default_path_grouping_policy", &def_pgpolicy_handler, NULL); __deprecated install_keyword("default_uid_attribute", &def_uid_attribute_handler, NULL); diff --git a/libmultipath/structs.h b/libmultipath/structs.h index 11e516a..391de96 100644 --- a/libmultipath/structs.h +++ b/libmultipath/structs.h @@ -288,6 +288,7 @@ struct path { int find_multipaths_timeout; int marginal; int vpd_vendor_id; + bool is_checked; /* configlet pointers */ vector hwe; struct gen_path generic_path; diff --git a/multipathd/main.c b/multipathd/main.c index 5035d5b..865b7b5 100644 --- a/multipathd/main.c +++ b/multipathd/main.c @@ -2388,6 +2388,11 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks) } return 1; } +enum checker_state { + CHECKER_STARTING, + CHECKER_RUNNING, + CHECKER_FINISHED, +}; static void * checkerloop (void *ap) @@ -2395,11 +2400,11 @@ checkerloop (void *ap) struct vectors *vecs; struct path *pp; int count = 0; - unsigned int i; struct timespec last_time; struct config *conf; int foreign_tick = 0; bool use_watchdog; + int check_delay_time, check_path_num_per_splice; pthread_cleanup_push(rcu_unregister, NULL); rcu_register_thread(); @@ -2414,12 +2419,15 @@ checkerloop (void *ap) /* use_watchdog is set from process environment and never changes */ conf = get_multipath_config(); use_watchdog = conf->use_watchdog; + check_delay_time = conf->check_delay_time; + check_path_num_per_splice = conf->check_path_num_per_splice; put_multipath_config(conf); while (1) { struct timespec diff_time, start_time, end_time; - int num_paths = 0, strict_timing, rc = 0; + int num_paths = 0, strict_timing, rc = 0, i = 0; unsigned int ticks = 0; + enum checker_state checker_state = CHECKER_STARTING; get_monotonic_time(&start_time); if (start_time.tv_sec && last_time.tv_sec) { @@ -2443,21 +2451,66 @@ checkerloop (void *ap) } else if (rc == EINVAL) /* daemon shutdown */ break; - - pthread_cleanup_push(cleanup_lock, &vecs->lock); - lock(&vecs->lock); - pthread_testcancel(); - vector_foreach_slot (vecs->pathvec, pp, i) { - rc = check_path(vecs, pp, ticks); - if (rc < 0) { - vector_del_slot(vecs->pathvec, i); - free_path(pp); - i--; - } else - num_paths += rc; - } - lock_cleanup_pop(vecs->lock); - + condlog(4, "check_delay_time is %u, check_path_num_per_splice is %u", check_delay_time, check_path_num_per_splice); + + while (checker_state != CHECKER_FINISHED) { + unsigned int paths_checked = 0; + struct timespec chk_start_time; + pthread_cleanup_push(cleanup_lock, &vecs->lock); + lock(&vecs->lock); + pthread_testcancel(); + get_monotonic_time(&chk_start_time); + if (checker_state == CHECKER_STARTING) { + vector_foreach_slot(vecs->pathvec, pp, i) + pp->is_checked = false; + i = 0; + checker_state = CHECKER_RUNNING; + } else { + /* + * Paths could have been removed since we + * dropped the lock. Find the path to continue + * checking at. Since paths can be removed from + * anywhere in the vector, but can only be added + * at the end, the last checked path must be + * between its old location, and the start or + * the vector. + */ + if (i >= VECTOR_SIZE(vecs->pathvec)) + i = VECTOR_SIZE(vecs->pathvec) - 1; + while ((pp = VECTOR_SLOT(vecs->pathvec, i))) { + if (pp->is_checked == true) + break; + i--; + } + i++; + } + vector_foreach_slot_after (vecs->pathvec, pp, i) { + pp->is_checked = true; + rc = check_path(vecs, pp, ticks); + if (rc < 0) { + condlog(1, "%s: check_path() failed, removing", + pp->dev); + vector_del_slot(vecs->pathvec, i); + free_path(pp); + i--; + } else + num_paths += rc; + if (++paths_checked % check_path_num_per_splice == 0 && + lock_has_waiters(&vecs->lock)) { + get_monotonic_time(&end_time); + timespecsub(&end_time, &chk_start_time, + &diff_time); + goto unlock; + } + } + checker_state = CHECKER_FINISHED; +unlock: + lock_cleanup_pop(vecs->lock); + if (checker_state != CHECKER_FINISHED) { + /* Yield to waiters */ + usleep(check_delay_time); + } + } pthread_cleanup_push(cleanup_lock, &vecs->lock); lock(&vecs->lock); pthread_testcancel(); -- 2.27.0 -- dm-devel mailing list dm-devel@xxxxxxxxxx https://listman.redhat.com/mailman/listinfo/dm-devel