From: Jinshan Xiong <jinshan.xiong@xxxxxxxxx>

Define osc_lru_reserve() and osc_lru_unreserve() to reserve LRU slots
in osc_io_write_iter_init() and unreserve them in
osc_io_write_iter_fini(). This reduces contention on the atomic
cl_lru_left counter by switching it from per-page to per-IO access.

Signed-off-by: Jinshan Xiong <jinshan.xiong@xxxxxxxxx>
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-6271
Reviewed-on: http://review.whamcloud.com/16456
Reviewed-by: Bobi Jam <bobijam@xxxxxxxxxxx>
Reviewed-by: John L. Hammond <john.hammond@xxxxxxxxx>
Reviewed-by: James Simmons <uja.ornl@xxxxxxxxx>
Reviewed-by: Oleg Drokin <oleg.drokin@xxxxxxxxx>
Signed-off-by: James Simmons <jsimmons@xxxxxxxxxxxxx>
---
 .../staging/lustre/lustre/osc/osc_cl_internal.h  |  4 +-
 drivers/staging/lustre/lustre/osc/osc_internal.h |  3 +-
 drivers/staging/lustre/lustre/osc/osc_io.c       | 48 +++++---------
 drivers/staging/lustre/lustre/osc/osc_lock.c     | 46 ++++++++------
 drivers/staging/lustre/lustre/osc/osc_object.c   |  8 ++-
 drivers/staging/lustre/lustre/osc/osc_page.c     | 73 +++++++++++++++++++---
 6 files changed, 118 insertions(+), 64 deletions(-)

diff --git a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
index c09ab97d..270212f 100644
--- a/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
+++ b/drivers/staging/lustre/lustre/osc/osc_cl_internal.h
@@ -62,7 +62,9 @@ struct osc_io {
 	/** super class */
 	struct cl_io_slice oi_cl;
 	/** true if this io is lockless. */
-	unsigned int oi_lockless;
+	unsigned int oi_lockless:1,
+	/** true if this io is counted as active IO */
+		     oi_is_active:1;
 	/** how many LRU pages are reserved for this IO */
 	unsigned long oi_lru_reserved;
 
diff --git a/drivers/staging/lustre/lustre/osc/osc_internal.h b/drivers/staging/lustre/lustre/osc/osc_internal.h
index 8abd83f..845e795 100644
--- a/drivers/staging/lustre/lustre/osc/osc_internal.h
+++ b/drivers/staging/lustre/lustre/osc/osc_internal.h
@@ -133,7 +133,8 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
 		  struct list_head *ext_list, int cmd);
 long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
 		    long target, bool force);
-long osc_lru_reclaim(struct client_obd *cli, unsigned long npages);
+unsigned long osc_lru_reserve(struct client_obd *cli, unsigned long npages);
+void osc_lru_unreserve(struct client_obd *cli, unsigned long npages);
 
 unsigned long osc_ldlm_weigh_ast(struct ldlm_lock *dlmlock);
 
diff --git a/drivers/staging/lustre/lustre/osc/osc_io.c b/drivers/staging/lustre/lustre/osc/osc_io.c
index 0b4cc42..f991bee 100644
--- a/drivers/staging/lustre/lustre/osc/osc_io.c
+++ b/drivers/staging/lustre/lustre/osc/osc_io.c
@@ -354,7 +354,10 @@ static int osc_io_iter_init(const struct lu_env *env,
 
 	spin_lock(&imp->imp_lock);
 	if (likely(!imp->imp_invalid)) {
+		struct osc_io *oio = osc_env_io(env);
+
 		atomic_inc(&osc->oo_nr_ios);
+		oio->oi_is_active = 1;
 		rc = 0;
 	}
 	spin_unlock(&imp->imp_lock);
@@ -368,10 +371,7 @@ static int osc_io_write_iter_init(const struct lu_env *env,
 	struct cl_io *io = ios->cis_io;
 	struct osc_io *oio = osc_env_io(env);
 	struct osc_object *osc = cl2osc(ios->cis_obj);
-	struct client_obd *cli = osc_cli(osc);
-	unsigned long c;
 	unsigned long npages;
-	unsigned long max_pages;
 
 	if (cl_io_is_append(io))
 		return osc_io_iter_init(env, ios);
@@ -380,31 +380,7 @@ static int osc_io_write_iter_init(const struct lu_env *env,
 	if (io->u.ci_rw.crw_pos & ~PAGE_MASK)
 		++npages;
 
-	max_pages = cli->cl_max_pages_per_rpc * cli->cl_max_rpcs_in_flight;
-	if (npages > max_pages)
-		npages = max_pages;
-
-	c = atomic_long_read(cli->cl_lru_left);
-	if (c < npages && osc_lru_reclaim(cli, npages) > 0)
-		c = atomic_long_read(cli->cl_lru_left);
-	while (c >= npages) {
-		if (c == atomic_long_cmpxchg(cli->cl_lru_left, c, c - npages)) {
-			oio->oi_lru_reserved = npages;
-			break;
-		}
-		c = atomic_long_read(cli->cl_lru_left);
-	}
-	if (atomic_long_read(cli->cl_lru_left) < max_pages) {
-		/*
-		 * If there aren't enough pages in the per-OSC LRU then
-		 * wake up the LRU thread to try and clear out space, so
-		 * we don't block if pages are being dirtied quickly.
-		 */
-		CDEBUG(D_CACHE, "%s: queue LRU, left: %lu/%ld.\n",
-		       cli_name(cli), atomic_long_read(cli->cl_lru_left),
-		       max_pages);
-		(void)ptlrpcd_queue_work(cli->cl_lru_work);
-	}
+	oio->oi_lru_reserved = osc_lru_reserve(osc_cli(osc), npages);
 
 	return osc_io_iter_init(env, ios);
 }
@@ -412,11 +388,16 @@ static int osc_io_write_iter_init(const struct lu_env *env,
 static void osc_io_iter_fini(const struct lu_env *env,
 			     const struct cl_io_slice *ios)
 {
-	struct osc_object *osc = cl2osc(ios->cis_obj);
+	struct osc_io *oio = osc_env_io(env);
 
-	LASSERT(atomic_read(&osc->oo_nr_ios) > 0);
-	if (atomic_dec_and_test(&osc->oo_nr_ios))
-		wake_up_all(&osc->oo_io_waitq);
+	if (oio->oi_is_active) {
+		struct osc_object *osc = cl2osc(ios->cis_obj);
+
+		oio->oi_is_active = 0;
+		LASSERT(atomic_read(&osc->oo_nr_ios) > 0);
+		if (atomic_dec_and_test(&osc->oo_nr_ios))
+			wake_up_all(&osc->oo_io_waitq);
+	}
 }
 
 static void osc_io_write_iter_fini(const struct lu_env *env,
@@ -424,10 +405,9 @@ static void osc_io_write_iter_fini(const struct lu_env *env,
 {
 	struct osc_io *oio = osc_env_io(env);
 	struct osc_object *osc = cl2osc(ios->cis_obj);
-	struct client_obd *cli = osc_cli(osc);
 
 	if (oio->oi_lru_reserved > 0) {
-		atomic_long_add(oio->oi_lru_reserved, cli->cl_lru_left);
+		osc_lru_unreserve(osc_cli(osc), oio->oi_lru_reserved);
 		oio->oi_lru_reserved = 0;
 	}
 	oio->oi_write_osclock = NULL;
diff --git a/drivers/staging/lustre/lustre/osc/osc_lock.c b/drivers/staging/lustre/lustre/osc/osc_lock.c
index efecd92..5f7c030 100644
--- a/drivers/staging/lustre/lustre/osc/osc_lock.c
+++ b/drivers/staging/lustre/lustre/osc/osc_lock.c
@@ -840,13 +840,14 @@ static void osc_lock_wake_waiters(const struct lu_env *env,
 	spin_unlock(&oscl->ols_lock);
 }
 
-static void osc_lock_enqueue_wait(const struct lu_env *env,
-				  struct osc_object *obj,
-				  struct osc_lock *oscl)
+static int osc_lock_enqueue_wait(const struct lu_env *env,
+				 struct osc_object *obj,
+				 struct osc_lock *oscl)
 {
 	struct osc_lock *tmp_oscl;
 	struct cl_lock_descr *need = &oscl->ols_cl.cls_lock->cll_descr;
 	struct cl_sync_io *waiter = &osc_env_info(env)->oti_anchor;
+	int rc = 0;
 
 	spin_lock(&obj->oo_ol_spin);
 	list_add_tail(&oscl->ols_nextlock_oscobj, &obj->oo_ol_list);
@@ -883,13 +884,17 @@ static void osc_lock_enqueue_wait(const struct lu_env *env,
 		spin_unlock(&tmp_oscl->ols_lock);
 		spin_unlock(&obj->oo_ol_spin);
 
-		(void)cl_sync_io_wait(env, waiter, 0);
-
+		rc = cl_sync_io_wait(env, waiter, 0);
 		spin_lock(&obj->oo_ol_spin);
+		if (rc < 0)
+			break;
+
 		oscl->ols_owner = NULL;
 		goto restart;
 	}
 	spin_unlock(&obj->oo_ol_spin);
+
+	return rc;
 }
 
 /**
@@ -937,7 +942,9 @@ static int osc_lock_enqueue(const struct lu_env *env,
 		goto enqueue_base;
 	}
 
-	osc_lock_enqueue_wait(env, osc, oscl);
+	result = osc_lock_enqueue_wait(env, osc, oscl);
+	if (result < 0)
+		goto out;
 
 	/* we can grant lockless lock right after all conflicting locks
 	 * are canceled.
@@ -962,7 +969,6 @@ static int osc_lock_enqueue(const struct lu_env *env,
 	 * osc_lock.
 	 */
 	ostid_build_res_name(&osc->oo_oinfo->loi_oi, resname);
-	osc_lock_build_einfo(env, lock, osc, &oscl->ols_einfo);
 	osc_lock_build_policy(env, lock, policy);
 	if (oscl->ols_agl) {
 		oscl->ols_einfo.ei_cbdata = NULL;
@@ -977,18 +983,7 @@ static int osc_lock_enqueue(const struct lu_env *env,
 			  upcall, cookie, &oscl->ols_einfo,
 			  PTLRPCD_SET, async, oscl->ols_agl);
-	if (result != 0) {
-		oscl->ols_state = OLS_CANCELLED;
-		osc_lock_wake_waiters(env, osc, oscl);
-
-		/* hide error for AGL lock. */
-		if (oscl->ols_agl) {
-			cl_object_put(env, osc2cl(osc));
-			result = 0;
-		}
-		if (anchor)
-			cl_sync_io_note(env, anchor, result);
-	} else {
+	if (!result) {
 		if (osc_lock_is_lockless(oscl)) {
 			oio->oi_lockless = 1;
 		} else if (!async) {
@@ -996,6 +991,18 @@ static int osc_lock_enqueue(const struct lu_env *env,
 			LASSERT(oscl->ols_hold);
 			LASSERT(oscl->ols_dlmlock);
 		}
+	} else if (oscl->ols_agl) {
+		cl_object_put(env, osc2cl(osc));
+		result = 0;
+	}
+
+out:
+	if (result < 0) {
+		oscl->ols_state = OLS_CANCELLED;
+		osc_lock_wake_waiters(env, osc, oscl);
+
+		if (anchor)
+			cl_sync_io_note(env, anchor, result);
 	}
 	return result;
 }
@@ -1159,6 +1166,7 @@ int osc_lock_init(const struct lu_env *env,
 		oscl->ols_flags |= LDLM_FL_BLOCK_GRANTED;
 		oscl->ols_glimpse = 1;
 	}
+	osc_lock_build_einfo(env, lock, cl2osc(obj), &oscl->ols_einfo);
 
 	cl_lock_slice_add(lock, &oscl->ols_cl, obj, &osc_lock_ops);
 
diff --git a/drivers/staging/lustre/lustre/osc/osc_object.c b/drivers/staging/lustre/lustre/osc/osc_object.c
index 4f8e78b..fa621bd 100644
--- a/drivers/staging/lustre/lustre/osc/osc_object.c
+++ b/drivers/staging/lustre/lustre/osc/osc_object.c
@@ -453,9 +453,15 @@ int osc_object_invalidate(const struct lu_env *env, struct osc_object *osc)
 	l_wait_event(osc->oo_io_waitq, !atomic_read(&osc->oo_nr_ios), &lwi);
 
-	/* Discard all pages of this object. */
+	/* Discard all dirty pages of this object. */
 	osc_cache_truncate_start(env, osc, 0, NULL);
 
+	/* Discard all caching pages */
+	osc_lock_discard_pages(env, osc, 0, CL_PAGE_EOF, CLM_WRITE);
+
+	/* Clear ast data of dlm lock. Do this after discarding all pages */
+	osc_object_prune(env, osc2cl(osc));
+
 	return 0;
 }
 
diff --git a/drivers/staging/lustre/lustre/osc/osc_page.c b/drivers/staging/lustre/lustre/osc/osc_page.c
index ab9d0d7..03ee340 100644
--- a/drivers/staging/lustre/lustre/osc/osc_page.c
+++ b/drivers/staging/lustre/lustre/osc/osc_page.c
@@ -42,8 +42,8 @@
 static void osc_lru_del(struct client_obd *cli, struct osc_page *opg);
 static void osc_lru_use(struct client_obd *cli, struct osc_page *opg);
-static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
-			   struct osc_page *opg);
+static int osc_lru_alloc(const struct lu_env *env, struct client_obd *cli,
+			 struct osc_page *opg);
 
 /** \addtogroup osc
  *  @{
@@ -273,7 +273,7 @@ int osc_page_init(const struct lu_env *env, struct cl_object *obj,
 
 	/* reserve an LRU space for this page */
 	if (page->cp_type == CPT_CACHEABLE && result == 0) {
-		result = osc_lru_reserve(env, osc, opg);
+		result = osc_lru_alloc(env, osc_cli(osc), opg);
 		if (result == 0) {
 			spin_lock(&osc->oo_tree_lock);
 			result = radix_tree_insert(&osc->oo_tree, index, opg);
@@ -676,7 +676,7 @@ long osc_lru_shrink(const struct lu_env *env, struct client_obd *cli,
  * LRU pages in batch. Therefore, the actual number is adjusted at least
  * max_pages_per_rpc.
  */
-long osc_lru_reclaim(struct client_obd *cli, unsigned long npages)
+static long osc_lru_reclaim(struct client_obd *cli, unsigned long npages)
 {
 	struct lu_env *env;
 	struct cl_client_cache *cache = cli->cl_cache;
@@ -749,18 +749,17 @@ long osc_lru_reclaim(struct client_obd *cli, unsigned long npages)
 }
 
 /**
- * osc_lru_reserve() is called to reserve an LRU slot for a cl_page.
+ * osc_lru_alloc() is called to reserve an LRU slot for a cl_page.
  *
  * Usually the LRU slots are reserved in osc_io_iter_rw_init().
  * Only in the case that the LRU slots are in extreme shortage, it should
  * have reserved enough slots for an IO.
  */
-static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
-			   struct osc_page *opg)
+static int osc_lru_alloc(const struct lu_env *env, struct client_obd *cli,
+			 struct osc_page *opg)
 {
 	struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
 	struct osc_io *oio = osc_env_io(env);
-	struct client_obd *cli = osc_cli(obj);
 	int rc = 0;
 
 	if (!cli->cl_cache) /* shall not be in LRU */
@@ -801,6 +800,64 @@ static int osc_lru_reserve(const struct lu_env *env, struct osc_object *obj,
 }
 
 /**
+ * osc_lru_reserve() is called to reserve enough LRU slots for I/O.
+ *
+ * The benefit of doing this is to reduce contention against atomic counter
+ * cl_lru_left by changing it from per-page access to per-IO access.
+ */
+unsigned long osc_lru_reserve(struct client_obd *cli, unsigned long npages)
+{
+	unsigned long reserved = 0;
+	unsigned long max_pages;
+	unsigned long c;
+
+	/*
+	 * reserve a full RPC window at most to avoid that a thread accidentally
+	 * consumes too many LRU slots
+	 */
+	max_pages = cli->cl_max_pages_per_rpc * cli->cl_max_rpcs_in_flight;
+	if (npages > max_pages)
+		npages = max_pages;
+
+	c = atomic_long_read(cli->cl_lru_left);
+	if (c < npages && osc_lru_reclaim(cli, npages) > 0)
+		c = atomic_long_read(cli->cl_lru_left);
+	while (c >= npages) {
+		if (c == atomic_long_cmpxchg(cli->cl_lru_left, c, c - npages)) {
+			reserved = npages;
+			break;
+		}
+		c = atomic_long_read(cli->cl_lru_left);
+	}
+	if (atomic_long_read(cli->cl_lru_left) < max_pages) {
+		/*
+		 * If there aren't enough pages in the per-OSC LRU then
+		 * wake up the LRU thread to try and clear out space, so
+		 * we don't block if pages are being dirtied quickly.
+		 */
+		CDEBUG(D_CACHE, "%s: queue LRU, left: %lu/%ld.\n",
+		       cli_name(cli), atomic_long_read(cli->cl_lru_left),
+		       max_pages);
+		(void)ptlrpcd_queue_work(cli->cl_lru_work);
+	}
+
+	return reserved;
+}
+
+/**
+ * osc_lru_unreserve() is called to unreserve LRU slots.
+ *
+ * LRU slots reserved by osc_lru_reserve() may have entries left due to several
+ * reasons such as page already existing or I/O error. Those reserved slots
+ * should be freed by calling this function.
+ */
+void osc_lru_unreserve(struct client_obd *cli, unsigned long npages)
+{
+	atomic_long_add(npages, cli->cl_lru_left);
+	wake_up_all(&osc_lru_waitq);
+}
+
+/**
 * Atomic operations are expensive. We accumulate the accounting for the
 * same page pgdat to get better performance.
 * In practice this can work pretty good because the pages in the same RPC
-- 
1.8.3.1
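
A note on the reservation scheme above, for readers skimming the patch:
the core of osc_lru_reserve() is a lock-free grab of LRU slots from the
shared cl_lru_left counter. Below is a minimal standalone sketch of that
compare-and-swap retry pattern; grab_slots and left are made-up names
for illustration only and are not part of the patch:

	/*
	 * Try to take 'npages' slots out of a shared counter without
	 * holding a lock: retry atomic_long_cmpxchg() until we either
	 * win the race or the counter drops below what we need.
	 */
	static unsigned long grab_slots(atomic_long_t *left,
					unsigned long npages)
	{
		unsigned long c = atomic_long_read(left);

		while (c >= npages) {
			/* succeeds only if 'left' still equals 'c' */
			if (c == atomic_long_cmpxchg(left, c, c - npages))
				return npages;	/* reserved all slots */
			c = atomic_long_read(left);	/* lost a race, retry */
		}
		return 0;	/* too few slots left */
	}

Whatever is reserved this way must eventually be handed back on the same
counter, which is what osc_lru_unreserve() does above with
atomic_long_add() plus a wakeup on osc_lru_waitq, so that per-page
allocations waiting in osc_lru_alloc() can make progress.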