On Fri, Nov 29, 2019 at 12:04:37AM +0300, Pavel Begunkov wrote: > bvec_iter_advance() is quite popular, but compilers fail to do proper > alias analysis and optimise it good enough. The assembly is checked > for gcc 9.2, x86-64. > > - remove @iter->bi_size from min(...), as it's always less than @bytes. > Modify at the beginning and forget about it. > > - the compiler isn't able to collapse memory dependencies and remove > writes in the loop. Help it by explicitely using local vars. > > Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx> > --- > include/linux/bvec.h | 23 ++++++++++++++--------- > 1 file changed, 14 insertions(+), 9 deletions(-) > > diff --git a/include/linux/bvec.h b/include/linux/bvec.h > index a032f01e928c..7b2f05faae14 100644 > --- a/include/linux/bvec.h > +++ b/include/linux/bvec.h > @@ -87,26 +87,31 @@ struct bvec_iter_all { > static inline bool bvec_iter_advance(const struct bio_vec *bv, > struct bvec_iter *iter, unsigned bytes) > { > + unsigned int done = iter->bi_bvec_done; > + unsigned int idx = iter->bi_idx; > + > if (WARN_ONCE(bytes > iter->bi_size, > "Attempted to advance past end of bvec iter\n")) { > iter->bi_size = 0; > return false; > } > > + iter->bi_size -= bytes; > + > while (bytes) { > - const struct bio_vec *cur = bv + iter->bi_idx; > - unsigned len = min3(bytes, iter->bi_size, > - cur->bv_len - iter->bi_bvec_done); > + const struct bio_vec *cur = bv + idx; > + unsigned int len = min(bytes, cur->bv_len - done); > > bytes -= len; > - iter->bi_size -= len; > - iter->bi_bvec_done += len; > - > - if (iter->bi_bvec_done == cur->bv_len) { > - iter->bi_bvec_done = 0; > - iter->bi_idx++; > + done += len; > + if (done == cur->bv_len) { > + idx++; > + done = 0; > } > } > + > + iter->bi_idx = idx; > + iter->bi_bvec_done = done; > return true; > } > > -- > 2.24.0 > The loop can be simplified a bit further, as done has to be 0 once we go beyond the current bio_vec. See below for the simplified version. I also check if bi_size became zero so we can skip the rest of the calculations in that case. If we want to preserve the current behavior of updating iter->bi_idx and iter->bi_bvec_done even if bi_size is going to become zero, the loop condition should change to while (bytes && bytes >= cur->bv_len) to ensure that we don't try to access past the end of the bio_vec array. diff --git a/include/linux/bvec.h b/include/linux/bvec.h index a032f01e928c..380d188dfbd2 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -87,25 +87,26 @@ struct bvec_iter_all { static inline bool bvec_iter_advance(const struct bio_vec *bv, struct bvec_iter *iter, unsigned bytes) { + unsigned int idx = iter->bi_idx; + const struct bio_vec *cur = bv + idx; + if (WARN_ONCE(bytes > iter->bi_size, "Attempted to advance past end of bvec iter\n")) { iter->bi_size = 0; return false; } - while (bytes) { - const struct bio_vec *cur = bv + iter->bi_idx; - unsigned len = min3(bytes, iter->bi_size, - cur->bv_len - iter->bi_bvec_done); - - bytes -= len; - iter->bi_size -= len; - iter->bi_bvec_done += len; - - if (iter->bi_bvec_done == cur->bv_len) { - iter->bi_bvec_done = 0; - iter->bi_idx++; + iter->bi_size -= bytes; + if (iter->bi_size != 0) { + bytes += iter->bi_bvec_done; + while (bytes >= cur->bv_len) { + bytes -= cur->bv_len; + idx++; + cur++; } + + iter->bi_idx = idx; + iter->bi_bvec_done = bytes; } return true; }