> > @@ -324,6 +325,13 @@ void force_page_cache_ra(struct readahead_control *ractl, > > * be up to the optimal hardware IO size > > */ > > index = readahead_index(ractl); > > + if (!IS_ALIGNED(index, min_nrpages)) { > > + unsigned long old_index = index; > > + > > + index = round_down(index, min_nrpages); > > + nr_to_read += (old_index - index); > > + } > > new_index = mapping_align_start_index(mapping, index); > if (new_index != index) { > nr_to_read += index - new_index; > index = new_index Looks good. > } > > > + > > max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages); > > nr_to_read = min_t(unsigned long, nr_to_read, max_pages); > > This needs to have a size of at least the minimum folio order size > so readahead can fill entire folios, not get neutered to the maximum > IO size the underlying storage supports. So something like: > > max_pages = max_t(unsigned long, bdi->io_pages, ra->ra_pages); > > nr_to_read = min_t(unsigned long, nr_to_read, max_pages); nr_to_read = max(nr_to_read, min_order); > > > + * For higher order address space requirements we ensure no initial reads > > + * are ever less than the min number of pages required. > > + * > > + * We *always* cap the max io size allowed by the device. > > */ > > -static unsigned long get_init_ra_size(unsigned long size, unsigned long max) > > +static unsigned long get_init_ra_size(unsigned long size, > > + unsigned int min_nrpages, > > + unsigned long max) > > { > > unsigned long newsize = roundup_pow_of_two(size); > > > > + newsize = max_t(unsigned long, newsize, min_nrpages); > > This really doesn't need to care about min_nrpages. That rounding > can be done in the caller when the new size is returned. Sounds good. 
> > > if (newsize <= max / 32) > > newsize = newsize * 4; > > > > > > > @@ -561,7 +583,11 @@ static void ondemand_readahead(struct readahead_control *ractl, > > unsigned long add_pages; > > pgoff_t index = readahead_index(ractl); > > pgoff_t expected, prev_index; > > - unsigned int order = folio ? folio_order(folio) : 0; > > + unsigned int min_order = mapping_min_folio_order(ractl->mapping); > > + unsigned int min_nrpages = mapping_min_folio_nrpages(ractl->mapping); > > + unsigned int order = folio ? folio_order(folio) : min_order; > > Huh? If we have a folio, then the order is whatever that folio is, > otherwise we use min_order. What if the folio is larger than > min_order? Doesn't that mean that this: > > > @@ -583,8 +609,8 @@ static void ondemand_readahead(struct readahead_control *ractl, > > expected = round_down(ra->start + ra->size - ra->async_size, > > 1UL << order); > > if (index == expected || index == (ra->start + ra->size)) { > > - ra->start += ra->size; > > - ra->size = get_next_ra_size(ra, max_pages); > > + ra->start += round_down(ra->size, min_nrpages); > > + ra->size = get_next_ra_size(ra, min_nrpages, max_pages); > > may set up the incorrect readahead range because the folio order is > larger than min_nrpages? Hmm... So I think we should just increment ra->start by ra->size, and make sure to round the new size we get from get_next_ra_size() to min_nrpages. 
Then we will not disturb the readahead range, and the range will always grow in multiples of min_nrpages: ra->start += ra->size; ra->size = round_up(get_next_ra_size(ra, max_pages), min_nrpages); > > > ra->async_size = ra->size; > > goto readit; > > } > > @@ -603,13 +629,18 @@ static void ondemand_readahead(struct readahead_control *ractl, > > max_pages); > > rcu_read_unlock(); > > > > + start = round_down(start, min_nrpages); > > start = mapping_align_start_index(mapping, start); > > + > > + VM_BUG_ON(folio->index & (folio_nr_pages(folio) - 1)); > > + > > if (!start || start - index > max_pages) > > return; > > > > ra->start = start; > > ra->size = start - index; /* old async_size */ > > + > > ra->size += req_size; > > - ra->size = get_next_ra_size(ra, max_pages); > > + ra->size = get_next_ra_size(ra, min_nrpages, max_pages); > > ra->size = max(min_nrpages, get_next_ra_size(ra, max_pages)); If this is a round_up() of the size instead of a max() operation, we can always ensure that ra->start, as computed from index, is aligned to min_nrpages. See my reasoning in the previous comment. -- Pankaj