On Thu 21-12-23 16:58:53, Yu Kuai wrote: > From: Yu Kuai <yukuai3@xxxxxxxxxx> > > Unlike __bread_gfp(), ext4 has special handing while reading sb block: > > 1) __GFP_NOFAIL is not set, and memory allocation can fail; > 2) If buffer write failed before, set buffer uptodate and don't read > block from disk; > 3) REQ_META is set for all IO, and REQ_PRIO is set for reading xattr; > 4) If failed, return error ptr instead of NULL; > > This patch add a new helper __bread_gfp2() that will match above 2 and 3( > 1 will be used, and 4 will still be encapsulated by ext4), and prepare to > prevent calling mapping_gfp_constraint() directly on bd_inode->i_mapping > in ext4. > > Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx> I'm not enthusiastic about this but I guess it is as good as it gets without larger cleanups in this area. So feel free to add: Reviewed-by: Jan Kara <jack@xxxxxxx> Honza > --- > fs/buffer.c | 68 ++++++++++++++++++++++++++----------- > include/linux/buffer_head.h | 18 +++++++++- > 2 files changed, 65 insertions(+), 21 deletions(-) > > diff --git a/fs/buffer.c b/fs/buffer.c > index 967f34b70aa8..188bd36c9fea 100644 > --- a/fs/buffer.c > +++ b/fs/buffer.c > @@ -1255,16 +1255,19 @@ void __bforget(struct buffer_head *bh) > } > EXPORT_SYMBOL(__bforget); > > -static struct buffer_head *__bread_slow(struct buffer_head *bh) > +static struct buffer_head *__bread_slow(struct buffer_head *bh, > + blk_opf_t op_flags, > + bool check_write_error) > { > lock_buffer(bh); > - if (buffer_uptodate(bh)) { > + if (buffer_uptodate(bh) || > + (check_write_error && buffer_uptodate_or_error(bh))) { > unlock_buffer(bh); > return bh; > } else { > get_bh(bh); > bh->b_end_io = end_buffer_read_sync; > - submit_bh(REQ_OP_READ, bh); > + submit_bh(REQ_OP_READ | op_flags, bh); > wait_on_buffer(bh); > if (buffer_uptodate(bh)) > return bh; > @@ -1445,6 +1448,31 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size) > } > EXPORT_SYMBOL(__breadahead); > > +static struct buffer_head * > +bread_gfp(struct block_device *bdev, sector_t block, unsigned int size, > + blk_opf_t op_flags, gfp_t gfp, bool check_write_error) > +{ > + struct buffer_head *bh; > + > + gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS); > + > + /* > + * Prefer looping in the allocator rather than here, at least that > + * code knows what it's doing. > + */ > + gfp |= __GFP_NOFAIL; > + > + bh = bdev_getblk(bdev, block, size, gfp); > + if (unlikely(!bh)) > + return NULL; > + > + if (buffer_uptodate(bh) || > + (check_write_error && buffer_uptodate_or_error(bh))) > + return bh; > + > + return __bread_slow(bh, op_flags, check_write_error); > +} > + > /** > * __bread_gfp() - reads a specified block and returns the bh > * @bdev: the block_device to read from > @@ -1458,27 +1486,27 @@ EXPORT_SYMBOL(__breadahead); > * It returns NULL if the block was unreadable. > */ > struct buffer_head * > -__bread_gfp(struct block_device *bdev, sector_t block, > - unsigned size, gfp_t gfp) > +__bread_gfp(struct block_device *bdev, sector_t block, unsigned int size, > + gfp_t gfp) > { > - struct buffer_head *bh; > - > - gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS); > - > - /* > - * Prefer looping in the allocator rather than here, at least that > - * code knows what it's doing. > - */ > - gfp |= __GFP_NOFAIL; > - > - bh = bdev_getblk(bdev, block, size, gfp); > - > - if (likely(bh) && !buffer_uptodate(bh)) > - bh = __bread_slow(bh); > - return bh; > + return bread_gfp(bdev, block, size, 0, gfp, false); > } > EXPORT_SYMBOL(__bread_gfp); > > +/* > + * This works like __bread_gfp() except: > + * 1) If buffer write failed before, set buffer uptodate and don't read > + * block from disk; > + * 2) Caller can pass in additional op_flags like REQ_META; > + */ > +struct buffer_head * > +__bread_gfp2(struct block_device *bdev, sector_t block, unsigned int size, > + blk_opf_t op_flags, gfp_t gfp) > +{ > + return bread_gfp(bdev, block, size, op_flags, gfp, true); > +} > +EXPORT_SYMBOL(__bread_gfp2); > + > static void __invalidate_bh_lrus(struct bh_lru *b) > { > int i; > diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h > index 5f23ee599889..751b2744b4ae 100644 > --- a/include/linux/buffer_head.h > +++ b/include/linux/buffer_head.h > @@ -171,6 +171,18 @@ static __always_inline int buffer_uptodate(const struct buffer_head *bh) > return test_bit_acquire(BH_Uptodate, &bh->b_state); > } > > +static __always_inline int buffer_uptodate_or_error(struct buffer_head *bh) > +{ > + /* > + * If the buffer has the write error flag, data was failed to write > + * out in the block. In this case, set buffer uptodate to prevent > + * reading old data. > + */ > + if (buffer_write_io_error(bh)) > + set_buffer_uptodate(bh); > + return buffer_uptodate(bh); > +} > + > static inline unsigned long bh_offset(const struct buffer_head *bh) > { > return (unsigned long)(bh)->b_data & (page_size(bh->b_page) - 1); > @@ -231,7 +243,11 @@ void __brelse(struct buffer_head *); > void __bforget(struct buffer_head *); > void __breadahead(struct block_device *, sector_t block, unsigned int size); > struct buffer_head *__bread_gfp(struct block_device *, > - sector_t block, unsigned size, gfp_t gfp); > + sector_t block, unsigned int size, gfp_t gfp); > +struct buffer_head *__bread_gfp2(struct block_device *bdev, sector_t block, > + unsigned int size, blk_opf_t op_flags, > + gfp_t gfp); > + > struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); > void free_buffer_head(struct buffer_head * bh); > void unlock_buffer(struct buffer_head *bh); > -- > 2.39.2 > -- Jan Kara <jack@xxxxxxxx> SUSE Labs, CR