On Sun, May 22, 2022 at 12:32 AM Donald Buczek <buczek@xxxxxxxxxxxxx> wrote: > > On 19.05.22 21:13, Logan Gunthorpe wrote: > > The mdadm test 21raid5cache randomly fails with NULL pointer accesses > > conf->log when run repeatedly. conf->log was sort of protected with > > a RCU, but most dereferences were not done with the correct functions. > > > > Add rcu_read_locks() and rcu_access_pointers() to the appropriate > > places. > > > > Signed-off-by: Logan Gunthorpe <logang@xxxxxxxxxxxx> [...] > > diff --git a/drivers/md/raid5-log.h b/drivers/md/raid5-log.h > > index f26e6f4c7f9a..24b4dbd5b25c 100644 > > --- a/drivers/md/raid5-log.h > > +++ b/drivers/md/raid5-log.h > > @@ -58,7 +58,7 @@ static inline int log_stripe(struct stripe_head *sh, struct stripe_head_state *s > > { > > struct r5conf *conf = sh->raid_conf; > > > > - if (conf->log) { > > + if (rcu_access_pointer(conf->log)) { > > > A problem here is that `struct r5l_log` of `conf->log` is private to raid5-cache.c and gcc below version 10 (wrongly) regards the `typeof(*p) *local` declaration of __rcu_access_pointer as a dereference: > > CC drivers/md/raid5.o > > In file included from ./include/linux/rculist.h:11:0, > > from ./include/linux/dcache.h:8, > > from ./include/linux/fs.h:8, > > from ./include/linux/highmem.h:5, > > from ./include/linux/bvec.h:10, > > from ./include/linux/blk_types.h:10, > > from ./include/linux/blkdev.h:9, > > from drivers/md/raid5.c:38: > > drivers/md/raid5-log.h: In function ‘log_stripe’: > > ./include/linux/rcupdate.h:384:9: error: dereferencing pointer to incomplete type ‘struct r5l_log’ > > typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \ > > ^ > > ./include/linux/rcupdate.h:495:31: note: in expansion of macro ‘__rcu_access_pointer’ > > #define rcu_access_pointer(p) __rcu_access_pointer((p), __UNIQUE_ID(rcu), __rcu) > > ^~~~~~~~~~~~~~~~~~~~ > > drivers/md/raid5-log.h:61:6: note: in expansion of macro ‘rcu_access_pointer’ > > if (rcu_access_pointer(conf->log)) { > > 
^~~~~~~~~~~~~~~~~~ > > make[2]: *** [scripts/Makefile.build:288: drivers/md/raid5.o] Error 1 > > make[1]: *** [scripts/Makefile.build:550: drivers/md] Error 2 > > make: *** [Makefile:1834: drivers] Error 2 This is annoying. And there are a few other cases in raid5-log.h and raid5.c. Maybe we should move the definition of r5l_log to raid5-log.h? Thanks, Song > > > See https://godbolt.org/z/TPP8MdKbc to test compiler versions with this construct. > > Best > > Donald > > > > if (!test_bit(STRIPE_R5C_CACHING, &sh->state)) { > > /* writing out phase */ > > if (s->waiting_extra_page) > > @@ -79,7 +79,7 @@ static inline void log_stripe_write_finished(struct stripe_head *sh) > > { > > struct r5conf *conf = sh->raid_conf; > > > > - if (conf->log) > > + if (rcu_access_pointer(conf->log)) > > r5l_stripe_write_finished(sh); > > else if (raid5_has_ppl(conf)) > > ppl_stripe_write_finished(sh); > > @@ -87,7 +87,7 @@ static inline void log_stripe_write_finished(struct stripe_head *sh) > > > > static inline void log_write_stripe_run(struct r5conf *conf) > > { > > - if (conf->log) > > + if (rcu_access_pointer(conf->log)) > > r5l_write_stripe_run(conf); > > else if (raid5_has_ppl(conf)) > > ppl_write_stripe_run(conf); > > @@ -95,7 +95,7 @@ static inline void log_write_stripe_run(struct r5conf *conf) > > > > static inline void log_flush_stripe_to_raid(struct r5conf *conf) > > { > > - if (conf->log) > > + if (rcu_access_pointer(conf->log)) > > r5l_flush_stripe_to_raid(conf); > > else if (raid5_has_ppl(conf)) > > ppl_write_stripe_run(conf); > > @@ -105,7 +105,7 @@ static inline int log_handle_flush_request(struct r5conf *conf, struct bio *bio) > > { > > int ret = -ENODEV; > > > > - if (conf->log) > > + if (rcu_access_pointer(conf->log)) > > ret = r5l_handle_flush_request(conf, bio); > > else if (raid5_has_ppl(conf)) > > ret = ppl_handle_flush_request(bio); > > @@ -115,7 +115,7 @@ static inline int log_handle_flush_request(struct r5conf *conf, struct bio *bio) > > > > static inline void 
log_quiesce(struct r5conf *conf, int quiesce) > > { > > - if (conf->log) > > + if (rcu_access_pointer(conf->log)) > > r5l_quiesce(conf, quiesce); > > else if (raid5_has_ppl(conf)) > > ppl_quiesce(conf, quiesce); > > @@ -123,7 +123,7 @@ static inline void log_quiesce(struct r5conf *conf, int quiesce) > > > > static inline void log_exit(struct r5conf *conf) > > { > > - if (conf->log) > > + if (rcu_access_pointer(conf->log)) > > r5l_exit_log(conf); > > else if (raid5_has_ppl(conf)) > > ppl_exit_log(conf); > > diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c [...]