Hi, tejun
On 2020/2/14 22:05, Tejun Heo wrote:
Hello,
On Fri, Feb 14, 2020 at 10:50:01AM +0800, Yufen Yu wrote:
So, unregistering can leave ->dev along and re-registering can test
whether it's NULL and if not put the existing one and put a new one
there. Wouldn't that work?
Do you mean set bdi->dev as 'NULL' in call_rcu() callback function
(i.e. bdi_release_device()) and test 'bdi->dev' in bdi_register_va()?
I think that may do not work.
We cannot make sure the order of rcu callback function and re-registering.
Then bdi_release_device() may put the new allocated device by re-registering.
No, I meant not freeing bdi->dev on deregistration and only doing so
when it actually needs to - on re-registration or release. So, sth
like the following.
* Unregister: Unregister bdi->dev but don't free it. Leave the pointer
alone.
* Re-register: If bdi->dev is not null, initiate RCU-free and update
bdi->dev to the new dev.
>
> * Release: If bdi->dev is not NULL, initiate RCU-free of it.
Okay, I think we can do that.
When do re-register, we need to update bdi->dev as 'NULL' or the new dev
before invoking call_rcu() to free '->dev'. So readers started after call_rcu()
cannot read the old dev, like:
bdi_register_va()
{
...
if (bdi->dev) {
//rcu_assgin_pointer(bdi->dev, new_dev);
rcu_assgin_pointer(bdi->dev, NULL);
call_rcu();//rcu callback function will free the old dev
}
...
}
After assigning new value for bdi->dev, rcu callback function cannot get
the old device. So I think we may need to replace the '->dev' with a new
struct pointer, in which includes '->dev' and 'rcu_head'. We pass the
struct.rcu_head pointer to call_rcu() and then it can get old dev address.
IMO, maybe we can maintain the original code logic, fix the problem like:
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 4fc87dee005a..e2de4a4e5392 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -185,6 +185,11 @@ struct bdi_writeback {
#endif
};
+struct bdi_rcu_device {
+ struct device *dev;
+ struct rcu_head rcu_head;
+};
+
struct backing_dev_info {
u64 id;
struct rb_node rb_node; /* keyed by ->id */
@@ -219,7 +224,7 @@ struct backing_dev_info {
#endif
wait_queue_head_t wb_waitq;
- struct device *dev;
+ struct bdi_rcu_device *rcu_dev;
struct device *owner;
struct timer_list laptop_mode_wb_timer;
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 62f05f605fb5..05f07ce19091 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -850,7 +850,7 @@ static int bdi_init(struct backing_dev_info *bdi)
{
int ret;
- bdi->dev = NULL;
+ bdi->rcu_dev = NULL;
kref_init(&bdi->refcnt);
bdi->min_ratio = 0;
@@ -932,20 +932,28 @@ struct backing_dev_info *bdi_get_by_id(u64 id)
int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
{
- struct device *dev;
struct rb_node *parent, **p;
+ struct bdi_rcu_device *rcu_dev;
- if (bdi->dev) /* The driver needs to use separate queues per device */
+ /* The driver needs to use separate queues per device */
+ if (bdi->rcu_dev)
return 0;
- dev = device_create_vargs(bdi_class, NULL, MKDEV(0, 0), bdi, fmt, args);
- if (IS_ERR(dev))
- return PTR_ERR(dev);
+ rcu_dev = kzalloc(sizeof(struct bdi_rcu_device), GFP_KERNEL);
+ if (!rcu_dev)
+ return -ENOMEM;
+
+ rcu_dev->dev = device_create_vargs(bdi_class, NULL, MKDEV(0, 0),
+ bdi, fmt, args);
+ if (IS_ERR(rcu_dev->dev)) {
+ kfree(rcu_dev);
+ return PTR_ERR(rcu_dev->dev);
+ }
cgwb_bdi_register(bdi);
- bdi->dev = dev;
+ bdi->rcu_dev = rcu_dev;
- bdi_debug_register(bdi, dev_name(dev));
+ bdi_debug_register(bdi, dev_name(rcu_dev->dev));
set_bit(WB_registered, &bdi->wb.state);
spin_lock_bh(&bdi_lock);
@@ -1005,17 +1013,28 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi)
synchronize_rcu_expedited();
}
+static void bdi_put_device_rcu(struct rcu_head *rcu)
+{
+ struct bdi_rcu_device *rcu_dev = container_of(rcu,
+ struct bdi_rcu_device, rcu_head);
+ put_device(rcu_dev->dev);
+ kfree(rcu_dev);
+}
+
void bdi_unregister(struct backing_dev_info *bdi)
{
+ struct bdi_rcu_device *rcu_dev = bdi->rcu_dev;
/* make sure nobody finds us on the bdi_list anymore */
bdi_remove_from_list(bdi);
wb_shutdown(&bdi->wb);
cgwb_bdi_unregister(bdi);
- if (bdi->dev) {
+ if (rcu_dev) {
bdi_debug_unregister(bdi);
- device_unregister(bdi->dev);
- bdi->dev = NULL;
+ get_device(rcu_dev->dev);
+ device_unregister(rcu_dev->dev);
+ rcu_assign_pointer(bdi->rcu_dev, NULL);
+ call_rcu(&rcu_dev->rcu_head, bdi_put_device_rcu);
}
if (bdi->owner) {
@@ -1031,7 +1050,7 @@ static void release_bdi(struct kref *ref)
if (test_bit(WB_registered, &bdi->wb.state))
bdi_unregister(bdi);
- WARN_ON_ONCE(bdi->dev);
+ WARN_ON_ONCE(bdi->rcu_dev);
wb_exit(&bdi->wb);
cgwb_bdi_exit(bdi);
kfree(bdi);