On Tue 28-01-14 18:54:33, Tejun Heo wrote:
> css_from_dir() returns the matching css (cgroup_subsys_state) given a
> dentry and subsystem.  The function doesn't pin the css before
> returning and requires the caller to be holding RCU read lock or
> cgroup_mutex and handling pinning on the caller side.
> 
> Given that users of the function are likely to want to pin the
> returned css (both existing users do) and that getting and putting
> css's are very cheap, there's no reason for the interface to be
> tricky like this.
> 
> Rename css_from_dir() to css_tryget_from_dir() and make it try to pin
> the found css and return it only if pinning succeeded.  The callers
> are updated so that they no longer do RCU locking and pinning around
> the function and just use the returned css.
> 
> This will also ease converting cgroup to kernfs.
> 
> Signed-off-by: Tejun Heo <tj@xxxxxxxxxx>
> Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
> Cc: Frederic Weisbecker <fweisbec@xxxxxxxxx>
> Cc: Ingo Molnar <mingo@xxxxxxxxxx>
> Cc: Johannes Weiner <hannes@xxxxxxxxxxx>
> Cc: Michal Hocko <mhocko@xxxxxxx>
> Cc: Balbir Singh <bsingharora@xxxxxxxxx>
> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>

Yes, this is handy. For the memcg part:

Acked-by: Michal Hocko <mhocko@xxxxxxx>

Thanks!

> ---
>  include/linux/cgroup.h |  4 ++--
>  kernel/cgroup.c        | 25 ++++++++++++++++---------
>  kernel/events/core.c   | 17 +----------------
>  mm/memcontrol.c        | 16 +++++++---------
>  4 files changed, 26 insertions(+), 36 deletions(-)
> 
> diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
> index 198c7fc..2255639 100644
> --- a/include/linux/cgroup.h
> +++ b/include/linux/cgroup.h
> @@ -823,8 +823,8 @@ int css_scan_tasks(struct cgroup_subsys_state *css,
>  int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
>  int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
>  
> -struct cgroup_subsys_state *css_from_dir(struct dentry *dentry,
> -					 struct cgroup_subsys *ss);
> +struct cgroup_subsys_state *css_tryget_from_dir(struct dentry *dentry,
> +						struct cgroup_subsys *ss);
>  
>  #else /* !CONFIG_CGROUPS */
> 
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index 3d92fd0..823e250 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -4964,28 +4964,35 @@ static int __init cgroup_disable(char *str)
>  __setup("cgroup_disable=", cgroup_disable);
>  
>  /**
> - * css_from_dir - get corresponding css from the dentry of a cgroup dir
> + * css_tryget_from_dir - get corresponding css from the dentry of a cgroup dir
>   * @dentry: directory dentry of interest
>   * @ss: subsystem of interest
>   *
> - * Must be called under cgroup_mutex or RCU read lock.  The caller is
> - * responsible for pinning the returned css if it needs to be accessed
> - * outside the critical section.
> + * If @dentry is a directory for a cgroup which has @ss enabled on it, try
> + * to get the corresponding css and return it.  If such css doesn't exist
> + * or can't be pinned, an ERR_PTR value is returned.
>   */
> -struct cgroup_subsys_state *css_from_dir(struct dentry *dentry,
> -					 struct cgroup_subsys *ss)
> +struct cgroup_subsys_state *css_tryget_from_dir(struct dentry *dentry,
> +						struct cgroup_subsys *ss)
>  {
>  	struct cgroup *cgrp;
> -
> -	cgroup_assert_mutex_or_rcu_locked();
> +	struct cgroup_subsys_state *css;
>  
>  	/* is @dentry a cgroup dir? */
>  	if (!dentry->d_inode ||
>  	    dentry->d_inode->i_op != &cgroup_dir_inode_operations)
>  		return ERR_PTR(-EBADF);
>  
> +	rcu_read_lock();
> +
>  	cgrp = __d_cgrp(dentry);
> -	return cgroup_css(cgrp, ss) ?: ERR_PTR(-ENOENT);
> +	css = cgroup_css(cgrp, ss);
> +
> +	if (!css || !css_tryget(css))
> +		css = ERR_PTR(-ENOENT);
> +
> +	rcu_read_unlock();
> +	return css;
>  }
>  
>  /**
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 6490373..a3c3ab5 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -370,11 +370,6 @@ perf_cgroup_match(struct perf_event *event)
>  				    event->cgrp->css.cgroup);
>  }
>  
> -static inline bool perf_tryget_cgroup(struct perf_event *event)
> -{
> -	return css_tryget(&event->cgrp->css);
> -}
> -
>  static inline void perf_put_cgroup(struct perf_event *event)
>  {
>  	css_put(&event->cgrp->css);
> @@ -593,9 +588,7 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
>  	if (!f.file)
>  		return -EBADF;
>  
> -	rcu_read_lock();
> -
> -	css = css_from_dir(f.file->f_dentry, &perf_event_cgrp_subsys);
> +	css = css_tryget_from_dir(f.file->f_dentry, &perf_event_cgrp_subsys);
>  	if (IS_ERR(css)) {
>  		ret = PTR_ERR(css);
>  		goto out;
> @@ -604,13 +597,6 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
>  	cgrp = container_of(css, struct perf_cgroup, css);
>  	event->cgrp = cgrp;
>  
> -	/* must be done before we fput() the file */
> -	if (!perf_tryget_cgroup(event)) {
> -		event->cgrp = NULL;
> -		ret = -ENOENT;
> -		goto out;
> -	}
> -
>  	/*
>  	 * all events in a group must monitor
>  	 * the same cgroup because a task belongs
> @@ -621,7 +607,6 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
>  		ret = -EINVAL;
>  	}
>  out:
> -	rcu_read_unlock();
>  	fdput(f);
>  	return ret;
>  }
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index d4a8c40..ab4e412 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -6184,17 +6184,15 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css,
>  	 * automatically removed on cgroup destruction but the removal is
>  	 * asynchronous, so take an extra ref on @css.
>  	 */
> -	rcu_read_lock();
> -
> +	cfile_css = css_tryget_from_dir(cfile.file->f_dentry->d_parent,
> +					&memory_cgrp_subsys);
>  	ret = -EINVAL;
> -	cfile_css = css_from_dir(cfile.file->f_dentry->d_parent,
> -				 &memory_cgrp_subsys);
> -	if (cfile_css == css && css_tryget(css))
> -		ret = 0;
> -
> -	rcu_read_unlock();
> -	if (ret)
> +	if (IS_ERR(cfile_css))
>  		goto out_put_cfile;
> +	if (cfile_css != css) {
> +		css_put(cfile_css);
> +		goto out_put_cfile;
> +	}
>  
>  	ret = event->register_event(memcg, event->eventfd, buffer);
>  	if (ret)
> -- 
> 1.8.5.3

-- 
Michal Hocko
SUSE Labs
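
P.S. To make the caller-side simplification concrete: with the new interface
the lookup and the pin happen in one step, so a caller no longer brackets the
call with rcu_read_lock()/rcu_read_unlock() and a separate css_tryget(); it
only owes a css_put() once it is done with the css. A minimal sketch, assuming
a dentry and subsystem pointer are already in hand (illustrative only, not
code from the patch):

	struct cgroup_subsys_state *css;

	/* look up and pin in one step; returns ERR_PTR on failure */
	css = css_tryget_from_dir(dentry, ss);
	if (IS_ERR(css))
		return PTR_ERR(css);

	/* ... css is pinned, safe to use without the RCU read lock ... */

	css_put(css);	/* drop the reference taken by the lookup */
	return 0;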