Add two new sleepable tracepoints in cgroup: cgroup_mkdir_s and cgroup_rmdir_s. The suffix _s means they are in a sleepable context. These two tracepoints don't need full cgroup paths, so they don't have to live in atomic context. These two tracepoints are also called without holding cgroup_mutex. They can be used by bpf to monitor cgroup creation and deletion. Bpf sleepable programs can attach to these two tracepoints and create corresponding directories in bpffs. The created directories don't need the cgroup paths; the cgroup id is sufficient to identify the cgroup. Once the bpffs directories have been created, the bpf prog can further pin bpf objects inside the directories and allow users to read the pinned objects. This serves as a way to extend the fixed cgroup interface. Cc: Tejun Heo <tj@xxxxxxxxxx> Signed-off-by: Hao Luo <haoluo@xxxxxxxxxx> --- include/trace/events/cgroup.h | 45 +++++++++++++++++++++++++++++++++++ kernel/cgroup/cgroup.c | 5 ++++ 2 files changed, 50 insertions(+) diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h index dd7d7c9efecd..4483a7d6c43a 100644 --- a/include/trace/events/cgroup.h +++ b/include/trace/events/cgroup.h @@ -204,6 +204,51 @@ DEFINE_EVENT(cgroup_event, cgroup_notify_frozen, TP_ARGS(cgrp, path, val) ); +/* + * The following tracepoints are supposed to be called in a sleepable context. 
+ */ +DECLARE_EVENT_CLASS(cgroup_sleepable_tp, + + TP_PROTO(struct cgroup *cgrp), + + TP_ARGS(cgrp), + + TP_STRUCT__entry( + __field( int, root ) + __field( int, level ) + __field( u64, id ) + ), + + TP_fast_assign( + __entry->root = cgrp->root->hierarchy_id; + __entry->id = cgroup_id(cgrp); + __entry->level = cgrp->level; + ), + + TP_printk("root=%d id=%llu level=%d", + __entry->root, __entry->id, __entry->level) +); + +#ifdef DEFINE_EVENT_SLEEPABLE +#undef DEFINE_EVENT +#define DEFINE_EVENT(template, call, proto, args) \ + DEFINE_EVENT_SLEEPABLE(template, call, PARAMS(proto), PARAMS(args)) +#endif + +DEFINE_EVENT(cgroup_sleepable_tp, cgroup_mkdir_s, + + TP_PROTO(struct cgroup *cgrp), + + TP_ARGS(cgrp) +); + +DEFINE_EVENT(cgroup_sleepable_tp, cgroup_rmdir_s, + + TP_PROTO(struct cgroup *cgrp), + + TP_ARGS(cgrp) +); + #endif /* _TRACE_CGROUP_H */ /* This part must be outside protection */ diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 9d05c3ca2d5e..f14ab00d9ef5 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5535,6 +5535,8 @@ int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode) cgroup_destroy_locked(cgrp); out_unlock: cgroup_kn_unlock(parent_kn); + if (!ret) + trace_cgroup_mkdir_s(cgrp); return ret; } @@ -5725,6 +5727,9 @@ int cgroup_rmdir(struct kernfs_node *kn) TRACE_CGROUP_PATH(rmdir, cgrp); cgroup_kn_unlock(kn); + + if (!ret) + trace_cgroup_rmdir_s(cgrp); return ret; } -- 2.35.1.574.g5d30c73bfb-goog