On Sun, 17 Jul 2022 22:54:47 -0700
Song Liu <song@xxxxxxxxxx> wrote:

Again, make the subject:

  ftrace: Allow IPMODIFY and DIRECT ops on the same function

> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index acb35243ce5d..306bf08acda6 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -208,6 +208,43 @@ enum {
>  	FTRACE_OPS_FL_DIRECT = BIT(17),
>  };
>
> +/*
> + * FTRACE_OPS_CMD_* commands allow the ftrace core logic to request changes
> + * to a ftrace_ops. Note, the requests may fail.
> + *
> + * ENABLE_SHARE_IPMODIFY_SELF - enable a DIRECT ops to work on the same
> + *                              function as an ops with IPMODIFY. Called
> + *                              when the DIRECT ops is being registered.
> + *                              This is called with both direct_mutex and
> + *                              ftrace_lock are locked.
> + *
> + * ENABLE_SHARE_IPMODIFY_PEER - enable a DIRECT ops to work on the same
> + *                              function as an ops with IPMODIFY. Called
> + *                              when the other ops (the one with IPMODIFY)
> + *                              is being registered.
> + *                              This is called with direct_mutex locked.
> + *
> + * DISABLE_SHARE_IPMODIFY_PEER - disable a DIRECT ops to work on the same
> + *                               function as an ops with IPMODIFY. Called
> + *                               when the other ops (the one with IPMODIFY)
> + *                               is being unregistered.
> + *                               This is called with direct_mutex locked.
> + */
> +enum ftrace_ops_cmd {
> +	FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF,
> +	FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER,
> +	FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER,
> +};
> +
> +/*
> + * For most ftrace_ops_cmd,
> + * Returns:
> + *         0 - Success.
> + *         -EBUSY - The operation cannot process
> + *         -EAGAIN - The operation cannot process tempoorarily.

Just state:

  Returns:
          0 - Success
          Negative on failure. The return value is dependent on the callback.

Let's not bind policy of the callback with ftrace.

> + */
> +typedef int (*ftrace_ops_func_t)(struct ftrace_ops *op, enum ftrace_ops_cmd cmd);
> +
>  #ifdef CONFIG_DYNAMIC_FTRACE
>  /* The hash used to know what functions callbacks trace */
>  struct ftrace_ops_hash {
> @@ -250,6 +287,7 @@ struct ftrace_ops {
>  	unsigned long trampoline;
>  	unsigned long trampoline_size;
>  	struct list_head list;
> +	ftrace_ops_func_t ops_func;
>  #endif
>  };
>
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index 0c15ec997c13..f52efbd13e51 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -1861,6 +1861,8 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops,
>  	ftrace_hash_rec_update_modify(ops, filter_hash, 1);
>  }
>
> +static bool ops_references_ip(struct ftrace_ops *ops, unsigned long ip);
> +
>  /*
>   * Try to update IPMODIFY flag on each ftrace_rec. Return 0 if it is OK
>   * or no-needed to update, -EBUSY if it detects a conflict of the flag
> @@ -1869,6 +1871,13 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops,
>   * - If the hash is NULL, it hits all recs (if IPMODIFY is set, this is rejected)
>   * - If the hash is EMPTY_HASH, it hits nothing
>   * - Anything else hits the recs which match the hash entries.
> + *
> + * DIRECT ops does not have IPMODIFY flag, but we still need to check it
> + * against functions with FTRACE_FL_IPMODIFY. If there is any overlap, call
> + * ops_func(SHARE_IPMODIFY_SELF) to make sure current ops can share with
> + * IPMODIFY. If ops_func(SHARE_IPMODIFY_SELF) returns non-zero, propagate
> + * the return value to the caller and eventually to the owner of the DIRECT
> + * ops.
>   */
>  static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
>  					 struct ftrace_hash *old_hash,
> @@ -1877,17 +1886,23 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
>  	struct ftrace_page *pg;
>  	struct dyn_ftrace *rec, *end = NULL;
>  	int in_old, in_new;
> +	bool is_ipmodify, is_direct;
>
>  	/* Only update if the ops has been registered */
>  	if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
>  		return 0;
>
> -	if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY))
> +	is_ipmodify = ops->flags & FTRACE_OPS_FL_IPMODIFY;
> +	is_direct = ops->flags & FTRACE_OPS_FL_DIRECT;
> +
> +	/* either IPMODIFY nor DIRECT, skip */
> +	if (!is_ipmodify && !is_direct)
>  		return 0;

I wonder if we should also add:

	if (WARN_ON_ONCE(is_ipmodify && is_direct))
		return 0;

As a direct should never have an ipmodify.

>
>  	/*
> -	 * Since the IPMODIFY is a very address sensitive action, we do not
> -	 * allow ftrace_ops to set all functions to new hash.
> +	 * Since the IPMODIFY and DIRECT are very address sensitive
> +	 * actions, we do not allow ftrace_ops to set all functions to new
> +	 * hash.
>  	 */
>  	if (!new_hash || !old_hash)
>  		return -EINVAL;
> @@ -1905,12 +1920,30 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops,
>  			continue;
>
>  		if (in_new) {
> -			/* New entries must ensure no others are using it */
> -			if (rec->flags & FTRACE_FL_IPMODIFY)
> -				goto rollback;
> -			rec->flags |= FTRACE_FL_IPMODIFY;
> -		} else /* Removed entry */
> +			if (rec->flags & FTRACE_FL_IPMODIFY) {
> +				int ret;
> +
> +				/* Cannot have two ipmodify on same rec */
> +				if (is_ipmodify)
> +					goto rollback;
> +

I might add a

	FTRACE_WARN_ON(rec->flags & FTRACE_FL_DIRECT);

Just to be safe. That is, if this is true, we are adding a new direct
function to a record that already has one.

> +				/*
> +				 * Another ops with IPMODIFY is already
> +				 * attached. We are now attaching a direct
> +				 * ops. Run SHARE_IPMODIFY_SELF, to check
> +				 * whether sharing is supported.
> +				 */
> +				if (!ops->ops_func)
> +					return -EBUSY;
> +				ret = ops->ops_func(ops, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF);
> +				if (ret)
> +					return ret;
> +			} else if (is_ipmodify) {
> +				rec->flags |= FTRACE_FL_IPMODIFY;
> +			}
> +		} else if (is_ipmodify) {
>  			rec->flags &= ~FTRACE_FL_IPMODIFY;
> +		}
>  	} while_for_each_ftrace_rec();
>
>  	return 0;
> @@ -2455,7 +2488,7 @@ static void call_direct_funcs(unsigned long ip, unsigned long pip,
>  struct ftrace_ops direct_ops = {
>  	.func = call_direct_funcs,
>  	.flags = FTRACE_OPS_FL_IPMODIFY
> -		 | FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS
> +		 | FTRACE_OPS_FL_SAVE_REGS
>  		 | FTRACE_OPS_FL_PERMANENT,
>  	/*
>  	 * By declaring the main trampoline as this trampoline
> @@ -3072,14 +3105,14 @@ static inline int ops_traces_mod(struct ftrace_ops *ops)
>  }
>
>  /*
> - * Check if the current ops references the record.
> + * Check if the current ops references the given ip.
>   *
>   * If the ops traces all functions, then it was already accounted for.
>   * If the ops does not trace the current record function, skip it.
>   * If the ops ignores the function via notrace filter, skip it.
>   */
> -static inline bool
> -ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
> +static bool
> +ops_references_ip(struct ftrace_ops *ops, unsigned long ip)
>  {
>  	/* If ops isn't enabled, ignore it */
>  	if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
> @@ -3091,16 +3124,29 @@ ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
>
>  	/* The function must be in the filter */
>  	if (!ftrace_hash_empty(ops->func_hash->filter_hash) &&
> -	    !__ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip))
> +	    !__ftrace_lookup_ip(ops->func_hash->filter_hash, ip))
>  		return false;
>
>  	/* If in notrace hash, we ignore it too */
> -	if (ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip))
> +	if (ftrace_lookup_ip(ops->func_hash->notrace_hash, ip))
>  		return false;
>
>  	return true;
>  }
>
> +/*
> + * Check if the current ops references the record.
> + *
> + * If the ops traces all functions, then it was already accounted for.
> + * If the ops does not trace the current record function, skip it.
> + * If the ops ignores the function via notrace filter, skip it.
> + */
> +static bool
> +ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
> +{
> +	return ops_references_ip(ops, rec->ip);
> +}
> +
>  static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
>  {
>  	bool init_nop = ftrace_need_init_nop();
> @@ -5545,8 +5591,7 @@ int modify_ftrace_direct(unsigned long ip,
>  }
>  EXPORT_SYMBOL_GPL(modify_ftrace_direct);
>
> -#define MULTI_FLAGS (FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_DIRECT | \
> -		      FTRACE_OPS_FL_SAVE_REGS)
> +#define MULTI_FLAGS (FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS)
>
>  static int check_direct_multi(struct ftrace_ops *ops)
>  {
> @@ -8004,6 +8049,137 @@ int ftrace_is_dead(void)
>  	return ftrace_disabled;
>  }
>
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
> +/*
> + * When registering ftrace_ops with IPMODIFY, it is necessary to make sure
> + * it doesn't conflict with any direct ftrace_ops. If there is existing
> + * direct ftrace_ops on a kernel function being patched, call
> + * FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER on it to enable sharing.
> + *
> + * @ops: ftrace_ops being registered.
> + *
> + * Returns:
> + *         0 - @ops does not have IPMODIFY or @ops itself is DIRECT, no
> + *             change needed;
> + *         1 - @ops has IPMODIFY, hold direct_mutex;
> + *         -EBUSY - currently registered DIRECT ftrace_ops cannot share the
> + *                  same function with IPMODIFY, abort the register.
> + *         -EAGAIN - cannot make changes to currently registered DIRECT
> + *                   ftrace_ops due to rare race conditions. Should retry
> + *                   later. This is needed to avoid potential deadlocks
> + *                   on the DIRECT ftrace_ops side.

Again, these are ops_func() specific and have nothing to do with the logic
in this file. Just state:

 * Returns:
 *         0 - @ops does not have IPMODIFY or @ops itself is DIRECT, no
 *             change needed;
 *         1 - @ops has IPMODIFY, hold direct_mutex;
 *         Negative on error.

And if we rework the logic so that this does not keep hold of the
direct_mutex, we could just let the callback return any non-zero value on
error.
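
For illustration only, a minimal sketch of what that callback could look
like on the DIRECT-ops side (the my_*() helpers below are completely made
up and not part of this patch); the point is just that the choice of errno
and any retry policy stays in the callback, not in ftrace core:

	/* Hypothetical example, not from the patch. */
	static int my_ops_func(struct ftrace_ops *op, enum ftrace_ops_cmd cmd)
	{
		switch (cmd) {
		case FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_SELF:
		case FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER:
			/*
			 * Switch this direct ops to a trampoline that can
			 * follow a modified regs->ip. Whether a failure is
			 * -EBUSY, -EAGAIN, or something else is decided
			 * here, not in ftrace.
			 */
			return my_switch_to_shareable_trampoline(op);
		case FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER:
			/* Best effort; ftrace ignores the return value here. */
			my_switch_to_plain_trampoline(op);
			return 0;
		}
		return -EINVAL;
	}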
> + */
> +static int prepare_direct_functions_for_ipmodify(struct ftrace_ops *ops)
> +	__acquires(&direct_mutex)
> +{
> +	struct ftrace_func_entry *entry;
> +	struct ftrace_hash *hash;
> +	struct ftrace_ops *op;
> +	int size, i, ret;
> +
> +	if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY))
> +		return 0;
> +
> +	mutex_lock(&direct_mutex);
> +
> +	hash = ops->func_hash->filter_hash;
> +	size = 1 << hash->size_bits;
> +	for (i = 0; i < size; i++) {
> +		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
> +			unsigned long ip = entry->ip;
> +			bool found_op = false;
> +
> +			mutex_lock(&ftrace_lock);
> +			do_for_each_ftrace_op(op, ftrace_ops_list) {
> +				if (!(op->flags & FTRACE_OPS_FL_DIRECT))
> +					continue;
> +				if (ops_references_ip(op, ip)) {
> +					found_op = true;
> +					break;

I think you want a goto here. The "do_for_each_ftrace_op() { .. }
while_for_each_ftrace_op()" macros form a double loop. The break just
moves to the next set of pages and does not break out of the outer loop.

					goto out_loop;

> +				}
> +			} while_for_each_ftrace_op(op);

 out_loop:

> +			mutex_unlock(&ftrace_lock);
> +
> +			if (found_op) {
> +				if (!op->ops_func) {
> +					ret = -EBUSY;
> +					goto err_out;
> +				}
> +				ret = op->ops_func(op, FTRACE_OPS_CMD_ENABLE_SHARE_IPMODIFY_PEER);
> +				if (ret)
> +					goto err_out;
> +			}
> +		}
> +	}
> +
> +	/*
> +	 * Didn't find any overlap with direct ftrace_ops, or the direct
> +	 * function can share with ipmodify. Hold direct_mutex to make sure
> +	 * this doesn't change until we are done.
> +	 */
> +	return 1;
> +
> +err_out:
> +	mutex_unlock(&direct_mutex);
> +	return ret;
> +}
> +
> +/*
> + * Similar to prepare_direct_functions_for_ipmodify, clean up after ops
> + * with IPMODIFY is unregistered. The cleanup is optional for most DIRECT
> + * ops.
> + */
> +static void cleanup_direct_functions_after_ipmodify(struct ftrace_ops *ops)
> +{
> +	struct ftrace_func_entry *entry;
> +	struct ftrace_hash *hash;
> +	struct ftrace_ops *op;
> +	int size, i;
> +
> +	if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY))
> +		return;
> +
> +	mutex_lock(&direct_mutex);
> +
> +	hash = ops->func_hash->filter_hash;
> +	size = 1 << hash->size_bits;
> +	for (i = 0; i < size; i++) {
> +		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
> +			unsigned long ip = entry->ip;
> +			bool found_op = false;
> +
> +			mutex_lock(&ftrace_lock);
> +			do_for_each_ftrace_op(op, ftrace_ops_list) {
> +				if (!(op->flags & FTRACE_OPS_FL_DIRECT))
> +					continue;
> +				if (ops_references_ip(op, ip)) {
> +					found_op = true;
> +					break;

					goto out_loop;

> +				}
> +			} while_for_each_ftrace_op(op);

 out_loop:

> +			mutex_unlock(&ftrace_lock);
> +
> +			/* The cleanup is optional, iggore any errors */

"ignore"

> +			if (found_op && op->ops_func)
> +				op->ops_func(op, FTRACE_OPS_CMD_DISABLE_SHARE_IPMODIFY_PEER);
> +		}
> +	}
> +	mutex_unlock(&direct_mutex);
> +}
> +
> +#else /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
> +
> +static int prepare_direct_functions_for_ipmodify(struct ftrace_ops *ops)
> +{
> +	return 0;
> +}
> +
> +static void cleanup_direct_functions_after_ipmodify(struct ftrace_ops *ops)
> +{
> +}
> +
> +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
> +
>  /**
>   * register_ftrace_function - register a function for profiling
>   * @ops: ops structure that holds the function for profiling.
>   *
> @@ -8016,17 +8192,29 @@ int ftrace_is_dead(void)
>   * recursive loop.
>   */
>  int register_ftrace_function(struct ftrace_ops *ops)
> +	__releases(&direct_mutex)
>  {
> +	bool direct_mutex_locked = false;
>  	int ret;
>
>  	ftrace_ops_init(ops);
>

I agree with Petr.
Just grab the direct_mutex lock here.

	mutex_lock(&direct_mutex);

> +	ret = prepare_direct_functions_for_ipmodify(ops);
> +	if (ret < 0)
> +		return ret;

		goto out_unlock;

> +	else if (ret == 1)
> +		direct_mutex_locked = true;
> +

Nuke the above.

>  	mutex_lock(&ftrace_lock);
>
>  	ret = ftrace_startup(ops, 0);
>
>  	mutex_unlock(&ftrace_lock);
>
> +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
> +	if (direct_mutex_locked)
> +		mutex_unlock(&direct_mutex);
> +#endif

Change this to:

 out_unlock:
	mutex_unlock(&direct_mutex);

>  	return ret;
>  }

-- 
Steve

>  EXPORT_SYMBOL_GPL(register_ftrace_function);
> @@ -8045,6 +8233,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
>  	ret = ftrace_shutdown(ops, 0);
>  	mutex_unlock(&ftrace_lock);
>
> +	cleanup_direct_functions_after_ipmodify(ops);
>  	return ret;
>  }
>  EXPORT_SYMBOL_GPL(unregister_ftrace_function);
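
Putting those suggestions together, an untested sketch of how
register_ftrace_function() could end up looking, assuming
prepare_direct_functions_for_ipmodify() is changed to expect direct_mutex
already held and to return only 0 or a negative error (and ignoring the
stubbing direct_mutex needs for !CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
builds):

	/* Sketch only, not the patch as posted. */
	int register_ftrace_function(struct ftrace_ops *ops)
	{
		int ret;

		ftrace_ops_init(ops);

		/* Taken unconditionally; no direct_mutex_locked bookkeeping. */
		mutex_lock(&direct_mutex);

		ret = prepare_direct_functions_for_ipmodify(ops);
		if (ret < 0)
			goto out_unlock;

		mutex_lock(&ftrace_lock);
		ret = ftrace_startup(ops, 0);
		mutex_unlock(&ftrace_lock);

	out_unlock:
		mutex_unlock(&direct_mutex);
		return ret;
	}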