Re: [PATCH nf] netfilter: nf_tables: make destruction work queue pernet

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Florian,

On Tue, Mar 04, 2025 at 12:55:53PM +0100, Florian Westphal wrote:
> The call to flush_work before tearing down a table from the netlink
> notifier was supposed to make sure that all earlier updates (e.g. rule
> add) that might reference that table have been processed.
> 
> Unfortunately, flush_work() waits for the last queued instance.
> This could be an instance that is different from the one that we must
> wait for.
> 
> This is because transactions are protected with a pernet mutex, but the
> work item is global, so holding the transaction mutex doesn't prevent
> another netns from queueing more work.
> 
> Make the work item pernet so that flush_work() will wait for all
> transactions queued from this netns.
> 
> A welcome side effect is that we no longer need to wait for transaction
> objects from other network namespaces.
> 
> The gc work queue is still global.  This seems to be ok because nft_set
> structures are reference counted and each container structure owns a
> reference on the net namespace.
> 
> The destroy_list is still protected by a global spinlock rather than
> pernet one but the hold time is very short anyway.

A few questions below.

> Fixes: 9f6958ba2e90 ("netfilter: nf_tables: unconditionally flush pending work before notifier")
> Reported-by: syzbot+5d8c5789c8cb076b2c25@xxxxxxxxxxxxxxxxxxxxxxxxx
> Closes: https://syzkaller.appspot.com/bug?extid=5d8c5789c8cb076b2c25
> Signed-off-by: Florian Westphal <fw@xxxxxxxxx>
> ---
>  include/net/netfilter/nf_tables.h |  4 +++-
>  net/netfilter/nf_tables_api.c     | 25 +++++++++++++++----------
>  net/netfilter/nft_compat.c        |  8 ++++----
>  3 files changed, 22 insertions(+), 15 deletions(-)
> 
> diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
> index 60d5dcdb289c..803d5f1601f9 100644
> --- a/include/net/netfilter/nf_tables.h
> +++ b/include/net/netfilter/nf_tables.h
> @@ -1891,7 +1891,7 @@ void nft_chain_filter_fini(void);
>  void __init nft_chain_route_init(void);
>  void nft_chain_route_fini(void);
>  
> -void nf_tables_trans_destroy_flush_work(void);
> +void nf_tables_trans_destroy_flush_work(struct net *net);
>  
>  int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result);
>  __be64 nf_jiffies64_to_msecs(u64 input);
> @@ -1905,6 +1905,7 @@ static inline int nft_request_module(struct net *net, const char *fmt, ...) { re
>  struct nftables_pernet {
>  	struct list_head	tables;
>  	struct list_head	commit_list;
> +	struct list_head	destroy_list;
>  	struct list_head	commit_set_list;
>  	struct list_head	binding_list;
>  	struct list_head	module_list;
> @@ -1915,6 +1916,7 @@ struct nftables_pernet {
>  	unsigned int		base_seq;
>  	unsigned int		gc_seq;
>  	u8			validate_state;
> +	struct work_struct	destroy_work;
>  };
>  
>  extern unsigned int nf_tables_net_id;
> diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
> index a34de9c17cf1..adf8b2b37fc3 100644
> --- a/net/netfilter/nf_tables_api.c
> +++ b/net/netfilter/nf_tables_api.c
> @@ -34,7 +34,6 @@ unsigned int nf_tables_net_id __read_mostly;
>  static LIST_HEAD(nf_tables_expressions);
>  static LIST_HEAD(nf_tables_objects);
>  static LIST_HEAD(nf_tables_flowtables);
> -static LIST_HEAD(nf_tables_destroy_list);
>  static LIST_HEAD(nf_tables_gc_list);
>  static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
>  static DEFINE_SPINLOCK(nf_tables_gc_list_lock);
> @@ -125,7 +124,6 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s
>  	table->validate_state = new_validate_state;
>  }
>  static void nf_tables_trans_destroy_work(struct work_struct *w);
> -static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);
>  
>  static void nft_trans_gc_work(struct work_struct *work);
>  static DECLARE_WORK(trans_gc_work, nft_trans_gc_work);
> @@ -10006,11 +10004,12 @@ static void nft_commit_release(struct nft_trans *trans)
>  
>  static void nf_tables_trans_destroy_work(struct work_struct *w)
>  {
> +	struct nftables_pernet *nft_net = container_of(w, struct nftables_pernet, destroy_work);
>  	struct nft_trans *trans, *next;
>  	LIST_HEAD(head);
>  
>  	spin_lock(&nf_tables_destroy_list_lock);
> -	list_splice_init(&nf_tables_destroy_list, &head);
> +	list_splice_init(&nft_net->destroy_list, &head);
>  	spin_unlock(&nf_tables_destroy_list_lock);
>  
>  	if (list_empty(&head))
> @@ -10024,9 +10023,11 @@ static void nf_tables_trans_destroy_work(struct work_struct *w)
>  	}
>  }
>  
> -void nf_tables_trans_destroy_flush_work(void)
> +void nf_tables_trans_destroy_flush_work(struct net *net)
>  {
> -	flush_work(&trans_destroy_work);
> +	struct nftables_pernet *nft_net = nft_pernet(net);
> +
> +	flush_work(&nft_net->destroy_work);
>  }
>  EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work);
>  
> @@ -10484,11 +10485,11 @@ static void nf_tables_commit_release(struct net *net)
>  
>  	trans->put_net = true;
>  	spin_lock(&nf_tables_destroy_list_lock);
> -	list_splice_tail_init(&nft_net->commit_list, &nf_tables_destroy_list);
> +	list_splice_tail_init(&nft_net->commit_list, &nft_net->destroy_list);
>  	spin_unlock(&nf_tables_destroy_list_lock);
>  
>  	nf_tables_module_autoload_cleanup(net);
> -	schedule_work(&trans_destroy_work);
> +	schedule_work(&nft_net->destroy_work);
>  
>  	mutex_unlock(&nft_net->commit_mutex);
>  }
> @@ -11853,7 +11854,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
>  
>  	gc_seq = nft_gc_seq_begin(nft_net);
>  
> -	nf_tables_trans_destroy_flush_work();
> +	nf_tables_trans_destroy_flush_work(net);
>  again:
>  	list_for_each_entry(table, &nft_net->tables, list) {
>  		if (nft_table_has_owner(table) &&
> @@ -11895,6 +11896,7 @@ static int __net_init nf_tables_init_net(struct net *net)
>  
>  	INIT_LIST_HEAD(&nft_net->tables);
>  	INIT_LIST_HEAD(&nft_net->commit_list);
> +	INIT_LIST_HEAD(&nft_net->destroy_list);
>  	INIT_LIST_HEAD(&nft_net->commit_set_list);
>  	INIT_LIST_HEAD(&nft_net->binding_list);
>  	INIT_LIST_HEAD(&nft_net->module_list);
> @@ -11903,6 +11905,7 @@ static int __net_init nf_tables_init_net(struct net *net)
>  	nft_net->base_seq = 1;
>  	nft_net->gc_seq = 0;
>  	nft_net->validate_state = NFT_VALIDATE_SKIP;
> +	INIT_WORK(&nft_net->destroy_work, nf_tables_trans_destroy_work);
>  
>  	return 0;
>  }
> @@ -11936,9 +11939,13 @@ static void __net_exit nf_tables_exit_net(struct net *net)
>  	nft_gc_seq_end(nft_net, gc_seq);
>  
>  	mutex_unlock(&nft_net->commit_mutex);
> +
> +	cancel_work_sync(&nft_net->destroy_work);

__nft_release_tables() is called in this nf_tables_exit_net()
function, cancel_work_sync() needs to be called before it?

> +
>  	WARN_ON_ONCE(!list_empty(&nft_net->tables));
>  	WARN_ON_ONCE(!list_empty(&nft_net->module_list));
>  	WARN_ON_ONCE(!list_empty(&nft_net->notify_list));
> +	WARN_ON_ONCE(!list_empty(&nft_net->destroy_list));
>  }
>  
>  static void nf_tables_exit_batch(struct list_head *net_exit_list)
> @@ -12029,10 +12036,8 @@ static void __exit nf_tables_module_exit(void)
>  	unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
>  	nft_chain_filter_fini();
>  	nft_chain_route_fini();
> -	nf_tables_trans_destroy_flush_work();

My understanding is that this is not required anymore because of the
new cancel_work_sync() in the exit_net() path?

>  	unregister_pernet_subsys(&nf_tables_net_ops);
>  	cancel_work_sync(&trans_gc_work);
> -	cancel_work_sync(&trans_destroy_work);
>  	rcu_barrier();
>  	rhltable_destroy(&nft_objname_ht);
>  	nf_tables_core_module_exit();
> diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
> index 7ca4f0d21fe2..72711d62fddf 100644
> --- a/net/netfilter/nft_compat.c
> +++ b/net/netfilter/nft_compat.c
> @@ -228,7 +228,7 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
>  	return 0;
>  }
>  
> -static void nft_compat_wait_for_destructors(void)
> +static void nft_compat_wait_for_destructors(struct net *net)
>  {
>  	/* xtables matches or targets can have side effects, e.g.
>  	 * creation/destruction of /proc files.
> @@ -236,7 +236,7 @@ static void nft_compat_wait_for_destructors(void)
>  	 * work queue.  If we have pending invocations we thus
>  	 * need to wait for those to finish.
>  	 */
> -	nf_tables_trans_destroy_flush_work();
> +	nf_tables_trans_destroy_flush_work(net);
>  }
>  
>  static int
> @@ -262,7 +262,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
>  
>  	nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv);
>  
> -	nft_compat_wait_for_destructors();
> +	nft_compat_wait_for_destructors(ctx->net);
>  
>  	ret = xt_check_target(&par, size, proto, inv);
>  	if (ret < 0) {
> @@ -515,7 +515,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
>  
>  	nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv);
>  
> -	nft_compat_wait_for_destructors();
> +	nft_compat_wait_for_destructors(ctx->net);
>  
>  	return xt_check_match(&par, size, proto, inv);
>  }
> -- 
> 2.48.1
> 
> 




[Index of Archives]     [Netfitler Users]     [Berkeley Packet Filter]     [LARTC]     [Bugtraq]     [Yosemite Forum]

  Powered by Linux