We can make all dumps and lookups lockless. Dumps currently only hold the nfnl mutex on the dump request itself. Dumps can span multiple syscalls, dump continuation doesn't acquire the nfnl mutex anywhere, i.e. the dump callbacks in nf_tables already use rcu and never rely on nfnl mutex being held. So, just switch all dumpers to rcu. This requires taking a module reference before dropping the rcu lock so rmmod is blocked, we also need to hold module reference over the entire dump operation sequence. netlink already supports this via the .module member in the netlink_dump_control struct. For the non-dump case (i.e. lookup of a specific tables, chains, etc), we need to swtich to _rcu list iteration primitive and make sure we use GFP_ATOMIC. This initial patch also adds the new nft_netlink_dump_start_rcu() helper that takes care of the get_ref, drop-rcu-lock,start dump, get-rcu-lock,put-ref sequence. The helper will be reused for all dumps in the followup patches. Signed-off-by: Florian Westphal <fw@xxxxxxxxx> --- net/netfilter/nf_tables_api.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c7e74545a1a8..c22795db2953 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -373,7 +373,7 @@ static struct nft_table *nft_table_lookup(const struct net *net, if (nla == NULL) return ERR_PTR(-EINVAL); - list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry_rcu(table, &net->nft.tables, list) { if (!nla_strcmp(nla, table->name) && table->family == family && nft_active_genmask(table, genmask)) @@ -546,6 +546,24 @@ static int nf_tables_dump_tables(struct sk_buff *skb, return skb->len; } +static int nft_netlink_dump_start_rcu(struct sock *nlsk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct netlink_dump_control *c) +{ + int err; + + if (!try_module_get(THIS_MODULE)) + return -EINVAL; + + rcu_read_unlock(); + err = netlink_dump_start(nlsk, skb, nlh, c); + rcu_read_lock(); + module_put(THIS_MODULE); + + return err; +} + +/* called with rcu_read_lock held */ static int nf_tables_gettable(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[], @@ -561,8 +579,10 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = nf_tables_dump_tables, + .module = THIS_MODULE, }; - return netlink_dump_start(nlsk, skb, nlh, &c); + + return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c); } table = nft_table_lookup(net, nla[NFTA_TABLE_NAME], family, genmask); @@ -571,7 +591,7 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk, return PTR_ERR(table); } - skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb2) return -ENOMEM; @@ -5676,7 +5696,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_table_policy, }, [NFT_MSG_GETTABLE] = { - .call = nf_tables_gettable, + .call_rcu = nf_tables_gettable, .attr_count = NFTA_TABLE_MAX, .policy = nft_table_policy, }, -- 2.16.1 -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html