[RFC v2] nf_tables: Transaction API proposal

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



It reworks the current atomic API:
- propose START/COMMIT/ABORT transaction messages
- remove rule flags: rule manipalution is part of a transaction once
  one has been started
- Enable 1 transaction per-client
- 1 commit at a time (mutex based)
---

Hi,

Ok I tried now to fix a bit better the proposal.
The nft message first are more like the other ones:
 -> not too long, and the meaningful part is concatenated.

@Pablo: I took your idea of "BEGIN" instead of "START", it sounds better.

I still believe we should enable 1 transaction per-client so now instead
of the bogus sk_user_data pointer, it stores the client transaction in a local
list. I did not yet put a limit on how many clients can start a transaction actually,
it has to be added.

It's actually cleaner that way. 

I also refactored a bit the nf_tables_newrule(), it's cleaner and optimized.
(no list_del_rcu(rule) in case of not added properly to the transaction)

About the commit, you are right Pablo: only one at a time, this is mandatory. 
I missed the point on my first proposal.
 
To do so I introduced a mutex. If it's already locked it return -EAGAIN, so it's up
to the client to retry. I guess this is anyway not going to happen very often.
I don't know if we could get something better here, at least now no client can
lock up the other indefinitely, it enables per-client transaction... 

Please review,

 include/net/netfilter/nf_tables.h        |  11 ++
 include/uapi/linux/netfilter/nf_tables.h |  11 +-
 net/netfilter/nf_tables_api.c            | 216 +++++++++++++++++++------------
 3 files changed, 150 insertions(+), 88 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 69cb5ea..028f832 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -354,6 +354,17 @@ struct nft_rule_update {
 	struct net			*net;
 };
 
+/**
+ * struct nft_transaction - nf_tables transaction
+ *
+ * @updates: list of rule updates for the current transaction
+ */
+struct nft_transaction {
+	struct list_head                list;
+	struct list_head		updates;
+	u32                             pid_owner;
+};
+
 static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule)
 {
 	return (struct nft_expr *)&rule->data[0];
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 76b215f..5f3251b 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -37,8 +37,9 @@ enum nf_tables_msg_types {
 	NFT_MSG_NEWSETELEM,
 	NFT_MSG_GETSETELEM,
 	NFT_MSG_DELSETELEM,
-	NFT_MSG_COMMIT,
-	NFT_MSG_ABORT,
+	NFT_MSG_BEGINTRANSACT,
+	NFT_MSG_COMMITTRANSACT,
+	NFT_MSG_ABORTTRANSACT,
 	NFT_MSG_MAX,
 };
 
@@ -88,18 +89,12 @@ enum nft_chain_attributes {
 };
 #define NFTA_CHAIN_MAX		(__NFTA_CHAIN_MAX - 1)
 
-enum {
-	NFT_RULE_F_COMMIT	= (1 << 0),
-	NFT_RULE_F_MASK		= NFT_RULE_F_COMMIT,
-};
-
 enum nft_rule_attributes {
 	NFTA_RULE_UNSPEC,
 	NFTA_RULE_TABLE,
 	NFTA_RULE_CHAIN,
 	NFTA_RULE_HANDLE,
 	NFTA_RULE_EXPRESSIONS,
-	NFTA_RULE_FLAGS,
 	NFTA_RULE_COMPAT,
 	__NFTA_RULE_MAX
 };
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d2da5df..0bc6318 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/list.h>
+#include <linux/mutex.h>
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/netfilter.h>
@@ -22,6 +23,8 @@
 #include <net/sock.h>
 
 static LIST_HEAD(nf_tables_expressions);
+static LIST_HEAD(nf_tables_transactions);
+static DEFINE_MUTEX(nf_tables_commit_lock);
 
 /**
  *	nft_register_afinfo - register nf_tables address family info
@@ -1264,7 +1267,6 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
 				    .len = NFT_CHAIN_MAXNAMELEN - 1 },
 	[NFTA_RULE_HANDLE]	= { .type = NLA_U64 },
 	[NFTA_RULE_EXPRESSIONS]	= { .type = NLA_NESTED },
-	[NFTA_RULE_FLAGS]	= { .type = NLA_U32 },
 	[NFTA_RULE_COMPAT]	= { .type = NLA_NESTED },
 };
 
@@ -1518,15 +1520,35 @@ static void nf_tables_rule_destroy(struct nft_rule *rule)
 
 static struct nft_expr_info *info;
 
-static int nf_tables_dirty_add(struct nft_rule *rule, const struct nft_ctx *ctx)
+static struct nft_transaction *nf_tables_transaction_lookup(u32 pid_owner)
 {
-	struct nft_rule_update *rupd;
+	struct nft_transaction *transaction;
 
-	/* Another socket owns the dirty list? */
-	if (!ctx->net->nft.pid_owner)
-		ctx->net->nft.pid_owner = ctx->nlh->nlmsg_pid;
-	else if (ctx->net->nft.pid_owner != ctx->nlh->nlmsg_pid)
-		return -EBUSY;
+	if (list_empty(&nf_tables_transactions))
+		return NULL;
+
+	list_for_each_entry(transaction, &nf_tables_transactions, list) {
+		if (pid_owner == transaction->pid_owner)
+			return transaction;
+	}
+
+	return NULL;
+}
+
+static void nf_tables_transaction_remove(struct nft_transaction *transaction)
+{
+	nfnl_lock();
+	list_del(&transaction->list);
+	nfnl_unlock();
+
+	kfree(transaction);
+}
+
+static int nf_tables_transaction_add(const struct nft_ctx *ctx,
+				     struct nft_transaction *transaction,
+				     struct nft_rule *rule)
+{
+	struct nft_rule_update *rupd;
 
 	rupd = kmalloc(sizeof(struct nft_rule_update), GFP_KERNEL);
 	if (rupd == NULL)
@@ -1536,7 +1558,7 @@ static int nf_tables_dirty_add(struct nft_rule *rule, const struct nft_ctx *ctx)
 	rupd->table = ctx->table;
 	rupd->rule = rule;
 	rupd->net = ctx->net;
-	list_add(&rupd->list, &ctx->net->nft.dirty_rules);
+	list_add(&rupd->list, &transaction->updates);
 
 	return 0;
 }
@@ -1547,6 +1569,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	const struct nft_af_info *afi;
+	struct nft_transaction *transaction;
 	struct net *net = sock_net(skb->sk);
 	struct nft_table *table;
 	struct nft_chain *chain;
@@ -1557,7 +1580,6 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 	unsigned int size, i, n;
 	int err, rem;
 	bool create;
-	u32 flags = 0;
 	u64 handle;
 
 	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
@@ -1616,15 +1638,6 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 	if (rule == NULL)
 		goto err1;
 
-	if (nla[NFTA_RULE_FLAGS]) {
-		flags = ntohl(nla_get_be32(nla[NFTA_RULE_FLAGS]));
-		if (flags & ~NFT_RULE_F_MASK)
-			return -EINVAL;
-
-		if (flags & NFT_RULE_F_COMMIT)
-			nft_rule_activate_next(net, rule);
-	}
-
 	rule->handle = handle;
 	rule->dlen   = size;
 
@@ -1637,8 +1650,16 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 		expr = nft_expr_next(expr);
 	}
 
+	transaction = nf_tables_transaction_lookup(nlh->nlmsg_pid);
+	if (transaction != NULL) {
+		err = nf_tables_transaction_add(&ctx, transaction, rule);
+		if (err < 0)
+			goto err2;
+		nft_rule_activate_next(net, rule);
+	}
+
 	if (nlh->nlmsg_flags & NLM_F_REPLACE) {
-		if (flags & NFT_RULE_F_COMMIT) {
+		if (transaction != NULL) {
 			nft_rule_disactivate_next(net, old_rule);
 			list_add_tail_rcu(&rule->list, &chain->rules);
 		} else {
@@ -1650,16 +1671,11 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 	else
 		list_add_rcu(&rule->list, &chain->rules);
 
-	if (flags & NFT_RULE_F_COMMIT) {
-		err = nf_tables_dirty_add(rule, &ctx);
-		if (err < 0) {
-			list_del_rcu(&rule->list);
-			goto err2;
-		}
-	} else {
+	if (transaction == NULL) {
 		nf_tables_rule_notify(skb, nlh, table, chain, rule,
 				      NFT_MSG_NEWRULE,
-				      nlh->nlmsg_flags & (NLM_F_APPEND | NLM_F_REPLACE),
+				      nlh->nlmsg_flags &
+				      (NLM_F_APPEND | NLM_F_REPLACE),
 				      nfmsg->nfgen_family);
 	}
 	return 0;
@@ -1674,17 +1690,18 @@ err1:
 	return err;
 }
 
-static int
-nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule, u32 flags)
+static int nf_tables_delrule_one(struct nft_ctx *ctx,
+				 struct nft_transaction *transaction,
+				 struct nft_rule *rule)
 {
 	int ret = 0;
 
-	if (flags & NFT_RULE_F_COMMIT) {
+	if (transaction != NULL) {
 		/* Will be deleted already in the next generation */
 		if (!nft_rule_is_active_next(ctx->net, rule))
 			return -EBUSY;
 
-		ret = nf_tables_dirty_add(rule, ctx);
+		ret = nf_tables_transaction_add(ctx, transaction, rule);
 		if (ret >= 0)
 			nft_rule_disactivate_next(ctx->net, rule);
 	} else {
@@ -1703,13 +1720,13 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	const struct nft_af_info *afi;
+	struct nft_transaction *transaction;
 	struct net *net = sock_net(skb->sk);
 	const struct nft_table *table;
 	struct nft_chain *chain;
 	struct nft_rule *rule, *tmp;
 	int family = nfmsg->nfgen_family, err;
 	struct nft_ctx ctx;
-	u32 flags = 0;
 
 	afi = nf_tables_afinfo_lookup(net, family, false);
 	if (IS_ERR(afi))
@@ -1725,44 +1742,68 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
 
 	nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
 
-	if (nla[NFTA_RULE_FLAGS]) {
-		flags = ntohl(nla_get_be32(nla[NFTA_RULE_FLAGS]));
-
-		if (flags & ~NFT_RULE_F_MASK)
-			return -EINVAL;
-	}
+	transaction = nf_tables_transaction_lookup(nlh->nlmsg_pid);
 
 	if (nla[NFTA_RULE_HANDLE]) {
 		rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
 		if (IS_ERR(rule))
 			return PTR_ERR(rule);
 
-		err = nf_tables_delrule_one(&ctx, rule, flags);
+		err = nf_tables_delrule_one(&ctx, transaction, rule);
 		if (err < 0)
 			return err;
 	} else {
 		/* Remove all rules in this chain */
 		list_for_each_entry_safe(rule, tmp, &chain->rules, list)
-			nf_tables_delrule_one(&ctx, rule, flags);
+			nf_tables_delrule_one(&ctx, transaction, rule);
 	}
 
 	return 0;
 }
 
-static int nf_tables_commit(struct sock *nlsk, struct sk_buff *skb,
-			    const struct nlmsghdr *nlh,
-			    const struct nlattr * const nla[])
+static int nf_tables_begin_transaction(struct sock *nlsk, struct sk_buff *skb,
+				       const struct nlmsghdr *nlh,
+				       const struct nlattr * const nla[])
+{
+	struct nft_transaction *transaction;
+
+	transaction = nf_tables_transaction_lookup(nlh->nlmsg_pid);
+	if (transaction != NULL)
+		return -EALREADY;
+
+	transaction = kmalloc(sizeof(struct nft_transaction), GFP_KERNEL);
+	if (transaction == NULL)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&transaction->list);
+	INIT_LIST_HEAD(&transaction->updates);
+	transaction->pid_owner = nlh->nlmsg_pid;
+
+	nfnl_lock();
+	list_add_tail(&transaction->list, &nf_tables_transactions);
+	nfnl_unlock();
+
+	return 0;
+}
+
+static int nf_tables_commit_transaction(struct sock *nlsk, struct sk_buff *skb,
+					const struct nlmsghdr *nlh,
+					const struct nlattr * const nla[])
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	const struct nft_af_info *afi;
+	struct nft_transaction *transaction;
 	struct net *net = sock_net(skb->sk);
 	struct nft_rule_update *rupd, *tmp;
 	int family = nfmsg->nfgen_family;
 	bool create;
 
-	/* Are you the owner of the dirty list? */
-	if (nlh->nlmsg_pid != net->nft.pid_owner)
-		return -EBUSY;
+	transaction = nf_tables_transaction_lookup(nlh->nlmsg_pid);
+	if (transaction == NULL)
+		return -EINVAL;
+
+	if (list_empty(&transaction->updates))
+		goto done;
 
 	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
@@ -1770,6 +1811,10 @@ static int nf_tables_commit(struct sock *nlsk, struct sk_buff *skb,
 	if (IS_ERR(afi))
 		return PTR_ERR(afi);
 
+	/* Check if a commit is on-going */
+	if (mutex_trylock(&nf_tables_commit_lock))
+		return -EAGAIN;
+
 	/* Bump generation counter, invalidate any dump in progress */
 	net->nft.genctr++;
 
@@ -1781,7 +1826,7 @@ static int nf_tables_commit(struct sock *nlsk, struct sk_buff *skb,
 	 */
 	synchronize_rcu();
 
-	list_for_each_entry_safe(rupd, tmp, &net->nft.dirty_rules, list) {
+	list_for_each_entry_safe(rupd, tmp, &transaction->updates, list) {
 		/* Delete this rule from the dirty list */
 		list_del(&rupd->list);
 
@@ -1806,47 +1851,49 @@ static int nf_tables_commit(struct sock *nlsk, struct sk_buff *skb,
 		nf_tables_rule_destroy(rupd->rule);
 		kfree(rupd);
 	}
-	/* Clear owner of this dirty list */
-	net->nft.pid_owner = 0;
 
+	/* Release the commit lock */
+	mutex_unlock(&nf_tables_commit_lock);
+
+done:
+	nf_tables_transaction_remove(transaction);
 	return 0;
 }
 
-static void nf_tables_abort_all(struct net *net)
+static void nf_tables_abort_all(struct nft_transaction *transaction)
 {
 	struct nft_rule_update *rupd, *tmp;
 
-	list_for_each_entry_safe(rupd, tmp, &net->nft.dirty_rules, list) {
-		/* Delete all rules from the dirty list */
+	list_for_each_entry_safe(rupd, tmp, &transaction->updates, list) {
+		/* Delete all rules from the update list */
 		list_del(&rupd->list);
 
-		if (!nft_rule_is_active_next(rupd->net, rupd->rule)) {
+		if (nft_rule_is_active_next(rupd->net, rupd->rule)) {
+			list_del_rcu(&rupd->rule->list);
+			nf_tables_rule_destroy(rupd->rule);
+		} else
 			nft_rule_clear(rupd->net, rupd->rule);
-			kfree(rupd);
-			return;
-		}
 
-		/* This rule is inactive, get rid of it */
-		list_del_rcu(&rupd->rule->list);
-		nf_tables_rule_destroy(rupd->rule);
 		kfree(rupd);
 	}
-	/* Clear the owner of the dirty list */
-	net->nft.pid_owner = 0;
 }
 
-static int nf_tables_abort(struct sock *nlsk, struct sk_buff *skb,
-			   const struct nlmsghdr *nlh,
-			   const struct nlattr * const nla[])
+static int nf_tables_abort_transaction(struct sock *nlsk, struct sk_buff *skb,
+				       const struct nlmsghdr *nlh,
+				       const struct nlattr * const nla[])
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
 	const struct nft_af_info *afi;
+	struct nft_transaction *transaction;
 	struct net *net = sock_net(skb->sk);
 	bool create;
 
-	/* Are you the owner of the dirty list? */
-	if (nlh->nlmsg_pid != net->nft.pid_owner)
-		return -EBUSY;
+	transaction = nf_tables_transaction_lookup(nlh->nlmsg_pid);
+	if (transaction == NULL)
+		return -EINVAL;
+
+	if (list_empty(&transaction->updates))
+		goto done;
 
 	create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
 
@@ -1854,24 +1901,28 @@ static int nf_tables_abort(struct sock *nlsk, struct sk_buff *skb,
 	if (IS_ERR(afi))
 		return PTR_ERR(afi);
 
-	nf_tables_abort_all(net);
+	nf_tables_abort_all(transaction);
 
+done:
+	nf_tables_transaction_remove(transaction);
 	return 0;
 }
 
-static int
-nf_tables_rcv_nl_event(struct notifier_block *this,
-		       unsigned long event, void *ptr)
+static int nf_tables_rcv_nl_event(struct notifier_block *this,
+				  unsigned long event, void *ptr)
 {
 	struct netlink_notify *n = ptr;
+	struct nft_transaction *transaction;
 
 	if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER)
 		return NOTIFY_DONE;
 
-	if (n->portid != n->net->nft.pid_owner)
-		return NOTIFY_DONE;
-
-	nf_tables_abort_all(n->net);
+	transaction = nf_tables_transaction_lookup(n->net->nft.pid_owner);
+	if (transaction != NULL) {
+		if (!list_empty(&transaction->updates))
+			nf_tables_abort_all(transaction);
+		nf_tables_transaction_remove(transaction);
+	}
 
 	return NOTIFY_DONE;
 }
@@ -2840,13 +2891,18 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
 		.attr_count	= NFTA_SET_ELEM_LIST_MAX,
 		.policy		= nft_set_elem_list_policy,
 	},
-	[NFT_MSG_COMMIT] = {
-		.call		= nf_tables_commit,
+	[NFT_MSG_BEGINTRANSACT] = {
+		.call           = nf_tables_begin_transaction,
+		.attr_count     = NFTA_TABLE_MAX,
+		.policy         = nft_rule_policy,
+	},
+	[NFT_MSG_COMMITTRANSACT] = {
+		.call		= nf_tables_commit_transaction,
 		.attr_count	= NFTA_TABLE_MAX,
 		.policy		= nft_rule_policy,
 	},
-	[NFT_MSG_ABORT] = {
-		.call		= nf_tables_abort,
+	[NFT_MSG_ABORTTRANSACT] = {
+		.call		= nf_tables_abort_transaction,
 		.attr_count	= NFTA_TABLE_MAX,
 		.policy		= nft_rule_policy,
 	},
-- 
1.8.1.5

--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Netfitler Users]     [LARTC]     [Bugtraq]     [Yosemite Forum]

  Powered by Linux