On 7 April 2014 09:20, Pablo Neira Ayuso <pablo@xxxxxxxxxxxxx> wrote: > Hi Mathieu, > > On Tue, Apr 01, 2014 at 04:52:51PM -0600, mathieu.poirier@xxxxxxxxxx wrote: >> From: Mathieu Poirier <mathieu.poirier@xxxxxxxxxx> >> >> Nf_acct objects already support accounting at the byte and packet >> level. As such it is a natural extention to add the possiblity to >> define a ceiling limit for both metrics. >> >> All the support for quotas itself is added to nfnetlink acctounting >> framework to stay coherent with current accounting object management. >> Quota limit checks are implemented in xt nfacct filter where >> statistic collection is already done. > > This looks good, we didn't reach the merge window though (it closed > the same day you sent me this). So please address some minor issues > here and resend, I'll keep this in my local tree until the net-next > merge window opens back. Perfect. > > As said, comments below. > >> Pablo Niera Ayuso has also contributed to this feature. >> >> Signed-off-by: Mathieu Poirier <mathieu.poirier@xxxxxxxxxx> >> --- >> include/linux/netfilter/nfnetlink_acct.h | 8 ++- >> include/uapi/linux/netfilter/nfnetlink.h | 2 + >> include/uapi/linux/netfilter/nfnetlink_acct.h | 9 +++ >> net/netfilter/nfnetlink_acct.c | 86 +++++++++++++++++++++++++++ >> net/netfilter/xt_nfacct.c | 6 ++ >> 5 files changed, 110 insertions(+), 1 deletion(-) >> >> diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h >> index b2e85e5..6ec9757 100644 >> --- a/include/linux/netfilter/nfnetlink_acct.h >> +++ b/include/linux/netfilter/nfnetlink_acct.h >> @@ -3,11 +3,17 @@ >> >> #include <uapi/linux/netfilter/nfnetlink_acct.h> >> >> +enum { >> + NFACCT_NO_QUOTA = -1, >> + NFACCT_UNDERQUOTA, >> + NFACCT_OVERQUOTA, >> +}; >> >> struct nf_acct; >> >> struct nf_acct *nfnl_acct_find_get(const char *filter_name); >> void nfnl_acct_put(struct nf_acct *acct); >> void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct); >> - >> +extern int nfnl_acct_overquota(const struct sk_buff *skb, >> + struct nf_acct *nfacct); >> #endif /* _NFNL_ACCT_H */ >> diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h >> index 596ddd4..354a7e5 100644 >> --- a/include/uapi/linux/netfilter/nfnetlink.h >> +++ b/include/uapi/linux/netfilter/nfnetlink.h >> @@ -20,6 +20,8 @@ enum nfnetlink_groups { >> #define NFNLGRP_CONNTRACK_EXP_DESTROY NFNLGRP_CONNTRACK_EXP_DESTROY >> NFNLGRP_NFTABLES, >> #define NFNLGRP_NFTABLES NFNLGRP_NFTABLES >> + NFNLGRP_ACCT_QUOTA, >> +#define NFNLGRP_ACCT_QUOTA NFNLGRP_ACCT_QUOTA >> __NFNLGRP_MAX, >> }; >> #define NFNLGRP_MAX (__NFNLGRP_MAX - 1) >> diff --git a/include/uapi/linux/netfilter/nfnetlink_acct.h b/include/uapi/linux/netfilter/nfnetlink_acct.h >> index c7b6269..51404ec 100644 >> --- a/include/uapi/linux/netfilter/nfnetlink_acct.h >> +++ b/include/uapi/linux/netfilter/nfnetlink_acct.h >> @@ -10,15 +10,24 @@ enum nfnl_acct_msg_types { >> NFNL_MSG_ACCT_GET, >> NFNL_MSG_ACCT_GET_CTRZERO, >> NFNL_MSG_ACCT_DEL, >> + NFNL_MSG_ACCT_OVERQUOTA, >> NFNL_MSG_ACCT_MAX >> }; >> >> +enum nfnl_acct_flags { >> + NFACCT_F_QUOTA_PKTS = (1 << 0), >> + NFACCT_F_QUOTA_BYTES = (1 << 1), >> + NFACCT_F_OVERQUOTA = (1 << 2), /* can't be set from userspace */ >> +}; >> + >> enum nfnl_acct_type { >> NFACCT_UNSPEC, >> NFACCT_NAME, >> NFACCT_PKTS, >> NFACCT_BYTES, >> NFACCT_USE, >> + NFACCT_FLAGS, >> + NFACCT_QUOTA, >> __NFACCT_MAX >> }; >> #define NFACCT_MAX (__NFACCT_MAX - 1) >> diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c >> index c7b6d46..dae35eb 100644 >> --- a/net/netfilter/nfnetlink_acct.c >> +++ b/net/netfilter/nfnetlink_acct.c >> @@ -32,18 +32,24 @@ static LIST_HEAD(nfnl_acct_list); >> struct nf_acct { >> atomic64_t pkts; >> atomic64_t bytes; >> + unsigned long flags; >> struct list_head head; >> atomic_t refcnt; >> char name[NFACCT_NAME_MAX]; >> struct rcu_head rcu_head; >> + char data[0]; >> }; >> >> +#define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES) >> + >> static int >> nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, >> const struct nlmsghdr *nlh, const struct nlattr * const tb[]) >> { >> struct nf_acct *nfacct, *matching = NULL; >> char *acct_name; >> + unsigned int size = 0; >> + u32 flags = 0; >> >> if (!tb[NFACCT_NAME]) >> return -EINVAL; >> @@ -68,15 +74,39 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, >> /* reset counters if you request a replacement. */ >> atomic64_set(&matching->pkts, 0); >> atomic64_set(&matching->bytes, 0); >> + smp_mb__before_clear_bit(); >> + /* reset overquota flag if quota is enabled. */ >> + if ((matching->flags & NFACCT_F_QUOTA)) >> + clear_bit(NFACCT_F_OVERQUOTA, &matching->flags); >> return 0; >> } >> return -EBUSY; >> } >> >> nfacct = kzalloc(sizeof(struct nf_acct), GFP_KERNEL); >> + if (tb[NFACCT_FLAGS]) { >> + flags = ntohl(nla_get_be32(tb[NFACCT_FLAGS])); >> + if (flags & ~NFACCT_F_QUOTA) >> + return -EOPNOTSUPP; >> + if ((flags & NFACCT_F_QUOTA) == NFACCT_F_QUOTA) >> + return -EINVAL; >> + if (flags & NFACCT_F_OVERQUOTA) >> + return -EINVAL; >> + >> + size += sizeof(u64); >> + } >> + >> + nfacct = kzalloc(sizeof(struct nf_acct) + size, GFP_KERNEL); >> if (nfacct == NULL) >> return -ENOMEM; >> >> + if (flags & NFACCT_F_QUOTA) { >> + u64 *quota = (u64 *)nfacct->data; >> + >> + *quota = be64_to_cpu(nla_get_be64(tb[NFACCT_QUOTA])); >> + nfacct->flags = flags; >> + } >> + >> strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX); >> >> if (tb[NFACCT_BYTES]) { >> @@ -117,6 +147,10 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, >> if (type == NFNL_MSG_ACCT_GET_CTRZERO) { >> pkts = atomic64_xchg(&acct->pkts, 0); >> bytes = atomic64_xchg(&acct->bytes, 0); >> + if (acct->flags & NFACCT_F_QUOTA) { >> + smp_mb__before_clear_bit(); > ^------------------------^ > > I think you have to move this just before the if branch. I've been wondering about that while looking at your code. Why execute a memory barrier if we aren't sure that clear_bit() will be called? I'm thinking you have a valid reason that I don't see. > >> + clear_bit(NFACCT_F_OVERQUOTA, &acct->flags); >> + } >> } else { >> pkts = atomic64_read(&acct->pkts); >> bytes = atomic64_read(&acct->bytes); >> @@ -125,7 +159,13 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, >> nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes)) || >> nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt)))) >> goto nla_put_failure; >> + if (acct->flags & NFACCT_F_QUOTA) { >> + u64 *quota = (u64 *)acct->data; >> >> + if (nla_put_be32(skb, NFACCT_FLAGS, htonl(acct->flags)) || >> + nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota))) >> + goto nla_put_failure; >> + } >> nlmsg_end(skb, nlh); >> return skb->len; >> >> @@ -270,6 +310,8 @@ static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = { >> [NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 }, >> [NFACCT_BYTES] = { .type = NLA_U64 }, >> [NFACCT_PKTS] = { .type = NLA_U64 }, >> + [NFACCT_FLAGS] = { .type = NLA_U32 }, >> + [NFACCT_QUOTA] = { .type = NLA_U64 }, >> }; >> >> static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = { >> @@ -336,6 +378,50 @@ void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct) >> } >> EXPORT_SYMBOL_GPL(nfnl_acct_update); >> >> +static void nfnl_overquota_report(struct nf_acct *nfacct) >> +{ >> + int ret; >> + struct sk_buff *skb; >> + >> + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); >> + if (skb == NULL) >> + return; >> + >> + ret = nfnl_acct_fill_info(skb, 0, 0, NFNL_MSG_ACCT_OVERQUOTA, 0, >> + nfacct); >> + if (ret <= 0) { >> + kfree_skb(skb); >> + return; >> + } >> + netlink_broadcast(init_net.nfnl, skb, 0, NFNLGRP_ACCT_QUOTA, >> + GFP_ATOMIC); >> +} >> + >> +int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct) >> +{ >> + u64 now; >> + u64 *quota; >> + bool ret = NFACCT_UNDERQUOTA; > > enums are u32, so you have to use int there at least. Of course - the side effects of doing things hastily. > >> + /* no place here if we don't have a quota */ >> + if (!(nfacct->flags & NFACCT_F_QUOTA)) >> + return NFACCT_NO_QUOTA; >> + >> + quota = (u64 *)nfacct->data; >> + now = (nfacct->flags & NFACCT_F_QUOTA_PKTS) ? >> + atomic64_read(&nfacct->pkts) : atomic64_read(&nfacct->bytes); >> + >> + ret = now > *quota; >> + >> + if (now >= *quota && > > This should be: > > if (now > *quota && > > I believe we report once we're at least 1 byte over quota, right? I take quota limits to be inclusive. As such if we set a limit of 50 packets, I expect the 50th packet to be allowed through but not the 51st one. On the flip side there is no way to tell when that 51st packet will arrive - it can potentially take a long time and as such decided it was worthy to send the notification right away. > >> + !test_and_set_bit(NFACCT_F_OVERQUOTA, &nfacct->flags)) { >> + nfnl_overquota_report(nfacct); >> + } >> + >> + return ret; >> +} >> +EXPORT_SYMBOL_GPL(nfnl_acct_overquota); >> + >> static int __init nfnl_acct_init(void) >> { >> int ret; >> diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c >> index b3be0ef..94b0f09 100644 >> --- a/net/netfilter/xt_nfacct.c >> +++ b/net/netfilter/xt_nfacct.c >> @@ -21,10 +21,16 @@ MODULE_ALIAS("ip6t_nfacct"); >> >> static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par) >> { >> + bool overquota; > > this needs to be convert to enum nfacct_quota_state or simply int as > it is not a boolean anymore. Evidently yes. > >> const struct xt_nfacct_match_info *info = par->targinfo; >> >> nfnl_acct_update(skb, info->nfacct); >> >> + overquota = nfnl_acct_overquota(skb, info->nfacct); >> + >> + if (overquota == NFACCT_UNDERQUOTA) >> + return false; >> + >> return true; > > We can save a couple of lines with this: > > quota_state = nfnl_acct_overquota(skb, info->nfacct); > > return quota_state == NFACCT_UNDERQUOTA ? false : true; ack > > It would be also great if you look at the nfacct tool extensions. I'd > like to have a monitor mode that spots quota warnings for testing > this. I have most of the nfacct changes done but didn't send the patches as it wasn't ready for submission yet. Once we agreed on the kernel side I was going to send the userspace. Ok for the monitoring mode, I already have a test application that does that. Where will you want the patches to be sent? Netdev and netfilter-dev even if they are userspace? > > Thanks! -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html