Op vr, 07-01-2005 te 10:00 -0800, schreef Stephen Hemminger: > On Fri, 07 Jan 2005 17:05:59 +0000 > David Woodhouse <dwmw2@xxxxxxxxxxxxx> wrote: > > I don't think it's recursing -- I think the stack trace is just a bit > > noisy. The problem is that the bridge code, especially with br_netfilter > > in the equation, is implicated in code paths which are just _too_ deep. > > This happens when you're bridging packets received in an interrupt while > > you were deep in journalling code, and it's also been seen with a call > > trace something like nfs->sunrpc->ip->bridge->br_netfilter. > > Sounds like an argument for interrupt stacks. > > > One option might be to make br_dev_xmit() just queue the packet rather > > than trying to deliver it to all the slave devices immediately. Then the > > actual retransmission can be handled from a context where we're _not_ > > short of stack; perhaps from a dedicated kernel thread. > > Probably the solution would be to handle it in the filter code > that way if we are not filtering, we can use the interrupt path, > but if filtering just defer to a safer context (like soft irq). How about something like the patch below (untested but compiles)? The current netfilter scheme adds one function call to the call chain for each NF_HOOK and NF_HOOK_THRESH. This can be prevented by executing the okfn in the calling function instead of in nf_hook_slow(). I didn't check if there's any code that actually uses the return value from NF_HOOK. If so, this patch won't work well in its current form as -EPERM is now also returned for NF_QUEUE and NF_STOLEN. Another 2 calls of okfn can be postponed in br_netfilter.c by adding NF_STOP, which would work like NF_STOLEN except that okfn is still called. But I'd first like to get the IPv4/IPv6 fix for br_netfilter.c accepted (see another thread on netdev). 
cheers, Bart --- linux-2.6.10/include/linux/netfilter.h.old 2005-01-07 20:41:30.000000000 +0100 +++ linux-2.6.10/include/linux/netfilter.h 2005-01-07 22:04:57.096626176 +0100 @@ -138,21 +138,32 @@ void nf_log_packet(int pf, /* This is gross, but inline doesn't cut it for avoiding the function call in fast path: gcc doesn't inline (needs value tracking?). --RR */ #ifdef CONFIG_NETFILTER_DEBUG -#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \ - nf_hook_slow((pf), (hook), (skb), (indev), (outdev), (okfn), INT_MIN) -#define NF_HOOK_THRESH nf_hook_slow +#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \ +({int __ret = nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, INT_MIN); \ +if (!__ret) \ + __ret = (okfn)(skb); \ +__ret;}) +#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \ +({int __ret = nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, thresh); \ +if (!__ret) \ + __ret = (okfn)(skb); \ +__ret;}) #else -#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \ -(list_empty(&nf_hooks[(pf)][(hook)]) \ - ? (okfn)(skb) \ - : nf_hook_slow((pf), (hook), (skb), (indev), (outdev), (okfn), INT_MIN)) -#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \ -(list_empty(&nf_hooks[(pf)][(hook)]) \ - ? 
(okfn)(skb) \ - : nf_hook_slow((pf), (hook), (skb), (indev), (outdev), (okfn), (thresh))) +#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \ +({int __ret = 0; \ +if (list_empty(&nf_hooks[pf][hook]) || \ + !(__ret = nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, INT_MIN))) \ + __ret = (okfn)(skb); \ +__ret;}) +#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \ +({int __ret = 0; \ +if (list_empty(&nf_hooks[pf][hook]) || \ + !(__ret = nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, thresh))) \ + __ret = (okfn)(skb); \ +__ret;}) #endif -int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, +int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), int thresh); --- linux-2.6.10/net/core/netfilter.c.old 2005-01-07 20:41:47.000000000 +0100 +++ linux-2.6.10/net/core/netfilter.c 2005-01-07 21:52:13.659686232 +0100 @@ -494,7 +494,7 @@ static int nf_queue(struct sk_buff *skb, return 1; } -int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, +int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), @@ -508,30 +508,28 @@ int nf_hook_slow(int pf, unsigned int ho rcu_read_lock(); #ifdef CONFIG_NETFILTER_DEBUG - if (skb->nf_debug & (1 << hook)) { + if ((*pskb)->nf_debug & (1 << hook)) { printk("nf_hook: hook %i already set.\n", hook); - nf_dump_skb(pf, skb); + nf_dump_skb(pf, *pskb); } - skb->nf_debug |= (1 << hook); + (*pskb)->nf_debug |= (1 << hook); #endif elem = &nf_hooks[pf][hook]; next_hook: - verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev, + verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev, outdev, &elem, okfn, hook_thresh); if (verdict == NF_QUEUE) { NFDEBUG("nf_hook: Verdict = QUEUE.\n"); - if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn)) + if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn)) goto next_hook; } switch (verdict) { - case 
NF_ACCEPT: - ret = okfn(skb); - break; - case NF_DROP: - kfree_skb(skb); + kfree_skb(*pskb); + case NF_STOLEN: + case NF_QUEUE: ret = -EPERM; break; }