Andi Kleen a écrit : > Eric Dumazet <dada1@xxxxxxxxxxxxx> writes: > >> While doing oprofile tests I noticed two loops are not properly unrolled by gcc > > That's because nobody passed -funroll-loops. Did you try that for > that file? Likely will need -O2 too I dont want to unroll all loops, only those two :) I wish gcc (4.3.2 here) was litle bit smarter :( Without using -funroll-loops size ipv4/netfilter/ip_tables.o text data bss dec hex filename 6424 368 16 6808 1a98 net/ipv4/netfilter/ip_tables.o With -funroll-loops : size ipv4/netfilter/ip_tables.o text data bss dec hex filename 7144 368 16 7528 1d68 net/ipv4/netfilter/ip_tables.o With my patch and no -funroll-loops text data bss dec hex filename 6488 368 16 6872 1ad8 net/ipv4/netfilter/ip_tables.o > >> +static unsigned long ifname_compare(const void *_a, const void *_b, const void *_mask) >> +{ >> + const unsigned long *a = (const unsigned long *)_a; >> + const unsigned long *b = (const unsigned long *)_b; >> + const unsigned long *mask = (const unsigned long *)_mask; >> + unsigned long ret; >> + >> + ret = (a[0] ^ b[0]) & mask[0]; >> + ret |= (a[1] ^ b[1]) & mask[1]; >> + if (IFNAMSIZ > 2 * sizeof(unsigned long)) >> + ret |= (a[2] ^ b[2]) & mask[2]; >> + if (IFNAMSIZ > 3 * sizeof(unsigned long)) >> + ret |= (a[3] ^ b[3]) & mask[3]; > > That will silently break for IFNAMSIZ >= 4*sizeof(unsigned long) > You should add a dummy loop for that or at least a BUILD_BUG_ON It will also break the day we port linux to a 128 bits machine :) Thanks Andi (By the way, I still use the patch on arch/x86/oprofile/op_model_ppro.c to have a working oprofile on my dev machine...) [PATCH] netfilter: unfold two critical loops in ip_packet_match() While doing oprofile tests I noticed two loops are not properly unrolled by gcc Using hand coded unrolled loop provides nice speedup : ipt_do_table credited of 2.52 % of cpu instead of 3.29 % in tbench, for a small text size increase (62 bytes for both loops) Signed-off-by: Eric Dumazet <dada1@xxxxxxxxxxxxx> --- net/ipv4/netfilter/ip_tables.c | 34 ++++++++++++++++++------------- 1 files changed, 20 insertions(+), 14 deletions(-) diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index ef8b6ca..9298d0a 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -65,6 +65,24 @@ do { \ #define inline #endif +static unsigned long ifname_compare(const void *_a, const void *_b, const void *_mask) +{ + const unsigned long *a = (const unsigned long *)_a; + const unsigned long *b = (const unsigned long *)_b; + const unsigned long *mask = (const unsigned long *)_mask; + unsigned long ret; + + ret = (a[0] ^ b[0]) & mask[0]; + if (IFNAMSIZ > sizeof(unsigned long)) + ret |= (a[1] ^ b[1]) & mask[1]; + if (IFNAMSIZ > 2 * sizeof(unsigned long)) + ret |= (a[2] ^ b[2]) & mask[2]; + if (IFNAMSIZ > 3 * sizeof(unsigned long)) + ret |= (a[3] ^ b[3]) & mask[3]; + BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); + return ret; +} + /* We keep a set of rules for each CPU, so we can avoid write-locking them in the softirq when updating the counters and therefore @@ -83,7 +101,6 @@ ip_packet_match(const struct iphdr *ip, const struct ipt_ip *ipinfo, int isfrag) { - size_t i; unsigned long ret; #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg))) @@ -103,13 +120,7 @@ ip_packet_match(const struct iphdr *ip, return false; } - /* Look for ifname matches; this should unroll nicely. */ - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { - ret |= (((const unsigned long *)indev)[i] - ^ ((const unsigned long *)ipinfo->iniface)[i]) - & ((const unsigned long *)ipinfo->iniface_mask)[i]; - } - + ret = ifname_compare(indev, ipinfo->iniface, ipinfo->iniface_mask); if (FWINV(ret != 0, IPT_INV_VIA_IN)) { dprintf("VIA in mismatch (%s vs %s).%s\n", indev, ipinfo->iniface, @@ -117,12 +128,7 @@ ip_packet_match(const struct iphdr *ip, return false; } - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { - ret |= (((const unsigned long *)outdev)[i] - ^ ((const unsigned long *)ipinfo->outiface)[i]) - & ((const unsigned long *)ipinfo->outiface_mask)[i]; - } - + ret = ifname_compare(outdev, ipinfo->outiface, ipinfo->outiface_mask); if (FWINV(ret != 0, IPT_INV_VIA_OUT)) { dprintf("VIA out mismatch (%s vs %s).%s\n", outdev, ipinfo->outiface, -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html