The current code for the patch is like this: #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/route.h> #include <linux/version.h> #include <linux/if_arp.h> #include <linux/ip.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter/nf_conntrack_common.h> //#include <linux/netfilter_ipv4/ipt_ROUTE.h> #include <net/netfilter/nf_conntrack.h> #include <net/ip.h> #include <net/route.h> #include <net/icmp.h> #include <net/checksum.h> #include "ipt_ROUTE.h" #if 0 #define DEBUGP printk #else #define DEBUGP(format, args...) #endif ; /* Try to route the packet according to the routing keys specified in * route_info. Keys are : * - ifindex : * 0 if no oif preferred, * otherwise set to the index of the desired oif * - route_info->gw : * 0 if no gateway specified, * otherwise set to the next host to which the pkt must be routed * If success, skb->dev is the output device to which the packet must * be sent and skb->dst is not NULL * * RETURN: -1 if an error occured * 1 if the packet was succesfully routed to the * destination desired * 0 if the kernel routing table could not route the packet * according to the keys specified */ static int route(struct sk_buff *skb, unsigned int ifindex, const struct ipt_route_target_info *route_info) { int err; struct rtable *rt; struct iphdr *iph = ip_hdr(skb); struct flowi fl = { .oif = ifindex, .nl_u = { .ip4_u = { .daddr = iph->daddr, .saddr = 0, .tos = RT_TOS(iph->tos), .scope = RT_SCOPE_UNIVERSE, } } }; /* The destination address may be overloaded by the target */ if (route_info->gw) fl.fl4_dst = route_info->gw; /* Trying to route the packet using the standard routing table. */ if ((err = ip_route_output_key(&init_net, &rt, &fl))) { if (net_ratelimit()) DEBUGP("ipt_ROUTE: couldn't route pkt (err: %i)",err); return -1; } /* Drop old route. 
*/ dst_release(skb->dst); skb->dst = NULL; /* Success if no oif specified or if the oif correspond to the * one desired */ if (!ifindex || rt->u.dst.dev->ifindex == ifindex) { skb->dst = &rt->u.dst; skb->dev = skb->dst->dev; skb->protocol = htons(ETH_P_IP); return 1; } /* The interface selected by the routing table is not the one * specified by the user. This may happen because the dst address * is one of our own addresses. */ if (net_ratelimit()) DEBUGP("ipt_ROUTE: failed to route as desired gw=%u.%u.%u.%u oif=%i (got oif=%i)\n", NIPQUAD(route_info->gw), ifindex, rt->u.dst.dev->ifindex); return 0; } /* Stolen from ip_finish_output2 * PRE : skb->dev is set to the device we are leaving by * skb->dst is not NULL * POST: the packet is sent with the link layer header pushed * the packet is destroyed */ static void ip_direct_send(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; struct rtable *rt = (struct rtable *)dst; struct net_device *dev = dst->dev; unsigned int hh_len = LL_RESERVED_SPACE(dev); if (rt->rt_type == RTN_MULTICAST) IP_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS); else if (rt->rt_type == RTN_BROADCAST) IP_INC_STATS(IPSTATS_MIB_OUTBCASTPKTS); /* Be paranoid, rather than too clever. */ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { struct sk_buff *skb2; skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev)); if (skb2 == NULL) { kfree_skb(skb); return; } if (skb->sk) skb_set_owner_w(skb2, skb->sk); kfree_skb(skb); skb = skb2; } if (dst->hh){ neigh_hh_output(dst->hh, skb); return; } else if (dst->neighbour){ dst->neighbour->output(skb); return; } if (net_ratelimit()) printk(KERN_DEBUG "ip_finish_output2: No header cache and no neighbour!\n"); kfree_skb(skb); return; } /* PRE : skb->dev is set to the device we are leaving by * POST: - the packet is directly sent to the skb->dev device, without * pushing the link layer header. 
* - the packet is destroyed */ static inline int dev_direct_send(struct sk_buff *skb) { return dev_queue_xmit(skb); } static unsigned int route_oif(const struct ipt_route_target_info *route_info, struct sk_buff *skb) { unsigned int ifindex = 0; struct net_device *dev_out = NULL; /* The user set the interface name to use. * Getting the current interface index. */ if ((dev_out = dev_get_by_name(&init_net, route_info->oif))) { ifindex = dev_out->ifindex; } else { /* Unknown interface name : packet dropped */ if (net_ratelimit()) DEBUGP("ipt_ROUTE: oif interface %s not found\n", route_info->oif); return NF_DROP; } /* Trying the standard way of routing packets */ switch (route(skb, ifindex, route_info)) { case 1: dev_put(dev_out); if (route_info->flags & IPT_ROUTE_CONTINUE) return IPT_CONTINUE; ip_direct_send(skb); return NF_STOLEN; case 0: /* Failed to send to oif. Trying the hard way */ if (route_info->flags & IPT_ROUTE_CONTINUE) return NF_DROP; if (net_ratelimit()) DEBUGP("ipt_ROUTE: forcing the use of %i\n", ifindex); /* We have to force the use of an interface. * This interface must be a tunnel interface since * otherwise we can't guess the hw address for * the packet. For a tunnel interface, no hw address * is needed. */ if ((dev_out->type != ARPHRD_TUNNEL) && (dev_out->type != ARPHRD_IPGRE)) { if (net_ratelimit()) DEBUGP("ipt_ROUTE: can't guess the hw addr !\n"); dev_put(dev_out); return NF_DROP; } /* Send the packet. This will also free skb * Do not go through the POST_ROUTING hook because * skb->dst is not set and because it will probably * get confused by the destination IP address. */ skb->dev = dev_out; dev_direct_send(skb); dev_put(dev_out); return NF_STOLEN; default: /* Unexpected error */ dev_put(dev_out); return NF_DROP; } } static unsigned int route_iif(const struct ipt_route_target_info *route_info, struct sk_buff *skb) { struct net_device *dev_in = NULL; /* Getting the current interface index. 
*/ if (!(dev_in = dev_get_by_name(&init_net, route_info->iif))) { if (net_ratelimit()) DEBUGP("ipt_ROUTE: iif interface %s not found\n", route_info->iif); return NF_DROP; } skb->dev = dev_in; dst_release(skb->dst); skb->dst = NULL; netif_rx(skb); dev_put(dev_in); return NF_STOLEN; } static unsigned int route_gw(const struct ipt_route_target_info *route_info, struct sk_buff *skb) { if (route(skb, 0, route_info)!=1) return NF_DROP; if (route_info->flags & IPT_ROUTE_CONTINUE) return IPT_CONTINUE; ip_direct_send(skb); return NF_STOLEN; } /* To detect and deter routed packet loopback when using the --tee option, * we take a page out of the raw.patch book: on the copied skb, we set up * a fake ->nfct entry, pointing to the local &route_tee_track. We skip * routing packets when we see they already have that ->nfct. */ static struct nf_conn route_tee_track; static unsigned int route_tg(struct sk_buff *pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) { const struct ipt_route_target_info *route_info = targinfo; struct sk_buff *skb; unsigned int res; skb = pskb; if (skb->nfct == &route_tee_track.ct_general) { /* Loopback - a packet we already routed, is to be * routed another time. Avoid that, now. */ if (net_ratelimit()) DEBUGP(KERN_DEBUG "ipt_ROUTE: loopback - DROP!\n"); return NF_DROP; } /* If we are at PREROUTING or INPUT hook * the TTL isn't decreased by the IP stack */ if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_LOCAL_IN) { struct iphdr *iph = ip_hdr(skb); if (iph->ttl <= 1) { struct rtable *rt; struct flowi fl = { .oif = 0, .nl_u = { .ip4_u = { .daddr = iph->daddr, .saddr = iph->saddr, .tos = RT_TOS(iph->tos), .scope = ((iph->tos & RTO_ONLINK) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE) } } }; if (ip_route_output_key(&init_net, &rt, &fl)) { return NF_DROP; } if (skb->dev == rt->u.dst.dev) { /* Drop old route. 
*/ dst_release(skb->dst); skb->dst = &rt->u.dst; /* this will traverse normal stack, and * thus call conntrack on the icmp packet */ icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0); } return NF_DROP; } /* * If we are at INPUT the checksum must be recalculated since * the length could change as the result of a defragmentation. */ if(hooknum == NF_INET_LOCAL_IN) { iph->ttl = iph->ttl - 1; iph->check = 0; iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } else { ip_decrease_ttl(iph); } } if ((route_info->flags & IPT_ROUTE_TEE)) { /* * Copy the *pskb, and route the copy. Will later return * IPT_CONTINUE for the original skb, which should continue * on its way as if nothing happened. The copy should be * independantly delivered to the ROUTE --gw. */ skb = skb_copy(pskb, GFP_ATOMIC); if (!skb) { if (net_ratelimit()) DEBUGP(KERN_DEBUG "ipt_ROUTE: copy failed!\n"); return IPT_CONTINUE; } } /* Tell conntrack to forget this packet since it may get confused * when a packet is leaving with dst address == our address. * Good idea ? Dunno. Need advice. * * NEW: mark the skb with our &route_tee_track, so we avoid looping * on any already routed packet. 
*/ if (!(route_info->flags & IPT_ROUTE_CONTINUE)) { nf_conntrack_put(skb->nfct); skb->nfct = &route_tee_track.ct_general; skb->nfctinfo = IP_CT_NEW; nf_conntrack_get(skb->nfct); } if (route_info->oif[0] != '\0') { res = route_oif(route_info, skb); } else if (route_info->iif[0] != '\0') { res = route_iif(route_info, skb); } else if (route_info->gw) { res = route_gw(route_info, skb); } else { if (net_ratelimit()) DEBUGP(KERN_DEBUG "ipt_ROUTE: no parameter !\n"); res = IPT_CONTINUE; } if ((route_info->flags & IPT_ROUTE_TEE)) res = IPT_CONTINUE; return res; } static bool route_tg_checkentry(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, unsigned int hook_mask) { if (strcmp(tablename, "mangle") != 0) { printk("ipt_ROUTE: bad table `%s', use the `mangle' table.\n", tablename); return false; } /* No need of comparing the hook, they don't exist in kernel space anymore. * Only exist in userspace. The check above for mangle table is also redundant */ if (hook_mask & ~( (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_POST_ROUTING))) { printk("ipt_ROUTE: bad hook\n"); return false; } return true; } static struct xt_target route_tg_reg __read_mostly = { .name = "ROUTE", .family = AF_INET, .target = route_tg, .targetsize = sizeof(struct ipt_route_target_info), .table = "mangle", .checkentry = route_tg_checkentry, .me = THIS_MODULE, }; static int __init init(void) { /* Set up fake conntrack (stolen from raw.patch): - to never be deleted, not in any hashes */ atomic_set(&route_tee_track.ct_general.use, 1); /* - and look it like as a confirmed connection */ set_bit(IPS_CONFIRMED_BIT, &route_tee_track.status); /* Initialize fake conntrack so that NAT will skip it */ route_tee_track.status |= IPS_NAT_DONE_MASK; return xt_register_target(&route_tg_reg); } static void __exit fini(void) { xt_unregister_target(&route_tg_reg); } module_init(init); module_exit(fini); Regards 
Abhishek

On Tue, Feb 24, 2009 at 9:03 PM, Patrick McHardy <kaber@xxxxxxxxx> wrote:
> Jan Engelhardt wrote:
>>
>> On Tuesday 2009-02-24 14:41, Patrick McHardy wrote:
>>>
>>> Jan Engelhardt wrote:
>>>>
>>>> On Tuesday 2009-02-24 09:59, Abhishek Singh wrote:
>>>>>
>>>>> What I would like to know is that if someone would like to add it to
>>>>> the main iptables tree and the patchomatic repository. I am not sure
>>>>> how to go about it. If someone is interested, please let me know. I
>>>>> shall contribute the code and would be happy to incorporate review
>>>>> comments by other developers.
>>>>
>>>> [omg timeline!]
>>>>
>>>> Short answer, no. There is iproute2 and xt_TEE which replace it,
>>>> and even patchomatic is gone.
>>>>
>>>> [/me takes a leap forward]
>>>
>>> Perhaps we can finally get this merged. IIRC the only reason against
>>> it is the IP layer duplication instead of simply using dst_output().
>>>
>> It cannot use dst_output because that would cause reentrancy into
>> iptables.
>> Want a patch, though?
>
> I would like to have a look at the current patch, yes. Don't
> bother fixing anything though, I mainly want to have a look
> at the routing part.
>
Attachment:
ipt_ROUTE.c
Description: Binary data