Patrick McHardy wrote:
Philip Prindeville wrote:
Patrick McHardy wrote:
Philip Prindeville wrote:
That version is known not to work very well, 2.6.26 includes
a largely rewritten version.
This is a backport to 2.6.25:
git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-2.6.25-sip.git
Well, it might be easier to simply jump to 2.6.26.8 (unless there's
something horribly wrong with that version).
Just means going through patch hell again as we munge patches for
ocf, imq, squashfs, unionfs, multi-routing, etc.
Say, I've been wondering about that: what is the status of dead
default gateway detection? Not sure why it requires a patch and
isn't just something selectable via CONFIG_xxx...
I'm guessing the patch hasn't been submitted yet.
This might be more of a question for linux-net.
I tried to take the 2.6.25.15 patches (which worked with .19 just fine)
and rewrite them for 2.6.26.8, but the changes related to CONFIG_NET_NS
seem to be fouling us up... little confused about that, because the code:
+ if (ip_route_output_key(out->nd_net, &rt, &fl) != 0) {
+ /* Funky routing can do this. */
+ if (net_ratelimit())
+ printk("MASQUERADE:"
+ " No route: Rusty's brain broke!\n");
+ return NF_DROP;
+ }
compiled just fine in 2.6.25.19 as I said...
I haven't groveled through all the changes from 2.6.25.19 to 2.6.26.8... In 2.6.26 I'm seeing:
#ifdef CONFIG_NET_NS
/* Network namespace this network device is inside */
struct net *nd_net;
#endif
in include/linux/netdevice.h. In 2.6.25.19 I see:
/* Network namespace this network device is inside */
struct net *nd_net;
(no #ifdef's).
Can someone please look over my patch and confirm its correctness? Mostly I took:
+ if (fib_lookup(dev->nd_net, &fl, &res) != 0)
+ continue;
+ if (res.type != RTN_UNICAST &&
+ res.type != RTN_LOCAL) {
+ fib_res_put(&res);
+ continue;
+ }
and rewrote it as:
+ #ifdef CONFIG_NET_NS
+ if (fib_lookup(dev->nd_net, &fl, &res) != 0)
+ #else
+ if (fib_lookup(&init_net, &fl, &res) != 0)
+ #endif
+ continue;
+ if (res.type != RTN_UNICAST &&
+ res.type != RTN_LOCAL) {
+ fib_res_put(&res);
+ continue;
+ }
Ditto for:
+ #ifdef CONFIG_NET_NS
+ if (ip_route_output_key(out->nd_net, &rt, &fl) != 0) {
+ #else
+ if (ip_route_output_key(&init_net, &rt, &fl) != 0) {
+ #endif
+ /* Funky routing can do this. */
+ if (net_ratelimit())
+ printk("MASQUERADE:"
+ " No route: Rusty's brain broke!\n");
+ return NF_DROP;
+ }
Might be handy to have a convenience wrapper that takes a struct
net_device *, and returns its ->nd_net if CONFIG_NET_NS is enabled, and
otherwise returns &init_net...
Maybe like nf_forward_net() works, but simpler. Or should I have
written the patches above using nf_local_out_net() instead???
It would be nice if the patch (the original, not my tweaked version)
became part of the upstream source. One of the things about the patch
is that it's not a compile-time option. It has sequences such as:
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
-#else
if (FIB_RES_DEV(res) == dev)
-#endif
and:
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
fib_sync_up(ifa->ifa_dev->dev);
-#endif
which I don't understand. You can't simply turn it off or on via
CONFIG_IP_ROUTE_MULTIPATH=y ...
Thanks,
-Philip
diff -urp v2.6.25/linux/include/linux/rtnetlink.h linux/include/linux/rtnetlink.h
--- v2.6.25/linux/include/linux/rtnetlink.h 2008-04-17 09:58:08.000000000 +0300
+++ linux/include/linux/rtnetlink.h 2008-04-19 18:30:04.000000000 +0300
@@ -303,6 +303,8 @@ struct rtnexthop
#define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */
#define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */
#define RTNH_F_ONLINK 4 /* Gateway is forced on link */
+#define RTNH_F_SUSPECT 8 /* We don't know the real state */
+#define RTNH_F_BADSTATE (RTNH_F_DEAD | RTNH_F_SUSPECT)
/* Macros to handle hexthops */
diff -urp v2.6.25/linux/include/net/flow.h linux/include/net/flow.h
--- v2.6.25/linux/include/net/flow.h 2008-04-17 09:58:08.000000000 +0300
+++ linux/include/net/flow.h 2008-04-19 18:30:17.000000000 +0300
@@ -19,6 +19,8 @@ struct flowi {
struct {
__be32 daddr;
__be32 saddr;
+ __be32 lsrc;
+ __be32 gw;
__u8 tos;
__u8 scope;
} ip4_u;
@@ -43,6 +45,8 @@ struct flowi {
#define fl6_flowlabel nl_u.ip6_u.flowlabel
#define fl4_dst nl_u.ip4_u.daddr
#define fl4_src nl_u.ip4_u.saddr
+#define fl4_lsrc nl_u.ip4_u.lsrc
+#define fl4_gw nl_u.ip4_u.gw
#define fl4_tos nl_u.ip4_u.tos
#define fl4_scope nl_u.ip4_u.scope
diff -urp v2.6.25/linux/include/net/ip_fib.h linux/include/net/ip_fib.h
--- v2.6.25/linux/include/net/ip_fib.h 2008-04-17 09:58:08.000000000 +0300
+++ linux/include/net/ip_fib.h 2008-04-19 18:30:04.000000000 +0300
@@ -207,6 +207,8 @@ extern int fib_lookup(struct net *n, str
extern struct fib_table *fib_new_table(struct net *net, u32 id);
extern struct fib_table *fib_get_table(struct net *net, u32 id);
+extern int fib_result_table(struct fib_result *res);
+
#endif /* CONFIG_IP_MULTIPLE_TABLES */
/* Exported by fib_frontend.c */
@@ -276,4 +278,6 @@ static inline void fib_proc_exit(struct
}
#endif
+extern rwlock_t fib_nhflags_lock;
+
#endif /* _NET_FIB_H */
diff -urp v2.6.25/linux/include/net/netfilter/nf_nat.h linux/include/net/netfilter/nf_nat.h
--- v2.6.25/linux/include/net/netfilter/nf_nat.h 2008-04-17 09:58:08.000000000 +0300
+++ linux/include/net/netfilter/nf_nat.h 2008-04-19 18:30:17.000000000 +0300
@@ -77,6 +77,13 @@ struct nf_conn_nat
#endif
};
+/* Call input routing for SNAT-ed traffic */
+extern unsigned int ip_nat_route_input(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *));
+
/* Set up the info structure to map into this range. */
extern unsigned int nf_nat_setup_info(struct nf_conn *ct,
const struct nf_nat_range *range,
diff -urp v2.6.25/linux/include/net/route.h linux/include/net/route.h
--- v2.6.25/linux/include/net/route.h 2008-04-17 09:58:08.000000000 +0300
+++ linux/include/net/route.h 2008-04-19 18:30:17.000000000 +0300
@@ -116,6 +116,7 @@
extern int ip_route_output_key(struct net *, struct rtable **, struct flowi *flp);
extern int ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags);
extern int ip_route_input(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin);
+extern int ip_route_input_lookup(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin, __be32 lsrc);
extern unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, unsigned short new_mtu, struct net_device *dev);
extern void ip_rt_send_redirect(struct sk_buff *skb);
diff -urp v2.6.25/linux/net/bridge/br_netfilter.c linux/net/bridge/br_netfilter.c
--- v2.6.25/linux/net/bridge/br_netfilter.c 2008-04-17 09:58:08.000000000 +0300
+++ linux/net/bridge/br_netfilter.c 2008-04-19 18:30:17.000000000 +0300
@@ -325,6 +325,10 @@ static int br_nf_pre_routing_finish(stru
struct nf_bridge_info *nf_bridge = skb->nf_bridge;
int err;
+ /* Old skb->dst is not expected, it is lost in all cases */
+ dst_release(skb->dst);
+ skb->dst = NULL;
+
if (nf_bridge->mask & BRNF_PKT_TYPE) {
skb->pkt_type = PACKET_OTHERHOST;
nf_bridge->mask ^= BRNF_PKT_TYPE;
diff -urp v2.6.25/linux/net/ipv4/fib_frontend.c linux/net/ipv4/fib_frontend.c
--- v2.6.25/linux/net/ipv4/fib_frontend.c 2008-04-17 09:58:09.000000000 +0300
+++ linux/net/ipv4/fib_frontend.c 2008-04-19 18:30:04.000000000 +0300
@@ -49,6 +49,8 @@
#ifndef CONFIG_IP_MULTIPLE_TABLES
+#define FIB_RES_TABLE(r) (RT_TABLE_MAIN)
+
static int __net_init fib4_rules_init(struct net *net)
{
struct fib_table *local_table, *main_table;
@@ -73,6 +75,8 @@ fail:
}
#else
+#define FIB_RES_TABLE(r) (fib_result_table(r))
+
struct fib_table *fib_new_table(struct net *net, u32 id)
{
struct fib_table *tb;
@@ -127,7 +131,8 @@ void fib_select_default(struct net *net,
table = res->r->table;
#endif
tb = fib_get_table(net, table);
- if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
+ if ((FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) ||
+ FIB_RES_NH(*res).nh_scope == RT_SCOPE_HOST)
tb->tb_select_default(tb, flp, res);
}
@@ -241,6 +246,9 @@ int fib_validate_source(__be32 src, __be
.tos = tos } },
.iif = oif };
struct fib_result res;
+ int table;
+ unsigned char prefixlen;
+ unsigned char scope;
int no_addr, rpf;
int ret;
struct net *net;
@@ -264,31 +272,35 @@ int fib_validate_source(__be32 src, __be
goto e_inval_res;
*spec_dst = FIB_RES_PREFSRC(res);
fib_combine_itag(itag, &res);
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
-#else
if (FIB_RES_DEV(res) == dev)
-#endif
{
ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
fib_res_put(&res);
return ret;
}
+ table = FIB_RES_TABLE(&res);
+ prefixlen = res.prefixlen;
+ scope = res.scope;
fib_res_put(&res);
if (no_addr)
goto last_resort;
- if (rpf)
- goto e_inval;
fl.oif = dev->ifindex;
ret = 0;
if (fib_lookup(net, &fl, &res) == 0) {
- if (res.type == RTN_UNICAST) {
+ if (res.type == RTN_UNICAST &&
+ ((table == FIB_RES_TABLE(&res) &&
+ res.prefixlen >= prefixlen && res.scope >= scope) ||
+ !rpf)) {
*spec_dst = FIB_RES_PREFSRC(res);
ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
+ fib_res_put(&res);
+ return ret;
}
fib_res_put(&res);
}
+ if (rpf)
+ goto e_inval;
return ret;
last_resort:
@@ -911,9 +923,7 @@ static int fib_inetaddr_event(struct not
switch (event) {
case NETDEV_UP:
fib_add_ifaddr(ifa);
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
fib_sync_up(ifa->ifa_dev->dev);
-#endif
rt_cache_flush(-1);
break;
case NETDEV_DOWN:
@@ -949,9 +959,7 @@ static int fib_netdev_event(struct notif
for_ifa(in_dev) {
fib_add_ifaddr(ifa);
} endfor_ifa(in_dev);
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
fib_sync_up(dev);
-#endif
rt_cache_flush(-1);
break;
case NETDEV_DOWN:
diff -urp v2.6.25/linux/net/ipv4/fib_hash.c linux/net/ipv4/fib_hash.c
--- v2.6.25/linux/net/ipv4/fib_hash.c 2008-04-17 09:58:09.000000000 +0300
+++ linux/net/ipv4/fib_hash.c 2008-04-19 18:30:04.000000000 +0300
@@ -280,25 +280,35 @@ out:
static void
fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
{
- int order, last_idx;
+ int order, last_idx, last_dflt, last_nhsel;
+ struct fib_alias *first_fa = NULL;
+ struct hlist_head *head;
struct hlist_node *node;
struct fib_node *f;
struct fib_info *fi = NULL;
struct fib_info *last_resort;
struct fn_hash *t = (struct fn_hash*)tb->tb_data;
- struct fn_zone *fz = t->fn_zones[0];
+ struct fn_zone *fz = t->fn_zones[res->prefixlen];
+ __be32 k;
if (fz == NULL)
return;
+ k = fz_key(flp->fl4_dst, fz);
+ last_dflt = -2;
+ last_nhsel = 0;
last_idx = -1;
last_resort = NULL;
order = -1;
read_lock(&fib_hash_lock);
- hlist_for_each_entry(f, node, &fz->fz_hash[0], fn_hash) {
+ head = &fz->fz_hash[fn_hash(k, fz)];
+ hlist_for_each_entry(f, node, head, fn_hash) {
struct fib_alias *fa;
+ if (f->fn_key != k)
+ continue;
+
list_for_each_entry(fa, &f->fn_alias, fa_list) {
struct fib_info *next_fi = fa->fa_info;
@@ -306,42 +316,56 @@ fn_hash_select_default(struct fib_table
fa->fa_type != RTN_UNICAST)
continue;
+ if (fa->fa_tos &&
+ fa->fa_tos != flp->fl4_tos)
+ continue;
if (next_fi->fib_priority > res->fi->fib_priority)
break;
- if (!next_fi->fib_nh[0].nh_gw ||
- next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
- continue;
fa->fa_state |= FA_S_ACCESSED;
- if (fi == NULL) {
- if (next_fi != res->fi)
- break;
- } else if (!fib_detect_death(fi, order, &last_resort,
- &last_idx, tb->tb_default)) {
+ if (!first_fa) {
+ last_dflt = fa->fa_last_dflt;
+ first_fa = fa;
+ }
+ if (fi && !fib_detect_death(fi, order, &last_resort,
+ &last_idx, &last_dflt, &last_nhsel, flp)) {
fib_result_assign(res, fi);
- tb->tb_default = order;
+ first_fa->fa_last_dflt = order;
goto out;
}
fi = next_fi;
order++;
}
+ break;
}
if (order <= 0 || fi == NULL) {
- tb->tb_default = -1;
+ if (fi && fi->fib_nhs > 1 &&
+ fib_detect_death(fi, order, &last_resort, &last_idx,
+ &last_dflt, &last_nhsel, flp) &&
+ last_resort == fi) {
+ read_lock_bh(&fib_nhflags_lock);
+ fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
+ read_unlock_bh(&fib_nhflags_lock);
+ }
+ if (first_fa) first_fa->fa_last_dflt = -1;
goto out;
}
if (!fib_detect_death(fi, order, &last_resort, &last_idx,
- tb->tb_default)) {
+ &last_dflt, &last_nhsel, flp)) {
fib_result_assign(res, fi);
- tb->tb_default = order;
+ first_fa->fa_last_dflt = order;
goto out;
}
- if (last_idx >= 0)
+ if (last_idx >= 0) {
fib_result_assign(res, last_resort);
- tb->tb_default = last_idx;
+ read_lock_bh(&fib_nhflags_lock);
+ last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
+ read_unlock_bh(&fib_nhflags_lock);
+ first_fa->fa_last_dflt = last_idx;
+ }
out:
read_unlock(&fib_hash_lock);
}
@@ -465,6 +489,7 @@ static int fn_hash_insert(struct fib_tab
write_lock_bh(&fib_hash_lock);
fi_drop = fa->fa_info;
fa->fa_info = fi;
+ fa->fa_last_dflt = -1;
fa->fa_type = cfg->fc_type;
fa->fa_scope = cfg->fc_scope;
state = fa->fa_state;
@@ -519,6 +544,7 @@ static int fn_hash_insert(struct fib_tab
new_fa->fa_type = cfg->fc_type;
new_fa->fa_scope = cfg->fc_scope;
new_fa->fa_state = 0;
+ new_fa->fa_last_dflt = -1;
/*
* Insert new entry to the list.
diff -urp v2.6.25/linux/net/ipv4/fib_lookup.h linux/net/ipv4/fib_lookup.h
--- v2.6.25/linux/net/ipv4/fib_lookup.h 2008-04-17 09:58:09.000000000 +0300
+++ linux/net/ipv4/fib_lookup.h 2008-04-19 18:30:04.000000000 +0300
@@ -8,6 +8,7 @@
struct fib_alias {
struct list_head fa_list;
struct fib_info *fa_info;
+ int fa_last_dflt;
u8 fa_tos;
u8 fa_type;
u8 fa_scope;
@@ -38,7 +39,8 @@ extern struct fib_alias *fib_find_alias(
u8 tos, u32 prio);
extern int fib_detect_death(struct fib_info *fi, int order,
struct fib_info **last_resort,
- int *last_idx, int dflt);
+ int *last_idx, int *dflt, int *last_nhsel,
+ const struct flowi *flp);
static inline void fib_result_assign(struct fib_result *res,
struct fib_info *fi)
diff -urp v2.6.25/linux/net/ipv4/fib_rules.c linux/net/ipv4/fib_rules.c
--- v2.6.25/linux/net/ipv4/fib_rules.c 2008-04-17 09:58:09.000000000 +0300
+++ linux/net/ipv4/fib_rules.c 2008-04-19 18:30:04.000000000 +0300
@@ -54,6 +54,11 @@ u32 fib_rules_tclass(struct fib_result *
}
#endif
+int fib_result_table(struct fib_result *res)
+{
+ return res->r->table;
+}
+
int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res)
{
struct fib_lookup_arg arg = {
diff -urp v2.6.25/linux/net/ipv4/fib_semantics.c linux/net/ipv4/fib_semantics.c
--- v2.6.25/linux/net/ipv4/fib_semantics.c 2008-04-17 09:58:09.000000000 +0300
+++ linux/net/ipv4/fib_semantics.c 2008-04-19 18:30:17.000000000 +0300
@@ -52,6 +52,7 @@ static struct hlist_head *fib_info_hash;
static struct hlist_head *fib_info_laddrhash;
static unsigned int fib_hash_size;
static unsigned int fib_info_cnt;
+rwlock_t fib_nhflags_lock = RW_LOCK_UNLOCKED;
#define DEVINDEX_HASHBITS 8
#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
@@ -187,7 +188,7 @@ static __inline__ int nh_comp(const stru
#ifdef CONFIG_NET_CLS_ROUTE
nh->nh_tclassid != onh->nh_tclassid ||
#endif
- ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
+ ((nh->nh_flags^onh->nh_flags)&~RTNH_F_BADSTATE))
return -1;
onh++;
} endfor_nexthops(fi);
@@ -238,7 +239,7 @@ static struct fib_info *fib_find_info(co
nfi->fib_priority == fi->fib_priority &&
memcmp(nfi->fib_metrics, fi->fib_metrics,
sizeof(fi->fib_metrics)) == 0 &&
- ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
+ ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_BADSTATE) == 0 &&
(nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
return fi;
}
@@ -349,26 +350,70 @@ struct fib_alias *fib_find_alias(struct
}
int fib_detect_death(struct fib_info *fi, int order,
- struct fib_info **last_resort, int *last_idx, int dflt)
+ struct fib_info **last_resort, int *last_idx, int *dflt,
+ int *last_nhsel, const struct flowi *flp)
{
struct neighbour *n;
- int state = NUD_NONE;
+ int nhsel;
+ int state;
+ struct fib_nh * nh;
+ __be32 dst;
+ int flag, dead = 1;
+
+ /* change_nexthops(fi) { */
+ for (nhsel = 0, nh = fi->fib_nh; nhsel < fi->fib_nhs; nh++, nhsel++) {
+ if (flp->oif && flp->oif != nh->nh_oif)
+ continue;
+ if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && nh->nh_gw &&
+ nh->nh_scope == RT_SCOPE_LINK)
+ continue;
+ if (nh->nh_flags & RTNH_F_DEAD)
+ continue;
- n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
- if (n) {
- state = n->nud_state;
- neigh_release(n);
- }
- if (state==NUD_REACHABLE)
- return 0;
- if ((state&NUD_VALID) && order != dflt)
- return 0;
- if ((state&NUD_VALID) ||
- (*last_idx<0 && order > dflt)) {
- *last_resort = fi;
- *last_idx = order;
+ flag = 0;
+ if (nh->nh_dev->flags & IFF_NOARP) {
+ dead = 0;
+ goto setfl;
+ }
+
+ dst = nh->nh_gw;
+ if (!nh->nh_gw || nh->nh_scope != RT_SCOPE_LINK)
+ dst = flp->fl4_dst;
+
+ state = NUD_NONE;
+ n = neigh_lookup(&arp_tbl, &dst, nh->nh_dev);
+ if (n) {
+ state = n->nud_state;
+ neigh_release(n);
+ }
+ if (state==NUD_REACHABLE ||
+ ((state&NUD_VALID) && order != *dflt)) {
+ dead = 0;
+ goto setfl;
+ }
+ if (!(state&NUD_VALID))
+ flag = 1;
+ if (!dead)
+ goto setfl;
+ if ((state&NUD_VALID) ||
+ (*last_idx<0 && order >= *dflt)) {
+ *last_resort = fi;
+ *last_idx = order;
+ *last_nhsel = nhsel;
+ }
+
+ setfl:
+
+ read_lock_bh(&fib_nhflags_lock);
+ if (flag)
+ nh->nh_flags |= RTNH_F_SUSPECT;
+ else
+ nh->nh_flags &= ~RTNH_F_SUSPECT;
+ read_unlock_bh(&fib_nhflags_lock);
}
- return 1;
+ /* } endfor_nexthops(fi) */
+
+ return dead;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -540,8 +585,11 @@ static int fib_check_nh(struct fib_confi
return -EINVAL;
if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
return -ENODEV;
- if (!(dev->flags&IFF_UP))
- return -ENETDOWN;
+ if (!(dev->flags&IFF_UP)) {
+ if (fi->fib_protocol != RTPROT_STATIC)
+ return -ENETDOWN;
+ nh->nh_flags |= RTNH_F_DEAD;
+ }
nh->nh_dev = dev;
dev_hold(dev);
nh->nh_scope = RT_SCOPE_LINK;
@@ -561,24 +609,48 @@ static int fib_check_nh(struct fib_confi
/* It is not necessary, but requires a bit of thinking */
if (fl.fl4_scope < RT_SCOPE_LINK)
fl.fl4_scope = RT_SCOPE_LINK;
- if ((err = fib_lookup(net, &fl, &res)) != 0)
- return err;
+ err = fib_lookup(net, &fl, &res);
}
- err = -EINVAL;
- if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
- goto out;
- nh->nh_scope = res.scope;
- nh->nh_oif = FIB_RES_OIF(res);
- if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
- goto out;
- dev_hold(nh->nh_dev);
- err = -ENETDOWN;
- if (!(nh->nh_dev->flags & IFF_UP))
- goto out;
- err = 0;
+ if (err) {
+ struct in_device *in_dev;
+
+ if (err != -ENETUNREACH ||
+ fi->fib_protocol != RTPROT_STATIC)
+ return err;
+
+ in_dev = inetdev_by_index(net, nh->nh_oif);
+ if (in_dev == NULL ||
+ in_dev->dev->flags & IFF_UP) {
+ if (in_dev)
+ in_dev_put(in_dev);
+ return err;
+ }
+ nh->nh_flags |= RTNH_F_DEAD;
+ nh->nh_scope = RT_SCOPE_LINK;
+ nh->nh_dev = in_dev->dev;
+ dev_hold(nh->nh_dev);
+ in_dev_put(in_dev);
+ } else {
+ err = -EINVAL;
+ if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
+ goto out;
+ nh->nh_scope = res.scope;
+ nh->nh_oif = FIB_RES_OIF(res);
+ if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
+ goto out;
+ dev_hold(nh->nh_dev);
+ if (!(nh->nh_dev->flags & IFF_UP)) {
+ if (fi->fib_protocol != RTPROT_STATIC) {
+ err = -ENETDOWN;
+ goto out;
+ }
+ nh->nh_flags |= RTNH_F_DEAD;
+ }
+ err = 0;
out:
- fib_res_put(&res);
- return err;
+ fib_res_put(&res);
+ return err;
+ }
} else {
struct in_device *in_dev;
@@ -589,8 +661,11 @@ out:
if (in_dev == NULL)
return -ENODEV;
if (!(in_dev->dev->flags&IFF_UP)) {
- in_dev_put(in_dev);
- return -ENETDOWN;
+ if (fi->fib_protocol != RTPROT_STATIC) {
+ in_dev_put(in_dev);
+ return -ENETDOWN;
+ }
+ nh->nh_flags |= RTNH_F_DEAD;
}
nh->nh_dev = in_dev->dev;
dev_hold(nh->nh_dev);
@@ -900,8 +975,12 @@ int fib_semantic_match(struct list_head
for_nexthops(fi) {
if (nh->nh_flags&RTNH_F_DEAD)
continue;
- if (!flp->oif || flp->oif == nh->nh_oif)
- break;
+ if (flp->oif && flp->oif != nh->nh_oif)
+ continue;
+ if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
+ nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
+ continue;
+ break;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (nhsel < fi->fib_nhs) {
@@ -1078,18 +1157,29 @@ int fib_sync_down_dev(struct net_device
prev_fi = fi;
dead = 0;
change_nexthops(fi) {
- if (nh->nh_flags&RTNH_F_DEAD)
- dead++;
- else if (nh->nh_dev == dev &&
- nh->nh_scope != scope) {
- nh->nh_flags |= RTNH_F_DEAD;
+ if (nh->nh_flags&RTNH_F_DEAD) {
+ if (fi->fib_protocol!=RTPROT_STATIC ||
+ nh->nh_dev == NULL ||
+ __in_dev_get_rtnl(nh->nh_dev) == NULL ||
+ nh->nh_dev->flags&IFF_UP)
+ dead++;
+ } else if (nh->nh_dev == dev &&
+ nh->nh_scope != scope) {
+ write_lock_bh(&fib_nhflags_lock);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- spin_lock_bh(&fib_multipath_lock);
+ spin_lock(&fib_multipath_lock);
+ nh->nh_flags |= RTNH_F_DEAD;
fi->fib_power -= nh->nh_power;
nh->nh_power = 0;
- spin_unlock_bh(&fib_multipath_lock);
+ spin_unlock(&fib_multipath_lock);
+#else
+ nh->nh_flags |= RTNH_F_DEAD;
#endif
- dead++;
+ write_unlock_bh(&fib_nhflags_lock);
+ if (fi->fib_protocol!=RTPROT_STATIC ||
+ force ||
+ __in_dev_get_rtnl(dev) == NULL)
+ dead++;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (force > 1 && nh->nh_dev == dev) {
@@ -1107,11 +1197,8 @@ int fib_sync_down_dev(struct net_device
return ret;
}
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
-
/*
- Dead device goes up. We wake up dead nexthops.
- It takes sense only on multipath routes.
+ Dead device goes up or new address is added. We wake up dead nexthops.
*/
int fib_sync_up(struct net_device *dev)
@@ -1121,8 +1208,10 @@ int fib_sync_up(struct net_device *dev)
struct hlist_head *head;
struct hlist_node *node;
struct fib_nh *nh;
- int ret;
+ struct fib_result res;
+ int ret, rep;
+repeat:
if (!(dev->flags&IFF_UP))
return 0;
@@ -1130,6 +1219,7 @@ int fib_sync_up(struct net_device *dev)
hash = fib_devindex_hashfn(dev->ifindex);
head = &fib_info_devhash[hash];
ret = 0;
+ rep = 0;
hlist_for_each_entry(nh, node, head, nh_hash) {
struct fib_info *fi = nh->nh_parent;
@@ -1142,19 +1232,43 @@ int fib_sync_up(struct net_device *dev)
prev_fi = fi;
alive = 0;
change_nexthops(fi) {
- if (!(nh->nh_flags&RTNH_F_DEAD)) {
- alive++;
+ if (!(nh->nh_flags&RTNH_F_DEAD))
continue;
- }
if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
continue;
if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
continue;
+ if (nh->nh_gw && fi->fib_protocol == RTPROT_STATIC) {
+ struct flowi fl = {
+ .nl_u = { .ip4_u =
+ { .daddr = nh->nh_gw,
+ .scope = nh->nh_scope } },
+ .oif = nh->nh_oif,
+ };
+ #ifdef CONFIG_NET_NS
+ if (fib_lookup(dev->nd_net, &fl, &res) != 0)
+ #else
+ if (fib_lookup(&init_net, &fl, &res) != 0)
+ #endif
+ continue;
+ if (res.type != RTN_UNICAST &&
+ res.type != RTN_LOCAL) {
+ fib_res_put(&res);
+ continue;
+ }
+ nh->nh_scope = res.scope;
+ fib_res_put(&res);
+ rep = 1;
+ }
alive++;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
spin_lock_bh(&fib_multipath_lock);
nh->nh_power = 0;
+#endif
nh->nh_flags &= ~RTNH_F_DEAD;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
spin_unlock_bh(&fib_multipath_lock);
+#endif
} endfor_nexthops(fi)
if (alive > 0) {
@@ -1162,10 +1272,14 @@ int fib_sync_up(struct net_device *dev)
ret++;
}
}
+ if (rep)
+ goto repeat;
return ret;
}
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+
/*
The algorithm is suboptimal, but it provides really
fair weighted route distribution.
@@ -1174,24 +1288,45 @@ int fib_sync_up(struct net_device *dev)
void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
{
struct fib_info *fi = res->fi;
- int w;
+ int w, alive;
spin_lock_bh(&fib_multipath_lock);
+ if (flp->oif) {
+ int sel = -1;
+ w = -1;
+ change_nexthops(fi) {
+ if (flp->oif != nh->nh_oif)
+ continue;
+ if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
+ nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
+ continue;
+ if (!(nh->nh_flags&RTNH_F_BADSTATE)) {
+ if (nh->nh_power > w) {
+ w = nh->nh_power;
+ sel = nhsel;
+ }
+ }
+ } endfor_nexthops(fi);
+ if (sel >= 0) {
+ spin_unlock_bh(&fib_multipath_lock);
+ res->nh_sel = sel;
+ return;
+ }
+ goto last_resort;
+ }
+
+repeat:
if (fi->fib_power <= 0) {
int power = 0;
change_nexthops(fi) {
- if (!(nh->nh_flags&RTNH_F_DEAD)) {
+ if (!(nh->nh_flags&RTNH_F_BADSTATE)) {
power += nh->nh_weight;
nh->nh_power = nh->nh_weight;
}
} endfor_nexthops(fi);
fi->fib_power = power;
- if (power <= 0) {
- spin_unlock_bh(&fib_multipath_lock);
- /* Race condition: route has just become dead. */
- res->nh_sel = 0;
- return;
- }
+ if (power <= 0)
+ goto last_resort;
}
@@ -1201,20 +1336,40 @@ void fib_select_multipath(const struct f
w = jiffies % fi->fib_power;
+ alive = 0;
change_nexthops(fi) {
- if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
+ if (!(nh->nh_flags&RTNH_F_BADSTATE) && nh->nh_power) {
if ((w -= nh->nh_power) <= 0) {
nh->nh_power--;
fi->fib_power--;
- res->nh_sel = nhsel;
spin_unlock_bh(&fib_multipath_lock);
+ res->nh_sel = nhsel;
return;
}
+ alive = 1;
+ }
+ } endfor_nexthops(fi);
+ if (alive) {
+ fi->fib_power = 0;
+ goto repeat;
+ }
+
+last_resort:
+
+ for_nexthops(fi) {
+ if (!(nh->nh_flags&RTNH_F_DEAD)) {
+ if (flp->oif && flp->oif != nh->nh_oif)
+ continue;
+ if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
+ nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
+ continue;
+ spin_unlock_bh(&fib_multipath_lock);
+ res->nh_sel = nhsel;
+ return;
}
} endfor_nexthops(fi);
/* Race condition: route has just become dead. */
- res->nh_sel = 0;
spin_unlock_bh(&fib_multipath_lock);
}
#endif
diff -urp v2.6.25/linux/net/ipv4/netfilter/ipt_MASQUERADE.c linux/net/ipv4/netfilter/ipt_MASQUERADE.c
--- v2.6.25/linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2008-04-17 09:58:09.000000000 +0300
+++ linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2008-04-19 18:30:17.000000000 +0300
@@ -59,7 +59,7 @@ masquerade_tg(struct sk_buff *skb, const
enum ip_conntrack_info ctinfo;
struct nf_nat_range newrange;
const struct nf_nat_multi_range_compat *mr;
- const struct rtable *rt;
+ struct rtable *rt;
__be32 newsrc;
NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING);
@@ -77,13 +77,32 @@ masquerade_tg(struct sk_buff *skb, const
return NF_ACCEPT;
mr = targinfo;
- rt = skb->rtable;
- newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
- if (!newsrc) {
- printk("MASQUERADE: %s ate my IP address\n", out->name);
- return NF_DROP;
+
+ {
+ struct flowi fl = { .nl_u = { .ip4_u =
+ { .daddr = ip_hdr(skb)->daddr,
+ .tos = (RT_TOS(ip_hdr(skb)->tos) |
+ RTO_CONN),
+ .gw = ((struct rtable *) skb->dst)->rt_gateway,
+ } },
+ .mark = skb->mark,
+ .oif = out->ifindex };
+ #ifdef CONFIG_NET_NS
+ if (ip_route_output_key(out->nd_net, &rt, &fl) != 0) {
+ #else
+ if (ip_route_output_key(&init_net, &rt, &fl) != 0) {
+ #endif
+ /* Funky routing can do this. */
+ if (net_ratelimit())
+ printk("MASQUERADE:"
+ " No route: Rusty's brain broke!\n");
+ return NF_DROP;
+ }
}
+ newsrc = rt->rt_src;
+ ip_rt_put(rt);
+
write_lock_bh(&masq_lock);
nat->masq_index = out->ifindex;
write_unlock_bh(&masq_lock);
diff -urp v2.6.25/linux/net/ipv4/netfilter/nf_nat_core.c linux/net/ipv4/netfilter/nf_nat_core.c
--- v2.6.25/linux/net/ipv4/netfilter/nf_nat_core.c 2008-04-17 09:58:09.000000000 +0300
+++ linux/net/ipv4/netfilter/nf_nat_core.c 2008-04-19 18:30:17.000000000 +0300
@@ -624,6 +624,52 @@ static struct nf_ct_ext_type nat_extend
.flags = NF_CT_EXT_F_PREALLOC,
};
+unsigned int
+ip_nat_route_input(unsigned int hooknum,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct iphdr *iph;
+ struct nf_conn *conn;
+ enum ip_conntrack_info ctinfo;
+ enum ip_conntrack_dir dir;
+ unsigned long statusbit;
+ __be32 saddr;
+
+ if (!(conn = nf_ct_get(skb, &ctinfo)))
+ return NF_ACCEPT;
+
+ if (!(conn->status & IPS_NAT_DONE_MASK))
+ return NF_ACCEPT;
+ dir = CTINFO2DIR(ctinfo);
+ statusbit = IPS_SRC_NAT;
+ if (dir == IP_CT_DIR_REPLY)
+ statusbit ^= IPS_NAT_MASK;
+ if (!(conn->status & statusbit))
+ return NF_ACCEPT;
+
+ if (skb->dst)
+ return NF_ACCEPT;
+
+ if (skb->len < sizeof(struct iphdr))
+ return NF_ACCEPT;
+
+ /* use daddr in other direction as masquerade address (lsrc) */
+ iph = ip_hdr(skb);
+ saddr = conn->tuplehash[!dir].tuple.dst.u3.ip;
+ if (saddr == iph->saddr)
+ return NF_ACCEPT;
+
+ if (ip_route_input_lookup(skb, iph->daddr, iph->saddr, iph->tos,
+ skb->dev, saddr))
+ return NF_DROP;
+
+ return NF_ACCEPT;
+}
+EXPORT_SYMBOL_GPL(ip_nat_route_input);
+
static int __init nf_nat_init(void)
{
size_t i;
diff -urp v2.6.25/linux/net/ipv4/netfilter/nf_nat_standalone.c linux/net/ipv4/netfilter/nf_nat_standalone.c
--- v2.6.25/linux/net/ipv4/netfilter/nf_nat_standalone.c 2008-04-17 09:58:09.000000000 +0300
+++ linux/net/ipv4/netfilter/nf_nat_standalone.c 2008-04-19 18:30:17.000000000 +0300
@@ -282,6 +282,14 @@ static struct nf_hook_ops nf_nat_ops[] _
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_NAT_DST,
},
+ /* Before routing, route before mangling */
+ {
+ .hook = ip_nat_route_input,
+ .owner = THIS_MODULE,
+ .pf = PF_INET,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP_PRI_LAST-1,
+ },
/* After packet filtering, change source */
{
.hook = nf_nat_out,
diff -urp v2.6.25/linux/net/ipv4/route.c linux/net/ipv4/route.c
--- v2.6.25/linux/net/ipv4/route.c 2008-04-17 09:58:09.000000000 +0300
+++ linux/net/ipv4/route.c 2008-04-19 18:30:17.000000000 +0300
@@ -1207,6 +1207,7 @@ void ip_rt_redirect(__be32 old_gw, __be3
/* Gateway is different ... */
rt->rt_gateway = new_gw;
+ if (rt->fl.fl4_gw) rt->fl.fl4_gw = new_gw;
/* Redirect received -> path was valid */
dst_confirm(&rth->u.dst);
@@ -1647,6 +1648,7 @@ static int ip_route_input_mc(struct sk_b
rth->fl.fl4_tos = tos;
rth->fl.mark = skb->mark;
rth->fl.fl4_src = saddr;
+ rth->fl.fl4_lsrc = 0;
rth->rt_src = saddr;
#ifdef CONFIG_NET_CLS_ROUTE
rth->u.dst.tclassid = itag;
@@ -1657,6 +1659,7 @@ static int ip_route_input_mc(struct sk_b
dev_hold(rth->u.dst.dev);
rth->idev = in_dev_get(rth->u.dst.dev);
rth->fl.oif = 0;
+ rth->fl.fl4_gw = 0;
rth->rt_gateway = daddr;
rth->rt_spec_dst= spec_dst;
rth->rt_genid = atomic_read(&rt_genid);
@@ -1762,7 +1762,7 @@ static int __mkroute_input(struct sk_buf
struct fib_result *res,
struct in_device *in_dev,
__be32 daddr, __be32 saddr, u32 tos,
- struct rtable **result)
+ __be32 lsrc, struct rtable **result)
{
struct rtable *rth;
@@ -1796,6 +1796,7 @@ static int __mkroute_input(struct sk_buf
flags |= RTCF_DIRECTSRC;
if (out_dev == in_dev && err &&
+ !lsrc &&
(IN_DEV_SHARED_MEDIA(out_dev) ||
inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
flags |= RTCF_DOREDIRECT;
@@ -1789,6 +1793,7 @@ static inline int __mkroute_input(struct
rth->fl.mark = skb->mark;
rth->fl.fl4_src = saddr;
rth->rt_src = saddr;
+ rth->fl.fl4_lsrc = lsrc;
rth->rt_gateway = daddr;
rth->rt_iif =
rth->fl.iif = in_dev->dev->ifindex;
@@ -1796,6 +1801,7 @@ static inline int __mkroute_input(struct
dev_hold(rth->u.dst.dev);
rth->idev = in_dev_get(rth->u.dst.dev);
rth->fl.oif = 0;
+ rth->fl.fl4_gw = 0;
rth->rt_spec_dst= spec_dst;
rth->u.dst.input = ip_forward;
@@ -1858,21 +1859,23 @@ static int __mkroute_input(struct sk_buf
static int ip_mkroute_input(struct sk_buff *skb,
struct fib_result *res,
+ struct net *net,
const struct flowi *fl,
struct in_device *in_dev,
- __be32 daddr, __be32 saddr, u32 tos)
+ __be32 daddr, __be32 saddr, u32 tos, __be32 lsrc)
{
struct rtable* rth = NULL;
int err;
unsigned hash;
+ fib_select_default(net, fl, res);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0)
+ if (res->fi && res->fi->fib_nhs > 1)
fib_select_multipath(fl, res);
#endif
/* create a routing cache entry */
- err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth);
+ err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, lsrc, &rth);
if (err)
return err;
@@ -1850,18 +1858,19 @@ static inline int ip_mkroute_input(struc
*/
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev)
+ u8 tos, struct net_device *dev, __be32 lsrc)
{
struct fib_result res;
struct in_device *in_dev = in_dev_get(dev);
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = daddr,
- .saddr = saddr,
+ .saddr = lsrc? : saddr,
.tos = tos,
.scope = RT_SCOPE_UNIVERSE,
} },
.mark = skb->mark,
- .iif = dev->ifindex };
+ .iif = lsrc?
+ init_net.loopback_dev->ifindex : dev->ifindex };
unsigned flags = 0;
u32 itag = 0;
struct rtable * rth;
@@ -1897,6 +1906,12 @@ static int ip_route_input_slow(struct sk
ipv4_is_loopback(daddr))
goto martian_destination;
+ if (lsrc) {
+ if (ipv4_is_multicast(lsrc) || ipv4_is_lbcast(lsrc) ||
+ ipv4_is_zeronet(lsrc) || ipv4_is_loopback(lsrc))
+ goto e_inval;
+ }
+
/*
* Now we are ready to route packet.
*/
@@ -1906,6 +1921,8 @@ static int ip_route_input_slow(struct sk
goto no_route;
}
free_res = 1;
+ fl.iif = dev->ifindex;
+ fl.fl4_src = saddr;
RT_CACHE_STAT_INC(in_slow_tot);
@@ -1930,7 +1947,7 @@ static int ip_route_input_slow(struct sk
if (res.type != RTN_UNICAST)
goto martian_destination;
- err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
+ err = ip_mkroute_input(skb, &res, net, &fl, in_dev, daddr, saddr, tos, lsrc);
done:
in_dev_put(in_dev);
if (free_res)
@@ -1940,6 +1957,8 @@ out: return err;
brd_input:
if (skb->protocol != htons(ETH_P_IP))
goto e_inval;
+ if (lsrc)
+ goto e_inval;
if (ipv4_is_zeronet(saddr))
spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
@@ -1981,6 +2000,7 @@ local_input:
rth->u.dst.dev = net->loopback_dev;
dev_hold(rth->u.dst.dev);
rth->idev = in_dev_get(rth->u.dst.dev);
+ rth->fl.fl4_gw = 0;
rth->rt_gateway = daddr;
rth->rt_spec_dst= spec_dst;
rth->u.dst.input= ip_local_deliver;
@@ -2032,8 +2052,9 @@ martian_source:
goto e_inval;
}
-int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev)
+static inline int
+ip_route_input_cached(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ u8 tos, struct net_device *dev, __be32 lsrc)
{
struct rtable * rth;
unsigned hash;
@@ -2105,6 +2108,7 @@ ip_route_input_cached(struct sk_buff *sk
if (((rth->fl.fl4_dst ^ daddr) |
(rth->fl.fl4_src ^ saddr) |
(rth->fl.iif ^ iif) |
+ (rth->fl.fl4_lsrc ^ lsrc) |
rth->fl.oif |
(rth->fl.fl4_tos ^ tos)) == 0 &&
rth->fl.mark == skb->mark &&
@@ -2097,7 +2119,19 @@ int ip_route_input(struct sk_buff *skb,
rcu_read_unlock();
return -EINVAL;
}
- return ip_route_input_slow(skb, daddr, saddr, tos, dev);
+ return ip_route_input_slow(skb, daddr, saddr, tos, dev, lsrc);
+}
+
+int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ u8 tos, struct net_device *dev)
+{
+ return ip_route_input_cached(skb, daddr, saddr, tos, dev, 0); /* lsrc = 0: existing callers keep their signature */
+}
+
+int ip_route_input_lookup(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ u8 tos, struct net_device *dev, __be32 lsrc)
+{
+ return ip_route_input_cached(skb, daddr, saddr, tos, dev, lsrc); /* as ip_route_input(), but forwards the caller's lsrc */
}
static inline int __mkroute_output(struct rtable **result,
@@ -2169,6 +2203,7 @@ static inline int __mkroute_output(struc
rth->fl.fl4_tos = tos;
rth->fl.fl4_src = oldflp->fl4_src;
rth->fl.oif = oldflp->oif;
+ rth->fl.fl4_gw = oldflp->fl4_gw;
rth->fl.mark = oldflp->mark;
rth->rt_dst = fl->fl4_dst;
rth->rt_src = fl->fl4_src;
@@ -2249,6 +2284,7 @@ static int ip_route_output_slow(struct n
struct flowi fl = { .nl_u = { .ip4_u =
{ .daddr = oldflp->fl4_dst,
.saddr = oldflp->fl4_src,
+ .gw = oldflp->fl4_gw,
.tos = tos & IPTOS_RT_MASK,
.scope = ((tos & RTO_ONLINK) ?
RT_SCOPE_LINK :
@@ -2354,6 +2390,7 @@ static int ip_route_output_slow(struct n
dev_out = net->loopback_dev;
dev_hold(dev_out);
fl.oif = net->loopback_dev->ifindex;
+ fl.fl4_gw = 0;
res.type = RTN_LOCAL;
flags |= RTCF_LOCAL;
goto make_route;
@@ -2361,7 +2398,7 @@ static int ip_route_output_slow(struct n
if (fib_lookup(net, &fl, &res)) {
res.fi = NULL;
- if (oldflp->oif) {
+ if (oldflp->oif && dev_out->flags & IFF_UP) {
/* Apparently, routing tables are wrong. Assume,
that the destination is on link.
@@ -2401,6 +2438,7 @@ static int ip_route_output_slow(struct n
dev_out = net->loopback_dev;
dev_hold(dev_out);
fl.oif = dev_out->ifindex;
+ fl.fl4_gw = 0;
if (res.fi)
fib_info_put(res.fi);
res.fi = NULL;
@@ -2408,13 +2446,12 @@ static int ip_route_output_slow(struct n
goto make_route;
}
+ if (res.type == RTN_UNICAST)
+ fib_select_default(net, &fl, &res);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (res.fi->fib_nhs > 1 && fl.oif == 0)
+ if (res.fi->fib_nhs > 1)
fib_select_multipath(&fl, &res);
- else
#endif
- if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
- fib_select_default(net, &fl, &res);
if (!fl.fl4_src)
fl.fl4_src = FIB_RES_PREFSRC(res);
@@ -2452,6 +2489,7 @@ int __ip_route_output_key(struct net *ne
rth->fl.fl4_src == flp->fl4_src &&
rth->fl.iif == 0 &&
rth->fl.oif == flp->oif &&
+ rth->fl.fl4_gw == flp->fl4_gw &&
rth->fl.mark == flp->mark &&
!((rth->fl.fl4_tos ^ flp->fl4_tos) &
(IPTOS_RT_MASK | RTO_ONLINK)) &&
@@ -3054,3 +3092,4 @@ int __init ip_rt_init(void)
EXPORT_SYMBOL(__ip_select_ident);
EXPORT_SYMBOL(ip_route_input);
EXPORT_SYMBOL(ip_route_output_key);
+EXPORT_SYMBOL(ip_route_input_lookup);