Thanks for your detailed review, sending v8 patch to summerize all changes On Mon Nov 4, 2024 at 8:44 PM MSK, Ido Schimmel wrote: > + Guillaume: > please see comment below about route_get_dst_pmtu_from_exception()) > Original patch: > https://lore.kernel.org/netdev/20241104072753.77042-1-deliran@xxxxxxxxxx/ > > On Mon, Nov 04, 2024 at 07:27:50AM +0000, Vladimir Vdovin wrote: > > diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh > > index 569bce8b6383..f24c84184c61 100755 > > --- a/tools/testing/selftests/net/pmtu.sh > > +++ b/tools/testing/selftests/net/pmtu.sh > > @@ -197,6 +197,12 @@ > > # > > # - pmtu_ipv6_route_change > > # Same as above but with IPv6 > > +# > > +# - pmtu_ipv4_mp_exceptions > > +# Use the same topology as in pmtu_ipv4, but add routeable "dummy" > > No need for "dummy" as these are regular addresses on the loopback > device > > > +# addresses on host A and B on lo0 reachable via both routers. > > There is no "lo0" device. Only "lo". Run with "-v" and you will see a > lot of errors > > > +# Host A and B "dummy" addresses have multipath routes to each other. > > +# Check that PMTU exceptions are created for both paths. > > > > source lib.sh > > source net_helper.sh > > @@ -266,7 +272,8 @@ tests=" > > list_flush_ipv4_exception ipv4: list and flush cached exceptions 1 > > list_flush_ipv6_exception ipv6: list and flush cached exceptions 1 > > pmtu_ipv4_route_change ipv4: PMTU exception w/route replace 1 > > - pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1" > > + pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1 > > + pmtu_ipv4_mp_exceptions ipv4: PMTU multipath nh exceptions 1" > > > > # Addressing and routing for tests with routers: four network segments, with > > # index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an > > @@ -343,6 +350,9 @@ tunnel6_a_addr="fd00:2::a" > > tunnel6_b_addr="fd00:2::b" > > tunnel6_mask="64" > > > > +dummy4_a_addr="192.168.99.99" > > +dummy4_b_addr="192.168.88.88" > > Let's change to "host4_a_addr" and "host4_b_addr" (or similar) as we are > no longer using a dummy device > > > + > > dummy6_0_prefix="fc00:1000::" > > dummy6_1_prefix="fc00:1001::" > > dummy6_mask="64" > > @@ -984,6 +994,50 @@ setup_ovs_bridge() { > > run_cmd ip route add ${prefix6}:${b_r1}::1 via ${prefix6}:${a_r1}::2 > > } > > > > +setup_multipath() { > > + if [ "$USE_NH" = "yes" ]; then > > + setup_multipath_new > > + else > > + setup_multipath_old > > + fi > > Please move setup_multipath_{new,old}() before setup_multipath() like > setup_routing() and related functions > > > + > > + # Set up routers with routes to dummies > > + run_cmd ${ns_r1} ip route add ${dummy4_a_addr} via ${prefix4}.${a_r1}.1 > > + run_cmd ${ns_r2} ip route add ${dummy4_a_addr} via ${prefix4}.${a_r2}.1 > > + run_cmd ${ns_r1} ip route add ${dummy4_b_addr} via ${prefix4}.${b_r1}.1 > > + run_cmd ${ns_r2} ip route add ${dummy4_b_addr} via ${prefix4}.${b_r2}.1 > > +} > > + > > +setup_multipath_new() { > > + # Set up host A with multipath routes to host B dummy4_b_addr > > + run_cmd ${ns_a} ip addr add ${dummy4_a_addr} dev lo0 > > s/lo0/lo/ same in other places > > > + run_cmd ${ns_a} ip nexthop add id 201 via ${prefix4}.${a_r1}.2 dev veth_A-R1 > > + run_cmd ${ns_a} ip nexthop add id 202 via ${prefix4}.${a_r2}.2 dev veth_A-R2 > > + run_cmd ${ns_a} ip nexthop add id 203 group 201/202 > > + run_cmd ${ns_a} ip route add ${dummy4_b_addr} nhid 203 > > Maybe number the nexthops 401..403 so that we can later use 601..603 for > IPv6 like $routes_nh is doing > > > + > > + # Set up host B with multipath routes to host A dummy4_a_addr > > + run_cmd ${ns_b} ip addr add ${dummy4_b_addr} dev lo0 > > + run_cmd ${ns_b} ip nexthop add id 201 via ${prefix4}.${b_r1}.2 dev veth_A-R1 > > s/veth_A-R1/veth_B-R1/ > > > + run_cmd ${ns_b} ip nexthop add id 202 via ${prefix4}.${b_r2}.2 dev veth_A-R2 > > s/veth_A-R2/veth_B-R2/ > > > + run_cmd ${ns_b} ip nexthop add id 203 group 201/202 > > + run_cmd ${ns_b} ip route add ${dummy4_a_addr} nhid 203 > > +} > > + > > +setup_multipath_old() { > > + # Set up host A with multipath routes to host B dummy4_b_addr > > + run_cmd ${ns_a} ip addr add ${dummy4_a_addr} dev lo0 > > + run_cmd ${ns_a} ip route add ${dummy4_b_addr} \ > > + nexthop via ${prefix4}.${a_r1}.2 weight 1 \ > > + nexthop via ${prefix4}.${a_r2}.2 weight 1 > > + > > + # Set up host B with multipath routes to host A dummy4_a_addr > > + run_cmd ${ns_b} ip addr add ${dummy4_b_addr} dev lo0 > > + run_cmd ${ns_a} ip route add ${dummy4_a_addr} \ > > s/ns_a/ns_b/ > > > + nexthop via ${prefix4}.${a_b1}.2 weight 1 \ > > s/a_b1/b_r1/ > > > + nexthop via ${prefix4}.${a_b2}.2 weight 1 > > s/a_b2/b_r2/ > > > +} > > + > > setup() { > > [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip > > > > @@ -2329,6 +2383,45 @@ test_pmtu_ipv6_route_change() { > > test_pmtu_ipvX_route_change 6 > > } > > > > +test_pmtu_ipv4_mp_exceptions() { > > + setup namespaces routing multipath || return $ksft_skip > > + > > + trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ > > + "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ > > + "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ > > + "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2 > > + > > + # Set up initial MTU values > > + mtu "${ns_a}" veth_A-R1 2000 > > + mtu "${ns_r1}" veth_R1-A 2000 > > + mtu "${ns_r1}" veth_R1-B 1400 > > + mtu "${ns_b}" veth_B-R1 1400 > > + > > + mtu "${ns_a}" veth_A-R2 2000 > > + mtu "${ns_r2}" veth_R2-A 2000 > > + mtu "${ns_r2}" veth_R2-B 1500 > > + mtu "${ns_b}" veth_B-R2 1500 > > + > > + fail=0 > > + > > + # Ping and expect two nexthop exceptions for two routes in nh group > > + run_cmd ${ns_a} ping -q -M want -i 0.1 -c 1 -s 1800 "${dummy4_b_addr}" > > + > > I looked more into pmtu.sh and this hunk from here ... > > > + # Do route lookup before checking cached exceptions. > > + # The following commands are needed for dst entries to be cached > > + # in both paths exceptions and therefore dumped to user space > > + run_cmd ${ns_a} ip route get ${dummy4_b_addr} oif veth_A-R1 > > + run_cmd ${ns_a} ip route get ${dummy4_b_addr} oif veth_A-R2 > > + > > + # Check cached exceptions > > + if [ "$(${ns_a} ip -oneline route list cache | grep mtu | wc -l)" -ne 2 ]; then > > + err " there are not enough cached exceptions" > > + fail=1 > > + fi > > ... until here can be replaced by route_get_dst_pmtu_from_exception() and > check_pmtu_value() like in other test cases. There are two > prerequisites: > > 1. We should set the same MTU in both paths as otherwise we don't know > which MTU will be cached and what to pass to check_pmtu_value() as the > expected value. I did see that check_pmtu_value() accepts "any", but I > think it's better to check for a specific value. > > 2. route_get_dst_pmtu_from_exception() is not very flexible in the > keywords it accepts for "ip route get" and we need to pass "oif". It can > be solved by [1] (please test), but Guillaume might have a better idea. > Then, the above hunk can be replaced by [2]. > > [1] > diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh > index 569bce8b6383..6e790d38e5d9 100755 > --- a/tools/testing/selftests/net/pmtu.sh > +++ b/tools/testing/selftests/net/pmtu.sh > @@ -1076,23 +1076,15 @@ link_get_mtu() { > } > > route_get_dst_exception() { > - ns_cmd="${1}" > - dst="${2}" > - dsfield="${3}" > - > - if [ -z "${dsfield}" ]; then > - dsfield=0 > - fi > + ns_cmd="${1}"; shift > > - ${ns_cmd} ip route get "${dst}" dsfield "${dsfield}" > + ${ns_cmd} ip route get "$@" > } > > route_get_dst_pmtu_from_exception() { > - ns_cmd="${1}" > - dst="${2}" > - dsfield="${3}" > + ns_cmd="${1}"; shift > > - mtu_parse "$(route_get_dst_exception "${ns_cmd}" "${dst}" "${dsfield}")" > + mtu_parse "$(route_get_dst_exception "${ns_cmd}" "$@")" > } > > check_pmtu_value() { > @@ -1235,10 +1227,10 @@ test_pmtu_ipv4_dscp_icmp_exception() { > run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst2}" > > # Check that exceptions have been created with the correct PMTU > - pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")" > + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1} dsfield ${policy_mark})" > check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 > > - pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")" > + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2} dsfield ${policy_mark})" > check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 > } > > @@ -1285,9 +1277,9 @@ test_pmtu_ipv4_dscp_udp_exception() { > UDP:"${dst2}":50000,tos="${dsfield}" > > # Check that exceptions have been created with the correct PMTU > - pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")" > + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1} dsfield ${policy_mark})" > check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 > - pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")" > + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2} dsfield ${policy_mark})" > check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 > } > > [2] > diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh > index a3c3f7f99e5b..10b8ac2d7f47 100755 > --- a/tools/testing/selftests/net/pmtu.sh > +++ b/tools/testing/selftests/net/pmtu.sh > @@ -2399,19 +2399,11 @@ test_pmtu_ipv4_mp_exceptions() { > # Ping and expect two nexthop exceptions for two routes in nh group > run_cmd ${ns_a} ping -q -M want -i 0.1 -c 1 -s 1800 "${dummy4_b_addr}" > > - # Do route lookup before checking cached exceptions. > - # The following commands are needed for dst entries to be cached > - # in both paths exceptions and therefore dumped to user space > - run_cmd ${ns_a} ip route get ${dummy4_b_addr} oif veth_A-R1 > - run_cmd ${ns_a} ip route get ${dummy4_b_addr} oif veth_A-R2 > - > - # Check cached exceptions > - if [ "$(${ns_a} ip -oneline route list cache | grep mtu | wc -l)" -ne 2 ]; then > - err " there are not enough cached exceptions" > - fail=1 > - fi > - > - return ${fail} > + # Check that exceptions have been created with the correct PMTU > + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dummy4_b_addr} oif veth_A-R1)" > + check_pmtu_value "1500" "${pmtu}" "exceeding MTU (veth_A-R1)" || return 1 > + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dummy4_b_addr} oif veth_A-R2)" > + check_pmtu_value "1500" "${pmtu}" "exceeding MTU (veth_A-R2)" || return 1 > } > > usage() {