cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

pmtu.sh (71247B)


      1#!/bin/sh
      2# SPDX-License-Identifier: GPL-2.0
      3#
      4# Check that route PMTU values match expectations, and that initial device MTU
      5# values are assigned correctly
      6#
      7# Tests currently implemented:
      8#
      9# - pmtu_ipv4
     10#	Set up two namespaces, A and B, with two paths between them over routers
     11#	R1 and R2 (also implemented with namespaces), with different MTUs:
     12#
     13#	  segment a_r1    segment b_r1		a_r1: 2000
     14#	.--------------R1--------------.	b_r1: 1400
     15#	A                               B	a_r2: 2000
     16#	'--------------R2--------------'	b_r2: 1500
     17#	  segment a_r2    segment b_r2
     18#
     19#	Check that PMTU exceptions with the correct PMTU are created. Then
     20#	decrease and increase the MTU of the local link for one of the paths,
     21#	A to R1, checking that route exception PMTU changes accordingly over
     22#	this path. Also check that locked exceptions are created when an ICMP
     23#	message advertising a PMTU smaller than net.ipv4.route.min_pmtu is
     24#	received
     25#
     26# - pmtu_ipv6
     27#	Same as pmtu_ipv4, except for locked PMTU tests, using IPv6
     28#
     29# - pmtu_ipv4_dscp_icmp_exception
     30#	Set up the same network topology as pmtu_ipv4, but use non-default
     31#	routing table in A. A fib-rule is used to jump to this routing table
     32#	based on DSCP. Send ICMPv4 packets with the expected DSCP value and
     33#	verify that ECN doesn't interfere with the creation of PMTU exceptions.
     34#
     35# - pmtu_ipv4_dscp_udp_exception
     36#	Same as pmtu_ipv4_dscp_icmp_exception, but use UDP instead of ICMP.
     37#
     38# - pmtu_ipv4_vxlan4_exception
     39#	Set up the same network topology as pmtu_ipv4, create a VXLAN tunnel
     40#	over IPv4 between A and B, routed via R1. On the link between R1 and B,
     41#	set a MTU lower than the VXLAN MTU and the MTU on the link between A and
     42#	R1. Send IPv4 packets, exceeding the MTU between R1 and B, over VXLAN
     43#	from A to B and check that the PMTU exception is created with the right
     44#	value on A
     45#
     46# - pmtu_ipv6_vxlan4_exception
     47#	Same as pmtu_ipv4_vxlan4_exception, but send IPv6 packets from A to B
     48#
     49# - pmtu_ipv4_vxlan6_exception
     50#	Same as pmtu_ipv4_vxlan4_exception, but use IPv6 transport from A to B
     51#
     52# - pmtu_ipv6_vxlan6_exception
     53#	Same as pmtu_ipv4_vxlan6_exception, but send IPv6 packets from A to B
     54#
     55# - pmtu_ipv4_geneve4_exception
     56#	Same as pmtu_ipv4_vxlan4_exception, but using a GENEVE tunnel instead of
     57#	VXLAN
     58#
     59# - pmtu_ipv6_geneve4_exception
     60#	Same as pmtu_ipv6_vxlan4_exception, but using a GENEVE tunnel instead of
     61#	VXLAN
     62#
     63# - pmtu_ipv4_geneve6_exception
     64#	Same as pmtu_ipv4_vxlan6_exception, but using a GENEVE tunnel instead of
     65#	VXLAN
     66#
     67# - pmtu_ipv6_geneve6_exception
     68#	Same as pmtu_ipv6_vxlan6_exception, but using a GENEVE tunnel instead of
     69#	VXLAN
     70#
     71# - pmtu_ipv{4,6}_br_vxlan{4,6}_exception
     72#	Set up three namespaces, A, B, and C, with routing between A and B over
     73#	R1. R2 is unused in these tests. A has a veth connection to C, and is
     74#	connected to B via a VXLAN endpoint, which is directly bridged to C.
     75#	MTU on the B-R1 link is lower than other MTUs.
     76#
     77#	Check that both C and A are able to communicate with B over the VXLAN
     78#	tunnel, and that PMTU exceptions with the correct values are created.
     79#
     80#	                  segment a_r1    segment b_r1            b_r1: 4000
     81#	                .--------------R1--------------.    everything
     82#	   C---veth     A                               B         else: 5000
     83#	        ' bridge                                |
     84#	            '---- - - - - - VXLAN - - - - - - - '
     85#
     86# - pmtu_ipv{4,6}_br_geneve{4,6}_exception
     87#	Same as pmtu_ipv{4,6}_br_vxlan{4,6}_exception, with a GENEVE tunnel
     88#	instead.
     89#
     90# - pmtu_ipv{4,6}_ovs_vxlan{4,6}_exception
     91#	Set up two namespaces, B, and C, with routing between the init namespace
     92#	and B over R1. A and R2 are unused in these tests. The init namespace
     93#	has a veth connection to C, and is connected to B via a VXLAN endpoint,
     94#	which is handled by Open vSwitch and bridged to C. MTU on the B-R1 link
     95#	is lower than other MTUs.
     96#
     97#	Check that C is able to communicate with B over the VXLAN tunnel, and
     98#	that PMTU exceptions with the correct values are created.
     99#
    100#	                  segment a_r1    segment b_r1            b_r1: 4000
    101#	                .--------------R1--------------.    everything
    102#	   C---veth    init                             B         else: 5000
    103#	        '- ovs                                  |
    104#	            '---- - - - - - VXLAN - - - - - - - '
    105#
    106# - pmtu_ipv{4,6}_ovs_geneve{4,6}_exception
    107#	Same as pmtu_ipv{4,6}_ovs_vxlan{4,6}_exception, with a GENEVE tunnel
    108#	instead.
    109#
    110# - pmtu_ipv{4,6}_fou{4,6}_exception
    111#	Same as pmtu_ipv4_vxlan4, but using a direct IPv4/IPv6 encapsulation
    112#	(FoU) over IPv4/IPv6, instead of VXLAN
    113#
    114# - pmtu_ipv{4,6}_gue{4,6}_exception
    115#	Same as pmtu_ipv4_vxlan4, but using a generic UDP IPv4/IPv6
    116#	encapsulation (GUE) over IPv4/IPv6, instead of VXLAN
    117#
    118# - pmtu_ipv{4,6}_ipv{4,6}_exception
    119#	Same as pmtu_ipv4_vxlan4, but using an IPv4/IPv6 tunnel over IPv4/IPv6,
    120#	instead of VXLAN
    121#
    122# - pmtu_vti4_exception
    123#	Set up vti tunnel on top of veth, with xfrm states and policies, in two
    124#	namespaces with matching endpoints. Check that route exception is not
    125#	created if link layer MTU is not exceeded, then exceed it and check that
    126#	exception is created with the expected PMTU. The approach described
    127#	below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
    128#	changes alone won't affect PMTU
    129#
    130# - pmtu_vti4_udp_exception
    131#       Same as pmtu_vti4_exception, but using ESP-in-UDP
    132#
    133# - pmtu_vti4_udp_routed_exception
    134#       Set up vti tunnel on top of veth connected through routing namespace and
    135#	add xfrm states and policies with ESP-in-UDP encapsulation. Check that
    136#	route exception is not created if link layer MTU is not exceeded, then
    137#	lower MTU on second part of routed environment and check that exception
    138#	is created with the expected PMTU.
    139#
    140# - pmtu_vti6_exception
    141#	Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
    142#	namespaces with matching endpoints. Check that route exception is
    143#	created by exceeding link layer MTU with ping to other endpoint. Then
    144#	decrease and increase MTU of tunnel, checking that route exception PMTU
    145#	changes accordingly
    146#
    147# - pmtu_vti6_udp_exception
    148#       Same as pmtu_vti6_exception, but using ESP-in-UDP
    149#
    150# - pmtu_vti6_udp_routed_exception
    151#	Same as pmtu_vti6_udp_exception, but with routing between vti
    152#	endpoints
    153#
    154# - pmtu_vti4_default_mtu
    155#	Set up vti4 tunnel on top of veth, in two namespaces with matching
    156#	endpoints. Check that MTU assigned to vti interface is the MTU of the
    157#	lower layer (veth) minus additional lower layer headers (zero, for veth)
    158#	minus IPv4 header length
    159#
    160# - pmtu_vti6_default_mtu
    161#	Same as above, for IPv6
    162#
    163# - pmtu_vti4_link_add_mtu
    164#	Set up vti4 interface passing MTU value at link creation, check MTU is
    165#	configured, and that link is not created with invalid MTU values
    166#
    167# - pmtu_vti6_link_add_mtu
    168#	Same as above, for IPv6
    169#
    170# - pmtu_vti6_link_change_mtu
    171#	Set up two dummy interfaces with different MTUs, create a vti6 tunnel
    172#	and check that configured MTU is used on link creation and changes, and
    173#	that MTU is properly calculated instead when MTU is not configured from
    174#	userspace
    175#
    176# - cleanup_ipv4_exception
    177#	Similar to pmtu_ipv4_vxlan4_exception, but explicitly generate PMTU
    178#	exceptions on multiple CPUs and check that the veth device tear-down
    179# 	happens in a timely manner
    180#
    181# - cleanup_ipv6_exception
    182#	Same as above, but use IPv6 transport from A to B
    183#
    184# - list_flush_ipv4_exception
    185#	Using the same topology as in pmtu_ipv4, create exceptions, and check
    186#	they are shown when listing exception caches, gone after flushing them
    187#
    188# - list_flush_ipv6_exception
    189#	Using the same topology as in pmtu_ipv6, create exceptions, and check
    190#	they are shown when listing exception caches, gone after flushing them
    191#
    192# - pmtu_ipv4_route_change
    193#	Use the same topology as in pmtu_ipv4, but issue a route replacement
    194#	command and delete the corresponding device afterward. This tests for
    195#	proper cleanup of the PMTU exceptions by the route replacement path.
    196#	Device unregistration should complete successfully
    197#
    198# - pmtu_ipv6_route_change
    199#	Same as above but with IPv6
    200
    201# Kselftest framework requirement - SKIP code is 4.
    # Several setup helpers below return this value when a prerequisite (kernel
    # module, tunnel type, ...) is unavailable.
    202ksft_skip=4
    203
    # Runtime knobs. VERBOSE=1 makes run_cmd/run_cmd_bg print each command, and
    # run_cmd additionally prints the captured output.
    # NOTE(review): PAUSE_ON_FAIL and TRACING are presumably consumed further
    # down in the script -- confirm there.
    204PAUSE_ON_FAIL=no
    205VERBOSE=0
    206TRACING=0
    207
    # Some systems don't have a ping6 binary anymore: fall back to plain ping.
    # 'command -v' is the POSIX lookup; unlike 'which' it is a shell built-in,
    # so it cannot itself be missing from the test environment.
    ping6=$(command -v ping6) || ping6=$(command -v ping)
    210
    # Table of test cases, one row per test: name, human-readable description,
    # and a 0/1 flag (see the "re-run with nh" column header).
    # NOTE(review): the code that parses this table is further down in the
    # script; the fields look tab-separated -- confirm before reformatting.
    211#               Name                          Description                  re-run with nh
    212tests="
    213	pmtu_ipv4_exception		ipv4: PMTU exceptions			1
    214	pmtu_ipv6_exception		ipv6: PMTU exceptions			1
    215	pmtu_ipv4_dscp_icmp_exception	ICMPv4 with DSCP and ECN: PMTU exceptions	1
    216	pmtu_ipv4_dscp_udp_exception	UDPv4 with DSCP and ECN: PMTU exceptions	1
    217	pmtu_ipv4_vxlan4_exception	IPv4 over vxlan4: PMTU exceptions	1
    218	pmtu_ipv6_vxlan4_exception	IPv6 over vxlan4: PMTU exceptions	1
    219	pmtu_ipv4_vxlan6_exception	IPv4 over vxlan6: PMTU exceptions	1
    220	pmtu_ipv6_vxlan6_exception	IPv6 over vxlan6: PMTU exceptions	1
    221	pmtu_ipv4_geneve4_exception	IPv4 over geneve4: PMTU exceptions	1
    222	pmtu_ipv6_geneve4_exception	IPv6 over geneve4: PMTU exceptions	1
    223	pmtu_ipv4_geneve6_exception	IPv4 over geneve6: PMTU exceptions	1
    224	pmtu_ipv6_geneve6_exception	IPv6 over geneve6: PMTU exceptions	1
    225	pmtu_ipv4_br_vxlan4_exception	IPv4, bridged vxlan4: PMTU exceptions	1
    226	pmtu_ipv6_br_vxlan4_exception	IPv6, bridged vxlan4: PMTU exceptions	1
    227	pmtu_ipv4_br_vxlan6_exception	IPv4, bridged vxlan6: PMTU exceptions	1
    228	pmtu_ipv6_br_vxlan6_exception	IPv6, bridged vxlan6: PMTU exceptions	1
    229	pmtu_ipv4_br_geneve4_exception	IPv4, bridged geneve4: PMTU exceptions	1
    230	pmtu_ipv6_br_geneve4_exception	IPv6, bridged geneve4: PMTU exceptions	1
    231	pmtu_ipv4_br_geneve6_exception	IPv4, bridged geneve6: PMTU exceptions	1
    232	pmtu_ipv6_br_geneve6_exception	IPv6, bridged geneve6: PMTU exceptions	1
    233	pmtu_ipv4_ovs_vxlan4_exception	IPv4, OVS vxlan4: PMTU exceptions	1
    234	pmtu_ipv6_ovs_vxlan4_exception	IPv6, OVS vxlan4: PMTU exceptions	1
    235	pmtu_ipv4_ovs_vxlan6_exception	IPv4, OVS vxlan6: PMTU exceptions	1
    236	pmtu_ipv6_ovs_vxlan6_exception	IPv6, OVS vxlan6: PMTU exceptions	1
    237	pmtu_ipv4_ovs_geneve4_exception	IPv4, OVS geneve4: PMTU exceptions	1
    238	pmtu_ipv6_ovs_geneve4_exception	IPv6, OVS geneve4: PMTU exceptions	1
    239	pmtu_ipv4_ovs_geneve6_exception	IPv4, OVS geneve6: PMTU exceptions	1
    240	pmtu_ipv6_ovs_geneve6_exception	IPv6, OVS geneve6: PMTU exceptions	1
    241	pmtu_ipv4_fou4_exception	IPv4 over fou4: PMTU exceptions		1
    242	pmtu_ipv6_fou4_exception	IPv6 over fou4: PMTU exceptions		1
    243	pmtu_ipv4_fou6_exception	IPv4 over fou6: PMTU exceptions		1
    244	pmtu_ipv6_fou6_exception	IPv6 over fou6: PMTU exceptions		1
    245	pmtu_ipv4_gue4_exception	IPv4 over gue4: PMTU exceptions		1
    246	pmtu_ipv6_gue4_exception	IPv6 over gue4: PMTU exceptions		1
    247	pmtu_ipv4_gue6_exception	IPv4 over gue6: PMTU exceptions		1
    248	pmtu_ipv6_gue6_exception	IPv6 over gue6: PMTU exceptions		1
    249	pmtu_ipv4_ipv4_exception	IPv4 over IPv4: PMTU exceptions		1
    250	pmtu_ipv6_ipv4_exception	IPv6 over IPv4: PMTU exceptions		1
    251	pmtu_ipv4_ipv6_exception	IPv4 over IPv6: PMTU exceptions		1
    252	pmtu_ipv6_ipv6_exception	IPv6 over IPv6: PMTU exceptions		1
    253	pmtu_vti6_exception		vti6: PMTU exceptions			0
    254	pmtu_vti4_exception		vti4: PMTU exceptions			0
    255	pmtu_vti6_udp_exception		vti6: PMTU exceptions (ESP-in-UDP)	0
    256	pmtu_vti4_udp_exception		vti4: PMTU exceptions (ESP-in-UDP)	0
    257	pmtu_vti6_udp_routed_exception	vti6: PMTU exceptions, routed (ESP-in-UDP)	0
    258	pmtu_vti4_udp_routed_exception	vti4: PMTU exceptions, routed (ESP-in-UDP)	0
    259	pmtu_vti4_default_mtu		vti4: default MTU assignment		0
    260	pmtu_vti6_default_mtu		vti6: default MTU assignment		0
    261	pmtu_vti4_link_add_mtu		vti4: MTU setting on link creation	0
    262	pmtu_vti6_link_add_mtu		vti6: MTU setting on link creation	0
    263	pmtu_vti6_link_change_mtu	vti6: MTU changes on link changes	0
    264	cleanup_ipv4_exception		ipv4: cleanup of cached exceptions	1
    265	cleanup_ipv6_exception		ipv6: cleanup of cached exceptions	1
    266	list_flush_ipv4_exception	ipv4: list and flush cached exceptions	1
    267	list_flush_ipv6_exception	ipv6: list and flush cached exceptions	1
    268	pmtu_ipv4_route_change		ipv4: PMTU exception w/route replace	1
    269	pmtu_ipv6_route_change		ipv6: PMTU exception w/route replace	1"
    270
    # Network namespace names, followed by the command prefixes that run a
    # command inside each namespace. Callers expand the prefixes unquoted
    # (e.g. "run_cmd ${ns_a} ip ...") so that they split into separate words.
    271NS_A="ns-A"
    272NS_B="ns-B"
    273NS_C="ns-C"
    274NS_R1="ns-R1"
    275NS_R2="ns-R2"
    276ns_a="ip netns exec ${NS_A}"
    277ns_b="ip netns exec ${NS_B}"
    278ns_c="ip netns exec ${NS_C}"
    279ns_r1="ip netns exec ${NS_R1}"
    280ns_r2="ip netns exec ${NS_R2}"
    281# Addressing and routing for tests with routers: four network segments, with
    282# index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an
    283# identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2).
    284# Addresses are:
    285# - IPv4: PREFIX4.SEGMENT.ID (/24)
    286# - IPv6: PREFIX6:SEGMENT::ID (/64)
    287prefix4="10.0"
    288prefix6="fc00"
    289a_r1=1
    290a_r2=2
    291b_r1=3
    292b_r2=4
    # One veth link per row. setup_routing() reads this list three words at a
    # time (ns, peer, segment), so every row must have exactly three fields.
    293#	ns	peer	segment
    294routing_addrs="
    295	A	R1	${a_r1}
    296	A	R2	${a_r2}
    297	B	R1	${b_r1}
    298	B	R2	${b_r2}
    299"
    300# Traffic from A to B goes through R1 by default, and through R2, if destined to
    301# B's address on the b_r2 segment.
    302# Traffic from B to A goes through R1.
    # Read by setup_routing_old() three words at a time (ns, destination,
    # gateway); used when USE_NH is not "yes".
    303#	ns	destination		gateway
    304routes="
    305	A	default			${prefix4}.${a_r1}.2
    306	A	${prefix4}.${b_r2}.1	${prefix4}.${a_r2}.2
    307	B	default			${prefix4}.${b_r1}.2
    308
    309	A	default			${prefix6}:${a_r1}::2
    310	A	${prefix6}:${b_r2}::1	${prefix6}:${a_r2}::2
    311	B	default			${prefix6}:${b_r1}::2
    312"
    # "yes" makes setup_routing() install routes through nexthop objects
    # (setup_routing_new) instead of plain gateway routes (setup_routing_old).
    313USE_NH="no"
    # Read by setup_routing_new() five words at a time. The fifth column is the
    # output device, although the column header below does not name it.
    314#	ns	family	nh id	   destination		gateway
    315nexthops="
    316	A	4	41	${prefix4}.${a_r1}.2	veth_A-R1
    317	A	4	42	${prefix4}.${a_r2}.2	veth_A-R2
    318	B	4	41	${prefix4}.${b_r1}.2	veth_B-R1
    319
    320	A	6	61	${prefix6}:${a_r1}::2	veth_A-R1
    321	A	6	62	${prefix6}:${a_r2}::2	veth_A-R2
    322	B	6	61	${prefix6}:${b_r1}::2	veth_B-R1
    323"
    324
    325# nexthop id correlates to id in nexthops config above
    # Read by setup_routing_new() four words at a time.
    326#	ns    family	prefix			nh id
    327routes_nh="
    328	A	4	default			41
    329	A	4	${prefix4}.${b_r2}.1	42
    330	B	4	default			41
    331
    332	A	6	default			61
    333	A	6	${prefix6}:${b_r2}::1	62
    334	B	6	default			61
    335"
    336
    # policy_mark is the dsfield value matched by the fib rule that
    # setup_policy_routing() installs; rt_table is the table that rule jumps to
    # and the table the routing helpers add their routes to.
    337policy_mark=0x04
    338rt_table=main
    339
    # Addresses for the directly connected veth pair (A-B, see setup_veth) and
    # for the C end of the A-C pair (see setup_bridge).
    340veth4_a_addr="192.168.1.1"
    341veth4_b_addr="192.168.1.2"
    342veth4_c_addr="192.168.2.10"
    343veth4_mask="24"
    344veth6_a_addr="fd00:1::a"
    345veth6_b_addr="fd00:1::b"
    346veth6_c_addr="fd00:2::c"
    347veth6_mask="64"
    348
    # Addresses assigned to the tunnel (or bridge) interfaces in A and B.
    349tunnel4_a_addr="192.168.2.1"
    350tunnel4_b_addr="192.168.2.2"
    351tunnel4_mask="24"
    352tunnel6_a_addr="fd00:2::a"
    353tunnel6_b_addr="fd00:2::b"
    354tunnel6_mask="64"
    355
    # NOTE(review): the dummy6_* values are not referenced by any helper in this
    # part of the script; presumably used by the vti6 link tests -- confirm.
    356dummy6_0_prefix="fc00:1000::"
    357dummy6_1_prefix="fc00:1001::"
    358dummy6_mask="64"
    359
    # err_buf collects messages queued by err() until err_flush() prints them.
    # nettest_pids collects the PIDs of background nettest instances started by
    # setup_nettest_xfrm(). NOTE(review): tcpdump_pids and socat_pids are
    # presumably filled the same way by helpers further down -- confirm.
    360err_buf=
    361tcpdump_pids=
    362nettest_pids=
    363socat_pids=
    364
    # Queue one error message: append $1, followed by a newline, to the global
    # err_buf. Nothing is printed until err_flush() is called.
    err() {
    	err_buf="${err_buf}$1
"
    }
    369
    # Print every message queued by err() and empty the queue.
    # printf '%s' writes the buffer verbatim. 'echo -n' is not portable under
    # /bin/sh and, depending on the shell, interprets backslashes or treats
    # option-like buffer contents as flags.
    err_flush() {
    	printf '%s' "${err_buf}"
    	err_buf=
    }
    374
    # Run a command in the foreground, capturing stdout and stderr together.
    # Arguments: the command line. All arguments are joined into one string and
    #	     re-split by the shell, so namespace prefixes such as ${ns_a}
    #	     expand into several words; arguments with embedded blanks are
    #	     therefore not preserved as single words.
    # Globals:   VERBOSE (read); cmd, out and rc (written).
    # Returns:   the exit status of the command.
    run_cmd() {
    	cmd="$*"

    	if [ "$VERBOSE" = "1" ]; then
    		# The command is data, not a format string: a '%' or '\' in
    		# it must be printed literally (same form as run_cmd_bg).
    		printf "    COMMAND: %s\n" "$cmd"
    	fi

    	out="$($cmd 2>&1)"
    	rc=$?
    	if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
    		echo "    $out"
    		echo
    	fi

    	return $rc
    }
    391
    # Start a command in the background with stderr merged into stdout.
    # The command line is joined and re-split exactly as in run_cmd. The caller
    # can pick up the PID of the new job from $! right after this returns.
    run_cmd_bg() {
    	cmd="$*"

    	[ "$VERBOSE" != "1" ] || printf "    COMMAND: %s &\n" "${cmd}"

    	$cmd 2>&1 &
    }
    401
    # Find the auto-generated name for this namespace.
    # $1 is the short identifier (A, B, R1, ...); the value of the matching NS_$1
    # variable is printed. The expansion is quoted inside the eval so the name is
    # printed verbatim instead of being glob-expanded and re-split.
    nsname() {
    	eval "printf '%s\n' \"\${NS_${1}}\""
    }
    406
    # Create a FoU or GUE encapsulated tunnel between namespaces A and B.
    # Arguments:
    #	$1 - outer (transport) IP version: 4, anything else selects IPv6
    #	$2 - inner (payload) IP version: 4, anything else selects IPv6
    #	$3 - encapsulation keyword passed to "ip link ... encap"; also used
    #	     as the interface name prefix (<encap>_a, <encap>_b)
    # Returns $ksft_skip when the fou module cannot be loaded or the A-side
    # port/tunnel cannot be created, otherwise the status of the last command.
    setup_fou_or_gue() {
    	outer="${1}"
    	inner="${2}"
    	encap="${3}"
    	# Only the IPv6 transport passes a "mode" argument. Clear it so an
    	# IPv4 setup never inherits a value left behind by an earlier helper.
    	mode=""

    	if [ "${outer}" = "4" ]; then
    		modprobe fou || return $ksft_skip
    		a_addr="${prefix4}.${a_r1}.1"
    		b_addr="${prefix4}.${b_r1}.1"
    		if [ "${inner}" = "4" ]; then
    			type="ipip"
    			ipproto="4"
    		else
    			type="sit"
    			ipproto="41"
    		fi
    	else
    		modprobe fou6 || return $ksft_skip
    		a_addr="${prefix6}:${a_r1}::1"
    		b_addr="${prefix6}:${b_r1}::1"
    		# ipproto carries the extra "-6" flag for "ip fou add"; it is
    		# expanded unquoted below so it splits into two words.
    		if [ "${inner}" = "4" ]; then
    			type="ip6tnl"
    			mode="mode ipip6"
    			ipproto="4 -6"
    		else
    			type="ip6tnl"
    			mode="mode ip6ip6"
    			ipproto="41 -6"
    		fi
    	fi

    	# A receives on port 5555 and sends to 5556; B is the mirror image
    	run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return $ksft_skip
    	run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return $ksft_skip

    	run_cmd ${ns_b} ip fou add port 5556 ipproto ${ipproto}
    	run_cmd ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555

    	if [ "${inner}" = "4" ]; then
    		run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${encap}_a
    		run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${encap}_b
    	else
    		run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${encap}_a
    		run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${encap}_b
    	fi

    	run_cmd ${ns_a} ip link set ${encap}_a up
    	run_cmd ${ns_b} ip link set ${encap}_b up
    }
    455
    # FoU tunnel: IPv4 transport (outer), IPv4 payload (inner).
    setup_fou44() {
    	setup_fou_or_gue "4" "4" "fou"
    }
    459
    # FoU tunnel: IPv4 transport (outer), IPv6 payload (inner).
    setup_fou46() {
    	setup_fou_or_gue "4" "6" "fou"
    }
    463
    # FoU tunnel: IPv6 transport (outer), IPv4 payload (inner).
    setup_fou64() {
    	setup_fou_or_gue "6" "4" "fou"
    }
    467
    # FoU tunnel: IPv6 transport (outer), IPv6 payload (inner).
    setup_fou66() {
    	setup_fou_or_gue "6" "6" "fou"
    }
    471
    # GUE tunnel: IPv4 transport (outer), IPv4 payload (inner).
    setup_gue44() {
    	setup_fou_or_gue "4" "4" "gue"
    }
    475
    # GUE tunnel: IPv4 transport (outer), IPv6 payload (inner).
    setup_gue46() {
    	setup_fou_or_gue "4" "6" "gue"
    }
    479
    # GUE tunnel: IPv6 transport (outer), IPv4 payload (inner).
    setup_gue64() {
    	setup_fou_or_gue "6" "4" "gue"
    }
    483
    # GUE tunnel: IPv6 transport (outer), IPv6 payload (inner).
    setup_gue66() {
    	setup_fou_or_gue "6" "6" "gue"
    }
    487
    # Create a plain IP-in-IP tunnel, ip_a in namespace A and ip_b in B.
    # Arguments:
    #	$1 - inner (payload) IP version, 4 or 6
    #	$2 - outer (transport) IP version, 4 or 6
    # Returns $ksft_skip if the A-side tunnel cannot be created, otherwise the
    # status of the last address assignment.
    setup_ipvX_over_ipvY() {
    	inner=${1}
    	outer=${2}

    	# Start from the IPv6-payload settings for the chosen transport and
    	# override them when the payload is IPv4.
    	if [ "${outer}" -eq 4 ]; then
    		a_addr="${prefix4}.${a_r1}.1"
    		b_addr="${prefix4}.${b_r1}.1"
    		type="sit"
    		mode="ip6ip"
    		if [ "${inner}" -eq 4 ]; then
    			type="ipip"
    			mode="ipip"
    		fi
    	else
    		a_addr="${prefix6}:${a_r1}::1"
    		b_addr="${prefix6}:${b_r1}::1"
    		type="ip6tnl"
    		mode="ip6ip6"
    		if [ "${inner}" -eq 4 ]; then
    			mode="ipip6"
    		fi
    	fi

    	if ! run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode}; then
    		return $ksft_skip
    	fi
    	run_cmd ${ns_b} ip link add ip_b type ${type} local ${b_addr} remote ${a_addr} mode ${mode}

    	run_cmd ${ns_a} ip link set ip_a up
    	run_cmd ${ns_b} ip link set ip_b up

    	# Tunnel addresses follow the payload family
    	if [ "${inner}" != "4" ]; then
    		run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ip_a
    		run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ip_b
    	else
    		run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ip_a
    		run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ip_b
    	fi
    }
    527
    # IPv4 payload carried over an IPv4 tunnel.
    setup_ip4ip4() {
    	setup_ipvX_over_ipvY "4" "4"
    }
    531
    # IPv6 payload carried over an IPv4 tunnel.
    setup_ip6ip4() {
    	setup_ipvX_over_ipvY "6" "4"
    }
    535
    # IPv4 payload carried over an IPv6 tunnel.
    setup_ip4ip6() {
    	setup_ipvX_over_ipvY "4" "6"
    }
    539
    # IPv6 payload carried over an IPv6 tunnel.
    setup_ip6ip6() {
    	setup_ipvX_over_ipvY "6" "6"
    }
    543
    # Create the five test namespaces (A, B, C, R1, R2).
    # Returns 1 as soon as one of them cannot be created.
    setup_namespaces() {
    	for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
    		if ! ip netns add ${n}; then
    			return 1
    		fi

    		# With DAD switched off, configured IPv6 addresses can be
    		# used right away instead of after a waiting period
    		ip netns exec ${n} sysctl -q net/ipv6/conf/default/accept_dad=0
    	done
    }
    553
    # Connect namespaces A and B directly with a veth pair (veth_a / veth_b),
    # give both ends their IPv4 and IPv6 addresses and bring them up.
    # Returns 1 if the pair cannot be created.
    setup_veth() {
    	# The pair is created inside A; one end is then handed over to B
    	if ! run_cmd $ns_a ip link add veth_a type veth peer name veth_b; then
    		return 1
    	fi
    	run_cmd $ns_a ip link set veth_b netns $NS_B

    	# IPv4 addresses
    	run_cmd $ns_a ip addr add $veth4_a_addr/$veth4_mask dev veth_a
    	run_cmd $ns_b ip addr add $veth4_b_addr/$veth4_mask dev veth_b

    	# IPv6 addresses
    	run_cmd $ns_a ip addr add $veth6_a_addr/$veth6_mask dev veth_a
    	run_cmd $ns_b ip addr add $veth6_b_addr/$veth6_mask dev veth_b

    	run_cmd $ns_a ip link set veth_a up
    	run_cmd $ns_b ip link set veth_b up
    }
    567
    # Create a vti (IPv4) or vti6 (IPv6) tunnel interface in A and in B.
    # Arguments:
    #	$1 - IP version; 6 selects type "vti6", anything else "vti"
    #	$2 - local tunnel endpoint in A (remote endpoint for B)
    #	$3 - local tunnel endpoint in B (remote endpoint for A)
    #	$4 - address assigned to the vti interface in A
    #	$5 - address assigned to the vti interface in B
    #	$6 - prefix length for both vti addresses
    # Returns 1 if the interface in A cannot be created.
    setup_vti() {
    	proto=${1}
    	veth_a_addr="${2}"
    	veth_b_addr="${3}"
    	vti_a_addr="${4}"
    	vti_b_addr="${5}"
    	vti_mask=${6}

    	if [ ${proto} -eq 6 ]; then
    		vti_type="vti6"
    	else
    		vti_type="vti"
    	fi

    	if ! run_cmd $ns_a ip link add vti${proto}_a type $vti_type \
    			local $veth_a_addr remote $veth_b_addr key 10; then
    		return 1
    	fi
    	run_cmd $ns_b ip link add vti${proto}_b type $vti_type \
    		local $veth_b_addr remote $veth_a_addr key 10

    	run_cmd $ns_a ip addr add $vti_a_addr/$vti_mask dev vti${proto}_a
    	run_cmd $ns_b ip addr add $vti_b_addr/$vti_mask dev vti${proto}_b

    	run_cmd $ns_a ip link set vti${proto}_a up
    	run_cmd $ns_b ip link set vti${proto}_b up
    }
    587
    # vti tunnel between the directly connected veth addresses (IPv4).
    setup_vti4() {
    	setup_vti 4 \
    		$veth4_a_addr $veth4_b_addr \
    		$tunnel4_a_addr $tunnel4_b_addr \
    		$tunnel4_mask
    }
    591
    # vti6 tunnel between the directly connected veth addresses (IPv6).
    setup_vti6() {
    	setup_vti 6 \
    		$veth6_a_addr $veth6_b_addr \
    		$tunnel6_a_addr $tunnel6_b_addr \
    		$tunnel6_mask
    }
    595
    # vti tunnel whose endpoints are A's and B's addresses on the segments
    # towards R1 (IPv4), so the tunnel traffic is routed.
    setup_vti4routed() {
    	setup_vti 4 \
    		$prefix4.$a_r1.1 $prefix4.$b_r1.1 \
    		$tunnel4_a_addr $tunnel4_b_addr \
    		$tunnel4_mask
    }
    599
    # vti6 tunnel whose endpoints are A's and B's addresses on the segments
    # towards R1 (IPv6), so the tunnel traffic is routed.
    setup_vti6routed() {
    	setup_vti 6 \
    		$prefix6:$a_r1::1 $prefix6:$b_r1::1 \
    		$tunnel6_a_addr $tunnel6_b_addr \
    		$tunnel6_mask
    }
    603
    # Create a VXLAN or GENEVE tunnel (<type>_a in A, <type>_b in B).
    # Arguments:
    #	$1 - tunnel type, "vxlan" or "geneve"
    #	$2 - A's transport address
    #	$3 - B's transport address
    #	$4 - extra "ip link add" options, may be empty
    #	$5 - optional bridge in A: when given, the tunnel addresses go on the
    #	     bridge and <type>_a is enslaved to it
    # Returns 1 if the interface in A cannot be created.
    setup_vxlan_or_geneve() {
    	type="${1}"
    	a_addr="${2}"
    	b_addr="${3}"
    	opts="${4}"
    	br_if_a="${5}"

    	case "${type}" in
    	vxlan)
    		# VXLAN additionally gets fixed TTL/port and a local address
    		opts="${opts} ttl 64 dstport 4789"
    		opts_a="local ${a_addr}"
    		opts_b="local ${b_addr}"
    		;;
    	*)
    		opts_a=""
    		opts_b=""
    		;;
    	esac

    	if ! run_cmd ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts}; then
    		return 1
    	fi
    	run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts}

    	if [ -z "${br_if_a}" ]; then
    		run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
    		run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
    	else
    		run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${br_if_a}
    		run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${br_if_a}
    		run_cmd ${ns_a} ip link set ${type}_a master ${br_if_a}
    	fi

    	run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
    	run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b

    	run_cmd ${ns_a} ip link set ${type}_a up
    	run_cmd ${ns_b} ip link set ${type}_b up
    }
    638
    # GENEVE over IPv4 between A and B, with the DF bit set.
    setup_geneve4() {
    	setup_vxlan_or_geneve geneve \
    		$prefix4.$a_r1.1 $prefix4.$b_r1.1 \
    		"df set"
    }
    642
    # VXLAN over IPv4 between A and B, with the DF bit set.
    setup_vxlan4() {
    	setup_vxlan_or_geneve vxlan \
    		$prefix4.$a_r1.1 $prefix4.$b_r1.1 \
    		"df set"
    }
    646
    # GENEVE over IPv6 between A and B, no extra link options.
    setup_geneve6() {
    	setup_vxlan_or_geneve geneve \
    		$prefix6:$a_r1::1 $prefix6:$b_r1::1 \
    		""
    }
    650
    # VXLAN over IPv6 between A and B, no extra link options.
    setup_vxlan6() {
    	setup_vxlan_or_geneve vxlan \
    		$prefix6:$a_r1::1 $prefix6:$b_r1::1 \
    		""
    }
    654
    # GENEVE over IPv4 with the A end attached to bridge br0.
    setup_bridged_geneve4() {
    	setup_vxlan_or_geneve geneve \
    		$prefix4.$a_r1.1 $prefix4.$b_r1.1 \
    		"df set" "br0"
    }
    658
    # VXLAN over IPv4 with the A end attached to bridge br0.
    setup_bridged_vxlan4() {
    	setup_vxlan_or_geneve vxlan \
    		$prefix4.$a_r1.1 $prefix4.$b_r1.1 \
    		"df set" "br0"
    }
    662
    # GENEVE over IPv6 with the A end attached to bridge br0.
    setup_bridged_geneve6() {
    	setup_vxlan_or_geneve geneve \
    		$prefix6:$a_r1::1 $prefix6:$b_r1::1 \
    		"" "br0"
    }
    666
    # VXLAN over IPv6 with the A end attached to bridge br0.
    setup_bridged_vxlan6() {
    	setup_vxlan_or_geneve vxlan \
    		$prefix6:$a_r1::1 $prefix6:$b_r1::1 \
    		"" "br0"
    }
    670
    # Install matching ESP tunnel-mode xfrm states and policies in A and B.
    # Arguments:
    #	$1 - IP version, passed to ip as -4 / -6
    #	$2 - tunnel endpoint address of A
    #	$3 - tunnel endpoint address of B
    #	$4 - optional extra state arguments (e.g. an "encap ..." clause);
    #	     expanded unquoted so it splits into separate words
    # Returns 1 if the first state cannot be added in A, otherwise the status
    # of the last policy command.
    setup_xfrm() {
    	proto=${1}
    	veth_a_addr="${2}"
    	veth_b_addr="${3}"
    	encap=${4}

    	# Namespace A: one state per direction, then the out/in policies
    	run_cmd ${ns_a} ip -${proto} xfrm state add \
    		src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 \
    		proto esp aead 'rfc4106(gcm(aes))' \
    		0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 \
    		mode tunnel ${encap} || return 1
    	run_cmd ${ns_a} ip -${proto} xfrm state add \
    		src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 \
    		proto esp aead 'rfc4106(gcm(aes))' \
    		0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 \
    		mode tunnel ${encap}
    	run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 \
    		tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
    	run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 \
    		tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel

    	# Namespace B: same states, policies with the directions swapped
    	run_cmd ${ns_b} ip -${proto} xfrm state add \
    		src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 \
    		proto esp aead 'rfc4106(gcm(aes))' \
    		0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 \
    		mode tunnel ${encap}
    	run_cmd ${ns_b} ip -${proto} xfrm state add \
    		src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 \
    		proto esp aead 'rfc4106(gcm(aes))' \
    		0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 \
    		mode tunnel ${encap}
    	run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 \
    		tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
    	run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 \
    		tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
    }
    687
    # Start one background nettest instance in A and one in B on the given UDP
    # port, and record their PIDs in nettest_pids.
    # Arguments:
    #	$1 - IP version; 6 adds "-6" to the nettest command line
    #	$2 - port passed to nettest with -p
    # Returns 1 (after printing a notice) when nettest is not available.
    setup_nettest_xfrm() {
    	# 'command -v' is the POSIX way to probe for a command; 'which' is
    	# an optional external tool and would make nettest look missing
    	# on systems that do not ship it.
    	if ! command -v nettest >/dev/null; then
    		echo "'nettest' command not found; skipping tests"
    		return 1
    	fi

    	if [ "${1}" -eq 6 ]; then
    		proto="-6"
    	else
    		proto=""
    	fi
    	port=${2}

    	# An empty ${proto} disappears when run_cmd_bg re-splits the command
    	run_cmd_bg "${ns_a}" nettest "${proto}" -q -D -s -x -p "${port}" -t 5
    	nettest_pids="${nettest_pids} $!"

    	run_cmd_bg "${ns_b}" nettest "${proto}" -q -D -s -x -p "${port}" -t 5
    	nettest_pids="${nettest_pids} $!"
    }
    704
    # Plain ESP states/policies between the directly connected veth
    # addresses (IPv4).
    setup_xfrm4() {
    	setup_xfrm 4 \
    		$veth4_a_addr $veth4_b_addr
    }
    708
    # Plain ESP states/policies between the directly connected veth
    # addresses (IPv6).
    setup_xfrm6() {
    	setup_xfrm 6 \
    		$veth6_a_addr $veth6_b_addr
    }
    712
    # ESP-in-UDP (port 4500) states/policies between the veth addresses (IPv4),
    # plus the nettest listeners on that port.
    # The listeners are only started when the xfrm setup succeeded, so a failed
    # setup_xfrm is reported to the caller instead of being hidden behind the
    # status of setup_nettest_xfrm.
    setup_xfrm4udp() {
    	setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" &&
    		setup_nettest_xfrm 4 4500
    }
    717
    # ESP-in-UDP (port 4500) states/policies between the veth addresses (IPv6),
    # plus the nettest listeners on that port.
    # The listeners are only started when the xfrm setup succeeded, so a failed
    # setup_xfrm is reported to the caller instead of being hidden behind the
    # status of setup_nettest_xfrm.
    setup_xfrm6udp() {
    	setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" &&
    		setup_nettest_xfrm 6 4500
    }
    722
    # ESP-in-UDP (port 4500) states/policies between A's and B's addresses on
    # the segments towards R1 (IPv4), plus the nettest listeners on that port.
    # The listeners are only started when the xfrm setup succeeded, so a failed
    # setup_xfrm is reported to the caller instead of being hidden behind the
    # status of setup_nettest_xfrm.
    setup_xfrm4udprouted() {
    	setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" &&
    		setup_nettest_xfrm 4 4500
    }
    727
    # ESP-in-UDP (port 4500) states/policies between A's and B's addresses on
    # the segments towards R1 (IPv6), plus the nettest listeners on that port.
    # The listeners are only started when the xfrm setup succeeded, so a failed
    # setup_xfrm is reported to the caller instead of being hidden behind the
    # status of setup_nettest_xfrm.
    setup_xfrm6udprouted() {
    	setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" &&
    		setup_nettest_xfrm 6 4500
    }
    732
    # Install the gateway routes listed in ${routes} (rows of: ns, destination,
    # gateway) into table ${rt_table} of the named namespaces.
    setup_routing_old() {
    	# The loop fills ns/addr/gw one word at a time, using "still empty"
    	# to tell which column comes next. Start from a clean slate so a
    	# value left over in these globals cannot shift the columns.
    	ns=""; addr=""; gw=""

    	for i in ${routes}; do
    		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
    		[ "${addr}" = "" ]	&& addr="${i}"		&& continue
    		[ "${gw}" = "" ]	&& gw="${i}"

    		ns_name="$(nsname "${ns}")"

    		ip -n "${ns_name}" route add "${addr}" table "${rt_table}" via "${gw}"

    		# Row complete: reset for the next one
    		ns=""; addr=""; gw=""
    	done
    }
    746
    # Install routes through nexthop objects: first create the nexthops listed
    # in ${nexthops} (rows of: ns, family, nexthop id, gateway, device), then
    # the routes in ${routes_nh} (rows of: ns, family, prefix, nexthop id) in
    # table ${rt_table}.
    setup_routing_new() {
    	# Both loops fill their fields one word at a time, using "still
    	# empty" to tell which column comes next. Start from a clean slate
    	# so a leftover global value cannot shift the columns.
    	ns=""; fam=""; nhid=""; gw=""; dev=""; addr=""

    	for i in ${nexthops}; do
    		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
    		[ "${fam}" = "" ]	&& fam="${i}"		&& continue
    		[ "${nhid}" = "" ]	&& nhid="${i}"		&& continue
    		[ "${gw}" = "" ]	&& gw="${i}"		&& continue
    		[ "${dev}" = "" ]	&& dev="${i}"

    		ns_name="$(nsname "${ns}")"

    		ip -n "${ns_name}" -"${fam}" nexthop add id "${nhid}" via "${gw}" dev "${dev}"

    		ns=""; fam=""; nhid=""; gw=""; dev=""

    	done

    	for i in ${routes_nh}; do
    		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
    		[ "${fam}" = "" ]	&& fam="${i}"		&& continue
    		[ "${addr}" = "" ]	&& addr="${i}"		&& continue
    		[ "${nhid}" = "" ]	&& nhid="${i}"

    		ns_name="$(nsname "${ns}")"

    		ip -n "${ns_name}" -"${fam}" route add "${addr}" table "${rt_table}" nhid "${nhid}"

    		ns=""; fam=""; addr=""; nhid=""
    	done
    }
    776
    # Build the routed topology: enable forwarding in R1 and R2, create and
    # address one veth link per row of ${routing_addrs} (ns, peer, segment),
    # then install routes with nexthop objects (USE_NH=yes) or plain gateways.
    # Returns 1 if a veth link cannot be created, 0 otherwise.
    setup_routing() {
    	for i in ${NS_R1} ${NS_R2}; do
    		ip netns exec ${i} sysctl -q net/ipv4/ip_forward=1
    		ip netns exec ${i} sysctl -q net/ipv6/conf/all/forwarding=1
    	done

    	# The loop fills ns/peer/segment one word at a time and only clears
    	# them after a complete row. Start clean so leftovers (for instance
    	# from the early return below) cannot shift the columns.
    	ns=""; peer=""; segment=""

    	for i in ${routing_addrs}; do
    		[ "${ns}" = "" ]	&& ns="${i}"		&& continue
    		[ "${peer}" = "" ]	&& peer="${i}"		&& continue
    		[ "${segment}" = "" ]	&& segment="${i}"

    		ns_name="$(nsname "${ns}")"
    		peer_name="$(nsname "${peer}")"
    		if="veth_${ns}-${peer}"
    		ifpeer="veth_${peer}-${ns}"

    		# Create veth links
    		ip link add ${if} up netns ${ns_name} type veth peer name ${ifpeer} netns ${peer_name} || return 1
    		ip -n ${peer_name} link set dev ${ifpeer} up

    		# Add addresses: .1 / ::1 on the host side, .2 / ::2 on the peer
    		ip -n ${ns_name}   addr add ${prefix4}.${segment}.1/24  dev ${if}
    		ip -n ${ns_name}   addr add ${prefix6}:${segment}::1/64 dev ${if}

    		ip -n ${peer_name} addr add ${prefix4}.${segment}.2/24  dev ${ifpeer}
    		ip -n ${peer_name} addr add ${prefix6}:${segment}::2/64 dev ${ifpeer}

    		ns=""; peer=""; segment=""
    	done

    	if [ "$USE_NH" = "yes" ]; then
    		setup_routing_new
    	else
    		setup_routing_old
    	fi

    	return 0
    }
    815
    816setup_policy_routing() {
    817	setup_routing
    818
    819	ip -netns "${NS_A}" -4 rule add dsfield "${policy_mark}" \
    820		table "${rt_table}"
    821
    822	# Set the IPv4 Don't Fragment bit with tc, since socat doesn't seem to
    823	# have an option do to it.
    824	tc -netns "${NS_A}" qdisc replace dev veth_A-R1 root prio
    825	tc -netns "${NS_A}" qdisc replace dev veth_A-R2 root prio
    826	tc -netns "${NS_A}" filter add dev veth_A-R1                      \
    827		protocol ipv4 flower ip_proto udp                         \
    828		action pedit ex munge ip df set 0x40 pipe csum ip and udp
    829	tc -netns "${NS_A}" filter add dev veth_A-R2                      \
    830		protocol ipv4 flower ip_proto udp                         \
    831		action pedit ex munge ip df set 0x40 pipe csum ip and udp
    832}
    833
    834setup_bridge() {
    835	run_cmd ${ns_a} ip link add br0 type bridge || return $ksft_skip
    836	run_cmd ${ns_a} ip link set br0 up
    837
    838	run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
    839	run_cmd ${ns_c} ip link set veth_A-C netns ns-A
    840
    841	run_cmd ${ns_a} ip link set veth_A-C up
    842	run_cmd ${ns_c} ip link set veth_C-A up
    843	run_cmd ${ns_c} ip addr add ${veth4_c_addr}/${veth4_mask} dev veth_C-A
    844	run_cmd ${ns_c} ip addr add ${veth6_c_addr}/${veth6_mask} dev veth_C-A
    845	run_cmd ${ns_a} ip link set veth_A-C master br0
    846}
    847
    848setup_ovs_vxlan_or_geneve() {
    849	type="${1}"
    850	a_addr="${2}"
    851	b_addr="${3}"
    852
    853	if [ "${type}" = "vxlan" ]; then
    854		opts="${opts} ttl 64 dstport 4789"
    855		opts_b="local ${b_addr}"
    856	fi
    857
    858	run_cmd ovs-vsctl add-port ovs_br0 ${type}_a -- \
    859		set interface ${type}_a type=${type} \
    860		options:remote_ip=${b_addr} options:key=1 options:csum=true || return 1
    861
    862	run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts} || return 1
    863
    864	run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
    865	run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b
    866
    867	run_cmd ${ns_b} ip link set ${type}_b up
    868}
    869
# GENEVE tunnel over IPv4 for the Open vSwitch setup: endpoints are the .1
# addresses on the a_r1 and b_r1 segments
setup_ovs_geneve4() {
	setup_ovs_vxlan_or_geneve geneve ${prefix4}.${a_r1}.1  ${prefix4}.${b_r1}.1
}

# VXLAN tunnel over IPv4 for the Open vSwitch setup, same endpoints as
# setup_ovs_geneve4
setup_ovs_vxlan4() {
	setup_ovs_vxlan_or_geneve vxlan  ${prefix4}.${a_r1}.1  ${prefix4}.${b_r1}.1
}

# GENEVE tunnel over IPv6 for the Open vSwitch setup: endpoints are the ::1
# addresses on the a_r1 and b_r1 segments
setup_ovs_geneve6() {
	setup_ovs_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
}

# VXLAN tunnel over IPv6 for the Open vSwitch setup, same endpoints as
# setup_ovs_geneve6
setup_ovs_vxlan6() {
	setup_ovs_vxlan_or_geneve vxlan  ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
}
    885
    886setup_ovs_bridge() {
    887	run_cmd ovs-vsctl add-br ovs_br0 || return $ksft_skip
    888	run_cmd ip link set ovs_br0 up
    889
    890	run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
    891	run_cmd ${ns_c} ip link set veth_A-C netns 1
    892
    893	run_cmd         ip link set veth_A-C up
    894	run_cmd ${ns_c} ip link set veth_C-A up
    895	run_cmd ${ns_c} ip addr add ${veth4_c_addr}/${veth4_mask} dev veth_C-A
    896	run_cmd ${ns_c} ip addr add ${veth6_c_addr}/${veth6_mask} dev veth_C-A
    897	run_cmd ovs-vsctl add-port ovs_br0 veth_A-C
    898
    899	# Move veth_A-R1 to init
    900	run_cmd ${ns_a} ip link set veth_A-R1 netns 1
    901	run_cmd ip addr add ${prefix4}.${a_r1}.1/${veth4_mask} dev veth_A-R1
    902	run_cmd ip addr add ${prefix6}:${a_r1}::1/${veth6_mask} dev veth_A-R1
    903	run_cmd ip link set veth_A-R1 up
    904	run_cmd ip route add ${prefix4}.${b_r1}.1 via ${prefix4}.${a_r1}.2
    905	run_cmd ip route add ${prefix6}:${b_r1}::1 via ${prefix6}:${a_r1}::2
    906}
    907
    908setup() {
    909	[ "$(id -u)" -ne 0 ] && echo "  need to run as root" && return $ksft_skip
    910
    911	for arg do
    912		eval setup_${arg} || { echo "  ${arg} not supported"; return 1; }
    913	done
    914}
    915
    916trace() {
    917	[ $TRACING -eq 0 ] && return
    918
    919	for arg do
    920		[ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue
    921		${ns_cmd} tcpdump --immediate-mode -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null &
    922		tcpdump_pids="${tcpdump_pids} $!"
    923		ns_cmd=
    924	done
    925	sleep 1
    926}
    927
    928cleanup() {
    929	for pid in ${tcpdump_pids}; do
    930		kill ${pid}
    931	done
    932	tcpdump_pids=
    933
    934	for pid in ${nettest_pids}; do
    935		kill ${pid}
    936	done
    937	nettest_pids=
    938
    939	for pid in ${socat_pids}; do
    940		kill "${pid}"
    941	done
    942	socat_pids=
    943
    944	for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
    945		ip netns del ${n} 2> /dev/null
    946	done
    947
    948	ip link del veth_A-C			2>/dev/null
    949	ip link del veth_A-R1			2>/dev/null
    950	ovs-vsctl --if-exists del-port vxlan_a	2>/dev/null
    951	ovs-vsctl --if-exists del-br ovs_br0	2>/dev/null
    952}
    953
    954mtu() {
    955	ns_cmd="${1}"
    956	dev="${2}"
    957	mtu="${3}"
    958
    959	${ns_cmd} ip link set dev ${dev} mtu ${mtu}
    960}
    961
    962mtu_parse() {
    963	input="${1}"
    964
    965	next=0
    966	for i in ${input}; do
    967		[ ${next} -eq 1 -a "${i}" = "lock" ] && next=2 && continue
    968		[ ${next} -eq 1 ] && echo "${i}" && return
    969		[ ${next} -eq 2 ] && echo "lock ${i}" && return
    970		[ "${i}" = "mtu" ] && next=1
    971	done
    972}
    973
    974link_get() {
    975	ns_cmd="${1}"
    976	name="${2}"
    977
    978	${ns_cmd} ip link show dev "${name}"
    979}
    980
    981link_get_mtu() {
    982	ns_cmd="${1}"
    983	name="${2}"
    984
    985	mtu_parse "$(link_get "${ns_cmd}" ${name})"
    986}
    987
    988route_get_dst_exception() {
    989	ns_cmd="${1}"
    990	dst="${2}"
    991	dsfield="${3}"
    992
    993	if [ -z "${dsfield}" ]; then
    994		dsfield=0
    995	fi
    996
    997	${ns_cmd} ip route get "${dst}" dsfield "${dsfield}"
    998}
    999
   1000route_get_dst_pmtu_from_exception() {
   1001	ns_cmd="${1}"
   1002	dst="${2}"
   1003	dsfield="${3}"
   1004
   1005	mtu_parse "$(route_get_dst_exception "${ns_cmd}" "${dst}" "${dsfield}")"
   1006}
   1007
   1008check_pmtu_value() {
   1009	expected="${1}"
   1010	value="${2}"
   1011	event="${3}"
   1012
   1013	[ "${expected}" = "any" ] && [ -n "${value}" ] && return 0
   1014	[ "${value}" = "${expected}" ] && return 0
   1015	[ -z "${value}" ] &&    err "  PMTU exception wasn't created after ${event}" && return 1
   1016	[ -z "${expected}" ] && err "  PMTU exception shouldn't exist after ${event}" && return 1
   1017	err "  found PMTU exception with incorrect MTU ${value}, expected ${expected}, after ${event}"
   1018	return 1
   1019}
   1020
   1021test_pmtu_ipvX() {
   1022	family=${1}
   1023
   1024	setup namespaces routing || return $ksft_skip
   1025	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
   1026	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
   1027	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
   1028	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2
   1029
   1030	if [ ${family} -eq 4 ]; then
   1031		ping=ping
   1032		dst1="${prefix4}.${b_r1}.1"
   1033		dst2="${prefix4}.${b_r2}.1"
   1034	else
   1035		ping=${ping6}
   1036		dst1="${prefix6}:${b_r1}::1"
   1037		dst2="${prefix6}:${b_r2}::1"
   1038	fi
   1039
   1040	# Set up initial MTU values
   1041	mtu "${ns_a}"  veth_A-R1 2000
   1042	mtu "${ns_r1}" veth_R1-A 2000
   1043	mtu "${ns_r1}" veth_R1-B 1400
   1044	mtu "${ns_b}"  veth_B-R1 1400
   1045
   1046	mtu "${ns_a}"  veth_A-R2 2000
   1047	mtu "${ns_r2}" veth_R2-A 2000
   1048	mtu "${ns_r2}" veth_R2-B 1500
   1049	mtu "${ns_b}"  veth_B-R2 1500
   1050
   1051	# Create route exceptions
   1052	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1}
   1053	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2}
   1054
   1055	# Check that exceptions have been created with the correct PMTU
   1056	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
   1057	check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
   1058	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
   1059	check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
   1060
   1061	# Decrease local MTU below PMTU, check for PMTU decrease in route exception
   1062	mtu "${ns_a}"  veth_A-R1 1300
   1063	mtu "${ns_r1}" veth_R1-A 1300
   1064	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
   1065	check_pmtu_value "1300" "${pmtu_1}" "decreasing local MTU" || return 1
   1066	# Second exception shouldn't be modified
   1067	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
   1068	check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1
   1069
   1070	# Increase MTU, check for PMTU increase in route exception
   1071	mtu "${ns_a}"  veth_A-R1 1700
   1072	mtu "${ns_r1}" veth_R1-A 1700
   1073	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
   1074	check_pmtu_value "1700" "${pmtu_1}" "increasing local MTU" || return 1
   1075	# Second exception shouldn't be modified
   1076	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
   1077	check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1
   1078
   1079	# Skip PMTU locking tests for IPv6
   1080	[ $family -eq 6 ] && return 0
   1081
   1082	# Decrease remote MTU on path via R2, get new exception
   1083	mtu "${ns_r2}" veth_R2-B 400
   1084	mtu "${ns_b}"  veth_B-R2 400
   1085	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2}
   1086	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
   1087	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
   1088
   1089	# Decrease local MTU below PMTU
   1090	mtu "${ns_a}"  veth_A-R2 500
   1091	mtu "${ns_r2}" veth_R2-A 500
   1092	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
   1093	check_pmtu_value "500" "${pmtu_2}" "decreasing local MTU" || return 1
   1094
   1095	# Increase local MTU
   1096	mtu "${ns_a}"  veth_A-R2 1500
   1097	mtu "${ns_r2}" veth_R2-A 1500
   1098	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
   1099	check_pmtu_value "1500" "${pmtu_2}" "increasing local MTU" || return 1
   1100
   1101	# Get new exception
   1102	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2}
   1103	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
   1104	check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1
   1105}
   1106
# IPv4 variant of test_pmtu_ipvX
test_pmtu_ipv4_exception() {
	test_pmtu_ipvX 4
}

# IPv6 variant of test_pmtu_ipvX
test_pmtu_ipv6_exception() {
	test_pmtu_ipvX 6
}
   1114
   1115test_pmtu_ipv4_dscp_icmp_exception() {
   1116	rt_table=100
   1117
   1118	setup namespaces policy_routing || return $ksft_skip
   1119	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
   1120	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
   1121	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
   1122	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2
   1123
   1124	# Set up initial MTU values
   1125	mtu "${ns_a}"  veth_A-R1 2000
   1126	mtu "${ns_r1}" veth_R1-A 2000
   1127	mtu "${ns_r1}" veth_R1-B 1400
   1128	mtu "${ns_b}"  veth_B-R1 1400
   1129
   1130	mtu "${ns_a}"  veth_A-R2 2000
   1131	mtu "${ns_r2}" veth_R2-A 2000
   1132	mtu "${ns_r2}" veth_R2-B 1500
   1133	mtu "${ns_b}"  veth_B-R2 1500
   1134
   1135	len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R1
   1136
   1137	dst1="${prefix4}.${b_r1}.1"
   1138	dst2="${prefix4}.${b_r2}.1"
   1139
   1140	# Create route exceptions
   1141	dsfield=${policy_mark} # No ECN bit set (Not-ECT)
   1142	run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst1}"
   1143
   1144	dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0))
   1145	run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst2}"
   1146
   1147	# Check that exceptions have been created with the correct PMTU
   1148	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")"
   1149	check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
   1150
   1151	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")"
   1152	check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
   1153}
   1154
   1155test_pmtu_ipv4_dscp_udp_exception() {
   1156	rt_table=100
   1157
   1158	if ! which socat > /dev/null 2>&1; then
   1159		echo "'socat' command not found; skipping tests"
   1160		return $ksft_skip
   1161	fi
   1162
   1163	setup namespaces policy_routing || return $ksft_skip
   1164	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
   1165	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
   1166	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
   1167	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2
   1168
   1169	# Set up initial MTU values
   1170	mtu "${ns_a}"  veth_A-R1 2000
   1171	mtu "${ns_r1}" veth_R1-A 2000
   1172	mtu "${ns_r1}" veth_R1-B 1400
   1173	mtu "${ns_b}"  veth_B-R1 1400
   1174
   1175	mtu "${ns_a}"  veth_A-R2 2000
   1176	mtu "${ns_r2}" veth_R2-A 2000
   1177	mtu "${ns_r2}" veth_R2-B 1500
   1178	mtu "${ns_b}"  veth_B-R2 1500
   1179
   1180	len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R1
   1181
   1182	dst1="${prefix4}.${b_r1}.1"
   1183	dst2="${prefix4}.${b_r2}.1"
   1184
   1185	# Create route exceptions
   1186	run_cmd_bg "${ns_b}" socat UDP-LISTEN:50000 OPEN:/dev/null,wronly=1
   1187	socat_pids="${socat_pids} $!"
   1188
   1189	dsfield=${policy_mark} # No ECN bit set (Not-ECT)
   1190	run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \
   1191		UDP:"${dst1}":50000,tos="${dsfield}"
   1192
   1193	dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0))
   1194	run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \
   1195		UDP:"${dst2}":50000,tos="${dsfield}"
   1196
   1197	# Check that exceptions have been created with the correct PMTU
   1198	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")"
   1199	check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
   1200	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")"
   1201	check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
   1202}
   1203
   1204test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {
   1205	type=${1}
   1206	family=${2}
   1207	outer_family=${3}
   1208	ll_mtu=4000
   1209
   1210	if [ ${outer_family} -eq 4 ]; then
   1211		setup namespaces routing ${type}4 || return $ksft_skip
   1212		#                      IPv4 header   UDP header   VXLAN/GENEVE header   Ethernet header
   1213		exp_mtu=$((${ll_mtu} - 20          - 8          - 8                   - 14))
   1214	else
   1215		setup namespaces routing ${type}6 || return $ksft_skip
   1216		#                      IPv6 header   UDP header   VXLAN/GENEVE header   Ethernet header
   1217		exp_mtu=$((${ll_mtu} - 40          - 8          - 8                   - 14))
   1218	fi
   1219
   1220	trace "${ns_a}" ${type}_a    "${ns_b}"  ${type}_b \
   1221	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
   1222	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B
   1223
   1224	if [ ${family} -eq 4 ]; then
   1225		ping=ping
   1226		dst=${tunnel4_b_addr}
   1227	else
   1228		ping=${ping6}
   1229		dst=${tunnel6_b_addr}
   1230	fi
   1231
   1232	# Create route exception by exceeding link layer MTU
   1233	mtu "${ns_a}"  veth_A-R1 $((${ll_mtu} + 1000))
   1234	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
   1235	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
   1236	mtu "${ns_r1}" veth_R1-B ${ll_mtu}
   1237
   1238	mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000))
   1239	mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
   1240	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}
   1241
   1242	# Check that exception was created
   1243	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
   1244	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ${type} interface"
   1245}
   1246
# Variants of test_pmtu_ipvX_over_vxlanY_or_geneveY_exception. Arguments are:
# tunnel type, IP version of tunnelled traffic, IP version of the tunnel.

# IPv4 over VXLAN over IPv4
test_pmtu_ipv4_vxlan4_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan  4 4
}

# IPv6 over VXLAN over IPv4
test_pmtu_ipv6_vxlan4_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan  6 4
}

# IPv4 over GENEVE over IPv4
test_pmtu_ipv4_geneve4_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 4 4
}

# IPv6 over GENEVE over IPv4
test_pmtu_ipv6_geneve4_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 4
}

# IPv4 over VXLAN over IPv6
test_pmtu_ipv4_vxlan6_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan  4 6
}

# IPv6 over VXLAN over IPv6
test_pmtu_ipv6_vxlan6_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan  6 6
}

# IPv4 over GENEVE over IPv6
test_pmtu_ipv4_geneve6_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 4 6
}

# IPv6 over GENEVE over IPv6
test_pmtu_ipv6_geneve6_exception() {
	test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 6
}
   1278
   1279test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
   1280	type=${1}
   1281	family=${2}
   1282	outer_family=${3}
   1283	ll_mtu=4000
   1284
   1285	if [ ${outer_family} -eq 4 ]; then
   1286		setup namespaces routing bridge bridged_${type}4 || return $ksft_skip
   1287		#                      IPv4 header   UDP header   VXLAN/GENEVE header   Ethernet header
   1288		exp_mtu=$((${ll_mtu} - 20          - 8          - 8                   - 14))
   1289	else
   1290		setup namespaces routing bridge bridged_${type}6 || return $ksft_skip
   1291		#                      IPv6 header   UDP header   VXLAN/GENEVE header   Ethernet header
   1292		exp_mtu=$((${ll_mtu} - 40          - 8          - 8                   - 14))
   1293	fi
   1294
   1295	trace "${ns_a}" ${type}_a    "${ns_b}"  ${type}_b \
   1296	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
   1297	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B \
   1298	      "${ns_a}" br0          "${ns_a}"  veth-A-C  \
   1299	      "${ns_c}" veth_C-A
   1300
   1301	if [ ${family} -eq 4 ]; then
   1302		ping=ping
   1303		dst=${tunnel4_b_addr}
   1304	else
   1305		ping=${ping6}
   1306		dst=${tunnel6_b_addr}
   1307	fi
   1308
   1309	# Create route exception by exceeding link layer MTU
   1310	mtu "${ns_a}"  veth_A-R1 $((${ll_mtu} + 1000))
   1311	mtu "${ns_a}"  br0       $((${ll_mtu} + 1000))
   1312	mtu "${ns_a}"  veth_A-C  $((${ll_mtu} + 1000))
   1313	mtu "${ns_c}"  veth_C-A  $((${ll_mtu} + 1000))
   1314	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
   1315	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
   1316	mtu "${ns_r1}" veth_R1-B ${ll_mtu}
   1317
   1318	mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000))
   1319	mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
   1320
   1321	run_cmd ${ns_c} ${ping} -q -M want -i 0.1 -c 10 -s $((${ll_mtu} + 500)) ${dst} || return 1
   1322	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1  -s $((${ll_mtu} + 500)) ${dst} || return 1
   1323
   1324	# Check that exceptions were created
   1325	pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})"
   1326	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on bridged ${type} interface"
   1327	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
   1328	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on locally bridged ${type} interface"
   1329}
   1330
# Variants of test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception.
# Arguments are: tunnel type, IP version of tunnelled traffic, IP version of
# the tunnel.

# IPv4 over bridged VXLAN over IPv4
test_pmtu_ipv4_br_vxlan4_exception() {
	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan  4 4
}

# IPv6 over bridged VXLAN over IPv4
test_pmtu_ipv6_br_vxlan4_exception() {
	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan  6 4
}

# IPv4 over bridged GENEVE over IPv4
test_pmtu_ipv4_br_geneve4_exception() {
	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 4 4
}

# IPv6 over bridged GENEVE over IPv4
test_pmtu_ipv6_br_geneve4_exception() {
	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 6 4
}

# IPv4 over bridged VXLAN over IPv6
test_pmtu_ipv4_br_vxlan6_exception() {
	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan  4 6
}

# IPv6 over bridged VXLAN over IPv6
test_pmtu_ipv6_br_vxlan6_exception() {
	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan  6 6
}

# IPv4 over bridged GENEVE over IPv6
test_pmtu_ipv4_br_geneve6_exception() {
	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 4 6
}

# IPv6 over bridged GENEVE over IPv6
test_pmtu_ipv6_br_geneve6_exception() {
	test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 6 6
}
   1362
   1363test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() {
   1364	type=${1}
   1365	family=${2}
   1366	outer_family=${3}
   1367	ll_mtu=4000
   1368
   1369	if [ ${outer_family} -eq 4 ]; then
   1370		setup namespaces routing ovs_bridge ovs_${type}4 || return $ksft_skip
   1371		#                      IPv4 header   UDP header   VXLAN/GENEVE header   Ethernet header
   1372		exp_mtu=$((${ll_mtu} - 20          - 8          - 8                   - 14))
   1373	else
   1374		setup namespaces routing ovs_bridge ovs_${type}6 || return $ksft_skip
   1375		#                      IPv6 header   UDP header   VXLAN/GENEVE header   Ethernet header
   1376		exp_mtu=$((${ll_mtu} - 40          - 8          - 8                   - 14))
   1377	fi
   1378
   1379	if [ "${type}" = "vxlan" ]; then
   1380		tun_a="vxlan_sys_4789"
   1381	elif [ "${type}" = "geneve" ]; then
   1382		tun_a="genev_sys_6081"
   1383	fi
   1384
   1385	trace ""        "${tun_a}"  "${ns_b}"  ${type}_b \
   1386	      ""        veth_A-R1   "${ns_r1}" veth_R1-A \
   1387	      "${ns_b}" veth_B-R1   "${ns_r1}" veth_R1-B \
   1388	      ""        ovs_br0     ""         veth-A-C  \
   1389	      "${ns_c}" veth_C-A
   1390
   1391	if [ ${family} -eq 4 ]; then
   1392		ping=ping
   1393		dst=${tunnel4_b_addr}
   1394	else
   1395		ping=${ping6}
   1396		dst=${tunnel6_b_addr}
   1397	fi
   1398
   1399	# Create route exception by exceeding link layer MTU
   1400	mtu ""         veth_A-R1 $((${ll_mtu} + 1000))
   1401	mtu ""         ovs_br0   $((${ll_mtu} + 1000))
   1402	mtu ""         veth_A-C  $((${ll_mtu} + 1000))
   1403	mtu "${ns_c}"  veth_C-A  $((${ll_mtu} + 1000))
   1404	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
   1405	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
   1406	mtu "${ns_r1}" veth_R1-B ${ll_mtu}
   1407
   1408	mtu ""        ${tun_a}  $((${ll_mtu} + 1000))
   1409	mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
   1410
   1411	run_cmd ${ns_c} ${ping} -q -M want -i 0.1 -c 20 -s $((${ll_mtu} + 500)) ${dst} || return 1
   1412
   1413	# Check that exceptions were created
   1414	pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})"
   1415	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on Open vSwitch ${type} interface"
   1416}
   1417
# Variants of test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception. Arguments
# are: tunnel type, IP version of tunnelled traffic, IP version of the tunnel.

# IPv4 over Open vSwitch VXLAN over IPv4
test_pmtu_ipv4_ovs_vxlan4_exception() {
	test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan  4 4
}

# IPv6 over Open vSwitch VXLAN over IPv4
test_pmtu_ipv6_ovs_vxlan4_exception() {
	test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan  6 4
}

# IPv4 over Open vSwitch GENEVE over IPv4
test_pmtu_ipv4_ovs_geneve4_exception() {
	test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 4 4
}

# IPv6 over Open vSwitch GENEVE over IPv4
test_pmtu_ipv6_ovs_geneve4_exception() {
	test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 6 4
}

# IPv4 over Open vSwitch VXLAN over IPv6
test_pmtu_ipv4_ovs_vxlan6_exception() {
	test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan  4 6
}

# IPv6 over Open vSwitch VXLAN over IPv6
test_pmtu_ipv6_ovs_vxlan6_exception() {
	test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan  6 6
}

# IPv4 over Open vSwitch GENEVE over IPv6
test_pmtu_ipv4_ovs_geneve6_exception() {
	test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 4 6
}

# IPv6 over Open vSwitch GENEVE over IPv6
test_pmtu_ipv6_ovs_geneve6_exception() {
	test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 6 6
}
   1449
   1450test_pmtu_ipvX_over_fouY_or_gueY() {
   1451	inner_family=${1}
   1452	outer_family=${2}
   1453	encap=${3}
   1454	ll_mtu=4000
   1455
   1456	setup namespaces routing ${encap}${outer_family}${inner_family} || return $ksft_skip
   1457	trace "${ns_a}" ${encap}_a   "${ns_b}"  ${encap}_b \
   1458	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
   1459	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B
   1460
   1461	if [ ${inner_family} -eq 4 ]; then
   1462		ping=ping
   1463		dst=${tunnel4_b_addr}
   1464	else
   1465		ping=${ping6}
   1466		dst=${tunnel6_b_addr}
   1467	fi
   1468
   1469	if [ "${encap}" = "gue" ]; then
   1470		encap_overhead=4
   1471	else
   1472		encap_overhead=0
   1473	fi
   1474
   1475	if [ ${outer_family} -eq 4 ]; then
   1476		#                      IPv4 header   UDP header
   1477		exp_mtu=$((${ll_mtu} - 20          - 8         - ${encap_overhead}))
   1478	else
   1479		#                      IPv6 header   Option 4   UDP header
   1480		exp_mtu=$((${ll_mtu} - 40          - 8        - 8       - ${encap_overhead}))
   1481	fi
   1482
   1483	# Create route exception by exceeding link layer MTU
   1484	mtu "${ns_a}"  veth_A-R1 $((${ll_mtu} + 1000))
   1485	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
   1486	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
   1487	mtu "${ns_r1}" veth_R1-B ${ll_mtu}
   1488
   1489	mtu "${ns_a}" ${encap}_a $((${ll_mtu} + 1000))
   1490	mtu "${ns_b}" ${encap}_b $((${ll_mtu} + 1000))
   1491	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}
   1492
   1493	# Check that exception was created
   1494	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
   1495	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ${encap} interface"
   1496}
   1497
# Variants of test_pmtu_ipvX_over_fouY_or_gueY. Arguments are: IP version of
# tunnelled traffic, IP version of the tunnel, encapsulation.

# IPv4 over FoU over IPv4
test_pmtu_ipv4_fou4_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY 4 4 fou
}

# IPv6 over FoU over IPv4
test_pmtu_ipv6_fou4_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY 6 4 fou
}

# IPv4 over FoU over IPv6
test_pmtu_ipv4_fou6_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY 4 6 fou
}

# IPv6 over FoU over IPv6
test_pmtu_ipv6_fou6_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY 6 6 fou
}

# IPv4 over GUE over IPv4
test_pmtu_ipv4_gue4_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY 4 4 gue
}

# IPv6 over GUE over IPv4
test_pmtu_ipv6_gue4_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY 6 4 gue
}

# IPv4 over GUE over IPv6
test_pmtu_ipv4_gue6_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY 4 6 gue
}

# IPv6 over GUE over IPv6
test_pmtu_ipv6_gue6_exception() {
	test_pmtu_ipvX_over_fouY_or_gueY 6 6 gue
}
   1529
   1530test_pmtu_ipvX_over_ipvY_exception() {
   1531	inner=${1}
   1532	outer=${2}
   1533	ll_mtu=4000
   1534
   1535	setup namespaces routing ip${inner}ip${outer} || return $ksft_skip
   1536
   1537	trace "${ns_a}" ip_a         "${ns_b}"  ip_b  \
   1538	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
   1539	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B
   1540
   1541	if [ ${inner} -eq 4 ]; then
   1542		ping=ping
   1543		dst=${tunnel4_b_addr}
   1544	else
   1545		ping=${ping6}
   1546		dst=${tunnel6_b_addr}
   1547	fi
   1548
   1549	if [ ${outer} -eq 4 ]; then
   1550		#                      IPv4 header
   1551		exp_mtu=$((${ll_mtu} - 20))
   1552	else
   1553		#                      IPv6 header   Option 4
   1554		exp_mtu=$((${ll_mtu} - 40          - 8))
   1555	fi
   1556
   1557	# Create route exception by exceeding link layer MTU
   1558	mtu "${ns_a}"  veth_A-R1 $((${ll_mtu} + 1000))
   1559	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
   1560	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
   1561	mtu "${ns_r1}" veth_R1-B ${ll_mtu}
   1562
   1563	mtu "${ns_a}" ip_a $((${ll_mtu} + 1000)) || return
   1564	mtu "${ns_b}" ip_b $((${ll_mtu} + 1000)) || return
   1565	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}
   1566
   1567	# Check that exception was created
   1568	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
   1569	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ip${inner}ip${outer} interface"
   1570}
   1571
# Variants of test_pmtu_ipvX_over_ipvY_exception. Arguments are: IP version of
# tunnelled traffic, IP version of the tunnel.

# IPv4 over IPv4
test_pmtu_ipv4_ipv4_exception() {
	test_pmtu_ipvX_over_ipvY_exception 4 4
}

# IPv6 over IPv4
test_pmtu_ipv6_ipv4_exception() {
	test_pmtu_ipvX_over_ipvY_exception 6 4
}

# IPv4 over IPv6
test_pmtu_ipv4_ipv6_exception() {
	test_pmtu_ipvX_over_ipvY_exception 4 6
}

# IPv6 over IPv6
test_pmtu_ipv6_ipv6_exception() {
	test_pmtu_ipvX_over_ipvY_exception 6 6
}
   1587
   1588test_pmtu_vti4_exception() {
   1589	setup namespaces veth vti4 xfrm4 || return $ksft_skip
   1590	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
   1591	      "${ns_a}" vti4_a    "${ns_b}" vti4_b
   1592
   1593	veth_mtu=1500
   1594	vti_mtu=$((veth_mtu - 20))
   1595
   1596	#                                SPI   SN   IV  ICV   pad length   next header
   1597	esp_payload_rfc4106=$((vti_mtu - 4   - 4  - 8 - 16  - 1          - 1))
   1598	ping_payload=$((esp_payload_rfc4106 - 28))
   1599
   1600	mtu "${ns_a}" veth_a ${veth_mtu}
   1601	mtu "${ns_b}" veth_b ${veth_mtu}
   1602	mtu "${ns_a}" vti4_a ${vti_mtu}
   1603	mtu "${ns_b}" vti4_b ${vti_mtu}
   1604
   1605	# Send DF packet without exceeding link layer MTU, check that no
   1606	# exception is created
   1607	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
   1608	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
   1609	check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
   1610
   1611	# Now exceed link layer MTU by one byte, check that exception is created
   1612	# with the right PMTU value
   1613	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr}
   1614	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
   1615	check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
   1616}
   1617
   1618test_pmtu_vti6_exception() {
   1619	setup namespaces veth vti6 xfrm6 || return $ksft_skip
   1620	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
   1621	      "${ns_a}" vti6_a    "${ns_b}" vti6_b
   1622	fail=0
   1623
   1624	# Create route exception by exceeding link layer MTU
   1625	mtu "${ns_a}" veth_a 4000
   1626	mtu "${ns_b}" veth_b 4000
   1627	mtu "${ns_a}" vti6_a 5000
   1628	mtu "${ns_b}" vti6_b 5000
   1629	run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr}
   1630
   1631	# Check that exception was created
   1632	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
   1633	check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1
   1634
   1635	# Decrease tunnel MTU, check for PMTU decrease in route exception
   1636	mtu "${ns_a}" vti6_a 3000
   1637	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
   1638	check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1
   1639
   1640	# Increase tunnel MTU, check for PMTU increase in route exception
   1641	mtu "${ns_a}" vti6_a 9000
   1642	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
   1643	check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1
   1644
   1645	return ${fail}
   1646}
   1647
   1648test_pmtu_vti4_udp_exception() {
   1649	setup namespaces veth vti4 xfrm4udp || return $ksft_skip
   1650	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
   1651	      "${ns_a}" vti4_a    "${ns_b}" vti4_b
   1652
   1653	veth_mtu=1500
   1654	vti_mtu=$((veth_mtu - 20))
   1655
   1656	#                                UDP   SPI   SN   IV  ICV   pad length   next header
   1657	esp_payload_rfc4106=$((vti_mtu - 8   - 4   - 4  - 8 - 16  - 1          - 1))
   1658	ping_payload=$((esp_payload_rfc4106 - 28))
   1659
   1660	mtu "${ns_a}" veth_a ${veth_mtu}
   1661	mtu "${ns_b}" veth_b ${veth_mtu}
   1662	mtu "${ns_a}" vti4_a ${vti_mtu}
   1663	mtu "${ns_b}" vti4_b ${vti_mtu}
   1664
   1665	# Send DF packet without exceeding link layer MTU, check that no
   1666	# exception is created
   1667	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
   1668	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
   1669	check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
   1670
   1671	# Now exceed link layer MTU by one byte, check that exception is created
   1672	# with the right PMTU value
   1673	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr}
   1674	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
   1675	check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
   1676}
   1677
   1678test_pmtu_vti6_udp_exception() {
   1679	setup namespaces veth vti6 xfrm6udp || return $ksft_skip
   1680	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
   1681	      "${ns_a}" vti6_a    "${ns_b}" vti6_b
   1682	fail=0
   1683
   1684	# Create route exception by exceeding link layer MTU
   1685	mtu "${ns_a}" veth_a 4000
   1686	mtu "${ns_b}" veth_b 4000
   1687	mtu "${ns_a}" vti6_a 5000
   1688	mtu "${ns_b}" vti6_b 5000
   1689	run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr}
   1690
   1691	# Check that exception was created
   1692	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
   1693	check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1
   1694
   1695	# Decrease tunnel MTU, check for PMTU decrease in route exception
   1696	mtu "${ns_a}" vti6_a 3000
   1697	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
   1698	check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1
   1699
   1700	# Increase tunnel MTU, check for PMTU increase in route exception
   1701	mtu "${ns_a}" vti6_a 9000
   1702	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
   1703	check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1
   1704
   1705	return ${fail}
   1706}
   1707
   1708test_pmtu_vti4_udp_routed_exception() {
   1709	setup namespaces routing vti4routed xfrm4udprouted || return $ksft_skip
   1710	trace "${ns_a}" veth_A-R1    "${ns_b}" veth_B-R1 \
   1711	      "${ns_a}" vti4_a       "${ns_b}" vti4_b
   1712
   1713	veth_mtu=1500
   1714	vti_mtu=$((veth_mtu - 20))
   1715
   1716	#                                UDP   SPI   SN   IV  ICV   pad length   next header
   1717	esp_payload_rfc4106=$((vti_mtu - 8   - 4   - 4  - 8 - 16  - 1          - 1))
   1718	ping_payload=$((esp_payload_rfc4106 - 28))
   1719
   1720        mtu "${ns_a}"  veth_A-R1 ${veth_mtu}
   1721        mtu "${ns_r1}" veth_R1-A ${veth_mtu}
   1722        mtu "${ns_b}"  veth_B-R1 ${veth_mtu}
   1723        mtu "${ns_r1}" veth_R1-B ${veth_mtu}
   1724
   1725	mtu "${ns_a}" vti4_a ${vti_mtu}
   1726	mtu "${ns_b}" vti4_b ${vti_mtu}
   1727
   1728	# Send DF packet without exceeding link layer MTU, check that no
   1729	# exception is created
   1730	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
   1731	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
   1732	check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
   1733
   1734	# Now decrease link layer MTU by 8 bytes on R1, check that exception is created
   1735	# with the right PMTU value
   1736        mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8))
   1737	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel4_b_addr}
   1738	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
   1739	check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))"
   1740}
   1741
   1742test_pmtu_vti6_udp_routed_exception() {
   1743	setup namespaces routing vti6routed xfrm6udprouted || return $ksft_skip
   1744	trace "${ns_a}" veth_A-R1    "${ns_b}" veth_B-R1 \
   1745	      "${ns_a}" vti6_a       "${ns_b}" vti6_b
   1746
   1747	veth_mtu=1500
   1748	vti_mtu=$((veth_mtu - 40))
   1749
   1750	#                                UDP   SPI   SN   IV  ICV   pad length   next header
   1751	esp_payload_rfc4106=$((vti_mtu - 8   - 4   - 4  - 8 - 16  - 1          - 1))
   1752	ping_payload=$((esp_payload_rfc4106 - 48))
   1753
   1754        mtu "${ns_a}"  veth_A-R1 ${veth_mtu}
   1755        mtu "${ns_r1}" veth_R1-A ${veth_mtu}
   1756        mtu "${ns_b}"  veth_B-R1 ${veth_mtu}
   1757        mtu "${ns_r1}" veth_R1-B ${veth_mtu}
   1758
   1759	# mtu "${ns_a}" vti6_a ${vti_mtu}
   1760	# mtu "${ns_b}" vti6_b ${vti_mtu}
   1761
   1762	run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel6_b_addr}
   1763
   1764	# Check that exception was not created
   1765	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
   1766	check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
   1767
   1768	# Now decrease link layer MTU by 8 bytes on R1, check that exception is created
   1769	# with the right PMTU value
   1770        mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8))
   1771	run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel6_b_addr}
   1772	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
   1773	check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))"
   1774
   1775}
   1776
   1777test_pmtu_vti4_default_mtu() {
   1778	setup namespaces veth vti4 || return $ksft_skip
   1779
   1780	# Check that MTU of vti device is MTU of veth minus IPv4 header length
   1781	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
   1782	vti4_mtu="$(link_get_mtu "${ns_a}" vti4_a)"
   1783	if [ $((veth_mtu - vti4_mtu)) -ne 20 ]; then
   1784		err "  vti MTU ${vti4_mtu} is not veth MTU ${veth_mtu} minus IPv4 header length"
   1785		return 1
   1786	fi
   1787}
   1788
   1789test_pmtu_vti6_default_mtu() {
   1790	setup namespaces veth vti6 || return $ksft_skip
   1791
   1792	# Check that MTU of vti device is MTU of veth minus IPv6 header length
   1793	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
   1794	vti6_mtu="$(link_get_mtu "${ns_a}" vti6_a)"
   1795	if [ $((veth_mtu - vti6_mtu)) -ne 40 ]; then
   1796		err "  vti MTU ${vti6_mtu} is not veth MTU ${veth_mtu} minus IPv6 header length"
   1797		return 1
   1798	fi
   1799}
   1800
   1801test_pmtu_vti4_link_add_mtu() {
   1802	setup namespaces || return $ksft_skip
   1803
   1804	run_cmd ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
   1805	[ $? -ne 0 ] && err "  vti not supported" && return $ksft_skip
   1806	run_cmd ${ns_a} ip link del vti4_a
   1807
   1808	fail=0
   1809
   1810	min=68
   1811	max=$((65535 - 20))
   1812	# Check invalid values first
   1813	for v in $((min - 1)) $((max + 1)); do
   1814		run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
   1815		# This can fail, or MTU can be adjusted to a proper value
   1816		[ $? -ne 0 ] && continue
   1817		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
   1818		if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
   1819			err "  vti tunnel created with invalid MTU ${mtu}"
   1820			fail=1
   1821		fi
   1822		run_cmd ${ns_a} ip link del vti4_a
   1823	done
   1824
   1825	# Now check valid values
   1826	for v in ${min} 1300 ${max}; do
   1827		run_cmd ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
   1828		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
   1829		run_cmd ${ns_a} ip link del vti4_a
   1830		if [ "${mtu}" != "${v}" ]; then
   1831			err "  vti MTU ${mtu} doesn't match configured value ${v}"
   1832			fail=1
   1833		fi
   1834	done
   1835
   1836	return ${fail}
   1837}
   1838
   1839test_pmtu_vti6_link_add_mtu() {
   1840	setup namespaces || return $ksft_skip
   1841
   1842	run_cmd ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
   1843	[ $? -ne 0 ] && err "  vti6 not supported" && return $ksft_skip
   1844	run_cmd ${ns_a} ip link del vti6_a
   1845
   1846	fail=0
   1847
   1848	min=68			# vti6 can carry IPv4 packets too
   1849	max=$((65535 - 40))
   1850	# Check invalid values first
   1851	for v in $((min - 1)) $((max + 1)); do
   1852		run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
   1853		# This can fail, or MTU can be adjusted to a proper value
   1854		[ $? -ne 0 ] && continue
   1855		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
   1856		if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
   1857			err "  vti6 tunnel created with invalid MTU ${v}"
   1858			fail=1
   1859		fi
   1860		run_cmd ${ns_a} ip link del vti6_a
   1861	done
   1862
   1863	# Now check valid values
   1864	for v in 68 1280 1300 $((65535 - 40)); do
   1865		run_cmd ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
   1866		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
   1867		run_cmd ${ns_a} ip link del vti6_a
   1868		if [ "${mtu}" != "${v}" ]; then
   1869			err "  vti6 MTU ${mtu} doesn't match configured value ${v}"
   1870			fail=1
   1871		fi
   1872	done
   1873
   1874	return ${fail}
   1875}
   1876
   1877test_pmtu_vti6_link_change_mtu() {
   1878	setup namespaces || return $ksft_skip
   1879
   1880	run_cmd ${ns_a} ip link add dummy0 mtu 1500 type dummy
   1881	[ $? -ne 0 ] && err "  dummy not supported" && return $ksft_skip
   1882	run_cmd ${ns_a} ip link add dummy1 mtu 3000 type dummy
   1883	run_cmd ${ns_a} ip link set dummy0 up
   1884	run_cmd ${ns_a} ip link set dummy1 up
   1885
   1886	run_cmd ${ns_a} ip addr add ${dummy6_0_prefix}1/${dummy6_mask} dev dummy0
   1887	run_cmd ${ns_a} ip addr add ${dummy6_1_prefix}1/${dummy6_mask} dev dummy1
   1888
   1889	fail=0
   1890
   1891	# Create vti6 interface bound to device, passing MTU, check it
   1892	run_cmd ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_prefix}2 local ${dummy6_0_prefix}1
   1893	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
   1894	if [ ${mtu} -ne 1300 ]; then
   1895		err "  vti6 MTU ${mtu} doesn't match configured value 1300"
   1896		fail=1
   1897	fi
   1898
   1899	# Move to another device with different MTU, without passing MTU, check
   1900	# MTU is adjusted
   1901	run_cmd ${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_prefix}2 local ${dummy6_1_prefix}1
   1902	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
   1903	if [ ${mtu} -ne $((3000 - 40)) ]; then
   1904		err "  vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
   1905		fail=1
   1906	fi
   1907
   1908	# Move it back, passing MTU, check MTU is not overridden
   1909	run_cmd ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_prefix}2 local ${dummy6_0_prefix}1
   1910	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
   1911	if [ ${mtu} -ne 1280 ]; then
   1912		err "  vti6 MTU ${mtu} doesn't match configured value 1280"
   1913		fail=1
   1914	fi
   1915
   1916	return ${fail}
   1917}
   1918
   1919check_command() {
   1920	cmd=${1}
   1921
   1922	if ! which ${cmd} > /dev/null 2>&1; then
   1923		err "  missing required command: '${cmd}'"
   1924		return 1
   1925	fi
   1926	return 0
   1927}
   1928
   1929test_cleanup_vxlanX_exception() {
   1930	outer="${1}"
   1931	encap="vxlan"
   1932	ll_mtu=4000
   1933
   1934	check_command taskset || return $ksft_skip
   1935	cpu_list=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2)
   1936
   1937	setup namespaces routing ${encap}${outer} || return $ksft_skip
   1938	trace "${ns_a}" ${encap}_a   "${ns_b}"  ${encap}_b \
   1939	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
   1940	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B
   1941
   1942	# Create route exception by exceeding link layer MTU
   1943	mtu "${ns_a}"  veth_A-R1 $((${ll_mtu} + 1000))
   1944	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
   1945	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
   1946	mtu "${ns_r1}" veth_R1-B ${ll_mtu}
   1947
   1948	mtu "${ns_a}" ${encap}_a $((${ll_mtu} + 1000))
   1949	mtu "${ns_b}" ${encap}_b $((${ll_mtu} + 1000))
   1950
   1951	# Fill exception cache for multiple CPUs (2)
   1952	# we can always use inner IPv4 for that
   1953	for cpu in ${cpu_list}; do
   1954		run_cmd taskset --cpu-list ${cpu} ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${tunnel4_b_addr}
   1955	done
   1956
   1957	${ns_a} ip link del dev veth_A-R1 &
   1958	iplink_pid=$!
   1959	sleep 1
   1960	if [ "$(cat /proc/${iplink_pid}/cmdline 2>/dev/null | tr -d '\0')" = "iplinkdeldevveth_A-R1" ]; then
   1961		err "  can't delete veth device in a timely manner, PMTU dst likely leaked"
   1962		return 1
   1963	fi
   1964}
   1965
   1966test_cleanup_ipv6_exception() {
   1967	test_cleanup_vxlanX_exception 6
   1968}
   1969
   1970test_cleanup_ipv4_exception() {
   1971	test_cleanup_vxlanX_exception 4
   1972}
   1973
   1974run_test() {
   1975	(
   1976	tname="$1"
   1977	tdesc="$2"
   1978
   1979	unset IFS
   1980
   1981	# Since cleanup() relies on variables modified by this subshell, it
   1982	# has to run in this context.
   1983	trap cleanup EXIT
   1984
   1985	if [ "$VERBOSE" = "1" ]; then
   1986		printf "\n##########################################################################\n\n"
   1987	fi
   1988
   1989	eval test_${tname}
   1990	ret=$?
   1991
   1992	if [ $ret -eq 0 ]; then
   1993		printf "TEST: %-60s  [ OK ]\n" "${tdesc}"
   1994	elif [ $ret -eq 1 ]; then
   1995		printf "TEST: %-60s  [FAIL]\n" "${tdesc}"
   1996		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
   1997			echo
   1998			echo "Pausing. Hit enter to continue"
   1999			read a
   2000		fi
   2001		err_flush
   2002		exit 1
   2003	elif [ $ret -eq $ksft_skip ]; then
   2004		printf "TEST: %-60s  [SKIP]\n" "${tdesc}"
   2005		err_flush
   2006	fi
   2007
   2008	return $ret
   2009	)
   2010	ret=$?
   2011	case $ret in
   2012		0)
   2013			all_skipped=false
   2014			[ $exitcode=$ksft_skip ] && exitcode=0
   2015		;;
   2016		$ksft_skip)
   2017			[ $all_skipped = true ] && exitcode=$ksft_skip
   2018		;;
   2019		*)
   2020			all_skipped=false
   2021			exitcode=1
   2022		;;
   2023	esac
   2024
   2025	return $ret
   2026}
   2027
   2028run_test_nh() {
   2029	tname="$1"
   2030	tdesc="$2"
   2031
   2032	USE_NH=yes
   2033	run_test "${tname}" "${tdesc} - nexthop objects"
   2034	USE_NH=no
   2035}
   2036
   2037test_list_flush_ipv4_exception() {
   2038	setup namespaces routing || return $ksft_skip
   2039	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
   2040	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
   2041	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
   2042	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2
   2043
   2044	dst_prefix1="${prefix4}.${b_r1}."
   2045	dst2="${prefix4}.${b_r2}.1"
   2046
   2047	# Set up initial MTU values
   2048	mtu "${ns_a}"  veth_A-R1 2000
   2049	mtu "${ns_r1}" veth_R1-A 2000
   2050	mtu "${ns_r1}" veth_R1-B 1500
   2051	mtu "${ns_b}"  veth_B-R1 1500
   2052
   2053	mtu "${ns_a}"  veth_A-R2 2000
   2054	mtu "${ns_r2}" veth_R2-A 2000
   2055	mtu "${ns_r2}" veth_R2-B 1500
   2056	mtu "${ns_b}"  veth_B-R2 1500
   2057
   2058	fail=0
   2059
   2060	# Add 100 addresses for veth endpoint on B reached by default A route
   2061	for i in $(seq 100 199); do
   2062		run_cmd ${ns_b} ip addr add "${dst_prefix1}${i}" dev veth_B-R1
   2063	done
   2064
   2065	# Create 100 cached route exceptions for path via R1, one via R2. Note
   2066	# that with IPv4 we need to actually cause a route lookup that matches
   2067	# the exception caused by ICMP, in order to actually have a cached
   2068	# route, so we need to ping each destination twice
   2069	for i in $(seq 100 199); do
   2070		run_cmd ${ns_a} ping -q -M want -i 0.1 -c 2 -s 1800 "${dst_prefix1}${i}"
   2071	done
   2072	run_cmd ${ns_a} ping -q -M want -i 0.1 -c 2 -s 1800 "${dst2}"
   2073
   2074	if [ "$(${ns_a} ip -oneline route list cache | wc -l)" -ne 101 ]; then
   2075		err "  can't list cached exceptions"
   2076		fail=1
   2077	fi
   2078
   2079	run_cmd ${ns_a} ip route flush cache
   2080	pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst_prefix}1)"
   2081	pmtu2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst_prefix}2)"
   2082	if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ] || \
   2083	   [ -n "$(${ns_a} ip route list cache)" ]; then
   2084		err "  can't flush cached exceptions"
   2085		fail=1
   2086	fi
   2087
   2088	return ${fail}
   2089}
   2090
   2091test_list_flush_ipv6_exception() {
   2092	setup namespaces routing || return $ksft_skip
   2093	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
   2094	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
   2095	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
   2096	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2
   2097
   2098	dst_prefix1="${prefix6}:${b_r1}::"
   2099	dst2="${prefix6}:${b_r2}::1"
   2100
   2101	# Set up initial MTU values
   2102	mtu "${ns_a}"  veth_A-R1 2000
   2103	mtu "${ns_r1}" veth_R1-A 2000
   2104	mtu "${ns_r1}" veth_R1-B 1500
   2105	mtu "${ns_b}"  veth_B-R1 1500
   2106
   2107	mtu "${ns_a}"  veth_A-R2 2000
   2108	mtu "${ns_r2}" veth_R2-A 2000
   2109	mtu "${ns_r2}" veth_R2-B 1500
   2110	mtu "${ns_b}"  veth_B-R2 1500
   2111
   2112	fail=0
   2113
   2114	# Add 100 addresses for veth endpoint on B reached by default A route
   2115	for i in $(seq 100 199); do
   2116		run_cmd ${ns_b} ip addr add "${dst_prefix1}${i}" dev veth_B-R1
   2117	done
   2118
   2119	# Create 100 cached route exceptions for path via R1, one via R2
   2120	for i in $(seq 100 199); do
   2121		run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s 1800 "${dst_prefix1}${i}"
   2122	done
   2123	run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s 1800 "${dst2}"
   2124	if [ "$(${ns_a} ip -oneline -6 route list cache | wc -l)" -ne 101 ]; then
   2125		err "  can't list cached exceptions"
   2126		fail=1
   2127	fi
   2128
   2129	run_cmd ${ns_a} ip -6 route flush cache
   2130	pmtu1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst_prefix1}100")"
   2131	pmtu2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
   2132	if [ -n "${pmtu1}" ] || [ -n "${pmtu2}" ] || \
   2133	   [ -n "$(${ns_a} ip -6 route list cache)" ]; then
   2134		err "  can't flush cached exceptions"
   2135		fail=1
   2136	fi
   2137
   2138	return ${fail}
   2139}
   2140
   2141test_pmtu_ipvX_route_change() {
   2142	family=${1}
   2143
   2144	setup namespaces routing || return 2
   2145	trace "${ns_a}"  veth_A-R1    "${ns_r1}" veth_R1-A \
   2146	      "${ns_r1}" veth_R1-B    "${ns_b}"  veth_B-R1 \
   2147	      "${ns_a}"  veth_A-R2    "${ns_r2}" veth_R2-A \
   2148	      "${ns_r2}" veth_R2-B    "${ns_b}"  veth_B-R2
   2149
   2150	if [ ${family} -eq 4 ]; then
   2151		ping=ping
   2152		dst1="${prefix4}.${b_r1}.1"
   2153		dst2="${prefix4}.${b_r2}.1"
   2154		gw="${prefix4}.${a_r1}.2"
   2155	else
   2156		ping=${ping6}
   2157		dst1="${prefix6}:${b_r1}::1"
   2158		dst2="${prefix6}:${b_r2}::1"
   2159		gw="${prefix6}:${a_r1}::2"
   2160	fi
   2161
   2162	# Set up initial MTU values
   2163	mtu "${ns_a}"  veth_A-R1 2000
   2164	mtu "${ns_r1}" veth_R1-A 2000
   2165	mtu "${ns_r1}" veth_R1-B 1400
   2166	mtu "${ns_b}"  veth_B-R1 1400
   2167
   2168	mtu "${ns_a}"  veth_A-R2 2000
   2169	mtu "${ns_r2}" veth_R2-A 2000
   2170	mtu "${ns_r2}" veth_R2-B 1500
   2171	mtu "${ns_b}"  veth_B-R2 1500
   2172
   2173	# Create route exceptions
   2174	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1}
   2175	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2}
   2176
   2177	# Check that exceptions have been created with the correct PMTU
   2178	pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
   2179	check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
   2180	pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
   2181	check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
   2182
   2183	# Replace the route from A to R1
   2184	run_cmd ${ns_a} ip route change default via ${gw}
   2185
   2186	# Delete the device in A
   2187	run_cmd ${ns_a} ip link del "veth_A-R1"
   2188}
   2189
   2190test_pmtu_ipv4_route_change() {
   2191	test_pmtu_ipvX_route_change 4
   2192}
   2193
   2194test_pmtu_ipv6_route_change() {
   2195	test_pmtu_ipvX_route_change 6
   2196}
   2197
   2198usage() {
   2199	echo
   2200	echo "$0 [OPTIONS] [TEST]..."
   2201	echo "If no TEST argument is given, all tests will be run."
   2202	echo
   2203	echo "Options"
   2204	echo "  --trace: capture traffic to TEST_INTERFACE.pcap"
   2205	echo
   2206	echo "Available tests${tests}"
   2207	exit 1
   2208}
   2209
   2210################################################################################
   2211#
   2212exitcode=0
   2213desc=0
   2214all_skipped=true
   2215
   2216while getopts :ptv o
   2217do
   2218	case $o in
   2219	p) PAUSE_ON_FAIL=yes;;
   2220	v) VERBOSE=1;;
   2221	t) if which tcpdump > /dev/null 2>&1; then
   2222		TRACING=1
   2223	   else
   2224		echo "=== tcpdump not available, tracing disabled"
   2225	   fi
   2226	   ;;
   2227	*) usage;;
   2228	esac
   2229done
   2230shift $(($OPTIND-1))
   2231
   2232IFS="	
   2233"
   2234
   2235for arg do
   2236	# Check first that all requested tests are available before running any
   2237	command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; }
   2238done
   2239
   2240trap cleanup EXIT
   2241
   2242# start clean
   2243cleanup
   2244
   2245HAVE_NH=no
   2246ip nexthop ls >/dev/null 2>&1
   2247[ $? -eq 0 ] && HAVE_NH=yes
   2248
   2249name=""
   2250desc=""
   2251rerun_nh=0
   2252for t in ${tests}; do
   2253	[ "${name}" = "" ]	&& name="${t}"	&& continue
   2254	[ "${desc}" = "" ]	&& desc="${t}"	&& continue
   2255
   2256	if [ "${HAVE_NH}" = "yes" ]; then
   2257		rerun_nh="${t}"
   2258	fi
   2259
   2260	run_this=1
   2261	for arg do
   2262		[ "${arg}" != "${arg#--*}" ] && continue
   2263		[ "${arg}" = "${name}" ] && run_this=1 && break
   2264		run_this=0
   2265	done
   2266	if [ $run_this -eq 1 ]; then
   2267		run_test "${name}" "${desc}"
   2268		# if test was skipped no need to retry with nexthop objects
   2269		[ $? -eq $ksft_skip ] && rerun_nh=0
   2270
   2271		if [ "${rerun_nh}" = "1" ]; then
   2272			run_test_nh "${name}" "${desc}"
   2273		fi
   2274	fi
   2275	name=""
   2276	desc=""
   2277	rerun_nh=0
   2278done
   2279
   2280exit ${exitcode}