cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

udp.c (46487B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 *	UDP over IPv6
      4 *	Linux INET6 implementation
      5 *
      6 *	Authors:
      7 *	Pedro Roque		<roque@di.fc.ul.pt>
      8 *
      9 *	Based on linux/ipv4/udp.c
     10 *
     11 *	Fixes:
     12 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
     13 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
     14 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
     15 *					a single port at the same time.
     16 *      Kazunori MIYAZAWA @USAGI:       change process style to use ip6_append_data
     17 *      YOSHIFUJI Hideaki @USAGI:	convert /proc/net/udp6 to seq_file.
     18 */
     19
     20#include <linux/bpf-cgroup.h>
     21#include <linux/errno.h>
     22#include <linux/types.h>
     23#include <linux/socket.h>
     24#include <linux/sockios.h>
     25#include <linux/net.h>
     26#include <linux/in6.h>
     27#include <linux/netdevice.h>
     28#include <linux/if_arp.h>
     29#include <linux/ipv6.h>
     30#include <linux/icmpv6.h>
     31#include <linux/init.h>
     32#include <linux/module.h>
     33#include <linux/skbuff.h>
     34#include <linux/slab.h>
     35#include <linux/uaccess.h>
     36#include <linux/indirect_call_wrapper.h>
     37
     38#include <net/addrconf.h>
     39#include <net/ndisc.h>
     40#include <net/protocol.h>
     41#include <net/transp_v6.h>
     42#include <net/ip6_route.h>
     43#include <net/raw.h>
     44#include <net/seg6.h>
     45#include <net/tcp_states.h>
     46#include <net/ip6_checksum.h>
     47#include <net/ip6_tunnel.h>
     48#include <net/xfrm.h>
     49#include <net/inet_hashtables.h>
     50#include <net/inet6_hashtables.h>
     51#include <net/busy_poll.h>
     52#include <net/sock_reuseport.h>
     53
     54#include <linux/proc_fs.h>
     55#include <linux/seq_file.h>
     56#include <trace/events/skb.h>
     57#include "udp_impl.h"
     58
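       /*
        * Flow hash for SO_REUSEPORT socket selection on a given 4-tuple.
        * The per-boot secrets are initialized lazily on first use; as in
        * inet6_ehashfn(), only the last 32 bits of the local address feed
        * lhash, while the foreign address is fully jhashed.
        */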
     59static u32 udp6_ehashfn(const struct net *net,
     60			const struct in6_addr *laddr,
     61			const u16 lport,
     62			const struct in6_addr *faddr,
     63			const __be16 fport)
     64{
     65	static u32 udp6_ehash_secret __read_mostly;
     66	static u32 udp_ipv6_hash_secret __read_mostly;
     67
     68	u32 lhash, fhash;
     69
     70	net_get_random_once(&udp6_ehash_secret,
     71			    sizeof(udp6_ehash_secret));
     72	net_get_random_once(&udp_ipv6_hash_secret,
     73			    sizeof(udp_ipv6_hash_secret));
     74
     75	lhash = (__force u32)laddr->s6_addr32[3];
     76	fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret);
     77
     78	return __inet6_ehashfn(lhash, lport, fhash, fport,
     79			       udp6_ehash_secret + net_hash_mix(net));
     80}
     81
     82int udp_v6_get_port(struct sock *sk, unsigned short snum)
     83{
     84	unsigned int hash2_nulladdr =
     85		ipv6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
     86	unsigned int hash2_partial =
     87		ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0);
     88
     89	/* precompute partial secondary hash */
     90	udp_sk(sk)->udp_portaddr_hash = hash2_partial;
     91	return udp_lib_get_port(sk, snum, hash2_nulladdr);
     92}
     93
     94void udp_v6_rehash(struct sock *sk)
     95{
     96	u16 new_hash = ipv6_portaddr_hash(sock_net(sk),
     97					  &sk->sk_v6_rcv_saddr,
     98					  inet_sk(sk)->inet_num);
     99
    100	udp_lib_rehash(sk, new_hash);
    101}
    102
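       /*
        * Score how well @sk matches an incoming packet, or -1 if it cannot
        * receive it at all. One point is scored for each matching connected
        * peer port, connected peer address, bound device and receiving CPU;
        * udp6_lib_lookup2() keeps the highest-scoring socket.
        */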
    103static int compute_score(struct sock *sk, struct net *net,
    104			 const struct in6_addr *saddr, __be16 sport,
    105			 const struct in6_addr *daddr, unsigned short hnum,
    106			 int dif, int sdif)
    107{
    108	int bound_dev_if, score;
    109	struct inet_sock *inet;
    110	bool dev_match;
    111
    112	if (!net_eq(sock_net(sk), net) ||
    113	    udp_sk(sk)->udp_port_hash != hnum ||
    114	    sk->sk_family != PF_INET6)
    115		return -1;
    116
    117	if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
    118		return -1;
    119
    120	score = 0;
    121	inet = inet_sk(sk);
    122
    123	if (inet->inet_dport) {
    124		if (inet->inet_dport != sport)
    125			return -1;
    126		score++;
    127	}
    128
    129	if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
    130		if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
    131			return -1;
    132		score++;
    133	}
    134
    135	bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
    136	dev_match = udp_sk_bound_dev_eq(net, bound_dev_if, dif, sdif);
    137	if (!dev_match)
    138		return -1;
    139	if (bound_dev_if)
    140		score++;
    141
    142	if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
    143		score++;
    144
    145	return score;
    146}
    147
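       /*
        * If @sk belongs to an SO_REUSEPORT group, select the group member
        * that owns this flow's hash. A connected (TCP_ESTABLISHED) socket
        * already identifies one specific flow, so group selection is
        * skipped for it.
        */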
    148static struct sock *lookup_reuseport(struct net *net, struct sock *sk,
    149				     struct sk_buff *skb,
    150				     const struct in6_addr *saddr,
    151				     __be16 sport,
    152				     const struct in6_addr *daddr,
    153				     unsigned int hnum)
    154{
    155	struct sock *reuse_sk = NULL;
    156	u32 hash;
    157
    158	if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) {
    159		hash = udp6_ehashfn(net, daddr, hnum, saddr, sport);
    160		reuse_sk = reuseport_select_sock(sk, hash, skb,
    161						 sizeof(struct udphdr));
    162	}
    163	return reuse_sk;
    164}
    165
    166/* called with rcu_read_lock() */
    167static struct sock *udp6_lib_lookup2(struct net *net,
    168		const struct in6_addr *saddr, __be16 sport,
    169		const struct in6_addr *daddr, unsigned int hnum,
    170		int dif, int sdif, struct udp_hslot *hslot2,
    171		struct sk_buff *skb)
    172{
    173	struct sock *sk, *result;
    174	int score, badness;
    175
    176	result = NULL;
    177	badness = -1;
    178	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
    179		score = compute_score(sk, net, saddr, sport,
    180				      daddr, hnum, dif, sdif);
    181		if (score > badness) {
    182			result = lookup_reuseport(net, sk, skb,
    183						  saddr, sport, daddr, hnum);
    184			/* Fall back to scoring if group has connections */
    185			if (result && !reuseport_has_conns(sk, false))
    186				return result;
    187
    188			result = result ? : sk;
    189			badness = score;
    190		}
    191	}
    192	return result;
    193}
    194
    195static inline struct sock *udp6_lookup_run_bpf(struct net *net,
    196					       struct udp_table *udptable,
    197					       struct sk_buff *skb,
    198					       const struct in6_addr *saddr,
    199					       __be16 sport,
    200					       const struct in6_addr *daddr,
    201					       u16 hnum, const int dif)
    202{
    203	struct sock *sk, *reuse_sk;
    204	bool no_reuseport;
    205
    206	if (udptable != &udp_table)
    207		return NULL; /* only UDP is supported */
    208
    209	no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_UDP, saddr, sport,
    210					    daddr, hnum, dif, &sk);
    211	if (no_reuseport || IS_ERR_OR_NULL(sk))
    212		return sk;
    213
    214	reuse_sk = lookup_reuseport(net, sk, skb, saddr, sport, daddr, hnum);
    215	if (reuse_sk)
    216		sk = reuse_sk;
    217	return sk;
    218}
    219
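       /*
        * Main receive-side lookup. The hash2 table is keyed on (local
        * address, local port), so at most two slots are scanned: one for
        * the exact destination address and one for the in6addr_any
        * wildcard, with a BPF sk_lookup pass in between.
        */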
    220/* rcu_read_lock() must be held */
    221struct sock *__udp6_lib_lookup(struct net *net,
    222			       const struct in6_addr *saddr, __be16 sport,
    223			       const struct in6_addr *daddr, __be16 dport,
    224			       int dif, int sdif, struct udp_table *udptable,
    225			       struct sk_buff *skb)
    226{
    227	unsigned short hnum = ntohs(dport);
    228	unsigned int hash2, slot2;
    229	struct udp_hslot *hslot2;
    230	struct sock *result, *sk;
    231
    232	hash2 = ipv6_portaddr_hash(net, daddr, hnum);
    233	slot2 = hash2 & udptable->mask;
    234	hslot2 = &udptable->hash2[slot2];
    235
    236	/* Lookup connected or non-wildcard sockets */
    237	result = udp6_lib_lookup2(net, saddr, sport,
    238				  daddr, hnum, dif, sdif,
    239				  hslot2, skb);
    240	if (!IS_ERR_OR_NULL(result) && result->sk_state == TCP_ESTABLISHED)
    241		goto done;
    242
    243	/* Lookup redirect from BPF */
    244	if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
    245		sk = udp6_lookup_run_bpf(net, udptable, skb,
    246					 saddr, sport, daddr, hnum, dif);
    247		if (sk) {
    248			result = sk;
    249			goto done;
    250		}
    251	}
    252
    253	/* Got non-wildcard socket or error on first lookup */
    254	if (result)
    255		goto done;
    256
    257	/* Lookup wildcard sockets */
    258	hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
    259	slot2 = hash2 & udptable->mask;
    260	hslot2 = &udptable->hash2[slot2];
    261
    262	result = udp6_lib_lookup2(net, saddr, sport,
    263				  &in6addr_any, hnum, dif, sdif,
    264				  hslot2, skb);
    265done:
    266	if (IS_ERR(result))
    267		return NULL;
    268	return result;
    269}
    270EXPORT_SYMBOL_GPL(__udp6_lib_lookup);
    271
    272static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
    273					  __be16 sport, __be16 dport,
    274					  struct udp_table *udptable)
    275{
    276	const struct ipv6hdr *iph = ipv6_hdr(skb);
    277
    278	return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
    279				 &iph->daddr, dport, inet6_iif(skb),
    280				 inet6_sdif(skb), udptable, skb);
    281}
    282
    283struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
    284				 __be16 sport, __be16 dport)
    285{
    286	const struct ipv6hdr *iph = ipv6_hdr(skb);
    287
    288	return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
    289				 &iph->daddr, dport, inet6_iif(skb),
    290				 inet6_sdif(skb), &udp_table, NULL);
    291}
    292
    293/* Must be called under rcu_read_lock().
    294 * Does increment socket refcount.
    295 */
    296#if IS_ENABLED(CONFIG_NF_TPROXY_IPV6) || IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
    297struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
    298			     const struct in6_addr *daddr, __be16 dport, int dif)
    299{
    300	struct sock *sk;
    301
    302	sk =  __udp6_lib_lookup(net, saddr, sport, daddr, dport,
    303				dif, 0, &udp_table, NULL);
    304	if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
    305		sk = NULL;
    306	return sk;
    307}
    308EXPORT_SYMBOL_GPL(udp6_lib_lookup);
    309#endif
    310
    311/* do not use the scratch area len for jumbograms: their length exceeds the
    312 * scratch area space; note that the IP6CB flags are still in the first
    313 * cacheline, so checking for jumbograms is cheap
    314 */
    315static int udp6_skb_len(struct sk_buff *skb)
    316{
    317	return unlikely(inet6_is_jumbogram(skb)) ? skb->len : udp_skb_len(skb);
    318}
    319
    320/*
    321 *	This should be easy: if there is something there we
    322 *	return it; otherwise we block.
    323 */
    324
    325int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
    326		  int flags, int *addr_len)
    327{
    328	struct ipv6_pinfo *np = inet6_sk(sk);
    329	struct inet_sock *inet = inet_sk(sk);
    330	struct sk_buff *skb;
    331	unsigned int ulen, copied;
    332	int off, err, peeking = flags & MSG_PEEK;
    333	int is_udplite = IS_UDPLITE(sk);
    334	struct udp_mib __percpu *mib;
    335	bool checksum_valid = false;
    336	int is_udp4;
    337
    338	if (flags & MSG_ERRQUEUE)
    339		return ipv6_recv_error(sk, msg, len, addr_len);
    340
    341	if (np->rxpmtu && np->rxopt.bits.rxpmtu)
    342		return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
    343
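       	/* Come back here after dropping a datagram whose checksum failed
       	 * during the copy: userspace never sees such packets, only the
       	 * next valid one (or the usual blocking/-EAGAIN semantics of
       	 * __skb_recv_udp()).
       	 */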
    344try_again:
    345	off = sk_peek_offset(sk, flags);
    346	skb = __skb_recv_udp(sk, flags, &off, &err);
    347	if (!skb)
    348		return err;
    349
    350	ulen = udp6_skb_len(skb);
    351	copied = len;
    352	if (copied > ulen - off)
    353		copied = ulen - off;
    354	else if (copied < ulen)
    355		msg->msg_flags |= MSG_TRUNC;
    356
    357	is_udp4 = (skb->protocol == htons(ETH_P_IP));
    358	mib = __UDPX_MIB(sk, is_udp4);
    359
    360	/*
    361	 * If checksum is needed at all, try to do it while copying the
    362	 * data.  If the data is truncated, or if we only want a partial
    363	 * coverage checksum (UDP-Lite), do it before the copy.
    364	 */
    365
    366	if (copied < ulen || peeking ||
    367	    (is_udplite && UDP_SKB_CB(skb)->partial_cov)) {
    368		checksum_valid = udp_skb_csum_unnecessary(skb) ||
    369				!__udp_lib_checksum_complete(skb);
    370		if (!checksum_valid)
    371			goto csum_copy_err;
    372	}
    373
    374	if (checksum_valid || udp_skb_csum_unnecessary(skb)) {
    375		if (udp_skb_is_linear(skb))
    376			err = copy_linear_skb(skb, copied, off, &msg->msg_iter);
    377		else
    378			err = skb_copy_datagram_msg(skb, off, msg, copied);
    379	} else {
    380		err = skb_copy_and_csum_datagram_msg(skb, off, msg);
    381		if (err == -EINVAL)
    382			goto csum_copy_err;
    383	}
    384	if (unlikely(err)) {
    385		if (!peeking) {
    386			atomic_inc(&sk->sk_drops);
    387			SNMP_INC_STATS(mib, UDP_MIB_INERRORS);
    388		}
    389		kfree_skb(skb);
    390		return err;
    391	}
    392	if (!peeking)
    393		SNMP_INC_STATS(mib, UDP_MIB_INDATAGRAMS);
    394
    395	sock_recv_cmsgs(msg, sk, skb);
    396
    397	/* Copy the address. */
    398	if (msg->msg_name) {
    399		DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
    400		sin6->sin6_family = AF_INET6;
    401		sin6->sin6_port = udp_hdr(skb)->source;
    402		sin6->sin6_flowinfo = 0;
    403
    404		if (is_udp4) {
    405			ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
    406					       &sin6->sin6_addr);
    407			sin6->sin6_scope_id = 0;
    408		} else {
    409			sin6->sin6_addr = ipv6_hdr(skb)->saddr;
    410			sin6->sin6_scope_id =
    411				ipv6_iface_scope_id(&sin6->sin6_addr,
    412						    inet6_iif(skb));
    413		}
    414		*addr_len = sizeof(*sin6);
    415
    416		BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk,
    417						      (struct sockaddr *)sin6);
    418	}
    419
    420	if (udp_sk(sk)->gro_enabled)
    421		udp_cmsg_recv(msg, sk, skb);
    422
    423	if (np->rxopt.all)
    424		ip6_datagram_recv_common_ctl(sk, msg, skb);
    425
    426	if (is_udp4) {
    427		if (inet->cmsg_flags)
    428			ip_cmsg_recv_offset(msg, sk, skb,
    429					    sizeof(struct udphdr), off);
    430	} else {
    431		if (np->rxopt.all)
    432			ip6_datagram_recv_specific_ctl(sk, msg, skb);
    433	}
    434
    435	err = copied;
    436	if (flags & MSG_TRUNC)
    437		err = ulen;
    438
    439	skb_consume_udp(sk, skb, peeking ? -err : err);
    440	return err;
    441
    442csum_copy_err:
    443	if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags,
    444				 udp_skb_destructor)) {
    445		SNMP_INC_STATS(mib, UDP_MIB_CSUMERRORS);
    446		SNMP_INC_STATS(mib, UDP_MIB_INERRORS);
    447	}
    448	kfree_skb(skb);
    449
    450	/* starting over for a new packet, but check if we need to yield */
    451	cond_resched();
    452	msg->msg_flags &= ~MSG_TRUNC;
    453	goto try_again;
    454}
    455
    456DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
    457void udpv6_encap_enable(void)
    458{
    459	static_branch_inc(&udpv6_encap_needed_key);
    460}
    461EXPORT_SYMBOL(udpv6_encap_enable);
    462
    463/* Handler for tunnels with arbitrary destination ports: no socket lookup, go
    464 * through error handlers in encapsulations looking for a match.
    465 */
    466static int __udp6_lib_err_encap_no_sk(struct sk_buff *skb,
    467				      struct inet6_skb_parm *opt,
    468				      u8 type, u8 code, int offset, __be32 info)
    469{
    470	int i;
    471
    472	for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) {
    473		int (*handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
    474			       u8 type, u8 code, int offset, __be32 info);
    475		const struct ip6_tnl_encap_ops *encap;
    476
    477		encap = rcu_dereference(ip6tun_encaps[i]);
    478		if (!encap)
    479			continue;
    480		handler = encap->err_handler;
    481		if (handler && !handler(skb, opt, type, code, offset, info))
    482			return 0;
    483	}
    484
    485	return -ENOENT;
    486}
    487
    488/* Try to match ICMP errors to UDP tunnels by looking up a socket without
    489 * reversing source and destination port: this will match tunnels that force the
    490 * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that
    491 * lwtunnels might actually break this assumption by being configured with
    492 * different destination ports on endpoints, in this case we won't be able to
    493 * trace ICMP messages back to them.
    494 *
    495 * If this doesn't match any socket, probe tunnels with arbitrary destination
    496 * ports (e.g. FoU, GUE): there, the receiving socket is useless, as the port
    497 * we've sent packets to won't necessarily match the local destination port.
    498 *
    499 * Then ask the tunnel implementation to match the error against a valid
    500 * association.
    501 *
    502 * Return an error if we can't find a match, the socket if we need further
    503 * processing, zero otherwise.
    504 */
    505static struct sock *__udp6_lib_err_encap(struct net *net,
    506					 const struct ipv6hdr *hdr, int offset,
    507					 struct udphdr *uh,
    508					 struct udp_table *udptable,
    509					 struct sock *sk,
    510					 struct sk_buff *skb,
    511					 struct inet6_skb_parm *opt,
    512					 u8 type, u8 code, __be32 info)
    513{
    514	int (*lookup)(struct sock *sk, struct sk_buff *skb);
    515	int network_offset, transport_offset;
    516	struct udp_sock *up;
    517
    518	network_offset = skb_network_offset(skb);
    519	transport_offset = skb_transport_offset(skb);
    520
    521	/* Network header needs to point to the outer IPv6 header inside ICMP */
    522	skb_reset_network_header(skb);
    523
    524	/* Transport header needs to point to the UDP header */
    525	skb_set_transport_header(skb, offset);
    526
    527	if (sk) {
    528		up = udp_sk(sk);
    529
    530		lookup = READ_ONCE(up->encap_err_lookup);
    531		if (lookup && lookup(sk, skb))
    532			sk = NULL;
    533
    534		goto out;
    535	}
    536
    537	sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source,
    538			       &hdr->saddr, uh->dest,
    539			       inet6_iif(skb), 0, udptable, skb);
    540	if (sk) {
    541		up = udp_sk(sk);
    542
    543		lookup = READ_ONCE(up->encap_err_lookup);
    544		if (!lookup || lookup(sk, skb))
    545			sk = NULL;
    546	}
    547
    548out:
    549	if (!sk) {
    550		sk = ERR_PTR(__udp6_lib_err_encap_no_sk(skb, opt, type, code,
    551							offset, info));
    552	}
    553
    554	skb_set_transport_header(skb, transport_offset);
    555	skb_set_network_header(skb, network_offset);
    556
    557	return sk;
    558}
    559
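       /*
        * ICMPv6 error handler. The headers embedded in the ICMP error
        * describe a packet *we* sent, so the lookup below swaps the roles:
        * the embedded destination is the remote peer and the embedded
        * source is the local socket address.
        */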
    560int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
    561		   u8 type, u8 code, int offset, __be32 info,
    562		   struct udp_table *udptable)
    563{
    564	struct ipv6_pinfo *np;
    565	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
    566	const struct in6_addr *saddr = &hdr->saddr;
    567	const struct in6_addr *daddr = seg6_get_daddr(skb, opt) ? : &hdr->daddr;
    568	struct udphdr *uh = (struct udphdr *)(skb->data+offset);
    569	bool tunnel = false;
    570	struct sock *sk;
    571	int harderr;
    572	int err;
    573	struct net *net = dev_net(skb->dev);
    574
    575	sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
    576			       inet6_iif(skb), inet6_sdif(skb), udptable, NULL);
    577
    578	if (!sk || udp_sk(sk)->encap_type) {
    579		/* No socket for error: try tunnels before discarding */
    580		if (static_branch_unlikely(&udpv6_encap_needed_key)) {
    581			sk = __udp6_lib_err_encap(net, hdr, offset, uh,
    582						  udptable, sk, skb,
    583						  opt, type, code, info);
    584			if (!sk)
    585				return 0;
    586		} else
    587			sk = ERR_PTR(-ENOENT);
    588
    589		if (IS_ERR(sk)) {
    590			__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
    591					  ICMP6_MIB_INERRORS);
    592			return PTR_ERR(sk);
    593		}
    594
    595		tunnel = true;
    596	}
    597
    598	harderr = icmpv6_err_convert(type, code, &err);
    599	np = inet6_sk(sk);
    600
    601	if (type == ICMPV6_PKT_TOOBIG) {
    602		if (!ip6_sk_accept_pmtu(sk))
    603			goto out;
    604		ip6_sk_update_pmtu(skb, sk, info);
    605		if (np->pmtudisc != IPV6_PMTUDISC_DONT)
    606			harderr = 1;
    607	}
    608	if (type == NDISC_REDIRECT) {
    609		if (tunnel) {
    610			ip6_redirect(skb, sock_net(sk), inet6_iif(skb),
    611				     sk->sk_mark, sk->sk_uid);
    612		} else {
    613			ip6_sk_redirect(skb, sk);
    614		}
    615		goto out;
    616	}
    617
    618	/* Tunnels don't have an application socket: don't pass errors back */
    619	if (tunnel)
    620		goto out;
    621
    622	if (!np->recverr) {
    623		if (!harderr || sk->sk_state != TCP_ESTABLISHED)
    624			goto out;
    625	} else {
    626		ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1));
    627	}
    628
    629	sk->sk_err = err;
    630	sk_error_report(sk);
    631out:
    632	return 0;
    633}
    634
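       /*
        * For connected sockets, refresh RPS/RFS steering state and the
        * busy-poll NAPI id on every packet; unconnected sockets may see
        * many flows and only record the NAPI id once.
        */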
    635static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
    636{
    637	int rc;
    638
    639	if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
    640		sock_rps_save_rxhash(sk, skb);
    641		sk_mark_napi_id(sk, skb);
    642		sk_incoming_cpu_update(sk);
    643	} else {
    644		sk_mark_napi_id_once(sk, skb);
    645	}
    646
    647	rc = __udp_enqueue_schedule_skb(sk, skb);
    648	if (rc < 0) {
    649		int is_udplite = IS_UDPLITE(sk);
    650
    651		/* Note that an ENOMEM error is charged twice */
    652		if (rc == -ENOMEM)
    653			UDP6_INC_STATS(sock_net(sk),
    654					 UDP_MIB_RCVBUFERRORS, is_udplite);
    655		else
    656			UDP6_INC_STATS(sock_net(sk),
    657				       UDP_MIB_MEMERRORS, is_udplite);
    658		UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
    659		kfree_skb(skb);
    660		return -1;
    661	}
    662
    663	return 0;
    664}
    665
    666static __inline__ int udpv6_err(struct sk_buff *skb,
    667				struct inet6_skb_parm *opt, u8 type,
    668				u8 code, int offset, __be32 info)
    669{
    670	return __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
    671}
    672
    673static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
    674{
    675	struct udp_sock *up = udp_sk(sk);
    676	int is_udplite = IS_UDPLITE(sk);
    677
    678	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
    679		goto drop;
    680
    681	if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) {
    682		int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
    683
    684		/*
    685		 * This is an encapsulation socket so pass the skb to
    686		 * the socket's udp_encap_rcv() hook. Otherwise, just
    687		 * fall through and pass this up the UDP socket.
    688		 * up->encap_rcv() returns the following value:
    689		 * =0 if skb was successfully passed to the encap
    690		 *    handler or was discarded by it.
    691		 * >0 if skb should be passed on to UDP.
    692		 * <0 if skb should be resubmitted as proto -N
    693		 */
    694
    695		/* if we're overly short, let UDP handle it */
    696		encap_rcv = READ_ONCE(up->encap_rcv);
    697		if (encap_rcv) {
    698			int ret;
    699
    700			/* Verify checksum before giving to encap */
    701			if (udp_lib_checksum_complete(skb))
    702				goto csum_error;
    703
    704			ret = encap_rcv(sk, skb);
    705			if (ret <= 0) {
    706				__UDP6_INC_STATS(sock_net(sk),
    707						 UDP_MIB_INDATAGRAMS,
    708						 is_udplite);
    709				return -ret;
    710			}
    711		}
    712
    713		/* FALLTHROUGH -- it's a UDP packet */
    714	}
    715
    716	/*
    717	 * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
    718	 */
    719	if ((up->pcflag & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {
    720
    721		if (up->pcrlen == 0) {          /* full coverage was set  */
    722			net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n",
    723					    UDP_SKB_CB(skb)->cscov, skb->len);
    724			goto drop;
    725		}
    726		if (UDP_SKB_CB(skb)->cscov  <  up->pcrlen) {
    727			net_dbg_ratelimited("UDPLITE6: coverage %d too small, need min %d\n",
    728					    UDP_SKB_CB(skb)->cscov, up->pcrlen);
    729			goto drop;
    730		}
    731	}
    732
    733	prefetch(&sk->sk_rmem_alloc);
    734	if (rcu_access_pointer(sk->sk_filter) &&
    735	    udp_lib_checksum_complete(skb))
    736		goto csum_error;
    737
    738	if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr)))
    739		goto drop;
    740
    741	udp_csum_pull_header(skb);
    742
    743	skb_dst_drop(skb);
    744
    745	return __udpv6_queue_rcv_skb(sk, skb);
    746
    747csum_error:
    748	__UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
    749drop:
    750	__UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
    751	atomic_inc(&sk->sk_drops);
    752	kfree_skb(skb);
    753	return -1;
    754}
    755
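       /*
        * Queue entry point from the receive path. A GRO-aggregated packet
        * that reaches a socket unable to accept it (no UDP_GRO) is
        * re-segmented here and each segment is queued individually.
        */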
    756static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
    757{
    758	struct sk_buff *next, *segs;
    759	int ret;
    760
    761	if (likely(!udp_unexpected_gso(sk, skb)))
    762		return udpv6_queue_rcv_one_skb(sk, skb);
    763
    764	__skb_push(skb, -skb_mac_offset(skb));
    765	segs = udp_rcv_segment(sk, skb, false);
    766	skb_list_walk_safe(segs, skb, next) {
    767		__skb_pull(skb, skb_transport_offset(skb));
    768
    769		udp_post_segment_fix_csum(skb);
    770		ret = udpv6_queue_rcv_one_skb(sk, skb);
    771		if (ret > 0)
    772			ip6_protocol_deliver_rcu(dev_net(skb->dev), skb, ret,
    773						 true);
    774	}
    775	return 0;
    776}
    777
    778static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk,
    779				   __be16 loc_port, const struct in6_addr *loc_addr,
    780				   __be16 rmt_port, const struct in6_addr *rmt_addr,
    781				   int dif, int sdif, unsigned short hnum)
    782{
    783	struct inet_sock *inet = inet_sk(sk);
    784
    785	if (!net_eq(sock_net(sk), net))
    786		return false;
    787
    788	if (udp_sk(sk)->udp_port_hash != hnum ||
    789	    sk->sk_family != PF_INET6 ||
    790	    (inet->inet_dport && inet->inet_dport != rmt_port) ||
    791	    (!ipv6_addr_any(&sk->sk_v6_daddr) &&
    792		    !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) ||
    793	    !udp_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif, sdif) ||
    794	    (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) &&
    795		    !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)))
    796		return false;
    797	if (!inet6_mc_check(sk, loc_addr, rmt_addr))
    798		return false;
    799	return true;
    800}
    801
    802static void udp6_csum_zero_error(struct sk_buff *skb)
    803{
    804	/* RFC 2460 section 8.1 says that we SHOULD log
    805	 * this error. Well, it is reasonable.
    806	 */
    807	net_dbg_ratelimited("IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n",
    808			    &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source),
    809			    &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest));
    810}
    811
    812/*
    813 * Note: called only from the BH handler context,
    814 * so we don't need to lock the hashes.
    815 */
    816static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
    817		const struct in6_addr *saddr, const struct in6_addr *daddr,
    818		struct udp_table *udptable, int proto)
    819{
    820	struct sock *sk, *first = NULL;
    821	const struct udphdr *uh = udp_hdr(skb);
    822	unsigned short hnum = ntohs(uh->dest);
    823	struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum);
    824	unsigned int offset = offsetof(typeof(*sk), sk_node);
    825	unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10);
    826	int dif = inet6_iif(skb);
    827	int sdif = inet6_sdif(skb);
    828	struct hlist_node *node;
    829	struct sk_buff *nskb;
    830
    831	if (use_hash2) {
    832		hash2_any = ipv6_portaddr_hash(net, &in6addr_any, hnum) &
    833			    udptable->mask;
    834		hash2 = ipv6_portaddr_hash(net, daddr, hnum) & udptable->mask;
    835start_lookup:
    836		hslot = &udptable->hash2[hash2];
    837		offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
    838	}
    839
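       	/* Deliver a clone to every matching socket; the original skb is
       	 * kept for the first match, so the common single-listener case
       	 * avoids cloning altogether.
       	 */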
    840	sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) {
    841		if (!__udp_v6_is_mcast_sock(net, sk, uh->dest, daddr,
    842					    uh->source, saddr, dif, sdif,
    843					    hnum))
    844			continue;
    845		/* If zero checksum and no_check is not on for
    846		 * the socket then skip it.
    847		 */
    848		if (!uh->check && !udp_sk(sk)->no_check6_rx)
    849			continue;
    850		if (!first) {
    851			first = sk;
    852			continue;
    853		}
    854		nskb = skb_clone(skb, GFP_ATOMIC);
    855		if (unlikely(!nskb)) {
    856			atomic_inc(&sk->sk_drops);
    857			__UDP6_INC_STATS(net, UDP_MIB_RCVBUFERRORS,
    858					 IS_UDPLITE(sk));
    859			__UDP6_INC_STATS(net, UDP_MIB_INERRORS,
    860					 IS_UDPLITE(sk));
    861			continue;
    862		}
    863
    864		if (udpv6_queue_rcv_skb(sk, nskb) > 0)
    865			consume_skb(nskb);
    866	}
    867
    868	/* Also lookup *:port if we are using hash2 and haven't done so yet. */
    869	if (use_hash2 && hash2 != hash2_any) {
    870		hash2 = hash2_any;
    871		goto start_lookup;
    872	}
    873
    874	if (first) {
    875		if (udpv6_queue_rcv_skb(first, skb) > 0)
    876			consume_skb(skb);
    877	} else {
    878		kfree_skb(skb);
    879		__UDP6_INC_STATS(net, UDP_MIB_IGNOREDMULTI,
    880				 proto == IPPROTO_UDPLITE);
    881	}
    882	return 0;
    883}
    884
    885static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
    886{
    887	if (udp_sk_rx_dst_set(sk, dst)) {
    888		const struct rt6_info *rt = (const struct rt6_info *)dst;
    889
    890		sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
    891	}
    892}
    893
    894/* wrapper for udp_queue_rcv_skb taking care of csum conversion and
    895 * return code conversion for IP layer consumption
    896 */
    897static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb,
    898				struct udphdr *uh)
    899{
    900	int ret;
    901
    902	if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk))
    903		skb_checksum_try_convert(skb, IPPROTO_UDP, ip6_compute_pseudo);
    904
    905	ret = udpv6_queue_rcv_skb(sk, skb);
    906
    907	/* a return value > 0 means to resubmit the input */
    908	if (ret > 0)
    909		return ret;
    910	return 0;
    911}
    912
    913int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
    914		   int proto)
    915{
    916	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
    917	const struct in6_addr *saddr, *daddr;
    918	struct net *net = dev_net(skb->dev);
    919	struct udphdr *uh;
    920	struct sock *sk;
    921	bool refcounted;
    922	u32 ulen = 0;
    923
    924	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
    925		goto discard;
    926
    927	saddr = &ipv6_hdr(skb)->saddr;
    928	daddr = &ipv6_hdr(skb)->daddr;
    929	uh = udp_hdr(skb);
    930
    931	ulen = ntohs(uh->len);
    932	if (ulen > skb->len)
    933		goto short_packet;
    934
    935	if (proto == IPPROTO_UDP) {
    936		/* UDP validates ulen. */
    937
    938		/* Check for jumbo payload */
    939		if (ulen == 0)
    940			ulen = skb->len;
    941
    942		if (ulen < sizeof(*uh))
    943			goto short_packet;
    944
    945		if (ulen < skb->len) {
    946			if (pskb_trim_rcsum(skb, ulen))
    947				goto short_packet;
    948			saddr = &ipv6_hdr(skb)->saddr;
    949			daddr = &ipv6_hdr(skb)->daddr;
    950			uh = udp_hdr(skb);
    951		}
    952	}
    953
    954	if (udp6_csum_init(skb, uh, proto))
    955		goto csum_error;
    956
    957	/* Check if the socket is already available, e.g. due to early demux */
    958	sk = skb_steal_sock(skb, &refcounted);
    959	if (sk) {
    960		struct dst_entry *dst = skb_dst(skb);
    961		int ret;
    962
    963		if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst))
    964			udp6_sk_rx_dst_set(sk, dst);
    965
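       		/* A zero UDP checksum is invalid over IPv6 unless the
       		 * receiver opted in via UDP_NO_CHECK6_RX, as used by
       		 * zero-checksum tunnels (RFC 6936).
       		 */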
    966		if (!uh->check && !udp_sk(sk)->no_check6_rx) {
    967			if (refcounted)
    968				sock_put(sk);
    969			goto report_csum_error;
    970		}
    971
    972		ret = udp6_unicast_rcv_skb(sk, skb, uh);
    973		if (refcounted)
    974			sock_put(sk);
    975		return ret;
    976	}
    977
    978	/*
    979	 *	Multicast receive code
    980	 */
    981	if (ipv6_addr_is_multicast(daddr))
    982		return __udp6_lib_mcast_deliver(net, skb,
    983				saddr, daddr, udptable, proto);
    984
    985	/* Unicast */
    986	sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
    987	if (sk) {
    988		if (!uh->check && !udp_sk(sk)->no_check6_rx)
    989			goto report_csum_error;
    990		return udp6_unicast_rcv_skb(sk, skb, uh);
    991	}
    992
    993	reason = SKB_DROP_REASON_NO_SOCKET;
    994
    995	if (!uh->check)
    996		goto report_csum_error;
    997
    998	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
    999		goto discard;
   1000
   1001	if (udp_lib_checksum_complete(skb))
   1002		goto csum_error;
   1003
   1004	__UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
   1005	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
   1006
   1007	kfree_skb_reason(skb, reason);
   1008	return 0;
   1009
   1010short_packet:
   1011	if (reason == SKB_DROP_REASON_NOT_SPECIFIED)
   1012		reason = SKB_DROP_REASON_PKT_TOO_SMALL;
   1013	net_dbg_ratelimited("UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n",
   1014			    proto == IPPROTO_UDPLITE ? "-Lite" : "",
   1015			    saddr, ntohs(uh->source),
   1016			    ulen, skb->len,
   1017			    daddr, ntohs(uh->dest));
   1018	goto discard;
   1019
   1020report_csum_error:
   1021	udp6_csum_zero_error(skb);
   1022csum_error:
   1023	if (reason == SKB_DROP_REASON_NOT_SPECIFIED)
   1024		reason = SKB_DROP_REASON_UDP_CSUM;
   1025	__UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
   1026discard:
   1027	__UDP6_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
   1028	kfree_skb_reason(skb, reason);
   1029	return 0;
   1030}
   1031
   1032
   1033static struct sock *__udp6_lib_demux_lookup(struct net *net,
   1034			__be16 loc_port, const struct in6_addr *loc_addr,
   1035			__be16 rmt_port, const struct in6_addr *rmt_addr,
   1036			int dif, int sdif)
   1037{
   1038	unsigned short hnum = ntohs(loc_port);
   1039	unsigned int hash2 = ipv6_portaddr_hash(net, loc_addr, hnum);
   1040	unsigned int slot2 = hash2 & udp_table.mask;
   1041	struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
   1042	const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
   1043	struct sock *sk;
   1044
   1045	udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
   1046		if (sk->sk_state == TCP_ESTABLISHED &&
   1047		    inet6_match(net, sk, rmt_addr, loc_addr, ports, dif, sdif))
   1048			return sk;
   1049		/* Only check first socket in chain */
   1050		break;
   1051	}
   1052	return NULL;
   1053}
   1054
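       /*
        * Early demux: on the hot receive path, try to match a connected
        * UDP socket before route lookup so that its cached rx dst can be
        * reused. Only the first socket in the hash2 chain is considered.
        */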
   1055INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb)
   1056{
   1057	struct net *net = dev_net(skb->dev);
   1058	const struct udphdr *uh;
   1059	struct sock *sk;
   1060	struct dst_entry *dst;
   1061	int dif = skb->dev->ifindex;
   1062	int sdif = inet6_sdif(skb);
   1063
   1064	if (!pskb_may_pull(skb, skb_transport_offset(skb) +
   1065	    sizeof(struct udphdr)))
   1066		return;
   1067
   1068	uh = udp_hdr(skb);
   1069
   1070	if (skb->pkt_type == PACKET_HOST)
   1071		sk = __udp6_lib_demux_lookup(net, uh->dest,
   1072					     &ipv6_hdr(skb)->daddr,
   1073					     uh->source, &ipv6_hdr(skb)->saddr,
   1074					     dif, sdif);
   1075	else
   1076		return;
   1077
   1078	if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt))
   1079		return;
   1080
   1081	skb->sk = sk;
   1082	skb->destructor = sock_efree;
   1083	dst = rcu_dereference(sk->sk_rx_dst);
   1084
   1085	if (dst)
   1086		dst = dst_check(dst, sk->sk_rx_dst_cookie);
   1087	if (dst) {
   1088		/* Set noref for now.
   1089		 * Any place that wants to hold the dst has to call
   1090		 * dst_hold_safe().
   1091		 */
   1092		skb_dst_set_noref(skb, dst);
   1093	}
   1094}
   1095
   1096INDIRECT_CALLABLE_SCOPE int udpv6_rcv(struct sk_buff *skb)
   1097{
   1098	return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP);
   1099}
   1100
   1101/*
   1102 * Throw away all pending data and cancel the corking. Socket is locked.
   1103 */
   1104static void udp_v6_flush_pending_frames(struct sock *sk)
   1105{
   1106	struct udp_sock *up = udp_sk(sk);
   1107
   1108	if (up->pending == AF_INET)
   1109		udp_flush_pending_frames(sk);
   1110	else if (up->pending) {
   1111		up->len = 0;
   1112		up->pending = 0;
   1113		ip6_flush_pending_frames(sk);
   1114	}
   1115}
   1116
   1117static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
   1118			     int addr_len)
   1119{
   1120	if (addr_len < offsetofend(struct sockaddr, sa_family))
   1121		return -EINVAL;
   1122	/* The following checks are replicated from __ip6_datagram_connect()
   1123	 * and intended to prevent the BPF program called below from accessing
   1124	 * bytes that are outside the bound specified by the user in addr_len.
   1125	 */
   1126	if (uaddr->sa_family == AF_INET) {
   1127		if (ipv6_only_sock(sk))
   1128			return -EAFNOSUPPORT;
   1129		return udp_pre_connect(sk, uaddr, addr_len);
   1130	}
   1131
   1132	if (addr_len < SIN6_LEN_RFC2133)
   1133		return -EINVAL;
   1134
   1135	return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr);
   1136}
   1137
   1138/**
   1139 *	udp6_hwcsum_outgoing  -  handle outgoing HW checksumming
   1140 *	@sk:	socket we are sending on
   1141 *	@skb:	sk_buff containing the filled-in UDP header
   1142 *		(checksum field must be zeroed out)
   1143 *	@saddr: source address
   1144 *	@daddr: destination address
   1145 *	@len:	length of packet
   1146 */
   1147static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
   1148				 const struct in6_addr *saddr,
   1149				 const struct in6_addr *daddr, int len)
   1150{
   1151	unsigned int offset;
   1152	struct udphdr *uh = udp_hdr(skb);
   1153	struct sk_buff *frags = skb_shinfo(skb)->frag_list;
   1154	__wsum csum = 0;
   1155
   1156	if (!frags) {
   1157		/* Only one fragment on the socket.  */
   1158		skb->csum_start = skb_transport_header(skb) - skb->head;
   1159		skb->csum_offset = offsetof(struct udphdr, check);
   1160		uh->check = ~csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, 0);
   1161	} else {
   1162		/*
   1163		 * HW checksum won't work when there are two or more
   1164		 * fragments on the socket; the csums of all sk_buffs
   1165		 * must be combined in software instead.
   1166		 */
   1167		offset = skb_transport_offset(skb);
   1168		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
   1169		csum = skb->csum;
   1170
   1171		skb->ip_summed = CHECKSUM_NONE;
   1172
   1173		do {
   1174			csum = csum_add(csum, frags->csum);
   1175		} while ((frags = frags->next));
   1176
   1177		uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP,
   1178					    csum);
   1179		if (uh->check == 0)
   1180			uh->check = CSUM_MANGLED_0;
   1181	}
   1182}
   1183
   1184/*
   1185 *	Sending
   1186 */
   1187
   1188static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
   1189			   struct inet_cork *cork)
   1190{
   1191	struct sock *sk = skb->sk;
   1192	struct udphdr *uh;
   1193	int err = 0;
   1194	int is_udplite = IS_UDPLITE(sk);
   1195	__wsum csum = 0;
   1196	int offset = skb_transport_offset(skb);
   1197	int len = skb->len - offset;
   1198	int datalen = len - sizeof(*uh);
   1199
   1200	/*
   1201	 * Create a UDP header
   1202	 */
   1203	uh = udp_hdr(skb);
   1204	uh->source = fl6->fl6_sport;
   1205	uh->dest = fl6->fl6_dport;
   1206	uh->len = htons(len);
   1207	uh->check = 0;
   1208
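       	/* UDP_SEGMENT (GSO) path: check that segmentation is possible,
       	 * then mark the skb to be split at gso_size byte boundaries.
       	 * A checksum is mandatory here, so no_check6_tx and UDP-Lite
       	 * are rejected.
       	 */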
   1209	if (cork->gso_size) {
   1210		const int hlen = skb_network_header_len(skb) +
   1211				 sizeof(struct udphdr);
   1212
   1213		if (hlen + cork->gso_size > cork->fragsize) {
   1214			kfree_skb(skb);
   1215			return -EINVAL;
   1216		}
   1217		if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
   1218			kfree_skb(skb);
   1219			return -EINVAL;
   1220		}
   1221		if (udp_sk(sk)->no_check6_tx) {
   1222			kfree_skb(skb);
   1223			return -EINVAL;
   1224		}
   1225		if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
   1226		    dst_xfrm(skb_dst(skb))) {
   1227			kfree_skb(skb);
   1228			return -EIO;
   1229		}
   1230
   1231		if (datalen > cork->gso_size) {
   1232			skb_shinfo(skb)->gso_size = cork->gso_size;
   1233			skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
   1234			skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen,
   1235								 cork->gso_size);
   1236		}
   1237		goto csum_partial;
   1238	}
   1239
   1240	if (is_udplite)
   1241		csum = udplite_csum(skb);
   1242	else if (udp_sk(sk)->no_check6_tx) {   /* UDP csum disabled */
   1243		skb->ip_summed = CHECKSUM_NONE;
   1244		goto send;
   1245	} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
   1246csum_partial:
   1247		udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, len);
   1248		goto send;
   1249	} else
   1250		csum = udp_csum(skb);
   1251
   1252	/* add protocol-dependent pseudo-header */
   1253	uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr,
   1254				    len, fl6->flowi6_proto, csum);
   1255	if (uh->check == 0)
   1256		uh->check = CSUM_MANGLED_0;
   1257
   1258send:
   1259	err = ip6_send_skb(skb);
   1260	if (err) {
   1261		if (err == -ENOBUFS && !inet6_sk(sk)->recverr) {
   1262			UDP6_INC_STATS(sock_net(sk),
   1263				       UDP_MIB_SNDBUFERRORS, is_udplite);
   1264			err = 0;
   1265		}
   1266	} else {
   1267		UDP6_INC_STATS(sock_net(sk),
   1268			       UDP_MIB_OUTDATAGRAMS, is_udplite);
   1269	}
   1270	return err;
   1271}
   1272
   1273static int udp_v6_push_pending_frames(struct sock *sk)
   1274{
   1275	struct sk_buff *skb;
   1276	struct udp_sock  *up = udp_sk(sk);
   1277	int err = 0;
   1278
   1279	if (up->pending == AF_INET)
   1280		return udp_push_pending_frames(sk);
   1281
   1282	skb = ip6_finish_skb(sk);
   1283	if (!skb)
   1284		goto out;
   1285
   1286	err = udp_v6_send_skb(skb, &inet_sk(sk)->cork.fl.u.ip6,
   1287			      &inet_sk(sk)->cork.base);
   1288out:
   1289	up->len = 0;
   1290	up->pending = 0;
   1291	return err;
   1292}
   1293
   1294int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
   1295{
   1296	struct ipv6_txoptions opt_space;
   1297	struct udp_sock *up = udp_sk(sk);
   1298	struct inet_sock *inet = inet_sk(sk);
   1299	struct ipv6_pinfo *np = inet6_sk(sk);
   1300	DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
   1301	struct in6_addr *daddr, *final_p, final;
   1302	struct ipv6_txoptions *opt = NULL;
   1303	struct ipv6_txoptions *opt_to_free = NULL;
   1304	struct ip6_flowlabel *flowlabel = NULL;
   1305	struct inet_cork_full cork;
   1306	struct flowi6 *fl6 = &cork.fl.u.ip6;
   1307	struct dst_entry *dst;
   1308	struct ipcm6_cookie ipc6;
   1309	int addr_len = msg->msg_namelen;
   1310	bool connected = false;
   1311	int ulen = len;
   1312	int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
   1313	int err;
   1314	int is_udplite = IS_UDPLITE(sk);
   1315	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
   1316
   1317	ipcm6_init(&ipc6);
   1318	ipc6.gso_size = READ_ONCE(up->gso_size);
   1319	ipc6.sockc.tsflags = sk->sk_tsflags;
   1320	ipc6.sockc.mark = sk->sk_mark;
   1321
   1322	/* destination address check */
   1323	if (sin6) {
   1324		if (addr_len < offsetof(struct sockaddr, sa_data))
   1325			return -EINVAL;
   1326
   1327		switch (sin6->sin6_family) {
   1328		case AF_INET6:
   1329			if (addr_len < SIN6_LEN_RFC2133)
   1330				return -EINVAL;
   1331			daddr = &sin6->sin6_addr;
   1332			if (ipv6_addr_any(daddr) &&
   1333			    ipv6_addr_v4mapped(&np->saddr))
   1334				ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
   1335						       daddr);
   1336			break;
   1337		case AF_INET:
   1338			goto do_udp_sendmsg;
   1339		case AF_UNSPEC:
   1340			msg->msg_name = sin6 = NULL;
   1341			msg->msg_namelen = addr_len = 0;
   1342			daddr = NULL;
   1343			break;
   1344		default:
   1345			return -EINVAL;
   1346		}
   1347	} else if (!up->pending) {
   1348		if (sk->sk_state != TCP_ESTABLISHED)
   1349			return -EDESTADDRREQ;
   1350		daddr = &sk->sk_v6_daddr;
   1351	} else
   1352		daddr = NULL;
   1353
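       	/* A v4-mapped destination means the traffic is really IPv4:
       	 * rewrite the address to AF_INET and hand off to udp_sendmsg(),
       	 * unless the socket is restricted to IPv6 (IPV6_V6ONLY).
       	 */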
   1354	if (daddr) {
   1355		if (ipv6_addr_v4mapped(daddr)) {
   1356			struct sockaddr_in sin;
   1357			sin.sin_family = AF_INET;
   1358			sin.sin_port = sin6 ? sin6->sin6_port : inet->inet_dport;
   1359			sin.sin_addr.s_addr = daddr->s6_addr32[3];
   1360			msg->msg_name = &sin;
   1361			msg->msg_namelen = sizeof(sin);
   1362do_udp_sendmsg:
   1363			if (ipv6_only_sock(sk))
   1364				return -ENETUNREACH;
   1365			return udp_sendmsg(sk, msg, len);
   1366		}
   1367	}
   1368
   1369	/* Rough check on arithmetic overflow;
   1370	   a better check is made in ip6_append_data().
   1371	   */
   1372	if (len > INT_MAX - sizeof(struct udphdr))
   1373		return -EMSGSIZE;
   1374
   1375	getfrag  =  is_udplite ?  udplite_getfrag : ip_generic_getfrag;
   1376	if (up->pending) {
   1377		if (up->pending == AF_INET)
   1378			return udp_sendmsg(sk, msg, len);
   1379		/*
   1380		 * There are pending frames.
   1381		 * The socket lock must be held while it's corked.
   1382		 */
   1383		lock_sock(sk);
   1384		if (likely(up->pending)) {
   1385			if (unlikely(up->pending != AF_INET6)) {
   1386				release_sock(sk);
   1387				return -EAFNOSUPPORT;
   1388			}
   1389			dst = NULL;
   1390			goto do_append_data;
   1391		}
   1392		release_sock(sk);
   1393	}
   1394	ulen += sizeof(struct udphdr);
   1395
   1396	memset(fl6, 0, sizeof(*fl6));
   1397
   1398	if (sin6) {
   1399		if (sin6->sin6_port == 0)
   1400			return -EINVAL;
   1401
   1402		fl6->fl6_dport = sin6->sin6_port;
   1403		daddr = &sin6->sin6_addr;
   1404
   1405		if (np->sndflow) {
   1406			fl6->flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
   1407			if (fl6->flowlabel & IPV6_FLOWLABEL_MASK) {
   1408				flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
   1409				if (IS_ERR(flowlabel))
   1410					return -EINVAL;
   1411			}
   1412		}
   1413
   1414		/*
   1415		 * Otherwise it will be difficult to maintain
   1416		 * sk->sk_dst_cache.
   1417		 */
   1418		if (sk->sk_state == TCP_ESTABLISHED &&
   1419		    ipv6_addr_equal(daddr, &sk->sk_v6_daddr))
   1420			daddr = &sk->sk_v6_daddr;
   1421
   1422		if (addr_len >= sizeof(struct sockaddr_in6) &&
   1423		    sin6->sin6_scope_id &&
   1424		    __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr)))
   1425			fl6->flowi6_oif = sin6->sin6_scope_id;
   1426	} else {
   1427		if (sk->sk_state != TCP_ESTABLISHED)
   1428			return -EDESTADDRREQ;
   1429
   1430		fl6->fl6_dport = inet->inet_dport;
   1431		daddr = &sk->sk_v6_daddr;
   1432		fl6->flowlabel = np->flow_label;
   1433		connected = true;
   1434	}
   1435
   1436	if (!fl6->flowi6_oif)
   1437		fl6->flowi6_oif = READ_ONCE(sk->sk_bound_dev_if);
   1438
   1439	if (!fl6->flowi6_oif)
   1440		fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
   1441
   1442	fl6->flowi6_uid = sk->sk_uid;
   1443
   1444	if (msg->msg_controllen) {
   1445		opt = &opt_space;
   1446		memset(opt, 0, sizeof(struct ipv6_txoptions));
   1447		opt->tot_len = sizeof(*opt);
   1448		ipc6.opt = opt;
   1449
   1450		err = udp_cmsg_send(sk, msg, &ipc6.gso_size);
   1451		if (err > 0)
   1452			err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, fl6,
   1453						    &ipc6);
   1454		if (err < 0) {
   1455			fl6_sock_release(flowlabel);
   1456			return err;
   1457		}
   1458		if ((fl6->flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
   1459			flowlabel = fl6_sock_lookup(sk, fl6->flowlabel);
   1460			if (IS_ERR(flowlabel))
   1461				return -EINVAL;
   1462		}
   1463		if (!(opt->opt_nflen|opt->opt_flen))
   1464			opt = NULL;
   1465		connected = false;
   1466	}
   1467	if (!opt) {
   1468		opt = txopt_get(np);
   1469		opt_to_free = opt;
   1470	}
   1471	if (flowlabel)
   1472		opt = fl6_merge_options(&opt_space, flowlabel, opt);
   1473	opt = ipv6_fixup_options(&opt_space, opt);
   1474	ipc6.opt = opt;
   1475
   1476	fl6->flowi6_proto = sk->sk_protocol;
   1477	fl6->flowi6_mark = ipc6.sockc.mark;
   1478	fl6->daddr = *daddr;
   1479	if (ipv6_addr_any(&fl6->saddr) && !ipv6_addr_any(&np->saddr))
   1480		fl6->saddr = np->saddr;
   1481	fl6->fl6_sport = inet->inet_sport;
   1482
   1483	if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) {
   1484		err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,
   1485					   (struct sockaddr *)sin6,
   1486					   &fl6->saddr);
   1487		if (err)
   1488			goto out_no_dst;
   1489		if (sin6) {
   1490			if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
   1491				/* BPF program rewrote IPv6-only by IPv4-mapped
   1492				/* BPF program rewrote the IPv6 address to an
   1493				 * IPv4-mapped one, which is currently unsupported.
   1494				err = -ENOTSUPP;
   1495				goto out_no_dst;
   1496			}
   1497			if (sin6->sin6_port == 0) {
   1498				/* BPF program set invalid port. Reject it. */
   1499				err = -EINVAL;
   1500				goto out_no_dst;
   1501			}
   1502			fl6->fl6_dport = sin6->sin6_port;
   1503			fl6->daddr = sin6->sin6_addr;
   1504		}
   1505	}
   1506
   1507	if (ipv6_addr_any(&fl6->daddr))
   1508		fl6->daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
   1509
   1510	final_p = fl6_update_dst(fl6, opt, &final);
   1511	if (final_p)
   1512		connected = false;
   1513
   1514	if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr)) {
   1515		fl6->flowi6_oif = np->mcast_oif;
   1516		connected = false;
   1517	} else if (!fl6->flowi6_oif)
   1518		fl6->flowi6_oif = np->ucast_oif;
   1519
   1520	security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
   1521
   1522	if (ipc6.tclass < 0)
   1523		ipc6.tclass = np->tclass;
   1524
   1525	fl6->flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6->flowlabel);
   1526
   1527	dst = ip6_sk_dst_lookup_flow(sk, fl6, final_p, connected);
   1528	if (IS_ERR(dst)) {
   1529		err = PTR_ERR(dst);
   1530		dst = NULL;
   1531		goto out;
   1532	}
   1533
   1534	if (ipc6.hlimit < 0)
   1535		ipc6.hlimit = ip6_sk_dst_hoplimit(np, fl6, dst);
   1536
   1537	if (msg->msg_flags&MSG_CONFIRM)
   1538		goto do_confirm;
   1539back_from_confirm:
   1540
   1541	/* Lockless fast path for the non-corking case */
   1542	if (!corkreq) {
   1543		struct sk_buff *skb;
   1544
   1545		skb = ip6_make_skb(sk, getfrag, msg, ulen,
   1546				   sizeof(struct udphdr), &ipc6,
   1547				   (struct rt6_info *)dst,
   1548				   msg->msg_flags, &cork);
   1549		err = PTR_ERR(skb);
   1550		if (!IS_ERR_OR_NULL(skb))
   1551			err = udp_v6_send_skb(skb, fl6, &cork.base);
   1552		/* ip6_make_skb steals dst reference */
   1553		goto out_no_dst;
   1554	}
   1555
   1556	lock_sock(sk);
   1557	if (unlikely(up->pending)) {
   1558		/* The socket is already corked while preparing it. */
   1559		/* ... which is an evident application bug. --ANK */
   1560		release_sock(sk);
   1561
   1562		net_dbg_ratelimited("udp cork app bug 2\n");
   1563		err = -EINVAL;
   1564		goto out;
   1565	}
   1566
   1567	up->pending = AF_INET6;
   1568
   1569do_append_data:
   1570	if (ipc6.dontfrag < 0)
   1571		ipc6.dontfrag = np->dontfrag;
   1572	up->len += ulen;
   1573	err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr),
   1574			      &ipc6, fl6, (struct rt6_info *)dst,
   1575			      corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
   1576	if (err)
   1577		udp_v6_flush_pending_frames(sk);
   1578	else if (!corkreq)
   1579		err = udp_v6_push_pending_frames(sk);
   1580	else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
   1581		up->pending = 0;
   1582
   1583	if (err > 0)
   1584		err = np->recverr ? net_xmit_errno(err) : 0;
   1585	release_sock(sk);
   1586
   1587out:
   1588	dst_release(dst);
   1589out_no_dst:
   1590	fl6_sock_release(flowlabel);
   1591	txopt_put(opt_to_free);
   1592	if (!err)
   1593		return len;
   1594	/*
   1595	 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space.  Reporting
   1596	 * ENOBUFS might not be good (it's not tunable per se), but otherwise
   1597	 * we don't have a good statistic (IpOutDiscards but it can be too many
   1598	 * things).  We could add another new stat but at least for now that
   1599	 * seems like overkill.
   1600	 */
   1601	if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
   1602		UDP6_INC_STATS(sock_net(sk),
   1603			       UDP_MIB_SNDBUFERRORS, is_udplite);
   1604	}
   1605	return err;
   1606
   1607do_confirm:
   1608	if (msg->msg_flags & MSG_PROBE)
   1609		dst_confirm_neigh(dst, &fl6->daddr);
   1610	if (!(msg->msg_flags&MSG_PROBE) || len)
   1611		goto back_from_confirm;
   1612	err = 0;
   1613	goto out;
   1614}
   1615
   1616void udpv6_destroy_sock(struct sock *sk)
   1617{
   1618	struct udp_sock *up = udp_sk(sk);
   1619	lock_sock(sk);
   1620
   1621	/* protects from races with udp_abort() */
   1622	sock_set_flag(sk, SOCK_DEAD);
   1623	udp_v6_flush_pending_frames(sk);
   1624	release_sock(sk);
   1625
   1626	if (static_branch_unlikely(&udpv6_encap_needed_key)) {
   1627		if (up->encap_type) {
   1628			void (*encap_destroy)(struct sock *sk);
   1629			encap_destroy = READ_ONCE(up->encap_destroy);
   1630			if (encap_destroy)
   1631				encap_destroy(sk);
   1632		}
   1633		if (up->encap_enabled) {
   1634			static_branch_dec(&udpv6_encap_needed_key);
   1635			udp_encap_disable();
   1636		}
   1637	}
   1638
   1639	inet6_destroy_sock(sk);
   1640}
   1641
   1642/*
   1643 *	Socket option code for UDP
   1644 */
   1645int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
   1646		     unsigned int optlen)
   1647{
   1648	if (level == SOL_UDP  ||  level == SOL_UDPLITE)
   1649		return udp_lib_setsockopt(sk, level, optname,
   1650					  optval, optlen,
   1651					  udp_v6_push_pending_frames);
   1652	return ipv6_setsockopt(sk, level, optname, optval, optlen);
   1653}
   1654
   1655int udpv6_getsockopt(struct sock *sk, int level, int optname,
   1656		     char __user *optval, int __user *optlen)
   1657{
   1658	if (level == SOL_UDP  ||  level == SOL_UDPLITE)
   1659		return udp_lib_getsockopt(sk, level, optname, optval, optlen);
   1660	return ipv6_getsockopt(sk, level, optname, optval, optlen);
   1661}
   1662
   1663/* thinking of making this const? Don't.
   1664 * early_demux can change based on sysctl.
   1665 */
   1666static struct inet6_protocol udpv6_protocol = {
   1667	.early_demux	=	udp_v6_early_demux,
   1668	.early_demux_handler =  udp_v6_early_demux,
   1669	.handler	=	udpv6_rcv,
   1670	.err_handler	=	udpv6_err,
   1671	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
   1672};
   1673
   1674/* ------------------------------------------------------------------------ */
   1675#ifdef CONFIG_PROC_FS
   1676int udp6_seq_show(struct seq_file *seq, void *v)
   1677{
   1678	if (v == SEQ_START_TOKEN) {
   1679		seq_puts(seq, IPV6_SEQ_DGRAM_HEADER);
   1680	} else {
   1681		int bucket = ((struct udp_iter_state *)seq->private)->bucket;
   1682		struct inet_sock *inet = inet_sk(v);
   1683		__u16 srcp = ntohs(inet->inet_sport);
   1684		__u16 destp = ntohs(inet->inet_dport);
   1685		__ip6_dgram_sock_seq_show(seq, v, srcp, destp,
   1686					  udp_rqueue_get(v), bucket);
   1687	}
   1688	return 0;
   1689}
   1690
   1691const struct seq_operations udp6_seq_ops = {
   1692	.start		= udp_seq_start,
   1693	.next		= udp_seq_next,
   1694	.stop		= udp_seq_stop,
   1695	.show		= udp6_seq_show,
   1696};
   1697EXPORT_SYMBOL(udp6_seq_ops);
   1698
   1699static struct udp_seq_afinfo udp6_seq_afinfo = {
   1700	.family		= AF_INET6,
   1701	.udp_table	= &udp_table,
   1702};
   1703
   1704int __net_init udp6_proc_init(struct net *net)
   1705{
   1706	if (!proc_create_net_data("udp6", 0444, net->proc_net, &udp6_seq_ops,
   1707			sizeof(struct udp_iter_state), &udp6_seq_afinfo))
   1708		return -ENOMEM;
   1709	return 0;
   1710}
   1711
   1712void udp6_proc_exit(struct net *net)
   1713{
   1714	remove_proc_entry("udp6", net->proc_net);
   1715}
   1716#endif /* CONFIG_PROC_FS */
   1717
   1718/* ------------------------------------------------------------------------ */
   1719
   1720struct proto udpv6_prot = {
   1721	.name			= "UDPv6",
   1722	.owner			= THIS_MODULE,
   1723	.close			= udp_lib_close,
   1724	.pre_connect		= udpv6_pre_connect,
   1725	.connect		= ip6_datagram_connect,
   1726	.disconnect		= udp_disconnect,
   1727	.ioctl			= udp_ioctl,
   1728	.init			= udp_init_sock,
   1729	.destroy		= udpv6_destroy_sock,
   1730	.setsockopt		= udpv6_setsockopt,
   1731	.getsockopt		= udpv6_getsockopt,
   1732	.sendmsg		= udpv6_sendmsg,
   1733	.recvmsg		= udpv6_recvmsg,
   1734	.release_cb		= ip6_datagram_release_cb,
   1735	.hash			= udp_lib_hash,
   1736	.unhash			= udp_lib_unhash,
   1737	.rehash			= udp_v6_rehash,
   1738	.get_port		= udp_v6_get_port,
   1739	.put_port		= udp_lib_unhash,
   1740#ifdef CONFIG_BPF_SYSCALL
   1741	.psock_update_sk_prot	= udp_bpf_update_proto,
   1742#endif
   1743	.memory_allocated	= &udp_memory_allocated,
   1744	.sysctl_mem		= sysctl_udp_mem,
   1745	.sysctl_wmem_offset     = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
   1746	.sysctl_rmem_offset     = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
   1747	.obj_size		= sizeof(struct udp6_sock),
   1748	.h.udp_table		= &udp_table,
   1749	.diag_destroy		= udp_abort,
   1750};
   1751
   1752static struct inet_protosw udpv6_protosw = {
   1753	.type =      SOCK_DGRAM,
   1754	.protocol =  IPPROTO_UDP,
   1755	.prot =      &udpv6_prot,
   1756	.ops =       &inet6_dgram_ops,
   1757	.flags =     INET_PROTOSW_PERMANENT,
   1758};
   1759
   1760int __init udpv6_init(void)
   1761{
   1762	int ret;
   1763
   1764	ret = inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP);
   1765	if (ret)
   1766		goto out;
   1767
   1768	ret = inet6_register_protosw(&udpv6_protosw);
   1769	if (ret)
   1770		goto out_udpv6_protocol;
   1771out:
   1772	return ret;
   1773
   1774out_udpv6_protocol:
   1775	inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
   1776	goto out;
   1777}
   1778
   1779void udpv6_exit(void)
   1780{
   1781	inet6_unregister_protosw(&udpv6_protosw);
   1782	inet6_del_protocol(&udpv6_protocol, IPPROTO_UDP);
   1783}