cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

ip_sockglue.c (42132B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * INET		An implementation of the TCP/IP protocol suite for the LINUX
      4 *		operating system.  INET is implemented using the  BSD Socket
      5 *		interface as the means of communication with the user level.
      6 *
      7 *		The IP to API glue.
      8 *
      9 * Authors:	see ip.c
     10 *
     11 * Fixes:
     12 *		Many		:	Split from ip.c , see ip.c for history.
     13 *		Martin Mares	:	TOS setting fixed.
     14 *		Alan Cox	:	Fixed a couple of oopses in Martin's
     15 *					TOS tweaks.
     16 *		Mike McLagan	:	Routing by source
     17 */
     18
     19#include <linux/module.h>
     20#include <linux/types.h>
     21#include <linux/mm.h>
     22#include <linux/skbuff.h>
     23#include <linux/ip.h>
     24#include <linux/icmp.h>
     25#include <linux/inetdevice.h>
     26#include <linux/netdevice.h>
     27#include <linux/slab.h>
     28#include <net/sock.h>
     29#include <net/ip.h>
     30#include <net/icmp.h>
     31#include <net/tcp_states.h>
     32#include <linux/udp.h>
     33#include <linux/igmp.h>
     34#include <linux/netfilter.h>
     35#include <linux/route.h>
     36#include <linux/mroute.h>
     37#include <net/inet_ecn.h>
     38#include <net/route.h>
     39#include <net/xfrm.h>
     40#include <net/compat.h>
     41#include <net/checksum.h>
     42#if IS_ENABLED(CONFIG_IPV6)
     43#include <net/transp_v6.h>
     44#endif
     45#include <net/ip_fib.h>
     46
     47#include <linux/errqueue.h>
     48#include <linux/uaccess.h>
     49
     50#include <linux/bpfilter.h>
     51
     52/*
     53 *	SOL_IP control messages.
     54 */
     55
     56static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
     57{
     58	struct in_pktinfo info = *PKTINFO_SKB_CB(skb);
     59
     60	info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
     61
     62	put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
     63}
     64
     65static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb)
     66{
     67	int ttl = ip_hdr(skb)->ttl;
     68	put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl);
     69}
     70
     71static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb)
     72{
     73	put_cmsg(msg, SOL_IP, IP_TOS, 1, &ip_hdr(skb)->tos);
     74}
     75
     76static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
     77{
     78	if (IPCB(skb)->opt.optlen == 0)
     79		return;
     80
     81	put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen,
     82		 ip_hdr(skb) + 1);
     83}
     84
     85
     86static void ip_cmsg_recv_retopts(struct net *net, struct msghdr *msg,
     87				 struct sk_buff *skb)
     88{
     89	unsigned char optbuf[sizeof(struct ip_options) + 40];
     90	struct ip_options *opt = (struct ip_options *)optbuf;
     91
     92	if (IPCB(skb)->opt.optlen == 0)
     93		return;
     94
     95	if (ip_options_echo(net, opt, skb)) {
     96		msg->msg_flags |= MSG_CTRUNC;
     97		return;
     98	}
     99	ip_options_undo(opt);
    100
    101	put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data);
    102}
    103
    104static void ip_cmsg_recv_fragsize(struct msghdr *msg, struct sk_buff *skb)
    105{
    106	int val;
    107
    108	if (IPCB(skb)->frag_max_size == 0)
    109		return;
    110
    111	val = IPCB(skb)->frag_max_size;
    112	put_cmsg(msg, SOL_IP, IP_RECVFRAGSIZE, sizeof(val), &val);
    113}
    114
    115static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb,
    116				  int tlen, int offset)
    117{
    118	__wsum csum = skb->csum;
    119
    120	if (skb->ip_summed != CHECKSUM_COMPLETE)
    121		return;
    122
    123	if (offset != 0) {
    124		int tend_off = skb_transport_offset(skb) + tlen;
    125		csum = csum_sub(csum, skb_checksum(skb, tend_off, offset, 0));
    126	}
    127
    128	put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum);
    129}
    130
    131static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
    132{
    133	char *secdata;
    134	u32 seclen, secid;
    135	int err;
    136
    137	err = security_socket_getpeersec_dgram(NULL, skb, &secid);
    138	if (err)
    139		return;
    140
    141	err = security_secid_to_secctx(secid, &secdata, &seclen);
    142	if (err)
    143		return;
    144
    145	put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata);
    146	security_release_secctx(secdata, seclen);
    147}
    148
    149static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
    150{
    151	__be16 _ports[2], *ports;
    152	struct sockaddr_in sin;
    153
    154	/* All current transport protocols have the port numbers in the
    155	 * first four bytes of the transport header and this function is
    156	 * written with this assumption in mind.
    157	 */
    158	ports = skb_header_pointer(skb, skb_transport_offset(skb),
    159				   sizeof(_ports), &_ports);
    160	if (!ports)
    161		return;
    162
    163	sin.sin_family = AF_INET;
    164	sin.sin_addr.s_addr = ip_hdr(skb)->daddr;
    165	sin.sin_port = ports[1];
    166	memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
    167
    168	put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin);
    169}
    170
    171void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
    172			 struct sk_buff *skb, int tlen, int offset)
    173{
    174	struct inet_sock *inet = inet_sk(sk);
    175	unsigned int flags = inet->cmsg_flags;
    176
    177	/* Ordered by supposed usage frequency */
    178	if (flags & IP_CMSG_PKTINFO) {
    179		ip_cmsg_recv_pktinfo(msg, skb);
    180
    181		flags &= ~IP_CMSG_PKTINFO;
    182		if (!flags)
    183			return;
    184	}
    185
    186	if (flags & IP_CMSG_TTL) {
    187		ip_cmsg_recv_ttl(msg, skb);
    188
    189		flags &= ~IP_CMSG_TTL;
    190		if (!flags)
    191			return;
    192	}
    193
    194	if (flags & IP_CMSG_TOS) {
    195		ip_cmsg_recv_tos(msg, skb);
    196
    197		flags &= ~IP_CMSG_TOS;
    198		if (!flags)
    199			return;
    200	}
    201
    202	if (flags & IP_CMSG_RECVOPTS) {
    203		ip_cmsg_recv_opts(msg, skb);
    204
    205		flags &= ~IP_CMSG_RECVOPTS;
    206		if (!flags)
    207			return;
    208	}
    209
    210	if (flags & IP_CMSG_RETOPTS) {
    211		ip_cmsg_recv_retopts(sock_net(sk), msg, skb);
    212
    213		flags &= ~IP_CMSG_RETOPTS;
    214		if (!flags)
    215			return;
    216	}
    217
    218	if (flags & IP_CMSG_PASSSEC) {
    219		ip_cmsg_recv_security(msg, skb);
    220
    221		flags &= ~IP_CMSG_PASSSEC;
    222		if (!flags)
    223			return;
    224	}
    225
    226	if (flags & IP_CMSG_ORIGDSTADDR) {
    227		ip_cmsg_recv_dstaddr(msg, skb);
    228
    229		flags &= ~IP_CMSG_ORIGDSTADDR;
    230		if (!flags)
    231			return;
    232	}
    233
    234	if (flags & IP_CMSG_CHECKSUM)
    235		ip_cmsg_recv_checksum(msg, skb, tlen, offset);
    236
    237	if (flags & IP_CMSG_RECVFRAGSIZE)
    238		ip_cmsg_recv_fragsize(msg, skb);
    239}
    240EXPORT_SYMBOL(ip_cmsg_recv_offset);
    241
    242int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
    243		 bool allow_ipv6)
    244{
    245	int err, val;
    246	struct cmsghdr *cmsg;
    247	struct net *net = sock_net(sk);
    248
    249	for_each_cmsghdr(cmsg, msg) {
    250		if (!CMSG_OK(msg, cmsg))
    251			return -EINVAL;
    252#if IS_ENABLED(CONFIG_IPV6)
    253		if (allow_ipv6 &&
    254		    cmsg->cmsg_level == SOL_IPV6 &&
    255		    cmsg->cmsg_type == IPV6_PKTINFO) {
    256			struct in6_pktinfo *src_info;
    257
    258			if (cmsg->cmsg_len < CMSG_LEN(sizeof(*src_info)))
    259				return -EINVAL;
    260			src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
    261			if (!ipv6_addr_v4mapped(&src_info->ipi6_addr))
    262				return -EINVAL;
    263			if (src_info->ipi6_ifindex)
    264				ipc->oif = src_info->ipi6_ifindex;
    265			ipc->addr = src_info->ipi6_addr.s6_addr32[3];
    266			continue;
    267		}
    268#endif
    269		if (cmsg->cmsg_level == SOL_SOCKET) {
    270			err = __sock_cmsg_send(sk, msg, cmsg, &ipc->sockc);
    271			if (err)
    272				return err;
    273			continue;
    274		}
    275
    276		if (cmsg->cmsg_level != SOL_IP)
    277			continue;
    278		switch (cmsg->cmsg_type) {
    279		case IP_RETOPTS:
    280			err = cmsg->cmsg_len - sizeof(struct cmsghdr);
    281
    282			/* Our caller is responsible for freeing ipc->opt */
    283			err = ip_options_get(net, &ipc->opt,
    284					     KERNEL_SOCKPTR(CMSG_DATA(cmsg)),
    285					     err < 40 ? err : 40);
    286			if (err)
    287				return err;
    288			break;
    289		case IP_PKTINFO:
    290		{
    291			struct in_pktinfo *info;
    292			if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
    293				return -EINVAL;
    294			info = (struct in_pktinfo *)CMSG_DATA(cmsg);
    295			if (info->ipi_ifindex)
    296				ipc->oif = info->ipi_ifindex;
    297			ipc->addr = info->ipi_spec_dst.s_addr;
    298			break;
    299		}
    300		case IP_TTL:
    301			if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
    302				return -EINVAL;
    303			val = *(int *)CMSG_DATA(cmsg);
    304			if (val < 1 || val > 255)
    305				return -EINVAL;
    306			ipc->ttl = val;
    307			break;
    308		case IP_TOS:
    309			if (cmsg->cmsg_len == CMSG_LEN(sizeof(int)))
    310				val = *(int *)CMSG_DATA(cmsg);
    311			else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8)))
    312				val = *(u8 *)CMSG_DATA(cmsg);
    313			else
    314				return -EINVAL;
    315			if (val < 0 || val > 255)
    316				return -EINVAL;
    317			ipc->tos = val;
    318			ipc->priority = rt_tos2priority(ipc->tos);
    319			break;
    320
    321		default:
    322			return -EINVAL;
    323		}
    324	}
    325	return 0;
    326}
    327
    328static void ip_ra_destroy_rcu(struct rcu_head *head)
    329{
    330	struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
    331
    332	sock_put(ra->saved_sk);
    333	kfree(ra);
    334}
    335
    336int ip_ra_control(struct sock *sk, unsigned char on,
    337		  void (*destructor)(struct sock *))
    338{
    339	struct ip_ra_chain *ra, *new_ra;
    340	struct ip_ra_chain __rcu **rap;
    341	struct net *net = sock_net(sk);
    342
    343	if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
    344		return -EINVAL;
    345
    346	new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
    347	if (on && !new_ra)
    348		return -ENOMEM;
    349
    350	mutex_lock(&net->ipv4.ra_mutex);
    351	for (rap = &net->ipv4.ra_chain;
    352	     (ra = rcu_dereference_protected(*rap,
    353			lockdep_is_held(&net->ipv4.ra_mutex))) != NULL;
    354	     rap = &ra->next) {
    355		if (ra->sk == sk) {
    356			if (on) {
    357				mutex_unlock(&net->ipv4.ra_mutex);
    358				kfree(new_ra);
    359				return -EADDRINUSE;
    360			}
    361			/* dont let ip_call_ra_chain() use sk again */
    362			ra->sk = NULL;
    363			RCU_INIT_POINTER(*rap, ra->next);
    364			mutex_unlock(&net->ipv4.ra_mutex);
    365
    366			if (ra->destructor)
    367				ra->destructor(sk);
    368			/*
    369			 * Delay sock_put(sk) and kfree(ra) after one rcu grace
    370			 * period. This guarantee ip_call_ra_chain() dont need
    371			 * to mess with socket refcounts.
    372			 */
    373			ra->saved_sk = sk;
    374			call_rcu(&ra->rcu, ip_ra_destroy_rcu);
    375			return 0;
    376		}
    377	}
    378	if (!new_ra) {
    379		mutex_unlock(&net->ipv4.ra_mutex);
    380		return -ENOBUFS;
    381	}
    382	new_ra->sk = sk;
    383	new_ra->destructor = destructor;
    384
    385	RCU_INIT_POINTER(new_ra->next, ra);
    386	rcu_assign_pointer(*rap, new_ra);
    387	sock_hold(sk);
    388	mutex_unlock(&net->ipv4.ra_mutex);
    389
    390	return 0;
    391}
    392
    393static void ipv4_icmp_error_rfc4884(const struct sk_buff *skb,
    394				    struct sock_ee_data_rfc4884 *out)
    395{
    396	switch (icmp_hdr(skb)->type) {
    397	case ICMP_DEST_UNREACH:
    398	case ICMP_TIME_EXCEEDED:
    399	case ICMP_PARAMETERPROB:
    400		ip_icmp_error_rfc4884(skb, out, sizeof(struct icmphdr),
    401				      icmp_hdr(skb)->un.reserved[1] * 4);
    402	}
    403}
    404
    405void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
    406		   __be16 port, u32 info, u8 *payload)
    407{
    408	struct sock_exterr_skb *serr;
    409
    410	skb = skb_clone(skb, GFP_ATOMIC);
    411	if (!skb)
    412		return;
    413
    414	serr = SKB_EXT_ERR(skb);
    415	serr->ee.ee_errno = err;
    416	serr->ee.ee_origin = SO_EE_ORIGIN_ICMP;
    417	serr->ee.ee_type = icmp_hdr(skb)->type;
    418	serr->ee.ee_code = icmp_hdr(skb)->code;
    419	serr->ee.ee_pad = 0;
    420	serr->ee.ee_info = info;
    421	serr->ee.ee_data = 0;
    422	serr->addr_offset = (u8 *)&(((struct iphdr *)(icmp_hdr(skb) + 1))->daddr) -
    423				   skb_network_header(skb);
    424	serr->port = port;
    425
    426	if (skb_pull(skb, payload - skb->data)) {
    427		if (inet_sk(sk)->recverr_rfc4884)
    428			ipv4_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884);
    429
    430		skb_reset_transport_header(skb);
    431		if (sock_queue_err_skb(sk, skb) == 0)
    432			return;
    433	}
    434	kfree_skb(skb);
    435}
    436
    437void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info)
    438{
    439	struct inet_sock *inet = inet_sk(sk);
    440	struct sock_exterr_skb *serr;
    441	struct iphdr *iph;
    442	struct sk_buff *skb;
    443
    444	if (!inet->recverr)
    445		return;
    446
    447	skb = alloc_skb(sizeof(struct iphdr), GFP_ATOMIC);
    448	if (!skb)
    449		return;
    450
    451	skb_put(skb, sizeof(struct iphdr));
    452	skb_reset_network_header(skb);
    453	iph = ip_hdr(skb);
    454	iph->daddr = daddr;
    455
    456	serr = SKB_EXT_ERR(skb);
    457	serr->ee.ee_errno = err;
    458	serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
    459	serr->ee.ee_type = 0;
    460	serr->ee.ee_code = 0;
    461	serr->ee.ee_pad = 0;
    462	serr->ee.ee_info = info;
    463	serr->ee.ee_data = 0;
    464	serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
    465	serr->port = port;
    466
    467	__skb_pull(skb, skb_tail_pointer(skb) - skb->data);
    468	skb_reset_transport_header(skb);
    469
    470	if (sock_queue_err_skb(sk, skb))
    471		kfree_skb(skb);
    472}
    473
    474/* For some errors we have valid addr_offset even with zero payload and
    475 * zero port. Also, addr_offset should be supported if port is set.
    476 */
    477static inline bool ipv4_datagram_support_addr(struct sock_exterr_skb *serr)
    478{
    479	return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
    480	       serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port;
    481}
    482
    483/* IPv4 supports cmsg on all imcp errors and some timestamps
    484 *
    485 * Timestamp code paths do not initialize the fields expected by cmsg:
    486 * the PKTINFO fields in skb->cb[]. Fill those in here.
    487 */
    488static bool ipv4_datagram_support_cmsg(const struct sock *sk,
    489				       struct sk_buff *skb,
    490				       int ee_origin)
    491{
    492	struct in_pktinfo *info;
    493
    494	if (ee_origin == SO_EE_ORIGIN_ICMP)
    495		return true;
    496
    497	if (ee_origin == SO_EE_ORIGIN_LOCAL)
    498		return false;
    499
    500	/* Support IP_PKTINFO on tstamp packets if requested, to correlate
    501	 * timestamp with egress dev. Not possible for packets without iif
    502	 * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
    503	 */
    504	info = PKTINFO_SKB_CB(skb);
    505	if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
    506	    !info->ipi_ifindex)
    507		return false;
    508
    509	info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
    510	return true;
    511}
    512
    513/*
    514 *	Handle MSG_ERRQUEUE
    515 */
    516int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
    517{
    518	struct sock_exterr_skb *serr;
    519	struct sk_buff *skb;
    520	DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
    521	struct {
    522		struct sock_extended_err ee;
    523		struct sockaddr_in	 offender;
    524	} errhdr;
    525	int err;
    526	int copied;
    527
    528	err = -EAGAIN;
    529	skb = sock_dequeue_err_skb(sk);
    530	if (!skb)
    531		goto out;
    532
    533	copied = skb->len;
    534	if (copied > len) {
    535		msg->msg_flags |= MSG_TRUNC;
    536		copied = len;
    537	}
    538	err = skb_copy_datagram_msg(skb, 0, msg, copied);
    539	if (unlikely(err)) {
    540		kfree_skb(skb);
    541		return err;
    542	}
    543	sock_recv_timestamp(msg, sk, skb);
    544
    545	serr = SKB_EXT_ERR(skb);
    546
    547	if (sin && ipv4_datagram_support_addr(serr)) {
    548		sin->sin_family = AF_INET;
    549		sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
    550						   serr->addr_offset);
    551		sin->sin_port = serr->port;
    552		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
    553		*addr_len = sizeof(*sin);
    554	}
    555
    556	memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
    557	sin = &errhdr.offender;
    558	memset(sin, 0, sizeof(*sin));
    559
    560	if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) {
    561		sin->sin_family = AF_INET;
    562		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
    563		if (inet_sk(sk)->cmsg_flags)
    564			ip_cmsg_recv(msg, skb);
    565	}
    566
    567	put_cmsg(msg, SOL_IP, IP_RECVERR, sizeof(errhdr), &errhdr);
    568
    569	/* Now we could try to dump offended packet options */
    570
    571	msg->msg_flags |= MSG_ERRQUEUE;
    572	err = copied;
    573
    574	consume_skb(skb);
    575out:
    576	return err;
    577}
    578
    579void __ip_sock_set_tos(struct sock *sk, int val)
    580{
    581	if (sk->sk_type == SOCK_STREAM) {
    582		val &= ~INET_ECN_MASK;
    583		val |= inet_sk(sk)->tos & INET_ECN_MASK;
    584	}
    585	if (inet_sk(sk)->tos != val) {
    586		inet_sk(sk)->tos = val;
    587		sk->sk_priority = rt_tos2priority(val);
    588		sk_dst_reset(sk);
    589	}
    590}
    591
/*
 * Locked variant of __ip_sock_set_tos(): applies @val to @sk while holding
 * the socket lock.
 */
void ip_sock_set_tos(struct sock *sk, int val)
{
	lock_sock(sk);
	__ip_sock_set_tos(sk, val);
	release_sock(sk);
}
EXPORT_SYMBOL(ip_sock_set_tos);
    599
    600void ip_sock_set_freebind(struct sock *sk)
    601{
    602	lock_sock(sk);
    603	inet_sk(sk)->freebind = true;
    604	release_sock(sk);
    605}
    606EXPORT_SYMBOL(ip_sock_set_freebind);
    607
    608void ip_sock_set_recverr(struct sock *sk)
    609{
    610	lock_sock(sk);
    611	inet_sk(sk)->recverr = true;
    612	release_sock(sk);
    613}
    614EXPORT_SYMBOL(ip_sock_set_recverr);
    615
    616int ip_sock_set_mtu_discover(struct sock *sk, int val)
    617{
    618	if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
    619		return -EINVAL;
    620	lock_sock(sk);
    621	inet_sk(sk)->pmtudisc = val;
    622	release_sock(sk);
    623	return 0;
    624}
    625EXPORT_SYMBOL(ip_sock_set_mtu_discover);
    626
    627void ip_sock_set_pktinfo(struct sock *sk)
    628{
    629	lock_sock(sk);
    630	inet_sk(sk)->cmsg_flags |= IP_CMSG_PKTINFO;
    631	release_sock(sk);
    632}
    633EXPORT_SYMBOL(ip_sock_set_pktinfo);
    634
    635/*
    636 *	Socket option code for IP. This is the end of the line after any
    637 *	TCP,UDP etc options on an IP socket.
    638 */
    639static bool setsockopt_needs_rtnl(int optname)
    640{
    641	switch (optname) {
    642	case IP_ADD_MEMBERSHIP:
    643	case IP_ADD_SOURCE_MEMBERSHIP:
    644	case IP_BLOCK_SOURCE:
    645	case IP_DROP_MEMBERSHIP:
    646	case IP_DROP_SOURCE_MEMBERSHIP:
    647	case IP_MSFILTER:
    648	case IP_UNBLOCK_SOURCE:
    649	case MCAST_BLOCK_SOURCE:
    650	case MCAST_MSFILTER:
    651	case MCAST_JOIN_GROUP:
    652	case MCAST_JOIN_SOURCE_GROUP:
    653	case MCAST_LEAVE_GROUP:
    654	case MCAST_LEAVE_SOURCE_GROUP:
    655	case MCAST_UNBLOCK_SOURCE:
    656		return true;
    657	}
    658	return false;
    659}
    660
    661static int set_mcast_msfilter(struct sock *sk, int ifindex,
    662			      int numsrc, int fmode,
    663			      struct sockaddr_storage *group,
    664			      struct sockaddr_storage *list)
    665{
    666	struct ip_msfilter *msf;
    667	struct sockaddr_in *psin;
    668	int err, i;
    669
    670	msf = kmalloc(IP_MSFILTER_SIZE(numsrc), GFP_KERNEL);
    671	if (!msf)
    672		return -ENOBUFS;
    673
    674	psin = (struct sockaddr_in *)group;
    675	if (psin->sin_family != AF_INET)
    676		goto Eaddrnotavail;
    677	msf->imsf_multiaddr = psin->sin_addr.s_addr;
    678	msf->imsf_interface = 0;
    679	msf->imsf_fmode = fmode;
    680	msf->imsf_numsrc = numsrc;
    681	for (i = 0; i < numsrc; ++i) {
    682		psin = (struct sockaddr_in *)&list[i];
    683
    684		if (psin->sin_family != AF_INET)
    685			goto Eaddrnotavail;
    686		msf->imsf_slist_flex[i] = psin->sin_addr.s_addr;
    687	}
    688	err = ip_mc_msfilter(sk, msf, ifindex);
    689	kfree(msf);
    690	return err;
    691
    692Eaddrnotavail:
    693	kfree(msf);
    694	return -EADDRNOTAVAIL;
    695}
    696
    697static int copy_group_source_from_sockptr(struct group_source_req *greqs,
    698		sockptr_t optval, int optlen)
    699{
    700	if (in_compat_syscall()) {
    701		struct compat_group_source_req gr32;
    702
    703		if (optlen != sizeof(gr32))
    704			return -EINVAL;
    705		if (copy_from_sockptr(&gr32, optval, sizeof(gr32)))
    706			return -EFAULT;
    707		greqs->gsr_interface = gr32.gsr_interface;
    708		greqs->gsr_group = gr32.gsr_group;
    709		greqs->gsr_source = gr32.gsr_source;
    710	} else {
    711		if (optlen != sizeof(*greqs))
    712			return -EINVAL;
    713		if (copy_from_sockptr(greqs, optval, sizeof(*greqs)))
    714			return -EFAULT;
    715	}
    716
    717	return 0;
    718}
    719
    720static int do_mcast_group_source(struct sock *sk, int optname,
    721		sockptr_t optval, int optlen)
    722{
    723	struct group_source_req greqs;
    724	struct ip_mreq_source mreqs;
    725	struct sockaddr_in *psin;
    726	int omode, add, err;
    727
    728	err = copy_group_source_from_sockptr(&greqs, optval, optlen);
    729	if (err)
    730		return err;
    731
    732	if (greqs.gsr_group.ss_family != AF_INET ||
    733	    greqs.gsr_source.ss_family != AF_INET)
    734		return -EADDRNOTAVAIL;
    735
    736	psin = (struct sockaddr_in *)&greqs.gsr_group;
    737	mreqs.imr_multiaddr = psin->sin_addr.s_addr;
    738	psin = (struct sockaddr_in *)&greqs.gsr_source;
    739	mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
    740	mreqs.imr_interface = 0; /* use index for mc_source */
    741
    742	if (optname == MCAST_BLOCK_SOURCE) {
    743		omode = MCAST_EXCLUDE;
    744		add = 1;
    745	} else if (optname == MCAST_UNBLOCK_SOURCE) {
    746		omode = MCAST_EXCLUDE;
    747		add = 0;
    748	} else if (optname == MCAST_JOIN_SOURCE_GROUP) {
    749		struct ip_mreqn mreq;
    750
    751		psin = (struct sockaddr_in *)&greqs.gsr_group;
    752		mreq.imr_multiaddr = psin->sin_addr;
    753		mreq.imr_address.s_addr = 0;
    754		mreq.imr_ifindex = greqs.gsr_interface;
    755		err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
    756		if (err && err != -EADDRINUSE)
    757			return err;
    758		greqs.gsr_interface = mreq.imr_ifindex;
    759		omode = MCAST_INCLUDE;
    760		add = 1;
    761	} else /* MCAST_LEAVE_SOURCE_GROUP */ {
    762		omode = MCAST_INCLUDE;
    763		add = 0;
    764	}
    765	return ip_mc_source(add, omode, sk, &mreqs, greqs.gsr_interface);
    766}
    767
    768static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
    769{
    770	struct group_filter *gsf = NULL;
    771	int err;
    772
    773	if (optlen < GROUP_FILTER_SIZE(0))
    774		return -EINVAL;
    775	if (optlen > sysctl_optmem_max)
    776		return -ENOBUFS;
    777
    778	gsf = memdup_sockptr(optval, optlen);
    779	if (IS_ERR(gsf))
    780		return PTR_ERR(gsf);
    781
    782	/* numsrc >= (4G-140)/128 overflow in 32 bits */
    783	err = -ENOBUFS;
    784	if (gsf->gf_numsrc >= 0x1ffffff ||
    785	    gsf->gf_numsrc > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
    786		goto out_free_gsf;
    787
    788	err = -EINVAL;
    789	if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen)
    790		goto out_free_gsf;
    791
    792	err = set_mcast_msfilter(sk, gsf->gf_interface, gsf->gf_numsrc,
    793				 gsf->gf_fmode, &gsf->gf_group,
    794				 gsf->gf_slist_flex);
    795out_free_gsf:
    796	kfree(gsf);
    797	return err;
    798}
    799
    800static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
    801		int optlen)
    802{
    803	const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
    804	struct compat_group_filter *gf32;
    805	unsigned int n;
    806	void *p;
    807	int err;
    808
    809	if (optlen < size0)
    810		return -EINVAL;
    811	if (optlen > sysctl_optmem_max - 4)
    812		return -ENOBUFS;
    813
    814	p = kmalloc(optlen + 4, GFP_KERNEL);
    815	if (!p)
    816		return -ENOMEM;
    817	gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */
    818
    819	err = -EFAULT;
    820	if (copy_from_sockptr(gf32, optval, optlen))
    821		goto out_free_gsf;
    822
    823	/* numsrc >= (4G-140)/128 overflow in 32 bits */
    824	n = gf32->gf_numsrc;
    825	err = -ENOBUFS;
    826	if (n >= 0x1ffffff)
    827		goto out_free_gsf;
    828
    829	err = -EINVAL;
    830	if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen)
    831		goto out_free_gsf;
    832
    833	/* numsrc >= (4G-140)/128 overflow in 32 bits */
    834	err = -ENOBUFS;
    835	if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
    836		goto out_free_gsf;
    837	err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode,
    838				 &gf32->gf_group, gf32->gf_slist_flex);
    839out_free_gsf:
    840	kfree(p);
    841	return err;
    842}
    843
    844static int ip_mcast_join_leave(struct sock *sk, int optname,
    845		sockptr_t optval, int optlen)
    846{
    847	struct ip_mreqn mreq = { };
    848	struct sockaddr_in *psin;
    849	struct group_req greq;
    850
    851	if (optlen < sizeof(struct group_req))
    852		return -EINVAL;
    853	if (copy_from_sockptr(&greq, optval, sizeof(greq)))
    854		return -EFAULT;
    855
    856	psin = (struct sockaddr_in *)&greq.gr_group;
    857	if (psin->sin_family != AF_INET)
    858		return -EINVAL;
    859	mreq.imr_multiaddr = psin->sin_addr;
    860	mreq.imr_ifindex = greq.gr_interface;
    861	if (optname == MCAST_JOIN_GROUP)
    862		return ip_mc_join_group(sk, &mreq);
    863	return ip_mc_leave_group(sk, &mreq);
    864}
    865
    866static int compat_ip_mcast_join_leave(struct sock *sk, int optname,
    867		sockptr_t optval, int optlen)
    868{
    869	struct compat_group_req greq;
    870	struct ip_mreqn mreq = { };
    871	struct sockaddr_in *psin;
    872
    873	if (optlen < sizeof(struct compat_group_req))
    874		return -EINVAL;
    875	if (copy_from_sockptr(&greq, optval, sizeof(greq)))
    876		return -EFAULT;
    877
    878	psin = (struct sockaddr_in *)&greq.gr_group;
    879	if (psin->sin_family != AF_INET)
    880		return -EINVAL;
    881	mreq.imr_multiaddr = psin->sin_addr;
    882	mreq.imr_ifindex = greq.gr_interface;
    883
    884	if (optname == MCAST_JOIN_GROUP)
    885		return ip_mc_join_group(sk, &mreq);
    886	return ip_mc_leave_group(sk, &mreq);
    887}
    888
/*
 * Static key "ip4_min_ttl", defined with an initial state of false.
 * NOTE(review): presumably toggled by the IP_MINTTL socket option handling;
 * the code doing so is not visible here - confirm.
 */
DEFINE_STATIC_KEY_FALSE(ip4_min_ttl);
    890
    891static int do_ip_setsockopt(struct sock *sk, int level, int optname,
    892		sockptr_t optval, unsigned int optlen)
    893{
    894	struct inet_sock *inet = inet_sk(sk);
    895	struct net *net = sock_net(sk);
    896	int val = 0, err;
    897	bool needs_rtnl = setsockopt_needs_rtnl(optname);
    898
    899	switch (optname) {
    900	case IP_PKTINFO:
    901	case IP_RECVTTL:
    902	case IP_RECVOPTS:
    903	case IP_RECVTOS:
    904	case IP_RETOPTS:
    905	case IP_TOS:
    906	case IP_TTL:
    907	case IP_HDRINCL:
    908	case IP_MTU_DISCOVER:
    909	case IP_RECVERR:
    910	case IP_ROUTER_ALERT:
    911	case IP_FREEBIND:
    912	case IP_PASSSEC:
    913	case IP_TRANSPARENT:
    914	case IP_MINTTL:
    915	case IP_NODEFRAG:
    916	case IP_BIND_ADDRESS_NO_PORT:
    917	case IP_UNICAST_IF:
    918	case IP_MULTICAST_TTL:
    919	case IP_MULTICAST_ALL:
    920	case IP_MULTICAST_LOOP:
    921	case IP_RECVORIGDSTADDR:
    922	case IP_CHECKSUM:
    923	case IP_RECVFRAGSIZE:
    924	case IP_RECVERR_RFC4884:
    925		if (optlen >= sizeof(int)) {
    926			if (copy_from_sockptr(&val, optval, sizeof(val)))
    927				return -EFAULT;
    928		} else if (optlen >= sizeof(char)) {
    929			unsigned char ucval;
    930
    931			if (copy_from_sockptr(&ucval, optval, sizeof(ucval)))
    932				return -EFAULT;
    933			val = (int) ucval;
    934		}
    935	}
    936
    937	/* If optlen==0, it is equivalent to val == 0 */
    938
    939	if (optname == IP_ROUTER_ALERT)
    940		return ip_ra_control(sk, val ? 1 : 0, NULL);
    941	if (ip_mroute_opt(optname))
    942		return ip_mroute_setsockopt(sk, optname, optval, optlen);
    943
    944	err = 0;
    945	if (needs_rtnl)
    946		rtnl_lock();
    947	lock_sock(sk);
    948
    949	switch (optname) {
    950	case IP_OPTIONS:
    951	{
    952		struct ip_options_rcu *old, *opt = NULL;
    953
    954		if (optlen > 40)
    955			goto e_inval;
    956		err = ip_options_get(sock_net(sk), &opt, optval, optlen);
    957		if (err)
    958			break;
    959		old = rcu_dereference_protected(inet->inet_opt,
    960						lockdep_sock_is_held(sk));
    961		if (inet->is_icsk) {
    962			struct inet_connection_sock *icsk = inet_csk(sk);
    963#if IS_ENABLED(CONFIG_IPV6)
    964			if (sk->sk_family == PF_INET ||
    965			    (!((1 << sk->sk_state) &
    966			       (TCPF_LISTEN | TCPF_CLOSE)) &&
    967			     inet->inet_daddr != LOOPBACK4_IPV6)) {
    968#endif
    969				if (old)
    970					icsk->icsk_ext_hdr_len -= old->opt.optlen;
    971				if (opt)
    972					icsk->icsk_ext_hdr_len += opt->opt.optlen;
    973				icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
    974#if IS_ENABLED(CONFIG_IPV6)
    975			}
    976#endif
    977		}
    978		rcu_assign_pointer(inet->inet_opt, opt);
    979		if (old)
    980			kfree_rcu(old, rcu);
    981		break;
    982	}
    983	case IP_PKTINFO:
    984		if (val)
    985			inet->cmsg_flags |= IP_CMSG_PKTINFO;
    986		else
    987			inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
    988		break;
    989	case IP_RECVTTL:
    990		if (val)
    991			inet->cmsg_flags |=  IP_CMSG_TTL;
    992		else
    993			inet->cmsg_flags &= ~IP_CMSG_TTL;
    994		break;
    995	case IP_RECVTOS:
    996		if (val)
    997			inet->cmsg_flags |=  IP_CMSG_TOS;
    998		else
    999			inet->cmsg_flags &= ~IP_CMSG_TOS;
   1000		break;
   1001	case IP_RECVOPTS:
   1002		if (val)
   1003			inet->cmsg_flags |=  IP_CMSG_RECVOPTS;
   1004		else
   1005			inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
   1006		break;
   1007	case IP_RETOPTS:
   1008		if (val)
   1009			inet->cmsg_flags |= IP_CMSG_RETOPTS;
   1010		else
   1011			inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
   1012		break;
   1013	case IP_PASSSEC:
   1014		if (val)
   1015			inet->cmsg_flags |= IP_CMSG_PASSSEC;
   1016		else
   1017			inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
   1018		break;
   1019	case IP_RECVORIGDSTADDR:
   1020		if (val)
   1021			inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR;
   1022		else
   1023			inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR;
   1024		break;
   1025	case IP_CHECKSUM:
   1026		if (val) {
   1027			if (!(inet->cmsg_flags & IP_CMSG_CHECKSUM)) {
   1028				inet_inc_convert_csum(sk);
   1029				inet->cmsg_flags |= IP_CMSG_CHECKSUM;
   1030			}
   1031		} else {
   1032			if (inet->cmsg_flags & IP_CMSG_CHECKSUM) {
   1033				inet_dec_convert_csum(sk);
   1034				inet->cmsg_flags &= ~IP_CMSG_CHECKSUM;
   1035			}
   1036		}
   1037		break;
   1038	case IP_RECVFRAGSIZE:
   1039		if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM)
   1040			goto e_inval;
   1041		if (val)
   1042			inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE;
   1043		else
   1044			inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE;
   1045		break;
   1046	case IP_TOS:	/* This sets both TOS and Precedence */
   1047		__ip_sock_set_tos(sk, val);
   1048		break;
   1049	case IP_TTL:
   1050		if (optlen < 1)
   1051			goto e_inval;
   1052		if (val != -1 && (val < 1 || val > 255))
   1053			goto e_inval;
   1054		inet->uc_ttl = val;
   1055		break;
   1056	case IP_HDRINCL:
   1057		if (sk->sk_type != SOCK_RAW) {
   1058			err = -ENOPROTOOPT;
   1059			break;
   1060		}
   1061		inet->hdrincl = val ? 1 : 0;
   1062		break;
   1063	case IP_NODEFRAG:
   1064		if (sk->sk_type != SOCK_RAW) {
   1065			err = -ENOPROTOOPT;
   1066			break;
   1067		}
   1068		inet->nodefrag = val ? 1 : 0;
   1069		break;
   1070	case IP_BIND_ADDRESS_NO_PORT:
   1071		inet->bind_address_no_port = val ? 1 : 0;
   1072		break;
   1073	case IP_MTU_DISCOVER:
   1074		if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
   1075			goto e_inval;
   1076		inet->pmtudisc = val;
   1077		break;
   1078	case IP_RECVERR:
   1079		inet->recverr = !!val;
   1080		if (!val)
   1081			skb_queue_purge(&sk->sk_error_queue);
   1082		break;
   1083	case IP_RECVERR_RFC4884:
   1084		if (val < 0 || val > 1)
   1085			goto e_inval;
   1086		inet->recverr_rfc4884 = !!val;
   1087		break;
   1088	case IP_MULTICAST_TTL:
   1089		if (sk->sk_type == SOCK_STREAM)
   1090			goto e_inval;
   1091		if (optlen < 1)
   1092			goto e_inval;
   1093		if (val == -1)
   1094			val = 1;
   1095		if (val < 0 || val > 255)
   1096			goto e_inval;
   1097		inet->mc_ttl = val;
   1098		break;
   1099	case IP_MULTICAST_LOOP:
   1100		if (optlen < 1)
   1101			goto e_inval;
   1102		inet->mc_loop = !!val;
   1103		break;
   1104	case IP_UNICAST_IF:
   1105	{
   1106		struct net_device *dev = NULL;
   1107		int ifindex;
   1108		int midx;
   1109
   1110		if (optlen != sizeof(int))
   1111			goto e_inval;
   1112
   1113		ifindex = (__force int)ntohl((__force __be32)val);
   1114		if (ifindex == 0) {
   1115			inet->uc_index = 0;
   1116			err = 0;
   1117			break;
   1118		}
   1119
   1120		dev = dev_get_by_index(sock_net(sk), ifindex);
   1121		err = -EADDRNOTAVAIL;
   1122		if (!dev)
   1123			break;
   1124
   1125		midx = l3mdev_master_ifindex(dev);
   1126		dev_put(dev);
   1127
   1128		err = -EINVAL;
   1129		if (sk->sk_bound_dev_if && midx != sk->sk_bound_dev_if)
   1130			break;
   1131
   1132		inet->uc_index = ifindex;
   1133		err = 0;
   1134		break;
   1135	}
   1136	case IP_MULTICAST_IF:
   1137	{
   1138		struct ip_mreqn mreq;
   1139		struct net_device *dev = NULL;
   1140		int midx;
   1141
   1142		if (sk->sk_type == SOCK_STREAM)
   1143			goto e_inval;
   1144		/*
   1145		 *	Check the arguments are allowable
   1146		 */
   1147
   1148		if (optlen < sizeof(struct in_addr))
   1149			goto e_inval;
   1150
   1151		err = -EFAULT;
   1152		if (optlen >= sizeof(struct ip_mreqn)) {
   1153			if (copy_from_sockptr(&mreq, optval, sizeof(mreq)))
   1154				break;
   1155		} else {
   1156			memset(&mreq, 0, sizeof(mreq));
   1157			if (optlen >= sizeof(struct ip_mreq)) {
   1158				if (copy_from_sockptr(&mreq, optval,
   1159						      sizeof(struct ip_mreq)))
   1160					break;
   1161			} else if (optlen >= sizeof(struct in_addr)) {
   1162				if (copy_from_sockptr(&mreq.imr_address, optval,
   1163						      sizeof(struct in_addr)))
   1164					break;
   1165			}
   1166		}
   1167
   1168		if (!mreq.imr_ifindex) {
   1169			if (mreq.imr_address.s_addr == htonl(INADDR_ANY)) {
   1170				inet->mc_index = 0;
   1171				inet->mc_addr  = 0;
   1172				err = 0;
   1173				break;
   1174			}
   1175			dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr);
   1176			if (dev)
   1177				mreq.imr_ifindex = dev->ifindex;
   1178		} else
   1179			dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex);
   1180
   1181
   1182		err = -EADDRNOTAVAIL;
   1183		if (!dev)
   1184			break;
   1185
   1186		midx = l3mdev_master_ifindex(dev);
   1187
   1188		dev_put(dev);
   1189
   1190		err = -EINVAL;
   1191		if (sk->sk_bound_dev_if &&
   1192		    mreq.imr_ifindex != sk->sk_bound_dev_if &&
   1193		    midx != sk->sk_bound_dev_if)
   1194			break;
   1195
   1196		inet->mc_index = mreq.imr_ifindex;
   1197		inet->mc_addr  = mreq.imr_address.s_addr;
   1198		err = 0;
   1199		break;
   1200	}
   1201
   1202	case IP_ADD_MEMBERSHIP:
   1203	case IP_DROP_MEMBERSHIP:
   1204	{
   1205		struct ip_mreqn mreq;
   1206
   1207		err = -EPROTO;
   1208		if (inet_sk(sk)->is_icsk)
   1209			break;
   1210
   1211		if (optlen < sizeof(struct ip_mreq))
   1212			goto e_inval;
   1213		err = -EFAULT;
   1214		if (optlen >= sizeof(struct ip_mreqn)) {
   1215			if (copy_from_sockptr(&mreq, optval, sizeof(mreq)))
   1216				break;
   1217		} else {
   1218			memset(&mreq, 0, sizeof(mreq));
   1219			if (copy_from_sockptr(&mreq, optval,
   1220					      sizeof(struct ip_mreq)))
   1221				break;
   1222		}
   1223
   1224		if (optname == IP_ADD_MEMBERSHIP)
   1225			err = ip_mc_join_group(sk, &mreq);
   1226		else
   1227			err = ip_mc_leave_group(sk, &mreq);
   1228		break;
   1229	}
   1230	case IP_MSFILTER:
   1231	{
   1232		struct ip_msfilter *msf;
   1233
   1234		if (optlen < IP_MSFILTER_SIZE(0))
   1235			goto e_inval;
   1236		if (optlen > sysctl_optmem_max) {
   1237			err = -ENOBUFS;
   1238			break;
   1239		}
   1240		msf = memdup_sockptr(optval, optlen);
   1241		if (IS_ERR(msf)) {
   1242			err = PTR_ERR(msf);
   1243			break;
   1244		}
   1245		/* numsrc >= (1G-4) overflow in 32 bits */
   1246		if (msf->imsf_numsrc >= 0x3ffffffcU ||
   1247		    msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
   1248			kfree(msf);
   1249			err = -ENOBUFS;
   1250			break;
   1251		}
   1252		if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) {
   1253			kfree(msf);
   1254			err = -EINVAL;
   1255			break;
   1256		}
   1257		err = ip_mc_msfilter(sk, msf, 0);
   1258		kfree(msf);
   1259		break;
   1260	}
   1261	case IP_BLOCK_SOURCE:
   1262	case IP_UNBLOCK_SOURCE:
   1263	case IP_ADD_SOURCE_MEMBERSHIP:
   1264	case IP_DROP_SOURCE_MEMBERSHIP:
   1265	{
   1266		struct ip_mreq_source mreqs;
   1267		int omode, add;
   1268
   1269		if (optlen != sizeof(struct ip_mreq_source))
   1270			goto e_inval;
   1271		if (copy_from_sockptr(&mreqs, optval, sizeof(mreqs))) {
   1272			err = -EFAULT;
   1273			break;
   1274		}
   1275		if (optname == IP_BLOCK_SOURCE) {
   1276			omode = MCAST_EXCLUDE;
   1277			add = 1;
   1278		} else if (optname == IP_UNBLOCK_SOURCE) {
   1279			omode = MCAST_EXCLUDE;
   1280			add = 0;
   1281		} else if (optname == IP_ADD_SOURCE_MEMBERSHIP) {
   1282			struct ip_mreqn mreq;
   1283
   1284			mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
   1285			mreq.imr_address.s_addr = mreqs.imr_interface;
   1286			mreq.imr_ifindex = 0;
   1287			err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
   1288			if (err && err != -EADDRINUSE)
   1289				break;
   1290			omode = MCAST_INCLUDE;
   1291			add = 1;
   1292		} else /* IP_DROP_SOURCE_MEMBERSHIP */ {
   1293			omode = MCAST_INCLUDE;
   1294			add = 0;
   1295		}
   1296		err = ip_mc_source(add, omode, sk, &mreqs, 0);
   1297		break;
   1298	}
   1299	case MCAST_JOIN_GROUP:
   1300	case MCAST_LEAVE_GROUP:
   1301		if (in_compat_syscall())
   1302			err = compat_ip_mcast_join_leave(sk, optname, optval,
   1303							 optlen);
   1304		else
   1305			err = ip_mcast_join_leave(sk, optname, optval, optlen);
   1306		break;
   1307	case MCAST_JOIN_SOURCE_GROUP:
   1308	case MCAST_LEAVE_SOURCE_GROUP:
   1309	case MCAST_BLOCK_SOURCE:
   1310	case MCAST_UNBLOCK_SOURCE:
   1311		err = do_mcast_group_source(sk, optname, optval, optlen);
   1312		break;
   1313	case MCAST_MSFILTER:
   1314		if (in_compat_syscall())
   1315			err = compat_ip_set_mcast_msfilter(sk, optval, optlen);
   1316		else
   1317			err = ip_set_mcast_msfilter(sk, optval, optlen);
   1318		break;
   1319	case IP_MULTICAST_ALL:
   1320		if (optlen < 1)
   1321			goto e_inval;
   1322		if (val != 0 && val != 1)
   1323			goto e_inval;
   1324		inet->mc_all = val;
   1325		break;
   1326
   1327	case IP_FREEBIND:
   1328		if (optlen < 1)
   1329			goto e_inval;
   1330		inet->freebind = !!val;
   1331		break;
   1332
   1333	case IP_IPSEC_POLICY:
   1334	case IP_XFRM_POLICY:
   1335		err = -EPERM;
   1336		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
   1337			break;
   1338		err = xfrm_user_policy(sk, optname, optval, optlen);
   1339		break;
   1340
   1341	case IP_TRANSPARENT:
   1342		if (!!val && !ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
   1343		    !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
   1344			err = -EPERM;
   1345			break;
   1346		}
   1347		if (optlen < 1)
   1348			goto e_inval;
   1349		inet->transparent = !!val;
   1350		break;
   1351
   1352	case IP_MINTTL:
   1353		if (optlen < 1)
   1354			goto e_inval;
   1355		if (val < 0 || val > 255)
   1356			goto e_inval;
   1357
   1358		if (val)
   1359			static_branch_enable(&ip4_min_ttl);
   1360
   1361		/* tcp_v4_err() and tcp_v4_rcv() might read min_ttl
		 * while we are changing it.
   1363		 */
   1364		WRITE_ONCE(inet->min_ttl, val);
   1365		break;
   1366
   1367	default:
   1368		err = -ENOPROTOOPT;
   1369		break;
   1370	}
   1371	release_sock(sk);
   1372	if (needs_rtnl)
   1373		rtnl_unlock();
   1374	return err;
   1375
   1376e_inval:
   1377	release_sock(sk);
   1378	if (needs_rtnl)
   1379		rtnl_unlock();
   1380	return -EINVAL;
   1381}
   1382
   1383/**
   1384 * ipv4_pktinfo_prepare - transfer some info from rtable to skb
   1385 * @sk: socket
   1386 * @skb: buffer
   1387 *
   1388 * To support IP_CMSG_PKTINFO option, we store rt_iif and specific
   1389 * destination in skb->cb[] before dst drop.
   1390 * This way, receiver doesn't make cache line misses to read rtable.
   1391 */
   1392void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
   1393{
   1394	struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
   1395	bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) ||
   1396		       ipv6_sk_rxinfo(sk);
   1397
   1398	if (prepare && skb_rtable(skb)) {
   1399		/* skb->cb is overloaded: prior to this point it is IP{6}CB
   1400		 * which has interface index (iif) as the first member of the
   1401		 * underlying inet{6}_skb_parm struct. This code then overlays
   1402		 * PKTINFO_SKB_CB and in_pktinfo also has iif as the first
   1403		 * element so the iif is picked up from the prior IPCB. If iif
   1404		 * is the loopback interface, then return the sending interface
   1405		 * (e.g., process binds socket to eth0 for Tx which is
   1406		 * redirected to loopback in the rtable/dst).
   1407		 */
   1408		struct rtable *rt = skb_rtable(skb);
   1409		bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags);
   1410
   1411		if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX)
   1412			pktinfo->ipi_ifindex = inet_iif(skb);
   1413		else if (l3slave && rt && rt->rt_iif)
   1414			pktinfo->ipi_ifindex = rt->rt_iif;
   1415
   1416		pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
   1417	} else {
   1418		pktinfo->ipi_ifindex = 0;
   1419		pktinfo->ipi_spec_dst.s_addr = 0;
   1420	}
   1421	skb_dst_drop(skb);
   1422}
   1423
   1424int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
   1425		unsigned int optlen)
   1426{
   1427	int err;
   1428
   1429	if (level != SOL_IP)
   1430		return -ENOPROTOOPT;
   1431
   1432	err = do_ip_setsockopt(sk, level, optname, optval, optlen);
   1433#if IS_ENABLED(CONFIG_BPFILTER_UMH)
   1434	if (optname >= BPFILTER_IPT_SO_SET_REPLACE &&
   1435	    optname < BPFILTER_IPT_SET_MAX)
   1436		err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen);
   1437#endif
   1438#ifdef CONFIG_NETFILTER
   1439	/* we need to exclude all possible ENOPROTOOPTs except default case */
   1440	if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
   1441			optname != IP_IPSEC_POLICY &&
   1442			optname != IP_XFRM_POLICY &&
   1443			!ip_mroute_opt(optname))
   1444		err = nf_setsockopt(sk, PF_INET, optname, optval, optlen);
   1445#endif
   1446	return err;
   1447}
   1448EXPORT_SYMBOL(ip_setsockopt);
   1449
   1450/*
   1451 *	Get the options. Note for future reference. The GET of IP options gets
   1452 *	the _received_ ones. The set sets the _sent_ ones.
   1453 */
   1454
   1455static bool getsockopt_needs_rtnl(int optname)
   1456{
   1457	switch (optname) {
   1458	case IP_MSFILTER:
   1459	case MCAST_MSFILTER:
   1460		return true;
   1461	}
   1462	return false;
   1463}
   1464
   1465static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
   1466		int __user *optlen, int len)
   1467{
   1468	const int size0 = offsetof(struct group_filter, gf_slist_flex);
   1469	struct group_filter __user *p = optval;
   1470	struct group_filter gsf;
   1471	int num;
   1472	int err;
   1473
   1474	if (len < size0)
   1475		return -EINVAL;
   1476	if (copy_from_user(&gsf, p, size0))
   1477		return -EFAULT;
   1478
   1479	num = gsf.gf_numsrc;
   1480	err = ip_mc_gsfget(sk, &gsf, p->gf_slist_flex);
   1481	if (err)
   1482		return err;
   1483	if (gsf.gf_numsrc < num)
   1484		num = gsf.gf_numsrc;
   1485	if (put_user(GROUP_FILTER_SIZE(num), optlen) ||
   1486	    copy_to_user(p, &gsf, size0))
   1487		return -EFAULT;
   1488	return 0;
   1489}
   1490
   1491static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
   1492		int __user *optlen, int len)
   1493{
   1494	const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
   1495	struct compat_group_filter __user *p = optval;
   1496	struct compat_group_filter gf32;
   1497	struct group_filter gf;
   1498	int num;
   1499	int err;
   1500
   1501	if (len < size0)
   1502		return -EINVAL;
   1503	if (copy_from_user(&gf32, p, size0))
   1504		return -EFAULT;
   1505
   1506	gf.gf_interface = gf32.gf_interface;
   1507	gf.gf_fmode = gf32.gf_fmode;
   1508	num = gf.gf_numsrc = gf32.gf_numsrc;
   1509	gf.gf_group = gf32.gf_group;
   1510
   1511	err = ip_mc_gsfget(sk, &gf, p->gf_slist_flex);
   1512	if (err)
   1513		return err;
   1514	if (gf.gf_numsrc < num)
   1515		num = gf.gf_numsrc;
   1516	len = GROUP_FILTER_SIZE(num) - (sizeof(gf) - sizeof(gf32));
   1517	if (put_user(len, optlen) ||
   1518	    put_user(gf.gf_fmode, &p->gf_fmode) ||
   1519	    put_user(gf.gf_numsrc, &p->gf_numsrc))
   1520		return -EFAULT;
   1521	return 0;
   1522}
   1523
   1524static int do_ip_getsockopt(struct sock *sk, int level, int optname,
   1525			    char __user *optval, int __user *optlen)
   1526{
   1527	struct inet_sock *inet = inet_sk(sk);
   1528	bool needs_rtnl = getsockopt_needs_rtnl(optname);
   1529	int val, err = 0;
   1530	int len;
   1531
   1532	if (level != SOL_IP)
   1533		return -EOPNOTSUPP;
   1534
   1535	if (ip_mroute_opt(optname))
   1536		return ip_mroute_getsockopt(sk, optname, optval, optlen);
   1537
   1538	if (get_user(len, optlen))
   1539		return -EFAULT;
   1540	if (len < 0)
   1541		return -EINVAL;
   1542
   1543	if (needs_rtnl)
   1544		rtnl_lock();
   1545	lock_sock(sk);
   1546
   1547	switch (optname) {
   1548	case IP_OPTIONS:
   1549	{
   1550		unsigned char optbuf[sizeof(struct ip_options)+40];
   1551		struct ip_options *opt = (struct ip_options *)optbuf;
   1552		struct ip_options_rcu *inet_opt;
   1553
   1554		inet_opt = rcu_dereference_protected(inet->inet_opt,
   1555						     lockdep_sock_is_held(sk));
   1556		opt->optlen = 0;
   1557		if (inet_opt)
   1558			memcpy(optbuf, &inet_opt->opt,
   1559			       sizeof(struct ip_options) +
   1560			       inet_opt->opt.optlen);
   1561		release_sock(sk);
   1562
   1563		if (opt->optlen == 0)
   1564			return put_user(0, optlen);
   1565
   1566		ip_options_undo(opt);
   1567
   1568		len = min_t(unsigned int, len, opt->optlen);
   1569		if (put_user(len, optlen))
   1570			return -EFAULT;
   1571		if (copy_to_user(optval, opt->__data, len))
   1572			return -EFAULT;
   1573		return 0;
   1574	}
   1575	case IP_PKTINFO:
   1576		val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0;
   1577		break;
   1578	case IP_RECVTTL:
   1579		val = (inet->cmsg_flags & IP_CMSG_TTL) != 0;
   1580		break;
   1581	case IP_RECVTOS:
   1582		val = (inet->cmsg_flags & IP_CMSG_TOS) != 0;
   1583		break;
   1584	case IP_RECVOPTS:
   1585		val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0;
   1586		break;
   1587	case IP_RETOPTS:
   1588		val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0;
   1589		break;
   1590	case IP_PASSSEC:
   1591		val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
   1592		break;
   1593	case IP_RECVORIGDSTADDR:
   1594		val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0;
   1595		break;
   1596	case IP_CHECKSUM:
   1597		val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0;
   1598		break;
   1599	case IP_RECVFRAGSIZE:
   1600		val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0;
   1601		break;
   1602	case IP_TOS:
   1603		val = inet->tos;
   1604		break;
   1605	case IP_TTL:
   1606	{
   1607		struct net *net = sock_net(sk);
   1608		val = (inet->uc_ttl == -1 ?
   1609		       net->ipv4.sysctl_ip_default_ttl :
   1610		       inet->uc_ttl);
   1611		break;
   1612	}
   1613	case IP_HDRINCL:
   1614		val = inet->hdrincl;
   1615		break;
   1616	case IP_NODEFRAG:
   1617		val = inet->nodefrag;
   1618		break;
   1619	case IP_BIND_ADDRESS_NO_PORT:
   1620		val = inet->bind_address_no_port;
   1621		break;
   1622	case IP_MTU_DISCOVER:
   1623		val = inet->pmtudisc;
   1624		break;
   1625	case IP_MTU:
   1626	{
   1627		struct dst_entry *dst;
   1628		val = 0;
   1629		dst = sk_dst_get(sk);
   1630		if (dst) {
   1631			val = dst_mtu(dst);
   1632			dst_release(dst);
   1633		}
   1634		if (!val) {
   1635			release_sock(sk);
   1636			return -ENOTCONN;
   1637		}
   1638		break;
   1639	}
   1640	case IP_RECVERR:
   1641		val = inet->recverr;
   1642		break;
   1643	case IP_RECVERR_RFC4884:
   1644		val = inet->recverr_rfc4884;
   1645		break;
   1646	case IP_MULTICAST_TTL:
   1647		val = inet->mc_ttl;
   1648		break;
   1649	case IP_MULTICAST_LOOP:
   1650		val = inet->mc_loop;
   1651		break;
   1652	case IP_UNICAST_IF:
   1653		val = (__force int)htonl((__u32) inet->uc_index);
   1654		break;
   1655	case IP_MULTICAST_IF:
   1656	{
   1657		struct in_addr addr;
   1658		len = min_t(unsigned int, len, sizeof(struct in_addr));
   1659		addr.s_addr = inet->mc_addr;
   1660		release_sock(sk);
   1661
   1662		if (put_user(len, optlen))
   1663			return -EFAULT;
   1664		if (copy_to_user(optval, &addr, len))
   1665			return -EFAULT;
   1666		return 0;
   1667	}
   1668	case IP_MSFILTER:
   1669	{
   1670		struct ip_msfilter msf;
   1671
   1672		if (len < IP_MSFILTER_SIZE(0)) {
   1673			err = -EINVAL;
   1674			goto out;
   1675		}
   1676		if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
   1677			err = -EFAULT;
   1678			goto out;
   1679		}
   1680		err = ip_mc_msfget(sk, &msf,
   1681				   (struct ip_msfilter __user *)optval, optlen);
   1682		goto out;
   1683	}
   1684	case MCAST_MSFILTER:
   1685		if (in_compat_syscall())
   1686			err = compat_ip_get_mcast_msfilter(sk, optval, optlen,
   1687							   len);
   1688		else
   1689			err = ip_get_mcast_msfilter(sk, optval, optlen, len);
   1690		goto out;
   1691	case IP_MULTICAST_ALL:
   1692		val = inet->mc_all;
   1693		break;
   1694	case IP_PKTOPTIONS:
   1695	{
   1696		struct msghdr msg;
   1697
   1698		release_sock(sk);
   1699
   1700		if (sk->sk_type != SOCK_STREAM)
   1701			return -ENOPROTOOPT;
   1702
   1703		msg.msg_control_is_user = true;
   1704		msg.msg_control_user = optval;
   1705		msg.msg_controllen = len;
   1706		msg.msg_flags = in_compat_syscall() ? MSG_CMSG_COMPAT : 0;
   1707
   1708		if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
   1709			struct in_pktinfo info;
   1710
   1711			info.ipi_addr.s_addr = inet->inet_rcv_saddr;
   1712			info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr;
   1713			info.ipi_ifindex = inet->mc_index;
   1714			put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
   1715		}
   1716		if (inet->cmsg_flags & IP_CMSG_TTL) {
   1717			int hlim = inet->mc_ttl;
   1718			put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
   1719		}
   1720		if (inet->cmsg_flags & IP_CMSG_TOS) {
   1721			int tos = inet->rcv_tos;
   1722			put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos);
   1723		}
   1724		len -= msg.msg_controllen;
   1725		return put_user(len, optlen);
   1726	}
   1727	case IP_FREEBIND:
   1728		val = inet->freebind;
   1729		break;
   1730	case IP_TRANSPARENT:
   1731		val = inet->transparent;
   1732		break;
   1733	case IP_MINTTL:
   1734		val = inet->min_ttl;
   1735		break;
   1736	default:
   1737		release_sock(sk);
   1738		return -ENOPROTOOPT;
   1739	}
   1740	release_sock(sk);
   1741
   1742	if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
   1743		unsigned char ucval = (unsigned char)val;
   1744		len = 1;
   1745		if (put_user(len, optlen))
   1746			return -EFAULT;
   1747		if (copy_to_user(optval, &ucval, 1))
   1748			return -EFAULT;
   1749	} else {
   1750		len = min_t(unsigned int, sizeof(int), len);
   1751		if (put_user(len, optlen))
   1752			return -EFAULT;
   1753		if (copy_to_user(optval, &val, len))
   1754			return -EFAULT;
   1755	}
   1756	return 0;
   1757
   1758out:
   1759	release_sock(sk);
   1760	if (needs_rtnl)
   1761		rtnl_unlock();
   1762	return err;
   1763}
   1764
   1765int ip_getsockopt(struct sock *sk, int level,
   1766		  int optname, char __user *optval, int __user *optlen)
   1767{
   1768	int err;
   1769
   1770	err = do_ip_getsockopt(sk, level, optname, optval, optlen);
   1771
   1772#if IS_ENABLED(CONFIG_BPFILTER_UMH)
   1773	if (optname >= BPFILTER_IPT_SO_GET_INFO &&
   1774	    optname < BPFILTER_IPT_GET_MAX)
   1775		err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
   1776#endif
   1777#ifdef CONFIG_NETFILTER
   1778	/* we need to exclude all possible ENOPROTOOPTs except default case */
   1779	if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
   1780			!ip_mroute_opt(optname)) {
   1781		int len;
   1782
   1783		if (get_user(len, optlen))
   1784			return -EFAULT;
   1785
   1786		err = nf_getsockopt(sk, PF_INET, optname, optval, &len);
   1787		if (err >= 0)
   1788			err = put_user(len, optlen);
   1789		return err;
   1790	}
   1791#endif
   1792	return err;
   1793}
   1794EXPORT_SYMBOL(ip_getsockopt);