cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

ipmr.c (77743B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 *	IP multicast routing support for mrouted 3.6/3.8
      4 *
      5 *		(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
      6 *	  Linux Consultancy and Custom Driver Development
      7 *
      8 *	Fixes:
      9 *	Michael Chastain	:	Incorrect size of copying.
     10 *	Alan Cox		:	Added the cache manager code
     11 *	Alan Cox		:	Fixed the clone/copy bug and device race.
     12 *	Mike McLagan		:	Routing by source
     13 *	Malcolm Beattie		:	Buffer handling fixes.
     14 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
     15 *	SVR Anand		:	Fixed several multicast bugs and problems.
     16 *	Alexey Kuznetsov	:	Status, optimisations and more.
     17 *	Brad Parker		:	Better behaviour on mrouted upcall
     18 *					overflow.
     19 *      Carlos Picoto           :       PIMv1 Support
     20 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
     21 *					Relax this requirement to work with older peers.
     22 */
     23
     24#include <linux/uaccess.h>
     25#include <linux/types.h>
     26#include <linux/cache.h>
     27#include <linux/capability.h>
     28#include <linux/errno.h>
     29#include <linux/mm.h>
     30#include <linux/kernel.h>
     31#include <linux/fcntl.h>
     32#include <linux/stat.h>
     33#include <linux/socket.h>
     34#include <linux/in.h>
     35#include <linux/inet.h>
     36#include <linux/netdevice.h>
     37#include <linux/inetdevice.h>
     38#include <linux/igmp.h>
     39#include <linux/proc_fs.h>
     40#include <linux/seq_file.h>
     41#include <linux/mroute.h>
     42#include <linux/init.h>
     43#include <linux/if_ether.h>
     44#include <linux/slab.h>
     45#include <net/net_namespace.h>
     46#include <net/ip.h>
     47#include <net/protocol.h>
     48#include <linux/skbuff.h>
     49#include <net/route.h>
     50#include <net/icmp.h>
     51#include <net/udp.h>
     52#include <net/raw.h>
     53#include <linux/notifier.h>
     54#include <linux/if_arp.h>
     55#include <linux/netfilter_ipv4.h>
     56#include <linux/compat.h>
     57#include <linux/export.h>
     58#include <linux/rhashtable.h>
     59#include <net/ip_tunnels.h>
     60#include <net/checksum.h>
     61#include <net/netlink.h>
     62#include <net/fib_rules.h>
     63#include <linux/netconf.h>
     64#include <net/rtnh.h>
     65
     66#include <linux/nospec.h>
     67
     68struct ipmr_rule {
     69	struct fib_rule		common;
     70};
     71
     72struct ipmr_result {
     73	struct mr_table		*mrt;
     74};
     75
     76/* Big lock, protecting vif table, mrt cache and mroute socket state.
     77 * Note that the changes are semaphored via rtnl_lock.
     78 */
     79
     80static DEFINE_RWLOCK(mrt_lock);
     81
     82/* Multicast router control variables */
     83
     84/* Special spinlock for queue of unresolved entries */
     85static DEFINE_SPINLOCK(mfc_unres_lock);
     86
     87/* We return to original Alan's scheme. Hash table of resolved
     88 * entries is changed only in process context and protected
     89 * with weak lock mrt_lock. Queue of unresolved entries is protected
     90 * with strong spinlock mfc_unres_lock.
     91 *
     92 * In this case data path is free of exclusive locks at all.
     93 */
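        /* Concretely, in this file: the fast path takes only the read side
         * (ipmr_cache_find() runs under rcu_read_lock(); reg_vif_xmit() and
         * __pim_rcv() take read_lock(&mrt_lock)), while configuration paths
         * such as vif_add(), vif_delete() and ipmr_mfc_add() run under RTNL
         * and hold write_lock_bh(&mrt_lock) only for the short sections that
         * publish or retire state. The unresolved queue is touched only under
         * mfc_unres_lock (see ipmr_cache_unresolved() and
         * ipmr_expire_process()).
         */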
     94
     95static struct kmem_cache *mrt_cachep __ro_after_init;
     96
     97static struct mr_table *ipmr_new_table(struct net *net, u32 id);
     98static void ipmr_free_table(struct mr_table *mrt);
     99
    100static void ip_mr_forward(struct net *net, struct mr_table *mrt,
    101			  struct net_device *dev, struct sk_buff *skb,
    102			  struct mfc_cache *cache, int local);
    103static int ipmr_cache_report(struct mr_table *mrt,
    104			     struct sk_buff *pkt, vifi_t vifi, int assert);
    105static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
    106				 int cmd);
    107static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
    108static void mroute_clean_tables(struct mr_table *mrt, int flags);
    109static void ipmr_expire_process(struct timer_list *t);
    110
    111#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
    112#define ipmr_for_each_table(mrt, net)					\
    113	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list,	\
    114				lockdep_rtnl_is_held() ||		\
    115				list_empty(&net->ipv4.mr_tables))
    116
    117static struct mr_table *ipmr_mr_table_iter(struct net *net,
    118					   struct mr_table *mrt)
    119{
    120	struct mr_table *ret;
    121
    122	if (!mrt)
    123		ret = list_entry_rcu(net->ipv4.mr_tables.next,
    124				     struct mr_table, list);
    125	else
    126		ret = list_entry_rcu(mrt->list.next,
    127				     struct mr_table, list);
    128
    129	if (&ret->list == &net->ipv4.mr_tables)
    130		return NULL;
    131	return ret;
    132}
    133
    134static struct mr_table *ipmr_get_table(struct net *net, u32 id)
    135{
    136	struct mr_table *mrt;
    137
    138	ipmr_for_each_table(mrt, net) {
    139		if (mrt->id == id)
    140			return mrt;
    141	}
    142	return NULL;
    143}
    144
    145static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
    146			   struct mr_table **mrt)
    147{
    148	int err;
    149	struct ipmr_result res;
    150	struct fib_lookup_arg arg = {
    151		.result = &res,
    152		.flags = FIB_LOOKUP_NOREF,
    153	};
    154
    155	/* update flow if oif or iif point to device enslaved to l3mdev */
    156	l3mdev_update_flow(net, flowi4_to_flowi(flp4));
    157
    158	err = fib_rules_lookup(net->ipv4.mr_rules_ops,
    159			       flowi4_to_flowi(flp4), 0, &arg);
    160	if (err < 0)
    161		return err;
    162	*mrt = res.mrt;
    163	return 0;
    164}
    165
    166static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
    167			    int flags, struct fib_lookup_arg *arg)
    168{
    169	struct ipmr_result *res = arg->result;
    170	struct mr_table *mrt;
    171
    172	switch (rule->action) {
    173	case FR_ACT_TO_TBL:
    174		break;
    175	case FR_ACT_UNREACHABLE:
    176		return -ENETUNREACH;
    177	case FR_ACT_PROHIBIT:
    178		return -EACCES;
    179	case FR_ACT_BLACKHOLE:
    180	default:
    181		return -EINVAL;
    182	}
    183
    184	arg->table = fib_rule_get_table(rule, arg);
    185
    186	mrt = ipmr_get_table(rule->fr_net, arg->table);
    187	if (!mrt)
    188		return -EAGAIN;
    189	res->mrt = mrt;
    190	return 0;
    191}
    192
    193static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
    194{
    195	return 1;
    196}
    197
    198static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
    199			       struct fib_rule_hdr *frh, struct nlattr **tb,
    200			       struct netlink_ext_ack *extack)
    201{
    202	return 0;
    203}
    204
    205static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
    206			     struct nlattr **tb)
    207{
    208	return 1;
    209}
    210
    211static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
    212			  struct fib_rule_hdr *frh)
    213{
    214	frh->dst_len = 0;
    215	frh->src_len = 0;
    216	frh->tos     = 0;
    217	return 0;
    218}
    219
    220static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = {
    221	.family		= RTNL_FAMILY_IPMR,
    222	.rule_size	= sizeof(struct ipmr_rule),
    223	.addr_size	= sizeof(u32),
    224	.action		= ipmr_rule_action,
    225	.match		= ipmr_rule_match,
    226	.configure	= ipmr_rule_configure,
    227	.compare	= ipmr_rule_compare,
    228	.fill		= ipmr_rule_fill,
    229	.nlgroup	= RTNLGRP_IPV4_RULE,
    230	.owner		= THIS_MODULE,
    231};
    232
    233static int __net_init ipmr_rules_init(struct net *net)
    234{
    235	struct fib_rules_ops *ops;
    236	struct mr_table *mrt;
    237	int err;
    238
    239	ops = fib_rules_register(&ipmr_rules_ops_template, net);
    240	if (IS_ERR(ops))
    241		return PTR_ERR(ops);
    242
    243	INIT_LIST_HEAD(&net->ipv4.mr_tables);
    244
    245	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
    246	if (IS_ERR(mrt)) {
    247		err = PTR_ERR(mrt);
    248		goto err1;
    249	}
    250
    251	err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
    252	if (err < 0)
    253		goto err2;
    254
    255	net->ipv4.mr_rules_ops = ops;
    256	return 0;
    257
    258err2:
    259	rtnl_lock();
    260	ipmr_free_table(mrt);
    261	rtnl_unlock();
    262err1:
    263	fib_rules_unregister(ops);
    264	return err;
    265}
    266
    267static void __net_exit ipmr_rules_exit(struct net *net)
    268{
    269	struct mr_table *mrt, *next;
    270
    271	ASSERT_RTNL();
    272	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
    273		list_del(&mrt->list);
    274		ipmr_free_table(mrt);
    275	}
    276	fib_rules_unregister(net->ipv4.mr_rules_ops);
    277}
    278
    279static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
    280			   struct netlink_ext_ack *extack)
    281{
    282	return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR, extack);
    283}
    284
    285static unsigned int ipmr_rules_seq_read(struct net *net)
    286{
    287	return fib_rules_seq_read(net, RTNL_FAMILY_IPMR);
    288}
    289
    290bool ipmr_rule_default(const struct fib_rule *rule)
    291{
    292	return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT;
    293}
    294EXPORT_SYMBOL(ipmr_rule_default);
    295#else
    296#define ipmr_for_each_table(mrt, net) \
    297	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
    298
    299static struct mr_table *ipmr_mr_table_iter(struct net *net,
    300					   struct mr_table *mrt)
    301{
    302	if (!mrt)
    303		return net->ipv4.mrt;
    304	return NULL;
    305}
    306
    307static struct mr_table *ipmr_get_table(struct net *net, u32 id)
    308{
    309	return net->ipv4.mrt;
    310}
    311
    312static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
    313			   struct mr_table **mrt)
    314{
    315	*mrt = net->ipv4.mrt;
    316	return 0;
    317}
    318
    319static int __net_init ipmr_rules_init(struct net *net)
    320{
    321	struct mr_table *mrt;
    322
    323	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
    324	if (IS_ERR(mrt))
    325		return PTR_ERR(mrt);
    326	net->ipv4.mrt = mrt;
    327	return 0;
    328}
    329
    330static void __net_exit ipmr_rules_exit(struct net *net)
    331{
    332	ASSERT_RTNL();
    333	ipmr_free_table(net->ipv4.mrt);
    334	net->ipv4.mrt = NULL;
    335}
    336
    337static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
    338			   struct netlink_ext_ack *extack)
    339{
    340	return 0;
    341}
    342
    343static unsigned int ipmr_rules_seq_read(struct net *net)
    344{
    345	return 0;
    346}
    347
    348bool ipmr_rule_default(const struct fib_rule *rule)
    349{
    350	return true;
    351}
    352EXPORT_SYMBOL(ipmr_rule_default);
    353#endif
    354
    355static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
    356				const void *ptr)
    357{
    358	const struct mfc_cache_cmp_arg *cmparg = arg->key;
    359	const struct mfc_cache *c = ptr;
    360
    361	return cmparg->mfc_mcastgrp != c->mfc_mcastgrp ||
    362	       cmparg->mfc_origin != c->mfc_origin;
    363}
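        /* rhashtable obj_cmpfn convention: return 0 for a match and non-zero
         * otherwise, so the expression above means "keys differ". The key is
         * the (mcastgrp, origin) pair, i.e. struct mfc_cache_cmp_arg, as
         * wired up via key_offset/key_len in ipmr_rht_params below.
         */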
    364
    365static const struct rhashtable_params ipmr_rht_params = {
    366	.head_offset = offsetof(struct mr_mfc, mnode),
    367	.key_offset = offsetof(struct mfc_cache, cmparg),
    368	.key_len = sizeof(struct mfc_cache_cmp_arg),
    369	.nelem_hint = 3,
    370	.obj_cmpfn = ipmr_hash_cmp,
    371	.automatic_shrinking = true,
    372};
    373
    374static void ipmr_new_table_set(struct mr_table *mrt,
    375			       struct net *net)
    376{
    377#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
    378	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
    379#endif
    380}
    381
    382static struct mfc_cache_cmp_arg ipmr_mr_table_ops_cmparg_any = {
    383	.mfc_mcastgrp = htonl(INADDR_ANY),
    384	.mfc_origin = htonl(INADDR_ANY),
    385};
    386
    387static struct mr_table_ops ipmr_mr_table_ops = {
    388	.rht_params = &ipmr_rht_params,
    389	.cmparg_any = &ipmr_mr_table_ops_cmparg_any,
    390};
    391
    392static struct mr_table *ipmr_new_table(struct net *net, u32 id)
    393{
    394	struct mr_table *mrt;
    395
    396	/* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */
    397	if (id != RT_TABLE_DEFAULT && id >= 1000000000)
    398		return ERR_PTR(-EINVAL);
    399
    400	mrt = ipmr_get_table(net, id);
    401	if (mrt)
    402		return mrt;
    403
    404	return mr_table_alloc(net, id, &ipmr_mr_table_ops,
    405			      ipmr_expire_process, ipmr_new_table_set);
    406}
    407
    408static void ipmr_free_table(struct mr_table *mrt)
    409{
    410	del_timer_sync(&mrt->ipmr_expire_timer);
    411	mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC |
    412				 MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC);
    413	rhltable_destroy(&mrt->mfc_hash);
    414	kfree(mrt);
    415}
    416
    417/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
    418
    419/* Initialize ipmr pimreg/tunnel in_device */
    420static bool ipmr_init_vif_indev(const struct net_device *dev)
    421{
    422	struct in_device *in_dev;
    423
    424	ASSERT_RTNL();
    425
    426	in_dev = __in_dev_get_rtnl(dev);
    427	if (!in_dev)
    428		return false;
    429	ipv4_devconf_setall(in_dev);
    430	neigh_parms_data_state_setall(in_dev->arp_parms);
    431	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
    432
    433	return true;
    434}
    435
    436static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
    437{
    438	struct net_device *tunnel_dev, *new_dev;
    439	struct ip_tunnel_parm p = { };
    440	int err;
    441
    442	tunnel_dev = __dev_get_by_name(net, "tunl0");
    443	if (!tunnel_dev)
    444		goto out;
    445
    446	p.iph.daddr = v->vifc_rmt_addr.s_addr;
    447	p.iph.saddr = v->vifc_lcl_addr.s_addr;
    448	p.iph.version = 4;
    449	p.iph.ihl = 5;
    450	p.iph.protocol = IPPROTO_IPIP;
    451	sprintf(p.name, "dvmrp%d", v->vifc_vifi);
    452
    453	if (!tunnel_dev->netdev_ops->ndo_tunnel_ctl)
    454		goto out;
    455	err = tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p,
    456			SIOCADDTUNNEL);
    457	if (err)
    458		goto out;
    459
    460	new_dev = __dev_get_by_name(net, p.name);
    461	if (!new_dev)
    462		goto out;
    463
    464	new_dev->flags |= IFF_MULTICAST;
    465	if (!ipmr_init_vif_indev(new_dev))
    466		goto out_unregister;
    467	if (dev_open(new_dev, NULL))
    468		goto out_unregister;
    469	dev_hold(new_dev);
    470	err = dev_set_allmulti(new_dev, 1);
    471	if (err) {
    472		dev_close(new_dev);
    473		tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p,
    474				SIOCDELTUNNEL);
    475		dev_put(new_dev);
    476		new_dev = ERR_PTR(err);
    477	}
    478	return new_dev;
    479
    480out_unregister:
    481	unregister_netdevice(new_dev);
    482out:
    483	return ERR_PTR(-ENOBUFS);
    484}
    485
    486#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
    487static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
    488{
    489	struct net *net = dev_net(dev);
    490	struct mr_table *mrt;
    491	struct flowi4 fl4 = {
    492		.flowi4_oif	= dev->ifindex,
    493		.flowi4_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,
    494		.flowi4_mark	= skb->mark,
    495	};
    496	int err;
    497
    498	err = ipmr_fib_lookup(net, &fl4, &mrt);
    499	if (err < 0) {
    500		kfree_skb(skb);
    501		return err;
    502	}
    503
    504	read_lock(&mrt_lock);
    505	dev->stats.tx_bytes += skb->len;
    506	dev->stats.tx_packets++;
    507	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
    508	read_unlock(&mrt_lock);
    509	kfree_skb(skb);
    510	return NETDEV_TX_OK;
    511}
    512
    513static int reg_vif_get_iflink(const struct net_device *dev)
    514{
    515	return 0;
    516}
    517
    518static const struct net_device_ops reg_vif_netdev_ops = {
    519	.ndo_start_xmit	= reg_vif_xmit,
    520	.ndo_get_iflink = reg_vif_get_iflink,
    521};
    522
    523static void reg_vif_setup(struct net_device *dev)
    524{
    525	dev->type		= ARPHRD_PIMREG;
    526	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
    527	dev->flags		= IFF_NOARP;
    528	dev->netdev_ops		= &reg_vif_netdev_ops;
    529	dev->needs_free_netdev	= true;
    530	dev->features		|= NETIF_F_NETNS_LOCAL;
    531}
    532
    533static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
    534{
    535	struct net_device *dev;
    536	char name[IFNAMSIZ];
    537
    538	if (mrt->id == RT_TABLE_DEFAULT)
    539		sprintf(name, "pimreg");
    540	else
    541		sprintf(name, "pimreg%u", mrt->id);
    542
    543	dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
    544
    545	if (!dev)
    546		return NULL;
    547
    548	dev_net_set(dev, net);
    549
    550	if (register_netdevice(dev)) {
    551		free_netdev(dev);
    552		return NULL;
    553	}
    554
    555	if (!ipmr_init_vif_indev(dev))
    556		goto failure;
    557	if (dev_open(dev, NULL))
    558		goto failure;
    559
    560	dev_hold(dev);
    561
    562	return dev;
    563
    564failure:
    565	unregister_netdevice(dev);
    566	return NULL;
    567}
    568
    569/* called with rcu_read_lock() */
    570static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
    571		     unsigned int pimlen)
    572{
    573	struct net_device *reg_dev = NULL;
    574	struct iphdr *encap;
    575
    576	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
    577	/* Check that:
    578	 * a. packet is really sent to a multicast group
    579	 * b. packet is not a NULL-REGISTER
    580	 * c. packet is not truncated
    581	 */
    582	if (!ipv4_is_multicast(encap->daddr) ||
    583	    encap->tot_len == 0 ||
    584	    ntohs(encap->tot_len) + pimlen > skb->len)
    585		return 1;
    586
    587	read_lock(&mrt_lock);
    588	if (mrt->mroute_reg_vif_num >= 0)
    589		reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
    590	read_unlock(&mrt_lock);
    591
    592	if (!reg_dev)
    593		return 1;
    594
    595	skb->mac_header = skb->network_header;
    596	skb_pull(skb, (u8 *)encap - skb->data);
    597	skb_reset_network_header(skb);
    598	skb->protocol = htons(ETH_P_IP);
    599	skb->ip_summed = CHECKSUM_NONE;
    600
    601	skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
    602
    603	netif_rx(skb);
    604
    605	return NET_RX_SUCCESS;
    606}
    607#else
    608static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
    609{
    610	return NULL;
    611}
    612#endif
    613
    614static int call_ipmr_vif_entry_notifiers(struct net *net,
    615					 enum fib_event_type event_type,
    616					 struct vif_device *vif,
    617					 vifi_t vif_index, u32 tb_id)
    618{
    619	return mr_call_vif_notifiers(net, RTNL_FAMILY_IPMR, event_type,
    620				     vif, vif_index, tb_id,
    621				     &net->ipv4.ipmr_seq);
    622}
    623
    624static int call_ipmr_mfc_entry_notifiers(struct net *net,
    625					 enum fib_event_type event_type,
    626					 struct mfc_cache *mfc, u32 tb_id)
    627{
    628	return mr_call_mfc_notifiers(net, RTNL_FAMILY_IPMR, event_type,
    629				     &mfc->_c, tb_id, &net->ipv4.ipmr_seq);
    630}
    631
    632/**
    633 *	vif_delete - Delete a VIF entry
    634 *	@mrt: Table to delete from
    635 *	@vifi: VIF identifier to delete
    636 *	@notify: Set to 1, if the caller is a notifier_call
    637 *	@head: if unregistering the VIF, place it on this queue
    638 */
    639static int vif_delete(struct mr_table *mrt, int vifi, int notify,
    640		      struct list_head *head)
    641{
    642	struct net *net = read_pnet(&mrt->net);
    643	struct vif_device *v;
    644	struct net_device *dev;
    645	struct in_device *in_dev;
    646
    647	if (vifi < 0 || vifi >= mrt->maxvif)
    648		return -EADDRNOTAVAIL;
    649
    650	v = &mrt->vif_table[vifi];
    651
    652	if (VIF_EXISTS(mrt, vifi))
    653		call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi,
    654					      mrt->id);
    655
    656	write_lock_bh(&mrt_lock);
    657	dev = v->dev;
    658	v->dev = NULL;
    659
    660	if (!dev) {
    661		write_unlock_bh(&mrt_lock);
    662		return -EADDRNOTAVAIL;
    663	}
    664
    665	if (vifi == mrt->mroute_reg_vif_num)
    666		mrt->mroute_reg_vif_num = -1;
    667
    668	if (vifi + 1 == mrt->maxvif) {
    669		int tmp;
    670
    671		for (tmp = vifi - 1; tmp >= 0; tmp--) {
    672			if (VIF_EXISTS(mrt, tmp))
    673				break;
    674		}
    675		mrt->maxvif = tmp+1;
    676	}
    677
    678	write_unlock_bh(&mrt_lock);
    679
    680	dev_set_allmulti(dev, -1);
    681
    682	in_dev = __in_dev_get_rtnl(dev);
    683	if (in_dev) {
    684		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
    685		inet_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
    686					    NETCONFA_MC_FORWARDING,
    687					    dev->ifindex, &in_dev->cnf);
    688		ip_rt_multicast_event(in_dev);
    689	}
    690
    691	if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify)
    692		unregister_netdevice_queue(dev, head);
    693
    694	dev_put_track(dev, &v->dev_tracker);
    695	return 0;
    696}
    697
    698static void ipmr_cache_free_rcu(struct rcu_head *head)
    699{
    700	struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
    701
    702	kmem_cache_free(mrt_cachep, (struct mfc_cache *)c);
    703}
    704
    705static void ipmr_cache_free(struct mfc_cache *c)
    706{
    707	call_rcu(&c->_c.rcu, ipmr_cache_free_rcu);
    708}
    709
    710/* Destroy an unresolved cache entry, killing queued skbs
    711 * and reporting error to netlink readers.
    712 */
    713static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
    714{
    715	struct net *net = read_pnet(&mrt->net);
    716	struct sk_buff *skb;
    717	struct nlmsgerr *e;
    718
    719	atomic_dec(&mrt->cache_resolve_queue_len);
    720
    721	while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved))) {
    722		if (ip_hdr(skb)->version == 0) {
    723			struct nlmsghdr *nlh = skb_pull(skb,
    724							sizeof(struct iphdr));
    725			nlh->nlmsg_type = NLMSG_ERROR;
    726			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
    727			skb_trim(skb, nlh->nlmsg_len);
    728			e = nlmsg_data(nlh);
    729			e->error = -ETIMEDOUT;
    730			memset(&e->msg, 0, sizeof(e->msg));
    731
    732			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
    733		} else {
    734			kfree_skb(skb);
    735		}
    736	}
    737
    738	ipmr_cache_free(c);
    739}
    740
    741/* Timer process for the unresolved queue. */
    742static void ipmr_expire_process(struct timer_list *t)
    743{
    744	struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
    745	struct mr_mfc *c, *next;
    746	unsigned long expires;
    747	unsigned long now;
    748
    749	if (!spin_trylock(&mfc_unres_lock)) {
    750		mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
    751		return;
    752	}
    753
    754	if (list_empty(&mrt->mfc_unres_queue))
    755		goto out;
    756
    757	now = jiffies;
    758	expires = 10*HZ;
    759
    760	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
    761		if (time_after(c->mfc_un.unres.expires, now)) {
    762			unsigned long interval = c->mfc_un.unres.expires - now;
    763			if (interval < expires)
    764				expires = interval;
    765			continue;
    766		}
    767
    768		list_del(&c->list);
    769		mroute_netlink_event(mrt, (struct mfc_cache *)c, RTM_DELROUTE);
    770		ipmr_destroy_unres(mrt, (struct mfc_cache *)c);
    771	}
    772
    773	if (!list_empty(&mrt->mfc_unres_queue))
    774		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
    775
    776out:
    777	spin_unlock(&mfc_unres_lock);
    778}
    779
    780/* Fill oifs list. It is called under write locked mrt_lock. */
    781static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
    782				   unsigned char *ttls)
    783{
    784	int vifi;
    785
    786	cache->mfc_un.res.minvif = MAXVIFS;
    787	cache->mfc_un.res.maxvif = 0;
    788	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
    789
    790	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
    791		if (VIF_EXISTS(mrt, vifi) &&
    792		    ttls[vifi] && ttls[vifi] < 255) {
    793			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
    794			if (cache->mfc_un.res.minvif > vifi)
    795				cache->mfc_un.res.minvif = vifi;
    796			if (cache->mfc_un.res.maxvif <= vifi)
    797				cache->mfc_un.res.maxvif = vifi + 1;
    798		}
    799	}
    800	cache->mfc_un.res.lastuse = jiffies;
    801}
    802
    803static int vif_add(struct net *net, struct mr_table *mrt,
    804		   struct vifctl *vifc, int mrtsock)
    805{
    806	struct netdev_phys_item_id ppid = { };
    807	int vifi = vifc->vifc_vifi;
    808	struct vif_device *v = &mrt->vif_table[vifi];
    809	struct net_device *dev;
    810	struct in_device *in_dev;
    811	int err;
    812
    813	/* Is vif busy ? */
    814	if (VIF_EXISTS(mrt, vifi))
    815		return -EADDRINUSE;
    816
    817	switch (vifc->vifc_flags) {
    818	case VIFF_REGISTER:
    819		if (!ipmr_pimsm_enabled())
    820			return -EINVAL;
    821		/* Special Purpose VIF in PIM
    822		 * All the packets will be sent to the daemon
    823		 */
    824		if (mrt->mroute_reg_vif_num >= 0)
    825			return -EADDRINUSE;
    826		dev = ipmr_reg_vif(net, mrt);
    827		if (!dev)
    828			return -ENOBUFS;
    829		err = dev_set_allmulti(dev, 1);
    830		if (err) {
    831			unregister_netdevice(dev);
    832			dev_put(dev);
    833			return err;
    834		}
    835		break;
    836	case VIFF_TUNNEL:
    837		dev = ipmr_new_tunnel(net, vifc);
    838		if (IS_ERR(dev))
    839			return PTR_ERR(dev);
    840		break;
    841	case VIFF_USE_IFINDEX:
    842	case 0:
    843		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
    844			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
    845			if (dev && !__in_dev_get_rtnl(dev)) {
    846				dev_put(dev);
    847				return -EADDRNOTAVAIL;
    848			}
    849		} else {
    850			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
    851		}
    852		if (!dev)
    853			return -EADDRNOTAVAIL;
    854		err = dev_set_allmulti(dev, 1);
    855		if (err) {
    856			dev_put(dev);
    857			return err;
    858		}
    859		break;
    860	default:
    861		return -EINVAL;
    862	}
    863
    864	in_dev = __in_dev_get_rtnl(dev);
    865	if (!in_dev) {
    866		dev_put(dev);
    867		return -EADDRNOTAVAIL;
    868	}
    869	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
    870	inet_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING,
    871				    dev->ifindex, &in_dev->cnf);
    872	ip_rt_multicast_event(in_dev);
    873
    874	/* Fill in the VIF structures */
    875	vif_device_init(v, dev, vifc->vifc_rate_limit,
    876			vifc->vifc_threshold,
    877			vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0),
    878			(VIFF_TUNNEL | VIFF_REGISTER));
    879
    880	err = dev_get_port_parent_id(dev, &ppid, true);
    881	if (err == 0) {
    882		memcpy(v->dev_parent_id.id, ppid.id, ppid.id_len);
    883		v->dev_parent_id.id_len = ppid.id_len;
    884	} else {
    885		v->dev_parent_id.id_len = 0;
    886	}
    887
    888	v->local = vifc->vifc_lcl_addr.s_addr;
    889	v->remote = vifc->vifc_rmt_addr.s_addr;
    890
    891	/* And finish update writing critical data */
    892	write_lock_bh(&mrt_lock);
    893	v->dev = dev;
    894	netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
    895	if (v->flags & VIFF_REGISTER)
    896		mrt->mroute_reg_vif_num = vifi;
    897	if (vifi+1 > mrt->maxvif)
    898		mrt->maxvif = vifi+1;
    899	write_unlock_bh(&mrt_lock);
    900	call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id);
    901	return 0;
    902}
    903
    904/* called with rcu_read_lock() */
    905static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
    906					 __be32 origin,
    907					 __be32 mcastgrp)
    908{
    909	struct mfc_cache_cmp_arg arg = {
    910			.mfc_mcastgrp = mcastgrp,
    911			.mfc_origin = origin
    912	};
    913
    914	return mr_mfc_find(mrt, &arg);
    915}
    916
    917/* Look for a (*,G) entry */
    918static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt,
    919					     __be32 mcastgrp, int vifi)
    920{
    921	struct mfc_cache_cmp_arg arg = {
    922			.mfc_mcastgrp = mcastgrp,
    923			.mfc_origin = htonl(INADDR_ANY)
    924	};
    925
    926	if (mcastgrp == htonl(INADDR_ANY))
    927		return mr_mfc_find_any_parent(mrt, vifi);
    928	return mr_mfc_find_any(mrt, vifi, &arg);
    929}
    930
    931/* Look for a (S,G,iif) entry if parent != -1 */
    932static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt,
    933						__be32 origin, __be32 mcastgrp,
    934						int parent)
    935{
    936	struct mfc_cache_cmp_arg arg = {
    937			.mfc_mcastgrp = mcastgrp,
    938			.mfc_origin = origin,
    939	};
    940
    941	return mr_mfc_find_parent(mrt, &arg, parent);
    942}
    943
    944/* Allocate a multicast cache entry */
    945static struct mfc_cache *ipmr_cache_alloc(void)
    946{
    947	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
    948
    949	if (c) {
    950		c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
    951		c->_c.mfc_un.res.minvif = MAXVIFS;
    952		c->_c.free = ipmr_cache_free_rcu;
    953		refcount_set(&c->_c.mfc_un.res.refcount, 1);
    954	}
    955	return c;
    956}
    957
    958static struct mfc_cache *ipmr_cache_alloc_unres(void)
    959{
    960	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
    961
    962	if (c) {
    963		skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
    964		c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
    965	}
    966	return c;
    967}
    968
    969/* A cache entry has gone into a resolved state from queued */
    970static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
    971			       struct mfc_cache *uc, struct mfc_cache *c)
    972{
    973	struct sk_buff *skb;
    974	struct nlmsgerr *e;
    975
    976	/* Play the pending entries through our router */
    977	while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
    978		if (ip_hdr(skb)->version == 0) {
    979			struct nlmsghdr *nlh = skb_pull(skb,
    980							sizeof(struct iphdr));
    981
    982			if (mr_fill_mroute(mrt, skb, &c->_c,
    983					   nlmsg_data(nlh)) > 0) {
    984				nlh->nlmsg_len = skb_tail_pointer(skb) -
    985						 (u8 *)nlh;
    986			} else {
    987				nlh->nlmsg_type = NLMSG_ERROR;
    988				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
    989				skb_trim(skb, nlh->nlmsg_len);
    990				e = nlmsg_data(nlh);
    991				e->error = -EMSGSIZE;
    992				memset(&e->msg, 0, sizeof(e->msg));
    993			}
    994
    995			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
    996		} else {
    997			ip_mr_forward(net, mrt, skb->dev, skb, c, 0);
    998		}
    999	}
   1000}
   1001
   1002/* Bounce a cache query up to mrouted and netlink.
   1003 *
   1004 * Called under mrt_lock.
   1005 */
   1006static int ipmr_cache_report(struct mr_table *mrt,
   1007			     struct sk_buff *pkt, vifi_t vifi, int assert)
   1008{
   1009	const int ihl = ip_hdrlen(pkt);
   1010	struct sock *mroute_sk;
   1011	struct igmphdr *igmp;
   1012	struct igmpmsg *msg;
   1013	struct sk_buff *skb;
   1014	int ret;
   1015
   1016	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE)
   1017		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
   1018	else
   1019		skb = alloc_skb(128, GFP_ATOMIC);
   1020
   1021	if (!skb)
   1022		return -ENOBUFS;
   1023
   1024	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) {
   1025		/* Ugly, but we have no choice with this interface.
   1026		 * Duplicate old header, fix ihl, length etc.
   1027		 * And all this only to mangle msg->im_msgtype and
   1028		 * to set msg->im_mbz to "mbz" :-)
   1029		 */
   1030		skb_push(skb, sizeof(struct iphdr));
   1031		skb_reset_network_header(skb);
   1032		skb_reset_transport_header(skb);
   1033		msg = (struct igmpmsg *)skb_network_header(skb);
   1034		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
   1035		msg->im_msgtype = assert;
   1036		msg->im_mbz = 0;
   1037		if (assert == IGMPMSG_WRVIFWHOLE) {
   1038			msg->im_vif = vifi;
   1039			msg->im_vif_hi = vifi >> 8;
   1040		} else {
   1041			msg->im_vif = mrt->mroute_reg_vif_num;
   1042			msg->im_vif_hi = mrt->mroute_reg_vif_num >> 8;
   1043		}
   1044		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
   1045		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
   1046					     sizeof(struct iphdr));
   1047	} else {
   1048		/* Copy the IP header */
   1049		skb_set_network_header(skb, skb->len);
   1050		skb_put(skb, ihl);
   1051		skb_copy_to_linear_data(skb, pkt->data, ihl);
   1052		/* Flag to the kernel this is a route add */
   1053		ip_hdr(skb)->protocol = 0;
   1054		msg = (struct igmpmsg *)skb_network_header(skb);
   1055		msg->im_vif = vifi;
   1056		msg->im_vif_hi = vifi >> 8;
   1057		skb_dst_set(skb, dst_clone(skb_dst(pkt)));
   1058		/* Add our header */
   1059		igmp = skb_put(skb, sizeof(struct igmphdr));
   1060		igmp->type = assert;
   1061		msg->im_msgtype = assert;
   1062		igmp->code = 0;
   1063		ip_hdr(skb)->tot_len = htons(skb->len);	/* Fix the length */
   1064		skb->transport_header = skb->network_header;
   1065	}
   1066
   1067	rcu_read_lock();
   1068	mroute_sk = rcu_dereference(mrt->mroute_sk);
   1069	if (!mroute_sk) {
   1070		rcu_read_unlock();
   1071		kfree_skb(skb);
   1072		return -EINVAL;
   1073	}
   1074
   1075	igmpmsg_netlink_event(mrt, skb);
   1076
   1077	/* Deliver to mrouted */
   1078	ret = sock_queue_rcv_skb(mroute_sk, skb);
   1079	rcu_read_unlock();
   1080	if (ret < 0) {
   1081		net_warn_ratelimited("mroute: pending queue full, dropping entries\n");
   1082		kfree_skb(skb);
   1083	}
   1084
   1085	return ret;
   1086}
   1087
    1088/* Queue a packet for resolution. Creates an unresolved cache entry under mfc_unres_lock if one does not already exist. */
   1089static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
   1090				 struct sk_buff *skb, struct net_device *dev)
   1091{
   1092	const struct iphdr *iph = ip_hdr(skb);
   1093	struct mfc_cache *c;
   1094	bool found = false;
   1095	int err;
   1096
   1097	spin_lock_bh(&mfc_unres_lock);
   1098	list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
   1099		if (c->mfc_mcastgrp == iph->daddr &&
   1100		    c->mfc_origin == iph->saddr) {
   1101			found = true;
   1102			break;
   1103		}
   1104	}
   1105
   1106	if (!found) {
   1107		/* Create a new entry if allowable */
   1108		c = ipmr_cache_alloc_unres();
   1109		if (!c) {
   1110			spin_unlock_bh(&mfc_unres_lock);
   1111
   1112			kfree_skb(skb);
   1113			return -ENOBUFS;
   1114		}
   1115
   1116		/* Fill in the new cache entry */
   1117		c->_c.mfc_parent = -1;
   1118		c->mfc_origin	= iph->saddr;
   1119		c->mfc_mcastgrp	= iph->daddr;
   1120
   1121		/* Reflect first query at mrouted. */
   1122		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
   1123
   1124		if (err < 0) {
   1125			/* If the report failed throw the cache entry
   1126			   out - Brad Parker
   1127			 */
   1128			spin_unlock_bh(&mfc_unres_lock);
   1129
   1130			ipmr_cache_free(c);
   1131			kfree_skb(skb);
   1132			return err;
   1133		}
   1134
   1135		atomic_inc(&mrt->cache_resolve_queue_len);
   1136		list_add(&c->_c.list, &mrt->mfc_unres_queue);
   1137		mroute_netlink_event(mrt, c, RTM_NEWROUTE);
   1138
   1139		if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
   1140			mod_timer(&mrt->ipmr_expire_timer,
   1141				  c->_c.mfc_un.unres.expires);
   1142	}
   1143
   1144	/* See if we can append the packet */
   1145	if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
   1146		kfree_skb(skb);
   1147		err = -ENOBUFS;
   1148	} else {
   1149		if (dev) {
   1150			skb->dev = dev;
   1151			skb->skb_iif = dev->ifindex;
   1152		}
   1153		skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
   1154		err = 0;
   1155	}
   1156
   1157	spin_unlock_bh(&mfc_unres_lock);
   1158	return err;
   1159}
   1160
   1161/* MFC cache manipulation by user space mroute daemon */
   1162
   1163static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
   1164{
   1165	struct net *net = read_pnet(&mrt->net);
   1166	struct mfc_cache *c;
   1167
   1168	/* The entries are added/deleted only under RTNL */
   1169	rcu_read_lock();
   1170	c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
   1171				   mfc->mfcc_mcastgrp.s_addr, parent);
   1172	rcu_read_unlock();
   1173	if (!c)
   1174		return -ENOENT;
   1175	rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ipmr_rht_params);
   1176	list_del_rcu(&c->_c.list);
   1177	call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
   1178	mroute_netlink_event(mrt, c, RTM_DELROUTE);
   1179	mr_cache_put(&c->_c);
   1180
   1181	return 0;
   1182}
   1183
   1184static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
   1185			struct mfcctl *mfc, int mrtsock, int parent)
   1186{
   1187	struct mfc_cache *uc, *c;
   1188	struct mr_mfc *_uc;
   1189	bool found;
   1190	int ret;
   1191
   1192	if (mfc->mfcc_parent >= MAXVIFS)
   1193		return -ENFILE;
   1194
   1195	/* The entries are added/deleted only under RTNL */
   1196	rcu_read_lock();
   1197	c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr,
   1198				   mfc->mfcc_mcastgrp.s_addr, parent);
   1199	rcu_read_unlock();
   1200	if (c) {
   1201		write_lock_bh(&mrt_lock);
   1202		c->_c.mfc_parent = mfc->mfcc_parent;
   1203		ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
   1204		if (!mrtsock)
   1205			c->_c.mfc_flags |= MFC_STATIC;
   1206		write_unlock_bh(&mrt_lock);
   1207		call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
   1208					      mrt->id);
   1209		mroute_netlink_event(mrt, c, RTM_NEWROUTE);
   1210		return 0;
   1211	}
   1212
   1213	if (mfc->mfcc_mcastgrp.s_addr != htonl(INADDR_ANY) &&
   1214	    !ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
   1215		return -EINVAL;
   1216
   1217	c = ipmr_cache_alloc();
   1218	if (!c)
   1219		return -ENOMEM;
   1220
   1221	c->mfc_origin = mfc->mfcc_origin.s_addr;
   1222	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
   1223	c->_c.mfc_parent = mfc->mfcc_parent;
   1224	ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
   1225	if (!mrtsock)
   1226		c->_c.mfc_flags |= MFC_STATIC;
   1227
   1228	ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
   1229				  ipmr_rht_params);
   1230	if (ret) {
   1231		pr_err("ipmr: rhtable insert error %d\n", ret);
   1232		ipmr_cache_free(c);
   1233		return ret;
   1234	}
   1235	list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
   1236	/* Check to see if we resolved a queued list. If so we
   1237	 * need to send on the frames and tidy up.
   1238	 */
   1239	found = false;
   1240	spin_lock_bh(&mfc_unres_lock);
   1241	list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
   1242		uc = (struct mfc_cache *)_uc;
   1243		if (uc->mfc_origin == c->mfc_origin &&
   1244		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
   1245			list_del(&_uc->list);
   1246			atomic_dec(&mrt->cache_resolve_queue_len);
   1247			found = true;
   1248			break;
   1249		}
   1250	}
   1251	if (list_empty(&mrt->mfc_unres_queue))
   1252		del_timer(&mrt->ipmr_expire_timer);
   1253	spin_unlock_bh(&mfc_unres_lock);
   1254
   1255	if (found) {
   1256		ipmr_cache_resolve(net, mrt, uc, c);
   1257		ipmr_cache_free(uc);
   1258	}
   1259	call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id);
   1260	mroute_netlink_event(mrt, c, RTM_NEWROUTE);
   1261	return 0;
   1262}
   1263
   1264/* Close the multicast socket, and clear the vif tables etc */
   1265static void mroute_clean_tables(struct mr_table *mrt, int flags)
   1266{
   1267	struct net *net = read_pnet(&mrt->net);
   1268	struct mr_mfc *c, *tmp;
   1269	struct mfc_cache *cache;
   1270	LIST_HEAD(list);
   1271	int i;
   1272
   1273	/* Shut down all active vif entries */
   1274	if (flags & (MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC)) {
   1275		for (i = 0; i < mrt->maxvif; i++) {
   1276			if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
   1277			     !(flags & MRT_FLUSH_VIFS_STATIC)) ||
   1278			    (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT_FLUSH_VIFS)))
   1279				continue;
   1280			vif_delete(mrt, i, 0, &list);
   1281		}
   1282		unregister_netdevice_many(&list);
   1283	}
   1284
   1285	/* Wipe the cache */
   1286	if (flags & (MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC)) {
   1287		list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
   1288			if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC_STATIC)) ||
   1289			    (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC)))
   1290				continue;
   1291			rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
   1292			list_del_rcu(&c->list);
   1293			cache = (struct mfc_cache *)c;
   1294			call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
   1295						      mrt->id);
   1296			mroute_netlink_event(mrt, cache, RTM_DELROUTE);
   1297			mr_cache_put(c);
   1298		}
   1299	}
   1300
   1301	if (flags & MRT_FLUSH_MFC) {
   1302		if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
   1303			spin_lock_bh(&mfc_unres_lock);
   1304			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
   1305				list_del(&c->list);
   1306				cache = (struct mfc_cache *)c;
   1307				mroute_netlink_event(mrt, cache, RTM_DELROUTE);
   1308				ipmr_destroy_unres(mrt, cache);
   1309			}
   1310			spin_unlock_bh(&mfc_unres_lock);
   1311		}
   1312	}
   1313}
   1314
   1315/* called from ip_ra_control(), before an RCU grace period,
   1316 * we don't need to call synchronize_rcu() here
   1317 */
   1318static void mrtsock_destruct(struct sock *sk)
   1319{
   1320	struct net *net = sock_net(sk);
   1321	struct mr_table *mrt;
   1322
   1323	rtnl_lock();
   1324	ipmr_for_each_table(mrt, net) {
   1325		if (sk == rtnl_dereference(mrt->mroute_sk)) {
   1326			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
   1327			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
   1328						    NETCONFA_MC_FORWARDING,
   1329						    NETCONFA_IFINDEX_ALL,
   1330						    net->ipv4.devconf_all);
   1331			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
   1332			mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_MFC);
   1333		}
   1334	}
   1335	rtnl_unlock();
   1336}
   1337
   1338/* Socket options and virtual interface manipulation. The whole
   1339 * virtual interface system is a complete heap, but unfortunately
   1340 * that's how BSD mrouted happens to think. Maybe one day with a proper
   1341 * MOSPF/PIM router set up we can clean this up.
   1342 */
   1343
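        /* Illustrative sketch (not part of the original file): a userspace
         * daemon in the mrouted/pimd family drives the options handled below
         * roughly like this. It assumes root, a raw IGMP socket, and
         * <linux/mroute.h> plus <arpa/inet.h>; the addresses are made up.
         *
         *	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
         *	int one = 1;
         *
         *	// Claim the mroute socket for the default table.
         *	setsockopt(fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
         *
         *	// Register two virtual interfaces, vif 0 and vif 1.
         *	struct vifctl vif = { .vifc_vifi = 0, .vifc_threshold = 1 };
         *	vif.vifc_lcl_addr.s_addr = inet_addr("192.0.2.1");
         *	setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vif, sizeof(vif));
         *	vif.vifc_vifi = 1;
         *	vif.vifc_lcl_addr.s_addr = inet_addr("192.0.2.129");
         *	setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vif, sizeof(vif));
         *
         *	// Install an (S,G) entry: traffic from 198.51.100.7 to group
         *	// 239.1.1.1 arriving on vif 0 is forwarded out vif 1.
         *	struct mfcctl mfc = { .mfcc_parent = 0 };
         *	mfc.mfcc_origin.s_addr   = inet_addr("198.51.100.7");
         *	mfc.mfcc_mcastgrp.s_addr = inet_addr("239.1.1.1");
         *	mfc.mfcc_ttls[1] = 1;
         *	setsockopt(fd, IPPROTO_IP, MRT_ADD_MFC, &mfc, sizeof(mfc));
         *
         *	// Unknown (S,G) traffic arrives as IGMPMSG_NOCACHE upcalls on
         *	// fd; MRT_DONE (or closing fd) tears the state back down.
         */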
   1344int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
   1345			 unsigned int optlen)
   1346{
   1347	struct net *net = sock_net(sk);
   1348	int val, ret = 0, parent = 0;
   1349	struct mr_table *mrt;
   1350	struct vifctl vif;
   1351	struct mfcctl mfc;
   1352	bool do_wrvifwhole;
   1353	u32 uval;
   1354
   1355	/* There's one exception to the lock - MRT_DONE which needs to unlock */
   1356	rtnl_lock();
   1357	if (sk->sk_type != SOCK_RAW ||
   1358	    inet_sk(sk)->inet_num != IPPROTO_IGMP) {
   1359		ret = -EOPNOTSUPP;
   1360		goto out_unlock;
   1361	}
   1362
   1363	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
   1364	if (!mrt) {
   1365		ret = -ENOENT;
   1366		goto out_unlock;
   1367	}
   1368	if (optname != MRT_INIT) {
   1369		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
   1370		    !ns_capable(net->user_ns, CAP_NET_ADMIN)) {
   1371			ret = -EACCES;
   1372			goto out_unlock;
   1373		}
   1374	}
   1375
   1376	switch (optname) {
   1377	case MRT_INIT:
   1378		if (optlen != sizeof(int)) {
   1379			ret = -EINVAL;
   1380			break;
   1381		}
   1382		if (rtnl_dereference(mrt->mroute_sk)) {
   1383			ret = -EADDRINUSE;
   1384			break;
   1385		}
   1386
   1387		ret = ip_ra_control(sk, 1, mrtsock_destruct);
   1388		if (ret == 0) {
   1389			rcu_assign_pointer(mrt->mroute_sk, sk);
   1390			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
   1391			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
   1392						    NETCONFA_MC_FORWARDING,
   1393						    NETCONFA_IFINDEX_ALL,
   1394						    net->ipv4.devconf_all);
   1395		}
   1396		break;
   1397	case MRT_DONE:
   1398		if (sk != rcu_access_pointer(mrt->mroute_sk)) {
   1399			ret = -EACCES;
   1400		} else {
   1401			/* We need to unlock here because mrtsock_destruct takes
   1402			 * care of rtnl itself and we can't change that due to
   1403			 * the IP_ROUTER_ALERT setsockopt which runs without it.
   1404			 */
   1405			rtnl_unlock();
   1406			ret = ip_ra_control(sk, 0, NULL);
   1407			goto out;
   1408		}
   1409		break;
   1410	case MRT_ADD_VIF:
   1411	case MRT_DEL_VIF:
   1412		if (optlen != sizeof(vif)) {
   1413			ret = -EINVAL;
   1414			break;
   1415		}
   1416		if (copy_from_sockptr(&vif, optval, sizeof(vif))) {
   1417			ret = -EFAULT;
   1418			break;
   1419		}
   1420		if (vif.vifc_vifi >= MAXVIFS) {
   1421			ret = -ENFILE;
   1422			break;
   1423		}
   1424		if (optname == MRT_ADD_VIF) {
   1425			ret = vif_add(net, mrt, &vif,
   1426				      sk == rtnl_dereference(mrt->mroute_sk));
   1427		} else {
   1428			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
   1429		}
   1430		break;
   1431	/* Manipulate the forwarding caches. These live
   1432	 * in a sort of kernel/user symbiosis.
   1433	 */
   1434	case MRT_ADD_MFC:
   1435	case MRT_DEL_MFC:
   1436		parent = -1;
   1437		fallthrough;
   1438	case MRT_ADD_MFC_PROXY:
   1439	case MRT_DEL_MFC_PROXY:
   1440		if (optlen != sizeof(mfc)) {
   1441			ret = -EINVAL;
   1442			break;
   1443		}
   1444		if (copy_from_sockptr(&mfc, optval, sizeof(mfc))) {
   1445			ret = -EFAULT;
   1446			break;
   1447		}
   1448		if (parent == 0)
   1449			parent = mfc.mfcc_parent;
   1450		if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY)
   1451			ret = ipmr_mfc_delete(mrt, &mfc, parent);
   1452		else
   1453			ret = ipmr_mfc_add(net, mrt, &mfc,
   1454					   sk == rtnl_dereference(mrt->mroute_sk),
   1455					   parent);
   1456		break;
   1457	case MRT_FLUSH:
   1458		if (optlen != sizeof(val)) {
   1459			ret = -EINVAL;
   1460			break;
   1461		}
   1462		if (copy_from_sockptr(&val, optval, sizeof(val))) {
   1463			ret = -EFAULT;
   1464			break;
   1465		}
   1466		mroute_clean_tables(mrt, val);
   1467		break;
   1468	/* Control PIM assert. */
   1469	case MRT_ASSERT:
   1470		if (optlen != sizeof(val)) {
   1471			ret = -EINVAL;
   1472			break;
   1473		}
   1474		if (copy_from_sockptr(&val, optval, sizeof(val))) {
   1475			ret = -EFAULT;
   1476			break;
   1477		}
   1478		mrt->mroute_do_assert = val;
   1479		break;
   1480	case MRT_PIM:
   1481		if (!ipmr_pimsm_enabled()) {
   1482			ret = -ENOPROTOOPT;
   1483			break;
   1484		}
   1485		if (optlen != sizeof(val)) {
   1486			ret = -EINVAL;
   1487			break;
   1488		}
   1489		if (copy_from_sockptr(&val, optval, sizeof(val))) {
   1490			ret = -EFAULT;
   1491			break;
   1492		}
   1493
   1494		do_wrvifwhole = (val == IGMPMSG_WRVIFWHOLE);
   1495		val = !!val;
   1496		if (val != mrt->mroute_do_pim) {
   1497			mrt->mroute_do_pim = val;
   1498			mrt->mroute_do_assert = val;
   1499			mrt->mroute_do_wrvifwhole = do_wrvifwhole;
   1500		}
   1501		break;
   1502	case MRT_TABLE:
   1503		if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) {
   1504			ret = -ENOPROTOOPT;
   1505			break;
   1506		}
   1507		if (optlen != sizeof(uval)) {
   1508			ret = -EINVAL;
   1509			break;
   1510		}
   1511		if (copy_from_sockptr(&uval, optval, sizeof(uval))) {
   1512			ret = -EFAULT;
   1513			break;
   1514		}
   1515
   1516		if (sk == rtnl_dereference(mrt->mroute_sk)) {
   1517			ret = -EBUSY;
   1518		} else {
   1519			mrt = ipmr_new_table(net, uval);
   1520			if (IS_ERR(mrt))
   1521				ret = PTR_ERR(mrt);
   1522			else
   1523				raw_sk(sk)->ipmr_table = uval;
   1524		}
   1525		break;
   1526	/* Spurious command, or MRT_VERSION which you cannot set. */
   1527	default:
   1528		ret = -ENOPROTOOPT;
   1529	}
   1530out_unlock:
   1531	rtnl_unlock();
   1532out:
   1533	return ret;
   1534}
   1535
   1536/* Getsock opt support for the multicast routing system. */
   1537int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
   1538{
   1539	int olr;
   1540	int val;
   1541	struct net *net = sock_net(sk);
   1542	struct mr_table *mrt;
   1543
   1544	if (sk->sk_type != SOCK_RAW ||
   1545	    inet_sk(sk)->inet_num != IPPROTO_IGMP)
   1546		return -EOPNOTSUPP;
   1547
   1548	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
   1549	if (!mrt)
   1550		return -ENOENT;
   1551
   1552	switch (optname) {
   1553	case MRT_VERSION:
   1554		val = 0x0305;
   1555		break;
   1556	case MRT_PIM:
   1557		if (!ipmr_pimsm_enabled())
   1558			return -ENOPROTOOPT;
   1559		val = mrt->mroute_do_pim;
   1560		break;
   1561	case MRT_ASSERT:
   1562		val = mrt->mroute_do_assert;
   1563		break;
   1564	default:
   1565		return -ENOPROTOOPT;
   1566	}
   1567
   1568	if (get_user(olr, optlen))
   1569		return -EFAULT;
   1570	olr = min_t(unsigned int, olr, sizeof(int));
   1571	if (olr < 0)
   1572		return -EINVAL;
   1573	if (put_user(olr, optlen))
   1574		return -EFAULT;
   1575	if (copy_to_user(optval, &val, olr))
   1576		return -EFAULT;
   1577	return 0;
   1578}
   1579
   1580/* The IP multicast ioctl support routines. */
   1581int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
   1582{
   1583	struct sioc_sg_req sr;
   1584	struct sioc_vif_req vr;
   1585	struct vif_device *vif;
   1586	struct mfc_cache *c;
   1587	struct net *net = sock_net(sk);
   1588	struct mr_table *mrt;
   1589
   1590	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
   1591	if (!mrt)
   1592		return -ENOENT;
   1593
   1594	switch (cmd) {
   1595	case SIOCGETVIFCNT:
   1596		if (copy_from_user(&vr, arg, sizeof(vr)))
   1597			return -EFAULT;
   1598		if (vr.vifi >= mrt->maxvif)
   1599			return -EINVAL;
   1600		vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
   1601		read_lock(&mrt_lock);
   1602		vif = &mrt->vif_table[vr.vifi];
   1603		if (VIF_EXISTS(mrt, vr.vifi)) {
   1604			vr.icount = vif->pkt_in;
   1605			vr.ocount = vif->pkt_out;
   1606			vr.ibytes = vif->bytes_in;
   1607			vr.obytes = vif->bytes_out;
   1608			read_unlock(&mrt_lock);
   1609
   1610			if (copy_to_user(arg, &vr, sizeof(vr)))
   1611				return -EFAULT;
   1612			return 0;
   1613		}
   1614		read_unlock(&mrt_lock);
   1615		return -EADDRNOTAVAIL;
   1616	case SIOCGETSGCNT:
   1617		if (copy_from_user(&sr, arg, sizeof(sr)))
   1618			return -EFAULT;
   1619
   1620		rcu_read_lock();
   1621		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
   1622		if (c) {
   1623			sr.pktcnt = c->_c.mfc_un.res.pkt;
   1624			sr.bytecnt = c->_c.mfc_un.res.bytes;
   1625			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
   1626			rcu_read_unlock();
   1627
   1628			if (copy_to_user(arg, &sr, sizeof(sr)))
   1629				return -EFAULT;
   1630			return 0;
   1631		}
   1632		rcu_read_unlock();
   1633		return -EADDRNOTAVAIL;
   1634	default:
   1635		return -ENOIOCTLCMD;
   1636	}
   1637}
   1638
   1639#ifdef CONFIG_COMPAT
   1640struct compat_sioc_sg_req {
   1641	struct in_addr src;
   1642	struct in_addr grp;
   1643	compat_ulong_t pktcnt;
   1644	compat_ulong_t bytecnt;
   1645	compat_ulong_t wrong_if;
   1646};
   1647
   1648struct compat_sioc_vif_req {
   1649	vifi_t	vifi;		/* Which iface */
   1650	compat_ulong_t icount;
   1651	compat_ulong_t ocount;
   1652	compat_ulong_t ibytes;
   1653	compat_ulong_t obytes;
   1654};
   1655
   1656int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
   1657{
   1658	struct compat_sioc_sg_req sr;
   1659	struct compat_sioc_vif_req vr;
   1660	struct vif_device *vif;
   1661	struct mfc_cache *c;
   1662	struct net *net = sock_net(sk);
   1663	struct mr_table *mrt;
   1664
   1665	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
   1666	if (!mrt)
   1667		return -ENOENT;
   1668
   1669	switch (cmd) {
   1670	case SIOCGETVIFCNT:
   1671		if (copy_from_user(&vr, arg, sizeof(vr)))
   1672			return -EFAULT;
   1673		if (vr.vifi >= mrt->maxvif)
   1674			return -EINVAL;
   1675		vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
   1676		read_lock(&mrt_lock);
   1677		vif = &mrt->vif_table[vr.vifi];
   1678		if (VIF_EXISTS(mrt, vr.vifi)) {
   1679			vr.icount = vif->pkt_in;
   1680			vr.ocount = vif->pkt_out;
   1681			vr.ibytes = vif->bytes_in;
   1682			vr.obytes = vif->bytes_out;
   1683			read_unlock(&mrt_lock);
   1684
   1685			if (copy_to_user(arg, &vr, sizeof(vr)))
   1686				return -EFAULT;
   1687			return 0;
   1688		}
   1689		read_unlock(&mrt_lock);
   1690		return -EADDRNOTAVAIL;
   1691	case SIOCGETSGCNT:
   1692		if (copy_from_user(&sr, arg, sizeof(sr)))
   1693			return -EFAULT;
   1694
   1695		rcu_read_lock();
   1696		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
   1697		if (c) {
   1698			sr.pktcnt = c->_c.mfc_un.res.pkt;
   1699			sr.bytecnt = c->_c.mfc_un.res.bytes;
   1700			sr.wrong_if = c->_c.mfc_un.res.wrong_if;
   1701			rcu_read_unlock();
   1702
   1703			if (copy_to_user(arg, &sr, sizeof(sr)))
   1704				return -EFAULT;
   1705			return 0;
   1706		}
   1707		rcu_read_unlock();
   1708		return -EADDRNOTAVAIL;
   1709	default:
   1710		return -ENOIOCTLCMD;
   1711	}
   1712}
   1713#endif
   1714
   1715static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
   1716{
   1717	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
   1718	struct net *net = dev_net(dev);
   1719	struct mr_table *mrt;
   1720	struct vif_device *v;
   1721	int ct;
   1722
   1723	if (event != NETDEV_UNREGISTER)
   1724		return NOTIFY_DONE;
   1725
   1726	ipmr_for_each_table(mrt, net) {
   1727		v = &mrt->vif_table[0];
   1728		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
   1729			if (v->dev == dev)
   1730				vif_delete(mrt, ct, 1, NULL);
   1731		}
   1732	}
   1733	return NOTIFY_DONE;
   1734}
   1735
   1736static struct notifier_block ip_mr_notifier = {
   1737	.notifier_call = ipmr_device_event,
   1738};
   1739
   1740/* Encapsulate a packet by attaching a valid IPIP header to it.
   1741 * This avoids tunnel drivers and other mess and gives us the speed so
   1742 * important for multicast video.
   1743 */
   1744static void ip_encap(struct net *net, struct sk_buff *skb,
   1745		     __be32 saddr, __be32 daddr)
   1746{
   1747	struct iphdr *iph;
   1748	const struct iphdr *old_iph = ip_hdr(skb);
   1749
   1750	skb_push(skb, sizeof(struct iphdr));
   1751	skb->transport_header = skb->network_header;
   1752	skb_reset_network_header(skb);
   1753	iph = ip_hdr(skb);
   1754
   1755	iph->version	=	4;
   1756	iph->tos	=	old_iph->tos;
   1757	iph->ttl	=	old_iph->ttl;
   1758	iph->frag_off	=	0;
   1759	iph->daddr	=	daddr;
   1760	iph->saddr	=	saddr;
   1761	iph->protocol	=	IPPROTO_IPIP;
   1762	iph->ihl	=	5;
   1763	iph->tot_len	=	htons(skb->len);
   1764	ip_select_ident(net, skb, NULL);
   1765	ip_send_check(iph);
   1766
   1767	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
   1768	nf_reset_ct(skb);
   1769}
   1770
   1771static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
   1772				      struct sk_buff *skb)
   1773{
   1774	struct ip_options *opt = &(IPCB(skb)->opt);
   1775
   1776	IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
   1777	IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
   1778
   1779	if (unlikely(opt->optlen))
   1780		ip_forward_options(skb);
   1781
   1782	return dst_output(net, sk, skb);
   1783}
   1784
   1785#ifdef CONFIG_NET_SWITCHDEV
   1786static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
   1787				   int in_vifi, int out_vifi)
   1788{
   1789	struct vif_device *out_vif = &mrt->vif_table[out_vifi];
   1790	struct vif_device *in_vif = &mrt->vif_table[in_vifi];
   1791
   1792	if (!skb->offload_l3_fwd_mark)
   1793		return false;
   1794	if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len)
   1795		return false;
   1796	return netdev_phys_item_id_same(&out_vif->dev_parent_id,
   1797					&in_vif->dev_parent_id);
   1798}
   1799#else
   1800static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
   1801				   int in_vifi, int out_vifi)
   1802{
   1803	return false;
   1804}
   1805#endif
   1806
   1807/* Processing handlers for ipmr_forward */
   1808
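        /* Transmit one copy of the packet on VIF @vifi: register VIFs get an
         * IGMPMSG_WHOLEPKT upcall instead, tunnel VIFs are IPIP-encapsulated,
         * the MTU is checked, the TTL decremented and the skb is passed
         * through the NF_INET_FORWARD hook to ipmr_forward_finish().
         */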
   1809static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
   1810			    int in_vifi, struct sk_buff *skb, int vifi)
   1811{
   1812	const struct iphdr *iph = ip_hdr(skb);
   1813	struct vif_device *vif = &mrt->vif_table[vifi];
   1814	struct net_device *dev;
   1815	struct rtable *rt;
   1816	struct flowi4 fl4;
   1817	int    encap = 0;
   1818
   1819	if (!vif->dev)
   1820		goto out_free;
   1821
   1822	if (vif->flags & VIFF_REGISTER) {
   1823		vif->pkt_out++;
   1824		vif->bytes_out += skb->len;
   1825		vif->dev->stats.tx_bytes += skb->len;
   1826		vif->dev->stats.tx_packets++;
   1827		ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
   1828		goto out_free;
   1829	}
   1830
   1831	if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi))
   1832		goto out_free;
   1833
   1834	if (vif->flags & VIFF_TUNNEL) {
   1835		rt = ip_route_output_ports(net, &fl4, NULL,
   1836					   vif->remote, vif->local,
   1837					   0, 0,
   1838					   IPPROTO_IPIP,
   1839					   RT_TOS(iph->tos), vif->link);
   1840		if (IS_ERR(rt))
   1841			goto out_free;
   1842		encap = sizeof(struct iphdr);
   1843	} else {
   1844		rt = ip_route_output_ports(net, &fl4, NULL, iph->daddr, 0,
   1845					   0, 0,
   1846					   IPPROTO_IPIP,
   1847					   RT_TOS(iph->tos), vif->link);
   1848		if (IS_ERR(rt))
   1849			goto out_free;
   1850	}
   1851
   1852	dev = rt->dst.dev;
   1853
   1854	if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
    1855		/* Do not fragment multicasts. Alas, IPv4 does not
    1856		 * allow us to send ICMP here, so such packets simply
    1857		 * disappear into a black hole.
    1858		 */
   1859		IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
   1860		ip_rt_put(rt);
   1861		goto out_free;
   1862	}
   1863
   1864	encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;
   1865
   1866	if (skb_cow(skb, encap)) {
   1867		ip_rt_put(rt);
   1868		goto out_free;
   1869	}
   1870
   1871	vif->pkt_out++;
   1872	vif->bytes_out += skb->len;
   1873
   1874	skb_dst_drop(skb);
   1875	skb_dst_set(skb, &rt->dst);
   1876	ip_decrease_ttl(ip_hdr(skb));
   1877
   1878	/* FIXME: forward and output firewalls used to be called here.
   1879	 * What do we do with netfilter? -- RR
   1880	 */
   1881	if (vif->flags & VIFF_TUNNEL) {
   1882		ip_encap(net, skb, vif->local, vif->remote);
   1883		/* FIXME: extra output firewall step used to be here. --RR */
   1884		vif->dev->stats.tx_packets++;
   1885		vif->dev->stats.tx_bytes += skb->len;
   1886	}
   1887
   1888	IPCB(skb)->flags |= IPSKB_FORWARDED;
   1889
    1890	/* RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
    1891	 * not only before forwarding, but also after forwarding on all output
    1892	 * interfaces. Clearly, if the mrouter runs a multicast
    1893	 * program, that program should receive packets regardless of which
    1894	 * interface it is joined on.
    1895	 * If we do not do this, the program has to join on all
    1896	 * interfaces. On the other hand, a multihomed host (or router, but
    1897	 * not an mrouter) cannot join on more than one interface - it would
    1898	 * result in receiving duplicate packets.
    1899	 */
   1900	NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
   1901		net, NULL, skb, skb->dev, dev,
   1902		ipmr_forward_finish);
   1903	return;
   1904
   1905out_free:
   1906	kfree_skb(skb);
   1907}
   1908
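        /* Map a net_device to its VIF index in @mrt, or -1 if it has none. */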
   1909static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
   1910{
   1911	int ct;
   1912
   1913	for (ct = mrt->maxvif-1; ct >= 0; ct--) {
   1914		if (mrt->vif_table[ct].dev == dev)
   1915			break;
   1916	}
   1917	return ct;
   1918}
   1919
   1920/* "local" means that we should preserve one skb (for local delivery) */
   1921static void ip_mr_forward(struct net *net, struct mr_table *mrt,
   1922			  struct net_device *dev, struct sk_buff *skb,
   1923			  struct mfc_cache *c, int local)
   1924{
   1925	int true_vifi = ipmr_find_vif(mrt, dev);
   1926	int psend = -1;
   1927	int vif, ct;
   1928
   1929	vif = c->_c.mfc_parent;
   1930	c->_c.mfc_un.res.pkt++;
   1931	c->_c.mfc_un.res.bytes += skb->len;
   1932	c->_c.mfc_un.res.lastuse = jiffies;
   1933
   1934	if (c->mfc_origin == htonl(INADDR_ANY) && true_vifi >= 0) {
   1935		struct mfc_cache *cache_proxy;
   1936
   1937		/* For an (*,G) entry, we only check that the incoming
   1938		 * interface is part of the static tree.
   1939		 */
   1940		cache_proxy = mr_mfc_find_any_parent(mrt, vif);
   1941		if (cache_proxy &&
   1942		    cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255)
   1943			goto forward;
   1944	}
   1945
   1946	/* Wrong interface: drop packet and (maybe) send PIM assert. */
   1947	if (mrt->vif_table[vif].dev != dev) {
   1948		if (rt_is_output_route(skb_rtable(skb))) {
    1949			/* It is our own packet, looped back.
    1950			 * A very complicated situation...
    1951			 *
    1952			 * The best workaround until the routing daemons are
    1953			 * fixed is not to redistribute a packet if it was
    1954			 * sent through the wrong interface. This means that
    1955			 * multicast applications WILL NOT work for
    1956			 * (S,G) entries whose default multicast route points
    1957			 * to the wrong oif. In any case, it is not a good
    1958			 * idea to run multicast applications on a router.
   1959			 */
   1960			goto dont_forward;
   1961		}
   1962
   1963		c->_c.mfc_un.res.wrong_if++;
   1964
   1965		if (true_vifi >= 0 && mrt->mroute_do_assert &&
    1966		    /* PIM-SM uses asserts when switching from RPT to SPT,
    1967		     * so we cannot check that the packet arrived on an oif.
    1968		     * That is bad, but otherwise we would need to move a pretty
    1969		     * large chunk of pimd into the kernel. Ough... --ANK
   1970		     */
   1971		    (mrt->mroute_do_pim ||
   1972		     c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
   1973		    time_after(jiffies,
   1974			       c->_c.mfc_un.res.last_assert +
   1975			       MFC_ASSERT_THRESH)) {
   1976			c->_c.mfc_un.res.last_assert = jiffies;
   1977			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
   1978			if (mrt->mroute_do_wrvifwhole)
   1979				ipmr_cache_report(mrt, skb, true_vifi,
   1980						  IGMPMSG_WRVIFWHOLE);
   1981		}
   1982		goto dont_forward;
   1983	}
   1984
   1985forward:
   1986	mrt->vif_table[vif].pkt_in++;
   1987	mrt->vif_table[vif].bytes_in += skb->len;
   1988
   1989	/* Forward the frame */
   1990	if (c->mfc_origin == htonl(INADDR_ANY) &&
   1991	    c->mfc_mcastgrp == htonl(INADDR_ANY)) {
   1992		if (true_vifi >= 0 &&
   1993		    true_vifi != c->_c.mfc_parent &&
   1994		    ip_hdr(skb)->ttl >
   1995				c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
   1996			/* It's an (*,*) entry and the packet is not coming from
   1997			 * the upstream: forward the packet to the upstream
   1998			 * only.
   1999			 */
   2000			psend = c->_c.mfc_parent;
   2001			goto last_forward;
   2002		}
   2003		goto dont_forward;
   2004	}
   2005	for (ct = c->_c.mfc_un.res.maxvif - 1;
   2006	     ct >= c->_c.mfc_un.res.minvif; ct--) {
   2007		/* For (*,G) entry, don't forward to the incoming interface */
   2008		if ((c->mfc_origin != htonl(INADDR_ANY) ||
   2009		     ct != true_vifi) &&
   2010		    ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) {
   2011			if (psend != -1) {
   2012				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
   2013
   2014				if (skb2)
   2015					ipmr_queue_xmit(net, mrt, true_vifi,
   2016							skb2, psend);
   2017			}
   2018			psend = ct;
   2019		}
   2020	}
   2021last_forward:
   2022	if (psend != -1) {
   2023		if (local) {
   2024			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
   2025
   2026			if (skb2)
   2027				ipmr_queue_xmit(net, mrt, true_vifi, skb2,
   2028						psend);
   2029		} else {
   2030			ipmr_queue_xmit(net, mrt, true_vifi, skb, psend);
   2031			return;
   2032		}
   2033	}
   2034
   2035dont_forward:
   2036	if (!local)
   2037		kfree_skb(skb);
   2038}
   2039
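        /* Pick the multicast routing table for @skb via the fib rules, keyed
         * on its addresses, TOS, mark and input/output interface.
         */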
   2040static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb)
   2041{
   2042	struct rtable *rt = skb_rtable(skb);
   2043	struct iphdr *iph = ip_hdr(skb);
   2044	struct flowi4 fl4 = {
   2045		.daddr = iph->daddr,
   2046		.saddr = iph->saddr,
   2047		.flowi4_tos = RT_TOS(iph->tos),
   2048		.flowi4_oif = (rt_is_output_route(rt) ?
   2049			       skb->dev->ifindex : 0),
   2050		.flowi4_iif = (rt_is_output_route(rt) ?
   2051			       LOOPBACK_IFINDEX :
   2052			       skb->dev->ifindex),
   2053		.flowi4_mark = skb->mark,
   2054	};
   2055	struct mr_table *mrt;
   2056	int err;
   2057
   2058	err = ipmr_fib_lookup(net, &fl4, &mrt);
   2059	if (err)
   2060		return ERR_PTR(err);
   2061	return mrt;
   2062}
   2063
   2064/* Multicast packets for forwarding arrive here
   2065 * Called with rcu_read_lock();
   2066 */
   2067int ip_mr_input(struct sk_buff *skb)
   2068{
   2069	struct mfc_cache *cache;
   2070	struct net *net = dev_net(skb->dev);
   2071	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
   2072	struct mr_table *mrt;
   2073	struct net_device *dev;
   2074
   2075	/* skb->dev passed in is the loX master dev for vrfs.
   2076	 * As there are no vifs associated with loopback devices,
   2077	 * get the proper interface that does have a vif associated with it.
   2078	 */
   2079	dev = skb->dev;
   2080	if (netif_is_l3_master(skb->dev)) {
   2081		dev = dev_get_by_index_rcu(net, IPCB(skb)->iif);
   2082		if (!dev) {
   2083			kfree_skb(skb);
   2084			return -ENODEV;
   2085		}
   2086	}
   2087
    2088	/* The packet was looped back after forwarding; it must not be
    2089	 * forwarded a second time, but it can still be delivered locally.
   2090	 */
   2091	if (IPCB(skb)->flags & IPSKB_FORWARDED)
   2092		goto dont_forward;
   2093
   2094	mrt = ipmr_rt_fib_lookup(net, skb);
   2095	if (IS_ERR(mrt)) {
   2096		kfree_skb(skb);
   2097		return PTR_ERR(mrt);
   2098	}
   2099	if (!local) {
   2100		if (IPCB(skb)->opt.router_alert) {
   2101			if (ip_call_ra_chain(skb))
   2102				return 0;
   2103		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
    2104			/* IGMPv1 (and broken IGMPv2 implementations such as
    2105			 * Cisco IOS <= 11.2(8)) do not put the router alert
    2106			 * option in IGMP packets destined to routable
    2107			 * groups. This is very bad, because it means
    2108			 * that we can forward NO IGMP messages.
   2109			 */
   2110			struct sock *mroute_sk;
   2111
   2112			mroute_sk = rcu_dereference(mrt->mroute_sk);
   2113			if (mroute_sk) {
   2114				nf_reset_ct(skb);
   2115				raw_rcv(mroute_sk, skb);
   2116				return 0;
   2117			}
   2118		}
   2119	}
   2120
   2121	/* already under rcu_read_lock() */
   2122	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
   2123	if (!cache) {
   2124		int vif = ipmr_find_vif(mrt, dev);
   2125
   2126		if (vif >= 0)
   2127			cache = ipmr_cache_find_any(mrt, ip_hdr(skb)->daddr,
   2128						    vif);
   2129	}
   2130
   2131	/* No usable cache entry */
   2132	if (!cache) {
   2133		int vif;
   2134
   2135		if (local) {
   2136			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
   2137			ip_local_deliver(skb);
   2138			if (!skb2)
   2139				return -ENOBUFS;
   2140			skb = skb2;
   2141		}
   2142
   2143		read_lock(&mrt_lock);
   2144		vif = ipmr_find_vif(mrt, dev);
   2145		if (vif >= 0) {
   2146			int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev);
   2147			read_unlock(&mrt_lock);
   2148
   2149			return err2;
   2150		}
   2151		read_unlock(&mrt_lock);
   2152		kfree_skb(skb);
   2153		return -ENODEV;
   2154	}
   2155
   2156	read_lock(&mrt_lock);
   2157	ip_mr_forward(net, mrt, dev, skb, cache, local);
   2158	read_unlock(&mrt_lock);
   2159
   2160	if (local)
   2161		return ip_local_deliver(skb);
   2162
   2163	return 0;
   2164
   2165dont_forward:
   2166	if (local)
   2167		return ip_local_deliver(skb);
   2168	kfree_skb(skb);
   2169	return 0;
   2170}
   2171
   2172#ifdef CONFIG_IP_PIMSM_V1
    2173/* Handle PIMv1 messages, which arrive encapsulated in IGMP */
   2174int pim_rcv_v1(struct sk_buff *skb)
   2175{
   2176	struct igmphdr *pim;
   2177	struct net *net = dev_net(skb->dev);
   2178	struct mr_table *mrt;
   2179
   2180	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
   2181		goto drop;
   2182
   2183	pim = igmp_hdr(skb);
   2184
   2185	mrt = ipmr_rt_fib_lookup(net, skb);
   2186	if (IS_ERR(mrt))
   2187		goto drop;
   2188	if (!mrt->mroute_do_pim ||
   2189	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
   2190		goto drop;
   2191
   2192	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
   2193drop:
   2194		kfree_skb(skb);
   2195	}
   2196	return 0;
   2197}
   2198#endif
   2199
   2200#ifdef CONFIG_IP_PIMSM_V2
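        /* Handle PIMv2 register messages: validate the header and checksum,
         * then hand the inner packet to __pim_rcv() for decapsulation.
         */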
   2201static int pim_rcv(struct sk_buff *skb)
   2202{
   2203	struct pimreghdr *pim;
   2204	struct net *net = dev_net(skb->dev);
   2205	struct mr_table *mrt;
   2206
   2207	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
   2208		goto drop;
   2209
   2210	pim = (struct pimreghdr *)skb_transport_header(skb);
   2211	if (pim->type != ((PIM_VERSION << 4) | (PIM_TYPE_REGISTER)) ||
   2212	    (pim->flags & PIM_NULL_REGISTER) ||
   2213	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
   2214	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
   2215		goto drop;
   2216
   2217	mrt = ipmr_rt_fib_lookup(net, skb);
   2218	if (IS_ERR(mrt))
   2219		goto drop;
   2220	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
   2221drop:
   2222		kfree_skb(skb);
   2223	}
   2224	return 0;
   2225}
   2226#endif
   2227
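        /* Fill @rtm with the forwarding state for (saddr, daddr).  If no cache
         * entry exists yet, hand the packet to ipmr_cache_unresolved() instead.
         */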
   2228int ipmr_get_route(struct net *net, struct sk_buff *skb,
   2229		   __be32 saddr, __be32 daddr,
   2230		   struct rtmsg *rtm, u32 portid)
   2231{
   2232	struct mfc_cache *cache;
   2233	struct mr_table *mrt;
   2234	int err;
   2235
   2236	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
   2237	if (!mrt)
   2238		return -ENOENT;
   2239
   2240	rcu_read_lock();
   2241	cache = ipmr_cache_find(mrt, saddr, daddr);
   2242	if (!cache && skb->dev) {
   2243		int vif = ipmr_find_vif(mrt, skb->dev);
   2244
   2245		if (vif >= 0)
   2246			cache = ipmr_cache_find_any(mrt, daddr, vif);
   2247	}
   2248	if (!cache) {
   2249		struct sk_buff *skb2;
   2250		struct iphdr *iph;
   2251		struct net_device *dev;
   2252		int vif = -1;
   2253
   2254		dev = skb->dev;
   2255		read_lock(&mrt_lock);
   2256		if (dev)
   2257			vif = ipmr_find_vif(mrt, dev);
   2258		if (vif < 0) {
   2259			read_unlock(&mrt_lock);
   2260			rcu_read_unlock();
   2261			return -ENODEV;
   2262		}
   2263
   2264		skb2 = skb_realloc_headroom(skb, sizeof(struct iphdr));
   2265		if (!skb2) {
   2266			read_unlock(&mrt_lock);
   2267			rcu_read_unlock();
   2268			return -ENOMEM;
   2269		}
   2270
   2271		NETLINK_CB(skb2).portid = portid;
   2272		skb_push(skb2, sizeof(struct iphdr));
   2273		skb_reset_network_header(skb2);
   2274		iph = ip_hdr(skb2);
   2275		iph->ihl = sizeof(struct iphdr) >> 2;
   2276		iph->saddr = saddr;
   2277		iph->daddr = daddr;
   2278		iph->version = 0;
   2279		err = ipmr_cache_unresolved(mrt, vif, skb2, dev);
   2280		read_unlock(&mrt_lock);
   2281		rcu_read_unlock();
   2282		return err;
   2283	}
   2284
   2285	read_lock(&mrt_lock);
   2286	err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
   2287	read_unlock(&mrt_lock);
   2288	rcu_read_unlock();
   2289	return err;
   2290}
   2291
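        /* Fill one rtnetlink route message describing MFC cache entry @c. */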
   2292static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
   2293			    u32 portid, u32 seq, struct mfc_cache *c, int cmd,
   2294			    int flags)
   2295{
   2296	struct nlmsghdr *nlh;
   2297	struct rtmsg *rtm;
   2298	int err;
   2299
   2300	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
   2301	if (!nlh)
   2302		return -EMSGSIZE;
   2303
   2304	rtm = nlmsg_data(nlh);
   2305	rtm->rtm_family   = RTNL_FAMILY_IPMR;
   2306	rtm->rtm_dst_len  = 32;
   2307	rtm->rtm_src_len  = 32;
   2308	rtm->rtm_tos      = 0;
   2309	rtm->rtm_table    = mrt->id;
   2310	if (nla_put_u32(skb, RTA_TABLE, mrt->id))
   2311		goto nla_put_failure;
   2312	rtm->rtm_type     = RTN_MULTICAST;
   2313	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
   2314	if (c->_c.mfc_flags & MFC_STATIC)
   2315		rtm->rtm_protocol = RTPROT_STATIC;
   2316	else
   2317		rtm->rtm_protocol = RTPROT_MROUTED;
   2318	rtm->rtm_flags    = 0;
   2319
   2320	if (nla_put_in_addr(skb, RTA_SRC, c->mfc_origin) ||
   2321	    nla_put_in_addr(skb, RTA_DST, c->mfc_mcastgrp))
   2322		goto nla_put_failure;
   2323	err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
   2324	/* do not break the dump if cache is unresolved */
   2325	if (err < 0 && err != -ENOENT)
   2326		goto nla_put_failure;
   2327
   2328	nlmsg_end(skb, nlh);
   2329	return 0;
   2330
   2331nla_put_failure:
   2332	nlmsg_cancel(skb, nlh);
   2333	return -EMSGSIZE;
   2334}
   2335
   2336static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
   2337			     u32 portid, u32 seq, struct mr_mfc *c, int cmd,
   2338			     int flags)
   2339{
   2340	return ipmr_fill_mroute(mrt, skb, portid, seq, (struct mfc_cache *)c,
   2341				cmd, flags);
   2342}
   2343
   2344static size_t mroute_msgsize(bool unresolved, int maxvif)
   2345{
   2346	size_t len =
   2347		NLMSG_ALIGN(sizeof(struct rtmsg))
   2348		+ nla_total_size(4)	/* RTA_TABLE */
   2349		+ nla_total_size(4)	/* RTA_SRC */
   2350		+ nla_total_size(4)	/* RTA_DST */
   2351		;
   2352
   2353	if (!unresolved)
   2354		len = len
   2355		      + nla_total_size(4)	/* RTA_IIF */
   2356		      + nla_total_size(0)	/* RTA_MULTIPATH */
   2357		      + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
   2358						/* RTA_MFC_STATS */
   2359		      + nla_total_size_64bit(sizeof(struct rta_mfc_stats))
   2360		;
   2361
   2362	return len;
   2363}
   2364
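        /* Notify RTNLGRP_IPV4_MROUTE listeners that an MFC entry changed. */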
   2365static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
   2366				 int cmd)
   2367{
   2368	struct net *net = read_pnet(&mrt->net);
   2369	struct sk_buff *skb;
   2370	int err = -ENOBUFS;
   2371
   2372	skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS,
   2373				       mrt->maxvif),
   2374			GFP_ATOMIC);
   2375	if (!skb)
   2376		goto errout;
   2377
   2378	err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
   2379	if (err < 0)
   2380		goto errout;
   2381
   2382	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE, NULL, GFP_ATOMIC);
   2383	return;
   2384
   2385errout:
   2386	kfree_skb(skb);
   2387	if (err < 0)
   2388		rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE, err);
   2389}
   2390
   2391static size_t igmpmsg_netlink_msgsize(size_t payloadlen)
   2392{
   2393	size_t len =
   2394		NLMSG_ALIGN(sizeof(struct rtgenmsg))
   2395		+ nla_total_size(1)	/* IPMRA_CREPORT_MSGTYPE */
   2396		+ nla_total_size(4)	/* IPMRA_CREPORT_VIF_ID */
   2397		+ nla_total_size(4)	/* IPMRA_CREPORT_SRC_ADDR */
   2398		+ nla_total_size(4)	/* IPMRA_CREPORT_DST_ADDR */
   2399		+ nla_total_size(4)	/* IPMRA_CREPORT_TABLE */
   2400					/* IPMRA_CREPORT_PKT */
   2401		+ nla_total_size(payloadlen)
   2402		;
   2403
   2404	return len;
   2405}
   2406
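        /* Mirror an igmpmsg upcall as an RTM_NEWCACHEREPORT notification on
         * the RTNLGRP_IPV4_MROUTE_R group.
         */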
   2407static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
   2408{
   2409	struct net *net = read_pnet(&mrt->net);
   2410	struct nlmsghdr *nlh;
   2411	struct rtgenmsg *rtgenm;
   2412	struct igmpmsg *msg;
   2413	struct sk_buff *skb;
   2414	struct nlattr *nla;
   2415	int payloadlen;
   2416
   2417	payloadlen = pkt->len - sizeof(struct igmpmsg);
   2418	msg = (struct igmpmsg *)skb_network_header(pkt);
   2419
   2420	skb = nlmsg_new(igmpmsg_netlink_msgsize(payloadlen), GFP_ATOMIC);
   2421	if (!skb)
   2422		goto errout;
   2423
   2424	nlh = nlmsg_put(skb, 0, 0, RTM_NEWCACHEREPORT,
   2425			sizeof(struct rtgenmsg), 0);
   2426	if (!nlh)
   2427		goto errout;
   2428	rtgenm = nlmsg_data(nlh);
   2429	rtgenm->rtgen_family = RTNL_FAMILY_IPMR;
   2430	if (nla_put_u8(skb, IPMRA_CREPORT_MSGTYPE, msg->im_msgtype) ||
   2431	    nla_put_u32(skb, IPMRA_CREPORT_VIF_ID, msg->im_vif | (msg->im_vif_hi << 8)) ||
   2432	    nla_put_in_addr(skb, IPMRA_CREPORT_SRC_ADDR,
   2433			    msg->im_src.s_addr) ||
   2434	    nla_put_in_addr(skb, IPMRA_CREPORT_DST_ADDR,
   2435			    msg->im_dst.s_addr) ||
   2436	    nla_put_u32(skb, IPMRA_CREPORT_TABLE, mrt->id))
   2437		goto nla_put_failure;
   2438
   2439	nla = nla_reserve(skb, IPMRA_CREPORT_PKT, payloadlen);
   2440	if (!nla || skb_copy_bits(pkt, sizeof(struct igmpmsg),
   2441				  nla_data(nla), payloadlen))
   2442		goto nla_put_failure;
   2443
   2444	nlmsg_end(skb, nlh);
   2445
   2446	rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MROUTE_R, NULL, GFP_ATOMIC);
   2447	return;
   2448
   2449nla_put_failure:
   2450	nlmsg_cancel(skb, nlh);
   2451errout:
   2452	kfree_skb(skb);
   2453	rtnl_set_sk_err(net, RTNLGRP_IPV4_MROUTE_R, -ENOBUFS);
   2454}
   2455
   2456static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb,
   2457				       const struct nlmsghdr *nlh,
   2458				       struct nlattr **tb,
   2459				       struct netlink_ext_ack *extack)
   2460{
   2461	struct rtmsg *rtm;
   2462	int i, err;
   2463
   2464	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
   2465		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for multicast route get request");
   2466		return -EINVAL;
   2467	}
   2468
   2469	if (!netlink_strict_get_check(skb))
   2470		return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
   2471					      rtm_ipv4_policy, extack);
   2472
   2473	rtm = nlmsg_data(nlh);
   2474	if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) ||
   2475	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) ||
   2476	    rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol ||
   2477	    rtm->rtm_scope || rtm->rtm_type || rtm->rtm_flags) {
   2478		NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for multicast route get request");
   2479		return -EINVAL;
   2480	}
   2481
   2482	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
   2483					    rtm_ipv4_policy, extack);
   2484	if (err)
   2485		return err;
   2486
   2487	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
   2488	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
   2489		NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4");
   2490		return -EINVAL;
   2491	}
   2492
   2493	for (i = 0; i <= RTA_MAX; i++) {
   2494		if (!tb[i])
   2495			continue;
   2496
   2497		switch (i) {
   2498		case RTA_SRC:
   2499		case RTA_DST:
   2500		case RTA_TABLE:
   2501			break;
   2502		default:
   2503			NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in multicast route get request");
   2504			return -EINVAL;
   2505		}
   2506	}
   2507
   2508	return 0;
   2509}
   2510
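        /* RTM_GETROUTE handler: look up one (S,G) cache entry and unicast an
         * RTM_NEWROUTE reply to the requester.
         */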
   2511static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
   2512			     struct netlink_ext_ack *extack)
   2513{
   2514	struct net *net = sock_net(in_skb->sk);
   2515	struct nlattr *tb[RTA_MAX + 1];
   2516	struct sk_buff *skb = NULL;
   2517	struct mfc_cache *cache;
   2518	struct mr_table *mrt;
   2519	__be32 src, grp;
   2520	u32 tableid;
   2521	int err;
   2522
   2523	err = ipmr_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
   2524	if (err < 0)
   2525		goto errout;
   2526
   2527	src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
   2528	grp = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
   2529	tableid = tb[RTA_TABLE] ? nla_get_u32(tb[RTA_TABLE]) : 0;
   2530
   2531	mrt = ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT);
   2532	if (!mrt) {
   2533		err = -ENOENT;
   2534		goto errout_free;
   2535	}
   2536
   2537	/* entries are added/deleted only under RTNL */
   2538	rcu_read_lock();
   2539	cache = ipmr_cache_find(mrt, src, grp);
   2540	rcu_read_unlock();
   2541	if (!cache) {
   2542		err = -ENOENT;
   2543		goto errout_free;
   2544	}
   2545
   2546	skb = nlmsg_new(mroute_msgsize(false, mrt->maxvif), GFP_KERNEL);
   2547	if (!skb) {
   2548		err = -ENOBUFS;
   2549		goto errout_free;
   2550	}
   2551
   2552	err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid,
   2553			       nlh->nlmsg_seq, cache,
   2554			       RTM_NEWROUTE, 0);
   2555	if (err < 0)
   2556		goto errout_free;
   2557
   2558	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
   2559
   2560errout:
   2561	return err;
   2562
   2563errout_free:
   2564	kfree_skb(skb);
   2565	goto errout;
   2566}
   2567
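        /* RTM_GETROUTE dump handler: walk either a single table (RTA_TABLE
         * filter) or all tables and emit one message per MFC entry.
         */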
   2568static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
   2569{
   2570	struct fib_dump_filter filter = {};
   2571	int err;
   2572
   2573	if (cb->strict_check) {
   2574		err = ip_valid_fib_dump_req(sock_net(skb->sk), cb->nlh,
   2575					    &filter, cb);
   2576		if (err < 0)
   2577			return err;
   2578	}
   2579
   2580	if (filter.table_id) {
   2581		struct mr_table *mrt;
   2582
   2583		mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id);
   2584		if (!mrt) {
   2585			if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IPMR)
   2586				return skb->len;
   2587
   2588			NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist");
   2589			return -ENOENT;
   2590		}
   2591		err = mr_table_dump(mrt, skb, cb, _ipmr_fill_mroute,
   2592				    &mfc_unres_lock, &filter);
   2593		return skb->len ? : err;
   2594	}
   2595
   2596	return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter,
   2597				_ipmr_fill_mroute, &mfc_unres_lock, &filter);
   2598}
   2599
   2600static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
   2601	[RTA_SRC]	= { .type = NLA_U32 },
   2602	[RTA_DST]	= { .type = NLA_U32 },
   2603	[RTA_IIF]	= { .type = NLA_U32 },
   2604	[RTA_TABLE]	= { .type = NLA_U32 },
   2605	[RTA_MULTIPATH]	= { .len = sizeof(struct rtnexthop) },
   2606};
   2607
   2608static bool ipmr_rtm_validate_proto(unsigned char rtm_protocol)
   2609{
   2610	switch (rtm_protocol) {
   2611	case RTPROT_STATIC:
   2612	case RTPROT_MROUTED:
   2613		return true;
   2614	}
   2615	return false;
   2616}
   2617
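        /* Copy per-VIF TTL thresholds out of an RTA_MULTIPATH attribute.
         * Returns the number of nexthops parsed, or -EINVAL if the nest is
         * malformed.
         */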
   2618static int ipmr_nla_get_ttls(const struct nlattr *nla, struct mfcctl *mfcc)
   2619{
   2620	struct rtnexthop *rtnh = nla_data(nla);
   2621	int remaining = nla_len(nla), vifi = 0;
   2622
   2623	while (rtnh_ok(rtnh, remaining)) {
   2624		mfcc->mfcc_ttls[vifi] = rtnh->rtnh_hops;
   2625		if (++vifi == MAXVIFS)
   2626			break;
   2627		rtnh = rtnh_next(rtnh, &remaining);
   2628	}
   2629
   2630	return remaining > 0 ? -EINVAL : vifi;
   2631}
   2632
   2633/* returns < 0 on error, 0 for ADD_MFC and 1 for ADD_MFC_PROXY */
   2634static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh,
   2635			    struct mfcctl *mfcc, int *mrtsock,
   2636			    struct mr_table **mrtret,
   2637			    struct netlink_ext_ack *extack)
   2638{
   2639	struct net_device *dev = NULL;
   2640	u32 tblid = RT_TABLE_DEFAULT;
   2641	struct mr_table *mrt;
   2642	struct nlattr *attr;
   2643	struct rtmsg *rtm;
   2644	int ret, rem;
   2645
   2646	ret = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX,
   2647					rtm_ipmr_policy, extack);
   2648	if (ret < 0)
   2649		goto out;
   2650	rtm = nlmsg_data(nlh);
   2651
   2652	ret = -EINVAL;
   2653	if (rtm->rtm_family != RTNL_FAMILY_IPMR || rtm->rtm_dst_len != 32 ||
   2654	    rtm->rtm_type != RTN_MULTICAST ||
   2655	    rtm->rtm_scope != RT_SCOPE_UNIVERSE ||
   2656	    !ipmr_rtm_validate_proto(rtm->rtm_protocol))
   2657		goto out;
   2658
   2659	memset(mfcc, 0, sizeof(*mfcc));
   2660	mfcc->mfcc_parent = -1;
   2661	ret = 0;
   2662	nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), rem) {
   2663		switch (nla_type(attr)) {
   2664		case RTA_SRC:
   2665			mfcc->mfcc_origin.s_addr = nla_get_be32(attr);
   2666			break;
   2667		case RTA_DST:
   2668			mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr);
   2669			break;
   2670		case RTA_IIF:
   2671			dev = __dev_get_by_index(net, nla_get_u32(attr));
   2672			if (!dev) {
   2673				ret = -ENODEV;
   2674				goto out;
   2675			}
   2676			break;
   2677		case RTA_MULTIPATH:
   2678			if (ipmr_nla_get_ttls(attr, mfcc) < 0) {
   2679				ret = -EINVAL;
   2680				goto out;
   2681			}
   2682			break;
   2683		case RTA_PREFSRC:
   2684			ret = 1;
   2685			break;
   2686		case RTA_TABLE:
   2687			tblid = nla_get_u32(attr);
   2688			break;
   2689		}
   2690	}
   2691	mrt = ipmr_get_table(net, tblid);
   2692	if (!mrt) {
   2693		ret = -ENOENT;
   2694		goto out;
   2695	}
   2696	*mrtret = mrt;
   2697	*mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 1 : 0;
   2698	if (dev)
   2699		mfcc->mfcc_parent = ipmr_find_vif(mrt, dev);
   2700
   2701out:
   2702	return ret;
   2703}
   2704
   2705/* takes care of both newroute and delroute */
   2706static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh,
   2707			  struct netlink_ext_ack *extack)
   2708{
   2709	struct net *net = sock_net(skb->sk);
   2710	int ret, mrtsock, parent;
   2711	struct mr_table *tbl;
   2712	struct mfcctl mfcc;
   2713
   2714	mrtsock = 0;
   2715	tbl = NULL;
   2716	ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl, extack);
   2717	if (ret < 0)
   2718		return ret;
   2719
   2720	parent = ret ? mfcc.mfcc_parent : -1;
   2721	if (nlh->nlmsg_type == RTM_NEWROUTE)
   2722		return ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent);
   2723	else
   2724		return ipmr_mfc_delete(tbl, &mfcc, parent);
   2725}
   2726
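        /* Dump the per-table attributes (id, unresolved-queue length, register
         * VIF number, assert/PIM settings) into @skb.
         */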
   2727static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb)
   2728{
   2729	u32 queue_len = atomic_read(&mrt->cache_resolve_queue_len);
   2730
   2731	if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) ||
   2732	    nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) ||
   2733	    nla_put_s32(skb, IPMRA_TABLE_MROUTE_REG_VIF_NUM,
   2734			mrt->mroute_reg_vif_num) ||
   2735	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT,
   2736		       mrt->mroute_do_assert) ||
   2737	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim) ||
   2738	    nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_WRVIFWHOLE,
   2739		       mrt->mroute_do_wrvifwhole))
   2740		return false;
   2741
   2742	return true;
   2743}
   2744
   2745static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb)
   2746{
   2747	struct nlattr *vif_nest;
   2748	struct vif_device *vif;
   2749
   2750	/* if the VIF doesn't exist just continue */
   2751	if (!VIF_EXISTS(mrt, vifid))
   2752		return true;
   2753
   2754	vif = &mrt->vif_table[vifid];
   2755	vif_nest = nla_nest_start_noflag(skb, IPMRA_VIF);
   2756	if (!vif_nest)
   2757		return false;
   2758	if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) ||
   2759	    nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) ||
   2760	    nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) ||
   2761	    nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in,
   2762			      IPMRA_VIFA_PAD) ||
   2763	    nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, vif->bytes_out,
   2764			      IPMRA_VIFA_PAD) ||
   2765	    nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, vif->pkt_in,
   2766			      IPMRA_VIFA_PAD) ||
   2767	    nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, vif->pkt_out,
   2768			      IPMRA_VIFA_PAD) ||
   2769	    nla_put_be32(skb, IPMRA_VIFA_LOCAL_ADDR, vif->local) ||
   2770	    nla_put_be32(skb, IPMRA_VIFA_REMOTE_ADDR, vif->remote)) {
   2771		nla_nest_cancel(skb, vif_nest);
   2772		return false;
   2773	}
   2774	nla_nest_end(skb, vif_nest);
   2775
   2776	return true;
   2777}
   2778
   2779static int ipmr_valid_dumplink(const struct nlmsghdr *nlh,
   2780			       struct netlink_ext_ack *extack)
   2781{
   2782	struct ifinfomsg *ifm;
   2783
   2784	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
   2785		NL_SET_ERR_MSG(extack, "ipv4: Invalid header for ipmr link dump");
   2786		return -EINVAL;
   2787	}
   2788
   2789	if (nlmsg_attrlen(nlh, sizeof(*ifm))) {
   2790		NL_SET_ERR_MSG(extack, "Invalid data after header in ipmr link dump");
   2791		return -EINVAL;
   2792	}
   2793
   2794	ifm = nlmsg_data(nlh);
   2795	if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags ||
   2796	    ifm->ifi_change || ifm->ifi_index) {
   2797		NL_SET_ERR_MSG(extack, "Invalid values in header for ipmr link dump request");
   2798		return -EINVAL;
   2799	}
   2800
   2801	return 0;
   2802}
   2803
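        /* RTM_GETLINK dump: emit one RTM_NEWLINK message per multicast routing
         * table, carrying the table attributes and its VIF list.
         */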
   2804static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb)
   2805{
   2806	struct net *net = sock_net(skb->sk);
   2807	struct nlmsghdr *nlh = NULL;
   2808	unsigned int t = 0, s_t;
   2809	unsigned int e = 0, s_e;
   2810	struct mr_table *mrt;
   2811
   2812	if (cb->strict_check) {
   2813		int err = ipmr_valid_dumplink(cb->nlh, cb->extack);
   2814
   2815		if (err < 0)
   2816			return err;
   2817	}
   2818
   2819	s_t = cb->args[0];
   2820	s_e = cb->args[1];
   2821
   2822	ipmr_for_each_table(mrt, net) {
   2823		struct nlattr *vifs, *af;
   2824		struct ifinfomsg *hdr;
   2825		u32 i;
   2826
   2827		if (t < s_t)
   2828			goto skip_table;
   2829		nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
   2830				cb->nlh->nlmsg_seq, RTM_NEWLINK,
   2831				sizeof(*hdr), NLM_F_MULTI);
   2832		if (!nlh)
   2833			break;
   2834
   2835		hdr = nlmsg_data(nlh);
   2836		memset(hdr, 0, sizeof(*hdr));
   2837		hdr->ifi_family = RTNL_FAMILY_IPMR;
   2838
   2839		af = nla_nest_start_noflag(skb, IFLA_AF_SPEC);
   2840		if (!af) {
   2841			nlmsg_cancel(skb, nlh);
   2842			goto out;
   2843		}
   2844
   2845		if (!ipmr_fill_table(mrt, skb)) {
   2846			nlmsg_cancel(skb, nlh);
   2847			goto out;
   2848		}
   2849
   2850		vifs = nla_nest_start_noflag(skb, IPMRA_TABLE_VIFS);
   2851		if (!vifs) {
   2852			nla_nest_end(skb, af);
   2853			nlmsg_end(skb, nlh);
   2854			goto out;
   2855		}
   2856		for (i = 0; i < mrt->maxvif; i++) {
   2857			if (e < s_e)
   2858				goto skip_entry;
   2859			if (!ipmr_fill_vif(mrt, i, skb)) {
   2860				nla_nest_end(skb, vifs);
   2861				nla_nest_end(skb, af);
   2862				nlmsg_end(skb, nlh);
   2863				goto out;
   2864			}
   2865skip_entry:
   2866			e++;
   2867		}
   2868		s_e = 0;
   2869		e = 0;
   2870		nla_nest_end(skb, vifs);
   2871		nla_nest_end(skb, af);
   2872		nlmsg_end(skb, nlh);
   2873skip_table:
   2874		t++;
   2875	}
   2876
   2877out:
   2878	cb->args[1] = e;
   2879	cb->args[0] = t;
   2880
   2881	return skb->len;
   2882}
   2883
   2884#ifdef CONFIG_PROC_FS
    2885/* The /proc interfaces to multicast routing:
   2886 * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
   2887 */
   2888
   2889static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
   2890	__acquires(mrt_lock)
   2891{
   2892	struct mr_vif_iter *iter = seq->private;
   2893	struct net *net = seq_file_net(seq);
   2894	struct mr_table *mrt;
   2895
   2896	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
   2897	if (!mrt)
   2898		return ERR_PTR(-ENOENT);
   2899
   2900	iter->mrt = mrt;
   2901
   2902	read_lock(&mrt_lock);
   2903	return mr_vif_seq_start(seq, pos);
   2904}
   2905
   2906static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
   2907	__releases(mrt_lock)
   2908{
   2909	read_unlock(&mrt_lock);
   2910}
   2911
   2912static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
   2913{
   2914	struct mr_vif_iter *iter = seq->private;
   2915	struct mr_table *mrt = iter->mrt;
   2916
   2917	if (v == SEQ_START_TOKEN) {
   2918		seq_puts(seq,
   2919			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
   2920	} else {
   2921		const struct vif_device *vif = v;
   2922		const char *name =  vif->dev ?
   2923				    vif->dev->name : "none";
   2924
   2925		seq_printf(seq,
   2926			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
   2927			   vif - mrt->vif_table,
   2928			   name, vif->bytes_in, vif->pkt_in,
   2929			   vif->bytes_out, vif->pkt_out,
   2930			   vif->flags, vif->local, vif->remote);
   2931	}
   2932	return 0;
   2933}
   2934
   2935static const struct seq_operations ipmr_vif_seq_ops = {
   2936	.start = ipmr_vif_seq_start,
   2937	.next  = mr_vif_seq_next,
   2938	.stop  = ipmr_vif_seq_stop,
   2939	.show  = ipmr_vif_seq_show,
   2940};
   2941
   2942static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
   2943{
   2944	struct net *net = seq_file_net(seq);
   2945	struct mr_table *mrt;
   2946
   2947	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
   2948	if (!mrt)
   2949		return ERR_PTR(-ENOENT);
   2950
   2951	return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
   2952}
   2953
   2954static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
   2955{
   2956	int n;
   2957
   2958	if (v == SEQ_START_TOKEN) {
   2959		seq_puts(seq,
   2960		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
   2961	} else {
   2962		const struct mfc_cache *mfc = v;
   2963		const struct mr_mfc_iter *it = seq->private;
   2964		const struct mr_table *mrt = it->mrt;
   2965
   2966		seq_printf(seq, "%08X %08X %-3hd",
   2967			   (__force u32) mfc->mfc_mcastgrp,
   2968			   (__force u32) mfc->mfc_origin,
   2969			   mfc->_c.mfc_parent);
   2970
   2971		if (it->cache != &mrt->mfc_unres_queue) {
   2972			seq_printf(seq, " %8lu %8lu %8lu",
   2973				   mfc->_c.mfc_un.res.pkt,
   2974				   mfc->_c.mfc_un.res.bytes,
   2975				   mfc->_c.mfc_un.res.wrong_if);
   2976			for (n = mfc->_c.mfc_un.res.minvif;
   2977			     n < mfc->_c.mfc_un.res.maxvif; n++) {
   2978				if (VIF_EXISTS(mrt, n) &&
   2979				    mfc->_c.mfc_un.res.ttls[n] < 255)
   2980					seq_printf(seq,
   2981					   " %2d:%-3d",
   2982					   n, mfc->_c.mfc_un.res.ttls[n]);
   2983			}
   2984		} else {
   2985			/* unresolved mfc_caches don't contain
   2986			 * pkt, bytes and wrong_if values
   2987			 */
   2988			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
   2989		}
   2990		seq_putc(seq, '\n');
   2991	}
   2992	return 0;
   2993}
   2994
   2995static const struct seq_operations ipmr_mfc_seq_ops = {
   2996	.start = ipmr_mfc_seq_start,
   2997	.next  = mr_mfc_seq_next,
   2998	.stop  = mr_mfc_seq_stop,
   2999	.show  = ipmr_mfc_seq_show,
   3000};
   3001#endif
   3002
   3003#ifdef CONFIG_IP_PIMSM_V2
   3004static const struct net_protocol pim_protocol = {
   3005	.handler	=	pim_rcv,
   3006};
   3007#endif
   3008
   3009static unsigned int ipmr_seq_read(struct net *net)
   3010{
   3011	ASSERT_RTNL();
   3012
   3013	return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net);
   3014}
   3015
   3016static int ipmr_dump(struct net *net, struct notifier_block *nb,
   3017		     struct netlink_ext_ack *extack)
   3018{
   3019	return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump,
   3020		       ipmr_mr_table_iter, &mrt_lock, extack);
   3021}
   3022
   3023static const struct fib_notifier_ops ipmr_notifier_ops_template = {
   3024	.family		= RTNL_FAMILY_IPMR,
   3025	.fib_seq_read	= ipmr_seq_read,
   3026	.fib_dump	= ipmr_dump,
   3027	.owner		= THIS_MODULE,
   3028};
   3029
   3030static int __net_init ipmr_notifier_init(struct net *net)
   3031{
   3032	struct fib_notifier_ops *ops;
   3033
   3034	net->ipv4.ipmr_seq = 0;
   3035
   3036	ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net);
   3037	if (IS_ERR(ops))
   3038		return PTR_ERR(ops);
   3039	net->ipv4.ipmr_notifier_ops = ops;
   3040
   3041	return 0;
   3042}
   3043
   3044static void __net_exit ipmr_notifier_exit(struct net *net)
   3045{
   3046	fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops);
   3047	net->ipv4.ipmr_notifier_ops = NULL;
   3048}
   3049
   3050/* Setup for IP multicast routing */
   3051static int __net_init ipmr_net_init(struct net *net)
   3052{
   3053	int err;
   3054
   3055	err = ipmr_notifier_init(net);
   3056	if (err)
   3057		goto ipmr_notifier_fail;
   3058
   3059	err = ipmr_rules_init(net);
   3060	if (err < 0)
   3061		goto ipmr_rules_fail;
   3062
   3063#ifdef CONFIG_PROC_FS
   3064	err = -ENOMEM;
   3065	if (!proc_create_net("ip_mr_vif", 0, net->proc_net, &ipmr_vif_seq_ops,
   3066			sizeof(struct mr_vif_iter)))
   3067		goto proc_vif_fail;
   3068	if (!proc_create_net("ip_mr_cache", 0, net->proc_net, &ipmr_mfc_seq_ops,
   3069			sizeof(struct mr_mfc_iter)))
   3070		goto proc_cache_fail;
   3071#endif
   3072	return 0;
   3073
   3074#ifdef CONFIG_PROC_FS
   3075proc_cache_fail:
   3076	remove_proc_entry("ip_mr_vif", net->proc_net);
   3077proc_vif_fail:
   3078	rtnl_lock();
   3079	ipmr_rules_exit(net);
   3080	rtnl_unlock();
   3081#endif
   3082ipmr_rules_fail:
   3083	ipmr_notifier_exit(net);
   3084ipmr_notifier_fail:
   3085	return err;
   3086}
   3087
   3088static void __net_exit ipmr_net_exit(struct net *net)
   3089{
   3090#ifdef CONFIG_PROC_FS
   3091	remove_proc_entry("ip_mr_cache", net->proc_net);
   3092	remove_proc_entry("ip_mr_vif", net->proc_net);
   3093#endif
   3094	ipmr_notifier_exit(net);
   3095}
   3096
   3097static void __net_exit ipmr_net_exit_batch(struct list_head *net_list)
   3098{
   3099	struct net *net;
   3100
   3101	rtnl_lock();
   3102	list_for_each_entry(net, net_list, exit_list)
   3103		ipmr_rules_exit(net);
   3104	rtnl_unlock();
   3105}
   3106
   3107static struct pernet_operations ipmr_net_ops = {
   3108	.init = ipmr_net_init,
   3109	.exit = ipmr_net_exit,
   3110	.exit_batch = ipmr_net_exit_batch,
   3111};
   3112
   3113int __init ip_mr_init(void)
   3114{
   3115	int err;
   3116
   3117	mrt_cachep = kmem_cache_create("ip_mrt_cache",
   3118				       sizeof(struct mfc_cache),
   3119				       0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
   3120				       NULL);
   3121
   3122	err = register_pernet_subsys(&ipmr_net_ops);
   3123	if (err)
   3124		goto reg_pernet_fail;
   3125
   3126	err = register_netdevice_notifier(&ip_mr_notifier);
   3127	if (err)
   3128		goto reg_notif_fail;
   3129#ifdef CONFIG_IP_PIMSM_V2
   3130	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
   3131		pr_err("%s: can't add PIM protocol\n", __func__);
   3132		err = -EAGAIN;
   3133		goto add_proto_fail;
   3134	}
   3135#endif
   3136	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE,
   3137		      ipmr_rtm_getroute, ipmr_rtm_dumproute, 0);
   3138	rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE,
   3139		      ipmr_rtm_route, NULL, 0);
   3140	rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE,
   3141		      ipmr_rtm_route, NULL, 0);
   3142
   3143	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK,
   3144		      NULL, ipmr_rtm_dumplink, 0);
   3145	return 0;
   3146
   3147#ifdef CONFIG_IP_PIMSM_V2
   3148add_proto_fail:
   3149	unregister_netdevice_notifier(&ip_mr_notifier);
   3150#endif
   3151reg_notif_fail:
   3152	unregister_pernet_subsys(&ipmr_net_ops);
   3153reg_pernet_fail:
   3154	kmem_cache_destroy(mrt_cachep);
   3155	return err;
   3156}