cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

datapath.c (68161B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Copyright (c) 2007-2014 Nicira, Inc.
      4 */
      5
      6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
      7
      8#include <linux/init.h>
      9#include <linux/module.h>
     10#include <linux/if_arp.h>
     11#include <linux/if_vlan.h>
     12#include <linux/in.h>
     13#include <linux/ip.h>
     14#include <linux/jhash.h>
     15#include <linux/delay.h>
     16#include <linux/time.h>
     17#include <linux/etherdevice.h>
     18#include <linux/genetlink.h>
     19#include <linux/kernel.h>
     20#include <linux/kthread.h>
     21#include <linux/mutex.h>
     22#include <linux/percpu.h>
     23#include <linux/rcupdate.h>
     24#include <linux/tcp.h>
     25#include <linux/udp.h>
     26#include <linux/ethtool.h>
     27#include <linux/wait.h>
     28#include <asm/div64.h>
     29#include <linux/highmem.h>
     30#include <linux/netfilter_bridge.h>
     31#include <linux/netfilter_ipv4.h>
     32#include <linux/inetdevice.h>
     33#include <linux/list.h>
     34#include <linux/openvswitch.h>
     35#include <linux/rculist.h>
     36#include <linux/dmi.h>
     37#include <net/genetlink.h>
     38#include <net/net_namespace.h>
     39#include <net/netns/generic.h>
     40#include <net/pkt_cls.h>
     41
     42#include "datapath.h"
     43#include "flow.h"
     44#include "flow_table.h"
     45#include "flow_netlink.h"
     46#include "meter.h"
     47#include "openvswitch_trace.h"
     48#include "vport-internal_dev.h"
     49#include "vport-netdev.h"
     50
     51unsigned int ovs_net_id __read_mostly;
     52
     53static struct genl_family dp_packet_genl_family;
     54static struct genl_family dp_flow_genl_family;
     55static struct genl_family dp_datapath_genl_family;
     56
     57static const struct nla_policy flow_policy[];
     58
     59static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
     60	.name = OVS_FLOW_MCGROUP,
     61};
     62
     63static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
     64	.name = OVS_DATAPATH_MCGROUP,
     65};
     66
     67static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
     68	.name = OVS_VPORT_MCGROUP,
     69};
     70
     71/* Check whether we need to build a reply message.
     72 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply. */
     73static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
     74			    unsigned int group)
     75{
     76	return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
     77	       genl_has_listeners(family, genl_info_net(info), group);
     78}
     79
     80static void ovs_notify(struct genl_family *family,
     81		       struct sk_buff *skb, struct genl_info *info)
     82{
     83	genl_notify(family, skb, info, 0, GFP_KERNEL);
     84}
     85
     86/**
     87 * DOC: Locking:
     88 *
     89 * Writes to device state (add/remove datapath or port, set operations
     90 * on vports, etc.) and writes to other state (flow table modifications,
     91 * miscellaneous datapath parameters, etc.) are protected by
     92 * ovs_lock.
     93 *
     94 * Reads are protected by RCU.
     95 *
     96 * There are a few special cases (mostly stats) that have their own
     97 * synchronization but they nest under all of above and don't interact with
     98 * each other.
     99 *
    100 * The RTNL lock nests inside ovs_mutex.
    101 */
    102
    103static DEFINE_MUTEX(ovs_mutex);
    104
    105void ovs_lock(void)
    106{
    107	mutex_lock(&ovs_mutex);
    108}
    109
    110void ovs_unlock(void)
    111{
    112	mutex_unlock(&ovs_mutex);
    113}
    114
    115#ifdef CONFIG_LOCKDEP
    116int lockdep_ovsl_is_held(void)
    117{
    118	if (debug_locks)
    119		return lockdep_is_held(&ovs_mutex);
    120	else
    121		return 1;
    122}
    123#endif
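
/*
 * A minimal sketch (not part of the original file) of the pattern the
 * locking DOC above describes: writers serialize on ovs_lock() and
 * publish with RCU primitives; readers only need rcu_read_lock().
 *
 *	// writer, e.g. a genetlink doit handler
 *	ovs_lock();
 *	rcu_assign_pointer(flow->sf_acts, new_acts);
 *	ovs_unlock();
 *	ovs_nla_free_flow_actions_rcu(old_acts);  // freed after a grace period
 *
 *	// reader, e.g. the packet fast path
 *	rcu_read_lock();
 *	sf_acts = rcu_dereference(flow->sf_acts);
 *	...
 *	rcu_read_unlock();
 */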
    124
    125static struct vport *new_vport(const struct vport_parms *);
    126static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
    127			     const struct sw_flow_key *,
    128			     const struct dp_upcall_info *,
    129			     uint32_t cutlen);
    130static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
    131				  const struct sw_flow_key *,
    132				  const struct dp_upcall_info *,
    133				  uint32_t cutlen);
    134
    135static void ovs_dp_masks_rebalance(struct work_struct *work);
    136
    137static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *);
    138
    139/* Must be called with rcu_read_lock or ovs_mutex. */
    140const char *ovs_dp_name(const struct datapath *dp)
    141{
    142	struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
    143	return ovs_vport_name(vport);
    144}
    145
    146static int get_dpifindex(const struct datapath *dp)
    147{
    148	struct vport *local;
    149	int ifindex;
    150
    151	rcu_read_lock();
    152
    153	local = ovs_vport_rcu(dp, OVSP_LOCAL);
    154	if (local)
    155		ifindex = local->dev->ifindex;
    156	else
    157		ifindex = 0;
    158
    159	rcu_read_unlock();
    160
    161	return ifindex;
    162}
    163
    164static void destroy_dp_rcu(struct rcu_head *rcu)
    165{
    166	struct datapath *dp = container_of(rcu, struct datapath, rcu);
    167
    168	ovs_flow_tbl_destroy(&dp->table);
    169	free_percpu(dp->stats_percpu);
    170	kfree(dp->ports);
    171	ovs_meters_exit(dp);
    172	kfree(rcu_dereference_raw(dp->upcall_portids));
    173	kfree(dp);
    174}
    175
    176static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
    177					    u16 port_no)
    178{
    179	return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
    180}
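
/*
 * Worked example for the bucket mask above: DP_VPORT_HASH_BUCKETS is a
 * power of two (1024 in datapath.h), so the AND is a cheap modulo:
 * port_no 1030 lands in bucket 1030 & 1023 == 6.
 */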
    181
    182/* Called with ovs_mutex or RCU read lock. */
    183struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
    184{
    185	struct vport *vport;
    186	struct hlist_head *head;
    187
    188	head = vport_hash_bucket(dp, port_no);
    189	hlist_for_each_entry_rcu(vport, head, dp_hash_node,
    190				 lockdep_ovsl_is_held()) {
    191		if (vport->port_no == port_no)
    192			return vport;
    193	}
    194	return NULL;
    195}
    196
    197/* Called with ovs_mutex. */
    198static struct vport *new_vport(const struct vport_parms *parms)
    199{
    200	struct vport *vport;
    201
    202	vport = ovs_vport_add(parms);
    203	if (!IS_ERR(vport)) {
    204		struct datapath *dp = parms->dp;
    205		struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);
    206
    207		hlist_add_head_rcu(&vport->dp_hash_node, head);
    208	}
    209	return vport;
    210}
    211
    212void ovs_dp_detach_port(struct vport *p)
    213{
    214	ASSERT_OVSL();
    215
    216	/* First drop references to device. */
    217	hlist_del_rcu(&p->dp_hash_node);
    218
    219	/* Then destroy it. */
    220	ovs_vport_del(p);
    221}
    222
    223/* Must be called with rcu_read_lock. */
    224void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
    225{
    226	const struct vport *p = OVS_CB(skb)->input_vport;
    227	struct datapath *dp = p->dp;
    228	struct sw_flow *flow;
    229	struct sw_flow_actions *sf_acts;
    230	struct dp_stats_percpu *stats;
    231	u64 *stats_counter;
    232	u32 n_mask_hit;
    233	u32 n_cache_hit;
    234	int error;
    235
    236	stats = this_cpu_ptr(dp->stats_percpu);
    237
    238	/* Look up flow. */
    239	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
    240					 &n_mask_hit, &n_cache_hit);
    241	if (unlikely(!flow)) {
    242		struct dp_upcall_info upcall;
    243
    244		memset(&upcall, 0, sizeof(upcall));
    245		upcall.cmd = OVS_PACKET_CMD_MISS;
    246
    247		if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
    248			upcall.portid =
    249			    ovs_dp_get_upcall_portid(dp, smp_processor_id());
    250		else
    251			upcall.portid = ovs_vport_find_upcall_portid(p, skb);
    252
    253		upcall.mru = OVS_CB(skb)->mru;
    254		error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
    255		if (unlikely(error))
    256			kfree_skb(skb);
    257		else
    258			consume_skb(skb);
    259		stats_counter = &stats->n_missed;
    260		goto out;
    261	}
    262
    263	ovs_flow_stats_update(flow, key->tp.flags, skb);
    264	sf_acts = rcu_dereference(flow->sf_acts);
    265	error = ovs_execute_actions(dp, skb, sf_acts, key);
    266	if (unlikely(error))
    267		net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
    268				    ovs_dp_name(dp), error);
    269
    270	stats_counter = &stats->n_hit;
    271
    272out:
    273	/* Update datapath statistics. */
    274	u64_stats_update_begin(&stats->syncp);
    275	(*stats_counter)++;
    276	stats->n_mask_hit += n_mask_hit;
    277	stats->n_cache_hit += n_cache_hit;
    278	u64_stats_update_end(&stats->syncp);
    279}
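
/*
 * Summary sketch of the fast path above: a flow-table hit executes the
 * flow's actions inline and counts toward n_hit; a miss hands the packet
 * to user space via ovs_dp_upcall(), addressed either to the per-CPU
 * upcall port id (OVS_DP_F_DISPATCH_UPCALL_PER_CPU) or to the vport's
 * own, and counts toward n_missed. n_mask_hit and n_cache_hit record how
 * many masks the lookup probed and whether the mask cache matched.
 */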
    280
    281int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
    282		  const struct sw_flow_key *key,
    283		  const struct dp_upcall_info *upcall_info,
    284		  uint32_t cutlen)
    285{
    286	struct dp_stats_percpu *stats;
    287	int err;
    288
    289	if (trace_ovs_dp_upcall_enabled())
    290		trace_ovs_dp_upcall(dp, skb, key, upcall_info);
    291
    292	if (upcall_info->portid == 0) {
    293		err = -ENOTCONN;
    294		goto err;
    295	}
    296
    297	if (!skb_is_gso(skb))
    298		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
    299	else
    300		err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);
    301	if (err)
    302		goto err;
    303
    304	return 0;
    305
    306err:
    307	stats = this_cpu_ptr(dp->stats_percpu);
    308
    309	u64_stats_update_begin(&stats->syncp);
    310	stats->n_lost++;
    311	u64_stats_update_end(&stats->syncp);
    312
    313	return err;
    314}
    315
    316static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
    317			     const struct sw_flow_key *key,
    318			     const struct dp_upcall_info *upcall_info,
    319			     uint32_t cutlen)
    320{
    321	unsigned int gso_type = skb_shinfo(skb)->gso_type;
    322	struct sw_flow_key later_key;
    323	struct sk_buff *segs, *nskb;
    324	int err;
    325
    326	BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_GSO_CB_OFFSET);
    327	segs = __skb_gso_segment(skb, NETIF_F_SG, false);
    328	if (IS_ERR(segs))
    329		return PTR_ERR(segs);
    330	if (segs == NULL)
    331		return -EINVAL;
    332
    333	if (gso_type & SKB_GSO_UDP) {
    334		/* The initial flow key extracted by ovs_flow_key_extract()
    335		 * in this case is for the first fragment, so we need to
    336		 * properly mark later fragments.
    337		 */
    338		later_key = *key;
    339		later_key.ip.frag = OVS_FRAG_TYPE_LATER;
    340	}
    341
    342	/* Queue all of the segments. */
    343	skb_list_walk_safe(segs, skb, nskb) {
    344		if (gso_type & SKB_GSO_UDP && skb != segs)
    345			key = &later_key;
    346
    347		err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
    348		if (err)
    349			break;
    350
    351	}
    352
    353	/* Free all of the segments. */
    354	skb_list_walk_safe(segs, skb, nskb) {
    355		if (err)
    356			kfree_skb(skb);
    357		else
    358			consume_skb(skb);
    359	}
    360	return err;
    361}
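
/*
 * Note on the two walks above: queue_userspace_packet() copies data out
 * of each segment rather than consuming it, so a single second pass can
 * free every segment exactly once; kfree_skb() vs consume_skb() only
 * distinguishes an error drop from a normal free for tracing purposes.
 */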
    362
    363static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
    364			      unsigned int hdrlen, int actions_attrlen)
    365{
    366	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
    367		+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
    368		+ nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
    369		+ nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
    370		+ nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */
    371
    372	/* OVS_PACKET_ATTR_USERDATA */
    373	if (upcall_info->userdata)
    374		size += NLA_ALIGN(upcall_info->userdata->nla_len);
    375
    376	/* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
    377	if (upcall_info->egress_tun_info)
    378		size += nla_total_size(ovs_tun_key_attr_size());
    379
    380	/* OVS_PACKET_ATTR_ACTIONS */
    381	if (upcall_info->actions_len)
    382		size += nla_total_size(actions_attrlen);
    383
    384	/* OVS_PACKET_ATTR_MRU */
    385	if (upcall_info->mru)
    386		size += nla_total_size(sizeof(upcall_info->mru));
    387
    388	return size;
    389}
    390
    391static void pad_packet(struct datapath *dp, struct sk_buff *skb)
    392{
    393	if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
    394		size_t plen = NLA_ALIGN(skb->len) - skb->len;
    395
    396		if (plen > 0)
    397			skb_put_zero(skb, plen);
    398	}
    399}
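
/*
 * Worked example of the padding above, given NLA_ALIGNTO == 4: for
 * skb->len == 61, NLA_ALIGN(61) == 64, so plen == 3 and three zero bytes
 * are appended. This keeps the trailing attribute aligned for user space
 * that did not negotiate OVS_DP_F_UNALIGNED.
 */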
    400
    401static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
    402				  const struct sw_flow_key *key,
    403				  const struct dp_upcall_info *upcall_info,
    404				  uint32_t cutlen)
    405{
    406	struct ovs_header *upcall;
    407	struct sk_buff *nskb = NULL;
    408	struct sk_buff *user_skb = NULL; /* to be queued to userspace */
    409	struct nlattr *nla;
    410	size_t len;
    411	unsigned int hlen;
    412	int err, dp_ifindex;
    413	u64 hash;
    414
    415	dp_ifindex = get_dpifindex(dp);
    416	if (!dp_ifindex)
    417		return -ENODEV;
    418
    419	if (skb_vlan_tag_present(skb)) {
    420		nskb = skb_clone(skb, GFP_ATOMIC);
    421		if (!nskb)
    422			return -ENOMEM;
    423
    424		nskb = __vlan_hwaccel_push_inside(nskb);
    425		if (!nskb)
    426			return -ENOMEM;
    427
    428		skb = nskb;
    429	}
    430
    431	if (nla_attr_size(skb->len) > USHRT_MAX) {
    432		err = -EFBIG;
    433		goto out;
    434	}
    435
    436	/* Complete checksum if needed */
    437	if (skb->ip_summed == CHECKSUM_PARTIAL &&
    438	    (err = skb_csum_hwoffload_help(skb, 0)))
    439		goto out;
    440
    441	/* Older versions of OVS user space enforce alignment of the last
    442	 * Netlink attribute to NLA_ALIGNTO which would require extensive
    443	 * padding logic. Only perform zerocopy if padding is not required.
    444	 */
    445	if (dp->user_features & OVS_DP_F_UNALIGNED)
    446		hlen = skb_zerocopy_headlen(skb);
    447	else
    448		hlen = skb->len;
    449
    450	len = upcall_msg_size(upcall_info, hlen - cutlen,
    451			      OVS_CB(skb)->acts_origlen);
    452	user_skb = genlmsg_new(len, GFP_ATOMIC);
    453	if (!user_skb) {
    454		err = -ENOMEM;
    455		goto out;
    456	}
    457
    458	upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
    459			     0, upcall_info->cmd);
    460	if (!upcall) {
    461		err = -EINVAL;
    462		goto out;
    463	}
    464	upcall->dp_ifindex = dp_ifindex;
    465
    466	err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
    467	if (err)
    468		goto out;
    469
    470	if (upcall_info->userdata)
    471		__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
    472			  nla_len(upcall_info->userdata),
    473			  nla_data(upcall_info->userdata));
    474
    475	if (upcall_info->egress_tun_info) {
    476		nla = nla_nest_start_noflag(user_skb,
    477					    OVS_PACKET_ATTR_EGRESS_TUN_KEY);
    478		if (!nla) {
    479			err = -EMSGSIZE;
    480			goto out;
    481		}
    482		err = ovs_nla_put_tunnel_info(user_skb,
    483					      upcall_info->egress_tun_info);
    484		if (err)
    485			goto out;
    486
    487		nla_nest_end(user_skb, nla);
    488	}
    489
    490	if (upcall_info->actions_len) {
    491		nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS);
    492		if (!nla) {
    493			err = -EMSGSIZE;
    494			goto out;
    495		}
    496		err = ovs_nla_put_actions(upcall_info->actions,
    497					  upcall_info->actions_len,
    498					  user_skb);
    499		if (!err)
    500			nla_nest_end(user_skb, nla);
    501		else
    502			nla_nest_cancel(user_skb, nla);
    503	}
    504
    505	/* Add OVS_PACKET_ATTR_MRU */
    506	if (upcall_info->mru &&
    507	    nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) {
    508		err = -ENOBUFS;
    509		goto out;
    510	}
    511
    512	/* Add OVS_PACKET_ATTR_LEN when packet is truncated */
    513	if (cutlen > 0 &&
    514	    nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) {
    515		err = -ENOBUFS;
    516		goto out;
    517	}
    518
    519	/* Add OVS_PACKET_ATTR_HASH */
    520	hash = skb_get_hash_raw(skb);
    521	if (skb->sw_hash)
    522		hash |= OVS_PACKET_HASH_SW_BIT;
    523
    524	if (skb->l4_hash)
    525		hash |= OVS_PACKET_HASH_L4_BIT;
    526
    527	if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof(u64), &hash)) {
    528		err = -ENOBUFS;
    529		goto out;
    530	}
    531
    532	/* Only reserve room for the attribute header; packet data is added
    533	 * in skb_zerocopy(). */
    534	if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
    535		err = -ENOBUFS;
    536		goto out;
    537	}
    538	nla->nla_len = nla_attr_size(skb->len - cutlen);
    539
    540	err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
    541	if (err)
    542		goto out;
    543
    544	/* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
    545	pad_packet(dp, user_skb);
    546
    547	((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
    548
    549	err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
    550	user_skb = NULL;
    551out:
    552	if (err)
    553		skb_tx_error(skb);
    554	kfree_skb(user_skb);
    555	kfree_skb(nskb);
    556	return err;
    557}
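
/*
 * Layout sketch of the upcall message assembled above, in the order the
 * attributes are written (bracketed ones are optional):
 *
 *	struct ovs_header                 dp_ifindex
 *	OVS_PACKET_ATTR_KEY               flow key extracted from the packet
 *	[OVS_PACKET_ATTR_USERDATA]        opaque cookie from the action
 *	[OVS_PACKET_ATTR_EGRESS_TUN_KEY]  egress tunnel metadata
 *	[OVS_PACKET_ATTR_ACTIONS]         actions that triggered the upcall
 *	[OVS_PACKET_ATTR_MRU]             maximum received unit
 *	[OVS_PACKET_ATTR_LEN]             original length, when truncated
 *	OVS_PACKET_ATTR_HASH              skb hash plus SW/L4 flag bits
 *	OVS_PACKET_ATTR_PACKET            packet data, zerocopied if possible
 */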
    558
    559static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
    560{
    561	struct ovs_header *ovs_header = info->userhdr;
    562	struct net *net = sock_net(skb->sk);
    563	struct nlattr **a = info->attrs;
    564	struct sw_flow_actions *acts;
    565	struct sk_buff *packet;
    566	struct sw_flow *flow;
    567	struct sw_flow_actions *sf_acts;
    568	struct datapath *dp;
    569	struct vport *input_vport;
    570	u16 mru = 0;
    571	u64 hash;
    572	int len;
    573	int err;
    574	bool log = !a[OVS_PACKET_ATTR_PROBE];
    575
    576	err = -EINVAL;
    577	if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
    578	    !a[OVS_PACKET_ATTR_ACTIONS])
    579		goto err;
    580
    581	len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
    582	packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
    583	err = -ENOMEM;
    584	if (!packet)
    585		goto err;
    586	skb_reserve(packet, NET_IP_ALIGN);
    587
    588	nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);
    589
    590	/* Set packet's mru */
    591	if (a[OVS_PACKET_ATTR_MRU]) {
    592		mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
    593		packet->ignore_df = 1;
    594	}
    595	OVS_CB(packet)->mru = mru;
    596
    597	if (a[OVS_PACKET_ATTR_HASH]) {
    598		hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);
    599
    600		__skb_set_hash(packet, hash & 0xFFFFFFFFULL,
    601			       !!(hash & OVS_PACKET_HASH_SW_BIT),
    602			       !!(hash & OVS_PACKET_HASH_L4_BIT));
    603	}
    604
    605	/* Build an sw_flow for sending this packet. */
    606	flow = ovs_flow_alloc();
    607	err = PTR_ERR(flow);
    608	if (IS_ERR(flow))
    609		goto err_kfree_skb;
    610
    611	err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
    612					     packet, &flow->key, log);
    613	if (err)
    614		goto err_flow_free;
    615
    616	err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
    617				   &flow->key, &acts, log);
    618	if (err)
    619		goto err_flow_free;
    620
    621	rcu_assign_pointer(flow->sf_acts, acts);
    622	packet->priority = flow->key.phy.priority;
    623	packet->mark = flow->key.phy.skb_mark;
    624
    625	rcu_read_lock();
    626	dp = get_dp_rcu(net, ovs_header->dp_ifindex);
    627	err = -ENODEV;
    628	if (!dp)
    629		goto err_unlock;
    630
    631	input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
    632	if (!input_vport)
    633		input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
    634
    635	if (!input_vport)
    636		goto err_unlock;
    637
    638	packet->dev = input_vport->dev;
    639	OVS_CB(packet)->input_vport = input_vport;
    640	sf_acts = rcu_dereference(flow->sf_acts);
    641
    642	local_bh_disable();
    643	err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
    644	local_bh_enable();
    645	rcu_read_unlock();
    646
    647	ovs_flow_free(flow, false);
    648	return err;
    649
    650err_unlock:
    651	rcu_read_unlock();
    652err_flow_free:
    653	ovs_flow_free(flow, false);
    654err_kfree_skb:
    655	kfree_skb(packet);
    656err:
    657	return err;
    658}
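
/*
 * Hedged sketch of the user-space side of OVS_PACKET_CMD_EXECUTE
 * (generic netlink pseudocode, not a real API): the request carries a
 * packet, a key and an action list, and the kernel injects the packet
 * as if it had been received on the flow key's in_port:
 *
 *	msg = genl(OVS_PACKET_FAMILY, OVS_PACKET_CMD_EXECUTE);
 *	msg.ovs_header.dp_ifindex = dp_ifindex;
 *	nla_put(msg, OVS_PACKET_ATTR_PACKET, frame, frame_len);
 *	nla_put_nested(msg, OVS_PACKET_ATTR_KEY, key_attrs);
 *	nla_put_nested(msg, OVS_PACKET_ATTR_ACTIONS, action_attrs);
 *	send(msg);	// needs CAP_NET_ADMIN, see dp_packet_genl_ops[]
 */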
    659
    660static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
    661	[OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
    662	[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
    663	[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
    664	[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
    665	[OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
    666	[OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
    667};
    668
    669static const struct genl_small_ops dp_packet_genl_ops[] = {
    670	{ .cmd = OVS_PACKET_CMD_EXECUTE,
    671	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
    672	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
    673	  .doit = ovs_packet_cmd_execute
    674	}
    675};
    676
    677static struct genl_family dp_packet_genl_family __ro_after_init = {
    678	.hdrsize = sizeof(struct ovs_header),
    679	.name = OVS_PACKET_FAMILY,
    680	.version = OVS_PACKET_VERSION,
    681	.maxattr = OVS_PACKET_ATTR_MAX,
    682	.policy = packet_policy,
    683	.netnsok = true,
    684	.parallel_ops = true,
    685	.small_ops = dp_packet_genl_ops,
    686	.n_small_ops = ARRAY_SIZE(dp_packet_genl_ops),
    687	.module = THIS_MODULE,
    688};
    689
    690static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
    691			 struct ovs_dp_megaflow_stats *mega_stats)
    692{
    693	int i;
    694
    695	memset(mega_stats, 0, sizeof(*mega_stats));
    696
    697	stats->n_flows = ovs_flow_tbl_count(&dp->table);
    698	mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);
    699
    700	stats->n_hit = stats->n_missed = stats->n_lost = 0;
    701
    702	for_each_possible_cpu(i) {
    703		const struct dp_stats_percpu *percpu_stats;
    704		struct dp_stats_percpu local_stats;
    705		unsigned int start;
    706
    707		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
    708
    709		do {
    710			start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
    711			local_stats = *percpu_stats;
    712		} while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));
    713
    714		stats->n_hit += local_stats.n_hit;
    715		stats->n_missed += local_stats.n_missed;
    716		stats->n_lost += local_stats.n_lost;
    717		mega_stats->n_mask_hit += local_stats.n_mask_hit;
    718		mega_stats->n_cache_hit += local_stats.n_cache_hit;
    719	}
    720}
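
/*
 * The fetch/retry loop above is the standard u64_stats pattern: on
 * 32-bit SMP kernels the 64-bit counters cannot be read atomically, so
 * the reader re-snapshots local_stats until the writer's sequence count
 * is stable; on 64-bit kernels the begin/retry helpers compile away.
 */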
    721
    722static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
    723{
    724	return ovs_identifier_is_ufid(sfid) &&
    725	       !(ufid_flags & OVS_UFID_F_OMIT_KEY);
    726}
    727
    728static bool should_fill_mask(uint32_t ufid_flags)
    729{
    730	return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
    731}
    732
    733static bool should_fill_actions(uint32_t ufid_flags)
    734{
    735	return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
    736}
    737
    738static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
    739				    const struct sw_flow_id *sfid,
    740				    uint32_t ufid_flags)
    741{
    742	size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
    743
    744	/* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback
    745	 * see ovs_nla_put_identifier()
    746	 */
    747	if (sfid && ovs_identifier_is_ufid(sfid))
    748		len += nla_total_size(sfid->ufid_len);
    749	else
    750		len += nla_total_size(ovs_key_attr_size());
    751
    752	/* OVS_FLOW_ATTR_KEY */
    753	if (!sfid || should_fill_key(sfid, ufid_flags))
    754		len += nla_total_size(ovs_key_attr_size());
    755
    756	/* OVS_FLOW_ATTR_MASK */
    757	if (should_fill_mask(ufid_flags))
    758		len += nla_total_size(ovs_key_attr_size());
    759
    760	/* OVS_FLOW_ATTR_ACTIONS */
    761	if (should_fill_actions(ufid_flags))
    762		len += nla_total_size(acts->orig_len);
    763
    764	return len
    765		+ nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
    766		+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
    767		+ nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
    768}
    769
    770/* Called with ovs_mutex or RCU read lock. */
    771static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
    772				   struct sk_buff *skb)
    773{
    774	struct ovs_flow_stats stats;
    775	__be16 tcp_flags;
    776	unsigned long used;
    777
    778	ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
    779
    780	if (used &&
    781	    nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
    782			      OVS_FLOW_ATTR_PAD))
    783		return -EMSGSIZE;
    784
    785	if (stats.n_packets &&
    786	    nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
    787			  sizeof(struct ovs_flow_stats), &stats,
    788			  OVS_FLOW_ATTR_PAD))
    789		return -EMSGSIZE;
    790
    791	if ((u8)ntohs(tcp_flags) &&
    792	     nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
    793		return -EMSGSIZE;
    794
    795	return 0;
    796}
    797
    798/* Called with ovs_mutex or RCU read lock. */
    799static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
    800				     struct sk_buff *skb, int skb_orig_len)
    801{
    802	struct nlattr *start;
    803	int err;
    804
    805	/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
    806	 * this is the first flow to be dumped into 'skb'.  This is unusual for
    807	 * Netlink but individual action lists can be longer than
    808	 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
    809	 * The userspace caller can always fetch the actions separately if it
    810	 * really wants them.  (Most userspace callers in fact don't care.)
    811	 *
    812	 * This can only fail for dump operations because the skb is always
    813	 * properly sized for single flows.
    814	 */
    815	start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS);
    816	if (start) {
    817		const struct sw_flow_actions *sf_acts;
    818
    819		sf_acts = rcu_dereference_ovsl(flow->sf_acts);
    820		err = ovs_nla_put_actions(sf_acts->actions,
    821					  sf_acts->actions_len, skb);
    822
    823		if (!err)
    824			nla_nest_end(skb, start);
    825		else {
    826			if (skb_orig_len)
    827				return err;
    828
    829			nla_nest_cancel(skb, start);
    830		}
    831	} else if (skb_orig_len) {
    832		return -EMSGSIZE;
    833	}
    834
    835	return 0;
    836}
    837
    838/* Called with ovs_mutex or RCU read lock. */
    839static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
    840				  struct sk_buff *skb, u32 portid,
    841				  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
    842{
    843	const int skb_orig_len = skb->len;
    844	struct ovs_header *ovs_header;
    845	int err;
    846
    847	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
    848				 flags, cmd);
    849	if (!ovs_header)
    850		return -EMSGSIZE;
    851
    852	ovs_header->dp_ifindex = dp_ifindex;
    853
    854	err = ovs_nla_put_identifier(flow, skb);
    855	if (err)
    856		goto error;
    857
    858	if (should_fill_key(&flow->id, ufid_flags)) {
    859		err = ovs_nla_put_masked_key(flow, skb);
    860		if (err)
    861			goto error;
    862	}
    863
    864	if (should_fill_mask(ufid_flags)) {
    865		err = ovs_nla_put_mask(flow, skb);
    866		if (err)
    867			goto error;
    868	}
    869
    870	err = ovs_flow_cmd_fill_stats(flow, skb);
    871	if (err)
    872		goto error;
    873
    874	if (should_fill_actions(ufid_flags)) {
    875		err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
    876		if (err)
    877			goto error;
    878	}
    879
    880	genlmsg_end(skb, ovs_header);
    881	return 0;
    882
    883error:
    884	genlmsg_cancel(skb, ovs_header);
    885	return err;
    886}
    887
    888/* May not be called with RCU read lock. */
    889static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
    890					       const struct sw_flow_id *sfid,
    891					       struct genl_info *info,
    892					       bool always,
    893					       uint32_t ufid_flags)
    894{
    895	struct sk_buff *skb;
    896	size_t len;
    897
    898	if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
    899		return NULL;
    900
    901	len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
    902	skb = genlmsg_new(len, GFP_KERNEL);
    903	if (!skb)
    904		return ERR_PTR(-ENOMEM);
    905
    906	return skb;
    907}
    908
    909/* Called with ovs_mutex. */
    910static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
    911					       int dp_ifindex,
    912					       struct genl_info *info, u8 cmd,
    913					       bool always, u32 ufid_flags)
    914{
    915	struct sk_buff *skb;
    916	int retval;
    917
    918	skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
    919				      &flow->id, info, always, ufid_flags);
    920	if (IS_ERR_OR_NULL(skb))
    921		return skb;
    922
    923	retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
    924					info->snd_portid, info->snd_seq, 0,
    925					cmd, ufid_flags);
    926	if (WARN_ON_ONCE(retval < 0)) {
    927		kfree_skb(skb);
    928		skb = ERR_PTR(retval);
    929	}
    930	return skb;
    931}
    932
    933static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
    934{
    935	struct net *net = sock_net(skb->sk);
    936	struct nlattr **a = info->attrs;
    937	struct ovs_header *ovs_header = info->userhdr;
    938	struct sw_flow *flow = NULL, *new_flow;
    939	struct sw_flow_mask mask;
    940	struct sk_buff *reply;
    941	struct datapath *dp;
    942	struct sw_flow_actions *acts;
    943	struct sw_flow_match match;
    944	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
    945	int error;
    946	bool log = !a[OVS_FLOW_ATTR_PROBE];
    947
    948	/* Must have key and actions. */
    949	error = -EINVAL;
    950	if (!a[OVS_FLOW_ATTR_KEY]) {
    951		OVS_NLERR(log, "Flow key attr not present in new flow.");
    952		goto error;
    953	}
    954	if (!a[OVS_FLOW_ATTR_ACTIONS]) {
    955		OVS_NLERR(log, "Flow actions attr not present in new flow.");
    956		goto error;
    957	}
    958
    959	/* Most of the time we need to allocate a new flow, so do it
    960	 * before locking.
    961	 */
    962	new_flow = ovs_flow_alloc();
    963	if (IS_ERR(new_flow)) {
    964		error = PTR_ERR(new_flow);
    965		goto error;
    966	}
    967
    968	/* Extract key. */
    969	ovs_match_init(&match, &new_flow->key, false, &mask);
    970	error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
    971				  a[OVS_FLOW_ATTR_MASK], log);
    972	if (error)
    973		goto err_kfree_flow;
    974
    975	/* Extract flow identifier. */
    976	error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
    977				       &new_flow->key, log);
    978	if (error)
    979		goto err_kfree_flow;
    980
    981	/* unmasked key is needed to match when ufid is not used. */
    982	if (ovs_identifier_is_key(&new_flow->id))
    983		match.key = new_flow->id.unmasked_key;
    984
    985	ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask);
    986
    987	/* Validate actions. */
    988	error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
    989				     &new_flow->key, &acts, log);
    990	if (error) {
    991		OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
    992		goto err_kfree_flow;
    993	}
    994
    995	reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
    996					ufid_flags);
    997	if (IS_ERR(reply)) {
    998		error = PTR_ERR(reply);
    999		goto err_kfree_acts;
   1000	}
   1001
   1002	ovs_lock();
   1003	dp = get_dp(net, ovs_header->dp_ifindex);
   1004	if (unlikely(!dp)) {
   1005		error = -ENODEV;
   1006		goto err_unlock_ovs;
   1007	}
   1008
   1009	/* Check if this is a duplicate flow */
   1010	if (ovs_identifier_is_ufid(&new_flow->id))
   1011		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
   1012	if (!flow)
   1013		flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key);
   1014	if (likely(!flow)) {
   1015		rcu_assign_pointer(new_flow->sf_acts, acts);
   1016
   1017		/* Put flow in bucket. */
   1018		error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
   1019		if (unlikely(error)) {
   1020			acts = NULL;
   1021			goto err_unlock_ovs;
   1022		}
   1023
   1024		if (unlikely(reply)) {
   1025			error = ovs_flow_cmd_fill_info(new_flow,
   1026						       ovs_header->dp_ifindex,
   1027						       reply, info->snd_portid,
   1028						       info->snd_seq, 0,
   1029						       OVS_FLOW_CMD_NEW,
   1030						       ufid_flags);
   1031			BUG_ON(error < 0);
   1032		}
   1033		ovs_unlock();
   1034	} else {
   1035		struct sw_flow_actions *old_acts;
   1036
   1037		/* Bail out if we're not allowed to modify an existing flow.
   1038		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
   1039		 * because Generic Netlink treats the latter as a dump
   1040		 * request.  We also accept NLM_F_EXCL in case that bug ever
   1041		 * gets fixed.
   1042		 */
   1043		if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
   1044							 | NLM_F_EXCL))) {
   1045			error = -EEXIST;
   1046			goto err_unlock_ovs;
   1047		}
   1048		/* The flow identifier has to be the same for flow updates.
   1049		 * Look for any overlapping flow.
   1050		 */
   1051		if (unlikely(!ovs_flow_cmp(flow, &match))) {
   1052			if (ovs_identifier_is_key(&flow->id))
   1053				flow = ovs_flow_tbl_lookup_exact(&dp->table,
   1054								 &match);
   1055			else /* UFID matches but key is different */
   1056				flow = NULL;
   1057			if (!flow) {
   1058				error = -ENOENT;
   1059				goto err_unlock_ovs;
   1060			}
   1061		}
   1062		/* Update actions. */
   1063		old_acts = ovsl_dereference(flow->sf_acts);
   1064		rcu_assign_pointer(flow->sf_acts, acts);
   1065
   1066		if (unlikely(reply)) {
   1067			error = ovs_flow_cmd_fill_info(flow,
   1068						       ovs_header->dp_ifindex,
   1069						       reply, info->snd_portid,
   1070						       info->snd_seq, 0,
   1071						       OVS_FLOW_CMD_NEW,
   1072						       ufid_flags);
   1073			BUG_ON(error < 0);
   1074		}
   1075		ovs_unlock();
   1076
   1077		ovs_nla_free_flow_actions_rcu(old_acts);
   1078		ovs_flow_free(new_flow, false);
   1079	}
   1080
   1081	if (reply)
   1082		ovs_notify(&dp_flow_genl_family, reply, info);
   1083	return 0;
   1084
   1085err_unlock_ovs:
   1086	ovs_unlock();
   1087	kfree_skb(reply);
   1088err_kfree_acts:
   1089	ovs_nla_free_flow_actions(acts);
   1090err_kfree_flow:
   1091	ovs_flow_free(new_flow, false);
   1092error:
   1093	return error;
   1094}
   1095
   1096/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
   1097static noinline_for_stack
   1098struct sw_flow_actions *get_flow_actions(struct net *net,
   1099					 const struct nlattr *a,
   1100					 const struct sw_flow_key *key,
   1101					 const struct sw_flow_mask *mask,
   1102					 bool log)
   1103{
   1104	struct sw_flow_actions *acts;
   1105	struct sw_flow_key masked_key;
   1106	int error;
   1107
   1108	ovs_flow_mask_key(&masked_key, key, true, mask);
   1109	error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
   1110	if (error) {
   1111		OVS_NLERR(log,
   1112			  "Actions may not be safe on all matching packets");
   1113		return ERR_PTR(error);
   1114	}
   1115
   1116	return acts;
   1117}
   1118
   1119/* Factor out match-init and action-copy to avoid the
   1120 * "Wframe-larger-than=1024" warning. Because the mask is only
   1121 * used to get the actions, a separate function saves some
   1122 * stack space.
   1123 *
   1124 * If neither the key nor the action attribute is present, we
   1125 * return 0 directly; in that case the caller will not use the
   1126 * match either. If the action attribute is present, we try to
   1127 * fetch the actions and store them in *acts. Before returning,
   1128 * we reset the match->mask pointer, because we must not return
   1129 * a match object with a dangling reference to the on-stack
   1130 * mask.
   1131 */
   1132static noinline_for_stack int
   1133ovs_nla_init_match_and_action(struct net *net,
   1134			      struct sw_flow_match *match,
   1135			      struct sw_flow_key *key,
   1136			      struct nlattr **a,
   1137			      struct sw_flow_actions **acts,
   1138			      bool log)
   1139{
   1140	struct sw_flow_mask mask;
   1141	int error = 0;
   1142
   1143	if (a[OVS_FLOW_ATTR_KEY]) {
   1144		ovs_match_init(match, key, true, &mask);
   1145		error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
   1146					  a[OVS_FLOW_ATTR_MASK], log);
   1147		if (error)
   1148			goto error;
   1149	}
   1150
   1151	if (a[OVS_FLOW_ATTR_ACTIONS]) {
   1152		if (!a[OVS_FLOW_ATTR_KEY]) {
   1153			OVS_NLERR(log,
   1154				  "Flow key attribute not present in set flow.");
   1155			error = -EINVAL;
   1156			goto error;
   1157		}
   1158
   1159		*acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
   1160					 &mask, log);
   1161		if (IS_ERR(*acts)) {
   1162			error = PTR_ERR(*acts);
   1163			goto error;
   1164		}
   1165	}
   1166
   1167	/* On success, error is 0. */
   1168error:
   1169	match->mask = NULL;
   1170	return error;
   1171}
   1172
   1173static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
   1174{
   1175	struct net *net = sock_net(skb->sk);
   1176	struct nlattr **a = info->attrs;
   1177	struct ovs_header *ovs_header = info->userhdr;
   1178	struct sw_flow_key key;
   1179	struct sw_flow *flow;
   1180	struct sk_buff *reply = NULL;
   1181	struct datapath *dp;
   1182	struct sw_flow_actions *old_acts = NULL, *acts = NULL;
   1183	struct sw_flow_match match;
   1184	struct sw_flow_id sfid;
   1185	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
   1186	int error = 0;
   1187	bool log = !a[OVS_FLOW_ATTR_PROBE];
   1188	bool ufid_present;
   1189
   1190	ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
   1191	if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
   1192		OVS_NLERR(log,
   1193			  "Flow set message rejected, Key attribute missing.");
   1194		return -EINVAL;
   1195	}
   1196
   1197	error = ovs_nla_init_match_and_action(net, &match, &key, a,
   1198					      &acts, log);
   1199	if (error)
   1200		goto error;
   1201
   1202	if (acts) {
   1203		/* Can allocate before locking if we have the acts. */
   1204		reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
   1205						ufid_flags);
   1206		if (IS_ERR(reply)) {
   1207			error = PTR_ERR(reply);
   1208			goto err_kfree_acts;
   1209		}
   1210	}
   1211
   1212	ovs_lock();
   1213	dp = get_dp(net, ovs_header->dp_ifindex);
   1214	if (unlikely(!dp)) {
   1215		error = -ENODEV;
   1216		goto err_unlock_ovs;
   1217	}
   1218	/* Check that the flow exists. */
   1219	if (ufid_present)
   1220		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
   1221	else
   1222		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
   1223	if (unlikely(!flow)) {
   1224		error = -ENOENT;
   1225		goto err_unlock_ovs;
   1226	}
   1227
   1228	/* Update actions, if present. */
   1229	if (likely(acts)) {
   1230		old_acts = ovsl_dereference(flow->sf_acts);
   1231		rcu_assign_pointer(flow->sf_acts, acts);
   1232
   1233		if (unlikely(reply)) {
   1234			error = ovs_flow_cmd_fill_info(flow,
   1235						       ovs_header->dp_ifindex,
   1236						       reply, info->snd_portid,
   1237						       info->snd_seq, 0,
   1238						       OVS_FLOW_CMD_SET,
   1239						       ufid_flags);
   1240			BUG_ON(error < 0);
   1241		}
   1242	} else {
   1243		/* Could not alloc without acts before locking. */
   1244		reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
   1245						info, OVS_FLOW_CMD_SET, false,
   1246						ufid_flags);
   1247
   1248		if (IS_ERR(reply)) {
   1249			error = PTR_ERR(reply);
   1250			goto err_unlock_ovs;
   1251		}
   1252	}
   1253
   1254	/* Clear stats. */
   1255	if (a[OVS_FLOW_ATTR_CLEAR])
   1256		ovs_flow_stats_clear(flow);
   1257	ovs_unlock();
   1258
   1259	if (reply)
   1260		ovs_notify(&dp_flow_genl_family, reply, info);
   1261	if (old_acts)
   1262		ovs_nla_free_flow_actions_rcu(old_acts);
   1263
   1264	return 0;
   1265
   1266err_unlock_ovs:
   1267	ovs_unlock();
   1268	kfree_skb(reply);
   1269err_kfree_acts:
   1270	ovs_nla_free_flow_actions(acts);
   1271error:
   1272	return error;
   1273}
   1274
   1275static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
   1276{
   1277	struct nlattr **a = info->attrs;
   1278	struct ovs_header *ovs_header = info->userhdr;
   1279	struct net *net = sock_net(skb->sk);
   1280	struct sw_flow_key key;
   1281	struct sk_buff *reply;
   1282	struct sw_flow *flow;
   1283	struct datapath *dp;
   1284	struct sw_flow_match match;
   1285	struct sw_flow_id ufid;
   1286	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
   1287	int err = 0;
   1288	bool log = !a[OVS_FLOW_ATTR_PROBE];
   1289	bool ufid_present;
   1290
   1291	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
   1292	if (a[OVS_FLOW_ATTR_KEY]) {
   1293		ovs_match_init(&match, &key, true, NULL);
   1294		err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
   1295					log);
   1296	} else if (!ufid_present) {
   1297		OVS_NLERR(log,
   1298			  "Flow get message rejected, Key attribute missing.");
   1299		err = -EINVAL;
   1300	}
   1301	if (err)
   1302		return err;
   1303
   1304	ovs_lock();
   1305	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
   1306	if (!dp) {
   1307		err = -ENODEV;
   1308		goto unlock;
   1309	}
   1310
   1311	if (ufid_present)
   1312		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
   1313	else
   1314		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
   1315	if (!flow) {
   1316		err = -ENOENT;
   1317		goto unlock;
   1318	}
   1319
   1320	reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
   1321					OVS_FLOW_CMD_GET, true, ufid_flags);
   1322	if (IS_ERR(reply)) {
   1323		err = PTR_ERR(reply);
   1324		goto unlock;
   1325	}
   1326
   1327	ovs_unlock();
   1328	return genlmsg_reply(reply, info);
   1329unlock:
   1330	ovs_unlock();
   1331	return err;
   1332}
   1333
   1334static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
   1335{
   1336	struct nlattr **a = info->attrs;
   1337	struct ovs_header *ovs_header = info->userhdr;
   1338	struct net *net = sock_net(skb->sk);
   1339	struct sw_flow_key key;
   1340	struct sk_buff *reply;
   1341	struct sw_flow *flow = NULL;
   1342	struct datapath *dp;
   1343	struct sw_flow_match match;
   1344	struct sw_flow_id ufid;
   1345	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
   1346	int err;
   1347	bool log = !a[OVS_FLOW_ATTR_PROBE];
   1348	bool ufid_present;
   1349
   1350	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
   1351	if (a[OVS_FLOW_ATTR_KEY]) {
   1352		ovs_match_init(&match, &key, true, NULL);
   1353		err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
   1354					NULL, log);
   1355		if (unlikely(err))
   1356			return err;
   1357	}
   1358
   1359	ovs_lock();
   1360	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
   1361	if (unlikely(!dp)) {
   1362		err = -ENODEV;
   1363		goto unlock;
   1364	}
   1365
   1366	if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
   1367		err = ovs_flow_tbl_flush(&dp->table);
   1368		goto unlock;
   1369	}
   1370
   1371	if (ufid_present)
   1372		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
   1373	else
   1374		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
   1375	if (unlikely(!flow)) {
   1376		err = -ENOENT;
   1377		goto unlock;
   1378	}
   1379
   1380	ovs_flow_tbl_remove(&dp->table, flow);
   1381	ovs_unlock();
   1382
   1383	reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
   1384					&flow->id, info, false, ufid_flags);
   1385	if (likely(reply)) {
   1386		if (!IS_ERR(reply)) {
   1387			rcu_read_lock();	/* To keep RCU checker happy. */
   1388			err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
   1389						     reply, info->snd_portid,
   1390						     info->snd_seq, 0,
   1391						     OVS_FLOW_CMD_DEL,
   1392						     ufid_flags);
   1393			rcu_read_unlock();
   1394			if (WARN_ON_ONCE(err < 0)) {
   1395				kfree_skb(reply);
   1396				goto out_free;
   1397			}
   1398
   1399			ovs_notify(&dp_flow_genl_family, reply, info);
   1400		} else {
   1401			netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0,
   1402					PTR_ERR(reply));
   1403		}
   1404	}
   1405
   1406out_free:
   1407	ovs_flow_free(flow, true);
   1408	return 0;
   1409unlock:
   1410	ovs_unlock();
   1411	return err;
   1412}
   1413
   1414static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
   1415{
   1416	struct nlattr *a[__OVS_FLOW_ATTR_MAX];
   1417	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
   1418	struct table_instance *ti;
   1419	struct datapath *dp;
   1420	u32 ufid_flags;
   1421	int err;
   1422
   1423	err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a,
   1424				       OVS_FLOW_ATTR_MAX, flow_policy, NULL);
   1425	if (err)
   1426		return err;
   1427	ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
   1428
   1429	rcu_read_lock();
   1430	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
   1431	if (!dp) {
   1432		rcu_read_unlock();
   1433		return -ENODEV;
   1434	}
   1435
   1436	ti = rcu_dereference(dp->table.ti);
   1437	for (;;) {
   1438		struct sw_flow *flow;
   1439		u32 bucket, obj;
   1440
   1441		bucket = cb->args[0];
   1442		obj = cb->args[1];
   1443		flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
   1444		if (!flow)
   1445			break;
   1446
   1447		if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
   1448					   NETLINK_CB(cb->skb).portid,
   1449					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
   1450					   OVS_FLOW_CMD_GET, ufid_flags) < 0)
   1451			break;
   1452
   1453		cb->args[0] = bucket;
   1454		cb->args[1] = obj;
   1455	}
   1456	rcu_read_unlock();
   1457	return skb->len;
   1458}
   1459
   1460static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
   1461	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
   1462	[OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
   1463	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
   1464	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
   1465	[OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
   1466	[OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
   1467	[OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
   1468};
   1469
   1470static const struct genl_small_ops dp_flow_genl_ops[] = {
   1471	{ .cmd = OVS_FLOW_CMD_NEW,
   1472	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
   1473	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
   1474	  .doit = ovs_flow_cmd_new
   1475	},
   1476	{ .cmd = OVS_FLOW_CMD_DEL,
   1477	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
   1478	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
   1479	  .doit = ovs_flow_cmd_del
   1480	},
   1481	{ .cmd = OVS_FLOW_CMD_GET,
   1482	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
   1483	  .flags = 0,		    /* OK for unprivileged users. */
   1484	  .doit = ovs_flow_cmd_get,
   1485	  .dumpit = ovs_flow_cmd_dump
   1486	},
   1487	{ .cmd = OVS_FLOW_CMD_SET,
   1488	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
   1489	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
   1490	  .doit = ovs_flow_cmd_set,
   1491	},
   1492};
   1493
   1494static struct genl_family dp_flow_genl_family __ro_after_init = {
   1495	.hdrsize = sizeof(struct ovs_header),
   1496	.name = OVS_FLOW_FAMILY,
   1497	.version = OVS_FLOW_VERSION,
   1498	.maxattr = OVS_FLOW_ATTR_MAX,
   1499	.policy = flow_policy,
   1500	.netnsok = true,
   1501	.parallel_ops = true,
   1502	.small_ops = dp_flow_genl_ops,
   1503	.n_small_ops = ARRAY_SIZE(dp_flow_genl_ops),
   1504	.mcgrps = &ovs_dp_flow_multicast_group,
   1505	.n_mcgrps = 1,
   1506	.module = THIS_MODULE,
   1507};
   1508
   1509static size_t ovs_dp_cmd_msg_size(void)
   1510{
   1511	size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
   1512
   1513	msgsize += nla_total_size(IFNAMSIZ);
   1514	msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
   1515	msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
   1516	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
   1517	msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */
   1518
   1519	return msgsize;
   1520}
   1521
   1522/* Called with ovs_mutex. */
   1523static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
   1524				u32 portid, u32 seq, u32 flags, u8 cmd)
   1525{
   1526	struct ovs_header *ovs_header;
   1527	struct ovs_dp_stats dp_stats;
   1528	struct ovs_dp_megaflow_stats dp_megaflow_stats;
   1529	int err;
   1530
   1531	ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
   1532				 flags, cmd);
   1533	if (!ovs_header)
   1534		goto error;
   1535
   1536	ovs_header->dp_ifindex = get_dpifindex(dp);
   1537
   1538	err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
   1539	if (err)
   1540		goto nla_put_failure;
   1541
   1542	get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
   1543	if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
   1544			  &dp_stats, OVS_DP_ATTR_PAD))
   1545		goto nla_put_failure;
   1546
   1547	if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
   1548			  sizeof(struct ovs_dp_megaflow_stats),
   1549			  &dp_megaflow_stats, OVS_DP_ATTR_PAD))
   1550		goto nla_put_failure;
   1551
   1552	if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
   1553		goto nla_put_failure;
   1554
   1555	if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
   1556			ovs_flow_tbl_masks_cache_size(&dp->table)))
   1557		goto nla_put_failure;
   1558
   1559	genlmsg_end(skb, ovs_header);
   1560	return 0;
   1561
   1562nla_put_failure:
   1563	genlmsg_cancel(skb, ovs_header);
   1564error:
   1565	return -EMSGSIZE;
   1566}
   1567
   1568static struct sk_buff *ovs_dp_cmd_alloc_info(void)
   1569{
   1570	return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
   1571}
   1572
   1573/* Called with rcu_read_lock or ovs_mutex. */
   1574static struct datapath *lookup_datapath(struct net *net,
   1575					const struct ovs_header *ovs_header,
   1576					struct nlattr *a[OVS_DP_ATTR_MAX + 1])
   1577{
   1578	struct datapath *dp;
   1579
   1580	if (!a[OVS_DP_ATTR_NAME])
   1581		dp = get_dp(net, ovs_header->dp_ifindex);
   1582	else {
   1583		struct vport *vport;
   1584
   1585		vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
   1586		dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
   1587	}
   1588	return dp ? dp : ERR_PTR(-ENODEV);
   1589}
   1590
   1591static void ovs_dp_reset_user_features(struct sk_buff *skb,
   1592				       struct genl_info *info)
   1593{
   1594	struct datapath *dp;
   1595
   1596	dp = lookup_datapath(sock_net(skb->sk), info->userhdr,
   1597			     info->attrs);
   1598	if (IS_ERR(dp))
   1599		return;
   1600
   1601	WARN(dp->user_features, "Dropping previously announced user features\n");
   1602	dp->user_features = 0;
   1603}
   1604
   1605static int ovs_dp_set_upcall_portids(struct datapath *dp,
   1606			      const struct nlattr *ids)
   1607{
   1608	struct dp_nlsk_pids *old, *dp_nlsk_pids;
   1609
   1610	if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
   1611		return -EINVAL;
   1612
   1613	old = ovsl_dereference(dp->upcall_portids);
   1614
   1615	dp_nlsk_pids = kmalloc(sizeof(*dp_nlsk_pids) + nla_len(ids),
   1616			       GFP_KERNEL);
   1617	if (!dp_nlsk_pids)
   1618		return -ENOMEM;
   1619
   1620	dp_nlsk_pids->n_pids = nla_len(ids) / sizeof(u32);
   1621	nla_memcpy(dp_nlsk_pids->pids, ids, nla_len(ids));
   1622
   1623	rcu_assign_pointer(dp->upcall_portids, dp_nlsk_pids);
   1624
   1625	kfree_rcu(old, rcu);
   1626
   1627	return 0;
   1628}
   1629
   1630u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id)
   1631{
   1632	struct dp_nlsk_pids *dp_nlsk_pids;
   1633
   1634	dp_nlsk_pids = rcu_dereference(dp->upcall_portids);
   1635
   1636	if (dp_nlsk_pids) {
   1637		if (cpu_id < dp_nlsk_pids->n_pids) {
   1638			return dp_nlsk_pids->pids[cpu_id];
   1639		} else if (dp_nlsk_pids->n_pids > 0 &&
   1640			   cpu_id >= dp_nlsk_pids->n_pids) {
   1641			/* If the number of netlink PIDs does not match the
   1642			 * number of CPUs as seen by the kernel, log this and
   1643			 * fall back to the PID at cpu_id modulo n_pids so
   1644			 * that packets are not dropped.
   1645			 */
   1646			pr_info_ratelimited("cpu_id mismatch with handler threads");
   1647			return dp_nlsk_pids->pids[cpu_id %
   1648						  dp_nlsk_pids->n_pids];
   1649		} else {
   1650			return 0;
   1651		}
   1652	} else {
   1653		return 0;
   1654	}
   1655}
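
/*
 * Worked example for the lookup above: with n_pids == 4 and pids ==
 * {P0, P1, P2, P3}, cpu_id 2 returns P2, while cpu_id 6 overflows the
 * array and falls back to pids[6 % 4] == P2 instead of dropping the
 * upcall. An empty pid array returns 0, which ovs_dp_upcall() turns
 * into -ENOTCONN.
 */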
   1656
   1657static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
   1658{
   1659	u32 user_features = 0, old_features = dp->user_features;
   1660	int err;
   1661
   1662	if (a[OVS_DP_ATTR_USER_FEATURES]) {
   1663		user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
   1664
   1665		if (user_features & ~(OVS_DP_F_VPORT_PIDS |
   1666				      OVS_DP_F_UNALIGNED |
   1667				      OVS_DP_F_TC_RECIRC_SHARING |
   1668				      OVS_DP_F_DISPATCH_UPCALL_PER_CPU))
   1669			return -EOPNOTSUPP;
   1670
   1671#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
   1672		if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
   1673			return -EOPNOTSUPP;
   1674#endif
   1675	}
   1676
   1677	if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
   1678		int err;
   1679		u32 cache_size;
   1680
   1681		cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
   1682		err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
   1683		if (err)
   1684			return err;
   1685	}
   1686
   1687	dp->user_features = user_features;
   1688
   1689	if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU &&
   1690	    a[OVS_DP_ATTR_PER_CPU_PIDS]) {
   1691		/* Upcall Netlink Port IDs have been updated */
   1692		err = ovs_dp_set_upcall_portids(dp,
   1693						a[OVS_DP_ATTR_PER_CPU_PIDS]);
   1694		if (err)
   1695			return err;
   1696	}
   1697
   1698	if ((dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
   1699	    !(old_features & OVS_DP_F_TC_RECIRC_SHARING))
   1700		tc_skb_ext_tc_enable();
   1701	else if (!(dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
   1702		 (old_features & OVS_DP_F_TC_RECIRC_SHARING))
   1703		tc_skb_ext_tc_disable();
   1704
   1705	return 0;
   1706}
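
/*
 * Example of a feature-set request handled above (a sketch, not
 * upstream documentation): user space passes OVS_DP_ATTR_USER_FEATURES
 * as a bitmask, e.g.
 *
 *	OVS_DP_F_UNALIGNED | OVS_DP_F_DISPATCH_UPCALL_PER_CPU
 *
 * and, when per-CPU dispatch is requested, an OVS_DP_ATTR_PER_CPU_PIDS
 * array with one u32 netlink port id per CPU; any bit outside the four
 * accepted flags yields -EOPNOTSUPP.
 */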
   1707
   1708static int ovs_dp_stats_init(struct datapath *dp)
   1709{
   1710	dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
   1711	if (!dp->stats_percpu)
   1712		return -ENOMEM;
   1713
   1714	return 0;
   1715}
   1716
   1717static int ovs_dp_vport_init(struct datapath *dp)
   1718{
   1719	int i;
   1720
   1721	dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
   1722				  sizeof(struct hlist_head),
   1723				  GFP_KERNEL);
   1724	if (!dp->ports)
   1725		return -ENOMEM;
   1726
   1727	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
   1728		INIT_HLIST_HEAD(&dp->ports[i]);
   1729
   1730	return 0;
   1731}
   1732
   1733static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
   1734{
   1735	struct nlattr **a = info->attrs;
   1736	struct vport_parms parms;
   1737	struct sk_buff *reply;
   1738	struct datapath *dp;
   1739	struct vport *vport;
   1740	struct ovs_net *ovs_net;
   1741	int err;
   1742
   1743	err = -EINVAL;
   1744	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
   1745		goto err;
   1746
   1747	reply = ovs_dp_cmd_alloc_info();
   1748	if (!reply)
   1749		return -ENOMEM;
   1750
   1751	err = -ENOMEM;
   1752	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
   1753	if (dp == NULL)
   1754		goto err_destroy_reply;
   1755
   1756	ovs_dp_set_net(dp, sock_net(skb->sk));
   1757
   1758	/* Allocate table. */
   1759	err = ovs_flow_tbl_init(&dp->table);
   1760	if (err)
   1761		goto err_destroy_dp;
   1762
   1763	err = ovs_dp_stats_init(dp);
   1764	if (err)
   1765		goto err_destroy_table;
   1766
   1767	err = ovs_dp_vport_init(dp);
   1768	if (err)
   1769		goto err_destroy_stats;
   1770
   1771	err = ovs_meters_init(dp);
   1772	if (err)
   1773		goto err_destroy_ports;
   1774
   1775	/* Set up our datapath device. */
   1776	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
   1777	parms.type = OVS_VPORT_TYPE_INTERNAL;
   1778	parms.options = NULL;
   1779	parms.dp = dp;
   1780	parms.port_no = OVSP_LOCAL;
   1781	parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
   1782
   1783	/* So far only local changes have been made, now we need the lock. */
   1784	ovs_lock();
   1785
   1786	err = ovs_dp_change(dp, a);
   1787	if (err)
   1788		goto err_unlock_and_destroy_meters;
   1789
   1790	vport = new_vport(&parms);
   1791	if (IS_ERR(vport)) {
   1792		err = PTR_ERR(vport);
   1793		if (err == -EBUSY)
   1794			err = -EEXIST;
   1795
   1796		if (err == -EEXIST) {
   1797			/* An outdated user space instance that does not understand
   1798			 * the concept of user_features has attempted to create a new
   1799			 * datapath and is likely to reuse it. Drop all user features.
   1800			 */
   1801			if (info->genlhdr->version < OVS_DP_VER_FEATURES)
   1802				ovs_dp_reset_user_features(skb, info);
   1803		}
   1804
   1805		goto err_unlock_and_destroy_meters;
   1806	}
   1807
   1808	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
   1809				   info->snd_seq, 0, OVS_DP_CMD_NEW);
   1810	BUG_ON(err < 0);
   1811
   1812	ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
   1813	list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
   1814
   1815	ovs_unlock();
   1816
   1817	ovs_notify(&dp_datapath_genl_family, reply, info);
   1818	return 0;
   1819
   1820err_unlock_and_destroy_meters:
   1821	ovs_unlock();
   1822	ovs_meters_exit(dp);
   1823err_destroy_ports:
   1824	kfree(dp->ports);
   1825err_destroy_stats:
   1826	free_percpu(dp->stats_percpu);
   1827err_destroy_table:
   1828	ovs_flow_tbl_destroy(&dp->table);
   1829err_destroy_dp:
   1830	kfree(dp);
   1831err_destroy_reply:
   1832	kfree_skb(reply);
   1833err:
   1834	return err;
   1835}
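       /* Note on the error ladder above: each label undoes exactly what was
        * set up before the corresponding failure point, in reverse order of
        * allocation (meters, ports, stats, flow table, dp, reply skb), so a
        * failure at any step leaks nothing.
        */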
   1836
   1837/* Called with ovs_mutex. */
   1838static void __dp_destroy(struct datapath *dp)
   1839{
   1840	struct flow_table *table = &dp->table;
   1841	int i;
   1842
   1843	if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
   1844		tc_skb_ext_tc_disable();
   1845
   1846	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
   1847		struct vport *vport;
   1848		struct hlist_node *n;
   1849
   1850		hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
   1851			if (vport->port_no != OVSP_LOCAL)
   1852				ovs_dp_detach_port(vport);
   1853	}
   1854
   1855	list_del_rcu(&dp->list_node);
   1856
   1857	/* OVSP_LOCAL is the datapath's internal port. Make sure that all
   1858	 * other ports in the datapath are destroyed before freeing it.
   1859	 */
   1860	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
   1861
   1862	/* Flush the sw_flows in the tables here; the RCU callback then only
   1863	 * releases resources such as the dp, ports and tables, which avoids
   1864	 * issues such as RCU usage warnings.
   1865	 */
   1866	table_instance_flow_flush(table, ovsl_dereference(table->ti),
   1867				  ovsl_dereference(table->ufid_ti));
   1868
   1869	/* RCU destroy the ports, meters and flow tables. */
   1870	call_rcu(&dp->rcu, destroy_dp_rcu);
   1871}
   1872
   1873static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
   1874{
   1875	struct sk_buff *reply;
   1876	struct datapath *dp;
   1877	int err;
   1878
   1879	reply = ovs_dp_cmd_alloc_info();
   1880	if (!reply)
   1881		return -ENOMEM;
   1882
   1883	ovs_lock();
   1884	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
   1885	err = PTR_ERR(dp);
   1886	if (IS_ERR(dp))
   1887		goto err_unlock_free;
   1888
   1889	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
   1890				   info->snd_seq, 0, OVS_DP_CMD_DEL);
   1891	BUG_ON(err < 0);
   1892
   1893	__dp_destroy(dp);
   1894	ovs_unlock();
   1895
   1896	ovs_notify(&dp_datapath_genl_family, reply, info);
   1897
   1898	return 0;
   1899
   1900err_unlock_free:
   1901	ovs_unlock();
   1902	kfree_skb(reply);
   1903	return err;
   1904}
   1905
   1906static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
   1907{
   1908	struct sk_buff *reply;
   1909	struct datapath *dp;
   1910	int err;
   1911
   1912	reply = ovs_dp_cmd_alloc_info();
   1913	if (!reply)
   1914		return -ENOMEM;
   1915
   1916	ovs_lock();
   1917	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
   1918	err = PTR_ERR(dp);
   1919	if (IS_ERR(dp))
   1920		goto err_unlock_free;
   1921
   1922	err = ovs_dp_change(dp, info->attrs);
   1923	if (err)
   1924		goto err_unlock_free;
   1925
   1926	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
   1927				   info->snd_seq, 0, OVS_DP_CMD_SET);
   1928	BUG_ON(err < 0);
   1929
   1930	ovs_unlock();
   1931	ovs_notify(&dp_datapath_genl_family, reply, info);
   1932
   1933	return 0;
   1934
   1935err_unlock_free:
   1936	ovs_unlock();
   1937	kfree_skb(reply);
   1938	return err;
   1939}
   1940
   1941static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
   1942{
   1943	struct sk_buff *reply;
   1944	struct datapath *dp;
   1945	int err;
   1946
   1947	reply = ovs_dp_cmd_alloc_info();
   1948	if (!reply)
   1949		return -ENOMEM;
   1950
   1951	ovs_lock();
   1952	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
   1953	if (IS_ERR(dp)) {
   1954		err = PTR_ERR(dp);
   1955		goto err_unlock_free;
   1956	}
   1957	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
   1958				   info->snd_seq, 0, OVS_DP_CMD_GET);
   1959	BUG_ON(err < 0);
   1960	ovs_unlock();
   1961
   1962	return genlmsg_reply(reply, info);
   1963
   1964err_unlock_free:
   1965	ovs_unlock();
   1966	kfree_skb(reply);
   1967	return err;
   1968}
   1969
   1970static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
   1971{
   1972	struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
   1973	struct datapath *dp;
   1974	int skip = cb->args[0];
   1975	int i = 0;
   1976
   1977	ovs_lock();
   1978	list_for_each_entry(dp, &ovs_net->dps, list_node) {
   1979		if (i >= skip &&
   1980		    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
   1981					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
   1982					 OVS_DP_CMD_GET) < 0)
   1983			break;
   1984		i++;
   1985	}
   1986	ovs_unlock();
   1987
   1988	cb->args[0] = i;
   1989
   1990	return skb->len;
   1991}
   1992
   1993static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
   1994	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
   1995	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
   1996	[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
   1997	[OVS_DP_ATTR_MASKS_CACHE_SIZE] =  NLA_POLICY_RANGE(NLA_U32, 0,
   1998		PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
   1999};
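       /* The OVS_DP_ATTR_MASKS_CACHE_SIZE bound above keeps
        * cache_size * sizeof(struct mask_cache_entry) within one per-CPU
        * allocation unit (PCPU_MIN_UNIT_SIZE); the masks cache is a
        * per-CPU allocation (see ovs_flow_tbl_masks_cache_resize() in
        * flow_table.c).
        */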
   2000
   2001static const struct genl_small_ops dp_datapath_genl_ops[] = {
   2002	{ .cmd = OVS_DP_CMD_NEW,
   2003	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
   2004	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
   2005	  .doit = ovs_dp_cmd_new
   2006	},
   2007	{ .cmd = OVS_DP_CMD_DEL,
   2008	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
   2009	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
   2010	  .doit = ovs_dp_cmd_del
   2011	},
   2012	{ .cmd = OVS_DP_CMD_GET,
   2013	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
   2014	  .flags = 0,		    /* OK for unprivileged users. */
   2015	  .doit = ovs_dp_cmd_get,
   2016	  .dumpit = ovs_dp_cmd_dump
   2017	},
   2018	{ .cmd = OVS_DP_CMD_SET,
   2019	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
   2020	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
   2021	  .doit = ovs_dp_cmd_set,
   2022	},
   2023};
   2024
   2025static struct genl_family dp_datapath_genl_family __ro_after_init = {
   2026	.hdrsize = sizeof(struct ovs_header),
   2027	.name = OVS_DATAPATH_FAMILY,
   2028	.version = OVS_DATAPATH_VERSION,
   2029	.maxattr = OVS_DP_ATTR_MAX,
   2030	.policy = datapath_policy,
   2031	.netnsok = true,
   2032	.parallel_ops = true,
   2033	.small_ops = dp_datapath_genl_ops,
   2034	.n_small_ops = ARRAY_SIZE(dp_datapath_genl_ops),
   2035	.mcgrps = &ovs_dp_datapath_multicast_group,
   2036	.n_mcgrps = 1,
   2037	.module = THIS_MODULE,
   2038};
   2039
   2040/* Called with ovs_mutex or RCU read lock. */
   2041static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
   2042				   struct net *net, u32 portid, u32 seq,
   2043				   u32 flags, u8 cmd, gfp_t gfp)
   2044{
   2045	struct ovs_header *ovs_header;
   2046	struct ovs_vport_stats vport_stats;
   2047	int err;
   2048
   2049	ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
   2050				 flags, cmd);
   2051	if (!ovs_header)
   2052		return -EMSGSIZE;
   2053
   2054	ovs_header->dp_ifindex = get_dpifindex(vport->dp);
   2055
   2056	if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
   2057	    nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
   2058	    nla_put_string(skb, OVS_VPORT_ATTR_NAME,
   2059			   ovs_vport_name(vport)) ||
   2060	    nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
   2061		goto nla_put_failure;
   2062
   2063	if (!net_eq(net, dev_net(vport->dev))) {
   2064		int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);
   2065
   2066		if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
   2067			goto nla_put_failure;
   2068	}
   2069
   2070	ovs_vport_get_stats(vport, &vport_stats);
   2071	if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
   2072			  sizeof(struct ovs_vport_stats), &vport_stats,
   2073			  OVS_VPORT_ATTR_PAD))
   2074		goto nla_put_failure;
   2075
   2076	if (ovs_vport_get_upcall_portids(vport, skb))
   2077		goto nla_put_failure;
   2078
   2079	err = ovs_vport_get_options(vport, skb);
   2080	if (err == -EMSGSIZE)
   2081		goto error;
   2082
   2083	genlmsg_end(skb, ovs_header);
   2084	return 0;
   2085
   2086nla_put_failure:
   2087	err = -EMSGSIZE;
   2088error:
   2089	genlmsg_cancel(skb, ovs_header);
   2090	return err;
   2091}
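       /* Note: only -EMSGSIZE from ovs_vport_get_options() cancels the
        * message being filled above; any other error is ignored and the
        * reply simply goes out without an OVS_VPORT_ATTR_OPTIONS attribute.
        */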
   2092
   2093static struct sk_buff *ovs_vport_cmd_alloc_info(void)
   2094{
   2095	return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
   2096}
   2097
   2098/* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
   2099struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
   2100					 u32 portid, u32 seq, u8 cmd)
   2101{
   2102	struct sk_buff *skb;
   2103	int retval;
   2104
   2105	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
   2106	if (!skb)
   2107		return ERR_PTR(-ENOMEM);
   2108
   2109	retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
   2110					 GFP_KERNEL);
   2111	BUG_ON(retval < 0);
   2112
   2113	return skb;
   2114}
   2115
   2116/* Called with ovs_mutex or RCU read lock. */
   2117static struct vport *lookup_vport(struct net *net,
   2118				  const struct ovs_header *ovs_header,
   2119				  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
   2120{
   2121	struct datapath *dp;
   2122	struct vport *vport;
   2123
   2124	if (a[OVS_VPORT_ATTR_IFINDEX])
   2125		return ERR_PTR(-EOPNOTSUPP);
   2126	if (a[OVS_VPORT_ATTR_NAME]) {
   2127		vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
   2128		if (!vport)
   2129			return ERR_PTR(-ENODEV);
   2130		if (ovs_header->dp_ifindex &&
   2131		    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
   2132			return ERR_PTR(-ENODEV);
   2133		return vport;
   2134	} else if (a[OVS_VPORT_ATTR_PORT_NO]) {
   2135		u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
   2136
   2137		if (port_no >= DP_MAX_PORTS)
   2138			return ERR_PTR(-EFBIG);
   2139
   2140		dp = get_dp(net, ovs_header->dp_ifindex);
   2141		if (!dp)
   2142			return ERR_PTR(-ENODEV);
   2143
   2144		vport = ovs_vport_ovsl_rcu(dp, port_no);
   2145		if (!vport)
   2146			return ERR_PTR(-ENODEV);
   2147		return vport;
   2148	} else
   2149		return ERR_PTR(-EINVAL);
   2150
   2151}
   2152
   2153static unsigned int ovs_get_max_headroom(struct datapath *dp)
   2154{
   2155	unsigned int dev_headroom, max_headroom = 0;
   2156	struct net_device *dev;
   2157	struct vport *vport;
   2158	int i;
   2159
   2160	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
   2161		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
   2162					 lockdep_ovsl_is_held()) {
   2163			dev = vport->dev;
   2164			dev_headroom = netdev_get_fwd_headroom(dev);
   2165			if (dev_headroom > max_headroom)
   2166				max_headroom = dev_headroom;
   2167		}
   2168	}
   2169
   2170	return max_headroom;
   2171}
   2172
   2173/* Called with ovs_mutex */
   2174static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
   2175{
   2176	struct vport *vport;
   2177	int i;
   2178
   2179	dp->max_headroom = new_headroom;
   2180	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
   2181		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
   2182					 lockdep_ovsl_is_held())
   2183			netdev_set_rx_headroom(vport->dev, new_headroom);
   2184	}
   2185}
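       /* Headroom tracking: dp->max_headroom caches the largest forwarding
        * headroom of any attached device, and every vport's RX headroom is
        * kept at that maximum so that skbs can typically be forwarded
        * between ports without reallocation for encapsulation headers.
        */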
   2186
   2187static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
   2188{
   2189	struct nlattr **a = info->attrs;
   2190	struct ovs_header *ovs_header = info->userhdr;
   2191	struct vport_parms parms;
   2192	struct sk_buff *reply;
   2193	struct vport *vport;
   2194	struct datapath *dp;
   2195	unsigned int new_headroom;
   2196	u32 port_no;
   2197	int err;
   2198
   2199	if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
   2200	    !a[OVS_VPORT_ATTR_UPCALL_PID])
   2201		return -EINVAL;
   2202	if (a[OVS_VPORT_ATTR_IFINDEX])
   2203		return -EOPNOTSUPP;
   2204
   2205	port_no = a[OVS_VPORT_ATTR_PORT_NO]
   2206		? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
   2207	if (port_no >= DP_MAX_PORTS)
   2208		return -EFBIG;
   2209
   2210	reply = ovs_vport_cmd_alloc_info();
   2211	if (!reply)
   2212		return -ENOMEM;
   2213
   2214	ovs_lock();
   2215restart:
   2216	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
   2217	err = -ENODEV;
   2218	if (!dp)
   2219		goto exit_unlock_free;
   2220
   2221	if (port_no) {
   2222		vport = ovs_vport_ovsl(dp, port_no);
   2223		err = -EBUSY;
   2224		if (vport)
   2225			goto exit_unlock_free;
   2226	} else {
   2227		for (port_no = 1; ; port_no++) {
   2228			if (port_no >= DP_MAX_PORTS) {
   2229				err = -EFBIG;
   2230				goto exit_unlock_free;
   2231			}
   2232			vport = ovs_vport_ovsl(dp, port_no);
   2233			if (!vport)
   2234				break;
   2235		}
   2236	}
   2237
   2238	parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
   2239	parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
   2240	parms.options = a[OVS_VPORT_ATTR_OPTIONS];
   2241	parms.dp = dp;
   2242	parms.port_no = port_no;
   2243	parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
   2244
   2245	vport = new_vport(&parms);
   2246	err = PTR_ERR(vport);
   2247	if (IS_ERR(vport)) {
   2248		if (err == -EAGAIN)
   2249			goto restart;
   2250		goto exit_unlock_free;
   2251	}
   2252
   2253	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
   2254				      info->snd_portid, info->snd_seq, 0,
   2255				      OVS_VPORT_CMD_NEW, GFP_KERNEL);
   2256
   2257	new_headroom = netdev_get_fwd_headroom(vport->dev);
   2258
   2259	if (new_headroom > dp->max_headroom)
   2260		ovs_update_headroom(dp, new_headroom);
   2261	else
   2262		netdev_set_rx_headroom(vport->dev, dp->max_headroom);
   2263
   2264	BUG_ON(err < 0);
   2265	ovs_unlock();
   2266
   2267	ovs_notify(&dp_vport_genl_family, reply, info);
   2268	return 0;
   2269
   2270exit_unlock_free:
   2271	ovs_unlock();
   2272	kfree_skb(reply);
   2273	return err;
   2274}
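       /* The restart label above handles new_vport() returning -EAGAIN,
        * which the vport layer appears to use after request_module() has
        * loaded a missing vport implementation, so that the type lookup
        * can be retried under the re-taken ovs_lock (see ovs_vport_add()
        * in vport.c).
        */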
   2275
   2276static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
   2277{
   2278	struct nlattr **a = info->attrs;
   2279	struct sk_buff *reply;
   2280	struct vport *vport;
   2281	int err;
   2282
   2283	reply = ovs_vport_cmd_alloc_info();
   2284	if (!reply)
   2285		return -ENOMEM;
   2286
   2287	ovs_lock();
   2288	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
   2289	err = PTR_ERR(vport);
   2290	if (IS_ERR(vport))
   2291		goto exit_unlock_free;
   2292
   2293	if (a[OVS_VPORT_ATTR_TYPE] &&
   2294	    nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
   2295		err = -EINVAL;
   2296		goto exit_unlock_free;
   2297	}
   2298
   2299	if (a[OVS_VPORT_ATTR_OPTIONS]) {
   2300		err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
   2301		if (err)
   2302			goto exit_unlock_free;
   2303	}
   2304
   2306	if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
   2307		struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
   2308
   2309		err = ovs_vport_set_upcall_portids(vport, ids);
   2310		if (err)
   2311			goto exit_unlock_free;
   2312	}
   2313
   2314	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
   2315				      info->snd_portid, info->snd_seq, 0,
   2316				      OVS_VPORT_CMD_SET, GFP_KERNEL);
   2317	BUG_ON(err < 0);
   2318
   2319	ovs_unlock();
   2320	ovs_notify(&dp_vport_genl_family, reply, info);
   2321	return 0;
   2322
   2323exit_unlock_free:
   2324	ovs_unlock();
   2325	kfree_skb(reply);
   2326	return err;
   2327}
   2328
   2329static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
   2330{
   2331	bool update_headroom = false;
   2332	struct nlattr **a = info->attrs;
   2333	struct sk_buff *reply;
   2334	struct datapath *dp;
   2335	struct vport *vport;
   2336	unsigned int new_headroom;
   2337	int err;
   2338
   2339	reply = ovs_vport_cmd_alloc_info();
   2340	if (!reply)
   2341		return -ENOMEM;
   2342
   2343	ovs_lock();
   2344	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
   2345	err = PTR_ERR(vport);
   2346	if (IS_ERR(vport))
   2347		goto exit_unlock_free;
   2348
   2349	if (vport->port_no == OVSP_LOCAL) {
   2350		err = -EINVAL;
   2351		goto exit_unlock_free;
   2352	}
   2353
   2354	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
   2355				      info->snd_portid, info->snd_seq, 0,
   2356				      OVS_VPORT_CMD_DEL, GFP_KERNEL);
   2357	BUG_ON(err < 0);
   2358
   2359	/* The vport deletion may trigger a dp headroom update. */
   2360	dp = vport->dp;
   2361	if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
   2362		update_headroom = true;
   2363
   2364	netdev_reset_rx_headroom(vport->dev);
   2365	ovs_dp_detach_port(vport);
   2366
   2367	if (update_headroom) {
   2368		new_headroom = ovs_get_max_headroom(dp);
   2369
   2370		if (new_headroom < dp->max_headroom)
   2371			ovs_update_headroom(dp, new_headroom);
   2372	}
   2373	ovs_unlock();
   2374
   2375	ovs_notify(&dp_vport_genl_family, reply, info);
   2376	return 0;
   2377
   2378exit_unlock_free:
   2379	ovs_unlock();
   2380	kfree_skb(reply);
   2381	return err;
   2382}
   2383
   2384static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
   2385{
   2386	struct nlattr **a = info->attrs;
   2387	struct ovs_header *ovs_header = info->userhdr;
   2388	struct sk_buff *reply;
   2389	struct vport *vport;
   2390	int err;
   2391
   2392	reply = ovs_vport_cmd_alloc_info();
   2393	if (!reply)
   2394		return -ENOMEM;
   2395
   2396	rcu_read_lock();
   2397	vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
   2398	err = PTR_ERR(vport);
   2399	if (IS_ERR(vport))
   2400		goto exit_unlock_free;
   2401	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
   2402				      info->snd_portid, info->snd_seq, 0,
   2403				      OVS_VPORT_CMD_GET, GFP_ATOMIC);
   2404	BUG_ON(err < 0);
   2405	rcu_read_unlock();
   2406
   2407	return genlmsg_reply(reply, info);
   2408
   2409exit_unlock_free:
   2410	rcu_read_unlock();
   2411	kfree_skb(reply);
   2412	return err;
   2413}
   2414
   2415static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
   2416{
   2417	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
   2418	struct datapath *dp;
   2419	int bucket = cb->args[0], skip = cb->args[1];
   2420	int i, j = 0;
   2421
   2422	rcu_read_lock();
   2423	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
   2424	if (!dp) {
   2425		rcu_read_unlock();
   2426		return -ENODEV;
   2427	}
   2428	for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
   2429		struct vport *vport;
   2430
   2431		j = 0;
   2432		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
   2433			if (j >= skip &&
   2434			    ovs_vport_cmd_fill_info(vport, skb,
   2435						    sock_net(skb->sk),
   2436						    NETLINK_CB(cb->skb).portid,
   2437						    cb->nlh->nlmsg_seq,
   2438						    NLM_F_MULTI,
   2439						    OVS_VPORT_CMD_GET,
   2440						    GFP_ATOMIC) < 0)
   2441				goto out;
   2442
   2443			j++;
   2444		}
   2445		skip = 0;
   2446	}
   2447out:
   2448	rcu_read_unlock();
   2449
   2450	cb->args[0] = i;
   2451	cb->args[1] = j;
   2452
   2453	return skb->len;
   2454}
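       /* Dump state survives between invocations in cb->args: args[0] is
        * the hash bucket to resume from and args[1] the number of vports
        * to skip within that bucket, so a dump cut short by a full skb
        * resumes exactly where it stopped.
        */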
   2455
   2456static void ovs_dp_masks_rebalance(struct work_struct *work)
   2457{
   2458	struct ovs_net *ovs_net = container_of(work, struct ovs_net,
   2459					       masks_rebalance.work);
   2460	struct datapath *dp;
   2461
   2462	ovs_lock();
   2463
   2464	list_for_each_entry(dp, &ovs_net->dps, list_node)
   2465		ovs_flow_masks_rebalance(&dp->table);
   2466
   2467	ovs_unlock();
   2468
   2469	schedule_delayed_work(&ovs_net->masks_rebalance,
   2470			      msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
   2471}
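       /* The rebalance worker reschedules itself, so mask-cache tuning
        * runs every DP_MASKS_REBALANCE_INTERVAL milliseconds per
        * namespace until ovs_exit_net() cancels it with
        * cancel_delayed_work_sync().
        */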
   2472
   2473static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
   2474	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
   2475	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
   2476	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
   2477	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
   2478	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
   2479	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
   2480	[OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
   2481	[OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
   2482};
   2483
   2484static const struct genl_small_ops dp_vport_genl_ops[] = {
   2485	{ .cmd = OVS_VPORT_CMD_NEW,
   2486	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
   2487	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
   2488	  .doit = ovs_vport_cmd_new
   2489	},
   2490	{ .cmd = OVS_VPORT_CMD_DEL,
   2491	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
   2492	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
   2493	  .doit = ovs_vport_cmd_del
   2494	},
   2495	{ .cmd = OVS_VPORT_CMD_GET,
   2496	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
   2497	  .flags = 0,		    /* OK for unprivileged users. */
   2498	  .doit = ovs_vport_cmd_get,
   2499	  .dumpit = ovs_vport_cmd_dump
   2500	},
   2501	{ .cmd = OVS_VPORT_CMD_SET,
   2502	  .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
   2503	  .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
   2504	  .doit = ovs_vport_cmd_set,
   2505	},
   2506};
   2507
   2508struct genl_family dp_vport_genl_family __ro_after_init = {
   2509	.hdrsize = sizeof(struct ovs_header),
   2510	.name = OVS_VPORT_FAMILY,
   2511	.version = OVS_VPORT_VERSION,
   2512	.maxattr = OVS_VPORT_ATTR_MAX,
   2513	.policy = vport_policy,
   2514	.netnsok = true,
   2515	.parallel_ops = true,
   2516	.small_ops = dp_vport_genl_ops,
   2517	.n_small_ops = ARRAY_SIZE(dp_vport_genl_ops),
   2518	.mcgrps = &ovs_dp_vport_multicast_group,
   2519	.n_mcgrps = 1,
   2520	.module = THIS_MODULE,
   2521};
   2522
   2523static struct genl_family * const dp_genl_families[] = {
   2524	&dp_datapath_genl_family,
   2525	&dp_vport_genl_family,
   2526	&dp_flow_genl_family,
   2527	&dp_packet_genl_family,
   2528	&dp_meter_genl_family,
   2529#if	IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
   2530	&dp_ct_limit_genl_family,
   2531#endif
   2532};
   2533
   2534static void dp_unregister_genl(int n_families)
   2535{
   2536	int i;
   2537
   2538	for (i = 0; i < n_families; i++)
   2539		genl_unregister_family(dp_genl_families[i]);
   2540}
   2541
   2542static int __init dp_register_genl(void)
   2543{
   2544	int err;
   2545	int i;
   2546
   2547	for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
   2549		err = genl_register_family(dp_genl_families[i]);
   2550		if (err)
   2551			goto error;
   2552	}
   2553
   2554	return 0;
   2555
   2556error:
   2557	dp_unregister_genl(i);
   2558	return err;
   2559}
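       /* On a registration failure above, dp_unregister_genl(i) unwinds
        * only the i families that registered successfully; the family
        * that just failed was never registered and needs no cleanup.
        */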
   2560
   2561static int __net_init ovs_init_net(struct net *net)
   2562{
   2563	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
   2564	int err;
   2565
   2566	INIT_LIST_HEAD(&ovs_net->dps);
   2567	INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
   2568	INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance);
   2569
   2570	err = ovs_ct_init(net);
   2571	if (err)
   2572		return err;
   2573
   2574	schedule_delayed_work(&ovs_net->masks_rebalance,
   2575			      msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
   2576	return 0;
   2577}
   2578
   2579static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
   2580					    struct list_head *head)
   2581{
   2582	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
   2583	struct datapath *dp;
   2584
   2585	list_for_each_entry(dp, &ovs_net->dps, list_node) {
   2586		int i;
   2587
   2588		for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
   2589			struct vport *vport;
   2590
   2591			hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
   2592				if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
   2593					continue;
   2594
   2595				if (dev_net(vport->dev) == dnet)
   2596					list_add(&vport->detach_list, head);
   2597			}
   2598		}
   2599	}
   2600}
   2601
   2602static void __net_exit ovs_exit_net(struct net *dnet)
   2603{
   2604	struct datapath *dp, *dp_next;
   2605	struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
   2606	struct vport *vport, *vport_next;
   2607	struct net *net;
   2608	LIST_HEAD(head);
   2609
   2610	ovs_lock();
   2611
   2612	ovs_ct_exit(dnet);
   2613
   2614	list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
   2615		__dp_destroy(dp);
   2616
   2617	down_read(&net_rwsem);
   2618	for_each_net(net)
   2619		list_vports_from_net(net, dnet, &head);
   2620	up_read(&net_rwsem);
   2621
   2622	/* Detach all vports from given namespace. */
   2623	list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
   2624		list_del(&vport->detach_list);
   2625		ovs_dp_detach_port(vport);
   2626	}
   2627
   2628	ovs_unlock();
   2629
   2630	cancel_delayed_work_sync(&ovs_net->masks_rebalance);
   2631	cancel_work_sync(&ovs_net->dp_notify_work);
   2632}
   2633
   2634static struct pernet_operations ovs_net_ops = {
   2635	.init = ovs_init_net,
   2636	.exit = ovs_exit_net,
   2637	.id   = &ovs_net_id,
   2638	.size = sizeof(struct ovs_net),
   2639};
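       /* register_pernet_device() allocates .size bytes of per-namespace
        * storage and stores the generic-netns index in ovs_net_id, which
        * net_generic(net, ovs_net_id) uses throughout this file to reach
        * a namespace's struct ovs_net.
        */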
   2640
   2641static int __init dp_init(void)
   2642{
   2643	int err;
   2644
   2645	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) >
   2646		     sizeof_field(struct sk_buff, cb));
   2647
   2648	pr_info("Open vSwitch switching datapath\n");
   2649
   2650	err = action_fifos_init();
   2651	if (err)
   2652		goto error;
   2653
   2654	err = ovs_internal_dev_rtnl_link_register();
   2655	if (err)
   2656		goto error_action_fifos_exit;
   2657
   2658	err = ovs_flow_init();
   2659	if (err)
   2660		goto error_unreg_rtnl_link;
   2661
   2662	err = ovs_vport_init();
   2663	if (err)
   2664		goto error_flow_exit;
   2665
   2666	err = register_pernet_device(&ovs_net_ops);
   2667	if (err)
   2668		goto error_vport_exit;
   2669
   2670	err = register_netdevice_notifier(&ovs_dp_device_notifier);
   2671	if (err)
   2672		goto error_netns_exit;
   2673
   2674	err = ovs_netdev_init();
   2675	if (err)
   2676		goto error_unreg_notifier;
   2677
   2678	err = dp_register_genl();
   2679	if (err < 0)
   2680		goto error_unreg_netdev;
   2681
   2682	return 0;
   2683
   2684error_unreg_netdev:
   2685	ovs_netdev_exit();
   2686error_unreg_notifier:
   2687	unregister_netdevice_notifier(&ovs_dp_device_notifier);
   2688error_netns_exit:
   2689	unregister_pernet_device(&ovs_net_ops);
   2690error_vport_exit:
   2691	ovs_vport_exit();
   2692error_flow_exit:
   2693	ovs_flow_exit();
   2694error_unreg_rtnl_link:
   2695	ovs_internal_dev_rtnl_link_unregister();
   2696error_action_fifos_exit:
   2697	action_fifos_exit();
   2698error:
   2699	return err;
   2700}
   2701
   2702static void dp_cleanup(void)
   2703{
   2704	dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
   2705	ovs_netdev_exit();
   2706	unregister_netdevice_notifier(&ovs_dp_device_notifier);
   2707	unregister_pernet_device(&ovs_net_ops);
   2708	rcu_barrier();
   2709	ovs_vport_exit();
   2710	ovs_flow_exit();
   2711	ovs_internal_dev_rtnl_link_unregister();
   2712	action_fifos_exit();
   2713}
   2714
   2715module_init(dp_init);
   2716module_exit(dp_cleanup);
   2717
   2718MODULE_DESCRIPTION("Open vSwitch switching datapath");
   2719MODULE_LICENSE("GPL");
   2720MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
   2721MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
   2722MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
   2723MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
   2724MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
   2725MODULE_ALIAS_GENL_FAMILY(OVS_CT_LIMIT_FAMILY);