cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

cls_api.c (93713B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * net/sched/cls_api.c	Packet classifier API.
      4 *
      5 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
      6 *
      7 * Changes:
      8 *
      9 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
     10 */
     11
     12#include <linux/module.h>
     13#include <linux/types.h>
     14#include <linux/kernel.h>
     15#include <linux/string.h>
     16#include <linux/errno.h>
     17#include <linux/err.h>
     18#include <linux/skbuff.h>
     19#include <linux/init.h>
     20#include <linux/kmod.h>
     21#include <linux/slab.h>
     22#include <linux/idr.h>
     23#include <linux/jhash.h>
     24#include <linux/rculist.h>
     25#include <net/net_namespace.h>
     26#include <net/sock.h>
     27#include <net/netlink.h>
     28#include <net/pkt_sched.h>
     29#include <net/pkt_cls.h>
     30#include <net/tc_act/tc_pedit.h>
     31#include <net/tc_act/tc_mirred.h>
     32#include <net/tc_act/tc_vlan.h>
     33#include <net/tc_act/tc_tunnel_key.h>
     34#include <net/tc_act/tc_csum.h>
     35#include <net/tc_act/tc_gact.h>
     36#include <net/tc_act/tc_police.h>
     37#include <net/tc_act/tc_sample.h>
     38#include <net/tc_act/tc_skbedit.h>
     39#include <net/tc_act/tc_ct.h>
     40#include <net/tc_act/tc_mpls.h>
     41#include <net/tc_act/tc_gate.h>
     42#include <net/flow_offload.h>
     43
     44extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
     45
     46/* The list of all installed classifier types */
     47static LIST_HEAD(tcf_proto_base);
     48
     49/* Protects list of registered TC modules. It is pure SMP lock. */
     50static DEFINE_RWLOCK(cls_mod_lock);
     51
     52#ifdef CONFIG_NET_CLS_ACT
     53DEFINE_STATIC_KEY_FALSE(tc_skb_ext_tc);
     54EXPORT_SYMBOL(tc_skb_ext_tc);
     55
     56void tc_skb_ext_tc_enable(void)
     57{
     58	static_branch_inc(&tc_skb_ext_tc);
     59}
     60EXPORT_SYMBOL(tc_skb_ext_tc_enable);
     61
     62void tc_skb_ext_tc_disable(void)
     63{
     64	static_branch_dec(&tc_skb_ext_tc);
     65}
     66EXPORT_SYMBOL(tc_skb_ext_tc_disable);
     67#endif
     68
     69static u32 destroy_obj_hashfn(const struct tcf_proto *tp)
     70{
     71	return jhash_3words(tp->chain->index, tp->prio,
     72			    (__force __u32)tp->protocol, 0);
     73}
     74
     75static void tcf_proto_signal_destroying(struct tcf_chain *chain,
     76					struct tcf_proto *tp)
     77{
     78	struct tcf_block *block = chain->block;
     79
     80	mutex_lock(&block->proto_destroy_lock);
     81	hash_add_rcu(block->proto_destroy_ht, &tp->destroy_ht_node,
     82		     destroy_obj_hashfn(tp));
     83	mutex_unlock(&block->proto_destroy_lock);
     84}
     85
     86static bool tcf_proto_cmp(const struct tcf_proto *tp1,
     87			  const struct tcf_proto *tp2)
     88{
     89	return tp1->chain->index == tp2->chain->index &&
     90	       tp1->prio == tp2->prio &&
     91	       tp1->protocol == tp2->protocol;
     92}
     93
     94static bool tcf_proto_exists_destroying(struct tcf_chain *chain,
     95					struct tcf_proto *tp)
     96{
     97	u32 hash = destroy_obj_hashfn(tp);
     98	struct tcf_proto *iter;
     99	bool found = false;
    100
    101	rcu_read_lock();
    102	hash_for_each_possible_rcu(chain->block->proto_destroy_ht, iter,
    103				   destroy_ht_node, hash) {
    104		if (tcf_proto_cmp(tp, iter)) {
    105			found = true;
    106			break;
    107		}
    108	}
    109	rcu_read_unlock();
    110
    111	return found;
    112}
    113
    114static void
    115tcf_proto_signal_destroyed(struct tcf_chain *chain, struct tcf_proto *tp)
    116{
    117	struct tcf_block *block = chain->block;
    118
    119	mutex_lock(&block->proto_destroy_lock);
    120	if (hash_hashed(&tp->destroy_ht_node))
    121		hash_del_rcu(&tp->destroy_ht_node);
    122	mutex_unlock(&block->proto_destroy_lock);
    123}
    124
    125/* Find classifier type by string name */
    126
    127static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
    128{
    129	const struct tcf_proto_ops *t, *res = NULL;
    130
    131	if (kind) {
    132		read_lock(&cls_mod_lock);
    133		list_for_each_entry(t, &tcf_proto_base, head) {
    134			if (strcmp(kind, t->kind) == 0) {
    135				if (try_module_get(t->owner))
    136					res = t;
    137				break;
    138			}
    139		}
    140		read_unlock(&cls_mod_lock);
    141	}
    142	return res;
    143}
    144
    145static const struct tcf_proto_ops *
    146tcf_proto_lookup_ops(const char *kind, bool rtnl_held,
    147		     struct netlink_ext_ack *extack)
    148{
    149	const struct tcf_proto_ops *ops;
    150
    151	ops = __tcf_proto_lookup_ops(kind);
    152	if (ops)
    153		return ops;
    154#ifdef CONFIG_MODULES
    155	if (rtnl_held)
    156		rtnl_unlock();
    157	request_module("cls_%s", kind);
    158	if (rtnl_held)
    159		rtnl_lock();
    160	ops = __tcf_proto_lookup_ops(kind);
    161	/* We dropped the RTNL semaphore in order to perform
    162	 * the module load. So, even if we succeeded in loading
    163	 * the module we have to replay the request. We indicate
    164	 * this using -EAGAIN.
    165	 */
    166	if (ops) {
    167		module_put(ops->owner);
    168		return ERR_PTR(-EAGAIN);
    169	}
    170#endif
    171	NL_SET_ERR_MSG(extack, "TC classifier not found");
    172	return ERR_PTR(-ENOENT);
    173}
    174
    175/* Register(unregister) new classifier type */
    176
    177int register_tcf_proto_ops(struct tcf_proto_ops *ops)
    178{
    179	struct tcf_proto_ops *t;
    180	int rc = -EEXIST;
    181
    182	write_lock(&cls_mod_lock);
    183	list_for_each_entry(t, &tcf_proto_base, head)
    184		if (!strcmp(ops->kind, t->kind))
    185			goto out;
    186
    187	list_add_tail(&ops->head, &tcf_proto_base);
    188	rc = 0;
    189out:
    190	write_unlock(&cls_mod_lock);
    191	return rc;
    192}
    193EXPORT_SYMBOL(register_tcf_proto_ops);
    194
    195static struct workqueue_struct *tc_filter_wq;
    196
    197int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
    198{
    199	struct tcf_proto_ops *t;
    200	int rc = -ENOENT;
    201
    202	/* Wait for outstanding call_rcu()s, if any, from a
    203	 * tcf_proto_ops's destroy() handler.
    204	 */
    205	rcu_barrier();
    206	flush_workqueue(tc_filter_wq);
    207
    208	write_lock(&cls_mod_lock);
    209	list_for_each_entry(t, &tcf_proto_base, head) {
    210		if (t == ops) {
    211			list_del(&t->head);
    212			rc = 0;
    213			break;
    214		}
    215	}
    216	write_unlock(&cls_mod_lock);
    217	return rc;
    218}
    219EXPORT_SYMBOL(unregister_tcf_proto_ops);
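/* Editor's note (not part of the upstream file): a classifier module wires the
 * two calls above into its module init/exit path. Minimal sketch, assuming a
 * hypothetical "example" classifier whose tcf_proto_ops definition is omitted:
 *
 *	static int __init cls_example_init(void)
 *	{
 *		return register_tcf_proto_ops(&cls_example_ops);
 *	}
 *
 *	static void __exit cls_example_exit(void)
 *	{
 *		unregister_tcf_proto_ops(&cls_example_ops);
 *	}
 *
 *	module_init(cls_example_init);
 *	module_exit(cls_example_exit);
 *
 * The request_module("cls_%s", kind) call in tcf_proto_lookup_ops() above is
 * what auto-loads such a module the first time its kind is requested.
 */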
    220
    221bool tcf_queue_work(struct rcu_work *rwork, work_func_t func)
    222{
    223	INIT_RCU_WORK(rwork, func);
    224	return queue_rcu_work(tc_filter_wq, rwork);
    225}
    226EXPORT_SYMBOL(tcf_queue_work);
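/* Editor's note (not part of the upstream file): tcf_queue_work() is how
 * classifiers defer freeing filters until after an RCU grace period, using the
 * shared tc_filter_wq. Minimal sketch, assuming a hypothetical filter type
 * that embeds a struct rcu_work:
 *
 *	struct example_filter {
 *		struct rcu_work rwork;
 *		u32 handle;
 *	};
 *
 *	static void example_delete_filter_work(struct work_struct *work)
 *	{
 *		struct example_filter *f = container_of(to_rcu_work(work),
 *							struct example_filter,
 *							rwork);
 *		kfree(f);
 *	}
 *
 * and then, from the classifier's teardown paths:
 *
 *	tcf_queue_work(&f->rwork, example_delete_filter_work);
 *
 * unregister_tcf_proto_ops() above flushes this workqueue, so pending frees
 * complete before a classifier module can go away.
 */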
    227
    228/* Select new prio value from the range, managed by kernel. */
    229
    230static inline u32 tcf_auto_prio(struct tcf_proto *tp)
    231{
    232	u32 first = TC_H_MAKE(0xC0000000U, 0U);
    233
    234	if (tp)
    235		first = tp->prio - 1;
    236
    237	return TC_H_MAJ(first);
    238}
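/* Editor's note (not part of the upstream file): tp->prio keeps the priority
 * in the upper 16 bits (see TC_H_MAKE/TC_H_MAJ), so auto-allocation counts
 * downwards: the first filter added without an explicit priority gets
 * TC_H_MAKE(0xC0000000, 0), i.e. priority 0xC000; the next one gets
 * TC_H_MAJ(0xC0000000 - 1) == 0xBFFF0000, i.e. 0xBFFF, and so on.
 */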
    239
    240static bool tcf_proto_check_kind(struct nlattr *kind, char *name)
    241{
    242	if (kind)
    243		return nla_strscpy(name, kind, IFNAMSIZ) < 0;
    244	memset(name, 0, IFNAMSIZ);
    245	return false;
    246}
    247
    248static bool tcf_proto_is_unlocked(const char *kind)
    249{
    250	const struct tcf_proto_ops *ops;
    251	bool ret;
    252
    253	if (strlen(kind) == 0)
    254		return false;
    255
    256	ops = tcf_proto_lookup_ops(kind, false, NULL);
    257	/* On error return false to take rtnl lock. Proto lookup/create
    258	 * functions will perform lookup again and properly handle errors.
    259	 */
    260	if (IS_ERR(ops))
    261		return false;
    262
    263	ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED);
    264	module_put(ops->owner);
    265	return ret;
    266}
    267
    268static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
    269					  u32 prio, struct tcf_chain *chain,
    270					  bool rtnl_held,
    271					  struct netlink_ext_ack *extack)
    272{
    273	struct tcf_proto *tp;
    274	int err;
    275
    276	tp = kzalloc(sizeof(*tp), GFP_KERNEL);
    277	if (!tp)
    278		return ERR_PTR(-ENOBUFS);
    279
    280	tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack);
    281	if (IS_ERR(tp->ops)) {
    282		err = PTR_ERR(tp->ops);
    283		goto errout;
    284	}
    285	tp->classify = tp->ops->classify;
    286	tp->protocol = protocol;
    287	tp->prio = prio;
    288	tp->chain = chain;
    289	spin_lock_init(&tp->lock);
    290	refcount_set(&tp->refcnt, 1);
    291
    292	err = tp->ops->init(tp);
    293	if (err) {
    294		module_put(tp->ops->owner);
    295		goto errout;
    296	}
    297	return tp;
    298
    299errout:
    300	kfree(tp);
    301	return ERR_PTR(err);
    302}
    303
    304static void tcf_proto_get(struct tcf_proto *tp)
    305{
    306	refcount_inc(&tp->refcnt);
    307}
    308
    309static void tcf_chain_put(struct tcf_chain *chain);
    310
    311static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
    312			      bool sig_destroy, struct netlink_ext_ack *extack)
    313{
    314	tp->ops->destroy(tp, rtnl_held, extack);
    315	if (sig_destroy)
    316		tcf_proto_signal_destroyed(tp->chain, tp);
    317	tcf_chain_put(tp->chain);
    318	module_put(tp->ops->owner);
    319	kfree_rcu(tp, rcu);
    320}
    321
    322static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
    323			  struct netlink_ext_ack *extack)
    324{
    325	if (refcount_dec_and_test(&tp->refcnt))
    326		tcf_proto_destroy(tp, rtnl_held, true, extack);
    327}
    328
    329static bool tcf_proto_check_delete(struct tcf_proto *tp)
    330{
    331	if (tp->ops->delete_empty)
    332		return tp->ops->delete_empty(tp);
    333
    334	tp->deleting = true;
    335	return tp->deleting;
    336}
    337
    338static void tcf_proto_mark_delete(struct tcf_proto *tp)
    339{
    340	spin_lock(&tp->lock);
    341	tp->deleting = true;
    342	spin_unlock(&tp->lock);
    343}
    344
    345static bool tcf_proto_is_deleting(struct tcf_proto *tp)
    346{
    347	bool deleting;
    348
    349	spin_lock(&tp->lock);
    350	deleting = tp->deleting;
    351	spin_unlock(&tp->lock);
    352
    353	return deleting;
    354}
    355
    356#define ASSERT_BLOCK_LOCKED(block)					\
    357	lockdep_assert_held(&(block)->lock)
    358
    359struct tcf_filter_chain_list_item {
    360	struct list_head list;
    361	tcf_chain_head_change_t *chain_head_change;
    362	void *chain_head_change_priv;
    363};
    364
    365static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
    366					  u32 chain_index)
    367{
    368	struct tcf_chain *chain;
    369
    370	ASSERT_BLOCK_LOCKED(block);
    371
    372	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
    373	if (!chain)
    374		return NULL;
    375	list_add_tail_rcu(&chain->list, &block->chain_list);
    376	mutex_init(&chain->filter_chain_lock);
    377	chain->block = block;
    378	chain->index = chain_index;
    379	chain->refcnt = 1;
    380	if (!chain->index)
    381		block->chain0.chain = chain;
    382	return chain;
    383}
    384
    385static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item,
    386				       struct tcf_proto *tp_head)
    387{
    388	if (item->chain_head_change)
    389		item->chain_head_change(tp_head, item->chain_head_change_priv);
    390}
    391
    392static void tcf_chain0_head_change(struct tcf_chain *chain,
    393				   struct tcf_proto *tp_head)
    394{
    395	struct tcf_filter_chain_list_item *item;
    396	struct tcf_block *block = chain->block;
    397
    398	if (chain->index)
    399		return;
    400
    401	mutex_lock(&block->lock);
    402	list_for_each_entry(item, &block->chain0.filter_chain_list, list)
    403		tcf_chain_head_change_item(item, tp_head);
    404	mutex_unlock(&block->lock);
    405}
    406
    407/* Returns true if block can be safely freed. */
    408
    409static bool tcf_chain_detach(struct tcf_chain *chain)
    410{
    411	struct tcf_block *block = chain->block;
    412
    413	ASSERT_BLOCK_LOCKED(block);
    414
    415	list_del_rcu(&chain->list);
    416	if (!chain->index)
    417		block->chain0.chain = NULL;
    418
    419	if (list_empty(&block->chain_list) &&
    420	    refcount_read(&block->refcnt) == 0)
    421		return true;
    422
    423	return false;
    424}
    425
    426static void tcf_block_destroy(struct tcf_block *block)
    427{
    428	mutex_destroy(&block->lock);
    429	mutex_destroy(&block->proto_destroy_lock);
    430	kfree_rcu(block, rcu);
    431}
    432
    433static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
    434{
    435	struct tcf_block *block = chain->block;
    436
    437	mutex_destroy(&chain->filter_chain_lock);
    438	kfree_rcu(chain, rcu);
    439	if (free_block)
    440		tcf_block_destroy(block);
    441}
    442
    443static void tcf_chain_hold(struct tcf_chain *chain)
    444{
    445	ASSERT_BLOCK_LOCKED(chain->block);
    446
    447	++chain->refcnt;
    448}
    449
    450static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
    451{
    452	ASSERT_BLOCK_LOCKED(chain->block);
    453
    454	/* In case all the references are action references, this
    455	 * chain should not be shown to the user.
    456	 */
    457	return chain->refcnt == chain->action_refcnt;
    458}
    459
    460static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
    461					  u32 chain_index)
    462{
    463	struct tcf_chain *chain;
    464
    465	ASSERT_BLOCK_LOCKED(block);
    466
    467	list_for_each_entry(chain, &block->chain_list, list) {
    468		if (chain->index == chain_index)
    469			return chain;
    470	}
    471	return NULL;
    472}
    473
    474#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
    475static struct tcf_chain *tcf_chain_lookup_rcu(const struct tcf_block *block,
    476					      u32 chain_index)
    477{
    478	struct tcf_chain *chain;
    479
    480	list_for_each_entry_rcu(chain, &block->chain_list, list) {
    481		if (chain->index == chain_index)
    482			return chain;
    483	}
    484	return NULL;
    485}
    486#endif
    487
    488static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
    489			   u32 seq, u16 flags, int event, bool unicast);
    490
    491static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
    492					 u32 chain_index, bool create,
    493					 bool by_act)
    494{
    495	struct tcf_chain *chain = NULL;
    496	bool is_first_reference;
    497
    498	mutex_lock(&block->lock);
    499	chain = tcf_chain_lookup(block, chain_index);
    500	if (chain) {
    501		tcf_chain_hold(chain);
    502	} else {
    503		if (!create)
    504			goto errout;
    505		chain = tcf_chain_create(block, chain_index);
    506		if (!chain)
    507			goto errout;
    508	}
    509
    510	if (by_act)
    511		++chain->action_refcnt;
    512	is_first_reference = chain->refcnt - chain->action_refcnt == 1;
    513	mutex_unlock(&block->lock);
    514
    515	/* Send notification only in case we got the first
    516	 * non-action reference. Until then, the chain acts only as
    517	 * a placeholder for actions pointing to it and user ought
    518	 * not know about them.
    519	 */
    520	if (is_first_reference && !by_act)
    521		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
    522				RTM_NEWCHAIN, false);
    523
    524	return chain;
    525
    526errout:
    527	mutex_unlock(&block->lock);
    528	return chain;
    529}
    530
    531static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
    532				       bool create)
    533{
    534	return __tcf_chain_get(block, chain_index, create, false);
    535}
    536
    537struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index)
    538{
    539	return __tcf_chain_get(block, chain_index, true, true);
    540}
    541EXPORT_SYMBOL(tcf_chain_get_by_act);
    542
    543static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
    544			       void *tmplt_priv);
    545static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
    546				  void *tmplt_priv, u32 chain_index,
    547				  struct tcf_block *block, struct sk_buff *oskb,
    548				  u32 seq, u16 flags, bool unicast);
    549
    550static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
    551			    bool explicitly_created)
    552{
    553	struct tcf_block *block = chain->block;
    554	const struct tcf_proto_ops *tmplt_ops;
    555	bool free_block = false;
    556	unsigned int refcnt;
    557	void *tmplt_priv;
    558
    559	mutex_lock(&block->lock);
    560	if (explicitly_created) {
    561		if (!chain->explicitly_created) {
    562			mutex_unlock(&block->lock);
    563			return;
    564		}
    565		chain->explicitly_created = false;
    566	}
    567
    568	if (by_act)
    569		chain->action_refcnt--;
    570
    571	/* tc_chain_notify_delete can't be called while holding block lock.
    572	 * However, when block is unlocked chain can be changed concurrently, so
    573	 * save these to temporary variables.
    574	 */
    575	refcnt = --chain->refcnt;
    576	tmplt_ops = chain->tmplt_ops;
    577	tmplt_priv = chain->tmplt_priv;
    578
    579	/* The last dropped non-action reference will trigger notification. */
    580	if (refcnt - chain->action_refcnt == 0 && !by_act) {
    581		tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index,
    582				       block, NULL, 0, 0, false);
    583		/* Last reference to chain, no need to lock. */
    584		chain->flushing = false;
    585	}
    586
    587	if (refcnt == 0)
    588		free_block = tcf_chain_detach(chain);
    589	mutex_unlock(&block->lock);
    590
    591	if (refcnt == 0) {
    592		tc_chain_tmplt_del(tmplt_ops, tmplt_priv);
    593		tcf_chain_destroy(chain, free_block);
    594	}
    595}
    596
    597static void tcf_chain_put(struct tcf_chain *chain)
    598{
    599	__tcf_chain_put(chain, false, false);
    600}
    601
    602void tcf_chain_put_by_act(struct tcf_chain *chain)
    603{
    604	__tcf_chain_put(chain, true, false);
    605}
    606EXPORT_SYMBOL(tcf_chain_put_by_act);
    607
    608static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
    609{
    610	__tcf_chain_put(chain, false, true);
    611}
    612
    613static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
    614{
    615	struct tcf_proto *tp, *tp_next;
    616
    617	mutex_lock(&chain->filter_chain_lock);
    618	tp = tcf_chain_dereference(chain->filter_chain, chain);
    619	while (tp) {
    620		tp_next = rcu_dereference_protected(tp->next, 1);
    621		tcf_proto_signal_destroying(chain, tp);
    622		tp = tp_next;
    623	}
    624	tp = tcf_chain_dereference(chain->filter_chain, chain);
    625	RCU_INIT_POINTER(chain->filter_chain, NULL);
    626	tcf_chain0_head_change(chain, NULL);
    627	chain->flushing = true;
    628	mutex_unlock(&chain->filter_chain_lock);
    629
    630	while (tp) {
    631		tp_next = rcu_dereference_protected(tp->next, 1);
    632		tcf_proto_put(tp, rtnl_held, NULL);
    633		tp = tp_next;
    634	}
    635}
    636
    637static int tcf_block_setup(struct tcf_block *block,
    638			   struct flow_block_offload *bo);
    639
    640static void tcf_block_offload_init(struct flow_block_offload *bo,
    641				   struct net_device *dev, struct Qdisc *sch,
    642				   enum flow_block_command command,
    643				   enum flow_block_binder_type binder_type,
    644				   struct flow_block *flow_block,
    645				   bool shared, struct netlink_ext_ack *extack)
    646{
    647	bo->net = dev_net(dev);
    648	bo->command = command;
    649	bo->binder_type = binder_type;
    650	bo->block = flow_block;
    651	bo->block_shared = shared;
    652	bo->extack = extack;
    653	bo->sch = sch;
    654	bo->cb_list_head = &flow_block->cb_list;
    655	INIT_LIST_HEAD(&bo->cb_list);
    656}
    657
    658static void tcf_block_unbind(struct tcf_block *block,
    659			     struct flow_block_offload *bo);
    660
    661static void tc_block_indr_cleanup(struct flow_block_cb *block_cb)
    662{
    663	struct tcf_block *block = block_cb->indr.data;
    664	struct net_device *dev = block_cb->indr.dev;
    665	struct Qdisc *sch = block_cb->indr.sch;
    666	struct netlink_ext_ack extack = {};
    667	struct flow_block_offload bo = {};
    668
    669	tcf_block_offload_init(&bo, dev, sch, FLOW_BLOCK_UNBIND,
    670			       block_cb->indr.binder_type,
    671			       &block->flow_block, tcf_block_shared(block),
    672			       &extack);
    673	rtnl_lock();
    674	down_write(&block->cb_lock);
    675	list_del(&block_cb->driver_list);
    676	list_move(&block_cb->list, &bo.cb_list);
    677	tcf_block_unbind(block, &bo);
    678	up_write(&block->cb_lock);
    679	rtnl_unlock();
    680}
    681
    682static bool tcf_block_offload_in_use(struct tcf_block *block)
    683{
    684	return atomic_read(&block->offloadcnt);
    685}
    686
    687static int tcf_block_offload_cmd(struct tcf_block *block,
    688				 struct net_device *dev, struct Qdisc *sch,
    689				 struct tcf_block_ext_info *ei,
    690				 enum flow_block_command command,
    691				 struct netlink_ext_ack *extack)
    692{
    693	struct flow_block_offload bo = {};
    694
    695	tcf_block_offload_init(&bo, dev, sch, command, ei->binder_type,
    696			       &block->flow_block, tcf_block_shared(block),
    697			       extack);
    698
    699	if (dev->netdev_ops->ndo_setup_tc) {
    700		int err;
    701
    702		err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
    703		if (err < 0) {
    704			if (err != -EOPNOTSUPP)
    705				NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed");
    706			return err;
    707		}
    708
    709		return tcf_block_setup(block, &bo);
    710	}
    711
    712	flow_indr_dev_setup_offload(dev, sch, TC_SETUP_BLOCK, block, &bo,
    713				    tc_block_indr_cleanup);
    714	tcf_block_setup(block, &bo);
    715
    716	return -EOPNOTSUPP;
    717}
    718
    719static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
    720				  struct tcf_block_ext_info *ei,
    721				  struct netlink_ext_ack *extack)
    722{
    723	struct net_device *dev = q->dev_queue->dev;
    724	int err;
    725
    726	down_write(&block->cb_lock);
    727
    728	/* If tc offload feature is disabled and the block we try to bind
    729	 * to already has some offloaded filters, forbid to bind.
    730	 */
    731	if (dev->netdev_ops->ndo_setup_tc &&
    732	    !tc_can_offload(dev) &&
    733	    tcf_block_offload_in_use(block)) {
    734		NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
    735		err = -EOPNOTSUPP;
    736		goto err_unlock;
    737	}
    738
    739	err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_BIND, extack);
    740	if (err == -EOPNOTSUPP)
    741		goto no_offload_dev_inc;
    742	if (err)
    743		goto err_unlock;
    744
    745	up_write(&block->cb_lock);
    746	return 0;
    747
    748no_offload_dev_inc:
    749	if (tcf_block_offload_in_use(block))
    750		goto err_unlock;
    751
    752	err = 0;
    753	block->nooffloaddevcnt++;
    754err_unlock:
    755	up_write(&block->cb_lock);
    756	return err;
    757}
    758
    759static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
    760				     struct tcf_block_ext_info *ei)
    761{
    762	struct net_device *dev = q->dev_queue->dev;
    763	int err;
    764
    765	down_write(&block->cb_lock);
    766	err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_UNBIND, NULL);
    767	if (err == -EOPNOTSUPP)
    768		goto no_offload_dev_dec;
    769	up_write(&block->cb_lock);
    770	return;
    771
    772no_offload_dev_dec:
    773	WARN_ON(block->nooffloaddevcnt-- == 0);
    774	up_write(&block->cb_lock);
    775}
    776
    777static int
    778tcf_chain0_head_change_cb_add(struct tcf_block *block,
    779			      struct tcf_block_ext_info *ei,
    780			      struct netlink_ext_ack *extack)
    781{
    782	struct tcf_filter_chain_list_item *item;
    783	struct tcf_chain *chain0;
    784
    785	item = kmalloc(sizeof(*item), GFP_KERNEL);
    786	if (!item) {
    787		NL_SET_ERR_MSG(extack, "Memory allocation for head change callback item failed");
    788		return -ENOMEM;
    789	}
    790	item->chain_head_change = ei->chain_head_change;
    791	item->chain_head_change_priv = ei->chain_head_change_priv;
    792
    793	mutex_lock(&block->lock);
    794	chain0 = block->chain0.chain;
    795	if (chain0)
    796		tcf_chain_hold(chain0);
    797	else
    798		list_add(&item->list, &block->chain0.filter_chain_list);
    799	mutex_unlock(&block->lock);
    800
    801	if (chain0) {
    802		struct tcf_proto *tp_head;
    803
    804		mutex_lock(&chain0->filter_chain_lock);
    805
    806		tp_head = tcf_chain_dereference(chain0->filter_chain, chain0);
    807		if (tp_head)
    808			tcf_chain_head_change_item(item, tp_head);
    809
    810		mutex_lock(&block->lock);
    811		list_add(&item->list, &block->chain0.filter_chain_list);
    812		mutex_unlock(&block->lock);
    813
    814		mutex_unlock(&chain0->filter_chain_lock);
    815		tcf_chain_put(chain0);
    816	}
    817
    818	return 0;
    819}
    820
    821static void
    822tcf_chain0_head_change_cb_del(struct tcf_block *block,
    823			      struct tcf_block_ext_info *ei)
    824{
    825	struct tcf_filter_chain_list_item *item;
    826
    827	mutex_lock(&block->lock);
    828	list_for_each_entry(item, &block->chain0.filter_chain_list, list) {
    829		if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
    830		    (item->chain_head_change == ei->chain_head_change &&
    831		     item->chain_head_change_priv == ei->chain_head_change_priv)) {
    832			if (block->chain0.chain)
    833				tcf_chain_head_change_item(item, NULL);
    834			list_del(&item->list);
    835			mutex_unlock(&block->lock);
    836
    837			kfree(item);
    838			return;
    839		}
    840	}
    841	mutex_unlock(&block->lock);
    842	WARN_ON(1);
    843}
    844
    845struct tcf_net {
    846	spinlock_t idr_lock; /* Protects idr */
    847	struct idr idr;
    848};
    849
    850static unsigned int tcf_net_id;
    851
    852static int tcf_block_insert(struct tcf_block *block, struct net *net,
    853			    struct netlink_ext_ack *extack)
    854{
    855	struct tcf_net *tn = net_generic(net, tcf_net_id);
    856	int err;
    857
    858	idr_preload(GFP_KERNEL);
    859	spin_lock(&tn->idr_lock);
    860	err = idr_alloc_u32(&tn->idr, block, &block->index, block->index,
    861			    GFP_NOWAIT);
    862	spin_unlock(&tn->idr_lock);
    863	idr_preload_end();
    864
    865	return err;
    866}
    867
    868static void tcf_block_remove(struct tcf_block *block, struct net *net)
    869{
    870	struct tcf_net *tn = net_generic(net, tcf_net_id);
    871
    872	spin_lock(&tn->idr_lock);
    873	idr_remove(&tn->idr, block->index);
    874	spin_unlock(&tn->idr_lock);
    875}
    876
    877static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
    878					  u32 block_index,
    879					  struct netlink_ext_ack *extack)
    880{
    881	struct tcf_block *block;
    882
    883	block = kzalloc(sizeof(*block), GFP_KERNEL);
    884	if (!block) {
    885		NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
    886		return ERR_PTR(-ENOMEM);
    887	}
    888	mutex_init(&block->lock);
    889	mutex_init(&block->proto_destroy_lock);
    890	init_rwsem(&block->cb_lock);
    891	flow_block_init(&block->flow_block);
    892	INIT_LIST_HEAD(&block->chain_list);
    893	INIT_LIST_HEAD(&block->owner_list);
    894	INIT_LIST_HEAD(&block->chain0.filter_chain_list);
    895
    896	refcount_set(&block->refcnt, 1);
    897	block->net = net;
    898	block->index = block_index;
    899
    900	/* Don't store q pointer for blocks which are shared */
    901	if (!tcf_block_shared(block))
    902		block->q = q;
    903	return block;
    904}
    905
    906static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
    907{
    908	struct tcf_net *tn = net_generic(net, tcf_net_id);
    909
    910	return idr_find(&tn->idr, block_index);
    911}
    912
    913static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
    914{
    915	struct tcf_block *block;
    916
    917	rcu_read_lock();
    918	block = tcf_block_lookup(net, block_index);
    919	if (block && !refcount_inc_not_zero(&block->refcnt))
    920		block = NULL;
    921	rcu_read_unlock();
    922
    923	return block;
    924}
    925
    926static struct tcf_chain *
    927__tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
    928{
    929	mutex_lock(&block->lock);
    930	if (chain)
    931		chain = list_is_last(&chain->list, &block->chain_list) ?
    932			NULL : list_next_entry(chain, list);
    933	else
    934		chain = list_first_entry_or_null(&block->chain_list,
    935						 struct tcf_chain, list);
    936
    937	/* skip all action-only chains */
    938	while (chain && tcf_chain_held_by_acts_only(chain))
    939		chain = list_is_last(&chain->list, &block->chain_list) ?
    940			NULL : list_next_entry(chain, list);
    941
    942	if (chain)
    943		tcf_chain_hold(chain);
    944	mutex_unlock(&block->lock);
    945
    946	return chain;
    947}
    948
    949/* Function to be used by all clients that want to iterate over all chains on
    950 * block. It properly obtains block->lock and takes reference to chain before
    951 * returning it. Users of this function must be tolerant to concurrent chain
    952 * insertion/deletion or ensure that no concurrent chain modification is
    953 * possible. Note that all netlink dump callbacks cannot guarantee to provide
    954 * consistent dump because rtnl lock is released each time skb is filled with
    955 * data and sent to user-space.
    956 */
    957
    958struct tcf_chain *
    959tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
    960{
    961	struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain);
    962
    963	if (chain)
    964		tcf_chain_put(chain);
    965
    966	return chain_next;
    967}
    968EXPORT_SYMBOL(tcf_get_next_chain);
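/* Editor's example (not part of the upstream file): the intended calling
 * pattern is a plain "get next" loop; each call drops the reference on the
 * chain passed in and returns the next chain already held, so a loop that
 * runs to completion leaves no references behind (visit() stands in for the
 * caller's work):
 *
 *	struct tcf_chain *chain;
 *
 *	for (chain = tcf_get_next_chain(block, NULL); chain;
 *	     chain = tcf_get_next_chain(block, chain))
 *		visit(chain);
 *
 * tcf_block_flush_all_chains() below uses exactly this pattern.
 */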
    969
    970static struct tcf_proto *
    971__tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
    972{
    973	u32 prio = 0;
    974
    975	ASSERT_RTNL();
    976	mutex_lock(&chain->filter_chain_lock);
    977
    978	if (!tp) {
    979		tp = tcf_chain_dereference(chain->filter_chain, chain);
    980	} else if (tcf_proto_is_deleting(tp)) {
    981		/* 'deleting' flag is set and chain->filter_chain_lock was
    982		 * unlocked, which means next pointer could be invalid. Restart
    983		 * search.
    984		 */
    985		prio = tp->prio + 1;
    986		tp = tcf_chain_dereference(chain->filter_chain, chain);
    987
    988		for (; tp; tp = tcf_chain_dereference(tp->next, chain))
    989			if (!tp->deleting && tp->prio >= prio)
    990				break;
    991	} else {
    992		tp = tcf_chain_dereference(tp->next, chain);
    993	}
    994
    995	if (tp)
    996		tcf_proto_get(tp);
    997
    998	mutex_unlock(&chain->filter_chain_lock);
    999
   1000	return tp;
   1001}
   1002
   1003/* Function to be used by all clients that want to iterate over all tp's on
   1004 * chain. Users of this function must be tolerant to concurrent tp
   1005 * insertion/deletion or ensure that no concurrent chain modification is
   1006 * possible. Note that all netlink dump callbacks cannot guarantee to provide
   1007 * consistent dump because rtnl lock is released each time skb is filled with
   1008 * data and sent to user-space.
   1009 */
   1010
   1011struct tcf_proto *
   1012tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
   1013{
   1014	struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp);
   1015
   1016	if (tp)
   1017		tcf_proto_put(tp, true, NULL);
   1018
   1019	return tp_next;
   1020}
   1021EXPORT_SYMBOL(tcf_get_next_proto);
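/* Editor's example (not part of the upstream file): filters on a chain are
 * walked the same way, while holding rtnl (visit() is a placeholder):
 *
 *	struct tcf_proto *tp;
 *
 *	for (tp = tcf_get_next_proto(chain, NULL); tp;
 *	     tp = tcf_get_next_proto(chain, tp))
 *		visit(tp);
 */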
   1022
   1023static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held)
   1024{
   1025	struct tcf_chain *chain;
   1026
   1027	/* Last reference to block. At this point chains cannot be added or
   1028	 * removed concurrently.
   1029	 */
   1030	for (chain = tcf_get_next_chain(block, NULL);
   1031	     chain;
   1032	     chain = tcf_get_next_chain(block, chain)) {
   1033		tcf_chain_put_explicitly_created(chain);
   1034		tcf_chain_flush(chain, rtnl_held);
   1035	}
   1036}
   1037
    1038/* Look up Qdisc and increment its reference counter.
   1039 * Set parent, if necessary.
   1040 */
   1041
   1042static int __tcf_qdisc_find(struct net *net, struct Qdisc **q,
   1043			    u32 *parent, int ifindex, bool rtnl_held,
   1044			    struct netlink_ext_ack *extack)
   1045{
   1046	const struct Qdisc_class_ops *cops;
   1047	struct net_device *dev;
   1048	int err = 0;
   1049
   1050	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
   1051		return 0;
   1052
   1053	rcu_read_lock();
   1054
   1055	/* Find link */
   1056	dev = dev_get_by_index_rcu(net, ifindex);
   1057	if (!dev) {
   1058		rcu_read_unlock();
   1059		return -ENODEV;
   1060	}
   1061
   1062	/* Find qdisc */
   1063	if (!*parent) {
   1064		*q = rcu_dereference(dev->qdisc);
   1065		*parent = (*q)->handle;
   1066	} else {
   1067		*q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
   1068		if (!*q) {
    1069			NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exist");
   1070			err = -EINVAL;
   1071			goto errout_rcu;
   1072		}
   1073	}
   1074
   1075	*q = qdisc_refcount_inc_nz(*q);
   1076	if (!*q) {
    1077		NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exist");
   1078		err = -EINVAL;
   1079		goto errout_rcu;
   1080	}
   1081
   1082	/* Is it classful? */
   1083	cops = (*q)->ops->cl_ops;
   1084	if (!cops) {
   1085		NL_SET_ERR_MSG(extack, "Qdisc not classful");
   1086		err = -EINVAL;
   1087		goto errout_qdisc;
   1088	}
   1089
   1090	if (!cops->tcf_block) {
   1091		NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
   1092		err = -EOPNOTSUPP;
   1093		goto errout_qdisc;
   1094	}
   1095
   1096errout_rcu:
   1097	/* At this point we know that qdisc is not noop_qdisc,
   1098	 * which means that qdisc holds a reference to net_device
   1099	 * and we hold a reference to qdisc, so it is safe to release
   1100	 * rcu read lock.
   1101	 */
   1102	rcu_read_unlock();
   1103	return err;
   1104
   1105errout_qdisc:
   1106	rcu_read_unlock();
   1107
   1108	if (rtnl_held)
   1109		qdisc_put(*q);
   1110	else
   1111		qdisc_put_unlocked(*q);
   1112	*q = NULL;
   1113
   1114	return err;
   1115}
   1116
   1117static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl,
   1118			       int ifindex, struct netlink_ext_ack *extack)
   1119{
   1120	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
   1121		return 0;
   1122
   1123	/* Do we search for filter, attached to class? */
   1124	if (TC_H_MIN(parent)) {
   1125		const struct Qdisc_class_ops *cops = q->ops->cl_ops;
   1126
   1127		*cl = cops->find(q, parent);
   1128		if (*cl == 0) {
   1129			NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
   1130			return -ENOENT;
   1131		}
   1132	}
   1133
   1134	return 0;
   1135}
   1136
   1137static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q,
   1138					  unsigned long cl, int ifindex,
   1139					  u32 block_index,
   1140					  struct netlink_ext_ack *extack)
   1141{
   1142	struct tcf_block *block;
   1143
   1144	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
   1145		block = tcf_block_refcnt_get(net, block_index);
   1146		if (!block) {
   1147			NL_SET_ERR_MSG(extack, "Block of given index was not found");
   1148			return ERR_PTR(-EINVAL);
   1149		}
   1150	} else {
   1151		const struct Qdisc_class_ops *cops = q->ops->cl_ops;
   1152
   1153		block = cops->tcf_block(q, cl, extack);
   1154		if (!block)
   1155			return ERR_PTR(-EINVAL);
   1156
   1157		if (tcf_block_shared(block)) {
   1158			NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
   1159			return ERR_PTR(-EOPNOTSUPP);
   1160		}
   1161
   1162		/* Always take reference to block in order to support execution
   1163		 * of rules update path of cls API without rtnl lock. Caller
   1164		 * must release block when it is finished using it. 'if' block
   1165		 * of this conditional obtain reference to block by calling
   1166		 * tcf_block_refcnt_get().
   1167		 */
   1168		refcount_inc(&block->refcnt);
   1169	}
   1170
   1171	return block;
   1172}
   1173
   1174static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
   1175			    struct tcf_block_ext_info *ei, bool rtnl_held)
   1176{
   1177	if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) {
   1178		/* Flushing/putting all chains will cause the block to be
   1179		 * deallocated when last chain is freed. However, if chain_list
   1180		 * is empty, block has to be manually deallocated. After block
   1181		 * reference counter reached 0, it is no longer possible to
   1182		 * increment it or add new chains to block.
   1183		 */
   1184		bool free_block = list_empty(&block->chain_list);
   1185
   1186		mutex_unlock(&block->lock);
   1187		if (tcf_block_shared(block))
   1188			tcf_block_remove(block, block->net);
   1189
   1190		if (q)
   1191			tcf_block_offload_unbind(block, q, ei);
   1192
   1193		if (free_block)
   1194			tcf_block_destroy(block);
   1195		else
   1196			tcf_block_flush_all_chains(block, rtnl_held);
   1197	} else if (q) {
   1198		tcf_block_offload_unbind(block, q, ei);
   1199	}
   1200}
   1201
   1202static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held)
   1203{
   1204	__tcf_block_put(block, NULL, NULL, rtnl_held);
   1205}
   1206
   1207/* Find tcf block.
   1208 * Set q, parent, cl when appropriate.
   1209 */
   1210
   1211static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
   1212					u32 *parent, unsigned long *cl,
   1213					int ifindex, u32 block_index,
   1214					struct netlink_ext_ack *extack)
   1215{
   1216	struct tcf_block *block;
   1217	int err = 0;
   1218
   1219	ASSERT_RTNL();
   1220
   1221	err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack);
   1222	if (err)
   1223		goto errout;
   1224
   1225	err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack);
   1226	if (err)
   1227		goto errout_qdisc;
   1228
   1229	block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack);
   1230	if (IS_ERR(block)) {
   1231		err = PTR_ERR(block);
   1232		goto errout_qdisc;
   1233	}
   1234
   1235	return block;
   1236
   1237errout_qdisc:
   1238	if (*q)
   1239		qdisc_put(*q);
   1240errout:
   1241	*q = NULL;
   1242	return ERR_PTR(err);
   1243}
   1244
   1245static void tcf_block_release(struct Qdisc *q, struct tcf_block *block,
   1246			      bool rtnl_held)
   1247{
   1248	if (!IS_ERR_OR_NULL(block))
   1249		tcf_block_refcnt_put(block, rtnl_held);
   1250
   1251	if (q) {
   1252		if (rtnl_held)
   1253			qdisc_put(q);
   1254		else
   1255			qdisc_put_unlocked(q);
   1256	}
   1257}
   1258
   1259struct tcf_block_owner_item {
   1260	struct list_head list;
   1261	struct Qdisc *q;
   1262	enum flow_block_binder_type binder_type;
   1263};
   1264
   1265static void
   1266tcf_block_owner_netif_keep_dst(struct tcf_block *block,
   1267			       struct Qdisc *q,
   1268			       enum flow_block_binder_type binder_type)
   1269{
   1270	if (block->keep_dst &&
   1271	    binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
   1272	    binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
   1273		netif_keep_dst(qdisc_dev(q));
   1274}
   1275
   1276void tcf_block_netif_keep_dst(struct tcf_block *block)
   1277{
   1278	struct tcf_block_owner_item *item;
   1279
   1280	block->keep_dst = true;
   1281	list_for_each_entry(item, &block->owner_list, list)
   1282		tcf_block_owner_netif_keep_dst(block, item->q,
   1283					       item->binder_type);
   1284}
   1285EXPORT_SYMBOL(tcf_block_netif_keep_dst);
   1286
   1287static int tcf_block_owner_add(struct tcf_block *block,
   1288			       struct Qdisc *q,
   1289			       enum flow_block_binder_type binder_type)
   1290{
   1291	struct tcf_block_owner_item *item;
   1292
   1293	item = kmalloc(sizeof(*item), GFP_KERNEL);
   1294	if (!item)
   1295		return -ENOMEM;
   1296	item->q = q;
   1297	item->binder_type = binder_type;
   1298	list_add(&item->list, &block->owner_list);
   1299	return 0;
   1300}
   1301
   1302static void tcf_block_owner_del(struct tcf_block *block,
   1303				struct Qdisc *q,
   1304				enum flow_block_binder_type binder_type)
   1305{
   1306	struct tcf_block_owner_item *item;
   1307
   1308	list_for_each_entry(item, &block->owner_list, list) {
   1309		if (item->q == q && item->binder_type == binder_type) {
   1310			list_del(&item->list);
   1311			kfree(item);
   1312			return;
   1313		}
   1314	}
   1315	WARN_ON(1);
   1316}
   1317
   1318int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
   1319		      struct tcf_block_ext_info *ei,
   1320		      struct netlink_ext_ack *extack)
   1321{
   1322	struct net *net = qdisc_net(q);
   1323	struct tcf_block *block = NULL;
   1324	int err;
   1325
   1326	if (ei->block_index)
   1327		/* block_index not 0 means the shared block is requested */
   1328		block = tcf_block_refcnt_get(net, ei->block_index);
   1329
   1330	if (!block) {
   1331		block = tcf_block_create(net, q, ei->block_index, extack);
   1332		if (IS_ERR(block))
   1333			return PTR_ERR(block);
   1334		if (tcf_block_shared(block)) {
   1335			err = tcf_block_insert(block, net, extack);
   1336			if (err)
   1337				goto err_block_insert;
   1338		}
   1339	}
   1340
   1341	err = tcf_block_owner_add(block, q, ei->binder_type);
   1342	if (err)
   1343		goto err_block_owner_add;
   1344
   1345	tcf_block_owner_netif_keep_dst(block, q, ei->binder_type);
   1346
   1347	err = tcf_chain0_head_change_cb_add(block, ei, extack);
   1348	if (err)
   1349		goto err_chain0_head_change_cb_add;
   1350
   1351	err = tcf_block_offload_bind(block, q, ei, extack);
   1352	if (err)
   1353		goto err_block_offload_bind;
   1354
   1355	*p_block = block;
   1356	return 0;
   1357
   1358err_block_offload_bind:
   1359	tcf_chain0_head_change_cb_del(block, ei);
   1360err_chain0_head_change_cb_add:
   1361	tcf_block_owner_del(block, q, ei->binder_type);
   1362err_block_owner_add:
   1363err_block_insert:
   1364	tcf_block_refcnt_put(block, true);
   1365	return err;
   1366}
   1367EXPORT_SYMBOL(tcf_block_get_ext);
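/* Editor's example (not part of the upstream file): callers that keep their
 * own fast-path pointer to the chain 0 filter list pass a chain_head_change
 * callback through tcf_block_ext_info. Rough sketch with hypothetical names
 * (my_chain_head_change, priv):
 *
 *	struct tcf_block_ext_info ei = {
 *		.binder_type		= FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
 *		.chain_head_change	= my_chain_head_change,
 *		.chain_head_change_priv	= priv,
 *	};
 *
 *	err = tcf_block_get_ext(&block, sch, &ei, extack);
 *
 * The callback then runs every time the head of chain 0 changes (see
 * tcf_chain0_head_change() above).
 */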
   1368
   1369static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
   1370{
   1371	struct tcf_proto __rcu **p_filter_chain = priv;
   1372
   1373	rcu_assign_pointer(*p_filter_chain, tp_head);
   1374}
   1375
   1376int tcf_block_get(struct tcf_block **p_block,
   1377		  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
   1378		  struct netlink_ext_ack *extack)
   1379{
   1380	struct tcf_block_ext_info ei = {
   1381		.chain_head_change = tcf_chain_head_change_dflt,
   1382		.chain_head_change_priv = p_filter_chain,
   1383	};
   1384
   1385	WARN_ON(!p_filter_chain);
   1386	return tcf_block_get_ext(p_block, q, &ei, extack);
   1387}
   1388EXPORT_SYMBOL(tcf_block_get);
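/* Editor's example (not part of the upstream file): a simple classful qdisc
 * typically calls tcf_block_get() from its ->init() and tcf_block_put() from
 * ->destroy(). Sketch, assuming a hypothetical private struct holding a block
 * pointer and a filter_list head:
 *
 *	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
 *	if (err)
 *		return err;
 *
 * and on teardown:
 *
 *	tcf_block_put(q->block);
 */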
   1389
   1390/* XXX: Standalone actions are not allowed to jump to any chain, and bound
   1391 * actions should be all removed after flushing.
   1392 */
   1393void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
   1394		       struct tcf_block_ext_info *ei)
   1395{
   1396	if (!block)
   1397		return;
   1398	tcf_chain0_head_change_cb_del(block, ei);
   1399	tcf_block_owner_del(block, q, ei->binder_type);
   1400
   1401	__tcf_block_put(block, q, ei, true);
   1402}
   1403EXPORT_SYMBOL(tcf_block_put_ext);
   1404
   1405void tcf_block_put(struct tcf_block *block)
   1406{
   1407	struct tcf_block_ext_info ei = {0, };
   1408
   1409	if (!block)
   1410		return;
   1411	tcf_block_put_ext(block, block->q, &ei);
   1412}
   1413
   1414EXPORT_SYMBOL(tcf_block_put);
   1415
   1416static int
   1417tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
   1418			    void *cb_priv, bool add, bool offload_in_use,
   1419			    struct netlink_ext_ack *extack)
   1420{
   1421	struct tcf_chain *chain, *chain_prev;
   1422	struct tcf_proto *tp, *tp_prev;
   1423	int err;
   1424
   1425	lockdep_assert_held(&block->cb_lock);
   1426
   1427	for (chain = __tcf_get_next_chain(block, NULL);
   1428	     chain;
   1429	     chain_prev = chain,
   1430		     chain = __tcf_get_next_chain(block, chain),
   1431		     tcf_chain_put(chain_prev)) {
   1432		for (tp = __tcf_get_next_proto(chain, NULL); tp;
   1433		     tp_prev = tp,
   1434			     tp = __tcf_get_next_proto(chain, tp),
   1435			     tcf_proto_put(tp_prev, true, NULL)) {
   1436			if (tp->ops->reoffload) {
   1437				err = tp->ops->reoffload(tp, add, cb, cb_priv,
   1438							 extack);
   1439				if (err && add)
   1440					goto err_playback_remove;
   1441			} else if (add && offload_in_use) {
   1442				err = -EOPNOTSUPP;
   1443				NL_SET_ERR_MSG(extack, "Filter HW offload failed - classifier without re-offloading support");
   1444				goto err_playback_remove;
   1445			}
   1446		}
   1447	}
   1448
   1449	return 0;
   1450
   1451err_playback_remove:
   1452	tcf_proto_put(tp, true, NULL);
   1453	tcf_chain_put(chain);
   1454	tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
   1455				    extack);
   1456	return err;
   1457}
   1458
   1459static int tcf_block_bind(struct tcf_block *block,
   1460			  struct flow_block_offload *bo)
   1461{
   1462	struct flow_block_cb *block_cb, *next;
   1463	int err, i = 0;
   1464
   1465	lockdep_assert_held(&block->cb_lock);
   1466
   1467	list_for_each_entry(block_cb, &bo->cb_list, list) {
   1468		err = tcf_block_playback_offloads(block, block_cb->cb,
   1469						  block_cb->cb_priv, true,
   1470						  tcf_block_offload_in_use(block),
   1471						  bo->extack);
   1472		if (err)
   1473			goto err_unroll;
   1474		if (!bo->unlocked_driver_cb)
   1475			block->lockeddevcnt++;
   1476
   1477		i++;
   1478	}
   1479	list_splice(&bo->cb_list, &block->flow_block.cb_list);
   1480
   1481	return 0;
   1482
   1483err_unroll:
   1484	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
   1485		if (i-- > 0) {
   1486			list_del(&block_cb->list);
   1487			tcf_block_playback_offloads(block, block_cb->cb,
   1488						    block_cb->cb_priv, false,
   1489						    tcf_block_offload_in_use(block),
   1490						    NULL);
   1491			if (!bo->unlocked_driver_cb)
   1492				block->lockeddevcnt--;
   1493		}
   1494		flow_block_cb_free(block_cb);
   1495	}
   1496
   1497	return err;
   1498}
   1499
   1500static void tcf_block_unbind(struct tcf_block *block,
   1501			     struct flow_block_offload *bo)
   1502{
   1503	struct flow_block_cb *block_cb, *next;
   1504
   1505	lockdep_assert_held(&block->cb_lock);
   1506
   1507	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
   1508		tcf_block_playback_offloads(block, block_cb->cb,
   1509					    block_cb->cb_priv, false,
   1510					    tcf_block_offload_in_use(block),
   1511					    NULL);
   1512		list_del(&block_cb->list);
   1513		flow_block_cb_free(block_cb);
   1514		if (!bo->unlocked_driver_cb)
   1515			block->lockeddevcnt--;
   1516	}
   1517}
   1518
   1519static int tcf_block_setup(struct tcf_block *block,
   1520			   struct flow_block_offload *bo)
   1521{
   1522	int err;
   1523
   1524	switch (bo->command) {
   1525	case FLOW_BLOCK_BIND:
   1526		err = tcf_block_bind(block, bo);
   1527		break;
   1528	case FLOW_BLOCK_UNBIND:
   1529		err = 0;
   1530		tcf_block_unbind(block, bo);
   1531		break;
   1532	default:
   1533		WARN_ON_ONCE(1);
   1534		err = -EOPNOTSUPP;
   1535	}
   1536
   1537	return err;
   1538}
   1539
   1540/* Main classifier routine: scans classifier chain attached
   1541 * to this qdisc, (optionally) tests for protocol and asks
   1542 * specific classifiers.
   1543 */
   1544static inline int __tcf_classify(struct sk_buff *skb,
   1545				 const struct tcf_proto *tp,
   1546				 const struct tcf_proto *orig_tp,
   1547				 struct tcf_result *res,
   1548				 bool compat_mode,
   1549				 u32 *last_executed_chain)
   1550{
   1551#ifdef CONFIG_NET_CLS_ACT
   1552	const int max_reclassify_loop = 16;
   1553	const struct tcf_proto *first_tp;
   1554	int limit = 0;
   1555
   1556reclassify:
   1557#endif
   1558	for (; tp; tp = rcu_dereference_bh(tp->next)) {
   1559		__be16 protocol = skb_protocol(skb, false);
   1560		int err;
   1561
   1562		if (tp->protocol != protocol &&
   1563		    tp->protocol != htons(ETH_P_ALL))
   1564			continue;
   1565
   1566		err = tp->classify(skb, tp, res);
   1567#ifdef CONFIG_NET_CLS_ACT
   1568		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
   1569			first_tp = orig_tp;
   1570			*last_executed_chain = first_tp->chain->index;
   1571			goto reset;
   1572		} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
   1573			first_tp = res->goto_tp;
   1574			*last_executed_chain = err & TC_ACT_EXT_VAL_MASK;
   1575			goto reset;
   1576		}
   1577#endif
   1578		if (err >= 0)
   1579			return err;
   1580	}
   1581
   1582	return TC_ACT_UNSPEC; /* signal: continue lookup */
   1583#ifdef CONFIG_NET_CLS_ACT
   1584reset:
   1585	if (unlikely(limit++ >= max_reclassify_loop)) {
   1586		net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n",
   1587				       tp->chain->block->index,
   1588				       tp->prio & 0xffff,
   1589				       ntohs(tp->protocol));
   1590		return TC_ACT_SHOT;
   1591	}
   1592
   1593	tp = first_tp;
   1594	goto reclassify;
   1595#endif
   1596}
   1597
   1598int tcf_classify(struct sk_buff *skb,
   1599		 const struct tcf_block *block,
   1600		 const struct tcf_proto *tp,
   1601		 struct tcf_result *res, bool compat_mode)
   1602{
   1603#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
   1604	u32 last_executed_chain = 0;
   1605
   1606	return __tcf_classify(skb, tp, tp, res, compat_mode,
   1607			      &last_executed_chain);
   1608#else
   1609	u32 last_executed_chain = tp ? tp->chain->index : 0;
   1610	const struct tcf_proto *orig_tp = tp;
   1611	struct tc_skb_ext *ext;
   1612	int ret;
   1613
   1614	if (block) {
   1615		ext = skb_ext_find(skb, TC_SKB_EXT);
   1616
   1617		if (ext && ext->chain) {
   1618			struct tcf_chain *fchain;
   1619
   1620			fchain = tcf_chain_lookup_rcu(block, ext->chain);
   1621			if (!fchain)
   1622				return TC_ACT_SHOT;
   1623
   1624			/* Consume, so cloned/redirect skbs won't inherit ext */
   1625			skb_ext_del(skb, TC_SKB_EXT);
   1626
   1627			tp = rcu_dereference_bh(fchain->filter_chain);
   1628			last_executed_chain = fchain->index;
   1629		}
   1630	}
   1631
   1632	ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode,
   1633			     &last_executed_chain);
   1634
   1635	if (tc_skb_ext_tc_enabled()) {
   1636		/* If we missed on some chain */
   1637		if (ret == TC_ACT_UNSPEC && last_executed_chain) {
   1638			struct tc_skb_cb *cb = tc_skb_cb(skb);
   1639
   1640			ext = tc_skb_ext_alloc(skb);
   1641			if (WARN_ON_ONCE(!ext))
   1642				return TC_ACT_SHOT;
   1643			ext->chain = last_executed_chain;
   1644			ext->mru = cb->mru;
   1645			ext->post_ct = cb->post_ct;
   1646			ext->post_ct_snat = cb->post_ct_snat;
   1647			ext->post_ct_dnat = cb->post_ct_dnat;
   1648			ext->zone = cb->zone;
   1649		}
   1650	}
   1651
   1652	return ret;
   1653#endif
   1654}
   1655EXPORT_SYMBOL(tcf_classify);
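/* Editor's example (not part of the upstream file): a classifying qdisc's
 * enqueue path drives this roughly as follows (hedged sketch; q->filter_list
 * and the verdict handling belong to the caller):
 *
 *	struct tcf_result res;
 *	struct tcf_proto *fl = rcu_dereference_bh(q->filter_list);
 *	int err = tcf_classify(skb, NULL, fl, &res, false);
 *
 *	switch (err) {
 *	case TC_ACT_STOLEN:
 *	case TC_ACT_QUEUED:
 *	case TC_ACT_TRAP:
 *	case TC_ACT_SHOT:
 *		return NULL;	/* packet consumed or dropped by an action */
 *	}
 *
 * otherwise res.classid identifies the matched class.
 */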
   1656
   1657struct tcf_chain_info {
   1658	struct tcf_proto __rcu **pprev;
   1659	struct tcf_proto __rcu *next;
   1660};
   1661
   1662static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain,
   1663					   struct tcf_chain_info *chain_info)
   1664{
   1665	return tcf_chain_dereference(*chain_info->pprev, chain);
   1666}
   1667
   1668static int tcf_chain_tp_insert(struct tcf_chain *chain,
   1669			       struct tcf_chain_info *chain_info,
   1670			       struct tcf_proto *tp)
   1671{
   1672	if (chain->flushing)
   1673		return -EAGAIN;
   1674
   1675	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
   1676	if (*chain_info->pprev == chain->filter_chain)
   1677		tcf_chain0_head_change(chain, tp);
   1678	tcf_proto_get(tp);
   1679	rcu_assign_pointer(*chain_info->pprev, tp);
   1680
   1681	return 0;
   1682}
   1683
   1684static void tcf_chain_tp_remove(struct tcf_chain *chain,
   1685				struct tcf_chain_info *chain_info,
   1686				struct tcf_proto *tp)
   1687{
   1688	struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain);
   1689
   1690	tcf_proto_mark_delete(tp);
   1691	if (tp == chain->filter_chain)
   1692		tcf_chain0_head_change(chain, next);
   1693	RCU_INIT_POINTER(*chain_info->pprev, next);
   1694}
   1695
   1696static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
   1697					   struct tcf_chain_info *chain_info,
   1698					   u32 protocol, u32 prio,
   1699					   bool prio_allocate);
   1700
   1701/* Try to insert new proto.
   1702 * If proto with specified priority already exists, free new proto
   1703 * and return existing one.
   1704 */
   1705
   1706static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
   1707						    struct tcf_proto *tp_new,
   1708						    u32 protocol, u32 prio,
   1709						    bool rtnl_held)
   1710{
   1711	struct tcf_chain_info chain_info;
   1712	struct tcf_proto *tp;
   1713	int err = 0;
   1714
   1715	mutex_lock(&chain->filter_chain_lock);
   1716
   1717	if (tcf_proto_exists_destroying(chain, tp_new)) {
   1718		mutex_unlock(&chain->filter_chain_lock);
   1719		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
   1720		return ERR_PTR(-EAGAIN);
   1721	}
   1722
   1723	tp = tcf_chain_tp_find(chain, &chain_info,
   1724			       protocol, prio, false);
   1725	if (!tp)
   1726		err = tcf_chain_tp_insert(chain, &chain_info, tp_new);
   1727	mutex_unlock(&chain->filter_chain_lock);
   1728
   1729	if (tp) {
   1730		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
   1731		tp_new = tp;
   1732	} else if (err) {
   1733		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
   1734		tp_new = ERR_PTR(err);
   1735	}
   1736
   1737	return tp_new;
   1738}
   1739
   1740static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
   1741				      struct tcf_proto *tp, bool rtnl_held,
   1742				      struct netlink_ext_ack *extack)
   1743{
   1744	struct tcf_chain_info chain_info;
   1745	struct tcf_proto *tp_iter;
   1746	struct tcf_proto **pprev;
   1747	struct tcf_proto *next;
   1748
   1749	mutex_lock(&chain->filter_chain_lock);
   1750
   1751	/* Atomically find and remove tp from chain. */
   1752	for (pprev = &chain->filter_chain;
   1753	     (tp_iter = tcf_chain_dereference(*pprev, chain));
   1754	     pprev = &tp_iter->next) {
   1755		if (tp_iter == tp) {
   1756			chain_info.pprev = pprev;
   1757			chain_info.next = tp_iter->next;
   1758			WARN_ON(tp_iter->deleting);
   1759			break;
   1760		}
   1761	}
   1762	/* Verify that tp still exists and no new filters were inserted
   1763	 * concurrently.
   1764	 * Mark tp for deletion if it is empty.
   1765	 */
   1766	if (!tp_iter || !tcf_proto_check_delete(tp)) {
   1767		mutex_unlock(&chain->filter_chain_lock);
   1768		return;
   1769	}
   1770
   1771	tcf_proto_signal_destroying(chain, tp);
   1772	next = tcf_chain_dereference(chain_info.next, chain);
   1773	if (tp == chain->filter_chain)
   1774		tcf_chain0_head_change(chain, next);
   1775	RCU_INIT_POINTER(*chain_info.pprev, next);
   1776	mutex_unlock(&chain->filter_chain_lock);
   1777
   1778	tcf_proto_put(tp, rtnl_held, extack);
   1779}
   1780
   1781static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
   1782					   struct tcf_chain_info *chain_info,
   1783					   u32 protocol, u32 prio,
   1784					   bool prio_allocate)
   1785{
   1786	struct tcf_proto **pprev;
   1787	struct tcf_proto *tp;
   1788
   1789	/* Check the chain for existence of proto-tcf with this priority */
   1790	for (pprev = &chain->filter_chain;
   1791	     (tp = tcf_chain_dereference(*pprev, chain));
   1792	     pprev = &tp->next) {
   1793		if (tp->prio >= prio) {
   1794			if (tp->prio == prio) {
   1795				if (prio_allocate ||
   1796				    (tp->protocol != protocol && protocol))
   1797					return ERR_PTR(-EINVAL);
   1798			} else {
   1799				tp = NULL;
   1800			}
   1801			break;
   1802		}
   1803	}
   1804	chain_info->pprev = pprev;
   1805	if (tp) {
   1806		chain_info->next = tp->next;
   1807		tcf_proto_get(tp);
   1808	} else {
   1809		chain_info->next = NULL;
   1810	}
   1811	return tp;
   1812}
   1813
   1814static int tcf_fill_node(struct net *net, struct sk_buff *skb,
   1815			 struct tcf_proto *tp, struct tcf_block *block,
   1816			 struct Qdisc *q, u32 parent, void *fh,
   1817			 u32 portid, u32 seq, u16 flags, int event,
   1818			 bool terse_dump, bool rtnl_held)
   1819{
   1820	struct tcmsg *tcm;
   1821	struct nlmsghdr  *nlh;
   1822	unsigned char *b = skb_tail_pointer(skb);
   1823
   1824	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
   1825	if (!nlh)
   1826		goto out_nlmsg_trim;
   1827	tcm = nlmsg_data(nlh);
   1828	tcm->tcm_family = AF_UNSPEC;
   1829	tcm->tcm__pad1 = 0;
   1830	tcm->tcm__pad2 = 0;
   1831	if (q) {
   1832		tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
   1833		tcm->tcm_parent = parent;
   1834	} else {
   1835		tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
   1836		tcm->tcm_block_index = block->index;
   1837	}
   1838	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
   1839	if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
   1840		goto nla_put_failure;
   1841	if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
   1842		goto nla_put_failure;
   1843	if (!fh) {
   1844		tcm->tcm_handle = 0;
   1845	} else if (terse_dump) {
   1846		if (tp->ops->terse_dump) {
   1847			if (tp->ops->terse_dump(net, tp, fh, skb, tcm,
   1848						rtnl_held) < 0)
   1849				goto nla_put_failure;
   1850		} else {
   1851			goto cls_op_not_supp;
   1852		}
   1853	} else {
   1854		if (tp->ops->dump &&
   1855		    tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
   1856			goto nla_put_failure;
   1857	}
   1858	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
   1859	return skb->len;
   1860
   1861out_nlmsg_trim:
   1862nla_put_failure:
   1863cls_op_not_supp:
   1864	nlmsg_trim(skb, b);
   1865	return -1;
   1866}
   1867
   1868static int tfilter_notify(struct net *net, struct sk_buff *oskb,
   1869			  struct nlmsghdr *n, struct tcf_proto *tp,
   1870			  struct tcf_block *block, struct Qdisc *q,
   1871			  u32 parent, void *fh, int event, bool unicast,
   1872			  bool rtnl_held)
   1873{
   1874	struct sk_buff *skb;
   1875	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
   1876	int err = 0;
   1877
   1878	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
   1879	if (!skb)
   1880		return -ENOBUFS;
   1881
   1882	if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
   1883			  n->nlmsg_seq, n->nlmsg_flags, event,
   1884			  false, rtnl_held) <= 0) {
   1885		kfree_skb(skb);
   1886		return -EINVAL;
   1887	}
   1888
   1889	if (unicast)
   1890		err = rtnl_unicast(skb, net, portid);
   1891	else
   1892		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
   1893				     n->nlmsg_flags & NLM_F_ECHO);
   1894	return err;
   1895}
   1896
   1897static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
   1898			      struct nlmsghdr *n, struct tcf_proto *tp,
   1899			      struct tcf_block *block, struct Qdisc *q,
   1900			      u32 parent, void *fh, bool unicast, bool *last,
   1901			      bool rtnl_held, struct netlink_ext_ack *extack)
   1902{
   1903	struct sk_buff *skb;
   1904	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
   1905	int err;
   1906
   1907	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
   1908	if (!skb)
   1909		return -ENOBUFS;
   1910
   1911	if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
   1912			  n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
   1913			  false, rtnl_held) <= 0) {
   1914		NL_SET_ERR_MSG(extack, "Failed to build del event notification");
   1915		kfree_skb(skb);
   1916		return -EINVAL;
   1917	}
   1918
   1919	err = tp->ops->delete(tp, fh, last, rtnl_held, extack);
   1920	if (err) {
   1921		kfree_skb(skb);
   1922		return err;
   1923	}
   1924
   1925	if (unicast)
   1926		err = rtnl_unicast(skb, net, portid);
   1927	else
   1928		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
   1929				     n->nlmsg_flags & NLM_F_ECHO);
   1930	if (err < 0)
   1931		NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
   1932
   1933	return err;
   1934}
   1935
   1936static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
   1937				 struct tcf_block *block, struct Qdisc *q,
   1938				 u32 parent, struct nlmsghdr *n,
   1939				 struct tcf_chain *chain, int event)
   1940{
   1941	struct tcf_proto *tp;
   1942
   1943	for (tp = tcf_get_next_proto(chain, NULL);
   1944	     tp; tp = tcf_get_next_proto(chain, tp))
   1945		tfilter_notify(net, oskb, n, tp, block,
   1946			       q, parent, NULL, event, false, true);
   1947}
   1948
   1949static void tfilter_put(struct tcf_proto *tp, void *fh)
   1950{
   1951	if (tp->ops->put && fh)
   1952		tp->ops->put(tp, fh);
   1953}
   1954
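       /* RTM_NEWTFILTER handler: create or change a filter. As an
        * illustrative example (not taken from this file), a request such as
        *   tc filter add dev eth0 parent 1: protocol ip prio 10 \
        *      u32 match ip dport 80 0xffff action drop
        * arrives here with priority 10 in the upper 16 bits and ETH_P_IP in
        * the lower 16 bits of tcm_info. If a concurrent chain flush causes
        * -EAGAIN, the request is replayed (see the replay label below).
        */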
   1955static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
   1956			  struct netlink_ext_ack *extack)
   1957{
   1958	struct net *net = sock_net(skb->sk);
   1959	struct nlattr *tca[TCA_MAX + 1];
   1960	char name[IFNAMSIZ];
   1961	struct tcmsg *t;
   1962	u32 protocol;
   1963	u32 prio;
   1964	bool prio_allocate;
   1965	u32 parent;
   1966	u32 chain_index;
   1967	struct Qdisc *q;
   1968	struct tcf_chain_info chain_info;
   1969	struct tcf_chain *chain;
   1970	struct tcf_block *block;
   1971	struct tcf_proto *tp;
   1972	unsigned long cl;
   1973	void *fh;
   1974	int err;
   1975	int tp_created;
   1976	bool rtnl_held = false;
   1977	u32 flags;
   1978
   1979	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
   1980		return -EPERM;
   1981
   1982replay:
   1983	tp_created = 0;
   1984
   1985	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
   1986				     rtm_tca_policy, extack);
   1987	if (err < 0)
   1988		return err;
   1989
   1990	t = nlmsg_data(n);
   1991	protocol = TC_H_MIN(t->tcm_info);
   1992	prio = TC_H_MAJ(t->tcm_info);
   1993	prio_allocate = false;
   1994	parent = t->tcm_parent;
   1995	tp = NULL;
   1996	cl = 0;
   1997	block = NULL;
   1998	q = NULL;
   1999	chain = NULL;
   2000	flags = 0;
   2001
   2002	if (prio == 0) {
   2003		/* If no priority is provided by the user,
   2004		 * we allocate one.
   2005		 */
   2006		if (n->nlmsg_flags & NLM_F_CREATE) {
   2007			prio = TC_H_MAKE(0x80000000U, 0U);
   2008			prio_allocate = true;
   2009		} else {
   2010			NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
   2011			return -ENOENT;
   2012		}
   2013	}
   2014
   2015	/* Find head of filter chain. */
   2016
   2017	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
   2018	if (err)
   2019		return err;
   2020
   2021	if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
   2022		NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
   2023		err = -EINVAL;
   2024		goto errout;
   2025	}
   2026
    2027	/* Take rtnl mutex if rtnl_held was set to true on a previous iteration,
    2028	 * the block is shared (no qdisc found), the qdisc is not unlocked, the
    2029	 * classifier type is not specified, or the classifier is not unlocked.
    2030	 */
   2031	if (rtnl_held ||
   2032	    (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
   2033	    !tcf_proto_is_unlocked(name)) {
   2034		rtnl_held = true;
   2035		rtnl_lock();
   2036	}
   2037
   2038	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
   2039	if (err)
   2040		goto errout;
   2041
   2042	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
   2043				 extack);
   2044	if (IS_ERR(block)) {
   2045		err = PTR_ERR(block);
   2046		goto errout;
   2047	}
   2048	block->classid = parent;
   2049
   2050	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
   2051	if (chain_index > TC_ACT_EXT_VAL_MASK) {
   2052		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
   2053		err = -EINVAL;
   2054		goto errout;
   2055	}
   2056	chain = tcf_chain_get(block, chain_index, true);
   2057	if (!chain) {
   2058		NL_SET_ERR_MSG(extack, "Cannot create specified filter chain");
   2059		err = -ENOMEM;
   2060		goto errout;
   2061	}
   2062
   2063	mutex_lock(&chain->filter_chain_lock);
   2064	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
   2065			       prio, prio_allocate);
   2066	if (IS_ERR(tp)) {
   2067		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
   2068		err = PTR_ERR(tp);
   2069		goto errout_locked;
   2070	}
   2071
   2072	if (tp == NULL) {
   2073		struct tcf_proto *tp_new = NULL;
   2074
   2075		if (chain->flushing) {
   2076			err = -EAGAIN;
   2077			goto errout_locked;
   2078		}
   2079
   2080		/* Proto-tcf does not exist, create new one */
   2081
   2082		if (tca[TCA_KIND] == NULL || !protocol) {
   2083			NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
   2084			err = -EINVAL;
   2085			goto errout_locked;
   2086		}
   2087
   2088		if (!(n->nlmsg_flags & NLM_F_CREATE)) {
   2089			NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
   2090			err = -ENOENT;
   2091			goto errout_locked;
   2092		}
   2093
   2094		if (prio_allocate)
   2095			prio = tcf_auto_prio(tcf_chain_tp_prev(chain,
   2096							       &chain_info));
   2097
   2098		mutex_unlock(&chain->filter_chain_lock);
   2099		tp_new = tcf_proto_create(name, protocol, prio, chain,
   2100					  rtnl_held, extack);
   2101		if (IS_ERR(tp_new)) {
   2102			err = PTR_ERR(tp_new);
   2103			goto errout_tp;
   2104		}
   2105
   2106		tp_created = 1;
   2107		tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio,
   2108						rtnl_held);
   2109		if (IS_ERR(tp)) {
   2110			err = PTR_ERR(tp);
   2111			goto errout_tp;
   2112		}
   2113	} else {
   2114		mutex_unlock(&chain->filter_chain_lock);
   2115	}
   2116
   2117	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
   2118		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
   2119		err = -EINVAL;
   2120		goto errout;
   2121	}
   2122
   2123	fh = tp->ops->get(tp, t->tcm_handle);
   2124
   2125	if (!fh) {
   2126		if (!(n->nlmsg_flags & NLM_F_CREATE)) {
   2127			NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
   2128			err = -ENOENT;
   2129			goto errout;
   2130		}
   2131	} else if (n->nlmsg_flags & NLM_F_EXCL) {
   2132		tfilter_put(tp, fh);
   2133		NL_SET_ERR_MSG(extack, "Filter already exists");
   2134		err = -EEXIST;
   2135		goto errout;
   2136	}
   2137
   2138	if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) {
   2139		NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind");
   2140		err = -EINVAL;
   2141		goto errout;
   2142	}
   2143
   2144	if (!(n->nlmsg_flags & NLM_F_CREATE))
   2145		flags |= TCA_ACT_FLAGS_REPLACE;
   2146	if (!rtnl_held)
   2147		flags |= TCA_ACT_FLAGS_NO_RTNL;
   2148	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
   2149			      flags, extack);
   2150	if (err == 0) {
   2151		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
   2152			       RTM_NEWTFILTER, false, rtnl_held);
   2153		tfilter_put(tp, fh);
   2154		/* q pointer is NULL for shared blocks */
   2155		if (q)
   2156			q->flags &= ~TCQ_F_CAN_BYPASS;
   2157	}
   2158
   2159errout:
   2160	if (err && tp_created)
   2161		tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL);
   2162errout_tp:
   2163	if (chain) {
   2164		if (tp && !IS_ERR(tp))
   2165			tcf_proto_put(tp, rtnl_held, NULL);
   2166		if (!tp_created)
   2167			tcf_chain_put(chain);
   2168	}
   2169	tcf_block_release(q, block, rtnl_held);
   2170
   2171	if (rtnl_held)
   2172		rtnl_unlock();
   2173
   2174	if (err == -EAGAIN) {
   2175		/* Take rtnl lock in case EAGAIN is caused by concurrent flush
   2176		 * of target chain.
   2177		 */
   2178		rtnl_held = true;
   2179		/* Replay the request. */
   2180		goto replay;
   2181	}
   2182	return err;
   2183
   2184errout_locked:
   2185	mutex_unlock(&chain->filter_chain_lock);
   2186	goto errout;
   2187}
   2188
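       /* RTM_DELTFILTER handler. A priority of zero selects the chain flush
        * path, a zero handle removes the whole proto at that priority, and
        * otherwise only the filter with the given handle is deleted.
        * Illustrative example (not taken from this file):
        *   tc filter del dev eth0 parent 1: protocol ip prio 10
        */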
   2189static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
   2190			  struct netlink_ext_ack *extack)
   2191{
   2192	struct net *net = sock_net(skb->sk);
   2193	struct nlattr *tca[TCA_MAX + 1];
   2194	char name[IFNAMSIZ];
   2195	struct tcmsg *t;
   2196	u32 protocol;
   2197	u32 prio;
   2198	u32 parent;
   2199	u32 chain_index;
   2200	struct Qdisc *q = NULL;
   2201	struct tcf_chain_info chain_info;
   2202	struct tcf_chain *chain = NULL;
   2203	struct tcf_block *block = NULL;
   2204	struct tcf_proto *tp = NULL;
   2205	unsigned long cl = 0;
   2206	void *fh = NULL;
   2207	int err;
   2208	bool rtnl_held = false;
   2209
   2210	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
   2211		return -EPERM;
   2212
   2213	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
   2214				     rtm_tca_policy, extack);
   2215	if (err < 0)
   2216		return err;
   2217
   2218	t = nlmsg_data(n);
   2219	protocol = TC_H_MIN(t->tcm_info);
   2220	prio = TC_H_MAJ(t->tcm_info);
   2221	parent = t->tcm_parent;
   2222
   2223	if (prio == 0 && (protocol || t->tcm_handle || tca[TCA_KIND])) {
   2224		NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set");
   2225		return -ENOENT;
   2226	}
   2227
   2228	/* Find head of filter chain. */
   2229
   2230	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
   2231	if (err)
   2232		return err;
   2233
   2234	if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
   2235		NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
   2236		err = -EINVAL;
   2237		goto errout;
   2238	}
    2239	/* Take rtnl mutex if flushing the whole chain, the block is shared (no
    2240	 * qdisc found), the qdisc is not unlocked, the classifier type is not
    2241	 * specified, or the classifier is not unlocked.
    2242	 */
   2243	if (!prio ||
   2244	    (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
   2245	    !tcf_proto_is_unlocked(name)) {
   2246		rtnl_held = true;
   2247		rtnl_lock();
   2248	}
   2249
   2250	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
   2251	if (err)
   2252		goto errout;
   2253
   2254	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
   2255				 extack);
   2256	if (IS_ERR(block)) {
   2257		err = PTR_ERR(block);
   2258		goto errout;
   2259	}
   2260
   2261	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
   2262	if (chain_index > TC_ACT_EXT_VAL_MASK) {
   2263		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
   2264		err = -EINVAL;
   2265		goto errout;
   2266	}
   2267	chain = tcf_chain_get(block, chain_index, false);
   2268	if (!chain) {
   2269		/* User requested flush on non-existent chain. Nothing to do,
   2270		 * so just return success.
   2271		 */
   2272		if (prio == 0) {
   2273			err = 0;
   2274			goto errout;
   2275		}
   2276		NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
   2277		err = -ENOENT;
   2278		goto errout;
   2279	}
   2280
   2281	if (prio == 0) {
   2282		tfilter_notify_chain(net, skb, block, q, parent, n,
   2283				     chain, RTM_DELTFILTER);
   2284		tcf_chain_flush(chain, rtnl_held);
   2285		err = 0;
   2286		goto errout;
   2287	}
   2288
   2289	mutex_lock(&chain->filter_chain_lock);
   2290	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
   2291			       prio, false);
   2292	if (!tp || IS_ERR(tp)) {
   2293		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
   2294		err = tp ? PTR_ERR(tp) : -ENOENT;
   2295		goto errout_locked;
   2296	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
   2297		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
   2298		err = -EINVAL;
   2299		goto errout_locked;
   2300	} else if (t->tcm_handle == 0) {
   2301		tcf_proto_signal_destroying(chain, tp);
   2302		tcf_chain_tp_remove(chain, &chain_info, tp);
   2303		mutex_unlock(&chain->filter_chain_lock);
   2304
   2305		tcf_proto_put(tp, rtnl_held, NULL);
   2306		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
   2307			       RTM_DELTFILTER, false, rtnl_held);
   2308		err = 0;
   2309		goto errout;
   2310	}
   2311	mutex_unlock(&chain->filter_chain_lock);
   2312
   2313	fh = tp->ops->get(tp, t->tcm_handle);
   2314
   2315	if (!fh) {
   2316		NL_SET_ERR_MSG(extack, "Specified filter handle not found");
   2317		err = -ENOENT;
   2318	} else {
   2319		bool last;
   2320
   2321		err = tfilter_del_notify(net, skb, n, tp, block,
   2322					 q, parent, fh, false, &last,
   2323					 rtnl_held, extack);
   2324
   2325		if (err)
   2326			goto errout;
   2327		if (last)
   2328			tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack);
   2329	}
   2330
   2331errout:
   2332	if (chain) {
   2333		if (tp && !IS_ERR(tp))
   2334			tcf_proto_put(tp, rtnl_held, NULL);
   2335		tcf_chain_put(chain);
   2336	}
   2337	tcf_block_release(q, block, rtnl_held);
   2338
   2339	if (rtnl_held)
   2340		rtnl_unlock();
   2341
   2342	return err;
   2343
   2344errout_locked:
   2345	mutex_unlock(&chain->filter_chain_lock);
   2346	goto errout;
   2347}
   2348
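       /* RTM_GETTFILTER handler: look up a single filter and unicast an
        * RTM_NEWTFILTER dump of it back to the requesting socket.
        */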
   2349static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
   2350			  struct netlink_ext_ack *extack)
   2351{
   2352	struct net *net = sock_net(skb->sk);
   2353	struct nlattr *tca[TCA_MAX + 1];
   2354	char name[IFNAMSIZ];
   2355	struct tcmsg *t;
   2356	u32 protocol;
   2357	u32 prio;
   2358	u32 parent;
   2359	u32 chain_index;
   2360	struct Qdisc *q = NULL;
   2361	struct tcf_chain_info chain_info;
   2362	struct tcf_chain *chain = NULL;
   2363	struct tcf_block *block = NULL;
   2364	struct tcf_proto *tp = NULL;
   2365	unsigned long cl = 0;
   2366	void *fh = NULL;
   2367	int err;
   2368	bool rtnl_held = false;
   2369
   2370	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
   2371				     rtm_tca_policy, extack);
   2372	if (err < 0)
   2373		return err;
   2374
   2375	t = nlmsg_data(n);
   2376	protocol = TC_H_MIN(t->tcm_info);
   2377	prio = TC_H_MAJ(t->tcm_info);
   2378	parent = t->tcm_parent;
   2379
   2380	if (prio == 0) {
   2381		NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
   2382		return -ENOENT;
   2383	}
   2384
   2385	/* Find head of filter chain. */
   2386
   2387	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
   2388	if (err)
   2389		return err;
   2390
   2391	if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
   2392		NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
   2393		err = -EINVAL;
   2394		goto errout;
   2395	}
    2396	/* Take rtnl mutex if the block is shared (no qdisc found), the qdisc is
    2397	 * not unlocked, the classifier type is not specified, or the classifier
    2398	 * is not unlocked.
    2399	 */
   2400	if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
   2401	    !tcf_proto_is_unlocked(name)) {
   2402		rtnl_held = true;
   2403		rtnl_lock();
   2404	}
   2405
   2406	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
   2407	if (err)
   2408		goto errout;
   2409
   2410	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
   2411				 extack);
   2412	if (IS_ERR(block)) {
   2413		err = PTR_ERR(block);
   2414		goto errout;
   2415	}
   2416
   2417	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
   2418	if (chain_index > TC_ACT_EXT_VAL_MASK) {
   2419		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
   2420		err = -EINVAL;
   2421		goto errout;
   2422	}
   2423	chain = tcf_chain_get(block, chain_index, false);
   2424	if (!chain) {
   2425		NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
   2426		err = -EINVAL;
   2427		goto errout;
   2428	}
   2429
   2430	mutex_lock(&chain->filter_chain_lock);
   2431	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
   2432			       prio, false);
   2433	mutex_unlock(&chain->filter_chain_lock);
   2434	if (!tp || IS_ERR(tp)) {
   2435		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
   2436		err = tp ? PTR_ERR(tp) : -ENOENT;
   2437		goto errout;
   2438	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
   2439		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
   2440		err = -EINVAL;
   2441		goto errout;
   2442	}
   2443
   2444	fh = tp->ops->get(tp, t->tcm_handle);
   2445
   2446	if (!fh) {
   2447		NL_SET_ERR_MSG(extack, "Specified filter handle not found");
   2448		err = -ENOENT;
   2449	} else {
   2450		err = tfilter_notify(net, skb, n, tp, block, q, parent,
   2451				     fh, RTM_NEWTFILTER, true, rtnl_held);
   2452		if (err < 0)
   2453			NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
   2454	}
   2455
   2456	tfilter_put(tp, fh);
   2457errout:
   2458	if (chain) {
   2459		if (tp && !IS_ERR(tp))
   2460			tcf_proto_put(tp, rtnl_held, NULL);
   2461		tcf_chain_put(chain);
   2462	}
   2463	tcf_block_release(q, block, rtnl_held);
   2464
   2465	if (rtnl_held)
   2466		rtnl_unlock();
   2467
   2468	return err;
   2469}
   2470
   2471struct tcf_dump_args {
   2472	struct tcf_walker w;
   2473	struct sk_buff *skb;
   2474	struct netlink_callback *cb;
   2475	struct tcf_block *block;
   2476	struct Qdisc *q;
   2477	u32 parent;
   2478	bool terse_dump;
   2479};
   2480
   2481static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
   2482{
   2483	struct tcf_dump_args *a = (void *)arg;
   2484	struct net *net = sock_net(a->skb->sk);
   2485
   2486	return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
   2487			     n, NETLINK_CB(a->cb->skb).portid,
   2488			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
   2489			     RTM_NEWTFILTER, a->terse_dump, true);
   2490}
   2491
   2492static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
   2493			   struct sk_buff *skb, struct netlink_callback *cb,
   2494			   long index_start, long *p_index, bool terse)
   2495{
   2496	struct net *net = sock_net(skb->sk);
   2497	struct tcf_block *block = chain->block;
   2498	struct tcmsg *tcm = nlmsg_data(cb->nlh);
   2499	struct tcf_proto *tp, *tp_prev;
   2500	struct tcf_dump_args arg;
   2501
   2502	for (tp = __tcf_get_next_proto(chain, NULL);
   2503	     tp;
   2504	     tp_prev = tp,
   2505		     tp = __tcf_get_next_proto(chain, tp),
   2506		     tcf_proto_put(tp_prev, true, NULL),
   2507		     (*p_index)++) {
   2508		if (*p_index < index_start)
   2509			continue;
   2510		if (TC_H_MAJ(tcm->tcm_info) &&
   2511		    TC_H_MAJ(tcm->tcm_info) != tp->prio)
   2512			continue;
   2513		if (TC_H_MIN(tcm->tcm_info) &&
   2514		    TC_H_MIN(tcm->tcm_info) != tp->protocol)
   2515			continue;
   2516		if (*p_index > index_start)
   2517			memset(&cb->args[1], 0,
   2518			       sizeof(cb->args) - sizeof(cb->args[0]));
   2519		if (cb->args[1] == 0) {
   2520			if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
   2521					  NETLINK_CB(cb->skb).portid,
   2522					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
   2523					  RTM_NEWTFILTER, false, true) <= 0)
   2524				goto errout;
   2525			cb->args[1] = 1;
   2526		}
   2527		if (!tp->ops->walk)
   2528			continue;
   2529		arg.w.fn = tcf_node_dump;
   2530		arg.skb = skb;
   2531		arg.cb = cb;
   2532		arg.block = block;
   2533		arg.q = q;
   2534		arg.parent = parent;
   2535		arg.w.stop = 0;
   2536		arg.w.skip = cb->args[1] - 1;
   2537		arg.w.count = 0;
   2538		arg.w.cookie = cb->args[2];
   2539		arg.terse_dump = terse;
   2540		tp->ops->walk(tp, &arg.w, true);
   2541		cb->args[2] = arg.w.cookie;
   2542		cb->args[1] = arg.w.count + 1;
   2543		if (arg.w.stop)
   2544			goto errout;
   2545	}
   2546	return true;
   2547
   2548errout:
   2549	tcf_proto_put(tp, true, NULL);
   2550	return false;
   2551}
   2552
   2553static const struct nla_policy tcf_tfilter_dump_policy[TCA_MAX + 1] = {
   2554	[TCA_DUMP_FLAGS] = NLA_POLICY_BITFIELD32(TCA_DUMP_FLAGS_TERSE),
   2555};
   2556
   2557/* called with RTNL */
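       /* Dump resume state lives in cb->args[]: args[0] counts the protos
        * already visited, args[1] is the number of filters already dumped
        * from the current proto plus one (zero means the proto itself has not
        * been dumped yet), and args[2] holds the classifier walker cookie.
        */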
   2558static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
   2559{
   2560	struct tcf_chain *chain, *chain_prev;
   2561	struct net *net = sock_net(skb->sk);
   2562	struct nlattr *tca[TCA_MAX + 1];
   2563	struct Qdisc *q = NULL;
   2564	struct tcf_block *block;
   2565	struct tcmsg *tcm = nlmsg_data(cb->nlh);
   2566	bool terse_dump = false;
   2567	long index_start;
   2568	long index;
   2569	u32 parent;
   2570	int err;
   2571
   2572	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
   2573		return skb->len;
   2574
   2575	err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
   2576				     tcf_tfilter_dump_policy, cb->extack);
   2577	if (err)
   2578		return err;
   2579
   2580	if (tca[TCA_DUMP_FLAGS]) {
   2581		struct nla_bitfield32 flags =
   2582			nla_get_bitfield32(tca[TCA_DUMP_FLAGS]);
   2583
   2584		terse_dump = flags.value & TCA_DUMP_FLAGS_TERSE;
   2585	}
   2586
   2587	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
   2588		block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
   2589		if (!block)
   2590			goto out;
   2591		/* If we work with block index, q is NULL and parent value
   2592		 * will never be used in the following code. The check
    2593	 * in tcf_fill_node prevents it. However, the compiler does not
   2594		 * see that far, so set parent to zero to silence the warning
   2595		 * about parent being uninitialized.
   2596		 */
   2597		parent = 0;
   2598	} else {
   2599		const struct Qdisc_class_ops *cops;
   2600		struct net_device *dev;
   2601		unsigned long cl = 0;
   2602
   2603		dev = __dev_get_by_index(net, tcm->tcm_ifindex);
   2604		if (!dev)
   2605			return skb->len;
   2606
   2607		parent = tcm->tcm_parent;
   2608		if (!parent)
   2609			q = rtnl_dereference(dev->qdisc);
   2610		else
   2611			q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
   2612		if (!q)
   2613			goto out;
   2614		cops = q->ops->cl_ops;
   2615		if (!cops)
   2616			goto out;
   2617		if (!cops->tcf_block)
   2618			goto out;
   2619		if (TC_H_MIN(tcm->tcm_parent)) {
   2620			cl = cops->find(q, tcm->tcm_parent);
   2621			if (cl == 0)
   2622				goto out;
   2623		}
   2624		block = cops->tcf_block(q, cl, NULL);
   2625		if (!block)
   2626			goto out;
   2627		parent = block->classid;
   2628		if (tcf_block_shared(block))
   2629			q = NULL;
   2630	}
   2631
   2632	index_start = cb->args[0];
   2633	index = 0;
   2634
   2635	for (chain = __tcf_get_next_chain(block, NULL);
   2636	     chain;
   2637	     chain_prev = chain,
   2638		     chain = __tcf_get_next_chain(block, chain),
   2639		     tcf_chain_put(chain_prev)) {
   2640		if (tca[TCA_CHAIN] &&
   2641		    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
   2642			continue;
   2643		if (!tcf_chain_dump(chain, q, parent, skb, cb,
   2644				    index_start, &index, terse_dump)) {
   2645			tcf_chain_put(chain);
   2646			err = -EMSGSIZE;
   2647			break;
   2648		}
   2649	}
   2650
   2651	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
   2652		tcf_block_refcnt_put(block, true);
   2653	cb->args[0] = index;
   2654
   2655out:
    2656	/* If we made no progress, the error (EMSGSIZE) is real */
   2657	if (skb->len == 0 && err)
   2658		return err;
   2659	return skb->len;
   2660}
   2661
   2662static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops,
   2663			      void *tmplt_priv, u32 chain_index,
   2664			      struct net *net, struct sk_buff *skb,
   2665			      struct tcf_block *block,
   2666			      u32 portid, u32 seq, u16 flags, int event)
   2667{
   2668	unsigned char *b = skb_tail_pointer(skb);
   2669	const struct tcf_proto_ops *ops;
   2670	struct nlmsghdr *nlh;
   2671	struct tcmsg *tcm;
   2672	void *priv;
   2673
   2674	ops = tmplt_ops;
   2675	priv = tmplt_priv;
   2676
   2677	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
   2678	if (!nlh)
   2679		goto out_nlmsg_trim;
   2680	tcm = nlmsg_data(nlh);
   2681	tcm->tcm_family = AF_UNSPEC;
   2682	tcm->tcm__pad1 = 0;
   2683	tcm->tcm__pad2 = 0;
   2684	tcm->tcm_handle = 0;
   2685	if (block->q) {
   2686		tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex;
   2687		tcm->tcm_parent = block->q->handle;
   2688	} else {
   2689		tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
   2690		tcm->tcm_block_index = block->index;
   2691	}
   2692
   2693	if (nla_put_u32(skb, TCA_CHAIN, chain_index))
   2694		goto nla_put_failure;
   2695
   2696	if (ops) {
   2697		if (nla_put_string(skb, TCA_KIND, ops->kind))
   2698			goto nla_put_failure;
   2699		if (ops->tmplt_dump(skb, net, priv) < 0)
   2700			goto nla_put_failure;
   2701	}
   2702
   2703	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
   2704	return skb->len;
   2705
   2706out_nlmsg_trim:
   2707nla_put_failure:
   2708	nlmsg_trim(skb, b);
   2709	return -EMSGSIZE;
   2710}
   2711
   2712static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
   2713			   u32 seq, u16 flags, int event, bool unicast)
   2714{
   2715	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
   2716	struct tcf_block *block = chain->block;
   2717	struct net *net = block->net;
   2718	struct sk_buff *skb;
   2719	int err = 0;
   2720
   2721	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
   2722	if (!skb)
   2723		return -ENOBUFS;
   2724
   2725	if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
   2726			       chain->index, net, skb, block, portid,
   2727			       seq, flags, event) <= 0) {
   2728		kfree_skb(skb);
   2729		return -EINVAL;
   2730	}
   2731
   2732	if (unicast)
   2733		err = rtnl_unicast(skb, net, portid);
   2734	else
   2735		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
   2736				     flags & NLM_F_ECHO);
   2737
   2738	return err;
   2739}
   2740
   2741static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
   2742				  void *tmplt_priv, u32 chain_index,
   2743				  struct tcf_block *block, struct sk_buff *oskb,
   2744				  u32 seq, u16 flags, bool unicast)
   2745{
   2746	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
   2747	struct net *net = block->net;
   2748	struct sk_buff *skb;
   2749
   2750	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
   2751	if (!skb)
   2752		return -ENOBUFS;
   2753
   2754	if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb,
   2755			       block, portid, seq, flags, RTM_DELCHAIN) <= 0) {
   2756		kfree_skb(skb);
   2757		return -EINVAL;
   2758	}
   2759
   2760	if (unicast)
   2761		return rtnl_unicast(skb, net, portid);
   2762
   2763	return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
   2764}
   2765
   2766static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
   2767			      struct nlattr **tca,
   2768			      struct netlink_ext_ack *extack)
   2769{
   2770	const struct tcf_proto_ops *ops;
   2771	char name[IFNAMSIZ];
   2772	void *tmplt_priv;
   2773
   2774	/* If kind is not set, user did not specify template. */
   2775	if (!tca[TCA_KIND])
   2776		return 0;
   2777
   2778	if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
   2779		NL_SET_ERR_MSG(extack, "Specified TC chain template name too long");
   2780		return -EINVAL;
   2781	}
   2782
   2783	ops = tcf_proto_lookup_ops(name, true, extack);
   2784	if (IS_ERR(ops))
   2785		return PTR_ERR(ops);
   2786	if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
   2787		NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
   2788		return -EOPNOTSUPP;
   2789	}
   2790
   2791	tmplt_priv = ops->tmplt_create(net, chain, tca, extack);
   2792	if (IS_ERR(tmplt_priv)) {
   2793		module_put(ops->owner);
   2794		return PTR_ERR(tmplt_priv);
   2795	}
   2796	chain->tmplt_ops = ops;
   2797	chain->tmplt_priv = tmplt_priv;
   2798	return 0;
   2799}
   2800
   2801static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
   2802			       void *tmplt_priv)
   2803{
    2804	/* If template ops are not set, there is no work to do for us. */
   2805	if (!tmplt_ops)
   2806		return;
   2807
   2808	tmplt_ops->tmplt_destroy(tmplt_priv);
   2809	module_put(tmplt_ops->owner);
   2810}
   2811
   2812/* Add/delete/get a chain */
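       /* RTM_NEWCHAIN takes an extra reference and marks the chain as
        * explicitly created, so an empty chain persists until RTM_DELCHAIN
        * (or a failed template add) drops that reference again.
        */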
   2813
   2814static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
   2815			struct netlink_ext_ack *extack)
   2816{
   2817	struct net *net = sock_net(skb->sk);
   2818	struct nlattr *tca[TCA_MAX + 1];
   2819	struct tcmsg *t;
   2820	u32 parent;
   2821	u32 chain_index;
   2822	struct Qdisc *q;
   2823	struct tcf_chain *chain;
   2824	struct tcf_block *block;
   2825	unsigned long cl;
   2826	int err;
   2827
   2828	if (n->nlmsg_type != RTM_GETCHAIN &&
   2829	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
   2830		return -EPERM;
   2831
   2832replay:
   2833	q = NULL;
   2834	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
   2835				     rtm_tca_policy, extack);
   2836	if (err < 0)
   2837		return err;
   2838
   2839	t = nlmsg_data(n);
   2840	parent = t->tcm_parent;
   2841	cl = 0;
   2842
   2843	block = tcf_block_find(net, &q, &parent, &cl,
   2844			       t->tcm_ifindex, t->tcm_block_index, extack);
   2845	if (IS_ERR(block))
   2846		return PTR_ERR(block);
   2847
   2848	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
   2849	if (chain_index > TC_ACT_EXT_VAL_MASK) {
   2850		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
   2851		err = -EINVAL;
   2852		goto errout_block;
   2853	}
   2854
   2855	mutex_lock(&block->lock);
   2856	chain = tcf_chain_lookup(block, chain_index);
   2857	if (n->nlmsg_type == RTM_NEWCHAIN) {
   2858		if (chain) {
   2859			if (tcf_chain_held_by_acts_only(chain)) {
   2860				/* The chain exists only because there is
   2861				 * some action referencing it.
   2862				 */
   2863				tcf_chain_hold(chain);
   2864			} else {
   2865				NL_SET_ERR_MSG(extack, "Filter chain already exists");
   2866				err = -EEXIST;
   2867				goto errout_block_locked;
   2868			}
   2869		} else {
   2870			if (!(n->nlmsg_flags & NLM_F_CREATE)) {
   2871				NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
   2872				err = -ENOENT;
   2873				goto errout_block_locked;
   2874			}
   2875			chain = tcf_chain_create(block, chain_index);
   2876			if (!chain) {
   2877				NL_SET_ERR_MSG(extack, "Failed to create filter chain");
   2878				err = -ENOMEM;
   2879				goto errout_block_locked;
   2880			}
   2881		}
   2882	} else {
   2883		if (!chain || tcf_chain_held_by_acts_only(chain)) {
   2884			NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
   2885			err = -EINVAL;
   2886			goto errout_block_locked;
   2887		}
   2888		tcf_chain_hold(chain);
   2889	}
   2890
   2891	if (n->nlmsg_type == RTM_NEWCHAIN) {
   2892		/* Modifying chain requires holding parent block lock. In case
   2893		 * the chain was successfully added, take a reference to the
   2894		 * chain. This ensures that an empty chain does not disappear at
   2895		 * the end of this function.
   2896		 */
   2897		tcf_chain_hold(chain);
   2898		chain->explicitly_created = true;
   2899	}
   2900	mutex_unlock(&block->lock);
   2901
   2902	switch (n->nlmsg_type) {
   2903	case RTM_NEWCHAIN:
   2904		err = tc_chain_tmplt_add(chain, net, tca, extack);
   2905		if (err) {
   2906			tcf_chain_put_explicitly_created(chain);
   2907			goto errout;
   2908		}
   2909
   2910		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
   2911				RTM_NEWCHAIN, false);
   2912		break;
   2913	case RTM_DELCHAIN:
   2914		tfilter_notify_chain(net, skb, block, q, parent, n,
   2915				     chain, RTM_DELTFILTER);
   2916		/* Flush the chain first as the user requested chain removal. */
   2917		tcf_chain_flush(chain, true);
   2918		/* In case the chain was successfully deleted, put a reference
   2919		 * to the chain previously taken during addition.
   2920		 */
   2921		tcf_chain_put_explicitly_created(chain);
   2922		break;
   2923	case RTM_GETCHAIN:
   2924		err = tc_chain_notify(chain, skb, n->nlmsg_seq,
   2925				      n->nlmsg_flags, n->nlmsg_type, true);
   2926		if (err < 0)
   2927			NL_SET_ERR_MSG(extack, "Failed to send chain notify message");
   2928		break;
   2929	default:
   2930		err = -EOPNOTSUPP;
   2931		NL_SET_ERR_MSG(extack, "Unsupported message type");
   2932		goto errout;
   2933	}
   2934
   2935errout:
   2936	tcf_chain_put(chain);
   2937errout_block:
   2938	tcf_block_release(q, block, true);
   2939	if (err == -EAGAIN)
   2940		/* Replay the request. */
   2941		goto replay;
   2942	return err;
   2943
   2944errout_block_locked:
   2945	mutex_unlock(&block->lock);
   2946	goto errout_block;
   2947}
   2948
   2949/* called with RTNL */
   2950static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
   2951{
   2952	struct net *net = sock_net(skb->sk);
   2953	struct nlattr *tca[TCA_MAX + 1];
   2954	struct Qdisc *q = NULL;
   2955	struct tcf_block *block;
   2956	struct tcmsg *tcm = nlmsg_data(cb->nlh);
   2957	struct tcf_chain *chain;
   2958	long index_start;
   2959	long index;
   2960	int err;
   2961
   2962	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
   2963		return skb->len;
   2964
   2965	err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
   2966				     rtm_tca_policy, cb->extack);
   2967	if (err)
   2968		return err;
   2969
   2970	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
   2971		block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
   2972		if (!block)
   2973			goto out;
   2974	} else {
   2975		const struct Qdisc_class_ops *cops;
   2976		struct net_device *dev;
   2977		unsigned long cl = 0;
   2978
   2979		dev = __dev_get_by_index(net, tcm->tcm_ifindex);
   2980		if (!dev)
   2981			return skb->len;
   2982
   2983		if (!tcm->tcm_parent)
   2984			q = rtnl_dereference(dev->qdisc);
   2985		else
   2986			q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
   2987
   2988		if (!q)
   2989			goto out;
   2990		cops = q->ops->cl_ops;
   2991		if (!cops)
   2992			goto out;
   2993		if (!cops->tcf_block)
   2994			goto out;
   2995		if (TC_H_MIN(tcm->tcm_parent)) {
   2996			cl = cops->find(q, tcm->tcm_parent);
   2997			if (cl == 0)
   2998				goto out;
   2999		}
   3000		block = cops->tcf_block(q, cl, NULL);
   3001		if (!block)
   3002			goto out;
   3003		if (tcf_block_shared(block))
   3004			q = NULL;
   3005	}
   3006
   3007	index_start = cb->args[0];
   3008	index = 0;
   3009
   3010	mutex_lock(&block->lock);
   3011	list_for_each_entry(chain, &block->chain_list, list) {
   3012		if ((tca[TCA_CHAIN] &&
   3013		     nla_get_u32(tca[TCA_CHAIN]) != chain->index))
   3014			continue;
   3015		if (index < index_start) {
   3016			index++;
   3017			continue;
   3018		}
   3019		if (tcf_chain_held_by_acts_only(chain))
   3020			continue;
   3021		err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
   3022					 chain->index, net, skb, block,
   3023					 NETLINK_CB(cb->skb).portid,
   3024					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
   3025					 RTM_NEWCHAIN);
   3026		if (err <= 0)
   3027			break;
   3028		index++;
   3029	}
   3030	mutex_unlock(&block->lock);
   3031
   3032	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
   3033		tcf_block_refcnt_put(block, true);
   3034	cb->args[0] = index;
   3035
   3036out:
    3037	/* If we made no progress, the error (EMSGSIZE) is real */
   3038	if (skb->len == 0 && err)
   3039		return err;
   3040	return skb->len;
   3041}
   3042
   3043void tcf_exts_destroy(struct tcf_exts *exts)
   3044{
   3045#ifdef CONFIG_NET_CLS_ACT
   3046	if (exts->actions) {
   3047		tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
   3048		kfree(exts->actions);
   3049	}
   3050	exts->nr_actions = 0;
   3051#endif
   3052}
   3053EXPORT_SYMBOL(tcf_exts_destroy);
   3054
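       /* Parse and bind the actions attached to a filter. With
        * CONFIG_NET_CLS_ACT, exts->actions is filled either from the
        * old-style police attribute (tb[exts->police]) or from the action
        * nest (tb[exts->action]); without it, any action attribute is
        * rejected with -EOPNOTSUPP.
        */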
   3055int tcf_exts_validate_ex(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
   3056			 struct nlattr *rate_tlv, struct tcf_exts *exts,
   3057			 u32 flags, u32 fl_flags, struct netlink_ext_ack *extack)
   3058{
   3059#ifdef CONFIG_NET_CLS_ACT
   3060	{
   3061		int init_res[TCA_ACT_MAX_PRIO] = {};
   3062		struct tc_action *act;
   3063		size_t attr_size = 0;
   3064
   3065		if (exts->police && tb[exts->police]) {
   3066			struct tc_action_ops *a_o;
   3067
   3068			a_o = tc_action_load_ops(tb[exts->police], true,
   3069						 !(flags & TCA_ACT_FLAGS_NO_RTNL),
   3070						 extack);
   3071			if (IS_ERR(a_o))
   3072				return PTR_ERR(a_o);
   3073			flags |= TCA_ACT_FLAGS_POLICE | TCA_ACT_FLAGS_BIND;
   3074			act = tcf_action_init_1(net, tp, tb[exts->police],
   3075						rate_tlv, a_o, init_res, flags,
   3076						extack);
   3077			module_put(a_o->owner);
   3078			if (IS_ERR(act))
   3079				return PTR_ERR(act);
   3080
   3081			act->type = exts->type = TCA_OLD_COMPAT;
   3082			exts->actions[0] = act;
   3083			exts->nr_actions = 1;
   3084			tcf_idr_insert_many(exts->actions);
   3085		} else if (exts->action && tb[exts->action]) {
   3086			int err;
   3087
   3088			flags |= TCA_ACT_FLAGS_BIND;
   3089			err = tcf_action_init(net, tp, tb[exts->action],
   3090					      rate_tlv, exts->actions, init_res,
   3091					      &attr_size, flags, fl_flags,
   3092					      extack);
   3093			if (err < 0)
   3094				return err;
   3095			exts->nr_actions = err;
   3096		}
   3097	}
   3098#else
   3099	if ((exts->action && tb[exts->action]) ||
   3100	    (exts->police && tb[exts->police])) {
   3101		NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)");
   3102		return -EOPNOTSUPP;
   3103	}
   3104#endif
   3105
   3106	return 0;
   3107}
   3108EXPORT_SYMBOL(tcf_exts_validate_ex);
   3109
   3110int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
   3111		      struct nlattr *rate_tlv, struct tcf_exts *exts,
   3112		      u32 flags, struct netlink_ext_ack *extack)
   3113{
   3114	return tcf_exts_validate_ex(net, tp, tb, rate_tlv, exts,
   3115				    flags, 0, extack);
   3116}
   3117EXPORT_SYMBOL(tcf_exts_validate);
   3118
   3119void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
   3120{
   3121#ifdef CONFIG_NET_CLS_ACT
   3122	struct tcf_exts old = *dst;
   3123
   3124	*dst = *src;
   3125	tcf_exts_destroy(&old);
   3126#endif
   3127}
   3128EXPORT_SYMBOL(tcf_exts_change);
   3129
   3130#ifdef CONFIG_NET_CLS_ACT
   3131static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
   3132{
   3133	if (exts->nr_actions == 0)
   3134		return NULL;
   3135	else
   3136		return exts->actions[0];
   3137}
   3138#endif
   3139
   3140int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
   3141{
   3142#ifdef CONFIG_NET_CLS_ACT
   3143	struct nlattr *nest;
   3144
   3145	if (exts->action && tcf_exts_has_actions(exts)) {
   3146		/*
    3147		 * again for backward-compatible mode - we want
    3148		 * to work with both old and new modes of entering
    3149		 * tc data even if iproute2 was newer - jhs
   3150		 */
   3151		if (exts->type != TCA_OLD_COMPAT) {
   3152			nest = nla_nest_start_noflag(skb, exts->action);
   3153			if (nest == NULL)
   3154				goto nla_put_failure;
   3155
   3156			if (tcf_action_dump(skb, exts->actions, 0, 0, false)
   3157			    < 0)
   3158				goto nla_put_failure;
   3159			nla_nest_end(skb, nest);
   3160		} else if (exts->police) {
   3161			struct tc_action *act = tcf_exts_first_act(exts);
   3162			nest = nla_nest_start_noflag(skb, exts->police);
   3163			if (nest == NULL || !act)
   3164				goto nla_put_failure;
   3165			if (tcf_action_dump_old(skb, act, 0, 0) < 0)
   3166				goto nla_put_failure;
   3167			nla_nest_end(skb, nest);
   3168		}
   3169	}
   3170	return 0;
   3171
   3172nla_put_failure:
   3173	nla_nest_cancel(skb, nest);
   3174	return -1;
   3175#else
   3176	return 0;
   3177#endif
   3178}
   3179EXPORT_SYMBOL(tcf_exts_dump);
   3180
   3181int tcf_exts_terse_dump(struct sk_buff *skb, struct tcf_exts *exts)
   3182{
   3183#ifdef CONFIG_NET_CLS_ACT
   3184	struct nlattr *nest;
   3185
   3186	if (!exts->action || !tcf_exts_has_actions(exts))
   3187		return 0;
   3188
   3189	nest = nla_nest_start_noflag(skb, exts->action);
   3190	if (!nest)
   3191		goto nla_put_failure;
   3192
   3193	if (tcf_action_dump(skb, exts->actions, 0, 0, true) < 0)
   3194		goto nla_put_failure;
   3195	nla_nest_end(skb, nest);
   3196	return 0;
   3197
   3198nla_put_failure:
   3199	nla_nest_cancel(skb, nest);
   3200	return -1;
   3201#else
   3202	return 0;
   3203#endif
   3204}
   3205EXPORT_SYMBOL(tcf_exts_terse_dump);
   3206
   3207int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
   3208{
   3209#ifdef CONFIG_NET_CLS_ACT
   3210	struct tc_action *a = tcf_exts_first_act(exts);
   3211	if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0)
   3212		return -1;
   3213#endif
   3214	return 0;
   3215}
   3216EXPORT_SYMBOL(tcf_exts_dump_stats);
   3217
   3218static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
   3219{
   3220	if (*flags & TCA_CLS_FLAGS_IN_HW)
   3221		return;
   3222	*flags |= TCA_CLS_FLAGS_IN_HW;
   3223	atomic_inc(&block->offloadcnt);
   3224}
   3225
   3226static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
   3227{
   3228	if (!(*flags & TCA_CLS_FLAGS_IN_HW))
   3229		return;
   3230	*flags &= ~TCA_CLS_FLAGS_IN_HW;
   3231	atomic_dec(&block->offloadcnt);
   3232}
   3233
   3234static void tc_cls_offload_cnt_update(struct tcf_block *block,
   3235				      struct tcf_proto *tp, u32 *cnt,
   3236				      u32 *flags, u32 diff, bool add)
   3237{
   3238	lockdep_assert_held(&block->cb_lock);
   3239
   3240	spin_lock(&tp->lock);
   3241	if (add) {
   3242		if (!*cnt)
   3243			tcf_block_offload_inc(block, flags);
   3244		*cnt += diff;
   3245	} else {
   3246		*cnt -= diff;
   3247		if (!*cnt)
   3248			tcf_block_offload_dec(block, flags);
   3249	}
   3250	spin_unlock(&tp->lock);
   3251}
   3252
   3253static void
   3254tc_cls_offload_cnt_reset(struct tcf_block *block, struct tcf_proto *tp,
   3255			 u32 *cnt, u32 *flags)
   3256{
   3257	lockdep_assert_held(&block->cb_lock);
   3258
   3259	spin_lock(&tp->lock);
   3260	tcf_block_offload_dec(block, flags);
   3261	*cnt = 0;
   3262	spin_unlock(&tp->lock);
   3263}
   3264
   3265static int
   3266__tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
   3267		   void *type_data, bool err_stop)
   3268{
   3269	struct flow_block_cb *block_cb;
   3270	int ok_count = 0;
   3271	int err;
   3272
   3273	list_for_each_entry(block_cb, &block->flow_block.cb_list, list) {
   3274		err = block_cb->cb(type, type_data, block_cb->cb_priv);
   3275		if (err) {
   3276			if (err_stop)
   3277				return err;
   3278		} else {
   3279			ok_count++;
   3280		}
   3281	}
   3282	return ok_count;
   3283}
   3284
   3285int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
   3286		     void *type_data, bool err_stop, bool rtnl_held)
   3287{
   3288	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
   3289	int ok_count;
   3290
   3291retry:
   3292	if (take_rtnl)
   3293		rtnl_lock();
   3294	down_read(&block->cb_lock);
   3295	/* Need to obtain rtnl lock if block is bound to devs that require it.
   3296	 * In block bind code cb_lock is obtained while holding rtnl, so we must
    3297	 * obtain the locks in the same order here.
   3298	 */
   3299	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
   3300		up_read(&block->cb_lock);
   3301		take_rtnl = true;
   3302		goto retry;
   3303	}
   3304
   3305	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
   3306
   3307	up_read(&block->cb_lock);
   3308	if (take_rtnl)
   3309		rtnl_unlock();
   3310	return ok_count;
   3311}
   3312EXPORT_SYMBOL(tc_setup_cb_call);
   3313
   3314/* Non-destructive filter add. If filter that wasn't already in hardware is
   3315 * successfully offloaded, increment block offloads counter. On failure,
   3316 * previously offloaded filter is considered to be intact and offloads counter
   3317 * is not decremented.
   3318 */
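       /* Returns 0 on success or a negative errno; a positive number of
        * successful callbacks is only used internally to update in_hw_count
        * and the TCA_CLS_FLAGS_IN_HW flag (note the min(ok_count, 0) below).
        */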
   3319
   3320int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp,
   3321		    enum tc_setup_type type, void *type_data, bool err_stop,
   3322		    u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
   3323{
   3324	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
   3325	int ok_count;
   3326
   3327retry:
   3328	if (take_rtnl)
   3329		rtnl_lock();
   3330	down_read(&block->cb_lock);
   3331	/* Need to obtain rtnl lock if block is bound to devs that require it.
   3332	 * In block bind code cb_lock is obtained while holding rtnl, so we must
    3333	 * obtain the locks in the same order here.
   3334	 */
   3335	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
   3336		up_read(&block->cb_lock);
   3337		take_rtnl = true;
   3338		goto retry;
   3339	}
   3340
   3341	/* Make sure all netdevs sharing this block are offload-capable. */
   3342	if (block->nooffloaddevcnt && err_stop) {
   3343		ok_count = -EOPNOTSUPP;
   3344		goto err_unlock;
   3345	}
   3346
   3347	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
   3348	if (ok_count < 0)
   3349		goto err_unlock;
   3350
   3351	if (tp->ops->hw_add)
   3352		tp->ops->hw_add(tp, type_data);
   3353	if (ok_count > 0)
   3354		tc_cls_offload_cnt_update(block, tp, in_hw_count, flags,
   3355					  ok_count, true);
   3356err_unlock:
   3357	up_read(&block->cb_lock);
   3358	if (take_rtnl)
   3359		rtnl_unlock();
   3360	return min(ok_count, 0);
   3361}
   3362EXPORT_SYMBOL(tc_setup_cb_add);
   3363
   3364/* Destructive filter replace. If filter that wasn't already in hardware is
   3365 * successfully offloaded, increment block offload counter. On failure,
   3366 * previously offloaded filter is considered to be destroyed and offload counter
   3367 * is decremented.
   3368 */
   3369
   3370int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp,
   3371			enum tc_setup_type type, void *type_data, bool err_stop,
   3372			u32 *old_flags, unsigned int *old_in_hw_count,
   3373			u32 *new_flags, unsigned int *new_in_hw_count,
   3374			bool rtnl_held)
   3375{
   3376	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
   3377	int ok_count;
   3378
   3379retry:
   3380	if (take_rtnl)
   3381		rtnl_lock();
   3382	down_read(&block->cb_lock);
   3383	/* Need to obtain rtnl lock if block is bound to devs that require it.
   3384	 * In block bind code cb_lock is obtained while holding rtnl, so we must
    3385	 * obtain the locks in the same order here.
   3386	 */
   3387	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
   3388		up_read(&block->cb_lock);
   3389		take_rtnl = true;
   3390		goto retry;
   3391	}
   3392
   3393	/* Make sure all netdevs sharing this block are offload-capable. */
   3394	if (block->nooffloaddevcnt && err_stop) {
   3395		ok_count = -EOPNOTSUPP;
   3396		goto err_unlock;
   3397	}
   3398
   3399	tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags);
   3400	if (tp->ops->hw_del)
   3401		tp->ops->hw_del(tp, type_data);
   3402
   3403	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
   3404	if (ok_count < 0)
   3405		goto err_unlock;
   3406
   3407	if (tp->ops->hw_add)
   3408		tp->ops->hw_add(tp, type_data);
   3409	if (ok_count > 0)
   3410		tc_cls_offload_cnt_update(block, tp, new_in_hw_count,
   3411					  new_flags, ok_count, true);
   3412err_unlock:
   3413	up_read(&block->cb_lock);
   3414	if (take_rtnl)
   3415		rtnl_unlock();
   3416	return min(ok_count, 0);
   3417}
   3418EXPORT_SYMBOL(tc_setup_cb_replace);
   3419
   3420/* Destroy filter and decrement block offload counter, if filter was previously
   3421 * offloaded.
   3422 */
   3423
   3424int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp,
   3425			enum tc_setup_type type, void *type_data, bool err_stop,
   3426			u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
   3427{
   3428	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
   3429	int ok_count;
   3430
   3431retry:
   3432	if (take_rtnl)
   3433		rtnl_lock();
   3434	down_read(&block->cb_lock);
   3435	/* Need to obtain rtnl lock if block is bound to devs that require it.
   3436	 * In block bind code cb_lock is obtained while holding rtnl, so we must
    3437	 * obtain the locks in the same order here.
   3438	 */
   3439	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
   3440		up_read(&block->cb_lock);
   3441		take_rtnl = true;
   3442		goto retry;
   3443	}
   3444
   3445	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
   3446
   3447	tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags);
   3448	if (tp->ops->hw_del)
   3449		tp->ops->hw_del(tp, type_data);
   3450
   3451	up_read(&block->cb_lock);
   3452	if (take_rtnl)
   3453		rtnl_unlock();
   3454	return min(ok_count, 0);
   3455}
   3456EXPORT_SYMBOL(tc_setup_cb_destroy);
   3457
   3458int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp,
   3459			  bool add, flow_setup_cb_t *cb,
   3460			  enum tc_setup_type type, void *type_data,
   3461			  void *cb_priv, u32 *flags, unsigned int *in_hw_count)
   3462{
   3463	int err = cb(type, type_data, cb_priv);
   3464
   3465	if (err) {
   3466		if (add && tc_skip_sw(*flags))
   3467			return err;
   3468	} else {
   3469		tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 1,
   3470					  add);
   3471	}
   3472
   3473	return 0;
   3474}
   3475EXPORT_SYMBOL(tc_setup_cb_reoffload);
   3476
   3477static int tcf_act_get_cookie(struct flow_action_entry *entry,
   3478			      const struct tc_action *act)
   3479{
   3480	struct tc_cookie *cookie;
   3481	int err = 0;
   3482
   3483	rcu_read_lock();
   3484	cookie = rcu_dereference(act->act_cookie);
   3485	if (cookie) {
   3486		entry->cookie = flow_action_cookie_create(cookie->data,
   3487							  cookie->len,
   3488							  GFP_ATOMIC);
   3489		if (!entry->cookie)
   3490			err = -ENOMEM;
   3491	}
   3492	rcu_read_unlock();
   3493	return err;
   3494}
   3495
   3496static void tcf_act_put_cookie(struct flow_action_entry *entry)
   3497{
   3498	flow_action_cookie_destroy(entry->cookie);
   3499}
   3500
   3501void tc_cleanup_offload_action(struct flow_action *flow_action)
   3502{
   3503	struct flow_action_entry *entry;
   3504	int i;
   3505
   3506	flow_action_for_each(i, entry, flow_action) {
   3507		tcf_act_put_cookie(entry);
   3508		if (entry->destructor)
   3509			entry->destructor(entry->destructor_priv);
   3510	}
   3511}
   3512EXPORT_SYMBOL(tc_cleanup_offload_action);
   3513
   3514static int tc_setup_offload_act(struct tc_action *act,
   3515				struct flow_action_entry *entry,
   3516				u32 *index_inc,
   3517				struct netlink_ext_ack *extack)
   3518{
   3519#ifdef CONFIG_NET_CLS_ACT
   3520	if (act->ops->offload_act_setup) {
   3521		return act->ops->offload_act_setup(act, entry, index_inc, true,
   3522						   extack);
   3523	} else {
   3524		NL_SET_ERR_MSG(extack, "Action does not support offload");
   3525		return -EOPNOTSUPP;
   3526	}
   3527#else
   3528	return 0;
   3529#endif
   3530}
   3531
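       /* Translate an array of tc_action pointers into flow_action entries
        * for hardware offload. A single action may expand into several
        * entries: the per-action setup callback advances the entry index by
        * index_inc (pedit, for example, typically expands to one entry per
        * key).
        */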
   3532int tc_setup_action(struct flow_action *flow_action,
   3533		    struct tc_action *actions[],
   3534		    struct netlink_ext_ack *extack)
   3535{
   3536	int i, j, index, err = 0;
   3537	struct tc_action *act;
   3538
   3539	BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY != FLOW_ACTION_HW_STATS_ANY);
   3540	BUILD_BUG_ON(TCA_ACT_HW_STATS_IMMEDIATE != FLOW_ACTION_HW_STATS_IMMEDIATE);
   3541	BUILD_BUG_ON(TCA_ACT_HW_STATS_DELAYED != FLOW_ACTION_HW_STATS_DELAYED);
   3542
   3543	if (!actions)
   3544		return 0;
   3545
   3546	j = 0;
   3547	tcf_act_for_each_action(i, act, actions) {
   3548		struct flow_action_entry *entry;
   3549
   3550		entry = &flow_action->entries[j];
   3551		spin_lock_bh(&act->tcfa_lock);
   3552		err = tcf_act_get_cookie(entry, act);
   3553		if (err)
   3554			goto err_out_locked;
   3555
   3556		entry->hw_stats = tc_act_hw_stats(act->hw_stats);
   3557		entry->hw_index = act->tcfa_index;
   3558		index = 0;
   3559		err = tc_setup_offload_act(act, entry, &index, extack);
   3560		if (!err)
   3561			j += index;
   3562		else
   3563			goto err_out_locked;
   3564		spin_unlock_bh(&act->tcfa_lock);
   3565	}
   3566
   3567err_out:
   3568	if (err)
   3569		tc_cleanup_offload_action(flow_action);
   3570
   3571	return err;
   3572err_out_locked:
   3573	spin_unlock_bh(&act->tcfa_lock);
   3574	goto err_out;
   3575}
   3576
   3577int tc_setup_offload_action(struct flow_action *flow_action,
   3578			    const struct tcf_exts *exts,
   3579			    struct netlink_ext_ack *extack)
   3580{
   3581#ifdef CONFIG_NET_CLS_ACT
   3582	if (!exts)
   3583		return 0;
   3584
   3585	return tc_setup_action(flow_action, exts->actions, extack);
   3586#else
   3587	return 0;
   3588#endif
   3589}
   3590EXPORT_SYMBOL(tc_setup_offload_action);
   3591
   3592unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
   3593{
   3594	unsigned int num_acts = 0;
   3595	struct tc_action *act;
   3596	int i;
   3597
   3598	tcf_exts_for_each_action(i, act, exts) {
   3599		if (is_tcf_pedit(act))
   3600			num_acts += tcf_pedit_nkeys(act);
   3601		else
   3602			num_acts++;
   3603	}
   3604	return num_acts;
   3605}
   3606EXPORT_SYMBOL(tcf_exts_num_actions);
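
/*
 * Editorial sketch (not part of the kernel sources): the usual translation
 * pipeline sizes a struct flow_rule with tcf_exts_num_actions(), converts
 * the filter's actions with tc_setup_offload_action(), and releases the
 * per-entry cookies and destructors with tc_cleanup_offload_action() once
 * the offload request has been issued.  Error handling is trimmed down;
 * the pattern follows what classifiers such as cls_flower and cls_matchall
 * do when building their offload requests.
 */
static struct flow_rule *example_build_flow_rule(struct tcf_exts *exts,
						 struct netlink_ext_ack *extack)
{
	struct flow_rule *rule;
	int err;

	rule = flow_rule_alloc(tcf_exts_num_actions(exts));
	if (!rule)
		return ERR_PTR(-ENOMEM);

	err = tc_setup_offload_action(&rule->action, exts, extack);
	if (err) {
		kfree(rule);
		return ERR_PTR(err);
	}

	/* ... hand "rule" to the driver via a flow_cls_offload command ... */

	return rule;
}

static void example_release_flow_rule(struct flow_rule *rule)
{
	tc_cleanup_offload_action(&rule->action);
	kfree(rule);
}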
   3607
   3608#ifdef CONFIG_NET_CLS_ACT
   3609static int tcf_qevent_parse_block_index(struct nlattr *block_index_attr,
   3610					u32 *p_block_index,
   3611					struct netlink_ext_ack *extack)
   3612{
   3613	*p_block_index = nla_get_u32(block_index_attr);
   3614	if (!*p_block_index) {
   3615		NL_SET_ERR_MSG(extack, "Block number may not be zero");
   3616		return -EINVAL;
   3617	}
   3618
   3619	return 0;
   3620}
   3621
   3622int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch,
   3623		    enum flow_block_binder_type binder_type,
   3624		    struct nlattr *block_index_attr,
   3625		    struct netlink_ext_ack *extack)
   3626{
   3627	u32 block_index;
   3628	int err;
   3629
   3630	if (!block_index_attr)
   3631		return 0;
   3632
   3633	err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack);
   3634	if (err)
   3635		return err;
   3636
   3637	if (!block_index)
   3638		return 0;
   3639
   3640	qe->info.binder_type = binder_type;
   3641	qe->info.chain_head_change = tcf_chain_head_change_dflt;
   3642	qe->info.chain_head_change_priv = &qe->filter_chain;
   3643	qe->info.block_index = block_index;
   3644
   3645	return tcf_block_get_ext(&qe->block, sch, &qe->info, extack);
   3646}
   3647EXPORT_SYMBOL(tcf_qevent_init);
   3648
   3649void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch)
   3650{
   3651	if (qe->info.block_index)
   3652		tcf_block_put_ext(qe->block, sch, &qe->info);
   3653}
   3654EXPORT_SYMBOL(tcf_qevent_destroy);
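
/*
 * Editorial sketch (not part of the kernel sources): a qdisc that exposes a
 * qevent embeds a struct tcf_qevent, binds it to a shared block in its init
 * path and releases it on destroy.  FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP and
 * TCA_RED_EARLY_DROP_BLOCK are the binder type and netlink attribute sch_red
 * uses for its "early_drop" qevent; "struct example_sched_data" stands in for
 * the qdisc's private data.
 */
struct example_sched_data {
	struct tcf_qevent qe_early_drop;
};

static int example_qdisc_init(struct Qdisc *sch, struct nlattr **tb,
			      struct netlink_ext_ack *extack)
{
	struct example_sched_data *q = qdisc_priv(sch);

	return tcf_qevent_init(&q->qe_early_drop, sch,
			       FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
			       tb[TCA_RED_EARLY_DROP_BLOCK], extack);
}

static void example_qdisc_destroy(struct Qdisc *sch)
{
	struct example_sched_data *q = qdisc_priv(sch);

	tcf_qevent_destroy(&q->qe_early_drop, sch);
}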
   3655
   3656int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr,
   3657			       struct netlink_ext_ack *extack)
   3658{
   3659	u32 block_index;
   3660	int err;
   3661
   3662	if (!block_index_attr)
   3663		return 0;
   3664
   3665	err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack);
   3666	if (err)
   3667		return err;
   3668
    3669	/* Reject binding a new block or changing the bound block. */
   3670	if (block_index != qe->info.block_index) {
   3671		NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
   3672		return -EINVAL;
   3673	}
   3674
   3675	return 0;
   3676}
   3677EXPORT_SYMBOL(tcf_qevent_validate_change);
   3678
   3679struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb,
   3680				  struct sk_buff **to_free, int *ret)
   3681{
   3682	struct tcf_result cl_res;
   3683	struct tcf_proto *fl;
   3684
   3685	if (!qe->info.block_index)
   3686		return skb;
   3687
   3688	fl = rcu_dereference_bh(qe->filter_chain);
   3689
   3690	switch (tcf_classify(skb, NULL, fl, &cl_res, false)) {
   3691	case TC_ACT_SHOT:
   3692		qdisc_qstats_drop(sch);
   3693		__qdisc_drop(skb, to_free);
   3694		*ret = __NET_XMIT_BYPASS;
   3695		return NULL;
   3696	case TC_ACT_STOLEN:
   3697	case TC_ACT_QUEUED:
   3698	case TC_ACT_TRAP:
   3699		__qdisc_drop(skb, to_free);
   3700		*ret = __NET_XMIT_STOLEN;
   3701		return NULL;
   3702	case TC_ACT_REDIRECT:
   3703		skb_do_redirect(skb);
   3704		*ret = __NET_XMIT_STOLEN;
   3705		return NULL;
   3706	}
   3707
   3708	return skb;
   3709}
   3710EXPORT_SYMBOL(tcf_qevent_handle);
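
/*
 * Editorial sketch (not part of the kernel sources): tcf_qevent_handle() is
 * called from the datapath at the point where the qevent fires.  It runs the
 * bound block's classifiers on the skb and either returns the skb for the
 * caller to keep processing, or consumes it (dropped, stolen or redirected)
 * and returns NULL with *ret set to the __NET_XMIT_* code to propagate.
 * Modeled on sch_red's early-drop handling, reusing struct example_sched_data
 * from the sketch above; limit and statistics handling is elided.
 */
static int example_early_drop(struct Qdisc *sch, struct sk_buff *skb,
			      struct sk_buff **to_free)
{
	struct example_sched_data *q = qdisc_priv(sch);
	int ret;

	skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret);
	if (!skb)
		return NET_XMIT_CN | ret;

	/* The qevent block did not consume the skb; drop it here. */
	qdisc_qstats_overlimit(sch);
	return qdisc_drop(skb, sch, to_free);
}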
   3711
   3712int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe)
   3713{
   3714	if (!qe->info.block_index)
   3715		return 0;
   3716	return nla_put_u32(skb, attr_name, qe->info.block_index);
   3717}
   3718EXPORT_SYMBOL(tcf_qevent_dump);
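
/*
 * Editorial sketch (not part of the kernel sources): in the qdisc's ->dump()
 * op the bound block index, if any, is reported back to userspace with
 * tcf_qevent_dump(); TCA_RED_EARLY_DROP_BLOCK again stands in for the
 * qdisc-specific attribute.
 */
static int example_qdisc_dump_qevent(struct Qdisc *sch, struct sk_buff *skb)
{
	struct example_sched_data *q = qdisc_priv(sch);

	return tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK,
			       &q->qe_early_drop);
}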
   3719#endif
   3720
   3721static __net_init int tcf_net_init(struct net *net)
   3722{
   3723	struct tcf_net *tn = net_generic(net, tcf_net_id);
   3724
   3725	spin_lock_init(&tn->idr_lock);
   3726	idr_init(&tn->idr);
   3727	return 0;
   3728}
   3729
   3730static void __net_exit tcf_net_exit(struct net *net)
   3731{
   3732	struct tcf_net *tn = net_generic(net, tcf_net_id);
   3733
   3734	idr_destroy(&tn->idr);
   3735}
   3736
   3737static struct pernet_operations tcf_net_ops = {
   3738	.init = tcf_net_init,
   3739	.exit = tcf_net_exit,
   3740	.id   = &tcf_net_id,
   3741	.size = sizeof(struct tcf_net),
   3742};
   3743
   3744static int __init tc_filter_init(void)
   3745{
   3746	int err;
   3747
   3748	tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
   3749	if (!tc_filter_wq)
   3750		return -ENOMEM;
   3751
   3752	err = register_pernet_subsys(&tcf_net_ops);
   3753	if (err)
   3754		goto err_register_pernet_subsys;
   3755
   3756	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
   3757		      RTNL_FLAG_DOIT_UNLOCKED);
   3758	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
   3759		      RTNL_FLAG_DOIT_UNLOCKED);
   3760	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
   3761		      tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED);
   3762	rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
   3763	rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
   3764	rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
   3765		      tc_dump_chain, 0);
   3766
   3767	return 0;
   3768
   3769err_register_pernet_subsys:
   3770	destroy_workqueue(tc_filter_wq);
   3771	return err;
   3772}
   3773
   3774subsys_initcall(tc_filter_init);