cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

sch_htb.c (57943B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
      4 *
      5 * Authors:	Martin Devera, <devik@cdi.cz>
      6 *
      7 * Credits (in time order) for older HTB versions:
      8 *              Stef Coene <stef.coene@docum.org>
      9 *			HTB support at LARTC mailing list
     10 *		Ondrej Kraus, <krauso@barr.cz>
     11 *			found missing INIT_QDISC(htb)
     12 *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
     13 *			helped a lot to locate nasty class stall bug
     14 *		Andi Kleen, Jamal Hadi, Bert Hubert
     15 *			code review and helpful comments on shaping
     16 *		Tomasz Wrona, <tw@eter.tym.pl>
     17 *			created test case so that I was able to fix nasty bug
     18 *		Wilfried Weissmann
     19 *			spotted bug in dequeue code and helped with fix
     20 *		Jiri Fojtasek
     21 *			fixed requeue routine
     22 *		and many others. thanks.
     23 */
     24#include <linux/module.h>
     25#include <linux/moduleparam.h>
     26#include <linux/types.h>
     27#include <linux/kernel.h>
     28#include <linux/string.h>
     29#include <linux/errno.h>
     30#include <linux/skbuff.h>
     31#include <linux/list.h>
     32#include <linux/compiler.h>
     33#include <linux/rbtree.h>
     34#include <linux/workqueue.h>
     35#include <linux/slab.h>
     36#include <net/netlink.h>
     37#include <net/sch_generic.h>
     38#include <net/pkt_sched.h>
     39#include <net/pkt_cls.h>
     40
     41/* HTB algorithm.
     42    Author: devik@cdi.cz
     43    ========================================================================
     44    HTB is like TBF with multiple classes. It is also similar to CBQ because
      45    it allows assigning a priority to each class in the hierarchy.
     46    In fact it is another implementation of Floyd's formal sharing.
     47
     48    Levels:
      49    Each class is assigned a level. A leaf ALWAYS has level 0 and root
      50    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
      51    one less than their parent.
     52*/
     53
     54static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
     55#define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */
     56
     57#if HTB_VER >> 16 != TC_HTB_PROTOVER
     58#error "Mismatched sch_htb.c and pkt_sch.h"
     59#endif
     60
     61/* Module parameter and sysfs export */
     62module_param    (htb_hysteresis, int, 0640);
     63MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
     64
      65static int htb_rate_est = 0; /* whether htb classes get a default rate estimator */
     66module_param(htb_rate_est, int, 0640);
     67MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
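/* Usage sketch, assuming sch_htb is built as a module: module_param()
 * with mode 0640 exposes both knobs under /sys/module/sch_htb/parameters/,
 * so they can be read at runtime and flipped by root, e.g.
 *
 *	echo 1 > /sys/module/sch_htb/parameters/htb_hysteresis
 *
 * enables hysteresis without reloading the qdisc.
 */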
     68
      69/* used internally to keep the status of a single class */
     70enum htb_cmode {
     71	HTB_CANT_SEND,		/* class can't send and can't borrow */
     72	HTB_MAY_BORROW,		/* class can't send but may borrow */
     73	HTB_CAN_SEND		/* class can send */
     74};
     75
     76struct htb_prio {
     77	union {
     78		struct rb_root	row;
     79		struct rb_root	feed;
     80	};
     81	struct rb_node	*ptr;
      82	/* When a class changes from state 1->2 and disconnects from
      83	 * its parent's feed, we lose the ptr value and start from the
      84	 * first child again. Here we store the classid of the
      85	 * last valid ptr (used when ptr is NULL).
     86	 */
     87	u32		last_ptr_id;
     88};
     89
     90/* interior & leaf nodes; props specific to leaves are marked L:
     91 * To reduce false sharing, place mostly read fields at beginning,
     92 * and mostly written ones at the end.
     93 */
     94struct htb_class {
     95	struct Qdisc_class_common common;
     96	struct psched_ratecfg	rate;
     97	struct psched_ratecfg	ceil;
     98	s64			buffer, cbuffer;/* token bucket depth/rate */
     99	s64			mbuffer;	/* max wait time */
    100	u32			prio;		/* these two are used only by leaves... */
    101	int			quantum;	/* but stored for parent-to-leaf return */
    102
    103	struct tcf_proto __rcu	*filter_list;	/* class attached filters */
    104	struct tcf_block	*block;
    105	int			filter_cnt;
    106
    107	int			level;		/* our level (see above) */
    108	unsigned int		children;
    109	struct htb_class	*parent;	/* parent class */
    110
    111	struct net_rate_estimator __rcu *rate_est;
    112
    113	/*
    114	 * Written often fields
    115	 */
    116	struct gnet_stats_basic_sync bstats;
    117	struct gnet_stats_basic_sync bstats_bias;
    118	struct tc_htb_xstats	xstats;	/* our special stats */
    119
    120	/* token bucket parameters */
    121	s64			tokens, ctokens;/* current number of tokens */
    122	s64			t_c;		/* checkpoint time */
    123
    124	union {
    125		struct htb_class_leaf {
    126			int		deficit[TC_HTB_MAXDEPTH];
    127			struct Qdisc	*q;
    128			struct netdev_queue *offload_queue;
    129		} leaf;
    130		struct htb_class_inner {
    131			struct htb_prio clprio[TC_HTB_NUMPRIO];
    132		} inner;
    133	};
    134	s64			pq_key;
    135
    136	int			prio_activity;	/* for which prios are we active */
    137	enum htb_cmode		cmode;		/* current mode of the class */
    138	struct rb_node		pq_node;	/* node for event queue */
    139	struct rb_node		node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
    140
    141	unsigned int drops ____cacheline_aligned_in_smp;
    142	unsigned int		overlimits;
    143};
    144
    145struct htb_level {
    146	struct rb_root	wait_pq;
    147	struct htb_prio hprio[TC_HTB_NUMPRIO];
    148};
    149
    150struct htb_sched {
    151	struct Qdisc_class_hash clhash;
    152	int			defcls;		/* class where unclassified flows go to */
    153	int			rate2quantum;	/* quant = rate / rate2quantum */
    154
    155	/* filters for qdisc itself */
    156	struct tcf_proto __rcu	*filter_list;
    157	struct tcf_block	*block;
    158
    159#define HTB_WARN_TOOMANYEVENTS	0x1
    160	unsigned int		warned;	/* only one warning */
    161	int			direct_qlen;
    162	struct work_struct	work;
    163
    164	/* non shaped skbs; let them go directly thru */
    165	struct qdisc_skb_head	direct_queue;
    166	u32			direct_pkts;
    167	u32			overlimits;
    168
    169	struct qdisc_watchdog	watchdog;
    170
    171	s64			now;	/* cached dequeue time */
    172
    173	/* time of nearest event per level (row) */
    174	s64			near_ev_cache[TC_HTB_MAXDEPTH];
    175
    176	int			row_mask[TC_HTB_MAXDEPTH];
    177
    178	struct htb_level	hlevel[TC_HTB_MAXDEPTH];
    179
    180	struct Qdisc		**direct_qdiscs;
    181	unsigned int            num_direct_qdiscs;
    182
    183	bool			offload;
    184};
    185
    186/* find class in global hash table using given handle */
    187static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
    188{
    189	struct htb_sched *q = qdisc_priv(sch);
    190	struct Qdisc_class_common *clc;
    191
    192	clc = qdisc_class_find(&q->clhash, handle);
    193	if (clc == NULL)
    194		return NULL;
    195	return container_of(clc, struct htb_class, common);
    196}
    197
    198static unsigned long htb_search(struct Qdisc *sch, u32 handle)
    199{
    200	return (unsigned long)htb_find(handle, sch);
    201}
    202/**
    203 * htb_classify - classify a packet into class
    204 *
     205 * It returns NULL if the packet should be dropped or -1 if the packet
     206 * should be passed directly thru. In all other cases a leaf class is returned.
     207 * We allow direct class selection by putting a valid classid in skb->priority.
     208 * Then we examine filters in the qdisc and in inner nodes (if a higher filter
     209 * points to the inner node). If we end up with classid MAJOR:0 we enqueue the
     210 * skb into the special internal fifo (direct). These packets then go directly
     211 * thru. If we still have no valid leaf we try to use the MAJOR:default leaf.
     212 * If that is still unsuccessful, we finish and return the direct queue.
    213 */
    214#define HTB_DIRECT ((struct htb_class *)-1L)
    215
    216static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
    217				      int *qerr)
    218{
    219	struct htb_sched *q = qdisc_priv(sch);
    220	struct htb_class *cl;
    221	struct tcf_result res;
    222	struct tcf_proto *tcf;
    223	int result;
    224
    225	/* allow to select class by setting skb->priority to valid classid;
    226	 * note that nfmark can be used too by attaching filter fw with no
    227	 * rules in it
    228	 */
    229	if (skb->priority == sch->handle)
    230		return HTB_DIRECT;	/* X:0 (direct flow) selected */
    231	cl = htb_find(skb->priority, sch);
    232	if (cl) {
    233		if (cl->level == 0)
    234			return cl;
    235		/* Start with inner filter chain if a non-leaf class is selected */
    236		tcf = rcu_dereference_bh(cl->filter_list);
    237	} else {
    238		tcf = rcu_dereference_bh(q->filter_list);
    239	}
    240
    241	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
    242	while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) {
    243#ifdef CONFIG_NET_CLS_ACT
    244		switch (result) {
    245		case TC_ACT_QUEUED:
    246		case TC_ACT_STOLEN:
    247		case TC_ACT_TRAP:
    248			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
    249			fallthrough;
    250		case TC_ACT_SHOT:
    251			return NULL;
    252		}
    253#endif
    254		cl = (void *)res.class;
    255		if (!cl) {
    256			if (res.classid == sch->handle)
    257				return HTB_DIRECT;	/* X:0 (direct flow) */
    258			cl = htb_find(res.classid, sch);
    259			if (!cl)
    260				break;	/* filter selected invalid classid */
    261		}
    262		if (!cl->level)
    263			return cl;	/* we hit leaf; return it */
    264
    265		/* we have got inner class; apply inner filter chain */
    266		tcf = rcu_dereference_bh(cl->filter_list);
    267	}
    268	/* classification failed; try to use default class */
    269	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
    270	if (!cl || cl->level)
    271		return HTB_DIRECT;	/* bad default .. this is safe bet */
    272	return cl;
    273}
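/* Illustrative flow (hypothetical handles, not from this file): with the
 * qdisc installed as 1: (handle 0x10000) and defcls 0x20, a packet with
 * skb->priority == 0x10000 goes to the direct queue, one with
 * skb->priority == 0x1000a is handed straight to leaf 1:10 without
 * consulting any filter, and a packet the filter chains fail to classify
 * falls back to class 1:20 if that class is a leaf, or to the direct
 * queue otherwise.
 */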
    274
    275/**
    276 * htb_add_to_id_tree - adds class to the round robin list
    277 * @root: the root of the tree
    278 * @cl: the class to add
     279 * @prio: the given prio in class
    280 *
    281 * Routine adds class to the list (actually tree) sorted by classid.
    282 * Make sure that class is not already on such list for given prio.
    283 */
    284static void htb_add_to_id_tree(struct rb_root *root,
    285			       struct htb_class *cl, int prio)
    286{
    287	struct rb_node **p = &root->rb_node, *parent = NULL;
    288
    289	while (*p) {
    290		struct htb_class *c;
    291		parent = *p;
    292		c = rb_entry(parent, struct htb_class, node[prio]);
    293
    294		if (cl->common.classid > c->common.classid)
    295			p = &parent->rb_right;
    296		else
    297			p = &parent->rb_left;
    298	}
    299	rb_link_node(&cl->node[prio], parent, p);
    300	rb_insert_color(&cl->node[prio], root);
    301}
    302
    303/**
    304 * htb_add_to_wait_tree - adds class to the event queue with delay
    305 * @q: the priority event queue
    306 * @cl: the class to add
     307 * @delay: delay in nanoseconds
    308 *
     309 * The class is added to the priority event queue to indicate that the class
     310 * will change its mode at time cl->pq_key (in nanoseconds). Make sure that
     311 * the class is not already in the queue.
    312 */
    313static void htb_add_to_wait_tree(struct htb_sched *q,
    314				 struct htb_class *cl, s64 delay)
    315{
    316	struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
    317
    318	cl->pq_key = q->now + delay;
    319	if (cl->pq_key == q->now)
    320		cl->pq_key++;
    321
    322	/* update the nearest event cache */
    323	if (q->near_ev_cache[cl->level] > cl->pq_key)
    324		q->near_ev_cache[cl->level] = cl->pq_key;
    325
    326	while (*p) {
    327		struct htb_class *c;
    328		parent = *p;
    329		c = rb_entry(parent, struct htb_class, pq_node);
    330		if (cl->pq_key >= c->pq_key)
    331			p = &parent->rb_right;
    332		else
    333			p = &parent->rb_left;
    334	}
    335	rb_link_node(&cl->pq_node, parent, p);
    336	rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
    337}
    338
    339/**
    340 * htb_next_rb_node - finds next node in binary tree
    341 * @n: the current node in binary tree
    342 *
    343 * When we are past last key we return NULL.
    344 * Average complexity is 2 steps per call.
    345 */
    346static inline void htb_next_rb_node(struct rb_node **n)
    347{
    348	*n = rb_next(*n);
    349}
    350
    351/**
    352 * htb_add_class_to_row - add class to its row
    353 * @q: the priority event queue
    354 * @cl: the class to add
    355 * @mask: the given priorities in class in bitmap
    356 *
    357 * The class is added to row at priorities marked in mask.
    358 * It does nothing if mask == 0.
    359 */
    360static inline void htb_add_class_to_row(struct htb_sched *q,
    361					struct htb_class *cl, int mask)
    362{
    363	q->row_mask[cl->level] |= mask;
    364	while (mask) {
    365		int prio = ffz(~mask);
    366		mask &= ~(1 << prio);
    367		htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
    368	}
    369}
    370
    371/* If this triggers, it is a bug in this code, but it need not be fatal */
    372static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
    373{
    374	if (RB_EMPTY_NODE(rb)) {
    375		WARN_ON(1);
    376	} else {
    377		rb_erase(rb, root);
    378		RB_CLEAR_NODE(rb);
    379	}
    380}
    381
    382
    383/**
    384 * htb_remove_class_from_row - removes class from its row
    385 * @q: the priority event queue
     386 * @cl: the class to remove
    387 * @mask: the given priorities in class in bitmap
    388 *
    389 * The class is removed from row at priorities marked in mask.
    390 * It does nothing if mask == 0.
    391 */
    392static inline void htb_remove_class_from_row(struct htb_sched *q,
    393						 struct htb_class *cl, int mask)
    394{
    395	int m = 0;
    396	struct htb_level *hlevel = &q->hlevel[cl->level];
    397
    398	while (mask) {
    399		int prio = ffz(~mask);
    400		struct htb_prio *hprio = &hlevel->hprio[prio];
    401
    402		mask &= ~(1 << prio);
    403		if (hprio->ptr == cl->node + prio)
    404			htb_next_rb_node(&hprio->ptr);
    405
    406		htb_safe_rb_erase(cl->node + prio, &hprio->row);
    407		if (!hprio->row.rb_node)
    408			m |= 1 << prio;
    409	}
    410	q->row_mask[cl->level] &= ~m;
    411}
    412
    413/**
     414 * htb_activate_prios - creates an active class's feed chain
    415 * @q: the priority event queue
    416 * @cl: the class to activate
    417 *
    418 * The class is connected to ancestors and/or appropriate rows
     419 * for priorities it is participating in. cl->cmode must be the new
    420 * (activated) mode. It does nothing if cl->prio_activity == 0.
    421 */
    422static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
    423{
    424	struct htb_class *p = cl->parent;
    425	long m, mask = cl->prio_activity;
    426
    427	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
    428		m = mask;
    429		while (m) {
    430			int prio = ffz(~m);
    431			m &= ~(1 << prio);
    432
    433			if (p->inner.clprio[prio].feed.rb_node)
     434				/* parent already has its feed in use, so
     435				 * reset the bit in mask as the parent is already ok
    436				 */
    437				mask &= ~(1 << prio);
    438
    439			htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
    440		}
    441		p->prio_activity |= mask;
    442		cl = p;
    443		p = cl->parent;
    444
    445	}
    446	if (cl->cmode == HTB_CAN_SEND && mask)
    447		htb_add_class_to_row(q, cl, mask);
    448}
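/* Example walk, assuming a leaf with prio 2 active: while the leaf and its
 * ancestors are in HTB_MAY_BORROW mode, each one is linked into its
 * parent's inner.clprio[2].feed tree; if the walk stops at an ancestor in
 * HTB_CAN_SEND mode, that ancestor is added to the prio-2 row at its
 * level, which is what the dequeue path scans.
 */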
    449
    450/**
    451 * htb_deactivate_prios - remove class from feed chain
    452 * @q: the priority event queue
    453 * @cl: the class to deactivate
    454 *
    455 * cl->cmode must represent old mode (before deactivation). It does
    456 * nothing if cl->prio_activity == 0. Class is removed from all feed
    457 * chains and rows.
    458 */
    459static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
    460{
    461	struct htb_class *p = cl->parent;
    462	long m, mask = cl->prio_activity;
    463
    464	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
    465		m = mask;
    466		mask = 0;
    467		while (m) {
    468			int prio = ffz(~m);
    469			m &= ~(1 << prio);
    470
    471			if (p->inner.clprio[prio].ptr == cl->node + prio) {
    472				/* we are removing child which is pointed to from
    473				 * parent feed - forget the pointer but remember
    474				 * classid
    475				 */
    476				p->inner.clprio[prio].last_ptr_id = cl->common.classid;
    477				p->inner.clprio[prio].ptr = NULL;
    478			}
    479
    480			htb_safe_rb_erase(cl->node + prio,
    481					  &p->inner.clprio[prio].feed);
    482
    483			if (!p->inner.clprio[prio].feed.rb_node)
    484				mask |= 1 << prio;
    485		}
    486
    487		p->prio_activity &= ~mask;
    488		cl = p;
    489		p = cl->parent;
    490
    491	}
    492	if (cl->cmode == HTB_CAN_SEND && mask)
    493		htb_remove_class_from_row(q, cl, mask);
    494}
    495
    496static inline s64 htb_lowater(const struct htb_class *cl)
    497{
    498	if (htb_hysteresis)
    499		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
    500	else
    501		return 0;
    502}
    503static inline s64 htb_hiwater(const struct htb_class *cl)
    504{
    505	if (htb_hysteresis)
    506		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
    507	else
    508		return 0;
    509}
    510
    511
    512/**
    513 * htb_class_mode - computes and returns current class mode
    514 * @cl: the target class
     515 * @diff: diff time in nanoseconds
    516 *
    517 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
    518 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
    519 * from now to time when cl will change its state.
     520 * Also it is worth noting that the class mode doesn't change simply
     521 * at cl->{c,}tokens == 0; rather, there can be hysteresis over the
     522 * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
    523 * mode transitions per time unit. The speed gain is about 1/6.
    524 */
    525static inline enum htb_cmode
    526htb_class_mode(struct htb_class *cl, s64 *diff)
    527{
    528	s64 toks;
    529
    530	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
    531		*diff = -toks;
    532		return HTB_CANT_SEND;
    533	}
    534
    535	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
    536		return HTB_CAN_SEND;
    537
    538	*diff = -toks;
    539	return HTB_MAY_BORROW;
    540}
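/* Worked example with htb_hysteresis == 0 (both watermarks are then 0):
 * if cl->ctokens + *diff == -5000, the ceil bucket is in debt, so the
 * class is HTB_CANT_SEND and *diff becomes 5000, i.e. the nanoseconds
 * until the ceil bucket reaches zero again; if the ceil bucket is fine
 * but cl->tokens + *diff is negative, the class may only borrow
 * (HTB_MAY_BORROW); otherwise it is HTB_CAN_SEND.
 */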
    541
    542/**
     543 * htb_change_class_mode - changes a class's mode
    544 * @q: the priority event queue
    545 * @cl: the target class
     546 * @diff: diff time in nanoseconds
    547 *
     548 * This should be the only way to change a class's mode under normal
    549 * circumstances. Routine will update feed lists linkage, change mode
    550 * and add class to the wait event queue if appropriate. New mode should
    551 * be different from old one and cl->pq_key has to be valid if changing
    552 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
    553 */
    554static void
    555htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
    556{
    557	enum htb_cmode new_mode = htb_class_mode(cl, diff);
    558
    559	if (new_mode == cl->cmode)
    560		return;
    561
    562	if (new_mode == HTB_CANT_SEND) {
    563		cl->overlimits++;
    564		q->overlimits++;
    565	}
    566
    567	if (cl->prio_activity) {	/* not necessary: speed optimization */
    568		if (cl->cmode != HTB_CANT_SEND)
    569			htb_deactivate_prios(q, cl);
    570		cl->cmode = new_mode;
    571		if (new_mode != HTB_CANT_SEND)
    572			htb_activate_prios(q, cl);
    573	} else
    574		cl->cmode = new_mode;
    575}
    576
    577/**
    578 * htb_activate - inserts leaf cl into appropriate active feeds
    579 * @q: the priority event queue
    580 * @cl: the target class
    581 *
    582 * Routine learns (new) priority of leaf and activates feed chain
    583 * for the prio. It can be called on already active leaf safely.
    584 * It also adds leaf into droplist.
    585 */
    586static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
    587{
    588	WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
    589
    590	if (!cl->prio_activity) {
    591		cl->prio_activity = 1 << cl->prio;
    592		htb_activate_prios(q, cl);
    593	}
    594}
    595
    596/**
    597 * htb_deactivate - remove leaf cl from active feeds
    598 * @q: the priority event queue
    599 * @cl: the target class
    600 *
     601 * Make sure that leaf is active. In other words it can't be called
    602 * with non-active leaf. It also removes class from the drop list.
    603 */
    604static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
    605{
    606	WARN_ON(!cl->prio_activity);
    607
    608	htb_deactivate_prios(q, cl);
    609	cl->prio_activity = 0;
    610}
    611
    612static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
    613		       struct sk_buff **to_free)
    614{
    615	int ret;
    616	unsigned int len = qdisc_pkt_len(skb);
    617	struct htb_sched *q = qdisc_priv(sch);
    618	struct htb_class *cl = htb_classify(skb, sch, &ret);
    619
    620	if (cl == HTB_DIRECT) {
    621		/* enqueue to helper queue */
    622		if (q->direct_queue.qlen < q->direct_qlen) {
    623			__qdisc_enqueue_tail(skb, &q->direct_queue);
    624			q->direct_pkts++;
    625		} else {
    626			return qdisc_drop(skb, sch, to_free);
    627		}
    628#ifdef CONFIG_NET_CLS_ACT
    629	} else if (!cl) {
    630		if (ret & __NET_XMIT_BYPASS)
    631			qdisc_qstats_drop(sch);
    632		__qdisc_drop(skb, to_free);
    633		return ret;
    634#endif
    635	} else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
    636					to_free)) != NET_XMIT_SUCCESS) {
    637		if (net_xmit_drop_count(ret)) {
    638			qdisc_qstats_drop(sch);
    639			cl->drops++;
    640		}
    641		return ret;
    642	} else {
    643		htb_activate(q, cl);
    644	}
    645
    646	sch->qstats.backlog += len;
    647	sch->q.qlen++;
    648	return NET_XMIT_SUCCESS;
    649}
    650
    651static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
    652{
    653	s64 toks = diff + cl->tokens;
    654
    655	if (toks > cl->buffer)
    656		toks = cl->buffer;
    657	toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
    658	if (toks <= -cl->mbuffer)
    659		toks = 1 - cl->mbuffer;
    660
    661	cl->tokens = toks;
    662}
    663
    664static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
    665{
    666	s64 toks = diff + cl->ctokens;
    667
    668	if (toks > cl->cbuffer)
    669		toks = cl->cbuffer;
    670	toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
    671	if (toks <= -cl->mbuffer)
    672		toks = 1 - cl->mbuffer;
    673
    674	cl->ctokens = toks;
    675}
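/* Worked example (illustrative numbers only): psched_l2t_ns() converts a
 * packet length into the time the configured rate needs to transmit it,
 * so at a rate of 125000 bytes/s (1 Mbit/s) a 1500 byte packet costs
 * roughly 1500 / 125000 s = 12 ms = 12,000,000 ns worth of tokens.
 * Tokens are capped at cl->buffer above and clamped near -cl->mbuffer
 * below, so a burst can only drive the bucket so far into debt.
 */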
    676
    677/**
    678 * htb_charge_class - charges amount "bytes" to leaf and ancestors
    679 * @q: the priority event queue
    680 * @cl: the class to start iterate
    681 * @level: the minimum level to account
    682 * @skb: the socket buffer
    683 *
    684 * Routine assumes that packet "bytes" long was dequeued from leaf cl
    685 * borrowing from "level". It accounts bytes to ceil leaky bucket for
    686 * leaf and all ancestors and to rate bucket for ancestors at levels
    687 * "level" and higher. It also handles possible change of mode resulting
    688 * from the update. Note that mode can also increase here (MAY_BORROW to
    689 * CAN_SEND) because we can use more precise clock that event queue here.
    690 * In such case we remove class from event queue first.
    691 */
    692static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
    693			     int level, struct sk_buff *skb)
    694{
    695	int bytes = qdisc_pkt_len(skb);
    696	enum htb_cmode old_mode;
    697	s64 diff;
    698
    699	while (cl) {
    700		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
    701		if (cl->level >= level) {
    702			if (cl->level == level)
    703				cl->xstats.lends++;
    704			htb_accnt_tokens(cl, bytes, diff);
    705		} else {
    706			cl->xstats.borrows++;
    707			cl->tokens += diff;	/* we moved t_c; update tokens */
    708		}
    709		htb_accnt_ctokens(cl, bytes, diff);
    710		cl->t_c = q->now;
    711
    712		old_mode = cl->cmode;
    713		diff = 0;
    714		htb_change_class_mode(q, cl, &diff);
    715		if (old_mode != cl->cmode) {
    716			if (old_mode != HTB_CAN_SEND)
    717				htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
    718			if (cl->cmode != HTB_CAN_SEND)
    719				htb_add_to_wait_tree(q, cl, diff);
    720		}
    721
    722		/* update basic stats except for leaves which are already updated */
    723		if (cl->level)
    724			bstats_update(&cl->bstats, skb);
    725
    726		cl = cl->parent;
    727	}
    728}
    729
    730/**
    731 * htb_do_events - make mode changes to classes at the level
    732 * @q: the priority event queue
    733 * @level: which wait_pq in 'q->hlevel'
    734 * @start: start jiffies
    735 *
    736 * Scans event queue for pending events and applies them. Returns time of
    737 * next pending event (0 for no event in pq, q->now for too many events).
     738 * Note: Applied are events which have cl->pq_key <= q->now.
    739 */
    740static s64 htb_do_events(struct htb_sched *q, const int level,
    741			 unsigned long start)
    742{
    743	/* don't run for longer than 2 jiffies; 2 is used instead of
    744	 * 1 to simplify things when jiffy is going to be incremented
    745	 * too soon
    746	 */
    747	unsigned long stop_at = start + 2;
    748	struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
    749
    750	while (time_before(jiffies, stop_at)) {
    751		struct htb_class *cl;
    752		s64 diff;
    753		struct rb_node *p = rb_first(wait_pq);
    754
    755		if (!p)
    756			return 0;
    757
    758		cl = rb_entry(p, struct htb_class, pq_node);
    759		if (cl->pq_key > q->now)
    760			return cl->pq_key;
    761
    762		htb_safe_rb_erase(p, wait_pq);
    763		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
    764		htb_change_class_mode(q, cl, &diff);
    765		if (cl->cmode != HTB_CAN_SEND)
    766			htb_add_to_wait_tree(q, cl, diff);
    767	}
    768
    769	/* too much load - let's continue after a break for scheduling */
    770	if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
    771		pr_warn("htb: too many events!\n");
    772		q->warned |= HTB_WARN_TOOMANYEVENTS;
    773	}
    774
    775	return q->now;
    776}
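/* Example: a class that ran out of ceil tokens at time T sits on the wait
 * queue with pq_key > T; once q->now reaches pq_key, the loop above
 * recomputes its mode using the elapsed time as diff and re-queues it
 * with a later pq_key unless it is back in HTB_CAN_SEND mode.
 */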
    777
     778/* Returns class->node+prio from the id-tree where the class's id is >= id.
     779 * NULL if no such one exists.
    780 */
    781static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
    782					      u32 id)
    783{
    784	struct rb_node *r = NULL;
    785	while (n) {
    786		struct htb_class *cl =
    787		    rb_entry(n, struct htb_class, node[prio]);
    788
    789		if (id > cl->common.classid) {
    790			n = n->rb_right;
    791		} else if (id < cl->common.classid) {
    792			r = n;
    793			n = n->rb_left;
    794		} else {
    795			return n;
    796		}
    797	}
    798	return r;
    799}
    800
    801/**
    802 * htb_lookup_leaf - returns next leaf class in DRR order
    803 * @hprio: the current one
    804 * @prio: which prio in class
    805 *
     806 * Find the leaf that the current feed pointer points to.
    807 */
    808static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
    809{
    810	int i;
    811	struct {
    812		struct rb_node *root;
    813		struct rb_node **pptr;
    814		u32 *pid;
    815	} stk[TC_HTB_MAXDEPTH], *sp = stk;
    816
    817	BUG_ON(!hprio->row.rb_node);
    818	sp->root = hprio->row.rb_node;
    819	sp->pptr = &hprio->ptr;
    820	sp->pid = &hprio->last_ptr_id;
    821
    822	for (i = 0; i < 65535; i++) {
    823		if (!*sp->pptr && *sp->pid) {
    824			/* ptr was invalidated but id is valid - try to recover
    825			 * the original or next ptr
    826			 */
    827			*sp->pptr =
    828			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
    829		}
     830		*sp->pid = 0;	/* ptr is valid now, so remove this hint as it
     831				 * can become out of date quickly
    832				 */
    833		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
    834			*sp->pptr = sp->root;
    835			while ((*sp->pptr)->rb_left)
    836				*sp->pptr = (*sp->pptr)->rb_left;
    837			if (sp > stk) {
    838				sp--;
    839				if (!*sp->pptr) {
    840					WARN_ON(1);
    841					return NULL;
    842				}
    843				htb_next_rb_node(sp->pptr);
    844			}
    845		} else {
    846			struct htb_class *cl;
    847			struct htb_prio *clp;
    848
    849			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
    850			if (!cl->level)
    851				return cl;
    852			clp = &cl->inner.clprio[prio];
    853			(++sp)->root = clp->feed.rb_node;
    854			sp->pptr = &clp->ptr;
    855			sp->pid = &clp->last_ptr_id;
    856		}
    857	}
    858	WARN_ON(1);
    859	return NULL;
    860}
    861
    862/* dequeues packet at given priority and level; call only if
    863 * you are sure that there is active class at prio/level
    864 */
    865static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
    866					const int level)
    867{
    868	struct sk_buff *skb = NULL;
    869	struct htb_class *cl, *start;
    870	struct htb_level *hlevel = &q->hlevel[level];
    871	struct htb_prio *hprio = &hlevel->hprio[prio];
    872
    873	/* look initial class up in the row */
    874	start = cl = htb_lookup_leaf(hprio, prio);
    875
    876	do {
    877next:
    878		if (unlikely(!cl))
    879			return NULL;
    880
    881		/* class can be empty - it is unlikely but can be true if leaf
    882		 * qdisc drops packets in enqueue routine or if someone used
    883		 * graft operation on the leaf since last dequeue;
    884		 * simply deactivate and skip such class
    885		 */
    886		if (unlikely(cl->leaf.q->q.qlen == 0)) {
    887			struct htb_class *next;
    888			htb_deactivate(q, cl);
    889
    890			/* row/level might become empty */
    891			if ((q->row_mask[level] & (1 << prio)) == 0)
    892				return NULL;
    893
    894			next = htb_lookup_leaf(hprio, prio);
    895
    896			if (cl == start)	/* fix start if we just deleted it */
    897				start = next;
    898			cl = next;
    899			goto next;
    900		}
    901
    902		skb = cl->leaf.q->dequeue(cl->leaf.q);
    903		if (likely(skb != NULL))
    904			break;
    905
    906		qdisc_warn_nonwc("htb", cl->leaf.q);
    907		htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
    908					 &q->hlevel[0].hprio[prio].ptr);
    909		cl = htb_lookup_leaf(hprio, prio);
    910
    911	} while (cl != start);
    912
    913	if (likely(skb != NULL)) {
    914		bstats_update(&cl->bstats, skb);
    915		cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
    916		if (cl->leaf.deficit[level] < 0) {
    917			cl->leaf.deficit[level] += cl->quantum;
    918			htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
    919						 &q->hlevel[0].hprio[prio].ptr);
    920		}
     921		/* this used to be after charge_class but this constellation
    922		 * gives us slightly better performance
    923		 */
    924		if (!cl->leaf.q->q.qlen)
    925			htb_deactivate(q, cl);
    926		htb_charge_class(q, cl, level, skb);
    927	}
    928	return skb;
    929}
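/* DRR step with made-up numbers: say cl->quantum is 1500 and
 * cl->leaf.deficit[level] is 200.  Dequeuing a 1000 byte packet drives
 * the deficit to -800, so one quantum is added (deficit becomes 700) and
 * the round-robin pointer for this prio advances to the next class; a
 * 150 byte packet would instead leave the deficit at 50 and keep the
 * pointer in place, letting the same class send again.
 */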
    930
    931static struct sk_buff *htb_dequeue(struct Qdisc *sch)
    932{
    933	struct sk_buff *skb;
    934	struct htb_sched *q = qdisc_priv(sch);
    935	int level;
    936	s64 next_event;
    937	unsigned long start_at;
    938
    939	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
    940	skb = __qdisc_dequeue_head(&q->direct_queue);
    941	if (skb != NULL) {
    942ok:
    943		qdisc_bstats_update(sch, skb);
    944		qdisc_qstats_backlog_dec(sch, skb);
    945		sch->q.qlen--;
    946		return skb;
    947	}
    948
    949	if (!sch->q.qlen)
    950		goto fin;
    951	q->now = ktime_get_ns();
    952	start_at = jiffies;
    953
    954	next_event = q->now + 5LLU * NSEC_PER_SEC;
    955
    956	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
    957		/* common case optimization - skip event handler quickly */
    958		int m;
    959		s64 event = q->near_ev_cache[level];
    960
    961		if (q->now >= event) {
    962			event = htb_do_events(q, level, start_at);
    963			if (!event)
    964				event = q->now + NSEC_PER_SEC;
    965			q->near_ev_cache[level] = event;
    966		}
    967
    968		if (next_event > event)
    969			next_event = event;
    970
    971		m = ~q->row_mask[level];
    972		while (m != (int)(-1)) {
    973			int prio = ffz(m);
    974
    975			m |= 1 << prio;
    976			skb = htb_dequeue_tree(q, prio, level);
    977			if (likely(skb != NULL))
    978				goto ok;
    979		}
    980	}
    981	if (likely(next_event > q->now))
    982		qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
    983	else
    984		schedule_work(&q->work);
    985fin:
    986	return skb;
    987}
    988
    989/* reset all classes */
     990/* always called under BH & queue lock */
    991static void htb_reset(struct Qdisc *sch)
    992{
    993	struct htb_sched *q = qdisc_priv(sch);
    994	struct htb_class *cl;
    995	unsigned int i;
    996
    997	for (i = 0; i < q->clhash.hashsize; i++) {
    998		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
    999			if (cl->level)
   1000				memset(&cl->inner, 0, sizeof(cl->inner));
   1001			else {
   1002				if (cl->leaf.q && !q->offload)
   1003					qdisc_reset(cl->leaf.q);
   1004			}
   1005			cl->prio_activity = 0;
   1006			cl->cmode = HTB_CAN_SEND;
   1007		}
   1008	}
   1009	qdisc_watchdog_cancel(&q->watchdog);
   1010	__qdisc_reset_queue(&q->direct_queue);
   1011	sch->q.qlen = 0;
   1012	sch->qstats.backlog = 0;
   1013	memset(q->hlevel, 0, sizeof(q->hlevel));
   1014	memset(q->row_mask, 0, sizeof(q->row_mask));
   1015}
   1016
   1017static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
   1018	[TCA_HTB_PARMS]	= { .len = sizeof(struct tc_htb_opt) },
   1019	[TCA_HTB_INIT]	= { .len = sizeof(struct tc_htb_glob) },
   1020	[TCA_HTB_CTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
   1021	[TCA_HTB_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
   1022	[TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
   1023	[TCA_HTB_RATE64] = { .type = NLA_U64 },
   1024	[TCA_HTB_CEIL64] = { .type = NLA_U64 },
   1025	[TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
   1026};
   1027
   1028static void htb_work_func(struct work_struct *work)
   1029{
   1030	struct htb_sched *q = container_of(work, struct htb_sched, work);
   1031	struct Qdisc *sch = q->watchdog.qdisc;
   1032
   1033	rcu_read_lock();
   1034	__netif_schedule(qdisc_root(sch));
   1035	rcu_read_unlock();
   1036}
   1037
   1038static void htb_set_lockdep_class_child(struct Qdisc *q)
   1039{
   1040	static struct lock_class_key child_key;
   1041
   1042	lockdep_set_class(qdisc_lock(q), &child_key);
   1043}
   1044
   1045static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
   1046{
   1047	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
   1048}
   1049
   1050static int htb_init(struct Qdisc *sch, struct nlattr *opt,
   1051		    struct netlink_ext_ack *extack)
   1052{
   1053	struct net_device *dev = qdisc_dev(sch);
   1054	struct tc_htb_qopt_offload offload_opt;
   1055	struct htb_sched *q = qdisc_priv(sch);
   1056	struct nlattr *tb[TCA_HTB_MAX + 1];
   1057	struct tc_htb_glob *gopt;
   1058	unsigned int ntx;
   1059	bool offload;
   1060	int err;
   1061
   1062	qdisc_watchdog_init(&q->watchdog, sch);
   1063	INIT_WORK(&q->work, htb_work_func);
   1064
   1065	if (!opt)
   1066		return -EINVAL;
   1067
   1068	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
   1069	if (err)
   1070		return err;
   1071
   1072	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
   1073					  NULL);
   1074	if (err < 0)
   1075		return err;
   1076
   1077	if (!tb[TCA_HTB_INIT])
   1078		return -EINVAL;
   1079
   1080	gopt = nla_data(tb[TCA_HTB_INIT]);
   1081	if (gopt->version != HTB_VER >> 16)
   1082		return -EINVAL;
   1083
   1084	offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
   1085
   1086	if (offload) {
   1087		if (sch->parent != TC_H_ROOT) {
   1088			NL_SET_ERR_MSG(extack, "HTB must be the root qdisc to use offload");
   1089			return -EOPNOTSUPP;
   1090		}
   1091
   1092		if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) {
   1093			NL_SET_ERR_MSG(extack, "hw-tc-offload ethtool feature flag must be on");
   1094			return -EOPNOTSUPP;
   1095		}
   1096
   1097		q->num_direct_qdiscs = dev->real_num_tx_queues;
   1098		q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
   1099					   sizeof(*q->direct_qdiscs),
   1100					   GFP_KERNEL);
   1101		if (!q->direct_qdiscs)
   1102			return -ENOMEM;
   1103	}
   1104
   1105	err = qdisc_class_hash_init(&q->clhash);
   1106	if (err < 0)
   1107		goto err_free_direct_qdiscs;
   1108
   1109	qdisc_skb_head_init(&q->direct_queue);
   1110
   1111	if (tb[TCA_HTB_DIRECT_QLEN])
   1112		q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
   1113	else
   1114		q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
   1115
   1116	if ((q->rate2quantum = gopt->rate2quantum) < 1)
   1117		q->rate2quantum = 1;
   1118	q->defcls = gopt->defcls;
   1119
   1120	if (!offload)
   1121		return 0;
   1122
   1123	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
   1124		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
   1125		struct Qdisc *qdisc;
   1126
   1127		qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
   1128					  TC_H_MAKE(sch->handle, 0), extack);
   1129		if (!qdisc) {
   1130			err = -ENOMEM;
   1131			goto err_free_qdiscs;
   1132		}
   1133
   1134		htb_set_lockdep_class_child(qdisc);
   1135		q->direct_qdiscs[ntx] = qdisc;
   1136		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
   1137	}
   1138
   1139	sch->flags |= TCQ_F_MQROOT;
   1140
   1141	offload_opt = (struct tc_htb_qopt_offload) {
   1142		.command = TC_HTB_CREATE,
   1143		.parent_classid = TC_H_MAJ(sch->handle) >> 16,
   1144		.classid = TC_H_MIN(q->defcls),
   1145		.extack = extack,
   1146	};
   1147	err = htb_offload(dev, &offload_opt);
   1148	if (err)
   1149		goto err_free_qdiscs;
   1150
   1151	/* Defer this assignment, so that htb_destroy skips offload-related
   1152	 * parts (especially calling ndo_setup_tc) on errors.
   1153	 */
   1154	q->offload = true;
   1155
   1156	return 0;
   1157
   1158err_free_qdiscs:
   1159	for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx];
   1160	     ntx++)
   1161		qdisc_put(q->direct_qdiscs[ntx]);
   1162
   1163	qdisc_class_hash_destroy(&q->clhash);
   1164	/* Prevent use-after-free and double-free when htb_destroy gets called.
   1165	 */
   1166	q->clhash.hash = NULL;
   1167	q->clhash.hashsize = 0;
   1168
   1169err_free_direct_qdiscs:
   1170	kfree(q->direct_qdiscs);
   1171	q->direct_qdiscs = NULL;
   1172	return err;
   1173}
   1174
   1175static void htb_attach_offload(struct Qdisc *sch)
   1176{
   1177	struct net_device *dev = qdisc_dev(sch);
   1178	struct htb_sched *q = qdisc_priv(sch);
   1179	unsigned int ntx;
   1180
   1181	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
   1182		struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
   1183
   1184		old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
   1185		qdisc_put(old);
   1186		qdisc_hash_add(qdisc, false);
   1187	}
   1188	for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
   1189		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
   1190		struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
   1191
   1192		qdisc_put(old);
   1193	}
   1194
   1195	kfree(q->direct_qdiscs);
   1196	q->direct_qdiscs = NULL;
   1197}
   1198
   1199static void htb_attach_software(struct Qdisc *sch)
   1200{
   1201	struct net_device *dev = qdisc_dev(sch);
   1202	unsigned int ntx;
   1203
   1204	/* Resemble qdisc_graft behavior. */
   1205	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
   1206		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
   1207		struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
   1208
   1209		qdisc_refcount_inc(sch);
   1210
   1211		qdisc_put(old);
   1212	}
   1213}
   1214
   1215static void htb_attach(struct Qdisc *sch)
   1216{
   1217	struct htb_sched *q = qdisc_priv(sch);
   1218
   1219	if (q->offload)
   1220		htb_attach_offload(sch);
   1221	else
   1222		htb_attach_software(sch);
   1223}
   1224
   1225static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
   1226{
   1227	struct htb_sched *q = qdisc_priv(sch);
   1228	struct nlattr *nest;
   1229	struct tc_htb_glob gopt;
   1230
   1231	if (q->offload)
   1232		sch->flags |= TCQ_F_OFFLOADED;
   1233	else
   1234		sch->flags &= ~TCQ_F_OFFLOADED;
   1235
   1236	sch->qstats.overlimits = q->overlimits;
    1237	/* It's safe to not acquire the qdisc lock. As we hold RTNL,
    1238	 * no change can happen to the qdisc parameters.
   1239	 */
   1240
   1241	gopt.direct_pkts = q->direct_pkts;
   1242	gopt.version = HTB_VER;
   1243	gopt.rate2quantum = q->rate2quantum;
   1244	gopt.defcls = q->defcls;
   1245	gopt.debug = 0;
   1246
   1247	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
   1248	if (nest == NULL)
   1249		goto nla_put_failure;
   1250	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
   1251	    nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
   1252		goto nla_put_failure;
   1253	if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
   1254		goto nla_put_failure;
   1255
   1256	return nla_nest_end(skb, nest);
   1257
   1258nla_put_failure:
   1259	nla_nest_cancel(skb, nest);
   1260	return -1;
   1261}
   1262
   1263static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
   1264			  struct sk_buff *skb, struct tcmsg *tcm)
   1265{
   1266	struct htb_class *cl = (struct htb_class *)arg;
   1267	struct htb_sched *q = qdisc_priv(sch);
   1268	struct nlattr *nest;
   1269	struct tc_htb_opt opt;
   1270
    1271	/* It's safe to not acquire the qdisc lock. As we hold RTNL,
    1272	 * no change can happen to the class parameters.
   1273	 */
   1274	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
   1275	tcm->tcm_handle = cl->common.classid;
   1276	if (!cl->level && cl->leaf.q)
   1277		tcm->tcm_info = cl->leaf.q->handle;
   1278
   1279	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
   1280	if (nest == NULL)
   1281		goto nla_put_failure;
   1282
   1283	memset(&opt, 0, sizeof(opt));
   1284
   1285	psched_ratecfg_getrate(&opt.rate, &cl->rate);
   1286	opt.buffer = PSCHED_NS2TICKS(cl->buffer);
   1287	psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
   1288	opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
   1289	opt.quantum = cl->quantum;
   1290	opt.prio = cl->prio;
   1291	opt.level = cl->level;
   1292	if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
   1293		goto nla_put_failure;
   1294	if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
   1295		goto nla_put_failure;
   1296	if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
   1297	    nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
   1298			      TCA_HTB_PAD))
   1299		goto nla_put_failure;
   1300	if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
   1301	    nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
   1302			      TCA_HTB_PAD))
   1303		goto nla_put_failure;
   1304
   1305	return nla_nest_end(skb, nest);
   1306
   1307nla_put_failure:
   1308	nla_nest_cancel(skb, nest);
   1309	return -1;
   1310}
   1311
   1312static void htb_offload_aggregate_stats(struct htb_sched *q,
   1313					struct htb_class *cl)
   1314{
   1315	u64 bytes = 0, packets = 0;
   1316	struct htb_class *c;
   1317	unsigned int i;
   1318
   1319	gnet_stats_basic_sync_init(&cl->bstats);
   1320
   1321	for (i = 0; i < q->clhash.hashsize; i++) {
   1322		hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
   1323			struct htb_class *p = c;
   1324
   1325			while (p && p->level < cl->level)
   1326				p = p->parent;
   1327
   1328			if (p != cl)
   1329				continue;
   1330
   1331			bytes += u64_stats_read(&c->bstats_bias.bytes);
   1332			packets += u64_stats_read(&c->bstats_bias.packets);
   1333			if (c->level == 0) {
   1334				bytes += u64_stats_read(&c->leaf.q->bstats.bytes);
   1335				packets += u64_stats_read(&c->leaf.q->bstats.packets);
   1336			}
   1337		}
   1338	}
   1339	_bstats_update(&cl->bstats, bytes, packets);
   1340}
   1341
   1342static int
   1343htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
   1344{
   1345	struct htb_class *cl = (struct htb_class *)arg;
   1346	struct htb_sched *q = qdisc_priv(sch);
   1347	struct gnet_stats_queue qs = {
   1348		.drops = cl->drops,
   1349		.overlimits = cl->overlimits,
   1350	};
   1351	__u32 qlen = 0;
   1352
   1353	if (!cl->level && cl->leaf.q)
   1354		qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
   1355
   1356	cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
   1357				    INT_MIN, INT_MAX);
   1358	cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
   1359				     INT_MIN, INT_MAX);
   1360
   1361	if (q->offload) {
   1362		if (!cl->level) {
   1363			if (cl->leaf.q)
   1364				cl->bstats = cl->leaf.q->bstats;
   1365			else
   1366				gnet_stats_basic_sync_init(&cl->bstats);
   1367			_bstats_update(&cl->bstats,
   1368				       u64_stats_read(&cl->bstats_bias.bytes),
   1369				       u64_stats_read(&cl->bstats_bias.packets));
   1370		} else {
   1371			htb_offload_aggregate_stats(q, cl);
   1372		}
   1373	}
   1374
   1375	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
   1376	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
   1377	    gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
   1378		return -1;
   1379
   1380	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
   1381}
   1382
   1383static struct netdev_queue *
   1384htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
   1385{
   1386	struct net_device *dev = qdisc_dev(sch);
   1387	struct tc_htb_qopt_offload offload_opt;
   1388	struct htb_sched *q = qdisc_priv(sch);
   1389	int err;
   1390
   1391	if (!q->offload)
   1392		return sch->dev_queue;
   1393
   1394	offload_opt = (struct tc_htb_qopt_offload) {
   1395		.command = TC_HTB_LEAF_QUERY_QUEUE,
   1396		.classid = TC_H_MIN(tcm->tcm_parent),
   1397	};
   1398	err = htb_offload(dev, &offload_opt);
   1399	if (err || offload_opt.qid >= dev->num_tx_queues)
   1400		return NULL;
   1401	return netdev_get_tx_queue(dev, offload_opt.qid);
   1402}
   1403
   1404static struct Qdisc *
   1405htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
   1406{
   1407	struct net_device *dev = dev_queue->dev;
   1408	struct Qdisc *old_q;
   1409
   1410	if (dev->flags & IFF_UP)
   1411		dev_deactivate(dev);
   1412	old_q = dev_graft_qdisc(dev_queue, new_q);
   1413	if (new_q)
   1414		new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
   1415	if (dev->flags & IFF_UP)
   1416		dev_activate(dev);
   1417
   1418	return old_q;
   1419}
   1420
   1421static struct netdev_queue *htb_offload_get_queue(struct htb_class *cl)
   1422{
   1423	struct netdev_queue *queue;
   1424
   1425	queue = cl->leaf.offload_queue;
   1426	if (!(cl->leaf.q->flags & TCQ_F_BUILTIN))
   1427		WARN_ON(cl->leaf.q->dev_queue != queue);
   1428
   1429	return queue;
   1430}
   1431
   1432static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old,
   1433				   struct htb_class *cl_new, bool destroying)
   1434{
   1435	struct netdev_queue *queue_old, *queue_new;
   1436	struct net_device *dev = qdisc_dev(sch);
   1437
   1438	queue_old = htb_offload_get_queue(cl_old);
   1439	queue_new = htb_offload_get_queue(cl_new);
   1440
   1441	if (!destroying) {
   1442		struct Qdisc *qdisc;
   1443
   1444		if (dev->flags & IFF_UP)
   1445			dev_deactivate(dev);
   1446		qdisc = dev_graft_qdisc(queue_old, NULL);
   1447		WARN_ON(qdisc != cl_old->leaf.q);
   1448	}
   1449
   1450	if (!(cl_old->leaf.q->flags & TCQ_F_BUILTIN))
   1451		cl_old->leaf.q->dev_queue = queue_new;
   1452	cl_old->leaf.offload_queue = queue_new;
   1453
   1454	if (!destroying) {
   1455		struct Qdisc *qdisc;
   1456
   1457		qdisc = dev_graft_qdisc(queue_new, cl_old->leaf.q);
   1458		if (dev->flags & IFF_UP)
   1459			dev_activate(dev);
   1460		WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
   1461	}
   1462}
   1463
   1464static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
   1465		     struct Qdisc **old, struct netlink_ext_ack *extack)
   1466{
   1467	struct netdev_queue *dev_queue = sch->dev_queue;
   1468	struct htb_class *cl = (struct htb_class *)arg;
   1469	struct htb_sched *q = qdisc_priv(sch);
   1470	struct Qdisc *old_q;
   1471
   1472	if (cl->level)
   1473		return -EINVAL;
   1474
   1475	if (q->offload)
   1476		dev_queue = htb_offload_get_queue(cl);
   1477
   1478	if (!new) {
   1479		new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
   1480					cl->common.classid, extack);
   1481		if (!new)
   1482			return -ENOBUFS;
   1483	}
   1484
   1485	if (q->offload) {
   1486		htb_set_lockdep_class_child(new);
   1487		/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
   1488		qdisc_refcount_inc(new);
   1489		old_q = htb_graft_helper(dev_queue, new);
   1490	}
   1491
   1492	*old = qdisc_replace(sch, new, &cl->leaf.q);
   1493
   1494	if (q->offload) {
   1495		WARN_ON(old_q != *old);
   1496		qdisc_put(old_q);
   1497	}
   1498
   1499	return 0;
   1500}
   1501
   1502static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
   1503{
   1504	struct htb_class *cl = (struct htb_class *)arg;
   1505	return !cl->level ? cl->leaf.q : NULL;
   1506}
   1507
   1508static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
   1509{
   1510	struct htb_class *cl = (struct htb_class *)arg;
   1511
   1512	htb_deactivate(qdisc_priv(sch), cl);
   1513}
   1514
   1515static inline int htb_parent_last_child(struct htb_class *cl)
   1516{
   1517	if (!cl->parent)
   1518		/* the root class */
   1519		return 0;
   1520	if (cl->parent->children > 1)
   1521		/* not the last child */
   1522		return 0;
   1523	return 1;
   1524}
   1525
   1526static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
   1527			       struct Qdisc *new_q)
   1528{
   1529	struct htb_sched *q = qdisc_priv(sch);
   1530	struct htb_class *parent = cl->parent;
   1531
   1532	WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
   1533
   1534	if (parent->cmode != HTB_CAN_SEND)
   1535		htb_safe_rb_erase(&parent->pq_node,
   1536				  &q->hlevel[parent->level].wait_pq);
   1537
   1538	parent->level = 0;
   1539	memset(&parent->inner, 0, sizeof(parent->inner));
   1540	parent->leaf.q = new_q ? new_q : &noop_qdisc;
   1541	parent->tokens = parent->buffer;
   1542	parent->ctokens = parent->cbuffer;
   1543	parent->t_c = ktime_get_ns();
   1544	parent->cmode = HTB_CAN_SEND;
   1545	if (q->offload)
   1546		parent->leaf.offload_queue = cl->leaf.offload_queue;
   1547}
   1548
   1549static void htb_parent_to_leaf_offload(struct Qdisc *sch,
   1550				       struct netdev_queue *dev_queue,
   1551				       struct Qdisc *new_q)
   1552{
   1553	struct Qdisc *old_q;
   1554
   1555	/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
   1556	if (new_q)
   1557		qdisc_refcount_inc(new_q);
   1558	old_q = htb_graft_helper(dev_queue, new_q);
   1559	WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
   1560}
   1561
   1562static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
   1563				     bool last_child, bool destroying,
   1564				     struct netlink_ext_ack *extack)
   1565{
   1566	struct tc_htb_qopt_offload offload_opt;
   1567	struct netdev_queue *dev_queue;
   1568	struct Qdisc *q = cl->leaf.q;
   1569	struct Qdisc *old = NULL;
   1570	int err;
   1571
   1572	if (cl->level)
   1573		return -EINVAL;
   1574
   1575	WARN_ON(!q);
   1576	dev_queue = htb_offload_get_queue(cl);
   1577	old = htb_graft_helper(dev_queue, NULL);
   1578	if (destroying)
   1579		/* Before HTB is destroyed, the kernel grafts noop_qdisc to
   1580		 * all queues.
   1581		 */
   1582		WARN_ON(!(old->flags & TCQ_F_BUILTIN));
   1583	else
   1584		WARN_ON(old != q);
   1585
   1586	if (cl->parent) {
   1587		_bstats_update(&cl->parent->bstats_bias,
   1588			       u64_stats_read(&q->bstats.bytes),
   1589			       u64_stats_read(&q->bstats.packets));
   1590	}
   1591
   1592	offload_opt = (struct tc_htb_qopt_offload) {
   1593		.command = !last_child ? TC_HTB_LEAF_DEL :
   1594			   destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
   1595			   TC_HTB_LEAF_DEL_LAST,
   1596		.classid = cl->common.classid,
   1597		.extack = extack,
   1598	};
   1599	err = htb_offload(qdisc_dev(sch), &offload_opt);
   1600
   1601	if (!err || destroying)
   1602		qdisc_put(old);
   1603	else
   1604		htb_graft_helper(dev_queue, old);
   1605
   1606	if (last_child)
   1607		return err;
   1608
   1609	if (!err && offload_opt.classid != TC_H_MIN(cl->common.classid)) {
   1610		u32 classid = TC_H_MAJ(sch->handle) |
   1611			      TC_H_MIN(offload_opt.classid);
   1612		struct htb_class *moved_cl = htb_find(classid, sch);
   1613
   1614		htb_offload_move_qdisc(sch, moved_cl, cl, destroying);
   1615	}
   1616
   1617	return err;
   1618}
   1619
   1620static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
   1621{
   1622	if (!cl->level) {
   1623		WARN_ON(!cl->leaf.q);
   1624		qdisc_put(cl->leaf.q);
   1625	}
   1626	gen_kill_estimator(&cl->rate_est);
   1627	tcf_block_put(cl->block);
   1628	kfree(cl);
   1629}
   1630
   1631static void htb_destroy(struct Qdisc *sch)
   1632{
   1633	struct net_device *dev = qdisc_dev(sch);
   1634	struct tc_htb_qopt_offload offload_opt;
   1635	struct htb_sched *q = qdisc_priv(sch);
   1636	struct hlist_node *next;
   1637	bool nonempty, changed;
   1638	struct htb_class *cl;
   1639	unsigned int i;
   1640
   1641	cancel_work_sync(&q->work);
   1642	qdisc_watchdog_cancel(&q->watchdog);
   1643	/* This line used to be after htb_destroy_class call below
   1644	 * and surprisingly it worked in 2.4. But it must precede it
    1645	 * because a filter needs its target class alive to be able to call
    1646	 * unbind_filter on it (without an Oops).
   1647	 */
   1648	tcf_block_put(q->block);
   1649
   1650	for (i = 0; i < q->clhash.hashsize; i++) {
   1651		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
   1652			tcf_block_put(cl->block);
   1653			cl->block = NULL;
   1654		}
   1655	}
   1656
   1657	do {
   1658		nonempty = false;
   1659		changed = false;
   1660		for (i = 0; i < q->clhash.hashsize; i++) {
   1661			hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
   1662						  common.hnode) {
   1663				bool last_child;
   1664
   1665				if (!q->offload) {
   1666					htb_destroy_class(sch, cl);
   1667					continue;
   1668				}
   1669
   1670				nonempty = true;
   1671
   1672				if (cl->level)
   1673					continue;
   1674
   1675				changed = true;
   1676
   1677				last_child = htb_parent_last_child(cl);
   1678				htb_destroy_class_offload(sch, cl, last_child,
   1679							  true, NULL);
   1680				qdisc_class_hash_remove(&q->clhash,
   1681							&cl->common);
   1682				if (cl->parent)
   1683					cl->parent->children--;
   1684				if (last_child)
   1685					htb_parent_to_leaf(sch, cl, NULL);
   1686				htb_destroy_class(sch, cl);
   1687			}
   1688		}
   1689	} while (changed);
   1690	WARN_ON(nonempty);
   1691
   1692	qdisc_class_hash_destroy(&q->clhash);
   1693	__qdisc_reset_queue(&q->direct_queue);
   1694
   1695	if (!q->offload)
   1696		return;
   1697
   1698	offload_opt = (struct tc_htb_qopt_offload) {
   1699		.command = TC_HTB_DESTROY,
   1700	};
   1701	htb_offload(dev, &offload_opt);
   1702
   1703	if (!q->direct_qdiscs)
   1704		return;
   1705	for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
   1706		qdisc_put(q->direct_qdiscs[i]);
   1707	kfree(q->direct_qdiscs);
   1708}
   1709
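        /* Delete a single class. Refused with -EBUSY while the class still has
         * children or bound filters; if it was its parent's last child, the
         * parent becomes a leaf again and gets a fresh default pfifo.
         */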
   1710static int htb_delete(struct Qdisc *sch, unsigned long arg,
   1711		      struct netlink_ext_ack *extack)
   1712{
   1713	struct htb_sched *q = qdisc_priv(sch);
   1714	struct htb_class *cl = (struct htb_class *)arg;
   1715	struct Qdisc *new_q = NULL;
   1716	int last_child = 0;
   1717	int err;
   1718
    1719	/* TODO: why don't we allow deleting a subtree? References? Does the
    1720	 * tc subsystem guarantee that in htb_destroy it holds no class refs,
    1721	 * so that we can remove children safely there?
    1722	 */
   1723	if (cl->children || cl->filter_cnt)
   1724		return -EBUSY;
   1725
   1726	if (!cl->level && htb_parent_last_child(cl))
   1727		last_child = 1;
   1728
   1729	if (q->offload) {
   1730		err = htb_destroy_class_offload(sch, cl, last_child, false,
   1731						extack);
   1732		if (err)
   1733			return err;
   1734	}
   1735
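        	/* The parent is about to become a leaf again; create its default
        	 * pfifo replacement here, outside of the qdisc tree lock.
        	 */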
   1736	if (last_child) {
   1737		struct netdev_queue *dev_queue = sch->dev_queue;
   1738
   1739		if (q->offload)
   1740			dev_queue = htb_offload_get_queue(cl);
   1741
   1742		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
   1743					  cl->parent->common.classid,
   1744					  NULL);
   1745		if (q->offload) {
   1746			if (new_q)
   1747				htb_set_lockdep_class_child(new_q);
   1748			htb_parent_to_leaf_offload(sch, dev_queue, new_q);
   1749		}
   1750	}
   1751
   1752	sch_tree_lock(sch);
   1753
   1754	if (!cl->level)
   1755		qdisc_purge_queue(cl->leaf.q);
   1756
    1757	/* delete from hash and active lists; the rest happens in htb_destroy_class */
   1758	qdisc_class_hash_remove(&q->clhash, &cl->common);
   1759	if (cl->parent)
   1760		cl->parent->children--;
   1761
   1762	if (cl->prio_activity)
   1763		htb_deactivate(q, cl);
   1764
   1765	if (cl->cmode != HTB_CAN_SEND)
   1766		htb_safe_rb_erase(&cl->pq_node,
   1767				  &q->hlevel[cl->level].wait_pq);
   1768
   1769	if (last_child)
   1770		htb_parent_to_leaf(sch, cl, new_q);
   1771
   1772	sch_tree_unlock(sch);
   1773
   1774	htb_destroy_class(sch, cl);
   1775	return 0;
   1776}
   1777
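        /* Create a new class or modify an existing one (*arg != 0). For new
         * classes this also sets up the default rate estimator, the leaf qdisc
         * and, in offload mode, the backing tx queue.
         */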
   1778static int htb_change_class(struct Qdisc *sch, u32 classid,
   1779			    u32 parentid, struct nlattr **tca,
   1780			    unsigned long *arg, struct netlink_ext_ack *extack)
   1781{
   1782	int err = -EINVAL;
   1783	struct htb_sched *q = qdisc_priv(sch);
   1784	struct htb_class *cl = (struct htb_class *)*arg, *parent;
   1785	struct tc_htb_qopt_offload offload_opt;
   1786	struct nlattr *opt = tca[TCA_OPTIONS];
   1787	struct nlattr *tb[TCA_HTB_MAX + 1];
   1788	struct Qdisc *parent_qdisc = NULL;
   1789	struct netdev_queue *dev_queue;
   1790	struct tc_htb_opt *hopt;
   1791	u64 rate64, ceil64;
   1792	int warn = 0;
   1793
   1794	/* extract all subattrs from opt attr */
   1795	if (!opt)
   1796		goto failure;
   1797
   1798	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
   1799					  NULL);
   1800	if (err < 0)
   1801		goto failure;
   1802
   1803	err = -EINVAL;
   1804	if (tb[TCA_HTB_PARMS] == NULL)
   1805		goto failure;
   1806
   1807	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
   1808
   1809	hopt = nla_data(tb[TCA_HTB_PARMS]);
   1810	if (!hopt->rate.rate || !hopt->ceil.rate)
   1811		goto failure;
   1812
   1813	if (q->offload) {
   1814		/* Options not supported by the offload. */
   1815		if (hopt->rate.overhead || hopt->ceil.overhead) {
   1816			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter");
   1817			goto failure;
   1818		}
   1819		if (hopt->rate.mpu || hopt->ceil.mpu) {
   1820			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
   1821			goto failure;
   1822		}
   1823		if (hopt->quantum) {
   1824			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter");
   1825			goto failure;
   1826		}
   1827		if (hopt->prio) {
   1828			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter");
   1829			goto failure;
   1830		}
   1831	}
   1832
    1833	/* Keep backward compatibility with rate_table based iproute2 tc */
   1834	if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
   1835		qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
   1836					      NULL));
   1837
   1838	if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
   1839		qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
   1840					      NULL));
   1841
   1842	rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
   1843	ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
   1844
   1845	if (!cl) {		/* new class */
   1846		struct net_device *dev = qdisc_dev(sch);
   1847		struct Qdisc *new_q, *old_q;
   1848		int prio;
   1849		struct {
   1850			struct nlattr		nla;
   1851			struct gnet_estimator	opt;
   1852		} est = {
   1853			.nla = {
   1854				.nla_len	= nla_attr_size(sizeof(est.opt)),
   1855				.nla_type	= TCA_RATE,
   1856			},
   1857			.opt = {
   1858				/* 4s interval, 16s averaging constant */
   1859				.interval	= 2,
   1860				.ewma_log	= 2,
   1861			},
   1862		};
   1863
   1864		/* check for valid classid */
   1865		if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
   1866		    htb_find(classid, sch))
   1867			goto failure;
   1868
   1869		/* check maximal depth */
   1870		if (parent && parent->parent && parent->parent->level < 2) {
   1871			pr_err("htb: tree is too deep\n");
   1872			goto failure;
   1873		}
   1874		err = -ENOBUFS;
   1875		cl = kzalloc(sizeof(*cl), GFP_KERNEL);
   1876		if (!cl)
   1877			goto failure;
   1878
   1879		gnet_stats_basic_sync_init(&cl->bstats);
   1880		gnet_stats_basic_sync_init(&cl->bstats_bias);
   1881
   1882		err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
   1883		if (err) {
   1884			kfree(cl);
   1885			goto failure;
   1886		}
   1887		if (htb_rate_est || tca[TCA_RATE]) {
   1888			err = gen_new_estimator(&cl->bstats, NULL,
   1889						&cl->rate_est,
   1890						NULL,
   1891						true,
   1892						tca[TCA_RATE] ? : &est.nla);
   1893			if (err)
   1894				goto err_block_put;
   1895		}
   1896
   1897		cl->children = 0;
   1898		RB_CLEAR_NODE(&cl->pq_node);
   1899
   1900		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
   1901			RB_CLEAR_NODE(&cl->node[prio]);
   1902
   1903		cl->common.classid = classid;
   1904
    1905		/* Make sure nothing interrupts us between the two
    1906		 * ndo_setup_tc calls.
    1907		 */
   1908		ASSERT_RTNL();
   1909
    1910		/* Create the leaf qdisc early because it uses kmalloc(GFP_KERNEL),
    1911		 * which can't be used inside of sch_tree_lock.
    1912		 * -- thanks to Karlis Peisenieks
    1913		 */
   1914		if (!q->offload) {
   1915			dev_queue = sch->dev_queue;
   1916		} else if (!(parent && !parent->level)) {
   1917			/* Assign a dev_queue to this classid. */
   1918			offload_opt = (struct tc_htb_qopt_offload) {
   1919				.command = TC_HTB_LEAF_ALLOC_QUEUE,
   1920				.classid = cl->common.classid,
   1921				.parent_classid = parent ?
   1922					TC_H_MIN(parent->common.classid) :
   1923					TC_HTB_CLASSID_ROOT,
   1924				.rate = max_t(u64, hopt->rate.rate, rate64),
   1925				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
   1926				.extack = extack,
   1927			};
   1928			err = htb_offload(dev, &offload_opt);
   1929			if (err) {
   1930				pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
   1931				       err);
   1932				goto err_kill_estimator;
   1933			}
   1934			dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
   1935		} else { /* First child. */
   1936			dev_queue = htb_offload_get_queue(parent);
   1937			old_q = htb_graft_helper(dev_queue, NULL);
   1938			WARN_ON(old_q != parent->leaf.q);
   1939			offload_opt = (struct tc_htb_qopt_offload) {
   1940				.command = TC_HTB_LEAF_TO_INNER,
   1941				.classid = cl->common.classid,
   1942				.parent_classid =
   1943					TC_H_MIN(parent->common.classid),
   1944				.rate = max_t(u64, hopt->rate.rate, rate64),
   1945				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
   1946				.extack = extack,
   1947			};
   1948			err = htb_offload(dev, &offload_opt);
   1949			if (err) {
   1950				pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
   1951				       err);
   1952				htb_graft_helper(dev_queue, old_q);
   1953				goto err_kill_estimator;
   1954			}
   1955			_bstats_update(&parent->bstats_bias,
   1956				       u64_stats_read(&old_q->bstats.bytes),
   1957				       u64_stats_read(&old_q->bstats.packets));
   1958			qdisc_put(old_q);
   1959		}
   1960		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
   1961					  classid, NULL);
   1962		if (q->offload) {
   1963			if (new_q) {
   1964				htb_set_lockdep_class_child(new_q);
   1965				/* One ref for cl->leaf.q, the other for
   1966				 * dev_queue->qdisc.
   1967				 */
   1968				qdisc_refcount_inc(new_q);
   1969			}
   1970			old_q = htb_graft_helper(dev_queue, new_q);
   1971			/* No qdisc_put needed. */
   1972			WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
   1973		}
   1974		sch_tree_lock(sch);
   1975		if (parent && !parent->level) {
   1976			/* turn parent into inner node */
   1977			qdisc_purge_queue(parent->leaf.q);
   1978			parent_qdisc = parent->leaf.q;
   1979			if (parent->prio_activity)
   1980				htb_deactivate(q, parent);
   1981
   1982			/* remove from evt list because of level change */
   1983			if (parent->cmode != HTB_CAN_SEND) {
   1984				htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
   1985				parent->cmode = HTB_CAN_SEND;
   1986			}
   1987			parent->level = (parent->parent ? parent->parent->level
   1988					 : TC_HTB_MAXDEPTH) - 1;
   1989			memset(&parent->inner, 0, sizeof(parent->inner));
   1990		}
   1991
    1992		/* the new leaf (this class) needs an elementary qdisc */
   1993		cl->leaf.q = new_q ? new_q : &noop_qdisc;
   1994		if (q->offload)
   1995			cl->leaf.offload_queue = dev_queue;
   1996
   1997		cl->parent = parent;
   1998
   1999		/* set class to be in HTB_CAN_SEND state */
   2000		cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
   2001		cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
   2002		cl->mbuffer = 60ULL * NSEC_PER_SEC;	/* 1min */
   2003		cl->t_c = ktime_get_ns();
   2004		cl->cmode = HTB_CAN_SEND;
   2005
   2006		/* attach to the hash list and parent's family */
   2007		qdisc_class_hash_insert(&q->clhash, &cl->common);
   2008		if (parent)
   2009			parent->children++;
   2010		if (cl->leaf.q != &noop_qdisc)
   2011			qdisc_hash_add(cl->leaf.q, true);
   2012	} else {
   2013		if (tca[TCA_RATE]) {
   2014			err = gen_replace_estimator(&cl->bstats, NULL,
   2015						    &cl->rate_est,
   2016						    NULL,
   2017						    true,
   2018						    tca[TCA_RATE]);
   2019			if (err)
   2020				return err;
   2021		}
   2022
   2023		if (q->offload) {
   2024			struct net_device *dev = qdisc_dev(sch);
   2025
   2026			offload_opt = (struct tc_htb_qopt_offload) {
   2027				.command = TC_HTB_NODE_MODIFY,
   2028				.classid = cl->common.classid,
   2029				.rate = max_t(u64, hopt->rate.rate, rate64),
   2030				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
   2031				.extack = extack,
   2032			};
   2033			err = htb_offload(dev, &offload_opt);
   2034			if (err)
    2035			/* The estimator was replaced, and rollback may fail
    2036			 * as well, so we don't try to recover it; the
    2037			 * estimator won't work properly with the
    2038			 * offload anyway, because bstats are updated
    2039			 * only when the stats are queried.
    2040			 */
   2041				return err;
   2042		}
   2043
   2044		sch_tree_lock(sch);
   2045	}
   2046
   2047	psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
   2048	psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
   2049
    2050	/* There used to be a nasty bug here: we have to check that the node
    2051	 * is really a leaf before changing cl->leaf!
    2052	 */
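        	/* For leaves derive the quantum from rate / r2q and clamp it to a
        	 * sane range unless the user supplied one explicitly.
        	 */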
   2053	if (!cl->level) {
   2054		u64 quantum = cl->rate.rate_bytes_ps;
   2055
   2056		do_div(quantum, q->rate2quantum);
   2057		cl->quantum = min_t(u64, quantum, INT_MAX);
   2058
   2059		if (!hopt->quantum && cl->quantum < 1000) {
   2060			warn = -1;
   2061			cl->quantum = 1000;
   2062		}
   2063		if (!hopt->quantum && cl->quantum > 200000) {
   2064			warn = 1;
   2065			cl->quantum = 200000;
   2066		}
   2067		if (hopt->quantum)
   2068			cl->quantum = hopt->quantum;
   2069		if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
   2070			cl->prio = TC_HTB_NUMPRIO - 1;
   2071	}
   2072
   2073	cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
   2074	cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
   2075
   2076	sch_tree_unlock(sch);
   2077	qdisc_put(parent_qdisc);
   2078
   2079	if (warn)
   2080		pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
   2081			    cl->common.classid, (warn == -1 ? "small" : "big"));
   2082
   2083	qdisc_class_hash_grow(sch, &q->clhash);
   2084
   2085	*arg = (unsigned long)cl;
   2086	return 0;
   2087
   2088err_kill_estimator:
   2089	gen_kill_estimator(&cl->rate_est);
   2090err_block_put:
   2091	tcf_block_put(cl->block);
   2092	kfree(cl);
   2093failure:
   2094	return err;
   2095}
   2096
   2097static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
   2098				       struct netlink_ext_ack *extack)
   2099{
   2100	struct htb_sched *q = qdisc_priv(sch);
   2101	struct htb_class *cl = (struct htb_class *)arg;
   2102
   2103	return cl ? cl->block : q->block;
   2104}
   2105
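        /* Binding a filter takes a reference on the class via filter_cnt, so
         * htb_delete refuses to remove the class while filters still point at it.
         */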
   2106static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
   2107				     u32 classid)
   2108{
   2109	struct htb_class *cl = htb_find(classid, sch);
   2110
    2111	/*if (cl && !cl->level) return 0;
    2112	 * The line above used to be there to prevent attaching filters to
    2113	 * leaves. But at least the tc_index filter uses this just to get the
    2114	 * class for other reasons, so we have to allow for it.
    2115	 * ----
    2116	 * 19.6.2002 As Werner explained, it is OK - bind_filter is just
    2117	 * another way to "lock" the class - unlike "get", this lock can
    2118	 * be broken by the class during destroy, IIUC.
    2119	 */
   2120	if (cl)
   2121		cl->filter_cnt++;
   2122	return (unsigned long)cl;
   2123}
   2124
   2125static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
   2126{
   2127	struct htb_class *cl = (struct htb_class *)arg;
   2128
   2129	if (cl)
   2130		cl->filter_cnt--;
   2131}
   2132
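        /* Walk all classes for dumping, honouring the walker's skip/stop
         * bookkeeping.
         */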
   2133static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
   2134{
   2135	struct htb_sched *q = qdisc_priv(sch);
   2136	struct htb_class *cl;
   2137	unsigned int i;
   2138
   2139	if (arg->stop)
   2140		return;
   2141
   2142	for (i = 0; i < q->clhash.hashsize; i++) {
   2143		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
   2144			if (arg->count < arg->skip) {
   2145				arg->count++;
   2146				continue;
   2147			}
   2148			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
   2149				arg->stop = 1;
   2150				return;
   2151			}
   2152			arg->count++;
   2153		}
   2154	}
   2155}
   2156
   2157static const struct Qdisc_class_ops htb_class_ops = {
   2158	.select_queue	=	htb_select_queue,
   2159	.graft		=	htb_graft,
   2160	.leaf		=	htb_leaf,
   2161	.qlen_notify	=	htb_qlen_notify,
   2162	.find		=	htb_search,
   2163	.change		=	htb_change_class,
   2164	.delete		=	htb_delete,
   2165	.walk		=	htb_walk,
   2166	.tcf_block	=	htb_tcf_block,
   2167	.bind_tcf	=	htb_bind_filter,
   2168	.unbind_tcf	=	htb_unbind_filter,
   2169	.dump		=	htb_dump_class,
   2170	.dump_stats	=	htb_dump_class_stats,
   2171};
   2172
   2173static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
   2174	.cl_ops		=	&htb_class_ops,
   2175	.id		=	"htb",
   2176	.priv_size	=	sizeof(struct htb_sched),
   2177	.enqueue	=	htb_enqueue,
   2178	.dequeue	=	htb_dequeue,
   2179	.peek		=	qdisc_peek_dequeued,
   2180	.init		=	htb_init,
   2181	.attach		=	htb_attach,
   2182	.reset		=	htb_reset,
   2183	.destroy	=	htb_destroy,
   2184	.dump		=	htb_dump,
   2185	.owner		=	THIS_MODULE,
   2186};
   2187
   2188static int __init htb_module_init(void)
   2189{
   2190	return register_qdisc(&htb_qdisc_ops);
   2191}
   2192static void __exit htb_module_exit(void)
   2193{
   2194	unregister_qdisc(&htb_qdisc_ops);
   2195}
   2196
   2197module_init(htb_module_init)
   2198module_exit(htb_module_exit)
   2199MODULE_LICENSE("GPL");