cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

sch_gred.c (23388B)


// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_gred.c	Generic Random Early Detection queue.
 *
 * Authors:    J Hadi Salim (hadi@cyberus.ca) 1998-2002
 *
 *             991129: -  Bug fix with grio mode
 *		       - a better sing. AvgQ mode with Grio(WRED)
 *		       - A finer grained VQ dequeue based on suggestion
 *		         from Ren Liu
 *		       - More error checks
 *
 *  For all the glorious comments look at include/net/red.h
 */

#include <linux/slab.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_cls.h>
#include <net/pkt_sched.h>
#include <net/red.h>

#define GRED_DEF_PRIO (MAX_DPs / 2)
#define GRED_VQ_MASK (MAX_DPs - 1)

#define GRED_VQ_RED_FLAGS	(TC_RED_ECN | TC_RED_HARDDROP)

struct gred_sched_data;
struct gred_sched;

struct gred_sched_data {
	u32		limit;		/* HARD maximal queue length	*/
	u32		DP;		/* the drop parameters */
	u32		red_flags;	/* virtualQ version of red_flags */
	u64		bytesin;	/* bytes seen on virtualQ so far*/
	u32		packetsin;	/* packets seen on virtualQ so far*/
	u32		backlog;	/* bytes on the virtualQ */
	u8		prio;		/* the prio of this vq */

	struct red_parms parms;
	struct red_vars  vars;
	struct red_stats stats;
};

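/* Operating modes: in RIO mode each virtual queue keeps its own RED
 * average and the averages of all VQs with a numerically lower prio are
 * added to it when the marking decision is made; in WRED mode every VQ
 * shares the single set of averaging state kept in gred_sched::wred_set.
 */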
enum {
	GRED_WRED_MODE = 1,
	GRED_RIO_MODE,
};

struct gred_sched {
	struct gred_sched_data *tab[MAX_DPs];
	unsigned long	flags;
	u32		red_flags;
	u32 		DPs;
	u32 		def;
	struct red_vars wred_set;
	struct tc_gred_qopt_offload *opt;
};

static inline int gred_wred_mode(struct gred_sched *table)
{
	return test_bit(GRED_WRED_MODE, &table->flags);
}

static inline void gred_enable_wred_mode(struct gred_sched *table)
{
	__set_bit(GRED_WRED_MODE, &table->flags);
}

static inline void gred_disable_wred_mode(struct gred_sched *table)
{
	__clear_bit(GRED_WRED_MODE, &table->flags);
}

static inline int gred_rio_mode(struct gred_sched *table)
{
	return test_bit(GRED_RIO_MODE, &table->flags);
}

static inline void gred_enable_rio_mode(struct gred_sched *table)
{
	__set_bit(GRED_RIO_MODE, &table->flags);
}

static inline void gred_disable_rio_mode(struct gred_sched *table)
{
	__clear_bit(GRED_RIO_MODE, &table->flags);
}

static inline int gred_wred_mode_check(struct Qdisc *sch)
{
	struct gred_sched *table = qdisc_priv(sch);
	int i;

	/* Really ugly O(n^2), but this shouldn't be needed too frequently. */
	for (i = 0; i < table->DPs; i++) {
		struct gred_sched_data *q = table->tab[i];
		int n;

		if (q == NULL)
			continue;

		for (n = i + 1; n < table->DPs; n++)
			if (table->tab[n] && table->tab[n]->prio == q->prio)
				return 1;
	}

	return 0;
}

static inline unsigned int gred_backlog(struct gred_sched *table,
					struct gred_sched_data *q,
					struct Qdisc *sch)
{
	if (gred_wred_mode(table))
		return sch->qstats.backlog;
	else
		return q->backlog;
}

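/* The virtual queue (DP) a packet belongs to is taken from the low-order
 * bits of skb->tc_index, which is expected to have been set earlier in
 * the traffic control path (for example by the dsmark qdisc).
 */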
static inline u16 tc_index_to_dp(struct sk_buff *skb)
{
	return skb->tc_index & GRED_VQ_MASK;
}

static inline void gred_load_wred_set(const struct gred_sched *table,
				      struct gred_sched_data *q)
{
	q->vars.qavg = table->wred_set.qavg;
	q->vars.qidlestart = table->wred_set.qidlestart;
}

static inline void gred_store_wred_set(struct gred_sched *table,
				       struct gred_sched_data *q)
{
	table->wred_set.qavg = q->vars.qavg;
	table->wred_set.qidlestart = q->vars.qidlestart;
}

static int gred_use_ecn(struct gred_sched_data *q)
{
	return q->red_flags & TC_RED_ECN;
}

static int gred_use_harddrop(struct gred_sched_data *q)
{
	return q->red_flags & TC_RED_HARDDROP;
}

static bool gred_per_vq_red_flags_used(struct gred_sched *table)
{
	unsigned int i;

	/* Local per-vq flags couldn't have been set unless global are 0 */
	if (table->red_flags)
		return false;
	for (i = 0; i < MAX_DPs; i++)
		if (table->tab[i] && table->tab[i]->red_flags)
			return true;
	return false;
}

static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			struct sk_buff **to_free)
{
	struct gred_sched_data *q = NULL;
	struct gred_sched *t = qdisc_priv(sch);
	unsigned long qavg = 0;
	u16 dp = tc_index_to_dp(skb);

	if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
		dp = t->def;

		q = t->tab[dp];
		if (!q) {
			/* Pass through packets not assigned to a DP
			 * if no default DP has been configured. This
			 * allows for DP flows to be left untouched.
			 */
			if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <=
					sch->limit))
				return qdisc_enqueue_tail(skb, sch);
			else
				goto drop;
		}

		/* fix tc_index? --could be controversial but needed for
		   requeueing */
		skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp;
	}

	/* sum up all the qaves of prios < ours to get the new qave */
	if (!gred_wred_mode(t) && gred_rio_mode(t)) {
		int i;

		for (i = 0; i < t->DPs; i++) {
			if (t->tab[i] && t->tab[i]->prio < q->prio &&
			    !red_is_idling(&t->tab[i]->vars))
				qavg += t->tab[i]->vars.qavg;
		}

	}

	q->packetsin++;
	q->bytesin += qdisc_pkt_len(skb);

	if (gred_wred_mode(t))
		gred_load_wred_set(t, q);

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     gred_backlog(t, q, sch));

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	if (gred_wred_mode(t))
		gred_store_wred_set(t, q);

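	/* red_action() returns RED_DONT_MARK below qth_min, RED_PROB_MARK
	 * with increasing probability between qth_min and qth_max, and
	 * RED_HARD_MARK above qth_max.  A marked packet is ECN-marked when
	 * the VQ's flags permit it and dropped otherwise; TC_RED_HARDDROP
	 * forces a drop on hard marks even when ECN is available.
	 */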
	switch (red_action(&q->parms, &q->vars, q->vars.qavg + qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!gred_use_ecn(q) || !INET_ECN_set_ce(skb)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		q->stats.prob_mark++;
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (gred_use_harddrop(q) || !gred_use_ecn(q) ||
		    !INET_ECN_set_ce(skb)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}
		q->stats.forced_mark++;
		break;
	}

	if (gred_backlog(t, q, sch) + qdisc_pkt_len(skb) <= q->limit) {
		q->backlog += qdisc_pkt_len(skb);
		return qdisc_enqueue_tail(skb, sch);
	}

	q->stats.pdrop++;
drop:
	return qdisc_drop(skb, sch, to_free);

congestion_drop:
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}

static struct sk_buff *gred_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct gred_sched *t = qdisc_priv(sch);

	skb = qdisc_dequeue_head(sch);

	if (skb) {
		struct gred_sched_data *q;
		u16 dp = tc_index_to_dp(skb);

		if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
			net_warn_ratelimited("GRED: Unable to relocate VQ 0x%x after dequeue, screwing up backlog\n",
					     tc_index_to_dp(skb));
		} else {
			q->backlog -= qdisc_pkt_len(skb);

			if (gred_wred_mode(t)) {
				if (!sch->qstats.backlog)
					red_start_of_idle_period(&t->wred_set);
			} else {
				if (!q->backlog)
					red_start_of_idle_period(&q->vars);
			}
		}

		return skb;
	}

	return NULL;
}

static void gred_reset(struct Qdisc *sch)
{
	int i;
	struct gred_sched *t = qdisc_priv(sch);

	qdisc_reset_queue(sch);

	for (i = 0; i < t->DPs; i++) {
		struct gred_sched_data *q = t->tab[i];

		if (!q)
			continue;

		red_restart(&q->vars);
		q->backlog = 0;
	}
}

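/* Propagate the qdisc configuration to hardware that can offload it via
 * ndo_setup_tc(TC_SETUP_QDISC_GRED); devices without the hook, or with
 * TC offload disabled, are skipped.
 */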
static void gred_offload(struct Qdisc *sch, enum tc_gred_command command)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_gred_qopt_offload *opt = table->opt;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return;

	memset(opt, 0, sizeof(*opt));
	opt->command = command;
	opt->handle = sch->handle;
	opt->parent = sch->parent;

	if (command == TC_GRED_REPLACE) {
		unsigned int i;

		opt->set.grio_on = gred_rio_mode(table);
		opt->set.wred_on = gred_wred_mode(table);
		opt->set.dp_cnt = table->DPs;
		opt->set.dp_def = table->def;

		for (i = 0; i < table->DPs; i++) {
			struct gred_sched_data *q = table->tab[i];

			if (!q)
				continue;
			opt->set.tab[i].present = true;
			opt->set.tab[i].limit = q->limit;
			opt->set.tab[i].prio = q->prio;
			opt->set.tab[i].min = q->parms.qth_min >> q->parms.Wlog;
			opt->set.tab[i].max = q->parms.qth_max >> q->parms.Wlog;
			opt->set.tab[i].is_ecn = gred_use_ecn(q);
			opt->set.tab[i].is_harddrop = gred_use_harddrop(q);
			opt->set.tab[i].probability = q->parms.max_P;
			opt->set.tab[i].backlog = &q->backlog;
		}
		opt->set.qstats = &sch->qstats;
	}

	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_GRED, opt);
}

static int gred_offload_dump_stats(struct Qdisc *sch)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct tc_gred_qopt_offload *hw_stats;
	u64 bytes = 0, packets = 0;
	unsigned int i;
	int ret;

	hw_stats = kzalloc(sizeof(*hw_stats), GFP_KERNEL);
	if (!hw_stats)
		return -ENOMEM;

	hw_stats->command = TC_GRED_STATS;
	hw_stats->handle = sch->handle;
	hw_stats->parent = sch->parent;

	for (i = 0; i < MAX_DPs; i++) {
		gnet_stats_basic_sync_init(&hw_stats->stats.bstats[i]);
		if (table->tab[i])
			hw_stats->stats.xstats[i] = &table->tab[i]->stats;
	}

	ret = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_GRED, hw_stats);
	/* Even if driver returns failure adjust the stats - in case offload
	 * ended but driver still wants to adjust the values.
	 */
	for (i = 0; i < MAX_DPs; i++) {
		if (!table->tab[i])
			continue;
		table->tab[i]->packetsin += u64_stats_read(&hw_stats->stats.bstats[i].packets);
		table->tab[i]->bytesin += u64_stats_read(&hw_stats->stats.bstats[i].bytes);
		table->tab[i]->backlog += hw_stats->stats.qstats[i].backlog;

		bytes += u64_stats_read(&hw_stats->stats.bstats[i].bytes);
		packets += u64_stats_read(&hw_stats->stats.bstats[i].packets);
		sch->qstats.qlen += hw_stats->stats.qstats[i].qlen;
		sch->qstats.backlog += hw_stats->stats.qstats[i].backlog;
		sch->qstats.drops += hw_stats->stats.qstats[i].drops;
		sch->qstats.requeues += hw_stats->stats.qstats[i].requeues;
		sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits;
	}
	_bstats_update(&sch->bstats, bytes, packets);

	kfree(hw_stats);
	return ret;
}

static inline void gred_destroy_vq(struct gred_sched_data *q)
{
	kfree(q);
}

static int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps,
				 struct netlink_ext_ack *extack)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct tc_gred_sopt *sopt;
	bool red_flags_changed;
	int i;

	if (!dps)
		return -EINVAL;

	sopt = nla_data(dps);

	if (sopt->DPs > MAX_DPs) {
		NL_SET_ERR_MSG_MOD(extack, "number of virtual queues too high");
		return -EINVAL;
	}
	if (sopt->DPs == 0) {
		NL_SET_ERR_MSG_MOD(extack,
				   "number of virtual queues can't be 0");
		return -EINVAL;
	}
	if (sopt->def_DP >= sopt->DPs) {
		NL_SET_ERR_MSG_MOD(extack, "default virtual queue above virtual queue count");
		return -EINVAL;
	}
	if (sopt->flags && gred_per_vq_red_flags_used(table)) {
		NL_SET_ERR_MSG_MOD(extack, "can't set per-Qdisc RED flags when per-virtual queue flags are used");
		return -EINVAL;
	}

	sch_tree_lock(sch);
	table->DPs = sopt->DPs;
	table->def = sopt->def_DP;
	red_flags_changed = table->red_flags != sopt->flags;
	table->red_flags = sopt->flags;

	/*
	 * Every entry point to GRED is synchronized with the above code
	 * and the DP is checked against DPs, i.e. shadowed VQs can no
	 * longer be found so we can unlock right here.
	 */
	sch_tree_unlock(sch);

	if (sopt->grio) {
		gred_enable_rio_mode(table);
		gred_disable_wred_mode(table);
		if (gred_wred_mode_check(sch))
			gred_enable_wred_mode(table);
	} else {
		gred_disable_rio_mode(table);
		gred_disable_wred_mode(table);
	}

	if (red_flags_changed)
		for (i = 0; i < table->DPs; i++)
			if (table->tab[i])
				table->tab[i]->red_flags =
					table->red_flags & GRED_VQ_RED_FLAGS;

	for (i = table->DPs; i < MAX_DPs; i++) {
		if (table->tab[i]) {
			pr_warn("GRED: Warning: Destroying shadowed VQ 0x%x\n",
				i);
			gred_destroy_vq(table->tab[i]);
			table->tab[i] = NULL;
		}
	}

	gred_offload(sch, TC_GRED_REPLACE);
	return 0;
}

static inline int gred_change_vq(struct Qdisc *sch, int dp,
				 struct tc_gred_qopt *ctl, int prio,
				 u8 *stab, u32 max_P,
				 struct gred_sched_data **prealloc,
				 struct netlink_ext_ack *extack)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct gred_sched_data *q = table->tab[dp];

	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log, stab)) {
		NL_SET_ERR_MSG_MOD(extack, "invalid RED parameters");
		return -EINVAL;
	}

	if (!q) {
		table->tab[dp] = q = *prealloc;
		*prealloc = NULL;
		if (!q)
			return -ENOMEM;
		q->red_flags = table->red_flags & GRED_VQ_RED_FLAGS;
	}

	q->DP = dp;
	q->prio = prio;
	if (ctl->limit > sch->limit)
		q->limit = sch->limit;
	else
		q->limit = ctl->limit;

	if (q->backlog == 0)
		red_end_of_idle_period(&q->vars);

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog,
		      ctl->Scell_log, stab, max_P);
	red_set_vars(&q->vars);
	return 0;
}

static const struct nla_policy gred_vq_policy[TCA_GRED_VQ_MAX + 1] = {
	[TCA_GRED_VQ_DP]	= { .type = NLA_U32 },
	[TCA_GRED_VQ_FLAGS]	= { .type = NLA_U32 },
};

static const struct nla_policy gred_vqe_policy[TCA_GRED_VQ_ENTRY_MAX + 1] = {
	[TCA_GRED_VQ_ENTRY]	= { .type = NLA_NESTED },
};

static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
	[TCA_GRED_PARMS]	= { .len = sizeof(struct tc_gred_qopt) },
	[TCA_GRED_STAB]		= { .len = 256 },
	[TCA_GRED_DPS]		= { .len = sizeof(struct tc_gred_sopt) },
	[TCA_GRED_MAX_P]	= { .type = NLA_U32 },
	[TCA_GRED_LIMIT]	= { .type = NLA_U32 },
	[TCA_GRED_VQ_LIST]	= { .type = NLA_NESTED },
};

static void gred_vq_apply(struct gred_sched *table, const struct nlattr *entry)
{
	struct nlattr *tb[TCA_GRED_VQ_MAX + 1];
	u32 dp;

	nla_parse_nested_deprecated(tb, TCA_GRED_VQ_MAX, entry,
				    gred_vq_policy, NULL);

	dp = nla_get_u32(tb[TCA_GRED_VQ_DP]);

	if (tb[TCA_GRED_VQ_FLAGS])
		table->tab[dp]->red_flags = nla_get_u32(tb[TCA_GRED_VQ_FLAGS]);
}

static void gred_vqs_apply(struct gred_sched *table, struct nlattr *vqs)
{
	const struct nlattr *attr;
	int rem;

	nla_for_each_nested(attr, vqs, rem) {
		switch (nla_type(attr)) {
		case TCA_GRED_VQ_ENTRY:
			gred_vq_apply(table, attr);
			break;
		}
	}
}

static int gred_vq_validate(struct gred_sched *table, u32 cdp,
			    const struct nlattr *entry,
			    struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_GRED_VQ_MAX + 1];
	int err;
	u32 dp;

	err = nla_parse_nested_deprecated(tb, TCA_GRED_VQ_MAX, entry,
					  gred_vq_policy, extack);
	if (err < 0)
		return err;

	if (!tb[TCA_GRED_VQ_DP]) {
		NL_SET_ERR_MSG_MOD(extack, "Virtual queue with no index specified");
		return -EINVAL;
	}
	dp = nla_get_u32(tb[TCA_GRED_VQ_DP]);
	if (dp >= table->DPs) {
		NL_SET_ERR_MSG_MOD(extack, "Virtual queue with index out of bounds");
		return -EINVAL;
	}
	if (dp != cdp && !table->tab[dp]) {
		NL_SET_ERR_MSG_MOD(extack, "Virtual queue not yet instantiated");
		return -EINVAL;
	}

	if (tb[TCA_GRED_VQ_FLAGS]) {
		u32 red_flags = nla_get_u32(tb[TCA_GRED_VQ_FLAGS]);

		if (table->red_flags && table->red_flags != red_flags) {
			NL_SET_ERR_MSG_MOD(extack, "can't change per-virtual queue RED flags when per-Qdisc flags are used");
			return -EINVAL;
		}
		if (red_flags & ~GRED_VQ_RED_FLAGS) {
			NL_SET_ERR_MSG_MOD(extack,
					   "invalid RED flags specified");
			return -EINVAL;
		}
	}

	return 0;
}

static int gred_vqs_validate(struct gred_sched *table, u32 cdp,
			     struct nlattr *vqs, struct netlink_ext_ack *extack)
{
	const struct nlattr *attr;
	int rem, err;

	err = nla_validate_nested_deprecated(vqs, TCA_GRED_VQ_ENTRY_MAX,
					     gred_vqe_policy, extack);
	if (err < 0)
		return err;

	nla_for_each_nested(attr, vqs, rem) {
		switch (nla_type(attr)) {
		case TCA_GRED_VQ_ENTRY:
			err = gred_vq_validate(table, cdp, attr, extack);
			if (err)
				return err;
			break;
		default:
			NL_SET_ERR_MSG_MOD(extack, "GRED_VQ_LIST can contain only entry attributes");
			return -EINVAL;
		}
	}

	if (rem > 0) {
		NL_SET_ERR_MSG_MOD(extack, "Trailing data after parsing virtual queue list");
		return -EINVAL;
	}

	return 0;
}

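/* gred_change() accepts two kinds of requests: a table-level change
 * (TCA_GRED_DPS, optionally TCA_GRED_LIMIT, no PARMS/STAB) and a
 * per-virtual-queue change, which requires TCA_GRED_PARMS and
 * TCA_GRED_STAB and must not carry TCA_GRED_LIMIT.
 */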
static int gred_change(struct Qdisc *sch, struct nlattr *opt,
		       struct netlink_ext_ack *extack)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct tc_gred_qopt *ctl;
	struct nlattr *tb[TCA_GRED_MAX + 1];
	int err, prio = GRED_DEF_PRIO;
	u8 *stab;
	u32 max_P;
	struct gred_sched_data *prealloc;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_GRED_MAX, opt, gred_policy,
					  extack);
	if (err < 0)
		return err;

	if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) {
		if (tb[TCA_GRED_LIMIT] != NULL)
			sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
		return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack);
	}

	if (tb[TCA_GRED_PARMS] == NULL ||
	    tb[TCA_GRED_STAB] == NULL ||
	    tb[TCA_GRED_LIMIT] != NULL) {
		NL_SET_ERR_MSG_MOD(extack, "can't configure Qdisc and virtual queue at the same time");
		return -EINVAL;
	}

	max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_GRED_PARMS]);
	stab = nla_data(tb[TCA_GRED_STAB]);

	if (ctl->DP >= table->DPs) {
		NL_SET_ERR_MSG_MOD(extack, "virtual queue index above virtual queue count");
		return -EINVAL;
	}

	if (tb[TCA_GRED_VQ_LIST]) {
		err = gred_vqs_validate(table, ctl->DP, tb[TCA_GRED_VQ_LIST],
					extack);
		if (err)
			return err;
	}

	if (gred_rio_mode(table)) {
		if (ctl->prio == 0) {
			int def_prio = GRED_DEF_PRIO;

			if (table->tab[table->def])
				def_prio = table->tab[table->def]->prio;

			printk(KERN_DEBUG "GRED: DP %u does not have a prio "
			       "setting default to %d\n", ctl->DP, def_prio);

			prio = def_prio;
		} else
			prio = ctl->prio;
	}

	prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL);
	sch_tree_lock(sch);

	err = gred_change_vq(sch, ctl->DP, ctl, prio, stab, max_P, &prealloc,
			     extack);
	if (err < 0)
		goto err_unlock_free;

	if (tb[TCA_GRED_VQ_LIST])
		gred_vqs_apply(table, tb[TCA_GRED_VQ_LIST]);

	if (gred_rio_mode(table)) {
		gred_disable_wred_mode(table);
		if (gred_wred_mode_check(sch))
			gred_enable_wred_mode(table);
	}

	sch_tree_unlock(sch);
	kfree(prealloc);

	gred_offload(sch, TC_GRED_REPLACE);
	return 0;

err_unlock_free:
	sch_tree_unlock(sch);
	kfree(prealloc);
	return err;
}

static int gred_init(struct Qdisc *sch, struct nlattr *opt,
		     struct netlink_ext_ack *extack)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct nlattr *tb[TCA_GRED_MAX + 1];
	int err;

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_GRED_MAX, opt, gred_policy,
					  extack);
	if (err < 0)
		return err;

	if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB]) {
		NL_SET_ERR_MSG_MOD(extack,
				   "virtual queue configuration can't be specified at initialization time");
		return -EINVAL;
	}

	if (tb[TCA_GRED_LIMIT])
		sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
	else
		sch->limit = qdisc_dev(sch)->tx_queue_len
		             * psched_mtu(qdisc_dev(sch));

	if (qdisc_dev(sch)->netdev_ops->ndo_setup_tc) {
		table->opt = kzalloc(sizeof(*table->opt), GFP_KERNEL);
		if (!table->opt)
			return -ENOMEM;
	}

	return gred_change_table_def(sch, tb[TCA_GRED_DPS], extack);
}

static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct nlattr *parms, *vqs, *opts = NULL;
	int i;
	u32 max_p[MAX_DPs];
	struct tc_gred_sopt sopt = {
		.DPs	= table->DPs,
		.def_DP	= table->def,
		.grio	= gred_rio_mode(table),
		.flags	= table->red_flags,
	};

	if (gred_offload_dump_stats(sch))
		goto nla_put_failure;

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_GRED_DPS, sizeof(sopt), &sopt))
		goto nla_put_failure;

	for (i = 0; i < MAX_DPs; i++) {
		struct gred_sched_data *q = table->tab[i];

		max_p[i] = q ? q->parms.max_P : 0;
	}
	if (nla_put(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p))
		goto nla_put_failure;

	if (nla_put_u32(skb, TCA_GRED_LIMIT, sch->limit))
		goto nla_put_failure;

	/* Old style all-in-one dump of VQs */
	parms = nla_nest_start_noflag(skb, TCA_GRED_PARMS);
	if (parms == NULL)
		goto nla_put_failure;

	for (i = 0; i < MAX_DPs; i++) {
		struct gred_sched_data *q = table->tab[i];
		struct tc_gred_qopt opt;
		unsigned long qavg;

		memset(&opt, 0, sizeof(opt));

		if (!q) {
			/* hack -- fix at some point with proper message
			   This is how we indicate to tc that there is no VQ
			   at this DP */

			opt.DP = MAX_DPs + i;
			goto append_opt;
		}

		opt.limit	= q->limit;
		opt.DP		= q->DP;
		opt.backlog	= gred_backlog(table, q, sch);
		opt.prio	= q->prio;
		opt.qth_min	= q->parms.qth_min >> q->parms.Wlog;
		opt.qth_max	= q->parms.qth_max >> q->parms.Wlog;
		opt.Wlog	= q->parms.Wlog;
		opt.Plog	= q->parms.Plog;
		opt.Scell_log	= q->parms.Scell_log;
		opt.other	= q->stats.other;
		opt.early	= q->stats.prob_drop;
		opt.forced	= q->stats.forced_drop;
		opt.pdrop	= q->stats.pdrop;
		opt.packets	= q->packetsin;
		opt.bytesin	= q->bytesin;

		if (gred_wred_mode(table))
			gred_load_wred_set(table, q);

		qavg = red_calc_qavg(&q->parms, &q->vars,
				     q->vars.qavg >> q->parms.Wlog);
		opt.qave = qavg >> q->parms.Wlog;

append_opt:
		if (nla_append(skb, sizeof(opt), &opt) < 0)
			goto nla_put_failure;
	}

	nla_nest_end(skb, parms);

	/* Dump the VQs again, in more structured way */
	vqs = nla_nest_start_noflag(skb, TCA_GRED_VQ_LIST);
	if (!vqs)
		goto nla_put_failure;

	for (i = 0; i < MAX_DPs; i++) {
		struct gred_sched_data *q = table->tab[i];
		struct nlattr *vq;

		if (!q)
			continue;

		vq = nla_nest_start_noflag(skb, TCA_GRED_VQ_ENTRY);
		if (!vq)
			goto nla_put_failure;

		if (nla_put_u32(skb, TCA_GRED_VQ_DP, q->DP))
			goto nla_put_failure;

		if (nla_put_u32(skb, TCA_GRED_VQ_FLAGS, q->red_flags))
			goto nla_put_failure;

		/* Stats */
		if (nla_put_u64_64bit(skb, TCA_GRED_VQ_STAT_BYTES, q->bytesin,
				      TCA_GRED_VQ_PAD))
			goto nla_put_failure;
		if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PACKETS, q->packetsin))
			goto nla_put_failure;
		if (nla_put_u32(skb, TCA_GRED_VQ_STAT_BACKLOG,
				gred_backlog(table, q, sch)))
			goto nla_put_failure;
		if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PROB_DROP,
				q->stats.prob_drop))
			goto nla_put_failure;
		if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PROB_MARK,
				q->stats.prob_mark))
			goto nla_put_failure;
		if (nla_put_u32(skb, TCA_GRED_VQ_STAT_FORCED_DROP,
				q->stats.forced_drop))
			goto nla_put_failure;
		if (nla_put_u32(skb, TCA_GRED_VQ_STAT_FORCED_MARK,
				q->stats.forced_mark))
			goto nla_put_failure;
		if (nla_put_u32(skb, TCA_GRED_VQ_STAT_PDROP, q->stats.pdrop))
			goto nla_put_failure;
		if (nla_put_u32(skb, TCA_GRED_VQ_STAT_OTHER, q->stats.other))
			goto nla_put_failure;

		nla_nest_end(skb, vq);
	}
	nla_nest_end(skb, vqs);

	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static void gred_destroy(struct Qdisc *sch)
{
	struct gred_sched *table = qdisc_priv(sch);
	int i;

	for (i = 0; i < table->DPs; i++) {
		if (table->tab[i])
			gred_destroy_vq(table->tab[i]);
	}
	gred_offload(sch, TC_GRED_DESTROY);
	kfree(table->opt);
}

static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
	.id		=	"gred",
	.priv_size	=	sizeof(struct gred_sched),
	.enqueue	=	gred_enqueue,
	.dequeue	=	gred_dequeue,
	.peek		=	qdisc_peek_head,
	.init		=	gred_init,
	.reset		=	gred_reset,
	.destroy	=	gred_destroy,
	.change		=	gred_change,
	.dump		=	gred_dump,
	.owner		=	THIS_MODULE,
};

static int __init gred_module_init(void)
{
	return register_qdisc(&gred_qdisc_ops);
}

static void __exit gred_module_exit(void)
{
	unregister_qdisc(&gred_qdisc_ops);
}

module_init(gred_module_init)
module_exit(gred_module_exit)

MODULE_LICENSE("GPL");