cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

sch_taprio.c (53132B)


      1// SPDX-License-Identifier: GPL-2.0
      2
      3/* net/sched/sch_taprio.c	 Time Aware Priority Scheduler
      4 *
      5 * Authors:	Vinicius Costa Gomes <vinicius.gomes@intel.com>
      6 *
      7 */
      8
      9#include <linux/ethtool.h>
     10#include <linux/types.h>
     11#include <linux/slab.h>
     12#include <linux/kernel.h>
     13#include <linux/string.h>
     14#include <linux/list.h>
     15#include <linux/errno.h>
     16#include <linux/skbuff.h>
     17#include <linux/math64.h>
     18#include <linux/module.h>
     19#include <linux/spinlock.h>
     20#include <linux/rcupdate.h>
     21#include <net/netlink.h>
     22#include <net/pkt_sched.h>
     23#include <net/pkt_cls.h>
     24#include <net/sch_generic.h>
     25#include <net/sock.h>
     26#include <net/tcp.h>
     27
     28static LIST_HEAD(taprio_list);
     29static DEFINE_SPINLOCK(taprio_list_lock);
     30
     31#define TAPRIO_ALL_GATES_OPEN -1
     32
     33#define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)
     34#define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
     35#define TAPRIO_FLAGS_INVALID U32_MAX
     36
     37struct sched_entry {
     38	struct list_head list;
     39
     40	/* The instant that this entry "closes" and the next one
     41	 * should open, the qdisc will make some effort so that no
     42	 * packet leaves after this time.
     43	 */
     44	ktime_t close_time;
     45	ktime_t next_txtime;
     46	atomic_t budget;
     47	int index;
     48	u32 gate_mask;
     49	u32 interval;
     50	u8 command;
     51};
     52
     53struct sched_gate_list {
     54	struct rcu_head rcu;
     55	struct list_head entries;
     56	size_t num_entries;
     57	ktime_t cycle_close_time;
     58	s64 cycle_time;
     59	s64 cycle_time_extension;
     60	s64 base_time;
     61};
     62
     63struct taprio_sched {
     64	struct Qdisc **qdiscs;
     65	struct Qdisc *root;
     66	u32 flags;
     67	enum tk_offsets tk_offset;
     68	int clockid;
     69	atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
     70				    * speeds it's sub-nanoseconds per byte
     71				    */
     72
     73	/* Protects the update side of the RCU protected current_entry */
     74	spinlock_t current_entry_lock;
     75	struct sched_entry __rcu *current_entry;
     76	struct sched_gate_list __rcu *oper_sched;
     77	struct sched_gate_list __rcu *admin_sched;
     78	struct hrtimer advance_timer;
     79	struct list_head taprio_list;
     80	struct sk_buff *(*dequeue)(struct Qdisc *sch);
     81	struct sk_buff *(*peek)(struct Qdisc *sch);
     82	u32 txtime_delay;
     83};
     84
     85struct __tc_taprio_qopt_offload {
     86	refcount_t users;
     87	struct tc_taprio_qopt_offload offload;
     88};
     89
     90static ktime_t sched_base_time(const struct sched_gate_list *sched)
     91{
     92	if (!sched)
     93		return KTIME_MAX;
     94
     95	return ns_to_ktime(sched->base_time);
     96}
     97
     98static ktime_t taprio_mono_to_any(const struct taprio_sched *q, ktime_t mono)
     99{
    100	/* This pairs with WRITE_ONCE() in taprio_parse_clockid() */
    101	enum tk_offsets tk_offset = READ_ONCE(q->tk_offset);
    102
    103	switch (tk_offset) {
    104	case TK_OFFS_MAX:
    105		return mono;
    106	default:
    107		return ktime_mono_to_any(mono, tk_offset);
    108	}
    109}
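        /* Note: TK_OFFS_MAX is used here as a sentinel for CLOCK_MONOTONIC.
         * taprio_parse_clockid() stores TK_OFFS_MAX when CLOCK_MONOTONIC is
         * requested, so monotonic timestamps are returned unchanged instead
         * of being converted via ktime_mono_to_any().
         */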
    110
    111static ktime_t taprio_get_time(const struct taprio_sched *q)
    112{
    113	return taprio_mono_to_any(q, ktime_get());
    114}
    115
    116static void taprio_free_sched_cb(struct rcu_head *head)
    117{
    118	struct sched_gate_list *sched = container_of(head, struct sched_gate_list, rcu);
    119	struct sched_entry *entry, *n;
    120
    121	list_for_each_entry_safe(entry, n, &sched->entries, list) {
    122		list_del(&entry->list);
    123		kfree(entry);
    124	}
    125
    126	kfree(sched);
    127}
    128
    129static void switch_schedules(struct taprio_sched *q,
    130			     struct sched_gate_list **admin,
    131			     struct sched_gate_list **oper)
    132{
    133	rcu_assign_pointer(q->oper_sched, *admin);
    134	rcu_assign_pointer(q->admin_sched, NULL);
    135
    136	if (*oper)
    137		call_rcu(&(*oper)->rcu, taprio_free_sched_cb);
    138
    139	*oper = *admin;
    140	*admin = NULL;
    141}
    142
     144/* Get how much time has already elapsed in the current cycle. */
    144static s32 get_cycle_time_elapsed(struct sched_gate_list *sched, ktime_t time)
    145{
    146	ktime_t time_since_sched_start;
    147	s32 time_elapsed;
    148
    149	time_since_sched_start = ktime_sub(time, sched->base_time);
    150	div_s64_rem(time_since_sched_start, sched->cycle_time, &time_elapsed);
    151
    152	return time_elapsed;
    153}
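        /* For illustration (made-up numbers): with base_time = 0 and
         * cycle_time = 1000000 ns, a timestamp of 2300000 ns lies two full
         * cycles plus 300000 ns past the base, so div_s64_rem() leaves
         * time_elapsed = 300000.
         */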
    154
    155static ktime_t get_interval_end_time(struct sched_gate_list *sched,
    156				     struct sched_gate_list *admin,
    157				     struct sched_entry *entry,
    158				     ktime_t intv_start)
    159{
    160	s32 cycle_elapsed = get_cycle_time_elapsed(sched, intv_start);
    161	ktime_t intv_end, cycle_ext_end, cycle_end;
    162
    163	cycle_end = ktime_add_ns(intv_start, sched->cycle_time - cycle_elapsed);
    164	intv_end = ktime_add_ns(intv_start, entry->interval);
    165	cycle_ext_end = ktime_add(cycle_end, sched->cycle_time_extension);
    166
    167	if (ktime_before(intv_end, cycle_end))
    168		return intv_end;
    169	else if (admin && admin != sched &&
    170		 ktime_after(admin->base_time, cycle_end) &&
    171		 ktime_before(admin->base_time, cycle_ext_end))
    172		return admin->base_time;
    173	else
    174		return cycle_end;
    175}
    176
    177static int length_to_duration(struct taprio_sched *q, int len)
    178{
    179	return div_u64(len * atomic64_read(&q->picos_per_byte), 1000);
    180}
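        /* For illustration: at 1 Gbit/s taprio_set_picos_per_byte() stores
         * 8000 ps per byte, so a 1500 byte frame maps to
         * div_u64(1500 * 8000, 1000) = 12000 ns (12 us) of wire time.
         */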
    181
     182/* Returns the entry corresponding to the next available interval. If
    183 * validate_interval is set, it only validates whether the timestamp occurs
    184 * when the gate corresponding to the skb's traffic class is open.
    185 */
    186static struct sched_entry *find_entry_to_transmit(struct sk_buff *skb,
    187						  struct Qdisc *sch,
    188						  struct sched_gate_list *sched,
    189						  struct sched_gate_list *admin,
    190						  ktime_t time,
    191						  ktime_t *interval_start,
    192						  ktime_t *interval_end,
    193						  bool validate_interval)
    194{
    195	ktime_t curr_intv_start, curr_intv_end, cycle_end, packet_transmit_time;
    196	ktime_t earliest_txtime = KTIME_MAX, txtime, cycle, transmit_end_time;
    197	struct sched_entry *entry = NULL, *entry_found = NULL;
    198	struct taprio_sched *q = qdisc_priv(sch);
    199	struct net_device *dev = qdisc_dev(sch);
    200	bool entry_available = false;
    201	s32 cycle_elapsed;
    202	int tc, n;
    203
    204	tc = netdev_get_prio_tc_map(dev, skb->priority);
    205	packet_transmit_time = length_to_duration(q, qdisc_pkt_len(skb));
    206
    207	*interval_start = 0;
    208	*interval_end = 0;
    209
    210	if (!sched)
    211		return NULL;
    212
    213	cycle = sched->cycle_time;
    214	cycle_elapsed = get_cycle_time_elapsed(sched, time);
    215	curr_intv_end = ktime_sub_ns(time, cycle_elapsed);
    216	cycle_end = ktime_add_ns(curr_intv_end, cycle);
    217
    218	list_for_each_entry(entry, &sched->entries, list) {
    219		curr_intv_start = curr_intv_end;
    220		curr_intv_end = get_interval_end_time(sched, admin, entry,
    221						      curr_intv_start);
    222
    223		if (ktime_after(curr_intv_start, cycle_end))
    224			break;
    225
    226		if (!(entry->gate_mask & BIT(tc)) ||
    227		    packet_transmit_time > entry->interval)
    228			continue;
    229
    230		txtime = entry->next_txtime;
    231
    232		if (ktime_before(txtime, time) || validate_interval) {
    233			transmit_end_time = ktime_add_ns(time, packet_transmit_time);
    234			if ((ktime_before(curr_intv_start, time) &&
    235			     ktime_before(transmit_end_time, curr_intv_end)) ||
    236			    (ktime_after(curr_intv_start, time) && !validate_interval)) {
    237				entry_found = entry;
    238				*interval_start = curr_intv_start;
    239				*interval_end = curr_intv_end;
    240				break;
    241			} else if (!entry_available && !validate_interval) {
    242				/* Here, we are just trying to find out the
    243				 * first available interval in the next cycle.
    244				 */
    245				entry_available = true;
    246				entry_found = entry;
    247				*interval_start = ktime_add_ns(curr_intv_start, cycle);
    248				*interval_end = ktime_add_ns(curr_intv_end, cycle);
    249			}
    250		} else if (ktime_before(txtime, earliest_txtime) &&
    251			   !entry_available) {
    252			earliest_txtime = txtime;
    253			entry_found = entry;
    254			n = div_s64(ktime_sub(txtime, curr_intv_start), cycle);
    255			*interval_start = ktime_add(curr_intv_start, n * cycle);
    256			*interval_end = ktime_add(curr_intv_end, n * cycle);
    257		}
    258	}
    259
    260	return entry_found;
    261}
    262
    263static bool is_valid_interval(struct sk_buff *skb, struct Qdisc *sch)
    264{
    265	struct taprio_sched *q = qdisc_priv(sch);
    266	struct sched_gate_list *sched, *admin;
    267	ktime_t interval_start, interval_end;
    268	struct sched_entry *entry;
    269
    270	rcu_read_lock();
    271	sched = rcu_dereference(q->oper_sched);
    272	admin = rcu_dereference(q->admin_sched);
    273
    274	entry = find_entry_to_transmit(skb, sch, sched, admin, skb->tstamp,
    275				       &interval_start, &interval_end, true);
    276	rcu_read_unlock();
    277
    278	return entry;
    279}
    280
    281static bool taprio_flags_valid(u32 flags)
    282{
    283	/* Make sure no other flag bits are set. */
    284	if (flags & ~(TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST |
    285		      TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD))
    286		return false;
    287	/* txtime-assist and full offload are mutually exclusive */
    288	if ((flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) &&
    289	    (flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD))
    290		return false;
    291	return true;
    292}
    293
    294/* This returns the tstamp value set by TCP in terms of the set clock. */
    295static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb)
    296{
    297	unsigned int offset = skb_network_offset(skb);
    298	const struct ipv6hdr *ipv6h;
    299	const struct iphdr *iph;
    300	struct ipv6hdr _ipv6h;
    301
    302	ipv6h = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h);
    303	if (!ipv6h)
    304		return 0;
    305
    306	if (ipv6h->version == 4) {
    307		iph = (struct iphdr *)ipv6h;
    308		offset += iph->ihl * 4;
    309
    310		/* special-case 6in4 tunnelling, as that is a common way to get
    311		 * v6 connectivity in the home
    312		 */
    313		if (iph->protocol == IPPROTO_IPV6) {
    314			ipv6h = skb_header_pointer(skb, offset,
    315						   sizeof(_ipv6h), &_ipv6h);
    316
    317			if (!ipv6h || ipv6h->nexthdr != IPPROTO_TCP)
    318				return 0;
    319		} else if (iph->protocol != IPPROTO_TCP) {
    320			return 0;
    321		}
    322	} else if (ipv6h->version == 6 && ipv6h->nexthdr != IPPROTO_TCP) {
    323		return 0;
    324	}
    325
    326	return taprio_mono_to_any(q, skb->skb_mstamp_ns);
    327}
    328
    329/* There are a few scenarios where we will have to modify the txtime from
    330 * what is read from next_txtime in sched_entry. They are:
    331 * 1. If txtime is in the past,
    332 *    a. The gate for the traffic class is currently open and packet can be
    333 *       transmitted before it closes, schedule the packet right away.
    334 *    b. If the gate corresponding to the traffic class is going to open later
    335 *       in the cycle, set the txtime of packet to the interval start.
    336 * 2. If txtime is in the future, there are packets corresponding to the
    337 *    current traffic class waiting to be transmitted. So, the following
    338 *    possibilities exist:
    339 *    a. We can transmit the packet before the window containing the txtime
    340 *       closes.
    341 *    b. The window might close before the transmission can be completed
    342 *       successfully. So, schedule the packet in the next open window.
    343 */
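        /* Worked example for the cases above (illustrative numbers): assume
         * the skb's traffic class is only open during [10ms, 20ms) of each
         * 100ms cycle. If next_txtime is in the past and the lookup happens
         * 5ms into the cycle, case 1.b applies and the txtime is pushed up
         * to the interval start (10ms). If instead the lookup happens at
         * 12ms and the frame fits before 20ms, case 1.a applies and it is
         * scheduled right away.
         */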
    344static long get_packet_txtime(struct sk_buff *skb, struct Qdisc *sch)
    345{
    346	ktime_t transmit_end_time, interval_end, interval_start, tcp_tstamp;
    347	struct taprio_sched *q = qdisc_priv(sch);
    348	struct sched_gate_list *sched, *admin;
    349	ktime_t minimum_time, now, txtime;
    350	int len, packet_transmit_time;
    351	struct sched_entry *entry;
    352	bool sched_changed;
    353
    354	now = taprio_get_time(q);
    355	minimum_time = ktime_add_ns(now, q->txtime_delay);
    356
    357	tcp_tstamp = get_tcp_tstamp(q, skb);
    358	minimum_time = max_t(ktime_t, minimum_time, tcp_tstamp);
    359
    360	rcu_read_lock();
    361	admin = rcu_dereference(q->admin_sched);
    362	sched = rcu_dereference(q->oper_sched);
    363	if (admin && ktime_after(minimum_time, admin->base_time))
    364		switch_schedules(q, &admin, &sched);
    365
    366	/* Until the schedule starts, all the queues are open */
    367	if (!sched || ktime_before(minimum_time, sched->base_time)) {
    368		txtime = minimum_time;
    369		goto done;
    370	}
    371
    372	len = qdisc_pkt_len(skb);
    373	packet_transmit_time = length_to_duration(q, len);
    374
    375	do {
    376		sched_changed = false;
    377
    378		entry = find_entry_to_transmit(skb, sch, sched, admin,
    379					       minimum_time,
    380					       &interval_start, &interval_end,
    381					       false);
    382		if (!entry) {
    383			txtime = 0;
    384			goto done;
    385		}
    386
    387		txtime = entry->next_txtime;
    388		txtime = max_t(ktime_t, txtime, minimum_time);
    389		txtime = max_t(ktime_t, txtime, interval_start);
    390
    391		if (admin && admin != sched &&
    392		    ktime_after(txtime, admin->base_time)) {
    393			sched = admin;
    394			sched_changed = true;
    395			continue;
    396		}
    397
    398		transmit_end_time = ktime_add(txtime, packet_transmit_time);
    399		minimum_time = transmit_end_time;
    400
     401	/* Update the txtime of the current entry to the next time its
     402	 * interval starts.
    403		 */
    404		if (ktime_after(transmit_end_time, interval_end))
    405			entry->next_txtime = ktime_add(interval_start, sched->cycle_time);
    406	} while (sched_changed || ktime_after(transmit_end_time, interval_end));
    407
    408	entry->next_txtime = transmit_end_time;
    409
    410done:
    411	rcu_read_unlock();
    412	return txtime;
    413}
    414
    415static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
    416			      struct Qdisc *child, struct sk_buff **to_free)
    417{
    418	struct taprio_sched *q = qdisc_priv(sch);
    419
    420	/* sk_flags are only safe to use on full sockets. */
    421	if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) {
    422		if (!is_valid_interval(skb, sch))
    423			return qdisc_drop(skb, sch, to_free);
    424	} else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
    425		skb->tstamp = get_packet_txtime(skb, sch);
    426		if (!skb->tstamp)
    427			return qdisc_drop(skb, sch, to_free);
    428	}
    429
    430	qdisc_qstats_backlog_inc(sch, skb);
    431	sch->q.qlen++;
    432
    433	return qdisc_enqueue(skb, child, to_free);
    434}
    435
    436static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
    437			  struct sk_buff **to_free)
    438{
    439	struct taprio_sched *q = qdisc_priv(sch);
    440	struct Qdisc *child;
    441	int queue;
    442
    443	if (unlikely(FULL_OFFLOAD_IS_ENABLED(q->flags))) {
    444		WARN_ONCE(1, "Trying to enqueue skb into the root of a taprio qdisc configured with full offload\n");
    445		return qdisc_drop(skb, sch, to_free);
    446	}
    447
    448	queue = skb_get_queue_mapping(skb);
    449
    450	child = q->qdiscs[queue];
    451	if (unlikely(!child))
    452		return qdisc_drop(skb, sch, to_free);
    453
    454	/* Large packets might not be transmitted when the transmission duration
    455	 * exceeds any configured interval. Therefore, segment the skb into
    456	 * smaller chunks. Skip it for the full offload case, as the driver
    457	 * and/or the hardware is expected to handle this.
    458	 */
    459	if (skb_is_gso(skb) && !FULL_OFFLOAD_IS_ENABLED(q->flags)) {
    460		unsigned int slen = 0, numsegs = 0, len = qdisc_pkt_len(skb);
    461		netdev_features_t features = netif_skb_features(skb);
    462		struct sk_buff *segs, *nskb;
    463		int ret;
    464
    465		segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
    466		if (IS_ERR_OR_NULL(segs))
    467			return qdisc_drop(skb, sch, to_free);
    468
    469		skb_list_walk_safe(segs, segs, nskb) {
    470			skb_mark_not_on_list(segs);
    471			qdisc_skb_cb(segs)->pkt_len = segs->len;
    472			slen += segs->len;
    473
    474			ret = taprio_enqueue_one(segs, sch, child, to_free);
    475			if (ret != NET_XMIT_SUCCESS) {
    476				if (net_xmit_drop_count(ret))
    477					qdisc_qstats_drop(sch);
    478			} else {
    479				numsegs++;
    480			}
    481		}
    482
    483		if (numsegs > 1)
    484			qdisc_tree_reduce_backlog(sch, 1 - numsegs, len - slen);
    485		consume_skb(skb);
    486
    487		return numsegs > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
    488	}
    489
    490	return taprio_enqueue_one(skb, sch, child, to_free);
    491}
    492
    493static struct sk_buff *taprio_peek_soft(struct Qdisc *sch)
    494{
    495	struct taprio_sched *q = qdisc_priv(sch);
    496	struct net_device *dev = qdisc_dev(sch);
    497	struct sched_entry *entry;
    498	struct sk_buff *skb;
    499	u32 gate_mask;
    500	int i;
    501
    502	rcu_read_lock();
    503	entry = rcu_dereference(q->current_entry);
    504	gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
    505	rcu_read_unlock();
    506
    507	if (!gate_mask)
    508		return NULL;
    509
    510	for (i = 0; i < dev->num_tx_queues; i++) {
    511		struct Qdisc *child = q->qdiscs[i];
    512		int prio;
    513		u8 tc;
    514
    515		if (unlikely(!child))
    516			continue;
    517
    518		skb = child->ops->peek(child);
    519		if (!skb)
    520			continue;
    521
    522		if (TXTIME_ASSIST_IS_ENABLED(q->flags))
    523			return skb;
    524
    525		prio = skb->priority;
    526		tc = netdev_get_prio_tc_map(dev, prio);
    527
    528		if (!(gate_mask & BIT(tc)))
    529			continue;
    530
    531		return skb;
    532	}
    533
    534	return NULL;
    535}
    536
    537static struct sk_buff *taprio_peek_offload(struct Qdisc *sch)
    538{
    539	WARN_ONCE(1, "Trying to peek into the root of a taprio qdisc configured with full offload\n");
    540
    541	return NULL;
    542}
    543
    544static struct sk_buff *taprio_peek(struct Qdisc *sch)
    545{
    546	struct taprio_sched *q = qdisc_priv(sch);
    547
    548	return q->peek(sch);
    549}
    550
    551static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry)
    552{
    553	atomic_set(&entry->budget,
    554		   div64_u64((u64)entry->interval * 1000,
    555			     atomic64_read(&q->picos_per_byte)));
    556}
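        /* For illustration: the budget is expressed in bytes. A 100000 ns
         * (100 us) gate interval at 8000 ps/byte (1 Gbit/s) yields a budget
         * of 100000 * 1000 / 8000 = 12500 bytes; taprio_dequeue_soft()
         * stops dequeueing for the entry once that budget would go negative.
         */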
    557
    558static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch)
    559{
    560	struct taprio_sched *q = qdisc_priv(sch);
    561	struct net_device *dev = qdisc_dev(sch);
    562	struct sk_buff *skb = NULL;
    563	struct sched_entry *entry;
    564	u32 gate_mask;
    565	int i;
    566
    567	rcu_read_lock();
    568	entry = rcu_dereference(q->current_entry);
    569	/* if there's no entry, it means that the schedule didn't
     570	 * start yet, so force all gates to be open; this is in
     571	 * accordance with IEEE 802.1Qbv-2015 Section 8.6.9.4.5
    572	 * "AdminGateStates"
    573	 */
    574	gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
    575
    576	if (!gate_mask)
    577		goto done;
    578
    579	for (i = 0; i < dev->num_tx_queues; i++) {
    580		struct Qdisc *child = q->qdiscs[i];
    581		ktime_t guard;
    582		int prio;
    583		int len;
    584		u8 tc;
    585
    586		if (unlikely(!child))
    587			continue;
    588
    589		if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
    590			skb = child->ops->dequeue(child);
    591			if (!skb)
    592				continue;
    593			goto skb_found;
    594		}
    595
    596		skb = child->ops->peek(child);
    597		if (!skb)
    598			continue;
    599
    600		prio = skb->priority;
    601		tc = netdev_get_prio_tc_map(dev, prio);
    602
    603		if (!(gate_mask & BIT(tc))) {
    604			skb = NULL;
    605			continue;
    606		}
    607
    608		len = qdisc_pkt_len(skb);
    609		guard = ktime_add_ns(taprio_get_time(q),
    610				     length_to_duration(q, len));
    611
    612		/* In the case that there's no gate entry, there's no
    613		 * guard band ...
    614		 */
    615		if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
    616		    ktime_after(guard, entry->close_time)) {
    617			skb = NULL;
    618			continue;
    619		}
    620
    621		/* ... and no budget. */
    622		if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
    623		    atomic_sub_return(len, &entry->budget) < 0) {
    624			skb = NULL;
    625			continue;
    626		}
    627
    628		skb = child->ops->dequeue(child);
    629		if (unlikely(!skb))
    630			goto done;
    631
    632skb_found:
    633		qdisc_bstats_update(sch, skb);
    634		qdisc_qstats_backlog_dec(sch, skb);
    635		sch->q.qlen--;
    636
    637		goto done;
    638	}
    639
    640done:
    641	rcu_read_unlock();
    642
    643	return skb;
    644}
    645
    646static struct sk_buff *taprio_dequeue_offload(struct Qdisc *sch)
    647{
    648	WARN_ONCE(1, "Trying to dequeue from the root of a taprio qdisc configured with full offload\n");
    649
    650	return NULL;
    651}
    652
    653static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
    654{
    655	struct taprio_sched *q = qdisc_priv(sch);
    656
    657	return q->dequeue(sch);
    658}
    659
    660static bool should_restart_cycle(const struct sched_gate_list *oper,
    661				 const struct sched_entry *entry)
    662{
    663	if (list_is_last(&entry->list, &oper->entries))
    664		return true;
    665
    666	if (ktime_compare(entry->close_time, oper->cycle_close_time) == 0)
    667		return true;
    668
    669	return false;
    670}
    671
    672static bool should_change_schedules(const struct sched_gate_list *admin,
    673				    const struct sched_gate_list *oper,
    674				    ktime_t close_time)
    675{
    676	ktime_t next_base_time, extension_time;
    677
    678	if (!admin)
    679		return false;
    680
    681	next_base_time = sched_base_time(admin);
    682
    683	/* This is the simple case, the close_time would fall after
    684	 * the next schedule base_time.
    685	 */
    686	if (ktime_compare(next_base_time, close_time) <= 0)
    687		return true;
    688
    689	/* This is the cycle_time_extension case, if the close_time
    690	 * plus the amount that can be extended would fall after the
    691	 * next schedule base_time, we can extend the current schedule
    692	 * for that amount.
    693	 */
    694	extension_time = ktime_add_ns(close_time, oper->cycle_time_extension);
    695
    696	/* FIXME: the IEEE 802.1Q-2018 Specification isn't clear about
    697	 * how precisely the extension should be made. So after
    698	 * conformance testing, this logic may change.
    699	 */
    700	if (ktime_compare(next_base_time, extension_time) <= 0)
    701		return true;
    702
    703	return false;
    704}
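        /* For illustration (made-up times): if the admin schedule's
         * base_time is 9ms and the close_time computed for the next entry
         * is 10ms, the first test above is true, so advance_sched() switches
         * schedules and programs the timer for the admin base_time instead
         * of the next entry's close_time.
         */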
    705
    706static enum hrtimer_restart advance_sched(struct hrtimer *timer)
    707{
    708	struct taprio_sched *q = container_of(timer, struct taprio_sched,
    709					      advance_timer);
    710	struct sched_gate_list *oper, *admin;
    711	struct sched_entry *entry, *next;
    712	struct Qdisc *sch = q->root;
    713	ktime_t close_time;
    714
    715	spin_lock(&q->current_entry_lock);
    716	entry = rcu_dereference_protected(q->current_entry,
    717					  lockdep_is_held(&q->current_entry_lock));
    718	oper = rcu_dereference_protected(q->oper_sched,
    719					 lockdep_is_held(&q->current_entry_lock));
    720	admin = rcu_dereference_protected(q->admin_sched,
    721					  lockdep_is_held(&q->current_entry_lock));
    722
    723	if (!oper)
    724		switch_schedules(q, &admin, &oper);
    725
    726	/* This can happen in two cases: 1. this is the very first run
    727	 * of this function (i.e. we weren't running any schedule
    728	 * previously); 2. The previous schedule just ended. The first
     729	 * entry of every schedule is pre-calculated during
     730	 * schedule initialization.
    731	 */
    732	if (unlikely(!entry || entry->close_time == oper->base_time)) {
    733		next = list_first_entry(&oper->entries, struct sched_entry,
    734					list);
    735		close_time = next->close_time;
    736		goto first_run;
    737	}
    738
    739	if (should_restart_cycle(oper, entry)) {
    740		next = list_first_entry(&oper->entries, struct sched_entry,
    741					list);
    742		oper->cycle_close_time = ktime_add_ns(oper->cycle_close_time,
    743						      oper->cycle_time);
    744	} else {
    745		next = list_next_entry(entry, list);
    746	}
    747
    748	close_time = ktime_add_ns(entry->close_time, next->interval);
    749	close_time = min_t(ktime_t, close_time, oper->cycle_close_time);
    750
    751	if (should_change_schedules(admin, oper, close_time)) {
    752		/* Set things so the next time this runs, the new
    753		 * schedule runs.
    754		 */
    755		close_time = sched_base_time(admin);
    756		switch_schedules(q, &admin, &oper);
    757	}
    758
    759	next->close_time = close_time;
    760	taprio_set_budget(q, next);
    761
    762first_run:
    763	rcu_assign_pointer(q->current_entry, next);
    764	spin_unlock(&q->current_entry_lock);
    765
    766	hrtimer_set_expires(&q->advance_timer, close_time);
    767
    768	rcu_read_lock();
    769	__netif_schedule(sch);
    770	rcu_read_unlock();
    771
    772	return HRTIMER_RESTART;
    773}
    774
    775static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = {
    776	[TCA_TAPRIO_SCHED_ENTRY_INDEX]	   = { .type = NLA_U32 },
    777	[TCA_TAPRIO_SCHED_ENTRY_CMD]	   = { .type = NLA_U8 },
    778	[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK] = { .type = NLA_U32 },
    779	[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]  = { .type = NLA_U32 },
    780};
    781
    782static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
    783	[TCA_TAPRIO_ATTR_PRIOMAP]	       = {
    784		.len = sizeof(struct tc_mqprio_qopt)
    785	},
    786	[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]           = { .type = NLA_NESTED },
    787	[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]            = { .type = NLA_S64 },
    788	[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]         = { .type = NLA_NESTED },
    789	[TCA_TAPRIO_ATTR_SCHED_CLOCKID]              = { .type = NLA_S32 },
    790	[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]           = { .type = NLA_S64 },
    791	[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
    792	[TCA_TAPRIO_ATTR_FLAGS]                      = { .type = NLA_U32 },
    793	[TCA_TAPRIO_ATTR_TXTIME_DELAY]		     = { .type = NLA_U32 },
    794};
    795
    796static int fill_sched_entry(struct taprio_sched *q, struct nlattr **tb,
    797			    struct sched_entry *entry,
    798			    struct netlink_ext_ack *extack)
    799{
    800	int min_duration = length_to_duration(q, ETH_ZLEN);
    801	u32 interval = 0;
    802
    803	if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD])
    804		entry->command = nla_get_u8(
    805			tb[TCA_TAPRIO_SCHED_ENTRY_CMD]);
    806
    807	if (tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK])
    808		entry->gate_mask = nla_get_u32(
    809			tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]);
    810
    811	if (tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL])
    812		interval = nla_get_u32(
    813			tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]);
    814
    815	/* The interval should allow at least the minimum ethernet
    816	 * frame to go out.
    817	 */
    818	if (interval < min_duration) {
    819		NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
    820		return -EINVAL;
    821	}
    822
    823	entry->interval = interval;
    824
    825	return 0;
    826}
    827
    828static int parse_sched_entry(struct taprio_sched *q, struct nlattr *n,
    829			     struct sched_entry *entry, int index,
    830			     struct netlink_ext_ack *extack)
    831{
    832	struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
    833	int err;
    834
    835	err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n,
    836					  entry_policy, NULL);
    837	if (err < 0) {
    838		NL_SET_ERR_MSG(extack, "Could not parse nested entry");
    839		return -EINVAL;
    840	}
    841
    842	entry->index = index;
    843
    844	return fill_sched_entry(q, tb, entry, extack);
    845}
    846
    847static int parse_sched_list(struct taprio_sched *q, struct nlattr *list,
    848			    struct sched_gate_list *sched,
    849			    struct netlink_ext_ack *extack)
    850{
    851	struct nlattr *n;
    852	int err, rem;
    853	int i = 0;
    854
    855	if (!list)
    856		return -EINVAL;
    857
    858	nla_for_each_nested(n, list, rem) {
    859		struct sched_entry *entry;
    860
    861		if (nla_type(n) != TCA_TAPRIO_SCHED_ENTRY) {
    862			NL_SET_ERR_MSG(extack, "Attribute is not of type 'entry'");
    863			continue;
    864		}
    865
    866		entry = kzalloc(sizeof(*entry), GFP_KERNEL);
    867		if (!entry) {
    868			NL_SET_ERR_MSG(extack, "Not enough memory for entry");
    869			return -ENOMEM;
    870		}
    871
    872		err = parse_sched_entry(q, n, entry, i, extack);
    873		if (err < 0) {
    874			kfree(entry);
    875			return err;
    876		}
    877
    878		list_add_tail(&entry->list, &sched->entries);
    879		i++;
    880	}
    881
    882	sched->num_entries = i;
    883
    884	return i;
    885}
    886
    887static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
    888				 struct sched_gate_list *new,
    889				 struct netlink_ext_ack *extack)
    890{
    891	int err = 0;
    892
    893	if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) {
    894		NL_SET_ERR_MSG(extack, "Adding a single entry is not supported");
    895		return -ENOTSUPP;
    896	}
    897
    898	if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME])
    899		new->base_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]);
    900
    901	if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION])
    902		new->cycle_time_extension = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION]);
    903
    904	if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME])
    905		new->cycle_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]);
    906
    907	if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST])
    908		err = parse_sched_list(q, tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST],
    909				       new, extack);
    910	if (err < 0)
    911		return err;
    912
    913	if (!new->cycle_time) {
    914		struct sched_entry *entry;
    915		ktime_t cycle = 0;
    916
    917		list_for_each_entry(entry, &new->entries, list)
    918			cycle = ktime_add_ns(cycle, entry->interval);
    919
    920		if (!cycle) {
    921			NL_SET_ERR_MSG(extack, "'cycle_time' can never be 0");
    922			return -EINVAL;
    923		}
    924
    925		new->cycle_time = cycle;
    926	}
    927
    928	return 0;
    929}
    930
    931static int taprio_parse_mqprio_opt(struct net_device *dev,
    932				   struct tc_mqprio_qopt *qopt,
    933				   struct netlink_ext_ack *extack,
    934				   u32 taprio_flags)
    935{
    936	int i, j;
    937
    938	if (!qopt && !dev->num_tc) {
    939		NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
    940		return -EINVAL;
    941	}
    942
    943	/* If num_tc is already set, it means that the user already
    944	 * configured the mqprio part
    945	 */
    946	if (dev->num_tc)
    947		return 0;
    948
    949	/* Verify num_tc is not out of max range */
    950	if (qopt->num_tc > TC_MAX_QUEUE) {
    951		NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range");
    952		return -EINVAL;
    953	}
    954
    955	/* taprio imposes that traffic classes map 1:n to tx queues */
    956	if (qopt->num_tc > dev->num_tx_queues) {
    957		NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues");
    958		return -EINVAL;
    959	}
    960
    961	/* Verify priority mapping uses valid tcs */
    962	for (i = 0; i <= TC_BITMASK; i++) {
    963		if (qopt->prio_tc_map[i] >= qopt->num_tc) {
    964			NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping");
    965			return -EINVAL;
    966		}
    967	}
    968
    969	for (i = 0; i < qopt->num_tc; i++) {
    970		unsigned int last = qopt->offset[i] + qopt->count[i];
    971
     972		/* Verify the queue count is within the tx range; being equal to
     973		 * real_num_tx_queues indicates the last queue is in use.
    974		 */
    975		if (qopt->offset[i] >= dev->num_tx_queues ||
    976		    !qopt->count[i] ||
    977		    last > dev->real_num_tx_queues) {
    978			NL_SET_ERR_MSG(extack, "Invalid queue in traffic class to queue mapping");
    979			return -EINVAL;
    980		}
    981
    982		if (TXTIME_ASSIST_IS_ENABLED(taprio_flags))
    983			continue;
    984
    985		/* Verify that the offset and counts do not overlap */
    986		for (j = i + 1; j < qopt->num_tc; j++) {
    987			if (last > qopt->offset[j]) {
    988				NL_SET_ERR_MSG(extack, "Detected overlap in the traffic class to queue mapping");
    989				return -EINVAL;
    990			}
    991		}
    992	}
    993
    994	return 0;
    995}
    996
    997static int taprio_get_start_time(struct Qdisc *sch,
    998				 struct sched_gate_list *sched,
    999				 ktime_t *start)
   1000{
   1001	struct taprio_sched *q = qdisc_priv(sch);
   1002	ktime_t now, base, cycle;
   1003	s64 n;
   1004
   1005	base = sched_base_time(sched);
   1006	now = taprio_get_time(q);
   1007
   1008	if (ktime_after(base, now)) {
   1009		*start = base;
   1010		return 0;
   1011	}
   1012
   1013	cycle = sched->cycle_time;
   1014
   1015	/* The qdisc is expected to have at least one sched_entry.  Moreover,
   1016	 * any entry must have 'interval' > 0. Thus if the cycle time is zero,
   1017	 * something went really wrong. In that case, we should warn about this
   1018	 * inconsistent state and return error.
   1019	 */
   1020	if (WARN_ON(!cycle))
   1021		return -EFAULT;
   1022
   1023	/* Schedule the start time for the beginning of the next
   1024	 * cycle.
   1025	 */
   1026	n = div64_s64(ktime_sub_ns(now, base), cycle);
   1027	*start = ktime_add_ns(base, (n + 1) * cycle);
   1028	return 0;
   1029}
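        /* For illustration (made-up numbers): base = 1000000 ns,
         * cycle = 500000 ns, now = 2300000 ns. Then n = 2 full cycles have
         * elapsed since base, so *start = 1000000 + 3 * 500000 = 2500000 ns,
         * i.e. the beginning of the next cycle.
         */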
   1030
   1031static void setup_first_close_time(struct taprio_sched *q,
   1032				   struct sched_gate_list *sched, ktime_t base)
   1033{
   1034	struct sched_entry *first;
   1035	ktime_t cycle;
   1036
   1037	first = list_first_entry(&sched->entries,
   1038				 struct sched_entry, list);
   1039
   1040	cycle = sched->cycle_time;
   1041
   1042	/* FIXME: find a better place to do this */
   1043	sched->cycle_close_time = ktime_add_ns(base, cycle);
   1044
   1045	first->close_time = ktime_add_ns(base, first->interval);
   1046	taprio_set_budget(q, first);
   1047	rcu_assign_pointer(q->current_entry, NULL);
   1048}
   1049
   1050static void taprio_start_sched(struct Qdisc *sch,
   1051			       ktime_t start, struct sched_gate_list *new)
   1052{
   1053	struct taprio_sched *q = qdisc_priv(sch);
   1054	ktime_t expires;
   1055
   1056	if (FULL_OFFLOAD_IS_ENABLED(q->flags))
   1057		return;
   1058
   1059	expires = hrtimer_get_expires(&q->advance_timer);
   1060	if (expires == 0)
   1061		expires = KTIME_MAX;
   1062
   1063	/* If the new schedule starts before the next expiration, we
   1064	 * reprogram it to the earliest one, so we change the admin
   1065	 * schedule to the operational one at the right time.
   1066	 */
   1067	start = min_t(ktime_t, start, expires);
   1068
   1069	hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS);
   1070}
   1071
   1072static void taprio_set_picos_per_byte(struct net_device *dev,
   1073				      struct taprio_sched *q)
   1074{
   1075	struct ethtool_link_ksettings ecmd;
   1076	int speed = SPEED_10;
   1077	int picos_per_byte;
   1078	int err;
   1079
   1080	err = __ethtool_get_link_ksettings(dev, &ecmd);
   1081	if (err < 0)
   1082		goto skip;
   1083
   1084	if (ecmd.base.speed && ecmd.base.speed != SPEED_UNKNOWN)
   1085		speed = ecmd.base.speed;
   1086
   1087skip:
   1088	picos_per_byte = (USEC_PER_SEC * 8) / speed;
   1089
   1090	atomic64_set(&q->picos_per_byte, picos_per_byte);
   1091	netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n",
   1092		   dev->name, (long long)atomic64_read(&q->picos_per_byte),
   1093		   ecmd.base.speed);
   1094}
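        /* Note on units: ecmd.base.speed is reported in Mbit/s, so
         * (USEC_PER_SEC * 8) / speed yields picoseconds per byte: 8000 at
         * 1 Gbit/s (SPEED_1000) and 800000 at the SPEED_10 fallback used
         * when the link speed is unknown.
         */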
   1095
   1096static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
   1097			       void *ptr)
   1098{
   1099	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
   1100	struct net_device *qdev;
   1101	struct taprio_sched *q;
   1102	bool found = false;
   1103
   1104	ASSERT_RTNL();
   1105
   1106	if (event != NETDEV_UP && event != NETDEV_CHANGE)
   1107		return NOTIFY_DONE;
   1108
   1109	spin_lock(&taprio_list_lock);
   1110	list_for_each_entry(q, &taprio_list, taprio_list) {
   1111		qdev = qdisc_dev(q->root);
   1112		if (qdev == dev) {
   1113			found = true;
   1114			break;
   1115		}
   1116	}
   1117	spin_unlock(&taprio_list_lock);
   1118
   1119	if (found)
   1120		taprio_set_picos_per_byte(dev, q);
   1121
   1122	return NOTIFY_DONE;
   1123}
   1124
   1125static void setup_txtime(struct taprio_sched *q,
   1126			 struct sched_gate_list *sched, ktime_t base)
   1127{
   1128	struct sched_entry *entry;
   1129	u32 interval = 0;
   1130
   1131	list_for_each_entry(entry, &sched->entries, list) {
   1132		entry->next_txtime = ktime_add_ns(base, interval);
   1133		interval += entry->interval;
   1134	}
   1135}
   1136
   1137static struct tc_taprio_qopt_offload *taprio_offload_alloc(int num_entries)
   1138{
   1139	struct __tc_taprio_qopt_offload *__offload;
   1140
   1141	__offload = kzalloc(struct_size(__offload, offload.entries, num_entries),
   1142			    GFP_KERNEL);
   1143	if (!__offload)
   1144		return NULL;
   1145
   1146	refcount_set(&__offload->users, 1);
   1147
   1148	return &__offload->offload;
   1149}
   1150
   1151struct tc_taprio_qopt_offload *taprio_offload_get(struct tc_taprio_qopt_offload
   1152						  *offload)
   1153{
   1154	struct __tc_taprio_qopt_offload *__offload;
   1155
   1156	__offload = container_of(offload, struct __tc_taprio_qopt_offload,
   1157				 offload);
   1158
   1159	refcount_inc(&__offload->users);
   1160
   1161	return offload;
   1162}
   1163EXPORT_SYMBOL_GPL(taprio_offload_get);
   1164
   1165void taprio_offload_free(struct tc_taprio_qopt_offload *offload)
   1166{
   1167	struct __tc_taprio_qopt_offload *__offload;
   1168
   1169	__offload = container_of(offload, struct __tc_taprio_qopt_offload,
   1170				 offload);
   1171
   1172	if (!refcount_dec_and_test(&__offload->users))
   1173		return;
   1174
   1175	kfree(__offload);
   1176}
   1177EXPORT_SYMBOL_GPL(taprio_offload_free);
   1178
   1179/* The function will only serve to keep the pointers to the "oper" and "admin"
   1180 * schedules valid in relation to their base times, so when calling dump() the
    1181 * user looks at the right schedules.
   1182 * When using full offload, the admin configuration is promoted to oper at the
   1183 * base_time in the PHC time domain.  But because the system time is not
   1184 * necessarily in sync with that, we can't just trigger a hrtimer to call
   1185 * switch_schedules at the right hardware time.
   1186 * At the moment we call this by hand right away from taprio, but in the future
   1187 * it will be useful to create a mechanism for drivers to notify taprio of the
   1188 * offload state (PENDING, ACTIVE, INACTIVE) so it can be visible in dump().
   1189 * This is left as TODO.
   1190 */
   1191static void taprio_offload_config_changed(struct taprio_sched *q)
   1192{
   1193	struct sched_gate_list *oper, *admin;
   1194
   1195	spin_lock(&q->current_entry_lock);
   1196
   1197	oper = rcu_dereference_protected(q->oper_sched,
   1198					 lockdep_is_held(&q->current_entry_lock));
   1199	admin = rcu_dereference_protected(q->admin_sched,
   1200					  lockdep_is_held(&q->current_entry_lock));
   1201
   1202	switch_schedules(q, &admin, &oper);
   1203
   1204	spin_unlock(&q->current_entry_lock);
   1205}
   1206
   1207static u32 tc_map_to_queue_mask(struct net_device *dev, u32 tc_mask)
   1208{
   1209	u32 i, queue_mask = 0;
   1210
   1211	for (i = 0; i < dev->num_tc; i++) {
   1212		u32 offset, count;
   1213
   1214		if (!(tc_mask & BIT(i)))
   1215			continue;
   1216
   1217		offset = dev->tc_to_txq[i].offset;
   1218		count = dev->tc_to_txq[i].count;
   1219
   1220		queue_mask |= GENMASK(offset + count - 1, offset);
   1221	}
   1222
   1223	return queue_mask;
   1224}
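        /* For illustration: with TC 0 mapped to queues 0-1 and TC 1 mapped
         * to queues 2-3 (tc_to_txq[1].offset = 2, .count = 2), a tc_mask of
         * BIT(1) yields a queue mask of GENMASK(3, 2) == 0xc, i.e. TX
         * queues 2 and 3.
         */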
   1225
   1226static void taprio_sched_to_offload(struct net_device *dev,
   1227				    struct sched_gate_list *sched,
   1228				    struct tc_taprio_qopt_offload *offload)
   1229{
   1230	struct sched_entry *entry;
   1231	int i = 0;
   1232
   1233	offload->base_time = sched->base_time;
   1234	offload->cycle_time = sched->cycle_time;
   1235	offload->cycle_time_extension = sched->cycle_time_extension;
   1236
   1237	list_for_each_entry(entry, &sched->entries, list) {
   1238		struct tc_taprio_sched_entry *e = &offload->entries[i];
   1239
   1240		e->command = entry->command;
   1241		e->interval = entry->interval;
   1242		e->gate_mask = tc_map_to_queue_mask(dev, entry->gate_mask);
   1243
   1244		i++;
   1245	}
   1246
   1247	offload->num_entries = i;
   1248}
   1249
   1250static int taprio_enable_offload(struct net_device *dev,
   1251				 struct taprio_sched *q,
   1252				 struct sched_gate_list *sched,
   1253				 struct netlink_ext_ack *extack)
   1254{
   1255	const struct net_device_ops *ops = dev->netdev_ops;
   1256	struct tc_taprio_qopt_offload *offload;
   1257	int err = 0;
   1258
   1259	if (!ops->ndo_setup_tc) {
   1260		NL_SET_ERR_MSG(extack,
   1261			       "Device does not support taprio offload");
   1262		return -EOPNOTSUPP;
   1263	}
   1264
   1265	offload = taprio_offload_alloc(sched->num_entries);
   1266	if (!offload) {
   1267		NL_SET_ERR_MSG(extack,
   1268			       "Not enough memory for enabling offload mode");
   1269		return -ENOMEM;
   1270	}
   1271	offload->enable = 1;
   1272	taprio_sched_to_offload(dev, sched, offload);
   1273
   1274	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
   1275	if (err < 0) {
   1276		NL_SET_ERR_MSG(extack,
   1277			       "Device failed to setup taprio offload");
   1278		goto done;
   1279	}
   1280
   1281done:
   1282	taprio_offload_free(offload);
   1283
   1284	return err;
   1285}
   1286
   1287static int taprio_disable_offload(struct net_device *dev,
   1288				  struct taprio_sched *q,
   1289				  struct netlink_ext_ack *extack)
   1290{
   1291	const struct net_device_ops *ops = dev->netdev_ops;
   1292	struct tc_taprio_qopt_offload *offload;
   1293	int err;
   1294
   1295	if (!FULL_OFFLOAD_IS_ENABLED(q->flags))
   1296		return 0;
   1297
   1298	if (!ops->ndo_setup_tc)
   1299		return -EOPNOTSUPP;
   1300
   1301	offload = taprio_offload_alloc(0);
   1302	if (!offload) {
   1303		NL_SET_ERR_MSG(extack,
   1304			       "Not enough memory to disable offload mode");
   1305		return -ENOMEM;
   1306	}
   1307	offload->enable = 0;
   1308
   1309	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
   1310	if (err < 0) {
   1311		NL_SET_ERR_MSG(extack,
   1312			       "Device failed to disable offload");
   1313		goto out;
   1314	}
   1315
   1316out:
   1317	taprio_offload_free(offload);
   1318
   1319	return err;
   1320}
   1321
   1322/* If full offload is enabled, the only possible clockid is the net device's
   1323 * PHC. For that reason, specifying a clockid through netlink is incorrect.
   1324 * For txtime-assist, it is implicitly assumed that the device's PHC is kept
   1325 * in sync with the specified clockid via a user space daemon such as phc2sys.
   1326 * For both software taprio and txtime-assist, the clockid is used for the
    1327 * hrtimer that advances the schedule and is therefore mandatory.
   1328 */
   1329static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb,
   1330				struct netlink_ext_ack *extack)
   1331{
   1332	struct taprio_sched *q = qdisc_priv(sch);
   1333	struct net_device *dev = qdisc_dev(sch);
   1334	int err = -EINVAL;
   1335
   1336	if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
   1337		const struct ethtool_ops *ops = dev->ethtool_ops;
   1338		struct ethtool_ts_info info = {
   1339			.cmd = ETHTOOL_GET_TS_INFO,
   1340			.phc_index = -1,
   1341		};
   1342
   1343		if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
   1344			NL_SET_ERR_MSG(extack,
   1345				       "The 'clockid' cannot be specified for full offload");
   1346			goto out;
   1347		}
   1348
   1349		if (ops && ops->get_ts_info)
   1350			err = ops->get_ts_info(dev, &info);
   1351
   1352		if (err || info.phc_index < 0) {
   1353			NL_SET_ERR_MSG(extack,
   1354				       "Device does not have a PTP clock");
   1355			err = -ENOTSUPP;
   1356			goto out;
   1357		}
   1358	} else if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
   1359		int clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);
   1360		enum tk_offsets tk_offset;
   1361
   1362		/* We only support static clockids and we don't allow
   1363		 * for it to be modified after the first init.
   1364		 */
   1365		if (clockid < 0 ||
   1366		    (q->clockid != -1 && q->clockid != clockid)) {
   1367			NL_SET_ERR_MSG(extack,
   1368				       "Changing the 'clockid' of a running schedule is not supported");
   1369			err = -ENOTSUPP;
   1370			goto out;
   1371		}
   1372
   1373		switch (clockid) {
   1374		case CLOCK_REALTIME:
   1375			tk_offset = TK_OFFS_REAL;
   1376			break;
   1377		case CLOCK_MONOTONIC:
   1378			tk_offset = TK_OFFS_MAX;
   1379			break;
   1380		case CLOCK_BOOTTIME:
   1381			tk_offset = TK_OFFS_BOOT;
   1382			break;
   1383		case CLOCK_TAI:
   1384			tk_offset = TK_OFFS_TAI;
   1385			break;
   1386		default:
   1387			NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
   1388			err = -EINVAL;
   1389			goto out;
   1390		}
   1391		/* This pairs with READ_ONCE() in taprio_mono_to_any */
   1392		WRITE_ONCE(q->tk_offset, tk_offset);
   1393
   1394		q->clockid = clockid;
   1395	} else {
   1396		NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory");
   1397		goto out;
   1398	}
   1399
   1400	/* Everything went ok, return success. */
   1401	err = 0;
   1402
   1403out:
   1404	return err;
   1405}
   1406
   1407static int taprio_mqprio_cmp(const struct net_device *dev,
   1408			     const struct tc_mqprio_qopt *mqprio)
   1409{
   1410	int i;
   1411
   1412	if (!mqprio || mqprio->num_tc != dev->num_tc)
   1413		return -1;
   1414
   1415	for (i = 0; i < mqprio->num_tc; i++)
   1416		if (dev->tc_to_txq[i].count != mqprio->count[i] ||
   1417		    dev->tc_to_txq[i].offset != mqprio->offset[i])
   1418			return -1;
   1419
   1420	for (i = 0; i <= TC_BITMASK; i++)
   1421		if (dev->prio_tc_map[i] != mqprio->prio_tc_map[i])
   1422			return -1;
   1423
   1424	return 0;
   1425}
   1426
   1427/* The semantics of the 'flags' argument in relation to 'change()'
   1428 * requests, are interpreted following two rules (which are applied in
   1429 * this order): (1) an omitted 'flags' argument is interpreted as
   1430 * zero; (2) the 'flags' of a "running" taprio instance cannot be
   1431 * changed.
   1432 */
   1433static int taprio_new_flags(const struct nlattr *attr, u32 old,
   1434			    struct netlink_ext_ack *extack)
   1435{
   1436	u32 new = 0;
   1437
   1438	if (attr)
   1439		new = nla_get_u32(attr);
   1440
   1441	if (old != TAPRIO_FLAGS_INVALID && old != new) {
   1442		NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported");
   1443		return -EOPNOTSUPP;
   1444	}
   1445
   1446	if (!taprio_flags_valid(new)) {
   1447		NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid");
   1448		return -EINVAL;
   1449	}
   1450
   1451	return new;
   1452}
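        /* Note: returning the flags through an int works because the only
         * valid bits are TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST and
         * TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD, so any accepted value is a
         * small non-negative number and cannot be confused with the
         * negative errno returns.
         */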
   1453
   1454static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
   1455			 struct netlink_ext_ack *extack)
   1456{
   1457	struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { };
   1458	struct sched_gate_list *oper, *admin, *new_admin;
   1459	struct taprio_sched *q = qdisc_priv(sch);
   1460	struct net_device *dev = qdisc_dev(sch);
   1461	struct tc_mqprio_qopt *mqprio = NULL;
   1462	unsigned long flags;
   1463	ktime_t start;
   1464	int i, err;
   1465
   1466	err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_ATTR_MAX, opt,
   1467					  taprio_policy, extack);
   1468	if (err < 0)
   1469		return err;
   1470
   1471	if (tb[TCA_TAPRIO_ATTR_PRIOMAP])
   1472		mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]);
   1473
   1474	err = taprio_new_flags(tb[TCA_TAPRIO_ATTR_FLAGS],
   1475			       q->flags, extack);
   1476	if (err < 0)
   1477		return err;
   1478
   1479	q->flags = err;
   1480
   1481	err = taprio_parse_mqprio_opt(dev, mqprio, extack, q->flags);
   1482	if (err < 0)
   1483		return err;
   1484
   1485	new_admin = kzalloc(sizeof(*new_admin), GFP_KERNEL);
   1486	if (!new_admin) {
   1487		NL_SET_ERR_MSG(extack, "Not enough memory for a new schedule");
   1488		return -ENOMEM;
   1489	}
   1490	INIT_LIST_HEAD(&new_admin->entries);
   1491
   1492	rcu_read_lock();
   1493	oper = rcu_dereference(q->oper_sched);
   1494	admin = rcu_dereference(q->admin_sched);
   1495	rcu_read_unlock();
   1496
   1497	/* no changes - no new mqprio settings */
   1498	if (!taprio_mqprio_cmp(dev, mqprio))
   1499		mqprio = NULL;
   1500
   1501	if (mqprio && (oper || admin)) {
   1502		NL_SET_ERR_MSG(extack, "Changing the traffic mapping of a running schedule is not supported");
   1503		err = -ENOTSUPP;
   1504		goto free_sched;
   1505	}
   1506
   1507	err = parse_taprio_schedule(q, tb, new_admin, extack);
   1508	if (err < 0)
   1509		goto free_sched;
   1510
   1511	if (new_admin->num_entries == 0) {
   1512		NL_SET_ERR_MSG(extack, "There should be at least one entry in the schedule");
   1513		err = -EINVAL;
   1514		goto free_sched;
   1515	}
   1516
   1517	err = taprio_parse_clockid(sch, tb, extack);
   1518	if (err < 0)
   1519		goto free_sched;
   1520
   1521	taprio_set_picos_per_byte(dev, q);
   1522
   1523	if (mqprio) {
   1524		err = netdev_set_num_tc(dev, mqprio->num_tc);
   1525		if (err)
   1526			goto free_sched;
   1527		for (i = 0; i < mqprio->num_tc; i++)
   1528			netdev_set_tc_queue(dev, i,
   1529					    mqprio->count[i],
   1530					    mqprio->offset[i]);
   1531
   1532		/* Always use supplied priority mappings */
   1533		for (i = 0; i <= TC_BITMASK; i++)
   1534			netdev_set_prio_tc_map(dev, i,
   1535					       mqprio->prio_tc_map[i]);
   1536	}
   1537
   1538	if (FULL_OFFLOAD_IS_ENABLED(q->flags))
   1539		err = taprio_enable_offload(dev, q, new_admin, extack);
   1540	else
   1541		err = taprio_disable_offload(dev, q, extack);
   1542	if (err)
   1543		goto free_sched;
   1544
   1545	/* Protects against enqueue()/dequeue() */
   1546	spin_lock_bh(qdisc_lock(sch));
   1547
   1548	if (tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]) {
   1549		if (!TXTIME_ASSIST_IS_ENABLED(q->flags)) {
   1550			NL_SET_ERR_MSG_MOD(extack, "txtime-delay can only be set when txtime-assist mode is enabled");
   1551			err = -EINVAL;
   1552			goto unlock;
   1553		}
   1554
   1555		q->txtime_delay = nla_get_u32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]);
   1556	}
   1557
   1558	if (!TXTIME_ASSIST_IS_ENABLED(q->flags) &&
   1559	    !FULL_OFFLOAD_IS_ENABLED(q->flags) &&
   1560	    !hrtimer_active(&q->advance_timer)) {
   1561		hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
   1562		q->advance_timer.function = advance_sched;
   1563	}
   1564
   1565	if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
   1566		q->dequeue = taprio_dequeue_offload;
   1567		q->peek = taprio_peek_offload;
   1568	} else {
   1569		/* Be sure to always keep the function pointers
   1570		 * in a consistent state.
   1571		 */
   1572		q->dequeue = taprio_dequeue_soft;
   1573		q->peek = taprio_peek_soft;
   1574	}
   1575
   1576	err = taprio_get_start_time(sch, new_admin, &start);
   1577	if (err < 0) {
   1578		NL_SET_ERR_MSG(extack, "Internal error: failed get start time");
   1579		goto unlock;
   1580	}
   1581
   1582	setup_txtime(q, new_admin, start);
   1583
   1584	if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
   1585		if (!oper) {
   1586			rcu_assign_pointer(q->oper_sched, new_admin);
   1587			err = 0;
   1588			new_admin = NULL;
   1589			goto unlock;
   1590		}
   1591
   1592		rcu_assign_pointer(q->admin_sched, new_admin);
   1593		if (admin)
   1594			call_rcu(&admin->rcu, taprio_free_sched_cb);
   1595	} else {
   1596		setup_first_close_time(q, new_admin, start);
   1597
   1598		/* Protects against advance_sched() */
   1599		spin_lock_irqsave(&q->current_entry_lock, flags);
   1600
   1601		taprio_start_sched(sch, start, new_admin);
   1602
   1603		rcu_assign_pointer(q->admin_sched, new_admin);
   1604		if (admin)
   1605			call_rcu(&admin->rcu, taprio_free_sched_cb);
   1606
   1607		spin_unlock_irqrestore(&q->current_entry_lock, flags);
   1608
   1609		if (FULL_OFFLOAD_IS_ENABLED(q->flags))
   1610			taprio_offload_config_changed(q);
   1611	}
   1612
   1613	new_admin = NULL;
   1614	err = 0;
   1615
   1616unlock:
   1617	spin_unlock_bh(qdisc_lock(sch));
   1618
   1619free_sched:
   1620	if (new_admin)
   1621		call_rcu(&new_admin->rcu, taprio_free_sched_cb);
   1622
   1623	return err;
   1624}
   1625
   1626static void taprio_reset(struct Qdisc *sch)
   1627{
   1628	struct taprio_sched *q = qdisc_priv(sch);
   1629	struct net_device *dev = qdisc_dev(sch);
   1630	int i;
   1631
   1632	hrtimer_cancel(&q->advance_timer);
   1633	if (q->qdiscs) {
   1634		for (i = 0; i < dev->num_tx_queues; i++)
   1635			if (q->qdiscs[i])
   1636				qdisc_reset(q->qdiscs[i]);
   1637	}
   1638	sch->qstats.backlog = 0;
   1639	sch->q.qlen = 0;
   1640}
   1641
   1642static void taprio_destroy(struct Qdisc *sch)
   1643{
   1644	struct taprio_sched *q = qdisc_priv(sch);
   1645	struct net_device *dev = qdisc_dev(sch);
   1646	unsigned int i;
   1647
   1648	spin_lock(&taprio_list_lock);
   1649	list_del(&q->taprio_list);
   1650	spin_unlock(&taprio_list_lock);
   1651
   1652	/* Note that taprio_reset() might not be called if an error
   1653	 * happens in qdisc_create(), after taprio_init() has been called.
   1654	 */
   1655	hrtimer_cancel(&q->advance_timer);
   1656
   1657	taprio_disable_offload(dev, q, NULL);
   1658
   1659	if (q->qdiscs) {
   1660		for (i = 0; i < dev->num_tx_queues; i++)
   1661			qdisc_put(q->qdiscs[i]);
   1662
   1663		kfree(q->qdiscs);
   1664	}
   1665	q->qdiscs = NULL;
   1666
   1667	netdev_reset_tc(dev);
   1668
   1669	if (q->oper_sched)
   1670		call_rcu(&q->oper_sched->rcu, taprio_free_sched_cb);
   1671
   1672	if (q->admin_sched)
   1673		call_rcu(&q->admin_sched->rcu, taprio_free_sched_cb);
   1674}
   1675
   1676static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
   1677		       struct netlink_ext_ack *extack)
   1678{
   1679	struct taprio_sched *q = qdisc_priv(sch);
   1680	struct net_device *dev = qdisc_dev(sch);
   1681	int i;
   1682
   1683	spin_lock_init(&q->current_entry_lock);
   1684
   1685	hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);
   1686	q->advance_timer.function = advance_sched;
   1687
   1688	q->dequeue = taprio_dequeue_soft;
   1689	q->peek = taprio_peek_soft;
   1690
   1691	q->root = sch;
   1692
   1693	/* We only support static clockids. Use an invalid value as default
   1694	 * and get the valid one on taprio_change().
   1695	 */
   1696	q->clockid = -1;
   1697	q->flags = TAPRIO_FLAGS_INVALID;
   1698
   1699	spin_lock(&taprio_list_lock);
   1700	list_add(&q->taprio_list, &taprio_list);
   1701	spin_unlock(&taprio_list_lock);
   1702
   1703	if (sch->parent != TC_H_ROOT)
   1704		return -EOPNOTSUPP;
   1705
   1706	if (!netif_is_multiqueue(dev))
   1707		return -EOPNOTSUPP;
   1708
   1709	/* pre-allocate qdisc, attachment can't fail */
   1710	q->qdiscs = kcalloc(dev->num_tx_queues,
   1711			    sizeof(q->qdiscs[0]),
   1712			    GFP_KERNEL);
   1713
   1714	if (!q->qdiscs)
   1715		return -ENOMEM;
   1716
   1717	if (!opt)
   1718		return -EINVAL;
   1719
   1720	for (i = 0; i < dev->num_tx_queues; i++) {
   1721		struct netdev_queue *dev_queue;
   1722		struct Qdisc *qdisc;
   1723
   1724		dev_queue = netdev_get_tx_queue(dev, i);
   1725		qdisc = qdisc_create_dflt(dev_queue,
   1726					  &pfifo_qdisc_ops,
   1727					  TC_H_MAKE(TC_H_MAJ(sch->handle),
   1728						    TC_H_MIN(i + 1)),
   1729					  extack);
   1730		if (!qdisc)
   1731			return -ENOMEM;
   1732
   1733		if (i < dev->real_num_tx_queues)
   1734			qdisc_hash_add(qdisc, false);
   1735
   1736		q->qdiscs[i] = qdisc;
   1737	}
   1738
   1739	return taprio_change(sch, opt, extack);
   1740}
   1741
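                /* Qdisc ->attach(): with full offload each child qdisc is grafted
                 * directly onto its TX queue and the children array is released,
                 * since packets never pass through taprio's software scheduler.
                 * Otherwise every TX queue is pointed back at taprio itself so
                 * enqueue/dequeue go through the gate schedule.
                 */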
   1742static void taprio_attach(struct Qdisc *sch)
   1743{
   1744	struct taprio_sched *q = qdisc_priv(sch);
   1745	struct net_device *dev = qdisc_dev(sch);
   1746	unsigned int ntx;
   1747
   1748	/* Attach underlying qdisc */
   1749	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
   1750		struct Qdisc *qdisc = q->qdiscs[ntx];
   1751		struct Qdisc *old;
   1752
   1753		if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
   1754			qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
   1755			old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
   1756		} else {
   1757			old = dev_graft_qdisc(qdisc->dev_queue, sch);
   1758			qdisc_refcount_inc(sch);
   1759		}
   1760		if (old)
   1761			qdisc_put(old);
   1762	}
   1763
   1764	/* access to the child qdiscs is not needed in offload mode */
   1765	if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
   1766		kfree(q->qdiscs);
   1767		q->qdiscs = NULL;
   1768	}
   1769}
   1770
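                /* Class IDs map 1:1 onto TX queues, offset by one (class 1 is
                 * queue 0); returns NULL for an out-of-range class.
                 */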
   1771static struct netdev_queue *taprio_queue_get(struct Qdisc *sch,
   1772					     unsigned long cl)
   1773{
   1774	struct net_device *dev = qdisc_dev(sch);
   1775	unsigned long ntx = cl - 1;
   1776
   1777	if (ntx >= dev->num_tx_queues)
   1778		return NULL;
   1779
   1780	return netdev_get_tx_queue(dev, ntx);
   1781}
   1782
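                /* Class ->graft(): replace the child qdisc of a single TX queue,
                 * deactivating the device around the swap if it is up.
                 */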
   1783static int taprio_graft(struct Qdisc *sch, unsigned long cl,
   1784			struct Qdisc *new, struct Qdisc **old,
   1785			struct netlink_ext_ack *extack)
   1786{
   1787	struct taprio_sched *q = qdisc_priv(sch);
   1788	struct net_device *dev = qdisc_dev(sch);
   1789	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
   1790
   1791	if (!dev_queue)
   1792		return -EINVAL;
   1793
   1794	if (dev->flags & IFF_UP)
   1795		dev_deactivate(dev);
   1796
   1797	if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
   1798		*old = dev_graft_qdisc(dev_queue, new);
   1799	} else {
   1800		*old = q->qdiscs[cl - 1];
   1801		q->qdiscs[cl - 1] = new;
   1802	}
   1803
   1804	if (new)
   1805		new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
   1806
   1807	if (dev->flags & IFF_UP)
   1808		dev_activate(dev);
   1809
   1810	return 0;
   1811}
   1812
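                /* Emit one TCA_TAPRIO_SCHED_ENTRY nest carrying the entry's
                 * index, command, gate mask and interval.
                 */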
   1813static int dump_entry(struct sk_buff *msg,
   1814		      const struct sched_entry *entry)
   1815{
   1816	struct nlattr *item;
   1817
   1818	item = nla_nest_start_noflag(msg, TCA_TAPRIO_SCHED_ENTRY);
   1819	if (!item)
   1820		return -ENOSPC;
   1821
   1822	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INDEX, entry->index))
   1823		goto nla_put_failure;
   1824
   1825	if (nla_put_u8(msg, TCA_TAPRIO_SCHED_ENTRY_CMD, entry->command))
   1826		goto nla_put_failure;
   1827
   1828	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_GATE_MASK,
   1829			entry->gate_mask))
   1830		goto nla_put_failure;
   1831
   1832	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INTERVAL,
   1833			entry->interval))
   1834		goto nla_put_failure;
   1835
   1836	return nla_nest_end(msg, item);
   1837
   1838nla_put_failure:
   1839	nla_nest_cancel(msg, item);
   1840	return -1;
   1841}
   1842
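                /* Dump one schedule: base time, cycle time, cycle time extension
                 * and the nested list of schedule entries.
                 */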
   1843static int dump_schedule(struct sk_buff *msg,
   1844			 const struct sched_gate_list *root)
   1845{
   1846	struct nlattr *entry_list;
   1847	struct sched_entry *entry;
   1848
   1849	if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_BASE_TIME,
   1850			root->base_time, TCA_TAPRIO_PAD))
   1851		return -1;
   1852
   1853	if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME,
   1854			root->cycle_time, TCA_TAPRIO_PAD))
   1855		return -1;
   1856
   1857	if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION,
   1858			root->cycle_time_extension, TCA_TAPRIO_PAD))
   1859		return -1;
   1860
   1861	entry_list = nla_nest_start_noflag(msg,
   1862					   TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST);
   1863	if (!entry_list)
   1864		goto error_nest;
   1865
   1866	list_for_each_entry(entry, &root->entries, list) {
   1867		if (dump_entry(msg, entry) < 0)
   1868			goto error_nest;
   1869	}
   1870
   1871	nla_nest_end(msg, entry_list);
   1872	return 0;
   1873
   1874error_nest:
   1875	nla_nest_cancel(msg, entry_list);
   1876	return -1;
   1877}
   1878
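                /* Qdisc ->dump(): report the priority to traffic class mapping,
                 * the clockid (software modes only), flags, txtime delay, the
                 * operational schedule and, if one is pending, the admin schedule.
                 */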
   1879static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
   1880{
   1881	struct taprio_sched *q = qdisc_priv(sch);
   1882	struct net_device *dev = qdisc_dev(sch);
   1883	struct sched_gate_list *oper, *admin;
   1884	struct tc_mqprio_qopt opt = { 0 };
   1885	struct nlattr *nest, *sched_nest;
   1886	unsigned int i;
   1887
   1888	rcu_read_lock();
   1889	oper = rcu_dereference(q->oper_sched);
   1890	admin = rcu_dereference(q->admin_sched);
   1891
   1892	opt.num_tc = netdev_get_num_tc(dev);
   1893	memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
   1894
   1895	for (i = 0; i < netdev_get_num_tc(dev); i++) {
   1896		opt.count[i] = dev->tc_to_txq[i].count;
   1897		opt.offset[i] = dev->tc_to_txq[i].offset;
   1898	}
   1899
   1900	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
   1901	if (!nest)
   1902		goto start_error;
   1903
   1904	if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt))
   1905		goto options_error;
   1906
   1907	if (!FULL_OFFLOAD_IS_ENABLED(q->flags) &&
   1908	    nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
   1909		goto options_error;
   1910
   1911	if (q->flags && nla_put_u32(skb, TCA_TAPRIO_ATTR_FLAGS, q->flags))
   1912		goto options_error;
   1913
   1914	if (q->txtime_delay &&
   1915	    nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay))
   1916		goto options_error;
   1917
   1918	if (oper && dump_schedule(skb, oper))
   1919		goto options_error;
   1920
   1921	if (!admin)
   1922		goto done;
   1923
   1924	sched_nest = nla_nest_start_noflag(skb, TCA_TAPRIO_ATTR_ADMIN_SCHED);
   1925	if (!sched_nest)
   1926		goto options_error;
   1927
   1928	if (dump_schedule(skb, admin))
   1929		goto admin_error;
   1930
   1931	nla_nest_end(skb, sched_nest);
   1932
   1933done:
   1934	rcu_read_unlock();
   1935
   1936	return nla_nest_end(skb, nest);
   1937
   1938admin_error:
   1939	nla_nest_cancel(skb, sched_nest);
   1940
   1941options_error:
   1942	nla_nest_cancel(skb, nest);
   1943
   1944start_error:
   1945	rcu_read_unlock();
   1946	return -ENOSPC;
   1947}
   1948
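                /* Class ->leaf(): return the qdisc grafted onto this class's TX
                 * queue (qdisc_sleeping, which stays valid while the device is
                 * down).
                 */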
   1949static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
   1950{
   1951	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
   1952
   1953	if (!dev_queue)
   1954		return NULL;
   1955
   1956	return dev_queue->qdisc_sleeping;
   1957}
   1958
   1959static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
   1960{
   1961	unsigned int ntx = TC_H_MIN(classid);
   1962
   1963	if (!taprio_queue_get(sch, ntx))
   1964		return 0;
   1965	return ntx;
   1966}
   1967
   1968static int taprio_dump_class(struct Qdisc *sch, unsigned long cl,
   1969			     struct sk_buff *skb, struct tcmsg *tcm)
   1970{
   1971	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
   1972
   1973	tcm->tcm_parent = TC_H_ROOT;
   1974	tcm->tcm_handle |= TC_H_MIN(cl);
   1975	tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
   1976
   1977	return 0;
   1978}
   1979
   1980static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
   1981				   struct gnet_dump *d)
   1982	__releases(d->lock)
   1983	__acquires(d->lock)
   1984{
   1985	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
   1986
   1987	sch = dev_queue->qdisc_sleeping;
   1988	if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0 ||
   1989	    qdisc_qstats_copy(d, sch) < 0)
   1990		return -1;
   1991	return 0;
   1992}
   1993
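                /* Class ->walk(): one class per TX queue, with class IDs
                 * starting at 1.
                 */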
   1994static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
   1995{
   1996	struct net_device *dev = qdisc_dev(sch);
   1997	unsigned long ntx;
   1998
   1999	if (arg->stop)
   2000		return;
   2001
   2002	arg->count = arg->skip;
   2003	for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
   2004		if (arg->fn(sch, ntx + 1, arg) < 0) {
   2005			arg->stop = 1;
   2006			break;
   2007		}
   2008		arg->count++;
   2009	}
   2010}
   2011
   2012static struct netdev_queue *taprio_select_queue(struct Qdisc *sch,
   2013						struct tcmsg *tcm)
   2014{
   2015	return taprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
   2016}
   2017
   2018static const struct Qdisc_class_ops taprio_class_ops = {
   2019	.graft		= taprio_graft,
   2020	.leaf		= taprio_leaf,
   2021	.find		= taprio_find,
   2022	.walk		= taprio_walk,
   2023	.dump		= taprio_dump_class,
   2024	.dump_stats	= taprio_dump_class_stats,
   2025	.select_queue	= taprio_select_queue,
   2026};
   2027
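                /* Root qdisc operations.  ->dequeue and ->peek dispatch through
                 * the q->dequeue/q->peek pointers selected at configuration time
                 * (software scheduler vs. full offload).
                 */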
   2028static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
   2029	.cl_ops		= &taprio_class_ops,
   2030	.id		= "taprio",
   2031	.priv_size	= sizeof(struct taprio_sched),
   2032	.init		= taprio_init,
   2033	.change		= taprio_change,
   2034	.destroy	= taprio_destroy,
   2035	.reset		= taprio_reset,
   2036	.attach		= taprio_attach,
   2037	.peek		= taprio_peek,
   2038	.dequeue	= taprio_dequeue,
   2039	.enqueue	= taprio_enqueue,
   2040	.dump		= taprio_dump,
   2041	.owner		= THIS_MODULE,
   2042};
   2043
   2044static struct notifier_block taprio_device_notifier = {
   2045	.notifier_call = taprio_dev_notifier,
   2046};
   2047
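                /* Register the netdevice notifier before the qdisc so taprio
                 * instances can react to device events (e.g. link speed changes)
                 * as soon as they are created.
                 */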
   2048static int __init taprio_module_init(void)
   2049{
   2050	int err = register_netdevice_notifier(&taprio_device_notifier);
   2051
   2052	if (err)
   2053		return err;
   2054
   2055	return register_qdisc(&taprio_qdisc_ops);
   2056}
   2057
   2058static void __exit taprio_module_exit(void)
   2059{
   2060	unregister_qdisc(&taprio_qdisc_ops);
   2061	unregister_netdevice_notifier(&taprio_device_notifier);
   2062}
   2063
   2064module_init(taprio_module_init);
   2065module_exit(taprio_module_exit);
   2066MODULE_LICENSE("GPL");