cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

sja1105_tas.c (29349B)


      1// SPDX-License-Identifier: GPL-2.0
      2/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
      3 */
      4#include "sja1105.h"
      5
      6#define SJA1105_TAS_CLKSRC_DISABLED	0
      7#define SJA1105_TAS_CLKSRC_STANDALONE	1
      8#define SJA1105_TAS_CLKSRC_AS6802	2
      9#define SJA1105_TAS_CLKSRC_PTP		3
     10#define SJA1105_GATE_MASK		GENMASK_ULL(SJA1105_NUM_TC - 1, 0)
     11
     12#define work_to_sja1105_tas(d) \
     13	container_of((d), struct sja1105_tas_data, tas_work)
     14#define tas_to_sja1105(d) \
     15	container_of((d), struct sja1105_private, tas_data)
     16
     17static int sja1105_tas_set_runtime_params(struct sja1105_private *priv)
     18{
     19	struct sja1105_tas_data *tas_data = &priv->tas_data;
     20	struct sja1105_gating_config *gating_cfg = &tas_data->gating_cfg;
     21	struct dsa_switch *ds = priv->ds;
     22	s64 earliest_base_time = S64_MAX;
     23	s64 latest_base_time = 0;
     24	s64 its_cycle_time = 0;
     25	s64 max_cycle_time = 0;
     26	int port;
     27
     28	tas_data->enabled = false;
     29
     30	for (port = 0; port < ds->num_ports; port++) {
     31		const struct tc_taprio_qopt_offload *offload;
     32
     33		offload = tas_data->offload[port];
     34		if (!offload)
     35			continue;
     36
     37		tas_data->enabled = true;
     38
     39		if (max_cycle_time < offload->cycle_time)
     40			max_cycle_time = offload->cycle_time;
     41		if (latest_base_time < offload->base_time)
     42			latest_base_time = offload->base_time;
     43		if (earliest_base_time > offload->base_time) {
     44			earliest_base_time = offload->base_time;
     45			its_cycle_time = offload->cycle_time;
     46		}
     47	}
     48
     49	if (!list_empty(&gating_cfg->entries)) {
     50		tas_data->enabled = true;
     51
     52		if (max_cycle_time < gating_cfg->cycle_time)
     53			max_cycle_time = gating_cfg->cycle_time;
     54		if (latest_base_time < gating_cfg->base_time)
     55			latest_base_time = gating_cfg->base_time;
     56		if (earliest_base_time > gating_cfg->base_time) {
     57			earliest_base_time = gating_cfg->base_time;
     58			its_cycle_time = gating_cfg->cycle_time;
     59		}
     60	}
     61
     62	if (!tas_data->enabled)
     63		return 0;
     64
     65	/* Roll the earliest base time over until it is in a comparable
     66	 * time base with the latest, then compare their deltas.
     67	 * We want to enforce that all ports' base times are within
     68	 * SJA1105_TAS_MAX_DELTA 200ns cycles of one another.
     69	 */
     70	earliest_base_time = future_base_time(earliest_base_time,
     71					      its_cycle_time,
     72					      latest_base_time);
     73	while (earliest_base_time > latest_base_time)
     74		earliest_base_time -= its_cycle_time;
     75	if (latest_base_time - earliest_base_time >
     76	    sja1105_delta_to_ns(SJA1105_TAS_MAX_DELTA)) {
     77		dev_err(ds->dev,
     78			"Base times too far apart: min %llu max %llu\n",
     79			earliest_base_time, latest_base_time);
     80		return -ERANGE;
     81	}
     82
     83	tas_data->earliest_base_time = earliest_base_time;
     84	tas_data->max_cycle_time = max_cycle_time;
     85
     86	dev_dbg(ds->dev, "earliest base time %lld ns\n", earliest_base_time);
     87	dev_dbg(ds->dev, "latest base time %lld ns\n", latest_base_time);
     88	dev_dbg(ds->dev, "longest cycle time %lld ns\n", max_cycle_time);
     89
     90	return 0;
     91}
     92
     93/* Lo and behold: the egress scheduler from hell.
     94 *
     95 * At the hardware level, the Time-Aware Shaper holds a global linear array of
     96 * all schedule entries for all ports. These are the Gate Control List (GCL)
     97 * entries, let's call them "timeslots" for short. This linear array of
     98 * timeslots is held in BLK_IDX_SCHEDULE.
     99 *
    100 * Then there are a maximum of 8 "execution threads" inside the switch, which
    101 * iterate cyclically through the "schedule". Each "cycle" has an entry point
    102 * and an exit point, both being timeslot indices in the schedule table. The
    103 * hardware calls each cycle a "subschedule".
    104 *
    105 * Subschedule (cycle) i starts when
    106 *   ptpclkval >= ptpschtm + BLK_IDX_SCHEDULE_ENTRY_POINTS[i].delta.
    107 *
    108 * The hardware scheduler iterates BLK_IDX_SCHEDULE with a k ranging from
    109 *   k = BLK_IDX_SCHEDULE_ENTRY_POINTS[i].address to
    110 *   k = BLK_IDX_SCHEDULE_PARAMS.subscheind[i]
    111 *
    112 * For each schedule entry (timeslot) k, the engine executes the gate control
    113 * list entry for the duration of BLK_IDX_SCHEDULE[k].delta.
    114 *
    115 *         +---------+
    116 *         |         | BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS
    117 *         +---------+
    118 *              |
    119 *              +-----------------+
    120 *                                | .actsubsch
    121 *  BLK_IDX_SCHEDULE_ENTRY_POINTS v
    122 *                 +-------+-------+
    123 *                 |cycle 0|cycle 1|
    124 *                 +-------+-------+
    125 *                   |  |      |  |
    126 *  +----------------+  |      |  +-------------------------------------+
    127 *  |   .subschindx     |      |             .subschindx                |
    128 *  |                   |      +---------------+                        |
    129 *  |          .address |        .address      |                        |
    130 *  |                   |                      |                        |
    131 *  |                   |                      |                        |
    132 *  |  BLK_IDX_SCHEDULE v                      v                        |
    133 *  |              +-------+-------+-------+-------+-------+------+     |
    134 *  |              |entry 0|entry 1|entry 2|entry 3|entry 4|entry5|     |
    135 *  |              +-------+-------+-------+-------+-------+------+     |
    136 *  |                                  ^                    ^  ^  ^     |
    137 *  |                                  |                    |  |  |     |
    138 *  |        +-------------------------+                    |  |  |     |
    139 *  |        |              +-------------------------------+  |  |     |
    140 *  |        |              |              +-------------------+  |     |
    141 *  |        |              |              |                      |     |
    142 *  | +---------------------------------------------------------------+ |
    143 *  | |subscheind[0]<=subscheind[1]<=subscheind[2]<=...<=subscheind[7]| |
    144 *  | +---------------------------------------------------------------+ |
    145 *  |        ^              ^                BLK_IDX_SCHEDULE_PARAMS    |
    146 *  |        |              |                                           |
    147 *  +--------+              +-------------------------------------------+
    148 *
    149 *  In the above picture there are two subschedules (cycles):
    150 *
    151 *  - cycle 0: iterates the schedule table from 0 to 2 (and back)
    152 *  - cycle 1: iterates the schedule table from 3 to 5 (and back)
    153 *
    154 *  All other possible execution threads must be marked as unused by making
    155 *  their "subschedule end index" (subscheind) equal to the last valid
    156 *  subschedule's end index (in this case 5).
    157 */
    158int sja1105_init_scheduling(struct sja1105_private *priv)
    159{
    160	struct sja1105_schedule_entry_points_entry *schedule_entry_points;
    161	struct sja1105_schedule_entry_points_params_entry
    162					*schedule_entry_points_params;
    163	struct sja1105_schedule_params_entry *schedule_params;
    164	struct sja1105_tas_data *tas_data = &priv->tas_data;
    165	struct sja1105_gating_config *gating_cfg = &tas_data->gating_cfg;
    166	struct sja1105_schedule_entry *schedule;
    167	struct dsa_switch *ds = priv->ds;
    168	struct sja1105_table *table;
    169	int schedule_start_idx;
    170	s64 entry_point_delta;
    171	int schedule_end_idx;
    172	int num_entries = 0;
    173	int num_cycles = 0;
    174	int cycle = 0;
    175	int i, k = 0;
    176	int port, rc;
    177
    178	rc = sja1105_tas_set_runtime_params(priv);
    179	if (rc < 0)
    180		return rc;
    181
    182	/* Discard previous Schedule Table */
    183	table = &priv->static_config.tables[BLK_IDX_SCHEDULE];
    184	if (table->entry_count) {
    185		kfree(table->entries);
    186		table->entry_count = 0;
    187	}
    188
    189	/* Discard previous Schedule Entry Points Parameters Table */
    190	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS];
    191	if (table->entry_count) {
    192		kfree(table->entries);
    193		table->entry_count = 0;
    194	}
    195
    196	/* Discard previous Schedule Parameters Table */
    197	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_PARAMS];
    198	if (table->entry_count) {
    199		kfree(table->entries);
    200		table->entry_count = 0;
    201	}
    202
    203	/* Discard previous Schedule Entry Points Table */
    204	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS];
    205	if (table->entry_count) {
    206		kfree(table->entries);
    207		table->entry_count = 0;
    208	}
    209
    210	/* Figure out the dimensioning of the problem */
    211	for (port = 0; port < ds->num_ports; port++) {
    212		if (tas_data->offload[port]) {
    213			num_entries += tas_data->offload[port]->num_entries;
    214			num_cycles++;
    215		}
    216	}
    217
    218	if (!list_empty(&gating_cfg->entries)) {
    219		num_entries += gating_cfg->num_entries;
    220		num_cycles++;
    221	}
    222
    223	/* Nothing to do */
    224	if (!num_cycles)
    225		return 0;
    226
    227	/* Pre-allocate space in the static config tables */
    228
    229	/* Schedule Table */
    230	table = &priv->static_config.tables[BLK_IDX_SCHEDULE];
    231	table->entries = kcalloc(num_entries, table->ops->unpacked_entry_size,
    232				 GFP_KERNEL);
    233	if (!table->entries)
    234		return -ENOMEM;
    235	table->entry_count = num_entries;
    236	schedule = table->entries;
    237
    238	/* Schedule Points Parameters Table */
    239	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS];
    240	table->entries = kcalloc(SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT,
    241				 table->ops->unpacked_entry_size, GFP_KERNEL);
    242	if (!table->entries)
    243		/* Previously allocated memory will be freed automatically in
    244		 * sja1105_static_config_free. This is true for all early
    245		 * returns below.
    246		 */
    247		return -ENOMEM;
    248	table->entry_count = SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT;
    249	schedule_entry_points_params = table->entries;
    250
    251	/* Schedule Parameters Table */
    252	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_PARAMS];
    253	table->entries = kcalloc(SJA1105_MAX_SCHEDULE_PARAMS_COUNT,
    254				 table->ops->unpacked_entry_size, GFP_KERNEL);
    255	if (!table->entries)
    256		return -ENOMEM;
    257	table->entry_count = SJA1105_MAX_SCHEDULE_PARAMS_COUNT;
    258	schedule_params = table->entries;
    259
    260	/* Schedule Entry Points Table */
    261	table = &priv->static_config.tables[BLK_IDX_SCHEDULE_ENTRY_POINTS];
    262	table->entries = kcalloc(num_cycles, table->ops->unpacked_entry_size,
    263				 GFP_KERNEL);
    264	if (!table->entries)
    265		return -ENOMEM;
    266	table->entry_count = num_cycles;
    267	schedule_entry_points = table->entries;
    268
    269	/* Finally start populating the static config tables */
    270	schedule_entry_points_params->clksrc = SJA1105_TAS_CLKSRC_PTP;
    271	schedule_entry_points_params->actsubsch = num_cycles - 1;
    272
    273	for (port = 0; port < ds->num_ports; port++) {
    274		const struct tc_taprio_qopt_offload *offload;
    275		/* Relative base time */
    276		s64 rbt;
    277
    278		offload = tas_data->offload[port];
    279		if (!offload)
    280			continue;
    281
    282		schedule_start_idx = k;
    283		schedule_end_idx = k + offload->num_entries - 1;
    284		/* This is the base time expressed as a number of TAS ticks
    285		 * relative to PTPSCHTM, which we'll (perhaps improperly) call
    286		 * the operational base time.
    287		 */
    288		rbt = future_base_time(offload->base_time,
    289				       offload->cycle_time,
    290				       tas_data->earliest_base_time);
    291		rbt -= tas_data->earliest_base_time;
    292		/* UM10944.pdf 4.2.2. Schedule Entry Points table says that
    293		 * delta cannot be zero, which is shitty. Advance all relative
    294		 * base times by 1 TAS delta, so that even the earliest base
    295		 * time becomes 1 in relative terms. Then start the operational
    296		 * base time (PTPSCHTM) one TAS delta earlier than planned.
    297		 */
    298		entry_point_delta = ns_to_sja1105_delta(rbt) + 1;
    299
    300		schedule_entry_points[cycle].subschindx = cycle;
    301		schedule_entry_points[cycle].delta = entry_point_delta;
    302		schedule_entry_points[cycle].address = schedule_start_idx;
    303
    304		/* The subschedule end indices need to be
    305		 * monotonically increasing.
    306		 */
    307		for (i = cycle; i < 8; i++)
    308			schedule_params->subscheind[i] = schedule_end_idx;
    309
    310		for (i = 0; i < offload->num_entries; i++, k++) {
    311			s64 delta_ns = offload->entries[i].interval;
    312
    313			schedule[k].delta = ns_to_sja1105_delta(delta_ns);
    314			schedule[k].destports = BIT(port);
    315			schedule[k].resmedia_en = true;
    316			schedule[k].resmedia = SJA1105_GATE_MASK &
    317					~offload->entries[i].gate_mask;
    318		}
    319		cycle++;
    320	}
    321
    322	if (!list_empty(&gating_cfg->entries)) {
    323		struct sja1105_gate_entry *e;
    324
    325		/* Relative base time */
    326		s64 rbt;
    327
    328		schedule_start_idx = k;
    329		schedule_end_idx = k + gating_cfg->num_entries - 1;
    330		rbt = future_base_time(gating_cfg->base_time,
    331				       gating_cfg->cycle_time,
    332				       tas_data->earliest_base_time);
    333		rbt -= tas_data->earliest_base_time;
    334		entry_point_delta = ns_to_sja1105_delta(rbt) + 1;
    335
    336		schedule_entry_points[cycle].subschindx = cycle;
    337		schedule_entry_points[cycle].delta = entry_point_delta;
    338		schedule_entry_points[cycle].address = schedule_start_idx;
    339
    340		for (i = cycle; i < 8; i++)
    341			schedule_params->subscheind[i] = schedule_end_idx;
    342
    343		list_for_each_entry(e, &gating_cfg->entries, list) {
    344			schedule[k].delta = ns_to_sja1105_delta(e->interval);
    345			schedule[k].destports = e->rule->vl.destports;
    346			schedule[k].setvalid = true;
    347			schedule[k].txen = true;
    348			schedule[k].vlindex = e->rule->vl.sharindx;
    349			schedule[k].winstindex = e->rule->vl.sharindx;
    350			if (e->gate_state) /* Gate open */
    351				schedule[k].winst = true;
    352			else /* Gate closed */
    353				schedule[k].winend = true;
    354			k++;
    355		}
    356	}
    357
    358	return 0;
    359}
    360
    361/* Be there 2 port subschedules, each executing an arbitrary number of gate
    362 * open/close events cyclically.
    363 * None of those gate events must ever occur at the exact same time, otherwise
    364 * the switch is known to act in exotically strange ways.
    365 * However the hardware doesn't bother performing these integrity checks.
    366 * So here we are with the task of validating whether the new @admin offload
    367 * has any conflict with the already established TAS configuration in
    368 * tas_data->offload.  We already know the other ports are in harmony with one
    369 * another, otherwise we wouldn't have saved them.
    370 * Each gate event executes periodically, with a period of @cycle_time and a
    371 * phase given by its cycle's @base_time plus its offset within the cycle
    372 * (which in turn is given by the length of the events prior to it).
    373 * There are two aspects to possible collisions:
    374 * - Collisions within one cycle's (actually the longest cycle's) time frame.
    375 *   For that, we need to compare the cartesian product of each possible
    376 *   occurrence of each event within one cycle time.
    377 * - Collisions in the future. Events may not collide within one cycle time,
    378 *   but if two port schedules don't have the same periodicity (aka the cycle
    379 *   times aren't multiples of one another), they surely will some time in the
    380 *   future (actually they will collide an infinite amount of times).
    381 */
    382static bool
    383sja1105_tas_check_conflicts(struct sja1105_private *priv, int port,
    384			    const struct tc_taprio_qopt_offload *admin)
    385{
    386	struct sja1105_tas_data *tas_data = &priv->tas_data;
    387	const struct tc_taprio_qopt_offload *offload;
    388	s64 max_cycle_time, min_cycle_time;
    389	s64 delta1, delta2;
    390	s64 rbt1, rbt2;
    391	s64 stop_time;
    392	s64 t1, t2;
    393	int i, j;
    394	s32 rem;
    395
    396	offload = tas_data->offload[port];
    397	if (!offload)
    398		return false;
    399
    400	/* Check if the two cycle times are multiples of one another.
    401	 * If they aren't, then they will surely collide.
    402	 */
    403	max_cycle_time = max(offload->cycle_time, admin->cycle_time);
    404	min_cycle_time = min(offload->cycle_time, admin->cycle_time);
    405	div_s64_rem(max_cycle_time, min_cycle_time, &rem);
    406	if (rem)
    407		return true;
    408
    409	/* Calculate the "reduced" base time of each of the two cycles
    410	 * (transposed back as close to 0 as possible) by dividing to
    411	 * the cycle time.
    412	 */
    413	div_s64_rem(offload->base_time, offload->cycle_time, &rem);
    414	rbt1 = rem;
    415
    416	div_s64_rem(admin->base_time, admin->cycle_time, &rem);
    417	rbt2 = rem;
    418
    419	stop_time = max_cycle_time + max(rbt1, rbt2);
    420
    421	/* delta1 is the relative base time of each GCL entry within
    422	 * the established ports' TAS config.
    423	 */
    424	for (i = 0, delta1 = 0;
    425	     i < offload->num_entries;
    426	     delta1 += offload->entries[i].interval, i++) {
    427		/* delta2 is the relative base time of each GCL entry
    428		 * within the newly added TAS config.
    429		 */
    430		for (j = 0, delta2 = 0;
    431		     j < admin->num_entries;
    432		     delta2 += admin->entries[j].interval, j++) {
    433			/* t1 follows all possible occurrences of the
    434			 * established ports' GCL entry i within the
    435			 * first cycle time.
    436			 */
    437			for (t1 = rbt1 + delta1;
    438			     t1 <= stop_time;
    439			     t1 += offload->cycle_time) {
    440				/* t2 follows all possible occurrences
    441				 * of the newly added GCL entry j
    442				 * within the first cycle time.
    443				 */
    444				for (t2 = rbt2 + delta2;
    445				     t2 <= stop_time;
    446				     t2 += admin->cycle_time) {
    447					if (t1 == t2) {
    448						dev_warn(priv->ds->dev,
    449							 "GCL entry %d collides with entry %d of port %d\n",
    450							 j, i, port);
    451						return true;
    452					}
    453				}
    454			}
    455		}
    456	}
    457
    458	return false;
    459}
    460
    461/* Check the tc-taprio configuration on @port for conflicts with the tc-gate
    462 * global subschedule. If @port is -1, check it against all ports.
    463 * To reuse the sja1105_tas_check_conflicts logic without refactoring it,
    464 * convert the gating configuration to a dummy tc-taprio offload structure.
    465 */
    466bool sja1105_gating_check_conflicts(struct sja1105_private *priv, int port,
    467				    struct netlink_ext_ack *extack)
    468{
    469	struct sja1105_gating_config *gating_cfg = &priv->tas_data.gating_cfg;
    470	size_t num_entries = gating_cfg->num_entries;
    471	struct tc_taprio_qopt_offload *dummy;
    472	struct dsa_switch *ds = priv->ds;
    473	struct sja1105_gate_entry *e;
    474	bool conflict;
    475	int i = 0;
    476
    477	if (list_empty(&gating_cfg->entries))
    478		return false;
    479
    480	dummy = kzalloc(struct_size(dummy, entries, num_entries), GFP_KERNEL);
    481	if (!dummy) {
    482		NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory");
    483		return true;
    484	}
    485
    486	dummy->num_entries = num_entries;
    487	dummy->base_time = gating_cfg->base_time;
    488	dummy->cycle_time = gating_cfg->cycle_time;
    489
    490	list_for_each_entry(e, &gating_cfg->entries, list)
    491		dummy->entries[i++].interval = e->interval;
    492
    493	if (port != -1) {
    494		conflict = sja1105_tas_check_conflicts(priv, port, dummy);
    495	} else {
    496		for (port = 0; port < ds->num_ports; port++) {
    497			conflict = sja1105_tas_check_conflicts(priv, port,
    498							       dummy);
    499			if (conflict)
    500				break;
    501		}
    502	}
    503
    504	kfree(dummy);
    505
    506	return conflict;
    507}
    508
    509int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port,
    510			    struct tc_taprio_qopt_offload *admin)
    511{
    512	struct sja1105_private *priv = ds->priv;
    513	struct sja1105_tas_data *tas_data = &priv->tas_data;
    514	int other_port, rc, i;
    515
    516	/* Can't change an already configured port (must delete qdisc first).
    517	 * Can't delete the qdisc from an unconfigured port.
    518	 */
    519	if (!!tas_data->offload[port] == admin->enable)
    520		return -EINVAL;
    521
    522	if (!admin->enable) {
    523		taprio_offload_free(tas_data->offload[port]);
    524		tas_data->offload[port] = NULL;
    525
    526		rc = sja1105_init_scheduling(priv);
    527		if (rc < 0)
    528			return rc;
    529
    530		return sja1105_static_config_reload(priv, SJA1105_SCHEDULING);
    531	}
    532
    533	/* The cycle time extension is the amount of time the last cycle from
    534	 * the old OPER needs to be extended in order to phase-align with the
    535	 * base time of the ADMIN when that becomes the new OPER.
    536	 * But of course our switch needs to be reset to switch-over between
    537	 * the ADMIN and the OPER configs - so much for a seamless transition.
    538	 * So don't add insult over injury and just say we don't support cycle
    539	 * time extension.
    540	 */
    541	if (admin->cycle_time_extension)
    542		return -ENOTSUPP;
    543
    544	for (i = 0; i < admin->num_entries; i++) {
    545		s64 delta_ns = admin->entries[i].interval;
    546		s64 delta_cycles = ns_to_sja1105_delta(delta_ns);
    547		bool too_long, too_short;
    548
    549		too_long = (delta_cycles >= SJA1105_TAS_MAX_DELTA);
    550		too_short = (delta_cycles == 0);
    551		if (too_long || too_short) {
    552			dev_err(priv->ds->dev,
    553				"Interval %llu too %s for GCL entry %d\n",
    554				delta_ns, too_long ? "long" : "short", i);
    555			return -ERANGE;
    556		}
    557	}
    558
    559	for (other_port = 0; other_port < ds->num_ports; other_port++) {
    560		if (other_port == port)
    561			continue;
    562
    563		if (sja1105_tas_check_conflicts(priv, other_port, admin))
    564			return -ERANGE;
    565	}
    566
    567	if (sja1105_gating_check_conflicts(priv, port, NULL)) {
    568		dev_err(ds->dev, "Conflict with tc-gate schedule\n");
    569		return -ERANGE;
    570	}
    571
    572	tas_data->offload[port] = taprio_offload_get(admin);
    573
    574	rc = sja1105_init_scheduling(priv);
    575	if (rc < 0)
    576		return rc;
    577
    578	return sja1105_static_config_reload(priv, SJA1105_SCHEDULING);
    579}
    580
    581static int sja1105_tas_check_running(struct sja1105_private *priv)
    582{
    583	struct sja1105_tas_data *tas_data = &priv->tas_data;
    584	struct dsa_switch *ds = priv->ds;
    585	struct sja1105_ptp_cmd cmd = {0};
    586	int rc;
    587
    588	rc = sja1105_ptp_commit(ds, &cmd, SPI_READ);
    589	if (rc < 0)
    590		return rc;
    591
    592	if (cmd.ptpstrtsch == 1)
    593		/* Schedule successfully started */
    594		tas_data->state = SJA1105_TAS_STATE_RUNNING;
    595	else if (cmd.ptpstopsch == 1)
    596		/* Schedule is stopped */
    597		tas_data->state = SJA1105_TAS_STATE_DISABLED;
    598	else
    599		/* Schedule is probably not configured with PTP clock source */
    600		rc = -EINVAL;
    601
    602	return rc;
    603}
    604
    605/* Write to PTPCLKCORP */
    606static int sja1105_tas_adjust_drift(struct sja1105_private *priv,
    607				    u64 correction)
    608{
    609	const struct sja1105_regs *regs = priv->info->regs;
    610	u32 ptpclkcorp = ns_to_sja1105_ticks(correction);
    611
    612	return sja1105_xfer_u32(priv, SPI_WRITE, regs->ptpclkcorp,
    613				&ptpclkcorp, NULL);
    614}
    615
    616/* Write to PTPSCHTM */
    617static int sja1105_tas_set_base_time(struct sja1105_private *priv,
    618				     u64 base_time)
    619{
    620	const struct sja1105_regs *regs = priv->info->regs;
    621	u64 ptpschtm = ns_to_sja1105_ticks(base_time);
    622
    623	return sja1105_xfer_u64(priv, SPI_WRITE, regs->ptpschtm,
    624				&ptpschtm, NULL);
    625}
    626
    627static int sja1105_tas_start(struct sja1105_private *priv)
    628{
    629	struct sja1105_tas_data *tas_data = &priv->tas_data;
    630	struct sja1105_ptp_cmd *cmd = &priv->ptp_data.cmd;
    631	struct dsa_switch *ds = priv->ds;
    632	int rc;
    633
    634	dev_dbg(ds->dev, "Starting the TAS\n");
    635
    636	if (tas_data->state == SJA1105_TAS_STATE_ENABLED_NOT_RUNNING ||
    637	    tas_data->state == SJA1105_TAS_STATE_RUNNING) {
    638		dev_err(ds->dev, "TAS already started\n");
    639		return -EINVAL;
    640	}
    641
    642	cmd->ptpstrtsch = 1;
    643	cmd->ptpstopsch = 0;
    644
    645	rc = sja1105_ptp_commit(ds, cmd, SPI_WRITE);
    646	if (rc < 0)
    647		return rc;
    648
    649	tas_data->state = SJA1105_TAS_STATE_ENABLED_NOT_RUNNING;
    650
    651	return 0;
    652}
    653
    654static int sja1105_tas_stop(struct sja1105_private *priv)
    655{
    656	struct sja1105_tas_data *tas_data = &priv->tas_data;
    657	struct sja1105_ptp_cmd *cmd = &priv->ptp_data.cmd;
    658	struct dsa_switch *ds = priv->ds;
    659	int rc;
    660
    661	dev_dbg(ds->dev, "Stopping the TAS\n");
    662
    663	if (tas_data->state == SJA1105_TAS_STATE_DISABLED) {
    664		dev_err(ds->dev, "TAS already disabled\n");
    665		return -EINVAL;
    666	}
    667
    668	cmd->ptpstopsch = 1;
    669	cmd->ptpstrtsch = 0;
    670
    671	rc = sja1105_ptp_commit(ds, cmd, SPI_WRITE);
    672	if (rc < 0)
    673		return rc;
    674
    675	tas_data->state = SJA1105_TAS_STATE_DISABLED;
    676
    677	return 0;
    678}
    679
    680/* The schedule engine and the PTP clock are driven by the same oscillator, and
    681 * they run in parallel. But whilst the PTP clock can keep an absolute
    682 * time-of-day, the schedule engine is only running in 'ticks' (25 ticks make
    683 * up a delta, which is 200ns), and wrapping around at the end of each cycle.
    684 * The schedule engine is started when the PTP clock reaches the PTPSCHTM time
    685 * (in PTP domain).
    686 * Because the PTP clock can be rate-corrected (accelerated or slowed down) by
    687 * a software servo, and the schedule engine clock runs in parallel to the PTP
    688 * clock, there is logic internal to the switch that periodically keeps the
    689 * schedule engine from drifting away. The frequency with which this internal
    690 * syntonization happens is the PTP clock correction period (PTPCLKCORP). It is
    691 * a value also in the PTP clock domain, and is also rate-corrected.
    692 * To be precise, during a correction period, there is logic to determine by
    693 * how many scheduler clock ticks has the PTP clock drifted. At the end of each
    694 * correction period/beginning of new one, the length of a delta is shrunk or
    695 * expanded with an integer number of ticks, compared with the typical 25.
    696 * So a delta lasts for 200ns (or 25 ticks) only on average.
    697 * Sometimes it is longer, sometimes it is shorter. The internal syntonization
    698 * logic can adjust for at most 5 ticks each 20 ticks.
    699 *
    700 * The first implication is that you should choose your schedule correction
    701 * period to be an integer multiple of the schedule length. Preferably one.
    702 * In case there are schedules of multiple ports active, then the correction
    703 * period needs to be a multiple of them all. Given the restriction that the
    704 * cycle times have to be multiples of one another anyway, this means the
    705 * correction period can simply be the largest cycle time, hence the current
    706 * choice. This way, the updates are always synchronous to the transmission
    707 * cycle, and therefore predictable.
    708 *
    709 * The second implication is that at the beginning of a correction period, the
    710 * first few deltas will be modulated in time, until the schedule engine is
    711 * properly phase-aligned with the PTP clock. For this reason, you should place
    712 * your best-effort traffic at the beginning of a cycle, and your
    713 * time-triggered traffic afterwards.
    714 *
    715 * The third implication is that once the schedule engine is started, it can
    716 * only adjust for so much drift within a correction period. In the servo you
    717 * can only change the PTPCLKRATE, but not step the clock (PTPCLKADD). If you
    718 * want to do the latter, you need to stop and restart the schedule engine,
    719 * which is what the state machine handles.
    720 */
    721static void sja1105_tas_state_machine(struct work_struct *work)
    722{
    723	struct sja1105_tas_data *tas_data = work_to_sja1105_tas(work);
    724	struct sja1105_private *priv = tas_to_sja1105(tas_data);
    725	struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
    726	struct timespec64 base_time_ts, now_ts;
    727	struct dsa_switch *ds = priv->ds;
    728	struct timespec64 diff;
    729	s64 base_time, now;
    730	int rc = 0;
    731
    732	mutex_lock(&ptp_data->lock);
    733
    734	switch (tas_data->state) {
    735	case SJA1105_TAS_STATE_DISABLED:
    736		/* Can't do anything at all if clock is still being stepped */
    737		if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ)
    738			break;
    739
    740		rc = sja1105_tas_adjust_drift(priv, tas_data->max_cycle_time);
    741		if (rc < 0)
    742			break;
    743
    744		rc = __sja1105_ptp_gettimex(ds, &now, NULL);
    745		if (rc < 0)
    746			break;
    747
    748		/* Plan to start the earliest schedule first. The others
    749		 * will be started in hardware, by way of their respective
    750		 * entry points delta.
    751		 * Try our best to avoid fringe cases (race condition between
    752		 * ptpschtm and ptpstrtsch) by pushing the oper_base_time at
    753		 * least one second in the future from now. This is not ideal,
    754		 * but this only needs to buy us time until the
    755		 * sja1105_tas_start command below gets executed.
    756		 */
    757		base_time = future_base_time(tas_data->earliest_base_time,
    758					     tas_data->max_cycle_time,
    759					     now + 1ull * NSEC_PER_SEC);
    760		base_time -= sja1105_delta_to_ns(1);
    761
    762		rc = sja1105_tas_set_base_time(priv, base_time);
    763		if (rc < 0)
    764			break;
    765
    766		tas_data->oper_base_time = base_time;
    767
    768		rc = sja1105_tas_start(priv);
    769		if (rc < 0)
    770			break;
    771
    772		base_time_ts = ns_to_timespec64(base_time);
    773		now_ts = ns_to_timespec64(now);
    774
    775		dev_dbg(ds->dev, "OPER base time %lld.%09ld (now %lld.%09ld)\n",
    776			base_time_ts.tv_sec, base_time_ts.tv_nsec,
    777			now_ts.tv_sec, now_ts.tv_nsec);
    778
    779		break;
    780
    781	case SJA1105_TAS_STATE_ENABLED_NOT_RUNNING:
    782		if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ) {
    783			/* Clock was stepped.. bad news for TAS */
    784			sja1105_tas_stop(priv);
    785			break;
    786		}
    787
    788		/* Check if TAS has actually started, by comparing the
    789		 * scheduled start time with the SJA1105 PTP clock
    790		 */
    791		rc = __sja1105_ptp_gettimex(ds, &now, NULL);
    792		if (rc < 0)
    793			break;
    794
    795		if (now < tas_data->oper_base_time) {
    796			/* TAS has not started yet */
    797			diff = ns_to_timespec64(tas_data->oper_base_time - now);
    798			dev_dbg(ds->dev, "time to start: [%lld.%09ld]",
    799				diff.tv_sec, diff.tv_nsec);
    800			break;
    801		}
    802
    803		/* Time elapsed, what happened? */
    804		rc = sja1105_tas_check_running(priv);
    805		if (rc < 0)
    806			break;
    807
    808		if (tas_data->state != SJA1105_TAS_STATE_RUNNING)
    809			/* TAS has started */
    810			dev_err(ds->dev,
    811				"TAS not started despite time elapsed\n");
    812
    813		break;
    814
    815	case SJA1105_TAS_STATE_RUNNING:
    816		/* Clock was stepped.. bad news for TAS */
    817		if (tas_data->last_op != SJA1105_PTP_ADJUSTFREQ) {
    818			sja1105_tas_stop(priv);
    819			break;
    820		}
    821
    822		rc = sja1105_tas_check_running(priv);
    823		if (rc < 0)
    824			break;
    825
    826		if (tas_data->state != SJA1105_TAS_STATE_RUNNING)
    827			dev_err(ds->dev, "TAS surprisingly stopped\n");
    828
    829		break;
    830
    831	default:
    832		if (net_ratelimit())
    833			dev_err(ds->dev, "TAS in an invalid state (incorrect use of API)!\n");
    834	}
    835
    836	if (rc && net_ratelimit())
    837		dev_err(ds->dev, "An operation returned %d\n", rc);
    838
    839	mutex_unlock(&ptp_data->lock);
    840}
    841
    842void sja1105_tas_clockstep(struct dsa_switch *ds)
    843{
    844	struct sja1105_private *priv = ds->priv;
    845	struct sja1105_tas_data *tas_data = &priv->tas_data;
    846
    847	if (!tas_data->enabled)
    848		return;
    849
    850	tas_data->last_op = SJA1105_PTP_CLOCKSTEP;
    851	schedule_work(&tas_data->tas_work);
    852}
    853
    854void sja1105_tas_adjfreq(struct dsa_switch *ds)
    855{
    856	struct sja1105_private *priv = ds->priv;
    857	struct sja1105_tas_data *tas_data = &priv->tas_data;
    858
    859	if (!tas_data->enabled)
    860		return;
    861
    862	/* No reason to schedule the workqueue, nothing changed */
    863	if (tas_data->state == SJA1105_TAS_STATE_RUNNING)
    864		return;
    865
    866	tas_data->last_op = SJA1105_PTP_ADJUSTFREQ;
    867	schedule_work(&tas_data->tas_work);
    868}
    869
    870void sja1105_tas_setup(struct dsa_switch *ds)
    871{
    872	struct sja1105_private *priv = ds->priv;
    873	struct sja1105_tas_data *tas_data = &priv->tas_data;
    874
    875	INIT_WORK(&tas_data->tas_work, sja1105_tas_state_machine);
    876	tas_data->state = SJA1105_TAS_STATE_DISABLED;
    877	tas_data->last_op = SJA1105_PTP_NONE;
    878
    879	INIT_LIST_HEAD(&tas_data->gating_cfg.entries);
    880}
    881
    882void sja1105_tas_teardown(struct dsa_switch *ds)
    883{
    884	struct sja1105_private *priv = ds->priv;
    885	struct tc_taprio_qopt_offload *offload;
    886	int port;
    887
    888	cancel_work_sync(&priv->tas_data.tas_work);
    889
    890	for (port = 0; port < ds->num_ports; port++) {
    891		offload = priv->tas_data.offload[port];
    892		if (!offload)
    893			continue;
    894
    895		taprio_offload_free(offload);
    896	}
    897}