cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

rotate.c (13486B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/* Handle fileserver selection and rotation.
      3 *
      4 * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
      5 * Written by David Howells (dhowells@redhat.com)
      6 */
      7
      8#include <linux/kernel.h>
      9#include <linux/slab.h>
     10#include <linux/fs.h>
     11#include <linux/sched.h>
     12#include <linux/delay.h>
     13#include <linux/sched/signal.h>
     14#include "internal.h"
     15#include "afs_fs.h"
     16
     17/*
     18 * Begin iteration through a server list, starting with the vnode's last used
     19 * server if possible, or the last recorded good server if not.
     20 */
     21static bool afs_start_fs_iteration(struct afs_operation *op,
     22				   struct afs_vnode *vnode)
     23{
     24	struct afs_server *server;
     25	void *cb_server;
     26	int i;
     27
     28	read_lock(&op->volume->servers_lock);
     29	op->server_list = afs_get_serverlist(
     30		rcu_dereference_protected(op->volume->servers,
     31					  lockdep_is_held(&op->volume->servers_lock)));
     32	read_unlock(&op->volume->servers_lock);
     33
     34	op->untried = (1UL << op->server_list->nr_servers) - 1;
     35	op->index = READ_ONCE(op->server_list->preferred);
     36
     37	cb_server = vnode->cb_server;
     38	if (cb_server) {
     39		/* See if the vnode's preferred record is still available */
     40		for (i = 0; i < op->server_list->nr_servers; i++) {
     41			server = op->server_list->servers[i].server;
     42			if (server == cb_server) {
     43				op->index = i;
     44				goto found_interest;
     45			}
     46		}
     47
     48		/* If we have a lock outstanding on a server that's no longer
     49		 * serving this vnode, then we can't switch to another server
     50		 * and have to return an error.
     51		 */
     52		if (op->flags & AFS_OPERATION_CUR_ONLY) {
     53			op->error = -ESTALE;
     54			return false;
     55		}
     56
     57		/* Note that the callback promise is effectively broken */
     58		write_seqlock(&vnode->cb_lock);
     59		ASSERTCMP(cb_server, ==, vnode->cb_server);
     60		vnode->cb_server = NULL;
     61		if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
     62			vnode->cb_break++;
     63		write_sequnlock(&vnode->cb_lock);
     64	}
     65
     66found_interest:
     67	return true;
     68}
     69
     70/*
     71 * Post volume busy note.
     72 */
     73static void afs_busy(struct afs_volume *volume, u32 abort_code)
     74{
     75	const char *m;
     76
     77	switch (abort_code) {
     78	case VOFFLINE:		m = "offline";		break;
     79	case VRESTARTING:	m = "restarting";	break;
     80	case VSALVAGING:	m = "being salvaged";	break;
     81	default:		m = "busy";		break;
     82	}
     83
     84	pr_notice("kAFS: Volume %llu '%s' is %s\n", volume->vid, volume->name, m);
     85}
     86
     87/*
     88 * Sleep and retry the operation to the same fileserver.
     89 */
     90static bool afs_sleep_and_retry(struct afs_operation *op)
     91{
     92	if (!(op->flags & AFS_OPERATION_UNINTR)) {
     93		msleep_interruptible(1000);
     94		if (signal_pending(current)) {
     95			op->error = -ERESTARTSYS;
     96			return false;
     97		}
     98	} else {
     99		msleep(1000);
    100	}
    101
    102	return true;
    103}
    104
    105/*
    106 * Select the fileserver to use.  May be called multiple times to rotate
    107 * through the fileservers.
    108 */
    109bool afs_select_fileserver(struct afs_operation *op)
    110{
    111	struct afs_addr_list *alist;
    112	struct afs_server *server;
    113	struct afs_vnode *vnode = op->file[0].vnode;
    114	struct afs_error e;
    115	u32 rtt;
    116	int error = op->ac.error, i;
    117
    118	_enter("%lx[%d],%lx[%d],%d,%d",
    119	       op->untried, op->index,
    120	       op->ac.tried, op->ac.index,
    121	       error, op->ac.abort_code);
    122
    123	if (op->flags & AFS_OPERATION_STOP) {
    124		_leave(" = f [stopped]");
    125		return false;
    126	}
    127
    128	op->nr_iterations++;
    129
    130	/* Evaluate the result of the previous operation, if there was one. */
    131	switch (error) {
    132	case SHRT_MAX:
    133		goto start;
    134
    135	case 0:
    136	default:
    137		/* Success or local failure.  Stop. */
    138		op->error = error;
    139		op->flags |= AFS_OPERATION_STOP;
    140		_leave(" = f [okay/local %d]", error);
    141		return false;
    142
    143	case -ECONNABORTED:
    144		/* The far side rejected the operation on some grounds.  This
    145		 * might involve the server being busy or the volume having been moved.
    146		 */
    147		switch (op->ac.abort_code) {
    148		case VNOVOL:
    149			/* This fileserver doesn't know about the volume.
    150			 * - May indicate that the VL is wrong - retry once and compare
    151			 *   the results.
    152			 * - May indicate that the fileserver couldn't attach to the vol.
    153			 */
    154			if (op->flags & AFS_OPERATION_VNOVOL) {
    155				op->error = -EREMOTEIO;
    156				goto next_server;
    157			}
    158
    159			write_lock(&op->volume->servers_lock);
    160			op->server_list->vnovol_mask |= 1 << op->index;
    161			write_unlock(&op->volume->servers_lock);
    162
    163			set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
    164			error = afs_check_volume_status(op->volume, op);
    165			if (error < 0)
    166				goto failed_set_error;
    167
    168			if (test_bit(AFS_VOLUME_DELETED, &op->volume->flags)) {
    169				op->error = -ENOMEDIUM;
    170				goto failed;
    171			}
    172
    173			/* If the server list didn't change, then assume that
    174			 * it's the fileserver having trouble.
    175			 */
    176			if (rcu_access_pointer(op->volume->servers) == op->server_list) {
    177				op->error = -EREMOTEIO;
    178				goto next_server;
    179			}
    180
    181			/* Try again */
    182			op->flags |= AFS_OPERATION_VNOVOL;
    183			_leave(" = t [vnovol]");
    184			return true;
    185
    186		case VSALVAGE: /* TODO: Should this return an error or iterate? */
    187		case VVOLEXISTS:
    188		case VNOSERVICE:
    189		case VONLINE:
    190		case VDISKFULL:
    191		case VOVERQUOTA:
    192			op->error = afs_abort_to_error(op->ac.abort_code);
    193			goto next_server;
    194
    195		case VOFFLINE:
    196			if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &op->volume->flags)) {
    197				afs_busy(op->volume, op->ac.abort_code);
    198				clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
    199			}
    200			if (op->flags & AFS_OPERATION_NO_VSLEEP) {
    201				op->error = -EADV;
    202				goto failed;
    203			}
    204			if (op->flags & AFS_OPERATION_CUR_ONLY) {
    205				op->error = -ESTALE;
    206				goto failed;
    207			}
    208			goto busy;
    209
    210		case VSALVAGING:
    211		case VRESTARTING:
    212		case VBUSY:
    213			/* Retry after going round all the servers unless we
    214			 * have a file lock we need to maintain.
    215			 */
    216			if (op->flags & AFS_OPERATION_NO_VSLEEP) {
    217				op->error = -EBUSY;
    218				goto failed;
    219			}
    220			if (!test_and_set_bit(AFS_VOLUME_BUSY, &op->volume->flags)) {
    221				afs_busy(op->volume, op->ac.abort_code);
    222				clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags);
    223			}
    224		busy:
    225			if (op->flags & AFS_OPERATION_CUR_ONLY) {
    226				if (!afs_sleep_and_retry(op))
    227					goto failed;
    228
    229				 /* Retry with same server & address */
    230				_leave(" = t [vbusy]");
    231				return true;
    232			}
    233
    234			op->flags |= AFS_OPERATION_VBUSY;
    235			goto next_server;
    236
    237		case VMOVED:
    238			/* The volume migrated to another server.  We consider
    239			 * consider all locks and callbacks broken and request
    240			 * an update from the VLDB.
    241			 *
    242			 * We also limit the number of VMOVED hops we will
    243			 * honour, just in case someone sets up a loop.
    244			 */
    245			if (op->flags & AFS_OPERATION_VMOVED) {
    246				op->error = -EREMOTEIO;
    247				goto failed;
    248			}
    249			op->flags |= AFS_OPERATION_VMOVED;
    250
    251			set_bit(AFS_VOLUME_WAIT, &op->volume->flags);
    252			set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
    253			error = afs_check_volume_status(op->volume, op);
    254			if (error < 0)
    255				goto failed_set_error;
    256
    257			/* If the server list didn't change, then the VLDB is
    258			 * out of sync with the fileservers.  This is hopefully
    259			 * a temporary condition, however, so we don't want to
    260			 * permanently block access to the file.
    261			 *
    262			 * TODO: Try other fileservers if we can.
    263			 *
    264			 * TODO: Retry a few times with sleeps.
    265			 */
    266			if (rcu_access_pointer(op->volume->servers) == op->server_list) {
    267				op->error = -ENOMEDIUM;
    268				goto failed;
    269			}
    270
    271			goto restart_from_beginning;
    272
    273		default:
    274			clear_bit(AFS_VOLUME_OFFLINE, &op->volume->flags);
    275			clear_bit(AFS_VOLUME_BUSY, &op->volume->flags);
    276			op->error = afs_abort_to_error(op->ac.abort_code);
    277			goto failed;
    278		}
    279
    280	case -ETIMEDOUT:
    281	case -ETIME:
    282		if (op->error != -EDESTADDRREQ)
    283			goto iterate_address;
    284		fallthrough;
    285	case -ERFKILL:
    286	case -EADDRNOTAVAIL:
    287	case -ENETUNREACH:
    288	case -EHOSTUNREACH:
    289	case -EHOSTDOWN:
    290	case -ECONNREFUSED:
    291		_debug("no conn");
    292		op->error = error;
    293		goto iterate_address;
    294
    295	case -ENETRESET:
    296		pr_warn("kAFS: Peer reset %s (op=%x)\n",
    297			op->type ? op->type->name : "???", op->debug_id);
    298		fallthrough;
    299	case -ECONNRESET:
    300		_debug("call reset");
    301		op->error = error;
    302		goto failed;
    303	}
    304
    305restart_from_beginning:
    306	_debug("restart");
    307	afs_end_cursor(&op->ac);
    308	op->server = NULL;
    309	afs_put_serverlist(op->net, op->server_list);
    310	op->server_list = NULL;
    311start:
    312	_debug("start");
    313	/* See if we need to do an update of the volume record.  Note that the
    314	 * volume may have moved or even have been deleted.
    315	 */
    316	error = afs_check_volume_status(op->volume, op);
    317	if (error < 0)
    318		goto failed_set_error;
    319
    320	if (!afs_start_fs_iteration(op, vnode))
    321		goto failed;
    322
    323	_debug("__ VOL %llx __", op->volume->vid);
    324
    325pick_server:
    326	_debug("pick [%lx]", op->untried);
    327
    328	error = afs_wait_for_fs_probes(op->server_list, op->untried);
    329	if (error < 0)
    330		goto failed_set_error;
    331
    332	/* Pick the untried server with the lowest RTT.  If we have outstanding
    333	 * callbacks, we stick with the server we're already using if we can.
    334	 */
    335	if (op->server) {
    336		_debug("server %u", op->index);
    337		if (test_bit(op->index, &op->untried))
    338			goto selected_server;
    339		op->server = NULL;
    340		_debug("no server");
    341	}
    342
    343	op->index = -1;
    344	rtt = U32_MAX;
    345	for (i = 0; i < op->server_list->nr_servers; i++) {
    346		struct afs_server *s = op->server_list->servers[i].server;
    347
    348		if (!test_bit(i, &op->untried) ||
    349		    !test_bit(AFS_SERVER_FL_RESPONDING, &s->flags))
    350			continue;
    351		if (s->probe.rtt < rtt) {
    352			op->index = i;
    353			rtt = s->probe.rtt;
    354		}
    355	}
    356
    357	if (op->index == -1)
    358		goto no_more_servers;
    359
    360selected_server:
    361	_debug("use %d", op->index);
    362	__clear_bit(op->index, &op->untried);
    363
    364	/* We're starting on a different fileserver from the list.  We need to
    365	 * check it, create a callback intercept, find its address list and
    366	 * probe its capabilities before we use it.
    367	 */
    368	ASSERTCMP(op->ac.alist, ==, NULL);
    369	server = op->server_list->servers[op->index].server;
    370
    371	if (!afs_check_server_record(op, server))
    372		goto failed;
    373
    374	_debug("USING SERVER: %pU", &server->uuid);
    375
    376	op->flags |= AFS_OPERATION_RETRY_SERVER;
    377	op->server = server;
    378	if (vnode->cb_server != server) {
    379		vnode->cb_server = server;
    380		vnode->cb_s_break = server->cb_s_break;
    381		vnode->cb_fs_s_break = atomic_read(&server->cell->fs_s_break);
    382		vnode->cb_v_break = vnode->volume->cb_v_break;
    383		clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
    384	}
    385
    386	read_lock(&server->fs_lock);
    387	alist = rcu_dereference_protected(server->addresses,
    388					  lockdep_is_held(&server->fs_lock));
    389	afs_get_addrlist(alist);
    390	read_unlock(&server->fs_lock);
    391
    392retry_server:
    393	memset(&op->ac, 0, sizeof(op->ac));
    394
    395	if (!op->ac.alist)
    396		op->ac.alist = alist;
    397	else
    398		afs_put_addrlist(alist);
    399
    400	op->ac.index = -1;
    401
    402iterate_address:
    403	ASSERT(op->ac.alist);
    404	/* Iterate over the current server's address list to try and find an
    405	 * address on which it will respond to us.
    406	 */
    407	if (!afs_iterate_addresses(&op->ac))
    408		goto out_of_addresses;
    409
    410	_debug("address [%u] %u/%u %pISp",
    411	       op->index, op->ac.index, op->ac.alist->nr_addrs,
    412	       &op->ac.alist->addrs[op->ac.index].transport);
    413
    414	_leave(" = t");
    415	return true;
    416
    417out_of_addresses:
    418	/* We've now had a failure to respond on all of a server's addresses -
    419	 * immediately probe them again and consider retrying the server.
    420	 */
    421	afs_probe_fileserver(op->net, op->server);
    422	if (op->flags & AFS_OPERATION_RETRY_SERVER) {
    423		alist = op->ac.alist;
    424		error = afs_wait_for_one_fs_probe(
    425			op->server, !(op->flags & AFS_OPERATION_UNINTR));
    426		switch (error) {
    427		case 0:
    428			op->flags &= ~AFS_OPERATION_RETRY_SERVER;
    429			goto retry_server;
    430		case -ERESTARTSYS:
    431			goto failed_set_error;
    432		case -ETIME:
    433		case -EDESTADDRREQ:
    434			goto next_server;
    435		}
    436	}
    437
    438next_server:
    439	_debug("next");
    440	afs_end_cursor(&op->ac);
    441	goto pick_server;
    442
    443no_more_servers:
    444	/* That's all the servers poked to no good effect.  Try again if some
    445	 * of them were busy.
    446	 */
    447	if (op->flags & AFS_OPERATION_VBUSY)
    448		goto restart_from_beginning;
    449
    450	e.error = -EDESTADDRREQ;
    451	e.responded = false;
    452	for (i = 0; i < op->server_list->nr_servers; i++) {
    453		struct afs_server *s = op->server_list->servers[i].server;
    454
    455		afs_prioritise_error(&e, READ_ONCE(s->probe.error),
    456				     s->probe.abort_code);
    457	}
    458
    459	error = e.error;
    460
    461failed_set_error:
    462	op->error = error;
    463failed:
    464	op->flags |= AFS_OPERATION_STOP;
    465	afs_end_cursor(&op->ac);
    466	_leave(" = f [failed %d]", op->error);
    467	return false;
    468}
    469
    470/*
    471 * Dump cursor state in the case of the error being EDESTADDRREQ.
    472 */
    473void afs_dump_edestaddrreq(const struct afs_operation *op)
    474{
    475	static int count;
    476	int i;
    477
    478	if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
    479		return;
    480	count++;
    481
    482	rcu_read_lock();
    483
    484	pr_notice("EDESTADDR occurred\n");
    485	pr_notice("FC: cbb=%x cbb2=%x fl=%x err=%hd\n",
    486		  op->file[0].cb_break_before,
    487		  op->file[1].cb_break_before, op->flags, op->error);
    488	pr_notice("FC: ut=%lx ix=%d ni=%u\n",
    489		  op->untried, op->index, op->nr_iterations);
    490
    491	if (op->server_list) {
    492		const struct afs_server_list *sl = op->server_list;
    493		pr_notice("FC: SL nr=%u pr=%u vnov=%hx\n",
    494			  sl->nr_servers, sl->preferred, sl->vnovol_mask);
    495		for (i = 0; i < sl->nr_servers; i++) {
    496			const struct afs_server *s = sl->servers[i].server;
    497			pr_notice("FC: server fl=%lx av=%u %pU\n",
    498				  s->flags, s->addr_version, &s->uuid);
    499			if (s->addresses) {
    500				const struct afs_addr_list *a =
    501					rcu_dereference(s->addresses);
    502				pr_notice("FC:  - av=%u nr=%u/%u/%u pr=%u\n",
    503					  a->version,
    504					  a->nr_ipv4, a->nr_addrs, a->max_addrs,
    505					  a->preferred);
    506				pr_notice("FC:  - R=%lx F=%lx\n",
    507					  a->responded, a->failed);
    508				if (a == op->ac.alist)
    509					pr_notice("FC:  - current\n");
    510			}
    511		}
    512	}
    513
    514	pr_notice("AC: t=%lx ax=%u ac=%d er=%d r=%u ni=%u\n",
    515		  op->ac.tried, op->ac.index, op->ac.abort_code, op->ac.error,
    516		  op->ac.responded, op->ac.nr_iterations);
    517	rcu_read_unlock();
    518}