cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

fs_probe.c (12178B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/* AFS fileserver probing
      3 *
      4 * Copyright (C) 2018, 2020 Red Hat, Inc. All Rights Reserved.
      5 * Written by David Howells (dhowells@redhat.com)
      6 */
      7
      8#include <linux/sched.h>
      9#include <linux/slab.h>
     10#include "afs_fs.h"
     11#include "internal.h"
     12#include "protocol_afs.h"
     13#include "protocol_yfs.h"
     14
/* Repoll intervals: servers that failed to respond sit on the fast queue and
 * are reprobed every 30s; responsive servers sit on the slow queue and are
 * only reprobed every 5 minutes (see afs_finished_fs_probe()).
 */
static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ;
static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ;
     17
     18/*
     19 * Start the probe polling timer.  We have to supply it with an inc on the
     20 * outstanding server count.
     21 */
     22static void afs_schedule_fs_probe(struct afs_net *net,
     23				  struct afs_server *server, bool fast)
     24{
     25	unsigned long atj;
     26
     27	if (!net->live)
     28		return;
     29
     30	atj = server->probed_at;
     31	atj += fast ? afs_fs_probe_fast_poll_interval : afs_fs_probe_slow_poll_interval;
     32
     33	afs_inc_servers_outstanding(net);
     34	if (timer_reduce(&net->fs_probe_timer, atj))
     35		afs_dec_servers_outstanding(net);
     36}
     37
     38/*
     39 * Handle the completion of a set of probes.
     40 */
     41static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server)
     42{
     43	bool responded = server->probe.responded;
     44
     45	write_seqlock(&net->fs_lock);
     46	if (responded) {
     47		list_add_tail(&server->probe_link, &net->fs_probe_slow);
     48	} else {
     49		server->rtt = UINT_MAX;
     50		clear_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
     51		list_add_tail(&server->probe_link, &net->fs_probe_fast);
     52	}
     53	write_sequnlock(&net->fs_lock);
     54
     55	afs_schedule_fs_probe(net, server, !responded);
     56}
     57
/*
 * Handle the completion of a probe.
 *
 * Decrement the server's outstanding-probe count; the final completion
 * requeues the server for repolling via afs_finished_fs_probe().  Anyone
 * waiting on the probe is woken in either case.
 */
static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server)
{
	_enter("");

	if (atomic_dec_and_test(&server->probe_outstanding))
		afs_finished_fs_probe(net, server);

	wake_up_all(&server->probe_wq);
}
     70
     71/*
     72 * Handle inability to send a probe due to ENOMEM when trying to allocate a
     73 * call struct.
     74 */
     75static void afs_fs_probe_not_done(struct afs_net *net,
     76				  struct afs_server *server,
     77				  struct afs_addr_cursor *ac)
     78{
     79	struct afs_addr_list *alist = ac->alist;
     80	unsigned int index = ac->index;
     81
     82	_enter("");
     83
     84	trace_afs_io_error(0, -ENOMEM, afs_io_error_fs_probe_fail);
     85	spin_lock(&server->probe_lock);
     86
     87	server->probe.local_failure = true;
     88	if (server->probe.error == 0)
     89		server->probe.error = -ENOMEM;
     90
     91	set_bit(index, &alist->failed);
     92
     93	spin_unlock(&server->probe_lock);
     94	return afs_done_one_fs_probe(net, server);
     95}
     96
/*
 * Process the result of probing a fileserver.  This is called after successful
 * or failed delivery of an FS.GetCapabilities operation.
 *
 * Updates the per-server probe record and the address list's responded/failed
 * bitmaps under probe_lock, then accounts for one completed probe.
 */
void afs_fileserver_probe_result(struct afs_call *call)
{
	struct afs_addr_list *alist = call->alist;
	struct afs_server *server = call->server;
	unsigned int index = call->addr_ix;	/* Address slot that was probed */
	unsigned int rtt_us = 0, cap0;
	int ret = call->error;

	_enter("%pU,%u", &server->uuid, index);

	spin_lock(&server->probe_lock);

	switch (ret) {
	case 0:
		/* Clean success: clear any error noted from other addresses. */
		server->probe.error = 0;
		goto responded;
	case -ECONNABORTED:
		/* The server sent an abort - that still counts as a response;
		 * record the abort code unless another address already
		 * responded successfully.
		 */
		if (!server->probe.responded) {
			server->probe.abort_code = call->abort_code;
			server->probe.error = ret;
		}
		goto responded;
	case -ENOMEM:
	case -ENONET:
		/* Local failure - don't blame the remote address for it. */
		clear_bit(index, &alist->responded);
		server->probe.local_failure = true;
		trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
		goto out;
	case -ECONNRESET: /* Responded, but call expired. */
	case -ERFKILL:
	case -EADDRNOTAVAIL:
	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -EHOSTDOWN:
	case -ECONNREFUSED:
	case -ETIMEDOUT:
	case -ETIME:
	default:
		/* Mark this address failed.  Only let this error replace a
		 * previously recorded one if nothing has responded yet and
		 * the old error was absent or merely a timeout.
		 */
		clear_bit(index, &alist->responded);
		set_bit(index, &alist->failed);
		if (!server->probe.responded &&
		    (server->probe.error == 0 ||
		     server->probe.error == -ETIMEDOUT ||
		     server->probe.error == -ETIME))
			server->probe.error = ret;
		trace_afs_io_error(call->debug_id, ret, afs_io_error_fs_probe_fail);
		goto out;
	}

responded:
	clear_bit(index, &alist->failed);

	if (call->service_id == YFS_FS_SERVICE) {
		server->probe.is_yfs = true;
		set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
		alist->addrs[index].srx_service = call->service_id;
	} else {
		server->probe.not_yfs = true;
		/* Don't let a plain-AFS reply downgrade a server another
		 * address has already shown to speak YFS.
		 */
		if (!server->probe.is_yfs) {
			clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
			alist->addrs[index].srx_service = call->service_id;
		}
		cap0 = ntohl(call->tmp);	/* First capabilities word */
		if (cap0 & AFS3_VICED_CAPABILITY_64BITFILES)
			set_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
		else
			clear_bit(AFS_SERVER_FL_HAS_FS64, &server->flags);
	}

	/* Prefer this address if it has the lowest smoothed RTT seen so far. */
	if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &rtt_us) &&
	    rtt_us < server->probe.rtt) {
		server->probe.rtt = rtt_us;
		server->rtt = rtt_us;
		alist->preferred = index;
	}

	smp_wmb(); /* Set rtt before responded. */
	server->probe.responded = true;
	set_bit(index, &alist->responded);
	set_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
out:
	spin_unlock(&server->probe_lock);

	_debug("probe %pU [%u] %pISpc rtt=%u ret=%d",
	       &server->uuid, index, &alist->addrs[index].transport,
	       rtt_us, ret);

	return afs_done_one_fs_probe(call->net, server);
}
    190
    191/*
    192 * Probe one or all of a fileserver's addresses to find out the best route and
    193 * to query its capabilities.
    194 */
    195void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
    196			     struct key *key, bool all)
    197{
    198	struct afs_addr_cursor ac = {
    199		.index = 0,
    200	};
    201
    202	_enter("%pU", &server->uuid);
    203
    204	read_lock(&server->fs_lock);
    205	ac.alist = rcu_dereference_protected(server->addresses,
    206					     lockdep_is_held(&server->fs_lock));
    207	afs_get_addrlist(ac.alist);
    208	read_unlock(&server->fs_lock);
    209
    210	server->probed_at = jiffies;
    211	atomic_set(&server->probe_outstanding, all ? ac.alist->nr_addrs : 1);
    212	memset(&server->probe, 0, sizeof(server->probe));
    213	server->probe.rtt = UINT_MAX;
    214
    215	ac.index = ac.alist->preferred;
    216	if (ac.index < 0 || ac.index >= ac.alist->nr_addrs)
    217		all = true;
    218
    219	if (all) {
    220		for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++)
    221			if (!afs_fs_get_capabilities(net, server, &ac, key))
    222				afs_fs_probe_not_done(net, server, &ac);
    223	} else {
    224		if (!afs_fs_get_capabilities(net, server, &ac, key))
    225			afs_fs_probe_not_done(net, server, &ac);
    226	}
    227
    228	afs_put_addrlist(ac.alist);
    229}
    230
/*
 * Wait for the first as-yet untried fileserver to respond.
 *
 * @untried is a bitmask over slist->servers[] selecting the servers of
 * interest.  Returns 0 on success - updating slist->preferred to the
 * responding server with the lowest RTT if one was found - or -ENOMEM, or
 * -ERESTARTSYS if interrupted before anything responded.
 */
int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
{
	struct wait_queue_entry *waits;
	struct afs_server *server;
	unsigned int rtt = UINT_MAX, rtt_s;
	bool have_responders = false;
	int pref = -1, i;

	_enter("%u,%lx", slist->nr_servers, untried);

	/* Only wait for servers that have a probe outstanding. */
	for (i = 0; i < slist->nr_servers; i++) {
		if (test_bit(i, &untried)) {
			server = slist->servers[i].server;
			if (!atomic_read(&server->probe_outstanding))
				__clear_bit(i, &untried);
			if (server->probe.responded)
				have_responders = true;
		}
	}
	/* No need to sleep if someone already responded or nothing is in
	 * flight.
	 */
	if (have_responders || !untried)
		return 0;

	/* One wait-queue entry per server of interest, so whichever probe
	 * completes first wakes us.
	 */
	waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL);
	if (!waits)
		return -ENOMEM;

	for (i = 0; i < slist->nr_servers; i++) {
		if (test_bit(i, &untried)) {
			server = slist->servers[i].server;
			init_waitqueue_entry(&waits[i], current);
			add_wait_queue(&server->probe_wq, &waits[i]);
		}
	}

	/* Sleep until a server responds, all remaining probes complete, or a
	 * signal arrives.
	 */
	for (;;) {
		bool still_probing = false;

		set_current_state(TASK_INTERRUPTIBLE);
		for (i = 0; i < slist->nr_servers; i++) {
			if (test_bit(i, &untried)) {
				server = slist->servers[i].server;
				if (server->probe.responded)
					goto stop;
				if (atomic_read(&server->probe_outstanding))
					still_probing = true;
			}
		}

		if (!still_probing || signal_pending(current))
			goto stop;
		schedule();
	}

stop:
	set_current_state(TASK_RUNNING);

	/* Pick the responding server with the lowest RTT and detach all of
	 * our wait-queue entries.
	 */
	for (i = 0; i < slist->nr_servers; i++) {
		if (test_bit(i, &untried)) {
			server = slist->servers[i].server;
			rtt_s = READ_ONCE(server->rtt);
			if (test_bit(AFS_SERVER_FL_RESPONDING, &server->flags) &&
			    rtt_s < rtt) {
				pref = i;
				rtt = rtt_s;
			}

			remove_wait_queue(&server->probe_wq, &waits[i]);
		}
	}

	kfree(waits);

	/* Only report signal interruption if no server responded. */
	if (pref == -1 && signal_pending(current))
		return -ERESTARTSYS;

	if (pref >= 0)
		slist->preferred = pref;
	return 0;
}
    314
    315/*
    316 * Probe timer.  We have an increment on fs_outstanding that we need to pass
    317 * along to the work item.
    318 */
    319void afs_fs_probe_timer(struct timer_list *timer)
    320{
    321	struct afs_net *net = container_of(timer, struct afs_net, fs_probe_timer);
    322
    323	if (!net->live || !queue_work(afs_wq, &net->fs_prober))
    324		afs_dec_servers_outstanding(net);
    325}
    326
/*
 * Dispatch a probe to a server.
 *
 * Called with net->fs_lock write-locked; drops that lock before issuing the
 * probe (hence the __releases() annotation for sparse).
 */
static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server, bool all)
	__releases(&net->fs_lock)
{
	struct key *key = NULL;	/* Probe is issued unauthenticated */

	/* We remove it from the queues here - it will be added back to
	 * one of the queues on the completion of the probe.
	 */
	list_del_init(&server->probe_link);

	/* Pin the server so it can't go away while being probed. */
	afs_get_server(server, afs_server_trace_get_probe);
	write_sequnlock(&net->fs_lock);

	afs_fs_probe_fileserver(net, server, key, all);
	afs_put_server(net, server, afs_server_trace_put_probe);
}
    346
/*
 * Probe a server immediately without waiting for its due time to come
 * round.  This is used when all of the addresses have been tried.
 *
 * Note the asymmetric-looking locking: afs_dispatch_fs_probe() drops
 * net->fs_lock for us on the early-return path.
 */
void afs_probe_fileserver(struct afs_net *net, struct afs_server *server)
{
	write_seqlock(&net->fs_lock);
	/* An empty probe_link means a probe is already in progress. */
	if (!list_empty(&server->probe_link))
		return afs_dispatch_fs_probe(net, server, true);
	write_sequnlock(&net->fs_lock);
}
    358
/*
 * Probe dispatcher to regularly dispatch probes to keep NAT alive.
 *
 * Work item kicked by afs_fs_probe_timer().  Dispatches probes for any
 * servers whose poll time has arrived, rearms the timer for the next due
 * server, and otherwise drops the outstanding-count increment it inherited
 * from the timer.
 */
void afs_fs_probe_dispatcher(struct work_struct *work)
{
	struct afs_net *net = container_of(work, struct afs_net, fs_prober);
	struct afs_server *fast, *slow, *server;
	unsigned long nowj, timer_at, poll_at;
	bool first_pass = true, set_timer = false;

	if (!net->live)
		return;

	_enter("");

	if (list_empty(&net->fs_probe_fast) && list_empty(&net->fs_probe_slow)) {
		_leave(" [none]");
		return;
	}

again:
	write_seqlock(&net->fs_lock);

	fast = slow = server = NULL;
	nowj = jiffies;
	timer_at = nowj + MAX_JIFFY_OFFSET;	/* "No expiry yet" sentinel */

	/* The queues are kept in dispatch order, so only the head of each
	 * needs examining; a head that isn't yet due just sets the timer.
	 */
	if (!list_empty(&net->fs_probe_fast)) {
		fast = list_first_entry(&net->fs_probe_fast, struct afs_server, probe_link);
		poll_at = fast->probed_at + afs_fs_probe_fast_poll_interval;
		if (time_before(nowj, poll_at)) {
			timer_at = poll_at;
			set_timer = true;
			fast = NULL;
		}
	}

	if (!list_empty(&net->fs_probe_slow)) {
		slow = list_first_entry(&net->fs_probe_slow, struct afs_server, probe_link);
		poll_at = slow->probed_at + afs_fs_probe_slow_poll_interval;
		if (time_before(nowj, poll_at)) {
			if (time_before(poll_at, timer_at))
			    timer_at = poll_at;
			set_timer = true;
			slow = NULL;
		}
	}

	server = fast ?: slow;	/* Fast queue takes precedence */
	if (server)
		_debug("probe %pU", &server->uuid);

	/* Dispatch (which drops fs_lock) and loop for more, but yield the CPU
	 * by requeuing ourselves once a reschedule is wanted.
	 */
	if (server && (first_pass || !need_resched())) {
		afs_dispatch_fs_probe(net, server, server == fast);
		first_pass = false;
		goto again;
	}

	write_sequnlock(&net->fs_lock);

	if (server) {
		/* More due work remains: requeue, passing our increment on. */
		if (!queue_work(afs_wq, &net->fs_prober))
			afs_dec_servers_outstanding(net);
		_leave(" [requeue]");
	} else if (set_timer) {
		/* Hand the increment to the timer instead. */
		if (timer_reduce(&net->fs_probe_timer, timer_at))
			afs_dec_servers_outstanding(net);
		_leave(" [timer]");
	} else {
		afs_dec_servers_outstanding(net);
		_leave(" [quiesce]");
	}
}
    432
/*
 * Wait for a probe on a particular fileserver to complete for 2s.
 *
 * Returns 0 if the server responded, -ERESTARTSYS if a signal arrived (only
 * when @is_intr), -ETIME if the 2s budget ran out, and -EDESTADDRREQ if
 * probing finished without any response.
 */
int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr)
{
	struct wait_queue_entry wait;
	unsigned long timo = 2 * HZ;	/* Remaining wait budget in jiffies */

	if (atomic_read(&server->probe_outstanding) == 0)
		goto dont_wait;

	init_wait_entry(&wait, 0);
	for (;;) {
		prepare_to_wait_event(&server->probe_wq, &wait,
				      is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
		if (timo == 0 ||
		    server->probe.responded ||
		    atomic_read(&server->probe_outstanding) == 0 ||
		    (is_intr && signal_pending(current)))
			break;
		timo = schedule_timeout(timo);	/* Returns the unexpired remainder */
	}

	finish_wait(&server->probe_wq, &wait);

dont_wait:
	/* Work out why we stopped, in order of precedence. */
	if (server->probe.responded)
		return 0;
	if (is_intr && signal_pending(current))
		return -ERESTARTSYS;
	if (timo == 0)
		return -ETIME;
	return -EDESTADDRREQ;
}
    467
    468/*
    469 * Clean up the probing when the namespace is killed off.
    470 */
    471void afs_fs_probe_cleanup(struct afs_net *net)
    472{
    473	if (del_timer_sync(&net->fs_probe_timer))
    474		afs_dec_servers_outstanding(net);
    475}