cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

nfs4state.c (73750B)


      1/*
      2 *  fs/nfs/nfs4state.c
      3 *
 *  Client-side state management for NFSv4.
      5 *
      6 *  Copyright (c) 2002 The Regents of the University of Michigan.
      7 *  All rights reserved.
      8 *
      9 *  Kendrick Smith <kmsmith@umich.edu>
     10 *
     11 *  Redistribution and use in source and binary forms, with or without
     12 *  modification, are permitted provided that the following conditions
     13 *  are met:
     14 *
     15 *  1. Redistributions of source code must retain the above copyright
     16 *     notice, this list of conditions and the following disclaimer.
     17 *  2. Redistributions in binary form must reproduce the above copyright
     18 *     notice, this list of conditions and the following disclaimer in the
     19 *     documentation and/or other materials provided with the distribution.
     20 *  3. Neither the name of the University nor the names of its
     21 *     contributors may be used to endorse or promote products derived
     22 *     from this software without specific prior written permission.
     23 *
     24 *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
     25 *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
     26 *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     27 *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     28 *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     29 *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     30 *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
     31 *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     32 *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     33 *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     34 *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     35 *
     36 * Implementation of the NFSv4 state model.  For the time being,
     37 * this is minimal, but will be made much more complex in a
     38 * subsequent patch.
     39 */
     40
     41#include <linux/kernel.h>
     42#include <linux/slab.h>
     43#include <linux/fs.h>
     44#include <linux/nfs_fs.h>
     45#include <linux/kthread.h>
     46#include <linux/module.h>
     47#include <linux/random.h>
     48#include <linux/ratelimit.h>
     49#include <linux/workqueue.h>
     50#include <linux/bitops.h>
     51#include <linux/jiffies.h>
     52#include <linux/sched/mm.h>
     53
     54#include <linux/sunrpc/clnt.h>
     55
     56#include "nfs4_fs.h"
     57#include "callback.h"
     58#include "delegation.h"
     59#include "internal.h"
     60#include "nfs4idmap.h"
     61#include "nfs4session.h"
     62#include "pnfs.h"
     63#include "netns.h"
     64#include "nfs4trace.h"
     65
/*
 * Selects NFSDBG_STATE as the debug facility for this file
 * (presumably consumed by the NFS debug-print macros — confirm).
 */
#define NFSDBG_FACILITY		NFSDBG_STATE

/*
 * NOTE(review): not referenced in the visible part of this file;
 * confirm whether anything still uses it.
 */
#define OPENOWNER_POOL_SIZE	8
     69
/* Special stateid whose data bytes are all zero. */
const nfs4_stateid zero_stateid = {
	{ .data = { 0 } },
	.type = NFS4_SPECIAL_STATEID_TYPE,
};
/*
 * Stateid tagged NFS4_INVALID_STATEID_TYPE: the first four data bytes
 * are 0xff and all remaining data bytes are zero.
 */
const nfs4_stateid invalid_stateid = {
	{
		/* Funky initialiser keeps older gcc versions happy */
		.data = { 0xff, 0xff, 0xff, 0xff, 0 },
	},
	.type = NFS4_INVALID_STATEID_TYPE,
};
     81
/*
 * Special stateid whose first four data bytes are 00 00 00 01 and whose
 * remaining data bytes are zero.
 */
const nfs4_stateid current_stateid = {
	{
		/* Funky initialiser keeps older gcc versions happy */
		.data = { 0x0, 0x0, 0x0, 0x1, 0 },
	},
	.type = NFS4_SPECIAL_STATEID_TYPE,
};
     89
/*
 * NOTE(review): no user of this mutex is visible in this part of the file;
 * judging by the name it serialises client ID initialisation — confirm.
 */
static DEFINE_MUTEX(nfs_clid_init_mutex);
     91
     92static int nfs4_setup_state_renewal(struct nfs_client *clp)
     93{
     94	int status;
     95	struct nfs_fsinfo fsinfo;
     96
     97	if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) {
     98		nfs4_schedule_state_renewal(clp);
     99		return 0;
    100	}
    101
    102	status = nfs4_proc_get_lease_time(clp, &fsinfo);
    103	if (status == 0) {
    104		nfs4_set_lease_period(clp, fsinfo.lease_time * HZ);
    105		nfs4_schedule_state_renewal(clp);
    106	}
    107
    108	return status;
    109}
    110
    111int nfs4_init_clientid(struct nfs_client *clp, const struct cred *cred)
    112{
    113	struct nfs4_setclientid_res clid = {
    114		.clientid = clp->cl_clientid,
    115		.confirm = clp->cl_confirm,
    116	};
    117	unsigned short port;
    118	int status;
    119	struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
    120
    121	if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state))
    122		goto do_confirm;
    123	port = nn->nfs_callback_tcpport;
    124	if (clp->cl_addr.ss_family == AF_INET6)
    125		port = nn->nfs_callback_tcpport6;
    126
    127	status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid);
    128	if (status != 0)
    129		goto out;
    130	clp->cl_clientid = clid.clientid;
    131	clp->cl_confirm = clid.confirm;
    132	set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
    133do_confirm:
    134	status = nfs4_proc_setclientid_confirm(clp, &clid, cred);
    135	if (status != 0)
    136		goto out;
    137	clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
    138	nfs4_setup_state_renewal(clp);
    139out:
    140	return status;
    141}
    142
    143/**
    144 * nfs40_discover_server_trunking - Detect server IP address trunking (mv0)
    145 *
    146 * @clp: nfs_client under test
    147 * @result: OUT: found nfs_client, or clp
    148 * @cred: credential to use for trunking test
    149 *
    150 * Returns zero, a negative errno, or a negative NFS4ERR status.
    151 * If zero is returned, an nfs_client pointer is planted in
    152 * "result".
    153 *
    154 * Note: The returned client may not yet be marked ready.
    155 */
    156int nfs40_discover_server_trunking(struct nfs_client *clp,
    157				   struct nfs_client **result,
    158				   const struct cred *cred)
    159{
    160	struct nfs4_setclientid_res clid = {
    161		.clientid = clp->cl_clientid,
    162		.confirm = clp->cl_confirm,
    163	};
    164	struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
    165	unsigned short port;
    166	int status;
    167
    168	port = nn->nfs_callback_tcpport;
    169	if (clp->cl_addr.ss_family == AF_INET6)
    170		port = nn->nfs_callback_tcpport6;
    171
    172	status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid);
    173	if (status != 0)
    174		goto out;
    175	clp->cl_clientid = clid.clientid;
    176	clp->cl_confirm = clid.confirm;
    177
    178	status = nfs40_walk_client_list(clp, result, cred);
    179	if (status == 0) {
    180		/* Sustain the lease, even if it's empty.  If the clientid4
    181		 * goes stale it's of no use for trunking discovery. */
    182		nfs4_schedule_state_renewal(*result);
    183
    184		/* If the client state need to recover, do it. */
    185		if (clp->cl_state)
    186			nfs4_schedule_state_manager(clp);
    187	}
    188out:
    189	return status;
    190}
    191
    192const struct cred *nfs4_get_machine_cred(struct nfs_client *clp)
    193{
    194	return get_cred(rpc_machine_cred());
    195}
    196
    197static void nfs4_root_machine_cred(struct nfs_client *clp)
    198{
    199
    200	/* Force root creds instead of machine */
    201	clp->cl_principal = NULL;
    202	clp->cl_rpcclient->cl_principal = NULL;
    203}
    204
    205static const struct cred *
    206nfs4_get_renew_cred_server_locked(struct nfs_server *server)
    207{
    208	const struct cred *cred = NULL;
    209	struct nfs4_state_owner *sp;
    210	struct rb_node *pos;
    211
    212	for (pos = rb_first(&server->state_owners);
    213	     pos != NULL;
    214	     pos = rb_next(pos)) {
    215		sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
    216		if (list_empty(&sp->so_states))
    217			continue;
    218		cred = get_cred(sp->so_cred);
    219		break;
    220	}
    221	return cred;
    222}
    223
    224/**
    225 * nfs4_get_renew_cred - Acquire credential for a renew operation
    226 * @clp: client state handle
    227 *
    228 * Returns an rpc_cred with reference count bumped, or NULL.
    229 * Caller must hold clp->cl_lock.
    230 */
    231const struct cred *nfs4_get_renew_cred(struct nfs_client *clp)
    232{
    233	const struct cred *cred = NULL;
    234	struct nfs_server *server;
    235
    236	/* Use machine credentials if available */
    237	cred = nfs4_get_machine_cred(clp);
    238	if (cred != NULL)
    239		goto out;
    240
    241	spin_lock(&clp->cl_lock);
    242	rcu_read_lock();
    243	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
    244		cred = nfs4_get_renew_cred_server_locked(server);
    245		if (cred != NULL)
    246			break;
    247	}
    248	rcu_read_unlock();
    249	spin_unlock(&clp->cl_lock);
    250
    251out:
    252	return cred;
    253}
    254
    255static void nfs4_end_drain_slot_table(struct nfs4_slot_table *tbl)
    256{
    257	if (test_and_clear_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) {
    258		spin_lock(&tbl->slot_tbl_lock);
    259		nfs41_wake_slot_table(tbl);
    260		spin_unlock(&tbl->slot_tbl_lock);
    261	}
    262}
    263
    264static void nfs4_end_drain_session(struct nfs_client *clp)
    265{
    266	struct nfs4_session *ses = clp->cl_session;
    267
    268	if (clp->cl_slot_tbl) {
    269		nfs4_end_drain_slot_table(clp->cl_slot_tbl);
    270		return;
    271	}
    272
    273	if (ses != NULL) {
    274		nfs4_end_drain_slot_table(&ses->bc_slot_table);
    275		nfs4_end_drain_slot_table(&ses->fc_slot_table);
    276	}
    277}
    278
    279static int nfs4_drain_slot_tbl(struct nfs4_slot_table *tbl)
    280{
    281	set_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state);
    282	spin_lock(&tbl->slot_tbl_lock);
    283	if (tbl->highest_used_slotid != NFS4_NO_SLOT) {
    284		reinit_completion(&tbl->complete);
    285		spin_unlock(&tbl->slot_tbl_lock);
    286		return wait_for_completion_interruptible(&tbl->complete);
    287	}
    288	spin_unlock(&tbl->slot_tbl_lock);
    289	return 0;
    290}
    291
    292static int nfs4_begin_drain_session(struct nfs_client *clp)
    293{
    294	struct nfs4_session *ses = clp->cl_session;
    295	int ret;
    296
    297	if (clp->cl_slot_tbl)
    298		return nfs4_drain_slot_tbl(clp->cl_slot_tbl);
    299
    300	/* back channel */
    301	ret = nfs4_drain_slot_tbl(&ses->bc_slot_table);
    302	if (ret)
    303		return ret;
    304	/* fore channel */
    305	return nfs4_drain_slot_tbl(&ses->fc_slot_table);
    306}
    307
    308#if defined(CONFIG_NFS_V4_1)
    309
/*
 * Clear the LEASE_CONFIRM, SESSION_RESET and BIND_CONN_TO_SESSION state
 * bits on @clp and set up lease renewal.  In this file it is called right
 * after a session has been created successfully.
 */
static void nfs41_finish_session_reset(struct nfs_client *clp)
{
	clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
	clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
	/* create_session negotiated new slot table */
	clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
	/* The return value of the renewal setup is not checked here. */
	nfs4_setup_state_renewal(clp);
}
    318
    319int nfs41_init_clientid(struct nfs_client *clp, const struct cred *cred)
    320{
    321	int status;
    322
    323	if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state))
    324		goto do_confirm;
    325	status = nfs4_proc_exchange_id(clp, cred);
    326	if (status != 0)
    327		goto out;
    328	set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
    329do_confirm:
    330	status = nfs4_proc_create_session(clp, cred);
    331	if (status != 0)
    332		goto out;
    333	nfs41_finish_session_reset(clp);
    334	nfs_mark_client_ready(clp, NFS_CS_READY);
    335out:
    336	return status;
    337}
    338
    339/**
    340 * nfs41_discover_server_trunking - Detect server IP address trunking (mv1)
    341 *
    342 * @clp: nfs_client under test
    343 * @result: OUT: found nfs_client, or clp
    344 * @cred: credential to use for trunking test
    345 *
    346 * Returns NFS4_OK, a negative errno, or a negative NFS4ERR status.
    347 * If NFS4_OK is returned, an nfs_client pointer is planted in
    348 * "result".
    349 *
    350 * Note: The returned client may not yet be marked ready.
    351 */
    352int nfs41_discover_server_trunking(struct nfs_client *clp,
    353				   struct nfs_client **result,
    354				   const struct cred *cred)
    355{
    356	int status;
    357
    358	status = nfs4_proc_exchange_id(clp, cred);
    359	if (status != NFS4_OK)
    360		return status;
    361
    362	status = nfs41_walk_client_list(clp, result, cred);
    363	if (status < 0)
    364		return status;
    365	if (clp != *result)
    366		return 0;
    367
    368	/*
    369	 * Purge state if the client id was established in a prior
    370	 * instance and the client id could not have arrived on the
    371	 * server via Transparent State Migration.
    372	 */
    373	if (clp->cl_exchange_flags & EXCHGID4_FLAG_CONFIRMED_R) {
    374		if (!test_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags))
    375			set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
    376		else
    377			set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
    378	}
    379	nfs4_schedule_state_manager(clp);
    380	status = nfs_wait_client_init_complete(clp);
    381	if (status < 0)
    382		nfs_put_client(clp);
    383	return status;
    384}
    385
    386#endif /* CONFIG_NFS_V4_1 */
    387
    388/**
    389 * nfs4_get_clid_cred - Acquire credential for a setclientid operation
    390 * @clp: client state handle
    391 *
    392 * Returns a cred with reference count bumped, or NULL.
    393 */
    394const struct cred *nfs4_get_clid_cred(struct nfs_client *clp)
    395{
    396	const struct cred *cred;
    397
    398	cred = nfs4_get_machine_cred(clp);
    399	return cred;
    400}
    401
    402static struct nfs4_state_owner *
    403nfs4_find_state_owner_locked(struct nfs_server *server, const struct cred *cred)
    404{
    405	struct rb_node **p = &server->state_owners.rb_node,
    406		       *parent = NULL;
    407	struct nfs4_state_owner *sp;
    408	int cmp;
    409
    410	while (*p != NULL) {
    411		parent = *p;
    412		sp = rb_entry(parent, struct nfs4_state_owner, so_server_node);
    413		cmp = cred_fscmp(cred, sp->so_cred);
    414
    415		if (cmp < 0)
    416			p = &parent->rb_left;
    417		else if (cmp > 0)
    418			p = &parent->rb_right;
    419		else {
    420			if (!list_empty(&sp->so_lru))
    421				list_del_init(&sp->so_lru);
    422			atomic_inc(&sp->so_count);
    423			return sp;
    424		}
    425	}
    426	return NULL;
    427}
    428
    429static struct nfs4_state_owner *
    430nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
    431{
    432	struct nfs_server *server = new->so_server;
    433	struct rb_node **p = &server->state_owners.rb_node,
    434		       *parent = NULL;
    435	struct nfs4_state_owner *sp;
    436	int cmp;
    437
    438	while (*p != NULL) {
    439		parent = *p;
    440		sp = rb_entry(parent, struct nfs4_state_owner, so_server_node);
    441		cmp = cred_fscmp(new->so_cred, sp->so_cred);
    442
    443		if (cmp < 0)
    444			p = &parent->rb_left;
    445		else if (cmp > 0)
    446			p = &parent->rb_right;
    447		else {
    448			if (!list_empty(&sp->so_lru))
    449				list_del_init(&sp->so_lru);
    450			atomic_inc(&sp->so_count);
    451			return sp;
    452		}
    453	}
    454	rb_link_node(&new->so_server_node, parent, p);
    455	rb_insert_color(&new->so_server_node, &server->state_owners);
    456	return new;
    457}
    458
    459static void
    460nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp)
    461{
    462	struct nfs_server *server = sp->so_server;
    463
    464	if (!RB_EMPTY_NODE(&sp->so_server_node))
    465		rb_erase(&sp->so_server_node, &server->state_owners);
    466}
    467
    468static void
    469nfs4_init_seqid_counter(struct nfs_seqid_counter *sc)
    470{
    471	sc->create_time = ktime_get();
    472	sc->flags = 0;
    473	sc->counter = 0;
    474	spin_lock_init(&sc->lock);
    475	INIT_LIST_HEAD(&sc->list);
    476	rpc_init_wait_queue(&sc->wait, "Seqid_waitqueue");
    477}
    478
/*
 * Tear down a seqid counter set up by nfs4_init_seqid_counter(); the only
 * member needing explicit teardown is the RPC wait queue.
 */
static void
nfs4_destroy_seqid_counter(struct nfs_seqid_counter *sc)
{
	rpc_destroy_wait_queue(&sc->wait);
}
    484
    485/*
    486 * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to
    487 * create a new state_owner.
    488 *
    489 */
    490static struct nfs4_state_owner *
    491nfs4_alloc_state_owner(struct nfs_server *server,
    492		const struct cred *cred,
    493		gfp_t gfp_flags)
    494{
    495	struct nfs4_state_owner *sp;
    496
    497	sp = kzalloc(sizeof(*sp), gfp_flags);
    498	if (!sp)
    499		return NULL;
    500	sp->so_seqid.owner_id = ida_simple_get(&server->openowner_id, 0, 0,
    501						gfp_flags);
    502	if (sp->so_seqid.owner_id < 0) {
    503		kfree(sp);
    504		return NULL;
    505	}
    506	sp->so_server = server;
    507	sp->so_cred = get_cred(cred);
    508	spin_lock_init(&sp->so_lock);
    509	INIT_LIST_HEAD(&sp->so_states);
    510	nfs4_init_seqid_counter(&sp->so_seqid);
    511	atomic_set(&sp->so_count, 1);
    512	INIT_LIST_HEAD(&sp->so_lru);
    513	seqcount_spinlock_init(&sp->so_reclaim_seqcount, &sp->so_lock);
    514	mutex_init(&sp->so_delegreturn_mutex);
    515	return sp;
    516}
    517
    518static void
    519nfs4_reset_state_owner(struct nfs4_state_owner *sp)
    520{
    521	/* This state_owner is no longer usable, but must
    522	 * remain in place so that state recovery can find it
    523	 * and the opens associated with it.
    524	 * It may also be used for new 'open' request to
    525	 * return a delegation to the server.
    526	 * So update the 'create_time' so that it looks like
    527	 * a new state_owner.  This will cause the server to
    528	 * request an OPEN_CONFIRM to start a new sequence.
    529	 */
    530	sp->so_seqid.create_time = ktime_get();
    531}
    532
    533static void nfs4_free_state_owner(struct nfs4_state_owner *sp)
    534{
    535	nfs4_destroy_seqid_counter(&sp->so_seqid);
    536	put_cred(sp->so_cred);
    537	ida_simple_remove(&sp->so_server->openowner_id, sp->so_seqid.owner_id);
    538	kfree(sp);
    539}
    540
    541static void nfs4_gc_state_owners(struct nfs_server *server)
    542{
    543	struct nfs_client *clp = server->nfs_client;
    544	struct nfs4_state_owner *sp, *tmp;
    545	unsigned long time_min, time_max;
    546	LIST_HEAD(doomed);
    547
    548	spin_lock(&clp->cl_lock);
    549	time_max = jiffies;
    550	time_min = (long)time_max - (long)clp->cl_lease_time;
    551	list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
    552		/* NB: LRU is sorted so that oldest is at the head */
    553		if (time_in_range(sp->so_expires, time_min, time_max))
    554			break;
    555		list_move(&sp->so_lru, &doomed);
    556		nfs4_remove_state_owner_locked(sp);
    557	}
    558	spin_unlock(&clp->cl_lock);
    559
    560	list_for_each_entry_safe(sp, tmp, &doomed, so_lru) {
    561		list_del(&sp->so_lru);
    562		nfs4_free_state_owner(sp);
    563	}
    564}
    565
    566/**
    567 * nfs4_get_state_owner - Look up a state owner given a credential
    568 * @server: nfs_server to search
    569 * @cred: RPC credential to match
    570 * @gfp_flags: allocation mode
    571 *
    572 * Returns a pointer to an instantiated nfs4_state_owner struct, or NULL.
    573 */
    574struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
    575					      const struct cred *cred,
    576					      gfp_t gfp_flags)
    577{
    578	struct nfs_client *clp = server->nfs_client;
    579	struct nfs4_state_owner *sp, *new;
    580
    581	spin_lock(&clp->cl_lock);
    582	sp = nfs4_find_state_owner_locked(server, cred);
    583	spin_unlock(&clp->cl_lock);
    584	if (sp != NULL)
    585		goto out;
    586	new = nfs4_alloc_state_owner(server, cred, gfp_flags);
    587	if (new == NULL)
    588		goto out;
    589	spin_lock(&clp->cl_lock);
    590	sp = nfs4_insert_state_owner_locked(new);
    591	spin_unlock(&clp->cl_lock);
    592	if (sp != new)
    593		nfs4_free_state_owner(new);
    594out:
    595	nfs4_gc_state_owners(server);
    596	return sp;
    597}
    598
    599/**
    600 * nfs4_put_state_owner - Release a nfs4_state_owner
    601 * @sp: state owner data to release
    602 *
    603 * Note that we keep released state owners on an LRU
    604 * list.
    605 * This caches valid state owners so that they can be
    606 * reused, to avoid the OPEN_CONFIRM on minor version 0.
    607 * It also pins the uniquifier of dropped state owners for
    608 * a while, to ensure that those state owner names are
    609 * never reused.
    610 */
    611void nfs4_put_state_owner(struct nfs4_state_owner *sp)
    612{
    613	struct nfs_server *server = sp->so_server;
    614	struct nfs_client *clp = server->nfs_client;
    615
    616	if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
    617		return;
    618
    619	sp->so_expires = jiffies;
    620	list_add_tail(&sp->so_lru, &server->state_owners_lru);
    621	spin_unlock(&clp->cl_lock);
    622}
    623
    624/**
    625 * nfs4_purge_state_owners - Release all cached state owners
    626 * @server: nfs_server with cached state owners to release
    627 * @head: resulting list of state owners
    628 *
    629 * Called at umount time.  Remaining state owners will be on
    630 * the LRU with ref count of zero.
    631 * Note that the state owners are not freed, but are added
    632 * to the list @head, which can later be used as an argument
    633 * to nfs4_free_state_owners.
    634 */
    635void nfs4_purge_state_owners(struct nfs_server *server, struct list_head *head)
    636{
    637	struct nfs_client *clp = server->nfs_client;
    638	struct nfs4_state_owner *sp, *tmp;
    639
    640	spin_lock(&clp->cl_lock);
    641	list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
    642		list_move(&sp->so_lru, head);
    643		nfs4_remove_state_owner_locked(sp);
    644	}
    645	spin_unlock(&clp->cl_lock);
    646}
    647
    648/**
    649 * nfs4_free_state_owners - Release all cached state owners
    650 * @head: resulting list of state owners
    651 *
    652 * Frees a list of state owners that was generated by
    653 * nfs4_purge_state_owners
    654 */
    655void nfs4_free_state_owners(struct list_head *head)
    656{
    657	struct nfs4_state_owner *sp, *tmp;
    658
    659	list_for_each_entry_safe(sp, tmp, head, so_lru) {
    660		list_del(&sp->so_lru);
    661		nfs4_free_state_owner(sp);
    662	}
    663}
    664
    665static struct nfs4_state *
    666nfs4_alloc_open_state(void)
    667{
    668	struct nfs4_state *state;
    669
    670	state = kzalloc(sizeof(*state), GFP_KERNEL_ACCOUNT);
    671	if (!state)
    672		return NULL;
    673	refcount_set(&state->count, 1);
    674	INIT_LIST_HEAD(&state->lock_states);
    675	spin_lock_init(&state->state_lock);
    676	seqlock_init(&state->seqlock);
    677	init_waitqueue_head(&state->waitq);
    678	return state;
    679}
    680
    681void
    682nfs4_state_set_mode_locked(struct nfs4_state *state, fmode_t fmode)
    683{
    684	if (state->state == fmode)
    685		return;
    686	/* NB! List reordering - see the reclaim code for why.  */
    687	if ((fmode & FMODE_WRITE) != (state->state & FMODE_WRITE)) {
    688		if (fmode & FMODE_WRITE)
    689			list_move(&state->open_states, &state->owner->so_states);
    690		else
    691			list_move_tail(&state->open_states, &state->owner->so_states);
    692	}
    693	state->state = fmode;
    694}
    695
    696static struct nfs4_state *
    697__nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
    698{
    699	struct nfs_inode *nfsi = NFS_I(inode);
    700	struct nfs4_state *state;
    701
    702	list_for_each_entry_rcu(state, &nfsi->open_states, inode_states) {
    703		if (state->owner != owner)
    704			continue;
    705		if (!nfs4_valid_open_stateid(state))
    706			continue;
    707		if (refcount_inc_not_zero(&state->count))
    708			return state;
    709	}
    710	return NULL;
    711}
    712
/*
 * Free @state through kfree_rcu(), using its rcu_head member, rather than
 * freeing it immediately.
 */
static void
nfs4_free_open_state(struct nfs4_state *state)
{
	kfree_rcu(state, rcu_head);
}
    718
    719struct nfs4_state *
    720nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
    721{
    722	struct nfs4_state *state, *new;
    723	struct nfs_inode *nfsi = NFS_I(inode);
    724
    725	rcu_read_lock();
    726	state = __nfs4_find_state_byowner(inode, owner);
    727	rcu_read_unlock();
    728	if (state)
    729		goto out;
    730	new = nfs4_alloc_open_state();
    731	spin_lock(&owner->so_lock);
    732	spin_lock(&inode->i_lock);
    733	state = __nfs4_find_state_byowner(inode, owner);
    734	if (state == NULL && new != NULL) {
    735		state = new;
    736		state->owner = owner;
    737		atomic_inc(&owner->so_count);
    738		ihold(inode);
    739		state->inode = inode;
    740		list_add_rcu(&state->inode_states, &nfsi->open_states);
    741		spin_unlock(&inode->i_lock);
    742		/* Note: The reclaim code dictates that we add stateless
    743		 * and read-only stateids to the end of the list */
    744		list_add_tail(&state->open_states, &owner->so_states);
    745		spin_unlock(&owner->so_lock);
    746	} else {
    747		spin_unlock(&inode->i_lock);
    748		spin_unlock(&owner->so_lock);
    749		if (new)
    750			nfs4_free_open_state(new);
    751	}
    752out:
    753	return state;
    754}
    755
/*
 * Drop a reference on @state.
 *
 * When the last reference goes, the state is unlinked from both the
 * inode's and the owner's lists (under so_lock and i_lock), the inode
 * reference and the state itself are released, and finally the owner
 * reference held by the state is put.
 */
void nfs4_put_open_state(struct nfs4_state *state)
{
	struct inode *inode = state->inode;
	struct nfs4_state_owner *owner = state->owner;

	/* so_lock is only acquired if the count actually reaches zero */
	if (!refcount_dec_and_lock(&state->count, &owner->so_lock))
		return;
	spin_lock(&inode->i_lock);
	list_del_rcu(&state->inode_states);
	list_del(&state->open_states);
	spin_unlock(&inode->i_lock);
	spin_unlock(&owner->so_lock);
	nfs4_inode_return_delegation_on_close(inode);
	iput(inode);
	nfs4_free_open_state(state);
	nfs4_put_state_owner(owner);
}
    773
/*
 * Close the current file.
 *
 * Decrements the open counter matching @fmode, recomputes the open mode
 * still needed from the remaining counters, and decides whether a close
 * must be sent: one is needed when a share mode that is no longer in use
 * still has its NFS_O_*_STATE flag set.
 *
 * If no close is needed, the state and owner references are dropped here;
 * otherwise the work is handed to nfs4_do_close() together with @gfp_mask
 * and @wait.
 */
static void __nfs4_close(struct nfs4_state *state,
		fmode_t fmode, gfp_t gfp_mask, int wait)
{
	struct nfs4_state_owner *owner = state->owner;
	int call_close = 0;
	fmode_t newstate;

	/*
	 * Extra owner reference: dropped below on the no-close path
	 * (presumably released by the close machinery otherwise — confirm).
	 */
	atomic_inc(&owner->so_count);
	/* Protect against nfs4_find_state() */
	spin_lock(&owner->so_lock);
	switch (fmode & (FMODE_READ | FMODE_WRITE)) {
		case FMODE_READ:
			state->n_rdonly--;
			break;
		case FMODE_WRITE:
			state->n_wronly--;
			break;
		case FMODE_READ|FMODE_WRITE:
			state->n_rdwr--;
	}
	/* Start from read+write and strip whatever is no longer in use */
	newstate = FMODE_READ|FMODE_WRITE;
	if (state->n_rdwr == 0) {
		if (state->n_rdonly == 0) {
			newstate &= ~FMODE_READ;
			call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags);
			call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
		}
		if (state->n_wronly == 0) {
			newstate &= ~FMODE_WRITE;
			call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags);
			call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
		}
		/* No opens of any kind remain */
		if (newstate == 0)
			clear_bit(NFS_DELEGATED_STATE, &state->flags);
	}
	nfs4_state_set_mode_locked(state, newstate);
	spin_unlock(&owner->so_lock);

	if (!call_close) {
		nfs4_put_open_state(state);
		nfs4_put_state_owner(owner);
	} else
		nfs4_do_close(state, gfp_mask, wait);
}
    821
/*
 * Close @state for @fmode with wait == 0
 * (presumably meaning "do not wait for the close to finish" — confirm
 * against nfs4_do_close()).
 */
void nfs4_close_state(struct nfs4_state *state, fmode_t fmode)
{
	__nfs4_close(state, fmode, GFP_KERNEL, 0);
}
    826
/*
 * Close @state for @fmode with wait == 1
 * (presumably meaning "wait for the close to finish" — confirm against
 * nfs4_do_close()).
 */
void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode)
{
	__nfs4_close(state, fmode, GFP_KERNEL, 1);
}
    831
    832/*
    833 * Search the state->lock_states for an existing lock_owner
    834 * that is compatible with either of the given owners.
    835 * If the second is non-zero, then the first refers to a Posix-lock
    836 * owner (current->files) and the second refers to a flock/OFD
    837 * owner (struct file*).  In that case, prefer a match for the first
    838 * owner.
    839 * If both sorts of locks are held on the one file we cannot know
    840 * which stateid was intended to be used, so a "correct" choice cannot
    841 * be made.  Failing that, a "consistent" choice is preferable.  The
    842 * consistent choice we make is to prefer the first owner, that of a
    843 * Posix lock.
    844 */
    845static struct nfs4_lock_state *
    846__nfs4_find_lock_state(struct nfs4_state *state,
    847		       fl_owner_t fl_owner, fl_owner_t fl_owner2)
    848{
    849	struct nfs4_lock_state *pos, *ret = NULL;
    850	list_for_each_entry(pos, &state->lock_states, ls_locks) {
    851		if (pos->ls_owner == fl_owner) {
    852			ret = pos;
    853			break;
    854		}
    855		if (pos->ls_owner == fl_owner2)
    856			ret = pos;
    857	}
    858	if (ret)
    859		refcount_inc(&ret->ls_count);
    860	return ret;
    861}
    862
    863/*
    864 * Return a compatible lock_state. If no initialized lock_state structure
    865 * exists, return an uninitialized one.
    866 *
    867 */
    868static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
    869{
    870	struct nfs4_lock_state *lsp;
    871	struct nfs_server *server = state->owner->so_server;
    872
    873	lsp = kzalloc(sizeof(*lsp), GFP_KERNEL_ACCOUNT);
    874	if (lsp == NULL)
    875		return NULL;
    876	nfs4_init_seqid_counter(&lsp->ls_seqid);
    877	refcount_set(&lsp->ls_count, 1);
    878	lsp->ls_state = state;
    879	lsp->ls_owner = fl_owner;
    880	lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id,
    881						0, 0, GFP_KERNEL_ACCOUNT);
    882	if (lsp->ls_seqid.owner_id < 0)
    883		goto out_free;
    884	INIT_LIST_HEAD(&lsp->ls_locks);
    885	return lsp;
    886out_free:
    887	kfree(lsp);
    888	return NULL;
    889}
    890
/*
 * Free @lsp: give its lock-owner id back to @server's id allocator,
 * destroy its seqid counter and release the memory.
 */
void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
{
	ida_simple_remove(&server->lockowner_id, lsp->ls_seqid.owner_id);
	nfs4_destroy_seqid_counter(&lsp->ls_seqid);
	kfree(lsp);
}
    897
/*
 * Return a compatible lock_state. If no initialized lock_state structure
 * exists, return an uninitialized one.
 *
 * The lookup runs under state->state_lock.  On a miss the lock is dropped
 * to allocate a new lock_state and the lookup is retried, so a lock_state
 * added by someone else in the meantime is found and the spare allocation
 * is freed.  Returns NULL only if that allocation fails.
 */
static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
{
	struct nfs4_lock_state *lsp, *new = NULL;
	
	for(;;) {
		spin_lock(&state->state_lock);
		lsp = __nfs4_find_lock_state(state, owner, NULL);
		if (lsp != NULL)
			break;
		if (new != NULL) {
			/* Second pass and still no match: publish ours */
			list_add(&new->ls_locks, &state->lock_states);
			set_bit(LK_STATE_IN_USE, &state->flags);
			lsp = new;
			new = NULL;
			break;
		}
		/* Drop the spinlock before allocating */
		spin_unlock(&state->state_lock);
		new = nfs4_alloc_lock_state(state, owner);
		if (new == NULL)
			return NULL;
	}
	/* Both break paths arrive here with state_lock held */
	spin_unlock(&state->state_lock);
	/* An existing entry was found on the retry: discard the spare */
	if (new != NULL)
		nfs4_free_lock_state(state->owner->so_server, new);
	return lsp;
}
    929
    930/*
    931 * Release reference to lock_state, and free it if we see that
    932 * it is no longer in use
    933 */
    934void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
    935{
    936	struct nfs_server *server;
    937	struct nfs4_state *state;
    938
    939	if (lsp == NULL)
    940		return;
    941	state = lsp->ls_state;
    942	if (!refcount_dec_and_lock(&lsp->ls_count, &state->state_lock))
    943		return;
    944	list_del(&lsp->ls_locks);
    945	if (list_empty(&state->lock_states))
    946		clear_bit(LK_STATE_IN_USE, &state->flags);
    947	spin_unlock(&state->state_lock);
    948	server = state->owner->so_server;
    949	if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) {
    950		struct nfs_client *clp = server->nfs_client;
    951
    952		clp->cl_mvops->free_lock_state(server, lsp);
    953	} else
    954		nfs4_free_lock_state(server, lsp);
    955}
    956
    957static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
    958{
    959	struct nfs4_lock_state *lsp = src->fl_u.nfs4_fl.owner;
    960
    961	dst->fl_u.nfs4_fl.owner = lsp;
    962	refcount_inc(&lsp->ls_count);
    963}
    964
    965static void nfs4_fl_release_lock(struct file_lock *fl)
    966{
    967	nfs4_put_lock_state(fl->fl_u.nfs4_fl.owner);
    968}
    969
    970static const struct file_lock_operations nfs4_fl_lock_ops = {
    971	.fl_copy_lock = nfs4_fl_copy_lock,
    972	.fl_release_private = nfs4_fl_release_lock,
    973};
    974
    975int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
    976{
    977	struct nfs4_lock_state *lsp;
    978
    979	if (fl->fl_ops != NULL)
    980		return 0;
    981	lsp = nfs4_get_lock_state(state, fl->fl_owner);
    982	if (lsp == NULL)
    983		return -ENOMEM;
    984	fl->fl_u.nfs4_fl.owner = lsp;
    985	fl->fl_ops = &nfs4_fl_lock_ops;
    986	return 0;
    987}
    988
    989static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
    990		struct nfs4_state *state,
    991		const struct nfs_lock_context *l_ctx)
    992{
    993	struct nfs4_lock_state *lsp;
    994	fl_owner_t fl_owner, fl_flock_owner;
    995	int ret = -ENOENT;
    996
    997	if (l_ctx == NULL)
    998		goto out;
    999
   1000	if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
   1001		goto out;
   1002
   1003	fl_owner = l_ctx->lockowner;
   1004	fl_flock_owner = l_ctx->open_context->flock_owner;
   1005
   1006	spin_lock(&state->state_lock);
   1007	lsp = __nfs4_find_lock_state(state, fl_owner, fl_flock_owner);
   1008	if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags))
   1009		ret = -EIO;
   1010	else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) {
   1011		nfs4_stateid_copy(dst, &lsp->ls_stateid);
   1012		ret = 0;
   1013	}
   1014	spin_unlock(&state->state_lock);
   1015	nfs4_put_lock_state(lsp);
   1016out:
   1017	return ret;
   1018}
   1019
   1020bool nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
   1021{
   1022	bool ret;
   1023	const nfs4_stateid *src;
   1024	int seq;
   1025
   1026	do {
   1027		ret = false;
   1028		src = &zero_stateid;
   1029		seq = read_seqbegin(&state->seqlock);
   1030		if (test_bit(NFS_OPEN_STATE, &state->flags)) {
   1031			src = &state->open_stateid;
   1032			ret = true;
   1033		}
   1034		nfs4_stateid_copy(dst, src);
   1035	} while (read_seqretry(&state->seqlock, seq));
   1036	return ret;
   1037}
   1038
   1039/*
   1040 * Byte-range lock aware utility to initialize the stateid of read/write
   1041 * requests.
   1042 */
   1043int nfs4_select_rw_stateid(struct nfs4_state *state,
   1044		fmode_t fmode, const struct nfs_lock_context *l_ctx,
   1045		nfs4_stateid *dst, const struct cred **cred)
   1046{
   1047	int ret;
   1048
   1049	if (!nfs4_valid_open_stateid(state))
   1050		return -EIO;
   1051	if (cred != NULL)
   1052		*cred = NULL;
   1053	ret = nfs4_copy_lock_stateid(dst, state, l_ctx);
   1054	if (ret == -EIO)
   1055		/* A lost lock - don't even consider delegations */
   1056		goto out;
   1057	/* returns true if delegation stateid found and copied */
   1058	if (nfs4_copy_delegation_stateid(state->inode, fmode, dst, cred)) {
   1059		ret = 0;
   1060		goto out;
   1061	}
   1062	if (ret != -ENOENT)
   1063		/* nfs4_copy_delegation_stateid() didn't over-write
   1064		 * dst, so it still has the lock stateid which we now
   1065		 * choose to use.
   1066		 */
   1067		goto out;
   1068	ret = nfs4_copy_open_stateid(dst, state) ? 0 : -EAGAIN;
   1069out:
   1070	if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41))
   1071		dst->seqid = 0;
   1072	return ret;
   1073}
   1074
   1075struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask)
   1076{
   1077	struct nfs_seqid *new;
   1078
   1079	new = kmalloc(sizeof(*new), gfp_mask);
   1080	if (new == NULL)
   1081		return ERR_PTR(-ENOMEM);
   1082	new->sequence = counter;
   1083	INIT_LIST_HEAD(&new->list);
   1084	new->task = NULL;
   1085	return new;
   1086}
   1087
   1088void nfs_release_seqid(struct nfs_seqid *seqid)
   1089{
   1090	struct nfs_seqid_counter *sequence;
   1091
   1092	if (seqid == NULL || list_empty(&seqid->list))
   1093		return;
   1094	sequence = seqid->sequence;
   1095	spin_lock(&sequence->lock);
   1096	list_del_init(&seqid->list);
   1097	if (!list_empty(&sequence->list)) {
   1098		struct nfs_seqid *next;
   1099
   1100		next = list_first_entry(&sequence->list,
   1101				struct nfs_seqid, list);
   1102		rpc_wake_up_queued_task(&sequence->wait, next->task);
   1103	}
   1104	spin_unlock(&sequence->lock);
   1105}
   1106
   1107void nfs_free_seqid(struct nfs_seqid *seqid)
   1108{
   1109	nfs_release_seqid(seqid);
   1110	kfree(seqid);
   1111}
   1112
   1113/*
   1114 * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or
   1115 * failed with a seqid incrementing error -
   1116 * see comments nfs4.h:seqid_mutating_error()
   1117 */
   1118static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
   1119{
   1120	switch (status) {
   1121		case 0:
   1122			break;
   1123		case -NFS4ERR_BAD_SEQID:
   1124			if (seqid->sequence->flags & NFS_SEQID_CONFIRMED)
   1125				return;
   1126			pr_warn_ratelimited("NFS: v4 server returned a bad"
   1127					" sequence-id error on an"
   1128					" unconfirmed sequence %p!\n",
   1129					seqid->sequence);
   1130			return;
   1131		case -NFS4ERR_STALE_CLIENTID:
   1132		case -NFS4ERR_STALE_STATEID:
   1133		case -NFS4ERR_BAD_STATEID:
   1134		case -NFS4ERR_BADXDR:
   1135		case -NFS4ERR_RESOURCE:
   1136		case -NFS4ERR_NOFILEHANDLE:
   1137		case -NFS4ERR_MOVED:
   1138			/* Non-seqid mutating errors */
   1139			return;
   1140	}
   1141	/*
   1142	 * Note: no locking needed as we are guaranteed to be first
   1143	 * on the sequence list
   1144	 */
   1145	seqid->sequence->counter++;
   1146}
   1147
   1148void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
   1149{
   1150	struct nfs4_state_owner *sp;
   1151
   1152	if (seqid == NULL)
   1153		return;
   1154
   1155	sp = container_of(seqid->sequence, struct nfs4_state_owner, so_seqid);
   1156	if (status == -NFS4ERR_BAD_SEQID)
   1157		nfs4_reset_state_owner(sp);
   1158	if (!nfs4_has_session(sp->so_server->nfs_client))
   1159		nfs_increment_seqid(status, seqid);
   1160}
   1161
   1162/*
   1163 * Increment the seqid if the LOCK/LOCKU succeeded, or
   1164 * failed with a seqid incrementing error -
   1165 * see comments nfs4.h:seqid_mutating_error()
   1166 */
   1167void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
   1168{
   1169	if (seqid != NULL)
   1170		nfs_increment_seqid(status, seqid);
   1171}
   1172
   1173int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
   1174{
   1175	struct nfs_seqid_counter *sequence;
   1176	int status = 0;
   1177
   1178	if (seqid == NULL)
   1179		goto out;
   1180	sequence = seqid->sequence;
   1181	spin_lock(&sequence->lock);
   1182	seqid->task = task;
   1183	if (list_empty(&seqid->list))
   1184		list_add_tail(&seqid->list, &sequence->list);
   1185	if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid)
   1186		goto unlock;
   1187	rpc_sleep_on(&sequence->wait, task, NULL);
   1188	status = -EAGAIN;
   1189unlock:
   1190	spin_unlock(&sequence->lock);
   1191out:
   1192	return status;
   1193}
   1194
   1195static int nfs4_run_state_manager(void *);
   1196
   1197static void nfs4_clear_state_manager_bit(struct nfs_client *clp)
   1198{
   1199	clear_and_wake_up_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
   1200	rpc_wake_up(&clp->cl_rpcwaitq);
   1201}
   1202
   1203/*
   1204 * Schedule the nfs_client asynchronous state management routine
   1205 */
   1206void nfs4_schedule_state_manager(struct nfs_client *clp)
   1207{
   1208	struct task_struct *task;
   1209	char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
   1210	struct rpc_clnt *cl = clp->cl_rpcclient;
   1211
   1212	while (cl != cl->cl_parent)
   1213		cl = cl->cl_parent;
   1214
   1215	set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
   1216	if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) {
   1217		wake_up_var(&clp->cl_state);
   1218		return;
   1219	}
   1220	set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
   1221	__module_get(THIS_MODULE);
   1222	refcount_inc(&clp->cl_count);
   1223
   1224	/* The rcu_read_lock() is not strictly necessary, as the state
   1225	 * manager is the only thread that ever changes the rpc_xprt
   1226	 * after it's initialized.  At this point, we're single threaded. */
   1227	rcu_read_lock();
   1228	snprintf(buf, sizeof(buf), "%s-manager",
   1229			rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
   1230	rcu_read_unlock();
   1231	task = kthread_run(nfs4_run_state_manager, clp, "%s", buf);
   1232	if (IS_ERR(task)) {
   1233		printk(KERN_ERR "%s: kthread_run: %ld\n",
   1234			__func__, PTR_ERR(task));
   1235		nfs4_clear_state_manager_bit(clp);
   1236		clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
   1237		nfs_put_client(clp);
   1238		module_put(THIS_MODULE);
   1239	}
   1240}
   1241
   1242/*
   1243 * Schedule a lease recovery attempt
   1244 */
   1245void nfs4_schedule_lease_recovery(struct nfs_client *clp)
   1246{
   1247	if (!clp)
   1248		return;
   1249	if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
   1250		set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
   1251	dprintk("%s: scheduling lease recovery for server %s\n", __func__,
   1252			clp->cl_hostname);
   1253	nfs4_schedule_state_manager(clp);
   1254}
   1255EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery);
   1256
   1257/**
   1258 * nfs4_schedule_migration_recovery - trigger migration recovery
   1259 *
   1260 * @server: FSID that is migrating
   1261 *
   1262 * Returns zero if recovery has started, otherwise a negative NFS4ERR
   1263 * value is returned.
   1264 */
   1265int nfs4_schedule_migration_recovery(const struct nfs_server *server)
   1266{
   1267	struct nfs_client *clp = server->nfs_client;
   1268
   1269	if (server->fh_expire_type != NFS4_FH_PERSISTENT) {
   1270		pr_err("NFS: volatile file handles not supported (server %s)\n",
   1271				clp->cl_hostname);
   1272		return -NFS4ERR_IO;
   1273	}
   1274
   1275	if (test_bit(NFS_MIG_FAILED, &server->mig_status))
   1276		return -NFS4ERR_IO;
   1277
   1278	dprintk("%s: scheduling migration recovery for (%llx:%llx) on %s\n",
   1279			__func__,
   1280			(unsigned long long)server->fsid.major,
   1281			(unsigned long long)server->fsid.minor,
   1282			clp->cl_hostname);
   1283
   1284	set_bit(NFS_MIG_IN_TRANSITION,
   1285			&((struct nfs_server *)server)->mig_status);
   1286	set_bit(NFS4CLNT_MOVED, &clp->cl_state);
   1287
   1288	nfs4_schedule_state_manager(clp);
   1289	return 0;
   1290}
   1291EXPORT_SYMBOL_GPL(nfs4_schedule_migration_recovery);
   1292
   1293/**
   1294 * nfs4_schedule_lease_moved_recovery - start lease-moved recovery
   1295 *
   1296 * @clp: server to check for moved leases
   1297 *
   1298 */
   1299void nfs4_schedule_lease_moved_recovery(struct nfs_client *clp)
   1300{
   1301	dprintk("%s: scheduling lease-moved recovery for client ID %llx on %s\n",
   1302		__func__, clp->cl_clientid, clp->cl_hostname);
   1303
   1304	set_bit(NFS4CLNT_LEASE_MOVED, &clp->cl_state);
   1305	nfs4_schedule_state_manager(clp);
   1306}
   1307EXPORT_SYMBOL_GPL(nfs4_schedule_lease_moved_recovery);
   1308
   1309int nfs4_wait_clnt_recover(struct nfs_client *clp)
   1310{
   1311	int res;
   1312
   1313	might_sleep();
   1314
   1315	refcount_inc(&clp->cl_count);
   1316	res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
   1317				 nfs_wait_bit_killable, TASK_KILLABLE);
   1318	if (res)
   1319		goto out;
   1320	if (clp->cl_cons_state < 0)
   1321		res = clp->cl_cons_state;
   1322out:
   1323	nfs_put_client(clp);
   1324	return res;
   1325}
   1326
   1327int nfs4_client_recover_expired_lease(struct nfs_client *clp)
   1328{
   1329	unsigned int loop;
   1330	int ret;
   1331
   1332	for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) {
   1333		ret = nfs4_wait_clnt_recover(clp);
   1334		if (ret != 0)
   1335			break;
   1336		if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
   1337		    !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
   1338			break;
   1339		nfs4_schedule_state_manager(clp);
   1340		ret = -EIO;
   1341	}
   1342	return ret;
   1343}
   1344
   1345/*
   1346 * nfs40_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN
   1347 * @clp: client to process
   1348 *
   1349 * Set the NFS4CLNT_LEASE_EXPIRED state in order to force a
   1350 * resend of the SETCLIENTID and hence re-establish the
   1351 * callback channel. Then return all existing delegations.
   1352 */
   1353static void nfs40_handle_cb_pathdown(struct nfs_client *clp)
   1354{
   1355	set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
   1356	nfs_expire_all_delegations(clp);
   1357	dprintk("%s: handling CB_PATHDOWN recovery for server %s\n", __func__,
   1358			clp->cl_hostname);
   1359}
   1360
   1361void nfs4_schedule_path_down_recovery(struct nfs_client *clp)
   1362{
   1363	nfs40_handle_cb_pathdown(clp);
   1364	nfs4_schedule_state_manager(clp);
   1365}
   1366
   1367static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
   1368{
   1369
   1370	if (!nfs4_valid_open_stateid(state))
   1371		return 0;
   1372	set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
   1373	/* Don't recover state that expired before the reboot */
   1374	if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) {
   1375		clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
   1376		return 0;
   1377	}
   1378	set_bit(NFS_OWNER_RECLAIM_REBOOT, &state->owner->so_flags);
   1379	set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
   1380	return 1;
   1381}
   1382
   1383int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
   1384{
   1385	if (!nfs4_valid_open_stateid(state))
   1386		return 0;
   1387	set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
   1388	clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
   1389	set_bit(NFS_OWNER_RECLAIM_NOGRACE, &state->owner->so_flags);
   1390	set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
   1391	return 1;
   1392}
   1393
   1394int nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state)
   1395{
   1396	struct nfs_client *clp = server->nfs_client;
   1397
   1398	if (!nfs4_state_mark_reclaim_nograce(clp, state))
   1399		return -EBADF;
   1400	nfs_inode_find_delegation_state_and_recover(state->inode,
   1401			&state->stateid);
   1402	dprintk("%s: scheduling stateid recovery for server %s\n", __func__,
   1403			clp->cl_hostname);
   1404	nfs4_schedule_state_manager(clp);
   1405	return 0;
   1406}
   1407EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery);
   1408
   1409static struct nfs4_lock_state *
   1410nfs_state_find_lock_state_by_stateid(struct nfs4_state *state,
   1411		const nfs4_stateid *stateid)
   1412{
   1413	struct nfs4_lock_state *pos;
   1414
   1415	list_for_each_entry(pos, &state->lock_states, ls_locks) {
   1416		if (!test_bit(NFS_LOCK_INITIALIZED, &pos->ls_flags))
   1417			continue;
   1418		if (nfs4_stateid_match_or_older(&pos->ls_stateid, stateid))
   1419			return pos;
   1420	}
   1421	return NULL;
   1422}
   1423
   1424static bool nfs_state_lock_state_matches_stateid(struct nfs4_state *state,
   1425		const nfs4_stateid *stateid)
   1426{
   1427	bool found = false;
   1428
   1429	if (test_bit(LK_STATE_IN_USE, &state->flags)) {
   1430		spin_lock(&state->state_lock);
   1431		if (nfs_state_find_lock_state_by_stateid(state, stateid))
   1432			found = true;
   1433		spin_unlock(&state->state_lock);
   1434	}
   1435	return found;
   1436}
   1437
   1438void nfs_inode_find_state_and_recover(struct inode *inode,
   1439		const nfs4_stateid *stateid)
   1440{
   1441	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
   1442	struct nfs_inode *nfsi = NFS_I(inode);
   1443	struct nfs_open_context *ctx;
   1444	struct nfs4_state *state;
   1445	bool found = false;
   1446
   1447	rcu_read_lock();
   1448	list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
   1449		state = ctx->state;
   1450		if (state == NULL)
   1451			continue;
   1452		if (nfs4_stateid_match_or_older(&state->stateid, stateid) &&
   1453		    nfs4_state_mark_reclaim_nograce(clp, state)) {
   1454			found = true;
   1455			continue;
   1456		}
   1457		if (test_bit(NFS_OPEN_STATE, &state->flags) &&
   1458		    nfs4_stateid_match_or_older(&state->open_stateid, stateid) &&
   1459		    nfs4_state_mark_reclaim_nograce(clp, state)) {
   1460			found = true;
   1461			continue;
   1462		}
   1463		if (nfs_state_lock_state_matches_stateid(state, stateid) &&
   1464		    nfs4_state_mark_reclaim_nograce(clp, state))
   1465			found = true;
   1466	}
   1467	rcu_read_unlock();
   1468
   1469	nfs_inode_find_delegation_state_and_recover(inode, stateid);
   1470	if (found)
   1471		nfs4_schedule_state_manager(clp);
   1472}
   1473
   1474static void nfs4_state_mark_open_context_bad(struct nfs4_state *state, int err)
   1475{
   1476	struct inode *inode = state->inode;
   1477	struct nfs_inode *nfsi = NFS_I(inode);
   1478	struct nfs_open_context *ctx;
   1479
   1480	rcu_read_lock();
   1481	list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
   1482		if (ctx->state != state)
   1483			continue;
   1484		set_bit(NFS_CONTEXT_BAD, &ctx->flags);
   1485		pr_warn("NFSv4: state recovery failed for open file %pd2, "
   1486				"error = %d\n", ctx->dentry, err);
   1487	}
   1488	rcu_read_unlock();
   1489}
   1490
   1491static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error)
   1492{
   1493	set_bit(NFS_STATE_RECOVERY_FAILED, &state->flags);
   1494	nfs4_state_mark_open_context_bad(state, error);
   1495}
   1496
   1497
   1498static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
   1499{
   1500	struct inode *inode = state->inode;
   1501	struct nfs_inode *nfsi = NFS_I(inode);
   1502	struct file_lock *fl;
   1503	struct nfs4_lock_state *lsp;
   1504	int status = 0;
   1505	struct file_lock_context *flctx = inode->i_flctx;
   1506	struct list_head *list;
   1507
   1508	if (flctx == NULL)
   1509		return 0;
   1510
   1511	list = &flctx->flc_posix;
   1512
   1513	/* Guard against delegation returns and new lock/unlock calls */
   1514	down_write(&nfsi->rwsem);
   1515	spin_lock(&flctx->flc_lock);
   1516restart:
   1517	list_for_each_entry(fl, list, fl_list) {
   1518		if (nfs_file_open_context(fl->fl_file)->state != state)
   1519			continue;
   1520		spin_unlock(&flctx->flc_lock);
   1521		status = ops->recover_lock(state, fl);
   1522		switch (status) {
   1523		case 0:
   1524			break;
   1525		case -ETIMEDOUT:
   1526		case -ESTALE:
   1527		case -NFS4ERR_ADMIN_REVOKED:
   1528		case -NFS4ERR_STALE_STATEID:
   1529		case -NFS4ERR_BAD_STATEID:
   1530		case -NFS4ERR_EXPIRED:
   1531		case -NFS4ERR_NO_GRACE:
   1532		case -NFS4ERR_STALE_CLIENTID:
   1533		case -NFS4ERR_BADSESSION:
   1534		case -NFS4ERR_BADSLOT:
   1535		case -NFS4ERR_BAD_HIGH_SLOT:
   1536		case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
   1537			goto out;
   1538		default:
   1539			pr_err("NFS: %s: unhandled error %d\n",
   1540					__func__, status);
   1541			fallthrough;
   1542		case -ENOMEM:
   1543		case -NFS4ERR_DENIED:
   1544		case -NFS4ERR_RECLAIM_BAD:
   1545		case -NFS4ERR_RECLAIM_CONFLICT:
   1546			lsp = fl->fl_u.nfs4_fl.owner;
   1547			if (lsp)
   1548				set_bit(NFS_LOCK_LOST, &lsp->ls_flags);
   1549			status = 0;
   1550		}
   1551		spin_lock(&flctx->flc_lock);
   1552	}
   1553	if (list == &flctx->flc_posix) {
   1554		list = &flctx->flc_flock;
   1555		goto restart;
   1556	}
   1557	spin_unlock(&flctx->flc_lock);
   1558out:
   1559	up_write(&nfsi->rwsem);
   1560	return status;
   1561}
   1562
   1563#ifdef CONFIG_NFS_V4_2
   1564static void nfs42_complete_copies(struct nfs4_state_owner *sp, struct nfs4_state *state)
   1565{
   1566	struct nfs4_copy_state *copy;
   1567
   1568	if (!test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags) &&
   1569		!test_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags))
   1570		return;
   1571
   1572	spin_lock(&sp->so_server->nfs_client->cl_lock);
   1573	list_for_each_entry(copy, &sp->so_server->ss_copies, copies) {
   1574		if ((test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags) &&
   1575				!nfs4_stateid_match_other(&state->stateid,
   1576				&copy->parent_dst_state->stateid)))
   1577				continue;
   1578		copy->flags = 1;
   1579		if (test_and_clear_bit(NFS_CLNT_DST_SSC_COPY_STATE,
   1580				&state->flags)) {
   1581			clear_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags);
   1582			complete(&copy->completion);
   1583		}
   1584	}
   1585	list_for_each_entry(copy, &sp->so_server->ss_copies, src_copies) {
   1586		if ((test_bit(NFS_CLNT_SRC_SSC_COPY_STATE, &state->flags) &&
   1587				!nfs4_stateid_match_other(&state->stateid,
   1588				&copy->parent_src_state->stateid)))
   1589				continue;
   1590		copy->flags = 1;
   1591		if (test_and_clear_bit(NFS_CLNT_DST_SSC_COPY_STATE,
   1592				&state->flags))
   1593			complete(&copy->completion);
   1594	}
   1595	spin_unlock(&sp->so_server->nfs_client->cl_lock);
   1596}
   1597#else /* !CONFIG_NFS_V4_2 */
   1598static inline void nfs42_complete_copies(struct nfs4_state_owner *sp,
   1599					 struct nfs4_state *state)
   1600{
   1601}
   1602#endif /* CONFIG_NFS_V4_2 */
   1603
   1604static int __nfs4_reclaim_open_state(struct nfs4_state_owner *sp, struct nfs4_state *state,
   1605				     const struct nfs4_state_recovery_ops *ops,
   1606				     int *lost_locks)
   1607{
   1608	struct nfs4_lock_state *lock;
   1609	int status;
   1610
   1611	status = ops->recover_open(sp, state);
   1612	if (status < 0)
   1613		return status;
   1614
   1615	status = nfs4_reclaim_locks(state, ops);
   1616	if (status < 0)
   1617		return status;
   1618
   1619	if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) {
   1620		spin_lock(&state->state_lock);
   1621		list_for_each_entry(lock, &state->lock_states, ls_locks) {
   1622			trace_nfs4_state_lock_reclaim(state, lock);
   1623			if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags))
   1624				*lost_locks += 1;
   1625		}
   1626		spin_unlock(&state->state_lock);
   1627	}
   1628
   1629	nfs42_complete_copies(sp, state);
   1630	clear_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
   1631	return status;
   1632}
   1633
   1634static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp,
   1635				   const struct nfs4_state_recovery_ops *ops,
   1636				   int *lost_locks)
   1637{
   1638	struct nfs4_state *state;
   1639	unsigned int loop = 0;
   1640	int status = 0;
   1641#ifdef CONFIG_NFS_V4_2
   1642	bool found_ssc_copy_state = false;
   1643#endif /* CONFIG_NFS_V4_2 */
   1644
   1645	/* Note: we rely on the sp->so_states list being ordered 
   1646	 * so that we always reclaim open(O_RDWR) and/or open(O_WRITE)
   1647	 * states first.
   1648	 * This is needed to ensure that the server won't give us any
   1649	 * read delegations that we have to return if, say, we are
   1650	 * recovering after a network partition or a reboot from a
   1651	 * server that doesn't support a grace period.
   1652	 */
   1653	spin_lock(&sp->so_lock);
   1654	raw_write_seqcount_begin(&sp->so_reclaim_seqcount);
   1655restart:
   1656	list_for_each_entry(state, &sp->so_states, open_states) {
   1657		if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
   1658			continue;
   1659		if (!nfs4_valid_open_stateid(state))
   1660			continue;
   1661		if (state->state == 0)
   1662			continue;
   1663#ifdef CONFIG_NFS_V4_2
   1664		if (test_bit(NFS_SRV_SSC_COPY_STATE, &state->flags)) {
   1665			nfs4_state_mark_recovery_failed(state, -EIO);
   1666			found_ssc_copy_state = true;
   1667			continue;
   1668		}
   1669#endif /* CONFIG_NFS_V4_2 */
   1670		refcount_inc(&state->count);
   1671		spin_unlock(&sp->so_lock);
   1672		status = __nfs4_reclaim_open_state(sp, state, ops, lost_locks);
   1673
   1674		switch (status) {
   1675		default:
   1676			if (status >= 0) {
   1677				loop = 0;
   1678				break;
   1679			}
   1680			printk(KERN_ERR "NFS: %s: unhandled error %d\n", __func__, status);
   1681			fallthrough;
   1682		case -ENOENT:
   1683		case -ENOMEM:
   1684		case -EACCES:
   1685		case -EROFS:
   1686		case -EIO:
   1687		case -ESTALE:
   1688			/* Open state on this file cannot be recovered */
   1689			nfs4_state_mark_recovery_failed(state, status);
   1690			break;
   1691		case -EAGAIN:
   1692			ssleep(1);
   1693			if (loop++ < 10) {
   1694				set_bit(ops->state_flag_bit, &state->flags);
   1695				break;
   1696			}
   1697			fallthrough;
   1698		case -NFS4ERR_ADMIN_REVOKED:
   1699		case -NFS4ERR_STALE_STATEID:
   1700		case -NFS4ERR_OLD_STATEID:
   1701		case -NFS4ERR_BAD_STATEID:
   1702		case -NFS4ERR_RECLAIM_BAD:
   1703		case -NFS4ERR_RECLAIM_CONFLICT:
   1704			nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
   1705			break;
   1706		case -NFS4ERR_EXPIRED:
   1707		case -NFS4ERR_NO_GRACE:
   1708			nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
   1709			fallthrough;
   1710		case -NFS4ERR_STALE_CLIENTID:
   1711		case -NFS4ERR_BADSESSION:
   1712		case -NFS4ERR_BADSLOT:
   1713		case -NFS4ERR_BAD_HIGH_SLOT:
   1714		case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
   1715		case -ETIMEDOUT:
   1716			goto out_err;
   1717		}
   1718		nfs4_put_open_state(state);
   1719		spin_lock(&sp->so_lock);
   1720		goto restart;
   1721	}
   1722	raw_write_seqcount_end(&sp->so_reclaim_seqcount);
   1723	spin_unlock(&sp->so_lock);
   1724#ifdef CONFIG_NFS_V4_2
   1725	if (found_ssc_copy_state)
   1726		return -EIO;
   1727#endif /* CONFIG_NFS_V4_2 */
   1728	return 0;
   1729out_err:
   1730	nfs4_put_open_state(state);
   1731	spin_lock(&sp->so_lock);
   1732	raw_write_seqcount_end(&sp->so_reclaim_seqcount);
   1733	spin_unlock(&sp->so_lock);
   1734	return status;
   1735}
   1736
   1737static void nfs4_clear_open_state(struct nfs4_state *state)
   1738{
   1739	struct nfs4_lock_state *lock;
   1740
   1741	clear_bit(NFS_DELEGATED_STATE, &state->flags);
   1742	clear_bit(NFS_O_RDONLY_STATE, &state->flags);
   1743	clear_bit(NFS_O_WRONLY_STATE, &state->flags);
   1744	clear_bit(NFS_O_RDWR_STATE, &state->flags);
   1745	spin_lock(&state->state_lock);
   1746	list_for_each_entry(lock, &state->lock_states, ls_locks) {
   1747		lock->ls_seqid.flags = 0;
   1748		clear_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags);
   1749	}
   1750	spin_unlock(&state->state_lock);
   1751}
   1752
   1753static void nfs4_reset_seqids(struct nfs_server *server,
   1754	int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
   1755{
   1756	struct nfs_client *clp = server->nfs_client;
   1757	struct nfs4_state_owner *sp;
   1758	struct rb_node *pos;
   1759	struct nfs4_state *state;
   1760
   1761	spin_lock(&clp->cl_lock);
   1762	for (pos = rb_first(&server->state_owners);
   1763	     pos != NULL;
   1764	     pos = rb_next(pos)) {
   1765		sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
   1766		sp->so_seqid.flags = 0;
   1767		spin_lock(&sp->so_lock);
   1768		list_for_each_entry(state, &sp->so_states, open_states) {
   1769			if (mark_reclaim(clp, state))
   1770				nfs4_clear_open_state(state);
   1771		}
   1772		spin_unlock(&sp->so_lock);
   1773	}
   1774	spin_unlock(&clp->cl_lock);
   1775}
   1776
   1777static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp,
   1778	int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
   1779{
   1780	struct nfs_server *server;
   1781
   1782	rcu_read_lock();
   1783	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
   1784		nfs4_reset_seqids(server, mark_reclaim);
   1785	rcu_read_unlock();
   1786}
   1787
   1788static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
   1789{
   1790	/* Mark all delegations for reclaim */
   1791	nfs_delegation_mark_reclaim(clp);
   1792	nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot);
   1793}
   1794
   1795static int nfs4_reclaim_complete(struct nfs_client *clp,
   1796				 const struct nfs4_state_recovery_ops *ops,
   1797				 const struct cred *cred)
   1798{
   1799	/* Notify the server we're done reclaiming our state */
   1800	if (ops->reclaim_complete)
   1801		return ops->reclaim_complete(clp, cred);
   1802	return 0;
   1803}
   1804
   1805static void nfs4_clear_reclaim_server(struct nfs_server *server)
   1806{
   1807	struct nfs_client *clp = server->nfs_client;
   1808	struct nfs4_state_owner *sp;
   1809	struct rb_node *pos;
   1810	struct nfs4_state *state;
   1811
   1812	spin_lock(&clp->cl_lock);
   1813	for (pos = rb_first(&server->state_owners);
   1814	     pos != NULL;
   1815	     pos = rb_next(pos)) {
   1816		sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
   1817		spin_lock(&sp->so_lock);
   1818		list_for_each_entry(state, &sp->so_states, open_states) {
   1819			if (!test_and_clear_bit(NFS_STATE_RECLAIM_REBOOT,
   1820						&state->flags))
   1821				continue;
   1822			nfs4_state_mark_reclaim_nograce(clp, state);
   1823		}
   1824		spin_unlock(&sp->so_lock);
   1825	}
   1826	spin_unlock(&clp->cl_lock);
   1827}
   1828
   1829static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp)
   1830{
   1831	struct nfs_server *server;
   1832
   1833	if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
   1834		return 0;
   1835
   1836	rcu_read_lock();
   1837	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
   1838		nfs4_clear_reclaim_server(server);
   1839	rcu_read_unlock();
   1840
   1841	nfs_delegation_reap_unclaimed(clp);
   1842	return 1;
   1843}
   1844
   1845static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
   1846{
   1847	const struct nfs4_state_recovery_ops *ops;
   1848	const struct cred *cred;
   1849	int err;
   1850
   1851	if (!nfs4_state_clear_reclaim_reboot(clp))
   1852		return;
   1853	ops = clp->cl_mvops->reboot_recovery_ops;
   1854	cred = nfs4_get_clid_cred(clp);
   1855	err = nfs4_reclaim_complete(clp, ops, cred);
   1856	put_cred(cred);
   1857	if (err == -NFS4ERR_CONN_NOT_BOUND_TO_SESSION)
   1858		set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
   1859}
   1860
   1861static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp)
   1862{
   1863	nfs_mark_test_expired_all_delegations(clp);
   1864	nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce);
   1865}
   1866
   1867static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
   1868{
   1869	switch (error) {
   1870	case 0:
   1871		break;
   1872	case -NFS4ERR_CB_PATH_DOWN:
   1873		nfs40_handle_cb_pathdown(clp);
   1874		break;
   1875	case -NFS4ERR_NO_GRACE:
   1876		nfs4_state_end_reclaim_reboot(clp);
   1877		break;
   1878	case -NFS4ERR_STALE_CLIENTID:
   1879		set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
   1880		nfs4_state_start_reclaim_reboot(clp);
   1881		break;
   1882	case -NFS4ERR_EXPIRED:
   1883		set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
   1884		nfs4_state_start_reclaim_nograce(clp);
   1885		break;
   1886	case -NFS4ERR_BADSESSION:
   1887	case -NFS4ERR_BADSLOT:
   1888	case -NFS4ERR_BAD_HIGH_SLOT:
   1889	case -NFS4ERR_DEADSESSION:
   1890	case -NFS4ERR_SEQ_FALSE_RETRY:
   1891	case -NFS4ERR_SEQ_MISORDERED:
   1892		set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
   1893		/* Zero session reset errors */
   1894		break;
   1895	case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
   1896		set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
   1897		break;
   1898	default:
   1899		dprintk("%s: failed to handle error %d for server %s\n",
   1900				__func__, error, clp->cl_hostname);
   1901		return error;
   1902	}
   1903	dprintk("%s: handled error %d for server %s\n", __func__, error,
   1904			clp->cl_hostname);
   1905	return 0;
   1906}
   1907
   1908static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops)
   1909{
   1910	struct nfs4_state_owner *sp;
   1911	struct nfs_server *server;
   1912	struct rb_node *pos;
   1913	LIST_HEAD(freeme);
   1914	int status = 0;
   1915	int lost_locks = 0;
   1916
   1917restart:
   1918	rcu_read_lock();
   1919	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
   1920		nfs4_purge_state_owners(server, &freeme);
   1921		spin_lock(&clp->cl_lock);
   1922		for (pos = rb_first(&server->state_owners);
   1923		     pos != NULL;
   1924		     pos = rb_next(pos)) {
   1925			sp = rb_entry(pos,
   1926				struct nfs4_state_owner, so_server_node);
   1927			if (!test_and_clear_bit(ops->owner_flag_bit,
   1928							&sp->so_flags))
   1929				continue;
   1930			if (!atomic_inc_not_zero(&sp->so_count))
   1931				continue;
   1932			spin_unlock(&clp->cl_lock);
   1933			rcu_read_unlock();
   1934
   1935			status = nfs4_reclaim_open_state(sp, ops, &lost_locks);
   1936			if (status < 0) {
   1937				if (lost_locks)
   1938					pr_warn("NFS: %s: lost %d locks\n",
   1939						clp->cl_hostname, lost_locks);
   1940				set_bit(ops->owner_flag_bit, &sp->so_flags);
   1941				nfs4_put_state_owner(sp);
   1942				status = nfs4_recovery_handle_error(clp, status);
   1943				return (status != 0) ? status : -EAGAIN;
   1944			}
   1945
   1946			nfs4_put_state_owner(sp);
   1947			goto restart;
   1948		}
   1949		spin_unlock(&clp->cl_lock);
   1950	}
   1951	rcu_read_unlock();
   1952	nfs4_free_state_owners(&freeme);
   1953	if (lost_locks)
   1954		pr_warn("NFS: %s: lost %d locks\n",
   1955			clp->cl_hostname, lost_locks);
   1956	return 0;
   1957}
   1958
   1959static int nfs4_check_lease(struct nfs_client *clp)
   1960{
   1961	const struct cred *cred;
   1962	const struct nfs4_state_maintenance_ops *ops =
   1963		clp->cl_mvops->state_renewal_ops;
   1964	int status;
   1965
   1966	/* Is the client already known to have an expired lease? */
   1967	if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
   1968		return 0;
   1969	cred = ops->get_state_renewal_cred(clp);
   1970	if (cred == NULL) {
   1971		cred = nfs4_get_clid_cred(clp);
   1972		status = -ENOKEY;
   1973		if (cred == NULL)
   1974			goto out;
   1975	}
   1976	status = ops->renew_lease(clp, cred);
   1977	put_cred(cred);
   1978	if (status == -ETIMEDOUT) {
   1979		set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
   1980		return 0;
   1981	}
   1982out:
   1983	return nfs4_recovery_handle_error(clp, status);
   1984}
   1985
   1986/* Set NFS4CLNT_LEASE_EXPIRED and reclaim reboot state for all v4.0 errors
   1987 * and for recoverable errors on EXCHANGE_ID for v4.1
   1988 */
   1989static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
   1990{
   1991	switch (status) {
   1992	case -NFS4ERR_SEQ_MISORDERED:
   1993		if (test_and_set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state))
   1994			return -ESERVERFAULT;
   1995		/* Lease confirmation error: retry after purging the lease */
   1996		ssleep(1);
   1997		clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
   1998		break;
   1999	case -NFS4ERR_STALE_CLIENTID:
   2000		clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
   2001		nfs4_state_start_reclaim_reboot(clp);
   2002		break;
   2003	case -NFS4ERR_CLID_INUSE:
   2004		pr_err("NFS: Server %s reports our clientid is in use\n",
   2005			clp->cl_hostname);
   2006		nfs_mark_client_ready(clp, -EPERM);
   2007		clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
   2008		return -EPERM;
   2009	case -EACCES:
   2010	case -NFS4ERR_DELAY:
   2011	case -EAGAIN:
   2012		ssleep(1);
   2013		break;
   2014
   2015	case -NFS4ERR_MINOR_VERS_MISMATCH:
   2016		if (clp->cl_cons_state == NFS_CS_SESSION_INITING)
   2017			nfs_mark_client_ready(clp, -EPROTONOSUPPORT);
   2018		dprintk("%s: exit with error %d for server %s\n",
   2019				__func__, -EPROTONOSUPPORT, clp->cl_hostname);
   2020		return -EPROTONOSUPPORT;
   2021	case -ENOSPC:
   2022		if (clp->cl_cons_state == NFS_CS_SESSION_INITING)
   2023			nfs_mark_client_ready(clp, -EIO);
   2024		return -EIO;
   2025	case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
   2026				 * in nfs4_exchange_id */
   2027	default:
   2028		dprintk("%s: exit with error %d for server %s\n", __func__,
   2029				status, clp->cl_hostname);
   2030		return status;
   2031	}
   2032	set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
   2033	dprintk("%s: handled error %d for server %s\n", __func__, status,
   2034			clp->cl_hostname);
   2035	return 0;
   2036}
   2037
   2038static int nfs4_establish_lease(struct nfs_client *clp)
   2039{
   2040	const struct cred *cred;
   2041	const struct nfs4_state_recovery_ops *ops =
   2042		clp->cl_mvops->reboot_recovery_ops;
   2043	int status;
   2044
   2045	status = nfs4_begin_drain_session(clp);
   2046	if (status != 0)
   2047		return status;
   2048	cred = nfs4_get_clid_cred(clp);
   2049	if (cred == NULL)
   2050		return -ENOENT;
   2051	status = ops->establish_clid(clp, cred);
   2052	put_cred(cred);
   2053	if (status != 0)
   2054		return status;
   2055	pnfs_destroy_all_layouts(clp);
   2056	return 0;
   2057}
   2058
   2059/*
   2060 * Returns zero or a negative errno.  NFS4ERR values are converted
   2061 * to local errno values.
   2062 */
   2063static int nfs4_reclaim_lease(struct nfs_client *clp)
   2064{
   2065	int status;
   2066
   2067	status = nfs4_establish_lease(clp);
   2068	if (status < 0)
   2069		return nfs4_handle_reclaim_lease_error(clp, status);
   2070	if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state))
   2071		nfs4_state_start_reclaim_nograce(clp);
   2072	if (!test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state))
   2073		set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
   2074	clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
   2075	clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
   2076	return 0;
   2077}
   2078
   2079static int nfs4_purge_lease(struct nfs_client *clp)
   2080{
   2081	int status;
   2082
   2083	status = nfs4_establish_lease(clp);
   2084	if (status < 0)
   2085		return nfs4_handle_reclaim_lease_error(clp, status);
   2086	clear_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
   2087	set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
   2088	nfs4_state_start_reclaim_nograce(clp);
   2089	return 0;
   2090}
   2091
   2092/*
   2093 * Try remote migration of one FSID from a source server to a
   2094 * destination server.  The source server provides a list of
   2095 * potential destinations.
   2096 *
   2097 * Returns zero or a negative NFS4ERR status code.
   2098 */
   2099static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred)
   2100{
   2101	struct nfs_client *clp = server->nfs_client;
   2102	struct nfs4_fs_locations *locations = NULL;
   2103	struct inode *inode;
   2104	struct page *page;
   2105	int status, result;
   2106
   2107	dprintk("--> %s: FSID %llx:%llx on \"%s\"\n", __func__,
   2108			(unsigned long long)server->fsid.major,
   2109			(unsigned long long)server->fsid.minor,
   2110			clp->cl_hostname);
   2111
   2112	result = 0;
   2113	page = alloc_page(GFP_KERNEL);
   2114	locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
   2115	if (page == NULL || locations == NULL) {
   2116		dprintk("<-- %s: no memory\n", __func__);
   2117		goto out;
   2118	}
   2119	locations->fattr = nfs_alloc_fattr();
   2120	if (locations->fattr == NULL) {
   2121		dprintk("<-- %s: no memory\n", __func__);
   2122		goto out;
   2123	}
   2124
   2125	inode = d_inode(server->super->s_root);
   2126	result = nfs4_proc_get_locations(server, NFS_FH(inode), locations,
   2127					 page, cred);
   2128	if (result) {
   2129		dprintk("<-- %s: failed to retrieve fs_locations: %d\n",
   2130			__func__, result);
   2131		goto out;
   2132	}
   2133
   2134	result = -NFS4ERR_NXIO;
   2135	if (!locations->nlocations)
   2136		goto out;
   2137
   2138	if (!(locations->fattr->valid & NFS_ATTR_FATTR_V4_LOCATIONS)) {
   2139		dprintk("<-- %s: No fs_locations data, migration skipped\n",
   2140			__func__);
   2141		goto out;
   2142	}
   2143
   2144	status = nfs4_begin_drain_session(clp);
   2145	if (status != 0) {
   2146		result = status;
   2147		goto out;
   2148	}
   2149
   2150	status = nfs4_replace_transport(server, locations);
   2151	if (status != 0) {
   2152		dprintk("<-- %s: failed to replace transport: %d\n",
   2153			__func__, status);
   2154		goto out;
   2155	}
   2156
   2157	result = 0;
   2158	dprintk("<-- %s: migration succeeded\n", __func__);
   2159
   2160out:
   2161	if (page != NULL)
   2162		__free_page(page);
   2163	if (locations != NULL)
   2164		kfree(locations->fattr);
   2165	kfree(locations);
   2166	if (result) {
   2167		pr_err("NFS: migration recovery failed (server %s)\n",
   2168				clp->cl_hostname);
   2169		set_bit(NFS_MIG_FAILED, &server->mig_status);
   2170	}
   2171	return result;
   2172}
   2173
   2174/*
   2175 * Returns zero or a negative NFS4ERR status code.
   2176 */
   2177static int nfs4_handle_migration(struct nfs_client *clp)
   2178{
   2179	const struct nfs4_state_maintenance_ops *ops =
   2180				clp->cl_mvops->state_renewal_ops;
   2181	struct nfs_server *server;
   2182	const struct cred *cred;
   2183
   2184	dprintk("%s: migration reported on \"%s\"\n", __func__,
   2185			clp->cl_hostname);
   2186
   2187	cred = ops->get_state_renewal_cred(clp);
   2188	if (cred == NULL)
   2189		return -NFS4ERR_NOENT;
   2190
   2191	clp->cl_mig_gen++;
   2192restart:
   2193	rcu_read_lock();
   2194	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
   2195		int status;
   2196
   2197		if (server->mig_gen == clp->cl_mig_gen)
   2198			continue;
   2199		server->mig_gen = clp->cl_mig_gen;
   2200
   2201		if (!test_and_clear_bit(NFS_MIG_IN_TRANSITION,
   2202						&server->mig_status))
   2203			continue;
   2204
   2205		rcu_read_unlock();
   2206		status = nfs4_try_migration(server, cred);
   2207		if (status < 0) {
   2208			put_cred(cred);
   2209			return status;
   2210		}
   2211		goto restart;
   2212	}
   2213	rcu_read_unlock();
   2214	put_cred(cred);
   2215	return 0;
   2216}
   2217
   2218/*
   2219 * Test each nfs_server on the clp's cl_superblocks list to see
   2220 * if it's moved to another server.  Stop when the server no longer
   2221 * returns NFS4ERR_LEASE_MOVED.
   2222 */
   2223static int nfs4_handle_lease_moved(struct nfs_client *clp)
   2224{
   2225	const struct nfs4_state_maintenance_ops *ops =
   2226				clp->cl_mvops->state_renewal_ops;
   2227	struct nfs_server *server;
   2228	const struct cred *cred;
   2229
   2230	dprintk("%s: lease moved reported on \"%s\"\n", __func__,
   2231			clp->cl_hostname);
   2232
   2233	cred = ops->get_state_renewal_cred(clp);
   2234	if (cred == NULL)
   2235		return -NFS4ERR_NOENT;
   2236
   2237	clp->cl_mig_gen++;
   2238restart:
   2239	rcu_read_lock();
   2240	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
   2241		struct inode *inode;
   2242		int status;
   2243
   2244		if (server->mig_gen == clp->cl_mig_gen)
   2245			continue;
   2246		server->mig_gen = clp->cl_mig_gen;
   2247
   2248		rcu_read_unlock();
   2249
   2250		inode = d_inode(server->super->s_root);
   2251		status = nfs4_proc_fsid_present(inode, cred);
   2252		if (status != -NFS4ERR_MOVED)
   2253			goto restart;	/* wasn't this one */
   2254		if (nfs4_try_migration(server, cred) == -NFS4ERR_LEASE_MOVED)
   2255			goto restart;	/* there are more */
   2256		goto out;
   2257	}
   2258	rcu_read_unlock();
   2259
   2260out:
   2261	put_cred(cred);
   2262	return 0;
   2263}
   2264
   2265/**
   2266 * nfs4_discover_server_trunking - Detect server IP address trunking
   2267 *
   2268 * @clp: nfs_client under test
   2269 * @result: OUT: found nfs_client, or clp
   2270 *
   2271 * Returns zero or a negative errno.  If zero is returned,
   2272 * an nfs_client pointer is planted in "result".
   2273 *
   2274 * Note: since we are invoked in process context, and
   2275 * not from inside the state manager, we cannot use
   2276 * nfs4_handle_reclaim_lease_error().
   2277 */
   2278int nfs4_discover_server_trunking(struct nfs_client *clp,
   2279				  struct nfs_client **result)
   2280{
   2281	const struct nfs4_state_recovery_ops *ops =
   2282				clp->cl_mvops->reboot_recovery_ops;
   2283	struct rpc_clnt *clnt;
   2284	const struct cred *cred;
   2285	int i, status;
   2286
   2287	dprintk("NFS: %s: testing '%s'\n", __func__, clp->cl_hostname);
   2288
   2289	clnt = clp->cl_rpcclient;
   2290	i = 0;
   2291
   2292	mutex_lock(&nfs_clid_init_mutex);
   2293again:
   2294	status  = -ENOENT;
   2295	cred = nfs4_get_clid_cred(clp);
   2296	if (cred == NULL)
   2297		goto out_unlock;
   2298
   2299	status = ops->detect_trunking(clp, result, cred);
   2300	put_cred(cred);
   2301	switch (status) {
   2302	case 0:
   2303	case -EINTR:
   2304	case -ERESTARTSYS:
   2305		break;
   2306	case -ETIMEDOUT:
   2307		if (clnt->cl_softrtry)
   2308			break;
   2309		fallthrough;
   2310	case -NFS4ERR_DELAY:
   2311	case -EAGAIN:
   2312		ssleep(1);
   2313		fallthrough;
   2314	case -NFS4ERR_STALE_CLIENTID:
   2315		dprintk("NFS: %s after status %d, retrying\n",
   2316			__func__, status);
   2317		goto again;
   2318	case -EACCES:
   2319		if (i++ == 0) {
   2320			nfs4_root_machine_cred(clp);
   2321			goto again;
   2322		}
   2323		if (clnt->cl_auth->au_flavor == RPC_AUTH_UNIX)
   2324			break;
   2325		fallthrough;
   2326	case -NFS4ERR_CLID_INUSE:
   2327	case -NFS4ERR_WRONGSEC:
   2328		/* No point in retrying if we already used RPC_AUTH_UNIX */
   2329		if (clnt->cl_auth->au_flavor == RPC_AUTH_UNIX) {
   2330			status = -EPERM;
   2331			break;
   2332		}
   2333		clnt = rpc_clone_client_set_auth(clnt, RPC_AUTH_UNIX);
   2334		if (IS_ERR(clnt)) {
   2335			status = PTR_ERR(clnt);
   2336			break;
   2337		}
   2338		/* Note: this is safe because we haven't yet marked the
   2339		 * client as ready, so we are the only user of
   2340		 * clp->cl_rpcclient
   2341		 */
   2342		clnt = xchg(&clp->cl_rpcclient, clnt);
   2343		rpc_shutdown_client(clnt);
   2344		clnt = clp->cl_rpcclient;
   2345		goto again;
   2346
   2347	case -NFS4ERR_MINOR_VERS_MISMATCH:
   2348		status = -EPROTONOSUPPORT;
   2349		break;
   2350
   2351	case -EKEYEXPIRED:
   2352	case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
   2353				 * in nfs4_exchange_id */
   2354		status = -EKEYEXPIRED;
   2355		break;
   2356	default:
   2357		pr_warn("NFS: %s unhandled error %d. Exiting with error EIO\n",
   2358				__func__, status);
   2359		status = -EIO;
   2360	}
   2361
   2362out_unlock:
   2363	mutex_unlock(&nfs_clid_init_mutex);
   2364	dprintk("NFS: %s: status = %d\n", __func__, status);
   2365	return status;
   2366}
   2367
   2368#ifdef CONFIG_NFS_V4_1
   2369void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
   2370{
   2371	struct nfs_client *clp = session->clp;
   2372
   2373	switch (err) {
   2374	default:
   2375		set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
   2376		break;
   2377	case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
   2378		set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
   2379	}
   2380	nfs4_schedule_state_manager(clp);
   2381}
   2382EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
   2383
   2384void nfs41_notify_server(struct nfs_client *clp)
   2385{
   2386	/* Use CHECK_LEASE to ping the server with a SEQUENCE */
   2387	set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
   2388	nfs4_schedule_state_manager(clp);
   2389}
   2390
   2391static void nfs4_reset_all_state(struct nfs_client *clp)
   2392{
   2393	if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
   2394		set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
   2395		clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
   2396		nfs4_state_start_reclaim_nograce(clp);
   2397		dprintk("%s: scheduling reset of all state for server %s!\n",
   2398				__func__, clp->cl_hostname);
   2399		nfs4_schedule_state_manager(clp);
   2400	}
   2401}
   2402
   2403static void nfs41_handle_server_reboot(struct nfs_client *clp)
   2404{
   2405	if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
   2406		nfs4_state_start_reclaim_reboot(clp);
   2407		dprintk("%s: server %s rebooted!\n", __func__,
   2408				clp->cl_hostname);
   2409		nfs4_schedule_state_manager(clp);
   2410	}
   2411}
   2412
   2413static void nfs41_handle_all_state_revoked(struct nfs_client *clp)
   2414{
   2415	nfs4_reset_all_state(clp);
   2416	dprintk("%s: state revoked on server %s\n", __func__, clp->cl_hostname);
   2417}
   2418
   2419static void nfs41_handle_some_state_revoked(struct nfs_client *clp)
   2420{
   2421	nfs4_state_start_reclaim_nograce(clp);
   2422	nfs4_schedule_state_manager(clp);
   2423
   2424	dprintk("%s: state revoked on server %s\n", __func__, clp->cl_hostname);
   2425}
   2426
   2427static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp)
   2428{
   2429	/* FIXME: For now, we destroy all layouts. */
   2430	pnfs_destroy_all_layouts(clp);
   2431	nfs_test_expired_all_delegations(clp);
   2432	dprintk("%s: Recallable state revoked on server %s!\n", __func__,
   2433			clp->cl_hostname);
   2434}
   2435
   2436static void nfs41_handle_backchannel_fault(struct nfs_client *clp)
   2437{
   2438	set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
   2439	nfs4_schedule_state_manager(clp);
   2440
   2441	dprintk("%s: server %s declared a backchannel fault\n", __func__,
   2442			clp->cl_hostname);
   2443}
   2444
   2445static void nfs41_handle_cb_path_down(struct nfs_client *clp)
   2446{
   2447	if (test_and_set_bit(NFS4CLNT_BIND_CONN_TO_SESSION,
   2448		&clp->cl_state) == 0)
   2449		nfs4_schedule_state_manager(clp);
   2450}
   2451
   2452void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags,
   2453		bool recovery)
   2454{
   2455	if (!flags)
   2456		return;
   2457
   2458	dprintk("%s: \"%s\" (client ID %llx) flags=0x%08x\n",
   2459		__func__, clp->cl_hostname, clp->cl_clientid, flags);
   2460	/*
   2461	 * If we're called from the state manager thread, then assume we're
   2462	 * already handling the RECLAIM_NEEDED and/or STATE_REVOKED.
   2463	 * Those flags are expected to remain set until we're done
   2464	 * recovering (see RFC5661, section 18.46.3).
   2465	 */
   2466	if (recovery)
   2467		goto out_recovery;
   2468
   2469	if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED)
   2470		nfs41_handle_server_reboot(clp);
   2471	if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED))
   2472		nfs41_handle_all_state_revoked(clp);
   2473	if (flags & (SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED |
   2474			    SEQ4_STATUS_ADMIN_STATE_REVOKED))
   2475		nfs41_handle_some_state_revoked(clp);
   2476	if (flags & SEQ4_STATUS_LEASE_MOVED)
   2477		nfs4_schedule_lease_moved_recovery(clp);
   2478	if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
   2479		nfs41_handle_recallable_state_revoked(clp);
   2480out_recovery:
   2481	if (flags & SEQ4_STATUS_BACKCHANNEL_FAULT)
   2482		nfs41_handle_backchannel_fault(clp);
   2483	else if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
   2484				SEQ4_STATUS_CB_PATH_DOWN_SESSION))
   2485		nfs41_handle_cb_path_down(clp);
   2486}
   2487
   2488static int nfs4_reset_session(struct nfs_client *clp)
   2489{
   2490	const struct cred *cred;
   2491	int status;
   2492
   2493	if (!nfs4_has_session(clp))
   2494		return 0;
   2495	status = nfs4_begin_drain_session(clp);
   2496	if (status != 0)
   2497		return status;
   2498	cred = nfs4_get_clid_cred(clp);
   2499	status = nfs4_proc_destroy_session(clp->cl_session, cred);
   2500	switch (status) {
   2501	case 0:
   2502	case -NFS4ERR_BADSESSION:
   2503	case -NFS4ERR_DEADSESSION:
   2504		break;
   2505	case -NFS4ERR_BACK_CHAN_BUSY:
   2506	case -NFS4ERR_DELAY:
   2507		set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
   2508		status = 0;
   2509		ssleep(1);
   2510		goto out;
   2511	default:
   2512		status = nfs4_recovery_handle_error(clp, status);
   2513		goto out;
   2514	}
   2515
   2516	memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN);
   2517	status = nfs4_proc_create_session(clp, cred);
   2518	if (status) {
   2519		dprintk("%s: session reset failed with status %d for server %s!\n",
   2520			__func__, status, clp->cl_hostname);
   2521		status = nfs4_handle_reclaim_lease_error(clp, status);
   2522		goto out;
   2523	}
   2524	nfs41_finish_session_reset(clp);
   2525	dprintk("%s: session reset was successful for server %s!\n",
   2526			__func__, clp->cl_hostname);
   2527out:
   2528	put_cred(cred);
   2529	return status;
   2530}
   2531
   2532static int nfs4_bind_conn_to_session(struct nfs_client *clp)
   2533{
   2534	const struct cred *cred;
   2535	int ret;
   2536
   2537	if (!nfs4_has_session(clp))
   2538		return 0;
   2539	ret = nfs4_begin_drain_session(clp);
   2540	if (ret != 0)
   2541		return ret;
   2542	cred = nfs4_get_clid_cred(clp);
   2543	ret = nfs4_proc_bind_conn_to_session(clp, cred);
   2544	put_cred(cred);
   2545	clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
   2546	switch (ret) {
   2547	case 0:
   2548		dprintk("%s: bind_conn_to_session was successful for server %s!\n",
   2549			__func__, clp->cl_hostname);
   2550		break;
   2551	case -NFS4ERR_DELAY:
   2552		ssleep(1);
   2553		set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
   2554		break;
   2555	default:
   2556		return nfs4_recovery_handle_error(clp, ret);
   2557	}
   2558	return 0;
   2559}
   2560
   2561static void nfs4_layoutreturn_any_run(struct nfs_client *clp)
   2562{
   2563	int iomode = 0;
   2564
   2565	if (test_and_clear_bit(NFS4CLNT_RECALL_ANY_LAYOUT_READ, &clp->cl_state))
   2566		iomode += IOMODE_READ;
   2567	if (test_and_clear_bit(NFS4CLNT_RECALL_ANY_LAYOUT_RW, &clp->cl_state))
   2568		iomode += IOMODE_RW;
   2569	/* Note: IOMODE_READ + IOMODE_RW == IOMODE_ANY */
   2570	if (iomode) {
   2571		pnfs_layout_return_unused_byclid(clp, iomode);
   2572		set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
   2573	}
   2574}
   2575#else /* CONFIG_NFS_V4_1 */
   2576static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
   2577
   2578static int nfs4_bind_conn_to_session(struct nfs_client *clp)
   2579{
   2580	return 0;
   2581}
   2582
   2583static void nfs4_layoutreturn_any_run(struct nfs_client *clp)
   2584{
   2585}
   2586#endif /* CONFIG_NFS_V4_1 */
   2587
   2588static void nfs4_state_manager(struct nfs_client *clp)
   2589{
   2590	unsigned int memflags;
   2591	int status = 0;
   2592	const char *section = "", *section_sep = "";
   2593
   2594	/*
   2595	 * State recovery can deadlock if the direct reclaim code tries
   2596	 * start NFS writeback. So ensure memory allocations are all
   2597	 * GFP_NOFS.
   2598	 */
   2599	memflags = memalloc_nofs_save();
   2600
   2601	/* Ensure exclusive access to NFSv4 state */
   2602	do {
   2603		trace_nfs4_state_mgr(clp);
   2604		clear_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
   2605		if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
   2606			section = "purge state";
   2607			status = nfs4_purge_lease(clp);
   2608			if (status < 0)
   2609				goto out_error;
   2610			continue;
   2611		}
   2612
   2613		if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
   2614			section = "lease expired";
   2615			/* We're going to have to re-establish a clientid */
   2616			status = nfs4_reclaim_lease(clp);
   2617			if (status < 0)
   2618				goto out_error;
   2619			continue;
   2620		}
   2621
   2622		/* Initialize or reset the session */
   2623		if (test_and_clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) {
   2624			section = "reset session";
   2625			status = nfs4_reset_session(clp);
   2626			if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
   2627				continue;
   2628			if (status < 0)
   2629				goto out_error;
   2630		}
   2631
   2632		/* Send BIND_CONN_TO_SESSION */
   2633		if (test_and_clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION,
   2634				&clp->cl_state)) {
   2635			section = "bind conn to session";
   2636			status = nfs4_bind_conn_to_session(clp);
   2637			if (status < 0)
   2638				goto out_error;
   2639			continue;
   2640		}
   2641
   2642		if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
   2643			section = "check lease";
   2644			status = nfs4_check_lease(clp);
   2645			if (status < 0)
   2646				goto out_error;
   2647			continue;
   2648		}
   2649
   2650		if (test_and_clear_bit(NFS4CLNT_MOVED, &clp->cl_state)) {
   2651			section = "migration";
   2652			status = nfs4_handle_migration(clp);
   2653			if (status < 0)
   2654				goto out_error;
   2655		}
   2656
   2657		if (test_and_clear_bit(NFS4CLNT_LEASE_MOVED, &clp->cl_state)) {
   2658			section = "lease moved";
   2659			status = nfs4_handle_lease_moved(clp);
   2660			if (status < 0)
   2661				goto out_error;
   2662		}
   2663
   2664		/* First recover reboot state... */
   2665		if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
   2666			section = "reclaim reboot";
   2667			status = nfs4_do_reclaim(clp,
   2668				clp->cl_mvops->reboot_recovery_ops);
   2669			if (status == -EAGAIN)
   2670				continue;
   2671			if (status < 0)
   2672				goto out_error;
   2673			nfs4_state_end_reclaim_reboot(clp);
   2674		}
   2675
   2676		/* Detect expired delegations... */
   2677		if (test_and_clear_bit(NFS4CLNT_DELEGATION_EXPIRED, &clp->cl_state)) {
   2678			section = "detect expired delegations";
   2679			nfs_reap_expired_delegations(clp);
   2680			continue;
   2681		}
   2682
   2683		/* Now recover expired state... */
   2684		if (test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
   2685			section = "reclaim nograce";
   2686			status = nfs4_do_reclaim(clp,
   2687				clp->cl_mvops->nograce_recovery_ops);
   2688			if (status == -EAGAIN)
   2689				continue;
   2690			if (status < 0)
   2691				goto out_error;
   2692			clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
   2693		}
   2694
   2695		memalloc_nofs_restore(memflags);
   2696		nfs4_end_drain_session(clp);
   2697		nfs4_clear_state_manager_bit(clp);
   2698
   2699		if (!test_and_set_bit(NFS4CLNT_RECALL_RUNNING, &clp->cl_state)) {
   2700			if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
   2701				nfs_client_return_marked_delegations(clp);
   2702				set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
   2703			}
   2704			nfs4_layoutreturn_any_run(clp);
   2705			clear_bit(NFS4CLNT_RECALL_RUNNING, &clp->cl_state);
   2706		}
   2707
   2708		return;
   2709
   2710	} while (refcount_read(&clp->cl_count) > 1 && !signalled());
   2711	goto out_drain;
   2712
   2713out_error:
   2714	if (strlen(section))
   2715		section_sep = ": ";
   2716	trace_nfs4_state_mgr_failed(clp, section, status);
   2717	pr_warn_ratelimited("NFS: state manager%s%s failed on NFSv4 server %s"
   2718			" with error %d\n", section_sep, section,
   2719			clp->cl_hostname, -status);
   2720	ssleep(1);
   2721out_drain:
   2722	memalloc_nofs_restore(memflags);
   2723	nfs4_end_drain_session(clp);
   2724	nfs4_clear_state_manager_bit(clp);
   2725}
   2726
   2727static int nfs4_run_state_manager(void *ptr)
   2728{
   2729	struct nfs_client *clp = ptr;
   2730	struct rpc_clnt *cl = clp->cl_rpcclient;
   2731
   2732	while (cl != cl->cl_parent)
   2733		cl = cl->cl_parent;
   2734
   2735	allow_signal(SIGKILL);
   2736again:
   2737	set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
   2738	nfs4_state_manager(clp);
   2739	if (atomic_read(&cl->cl_swapper)) {
   2740		wait_var_event_interruptible(&clp->cl_state,
   2741					     test_bit(NFS4CLNT_RUN_MANAGER,
   2742						      &clp->cl_state));
   2743		if (atomic_read(&cl->cl_swapper) &&
   2744		    test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state))
   2745			goto again;
   2746		/* Either no longer a swapper, or were signalled */
   2747	}
   2748	clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
   2749
   2750	if (refcount_read(&clp->cl_count) > 1 && !signalled() &&
   2751	    test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) &&
   2752	    !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state))
   2753		goto again;
   2754
   2755	nfs_put_client(clp);
   2756	module_put_and_kthread_exit(0);
   2757	return 0;
   2758}