cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

nfs4callback.c (34860B)


      1/*
      2 *  Copyright (c) 2001 The Regents of the University of Michigan.
      3 *  All rights reserved.
      4 *
      5 *  Kendrick Smith <kmsmith@umich.edu>
      6 *  Andy Adamson <andros@umich.edu>
      7 *
      8 *  Redistribution and use in source and binary forms, with or without
      9 *  modification, are permitted provided that the following conditions
     10 *  are met:
     11 *
     12 *  1. Redistributions of source code must retain the above copyright
     13 *     notice, this list of conditions and the following disclaimer.
     14 *  2. Redistributions in binary form must reproduce the above copyright
     15 *     notice, this list of conditions and the following disclaimer in the
     16 *     documentation and/or other materials provided with the distribution.
     17 *  3. Neither the name of the University nor the names of its
     18 *     contributors may be used to endorse or promote products derived
     19 *     from this software without specific prior written permission.
     20 *
     21 *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
     22 *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
     23 *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     24 *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     25 *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     26 *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     27 *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
     28 *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     29 *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     30 *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     31 *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     32 */
     33
     34#include <linux/sunrpc/clnt.h>
     35#include <linux/sunrpc/xprt.h>
     36#include <linux/sunrpc/svc_xprt.h>
     37#include <linux/slab.h>
     38#include "nfsd.h"
     39#include "state.h"
     40#include "netns.h"
     41#include "trace.h"
     42#include "xdr4cb.h"
     43#include "xdr4.h"
     44
     45#define NFSDDBG_FACILITY                NFSDDBG_PROC
     46
     47static void nfsd4_mark_cb_fault(struct nfs4_client *, int reason);
     48
     49#define NFSPROC4_CB_NULL 0
     50#define NFSPROC4_CB_COMPOUND 1
     51
     52/* Index of predefined Linux callback client operations */
     53
     54struct nfs4_cb_compound_hdr {
     55	/* args */
     56	u32		ident;	/* minorversion 0 only */
     57	u32		nops;
     58	__be32		*nops_p;
     59	u32		minorversion;
     60	/* res */
     61	int		status;
     62};
     63
     64static __be32 *xdr_encode_empty_array(__be32 *p)
     65{
     66	*p++ = xdr_zero;
     67	return p;
     68}
     69
     70/*
     71 * Encode/decode NFSv4 CB basic data types
     72 *
     73 * Basic NFSv4 callback data types are defined in section 15 of RFC
     74 * 3530: "Network File System (NFS) version 4 Protocol" and section
     75 * 20 of RFC 5661: "Network File System (NFS) Version 4 Minor Version
     76 * 1 Protocol"
     77 */
     78
     79/*
     80 *	nfs_cb_opnum4
     81 *
     82 *	enum nfs_cb_opnum4 {
     83 *		OP_CB_GETATTR		= 3,
     84 *		  ...
     85 *	};
     86 */
     87enum nfs_cb_opnum4 {
     88	OP_CB_GETATTR			= 3,
     89	OP_CB_RECALL			= 4,
     90	OP_CB_LAYOUTRECALL		= 5,
     91	OP_CB_NOTIFY			= 6,
     92	OP_CB_PUSH_DELEG		= 7,
     93	OP_CB_RECALL_ANY		= 8,
     94	OP_CB_RECALLABLE_OBJ_AVAIL	= 9,
     95	OP_CB_RECALL_SLOT		= 10,
     96	OP_CB_SEQUENCE			= 11,
     97	OP_CB_WANTS_CANCELLED		= 12,
     98	OP_CB_NOTIFY_LOCK		= 13,
     99	OP_CB_NOTIFY_DEVICEID		= 14,
    100	OP_CB_OFFLOAD			= 15,
    101	OP_CB_ILLEGAL			= 10044
    102};
    103
    104static void encode_nfs_cb_opnum4(struct xdr_stream *xdr, enum nfs_cb_opnum4 op)
    105{
    106	__be32 *p;
    107
    108	p = xdr_reserve_space(xdr, 4);
    109	*p = cpu_to_be32(op);
    110}
    111
    112/*
    113 * nfs_fh4
    114 *
    115 *	typedef opaque nfs_fh4<NFS4_FHSIZE>;
    116 */
    117static void encode_nfs_fh4(struct xdr_stream *xdr, const struct knfsd_fh *fh)
    118{
    119	u32 length = fh->fh_size;
    120	__be32 *p;
    121
    122	BUG_ON(length > NFS4_FHSIZE);
    123	p = xdr_reserve_space(xdr, 4 + length);
    124	xdr_encode_opaque(p, &fh->fh_raw, length);
    125}
    126
    127/*
    128 * stateid4
    129 *
    130 *	struct stateid4 {
    131 *		uint32_t	seqid;
    132 *		opaque		other[12];
    133 *	};
    134 */
    135static void encode_stateid4(struct xdr_stream *xdr, const stateid_t *sid)
    136{
    137	__be32 *p;
    138
    139	p = xdr_reserve_space(xdr, NFS4_STATEID_SIZE);
    140	*p++ = cpu_to_be32(sid->si_generation);
    141	xdr_encode_opaque_fixed(p, &sid->si_opaque, NFS4_STATEID_OTHER_SIZE);
    142}
    143
    144/*
    145 * sessionid4
    146 *
    147 *	typedef opaque sessionid4[NFS4_SESSIONID_SIZE];
    148 */
    149static void encode_sessionid4(struct xdr_stream *xdr,
    150			      const struct nfsd4_session *session)
    151{
    152	__be32 *p;
    153
    154	p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN);
    155	xdr_encode_opaque_fixed(p, session->se_sessionid.data,
    156					NFS4_MAX_SESSIONID_LEN);
    157}
    158
    159/*
    160 * nfsstat4
    161 */
    162static const struct {
    163	int stat;
    164	int errno;
    165} nfs_cb_errtbl[] = {
    166	{ NFS4_OK,		0		},
    167	{ NFS4ERR_PERM,		-EPERM		},
    168	{ NFS4ERR_NOENT,	-ENOENT		},
    169	{ NFS4ERR_IO,		-EIO		},
    170	{ NFS4ERR_NXIO,		-ENXIO		},
    171	{ NFS4ERR_ACCESS,	-EACCES		},
    172	{ NFS4ERR_EXIST,	-EEXIST		},
    173	{ NFS4ERR_XDEV,		-EXDEV		},
    174	{ NFS4ERR_NOTDIR,	-ENOTDIR	},
    175	{ NFS4ERR_ISDIR,	-EISDIR		},
    176	{ NFS4ERR_INVAL,	-EINVAL		},
    177	{ NFS4ERR_FBIG,		-EFBIG		},
    178	{ NFS4ERR_NOSPC,	-ENOSPC		},
    179	{ NFS4ERR_ROFS,		-EROFS		},
    180	{ NFS4ERR_MLINK,	-EMLINK		},
    181	{ NFS4ERR_NAMETOOLONG,	-ENAMETOOLONG	},
    182	{ NFS4ERR_NOTEMPTY,	-ENOTEMPTY	},
    183	{ NFS4ERR_DQUOT,	-EDQUOT		},
    184	{ NFS4ERR_STALE,	-ESTALE		},
    185	{ NFS4ERR_BADHANDLE,	-EBADHANDLE	},
    186	{ NFS4ERR_BAD_COOKIE,	-EBADCOOKIE	},
    187	{ NFS4ERR_NOTSUPP,	-ENOTSUPP	},
    188	{ NFS4ERR_TOOSMALL,	-ETOOSMALL	},
    189	{ NFS4ERR_SERVERFAULT,	-ESERVERFAULT	},
    190	{ NFS4ERR_BADTYPE,	-EBADTYPE	},
    191	{ NFS4ERR_LOCKED,	-EAGAIN		},
    192	{ NFS4ERR_RESOURCE,	-EREMOTEIO	},
    193	{ NFS4ERR_SYMLINK,	-ELOOP		},
    194	{ NFS4ERR_OP_ILLEGAL,	-EOPNOTSUPP	},
    195	{ NFS4ERR_DEADLOCK,	-EDEADLK	},
    196	{ -1,			-EIO		}
    197};
    198
    199/*
    200 * If we cannot translate the error, the recovery routines should
    201 * handle it.
    202 *
    203 * Note: remaining NFSv4 error codes have values > 10000, so should
    204 * not conflict with native Linux error codes.
    205 */
    206static int nfs_cb_stat_to_errno(int status)
    207{
    208	int i;
    209
    210	for (i = 0; nfs_cb_errtbl[i].stat != -1; i++) {
    211		if (nfs_cb_errtbl[i].stat == status)
    212			return nfs_cb_errtbl[i].errno;
    213	}
    214
    215	dprintk("NFSD: Unrecognized NFS CB status value: %u\n", status);
    216	return -status;
    217}
    218
    219static int decode_cb_op_status(struct xdr_stream *xdr,
    220			       enum nfs_cb_opnum4 expected, int *status)
    221{
    222	__be32 *p;
    223	u32 op;
    224
    225	p = xdr_inline_decode(xdr, 4 + 4);
    226	if (unlikely(p == NULL))
    227		goto out_overflow;
    228	op = be32_to_cpup(p++);
    229	if (unlikely(op != expected))
    230		goto out_unexpected;
    231	*status = nfs_cb_stat_to_errno(be32_to_cpup(p));
    232	return 0;
    233out_overflow:
    234	return -EIO;
    235out_unexpected:
    236	dprintk("NFSD: Callback server returned operation %d but "
    237		"we issued a request for %d\n", op, expected);
    238	return -EIO;
    239}
    240
    241/*
    242 * CB_COMPOUND4args
    243 *
    244 *	struct CB_COMPOUND4args {
    245 *		utf8str_cs	tag;
    246 *		uint32_t	minorversion;
    247 *		uint32_t	callback_ident;
    248 *		nfs_cb_argop4	argarray<>;
    249 *	};
    250*/
    251static void encode_cb_compound4args(struct xdr_stream *xdr,
    252				    struct nfs4_cb_compound_hdr *hdr)
    253{
    254	__be32 * p;
    255
    256	p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4);
    257	p = xdr_encode_empty_array(p);		/* empty tag */
    258	*p++ = cpu_to_be32(hdr->minorversion);
    259	*p++ = cpu_to_be32(hdr->ident);
    260
    261	hdr->nops_p = p;
    262	*p = cpu_to_be32(hdr->nops);		/* argarray element count */
    263}
    264
    265/*
    266 * Update argarray element count
    267 */
    268static void encode_cb_nops(struct nfs4_cb_compound_hdr *hdr)
    269{
    270	BUG_ON(hdr->nops > NFS4_MAX_BACK_CHANNEL_OPS);
    271	*hdr->nops_p = cpu_to_be32(hdr->nops);
    272}
    273
    274/*
    275 * CB_COMPOUND4res
    276 *
    277 *	struct CB_COMPOUND4res {
    278 *		nfsstat4	status;
    279 *		utf8str_cs	tag;
    280 *		nfs_cb_resop4	resarray<>;
    281 *	};
    282 */
    283static int decode_cb_compound4res(struct xdr_stream *xdr,
    284				  struct nfs4_cb_compound_hdr *hdr)
    285{
    286	u32 length;
    287	__be32 *p;
    288
    289	p = xdr_inline_decode(xdr, 4 + 4);
    290	if (unlikely(p == NULL))
    291		goto out_overflow;
    292	hdr->status = be32_to_cpup(p++);
    293	/* Ignore the tag */
    294	length = be32_to_cpup(p++);
    295	p = xdr_inline_decode(xdr, length + 4);
    296	if (unlikely(p == NULL))
    297		goto out_overflow;
    298	p += XDR_QUADLEN(length);
    299	hdr->nops = be32_to_cpup(p);
    300	return 0;
    301out_overflow:
    302	return -EIO;
    303}
    304
    305/*
    306 * CB_RECALL4args
    307 *
    308 *	struct CB_RECALL4args {
    309 *		stateid4	stateid;
    310 *		bool		truncate;
    311 *		nfs_fh4		fh;
    312 *	};
    313 */
    314static void encode_cb_recall4args(struct xdr_stream *xdr,
    315				  const struct nfs4_delegation *dp,
    316				  struct nfs4_cb_compound_hdr *hdr)
    317{
    318	__be32 *p;
    319
    320	encode_nfs_cb_opnum4(xdr, OP_CB_RECALL);
    321	encode_stateid4(xdr, &dp->dl_stid.sc_stateid);
    322
    323	p = xdr_reserve_space(xdr, 4);
    324	*p++ = xdr_zero;			/* truncate */
    325
    326	encode_nfs_fh4(xdr, &dp->dl_stid.sc_file->fi_fhandle);
    327
    328	hdr->nops++;
    329}
    330
    331/*
    332 * CB_SEQUENCE4args
    333 *
    334 *	struct CB_SEQUENCE4args {
    335 *		sessionid4		csa_sessionid;
    336 *		sequenceid4		csa_sequenceid;
    337 *		slotid4			csa_slotid;
    338 *		slotid4			csa_highest_slotid;
    339 *		bool			csa_cachethis;
    340 *		referring_call_list4	csa_referring_call_lists<>;
    341 *	};
    342 */
    343static void encode_cb_sequence4args(struct xdr_stream *xdr,
    344				    const struct nfsd4_callback *cb,
    345				    struct nfs4_cb_compound_hdr *hdr)
    346{
    347	struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
    348	__be32 *p;
    349
    350	if (hdr->minorversion == 0)
    351		return;
    352
    353	encode_nfs_cb_opnum4(xdr, OP_CB_SEQUENCE);
    354	encode_sessionid4(xdr, session);
    355
    356	p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4 + 4);
    357	*p++ = cpu_to_be32(session->se_cb_seq_nr);	/* csa_sequenceid */
    358	*p++ = xdr_zero;			/* csa_slotid */
    359	*p++ = xdr_zero;			/* csa_highest_slotid */
    360	*p++ = xdr_zero;			/* csa_cachethis */
    361	xdr_encode_empty_array(p);		/* csa_referring_call_lists */
    362
    363	hdr->nops++;
    364}
    365
    366/*
    367 * CB_SEQUENCE4resok
    368 *
    369 *	struct CB_SEQUENCE4resok {
    370 *		sessionid4	csr_sessionid;
    371 *		sequenceid4	csr_sequenceid;
    372 *		slotid4		csr_slotid;
    373 *		slotid4		csr_highest_slotid;
    374 *		slotid4		csr_target_highest_slotid;
    375 *	};
    376 *
    377 *	union CB_SEQUENCE4res switch (nfsstat4 csr_status) {
    378 *	case NFS4_OK:
    379 *		CB_SEQUENCE4resok	csr_resok4;
    380 *	default:
    381 *		void;
    382 *	};
    383 *
 * Our current back channel implementation supports a single backchannel
 * with a single slot.
    386 */
    387static int decode_cb_sequence4resok(struct xdr_stream *xdr,
    388				    struct nfsd4_callback *cb)
    389{
    390	struct nfsd4_session *session = cb->cb_clp->cl_cb_session;
    391	int status = -ESERVERFAULT;
    392	__be32 *p;
    393	u32 dummy;
    394
    395	/*
    396	 * If the server returns different values for sessionID, slotID or
    397	 * sequence number, the server is looney tunes.
    398	 */
    399	p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN + 4 + 4 + 4 + 4);
    400	if (unlikely(p == NULL))
    401		goto out_overflow;
    402
    403	if (memcmp(p, session->se_sessionid.data, NFS4_MAX_SESSIONID_LEN)) {
    404		dprintk("NFS: %s Invalid session id\n", __func__);
    405		goto out;
    406	}
    407	p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
    408
    409	dummy = be32_to_cpup(p++);
    410	if (dummy != session->se_cb_seq_nr) {
    411		dprintk("NFS: %s Invalid sequence number\n", __func__);
    412		goto out;
    413	}
    414
    415	dummy = be32_to_cpup(p++);
    416	if (dummy != 0) {
    417		dprintk("NFS: %s Invalid slotid\n", __func__);
    418		goto out;
    419	}
    420
    421	/*
    422	 * FIXME: process highest slotid and target highest slotid
    423	 */
    424	status = 0;
    425out:
    426	cb->cb_seq_status = status;
    427	return status;
    428out_overflow:
    429	status = -EIO;
    430	goto out;
    431}
    432
    433static int decode_cb_sequence4res(struct xdr_stream *xdr,
    434				  struct nfsd4_callback *cb)
    435{
    436	int status;
    437
    438	if (cb->cb_clp->cl_minorversion == 0)
    439		return 0;
    440
    441	status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_seq_status);
    442	if (unlikely(status || cb->cb_seq_status))
    443		return status;
    444
    445	return decode_cb_sequence4resok(xdr, cb);
    446}
    447
    448/*
    449 * NFSv4.0 and NFSv4.1 XDR encode functions
    450 *
    451 * NFSv4.0 callback argument types are defined in section 15 of RFC
    452 * 3530: "Network File System (NFS) version 4 Protocol" and section 20
    453 * of RFC 5661:  "Network File System (NFS) Version 4 Minor Version 1
    454 * Protocol".
    455 */
    456
    457/*
    458 * NB: Without this zero space reservation, callbacks over krb5p fail
    459 */
    460static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
    461				 const void *__unused)
    462{
    463	xdr_reserve_space(xdr, 0);
    464}
    465
    466/*
    467 * 20.2. Operation 4: CB_RECALL - Recall a Delegation
    468 */
    469static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
    470				   const void *data)
    471{
    472	const struct nfsd4_callback *cb = data;
    473	const struct nfs4_delegation *dp = cb_to_delegation(cb);
    474	struct nfs4_cb_compound_hdr hdr = {
    475		.ident = cb->cb_clp->cl_cb_ident,
    476		.minorversion = cb->cb_clp->cl_minorversion,
    477	};
    478
    479	encode_cb_compound4args(xdr, &hdr);
    480	encode_cb_sequence4args(xdr, cb, &hdr);
    481	encode_cb_recall4args(xdr, dp, &hdr);
    482	encode_cb_nops(&hdr);
    483}
    484
    485
    486/*
    487 * NFSv4.0 and NFSv4.1 XDR decode functions
    488 *
    489 * NFSv4.0 callback result types are defined in section 15 of RFC
    490 * 3530: "Network File System (NFS) version 4 Protocol" and section 20
    491 * of RFC 5661:  "Network File System (NFS) Version 4 Minor Version 1
    492 * Protocol".
    493 */
    494
    495static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
    496				void *__unused)
    497{
    498	return 0;
    499}
    500
    501/*
    502 * 20.2. Operation 4: CB_RECALL - Recall a Delegation
    503 */
    504static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
    505				  struct xdr_stream *xdr,
    506				  void *data)
    507{
    508	struct nfsd4_callback *cb = data;
    509	struct nfs4_cb_compound_hdr hdr;
    510	int status;
    511
    512	status = decode_cb_compound4res(xdr, &hdr);
    513	if (unlikely(status))
    514		return status;
    515
    516	status = decode_cb_sequence4res(xdr, cb);
    517	if (unlikely(status || cb->cb_seq_status))
    518		return status;
    519
    520	return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status);
    521}
    522
    523#ifdef CONFIG_NFSD_PNFS
    524/*
    525 * CB_LAYOUTRECALL4args
    526 *
    527 *	struct layoutrecall_file4 {
    528 *		nfs_fh4         lor_fh;
    529 *		offset4         lor_offset;
    530 *		length4         lor_length;
    531 *		stateid4        lor_stateid;
    532 *	};
    533 *
    534 *	union layoutrecall4 switch(layoutrecall_type4 lor_recalltype) {
    535 *	case LAYOUTRECALL4_FILE:
    536 *		layoutrecall_file4 lor_layout;
    537 *	case LAYOUTRECALL4_FSID:
    538 *		fsid4              lor_fsid;
    539 *	case LAYOUTRECALL4_ALL:
    540 *		void;
    541 *	};
    542 *
    543 *	struct CB_LAYOUTRECALL4args {
    544 *		layouttype4             clora_type;
    545 *		layoutiomode4           clora_iomode;
    546 *		bool                    clora_changed;
    547 *		layoutrecall4           clora_recall;
    548 *	};
    549 */
    550static void encode_cb_layout4args(struct xdr_stream *xdr,
    551				  const struct nfs4_layout_stateid *ls,
    552				  struct nfs4_cb_compound_hdr *hdr)
    553{
    554	__be32 *p;
    555
    556	BUG_ON(hdr->minorversion == 0);
    557
    558	p = xdr_reserve_space(xdr, 5 * 4);
    559	*p++ = cpu_to_be32(OP_CB_LAYOUTRECALL);
    560	*p++ = cpu_to_be32(ls->ls_layout_type);
    561	*p++ = cpu_to_be32(IOMODE_ANY);
    562	*p++ = cpu_to_be32(1);
    563	*p = cpu_to_be32(RETURN_FILE);
    564
    565	encode_nfs_fh4(xdr, &ls->ls_stid.sc_file->fi_fhandle);
    566
    567	p = xdr_reserve_space(xdr, 2 * 8);
    568	p = xdr_encode_hyper(p, 0);
    569	xdr_encode_hyper(p, NFS4_MAX_UINT64);
    570
    571	encode_stateid4(xdr, &ls->ls_recall_sid);
    572
    573	hdr->nops++;
    574}
    575
    576static void nfs4_xdr_enc_cb_layout(struct rpc_rqst *req,
    577				   struct xdr_stream *xdr,
    578				   const void *data)
    579{
    580	const struct nfsd4_callback *cb = data;
    581	const struct nfs4_layout_stateid *ls =
    582		container_of(cb, struct nfs4_layout_stateid, ls_recall);
    583	struct nfs4_cb_compound_hdr hdr = {
    584		.ident = 0,
    585		.minorversion = cb->cb_clp->cl_minorversion,
    586	};
    587
    588	encode_cb_compound4args(xdr, &hdr);
    589	encode_cb_sequence4args(xdr, cb, &hdr);
    590	encode_cb_layout4args(xdr, ls, &hdr);
    591	encode_cb_nops(&hdr);
    592}
    593
    594static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp,
    595				  struct xdr_stream *xdr,
    596				  void *data)
    597{
    598	struct nfsd4_callback *cb = data;
    599	struct nfs4_cb_compound_hdr hdr;
    600	int status;
    601
    602	status = decode_cb_compound4res(xdr, &hdr);
    603	if (unlikely(status))
    604		return status;
    605
    606	status = decode_cb_sequence4res(xdr, cb);
    607	if (unlikely(status || cb->cb_seq_status))
    608		return status;
    609
    610	return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status);
    611}
    612#endif /* CONFIG_NFSD_PNFS */
    613
    614static void encode_stateowner(struct xdr_stream *xdr, struct nfs4_stateowner *so)
    615{
    616	__be32	*p;
    617
    618	p = xdr_reserve_space(xdr, 8 + 4 + so->so_owner.len);
    619	p = xdr_encode_opaque_fixed(p, &so->so_client->cl_clientid, 8);
    620	xdr_encode_opaque(p, so->so_owner.data, so->so_owner.len);
    621}
    622
    623static void nfs4_xdr_enc_cb_notify_lock(struct rpc_rqst *req,
    624					struct xdr_stream *xdr,
    625					const void *data)
    626{
    627	const struct nfsd4_callback *cb = data;
    628	const struct nfsd4_blocked_lock *nbl =
    629		container_of(cb, struct nfsd4_blocked_lock, nbl_cb);
    630	struct nfs4_lockowner *lo = (struct nfs4_lockowner *)nbl->nbl_lock.fl_owner;
    631	struct nfs4_cb_compound_hdr hdr = {
    632		.ident = 0,
    633		.minorversion = cb->cb_clp->cl_minorversion,
    634	};
    635
    636	__be32 *p;
    637
    638	BUG_ON(hdr.minorversion == 0);
    639
    640	encode_cb_compound4args(xdr, &hdr);
    641	encode_cb_sequence4args(xdr, cb, &hdr);
    642
    643	p = xdr_reserve_space(xdr, 4);
    644	*p = cpu_to_be32(OP_CB_NOTIFY_LOCK);
    645	encode_nfs_fh4(xdr, &nbl->nbl_fh);
    646	encode_stateowner(xdr, &lo->lo_owner);
    647	hdr.nops++;
    648
    649	encode_cb_nops(&hdr);
    650}
    651
    652static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp,
    653					struct xdr_stream *xdr,
    654					void *data)
    655{
    656	struct nfsd4_callback *cb = data;
    657	struct nfs4_cb_compound_hdr hdr;
    658	int status;
    659
    660	status = decode_cb_compound4res(xdr, &hdr);
    661	if (unlikely(status))
    662		return status;
    663
    664	status = decode_cb_sequence4res(xdr, cb);
    665	if (unlikely(status || cb->cb_seq_status))
    666		return status;
    667
    668	return decode_cb_op_status(xdr, OP_CB_NOTIFY_LOCK, &cb->cb_status);
    669}
    670
    671/*
    672 * struct write_response4 {
    673 *	stateid4	wr_callback_id<1>;
    674 *	length4		wr_count;
    675 *	stable_how4	wr_committed;
    676 *	verifier4	wr_writeverf;
    677 * };
    678 * union offload_info4 switch (nfsstat4 coa_status) {
    679 *	case NFS4_OK:
    680 *		write_response4	coa_resok4;
    681 *	default:
    682 *	length4		coa_bytes_copied;
    683 * };
    684 * struct CB_OFFLOAD4args {
    685 *	nfs_fh4		coa_fh;
    686 *	stateid4	coa_stateid;
    687 *	offload_info4	coa_offload_info;
    688 * };
    689 */
    690static void encode_offload_info4(struct xdr_stream *xdr,
    691				 __be32 nfserr,
    692				 const struct nfsd4_copy *cp)
    693{
    694	__be32 *p;
    695
    696	p = xdr_reserve_space(xdr, 4);
    697	*p++ = nfserr;
    698	if (!nfserr) {
    699		p = xdr_reserve_space(xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE);
    700		p = xdr_encode_empty_array(p);
    701		p = xdr_encode_hyper(p, cp->cp_res.wr_bytes_written);
    702		*p++ = cpu_to_be32(cp->cp_res.wr_stable_how);
    703		p = xdr_encode_opaque_fixed(p, cp->cp_res.wr_verifier.data,
    704					    NFS4_VERIFIER_SIZE);
    705	} else {
    706		p = xdr_reserve_space(xdr, 8);
    707		/* We always return success if bytes were written */
    708		p = xdr_encode_hyper(p, 0);
    709	}
    710}
    711
    712static void encode_cb_offload4args(struct xdr_stream *xdr,
    713				   __be32 nfserr,
    714				   const struct knfsd_fh *fh,
    715				   const struct nfsd4_copy *cp,
    716				   struct nfs4_cb_compound_hdr *hdr)
    717{
    718	__be32 *p;
    719
    720	p = xdr_reserve_space(xdr, 4);
    721	*p++ = cpu_to_be32(OP_CB_OFFLOAD);
    722	encode_nfs_fh4(xdr, fh);
    723	encode_stateid4(xdr, &cp->cp_res.cb_stateid);
    724	encode_offload_info4(xdr, nfserr, cp);
    725
    726	hdr->nops++;
    727}
    728
    729static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req,
    730				    struct xdr_stream *xdr,
    731				    const void *data)
    732{
    733	const struct nfsd4_callback *cb = data;
    734	const struct nfsd4_copy *cp =
    735		container_of(cb, struct nfsd4_copy, cp_cb);
    736	struct nfs4_cb_compound_hdr hdr = {
    737		.ident = 0,
    738		.minorversion = cb->cb_clp->cl_minorversion,
    739	};
    740
    741	encode_cb_compound4args(xdr, &hdr);
    742	encode_cb_sequence4args(xdr, cb, &hdr);
    743	encode_cb_offload4args(xdr, cp->nfserr, &cp->fh, cp, &hdr);
    744	encode_cb_nops(&hdr);
    745}
    746
    747static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp,
    748				   struct xdr_stream *xdr,
    749				   void *data)
    750{
    751	struct nfsd4_callback *cb = data;
    752	struct nfs4_cb_compound_hdr hdr;
    753	int status;
    754
    755	status = decode_cb_compound4res(xdr, &hdr);
    756	if (unlikely(status))
    757		return status;
    758
    759	status = decode_cb_sequence4res(xdr, cb);
    760	if (unlikely(status || cb->cb_seq_status))
    761		return status;
    762
    763	return decode_cb_op_status(xdr, OP_CB_OFFLOAD, &cb->cb_status);
    764}
    765/*
    766 * RPC procedure tables
    767 */
    768#define PROC(proc, call, argtype, restype)				\
    769[NFSPROC4_CLNT_##proc] = {						\
    770	.p_proc    = NFSPROC4_CB_##call,				\
    771	.p_encode  = nfs4_xdr_enc_##argtype,		\
    772	.p_decode  = nfs4_xdr_dec_##restype,				\
    773	.p_arglen  = NFS4_enc_##argtype##_sz,				\
    774	.p_replen  = NFS4_dec_##restype##_sz,				\
    775	.p_statidx = NFSPROC4_CB_##call,				\
    776	.p_name    = #proc,						\
    777}
    778
    779static const struct rpc_procinfo nfs4_cb_procedures[] = {
    780	PROC(CB_NULL,	NULL,		cb_null,	cb_null),
    781	PROC(CB_RECALL,	COMPOUND,	cb_recall,	cb_recall),
    782#ifdef CONFIG_NFSD_PNFS
    783	PROC(CB_LAYOUT,	COMPOUND,	cb_layout,	cb_layout),
    784#endif
    785	PROC(CB_NOTIFY_LOCK,	COMPOUND,	cb_notify_lock,	cb_notify_lock),
    786	PROC(CB_OFFLOAD,	COMPOUND,	cb_offload,	cb_offload),
    787};
    788
    789static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
    790static const struct rpc_version nfs_cb_version4 = {
    791/*
    792 * Note on the callback rpc program version number: despite language in rfc
    793 * 5661 section 18.36.3 requiring servers to use 4 in this field, the
    794 * official xdr descriptions for both 4.0 and 4.1 specify version 1, and
    795 * in practice that appears to be what implementations use.  The section
    796 * 18.36.3 language is expected to be fixed in an erratum.
    797 */
    798	.number			= 1,
    799	.nrprocs		= ARRAY_SIZE(nfs4_cb_procedures),
    800	.procs			= nfs4_cb_procedures,
    801	.counts			= nfs4_cb_counts,
    802};
    803
    804static const struct rpc_version *nfs_cb_version[2] = {
    805	[1] = &nfs_cb_version4,
    806};
    807
    808static const struct rpc_program cb_program;
    809
    810static struct rpc_stat cb_stats = {
    811	.program		= &cb_program
    812};
    813
    814#define NFS4_CALLBACK 0x40000000
    815static const struct rpc_program cb_program = {
    816	.name			= "nfs4_cb",
    817	.number			= NFS4_CALLBACK,
    818	.nrvers			= ARRAY_SIZE(nfs_cb_version),
    819	.version		= nfs_cb_version,
    820	.stats			= &cb_stats,
    821	.pipe_dir_name		= "nfsd4_cb",
    822};
    823
    824static int max_cb_time(struct net *net)
    825{
    826	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
    827
    828	/*
    829	 * nfsd4_lease is set to at most one hour in __nfsd4_write_time,
    830	 * so we can use 32-bit math on it. Warn if that assumption
    831	 * ever stops being true.
    832	 */
    833	if (WARN_ON_ONCE(nn->nfsd4_lease > 3600))
    834		return 360 * HZ;
    835
    836	return max(((u32)nn->nfsd4_lease)/10, 1u) * HZ;
    837}
    838
    839static struct workqueue_struct *callback_wq;
    840
    841static bool nfsd4_queue_cb(struct nfsd4_callback *cb)
    842{
    843	return queue_work(callback_wq, &cb->cb_work);
    844}
    845
    846static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
    847{
    848	atomic_inc(&clp->cl_cb_inflight);
    849}
    850
    851static void nfsd41_cb_inflight_end(struct nfs4_client *clp)
    852{
    853
    854	if (atomic_dec_and_test(&clp->cl_cb_inflight))
    855		wake_up_var(&clp->cl_cb_inflight);
    856}
    857
    858static void nfsd41_cb_inflight_wait_complete(struct nfs4_client *clp)
    859{
    860	wait_var_event(&clp->cl_cb_inflight,
    861			!atomic_read(&clp->cl_cb_inflight));
    862}
    863
    864static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc_clnt *client, struct nfsd4_session *ses)
    865{
    866	if (clp->cl_minorversion == 0) {
    867		client->cl_principal = clp->cl_cred.cr_targ_princ ?
    868			clp->cl_cred.cr_targ_princ : "nfs";
    869
    870		return get_cred(rpc_machine_cred());
    871	} else {
    872		struct cred *kcred;
    873
    874		kcred = prepare_kernel_cred(NULL);
    875		if (!kcred)
    876			return NULL;
    877
    878		kcred->uid = ses->se_cb_sec.uid;
    879		kcred->gid = ses->se_cb_sec.gid;
    880		return kcred;
    881	}
    882}
    883
    884static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses)
    885{
    886	int maxtime = max_cb_time(clp->net);
    887	struct rpc_timeout	timeparms = {
    888		.to_initval	= maxtime,
    889		.to_retries	= 0,
    890		.to_maxval	= maxtime,
    891	};
    892	struct rpc_create_args args = {
    893		.net		= clp->net,
    894		.address	= (struct sockaddr *) &conn->cb_addr,
    895		.addrsize	= conn->cb_addrlen,
    896		.saddress	= (struct sockaddr *) &conn->cb_saddr,
    897		.timeout	= &timeparms,
    898		.program	= &cb_program,
    899		.version	= 1,
    900		.flags		= (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
    901		.cred		= current_cred(),
    902	};
    903	struct rpc_clnt *client;
    904	const struct cred *cred;
    905
    906	if (clp->cl_minorversion == 0) {
    907		if (!clp->cl_cred.cr_principal &&
    908		    (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) {
    909			trace_nfsd_cb_setup_err(clp, -EINVAL);
    910			return -EINVAL;
    911		}
    912		args.client_name = clp->cl_cred.cr_principal;
    913		args.prognumber	= conn->cb_prog;
    914		args.protocol = XPRT_TRANSPORT_TCP;
    915		args.authflavor = clp->cl_cred.cr_flavor;
    916		clp->cl_cb_ident = conn->cb_ident;
    917	} else {
    918		if (!conn->cb_xprt)
    919			return -EINVAL;
    920		clp->cl_cb_conn.cb_xprt = conn->cb_xprt;
    921		clp->cl_cb_session = ses;
    922		args.bc_xprt = conn->cb_xprt;
    923		args.prognumber = clp->cl_cb_session->se_cb_prog;
    924		args.protocol = conn->cb_xprt->xpt_class->xcl_ident |
    925				XPRT_TRANSPORT_BC;
    926		args.authflavor = ses->se_cb_sec.flavor;
    927	}
    928	/* Create RPC client */
    929	client = rpc_create(&args);
    930	if (IS_ERR(client)) {
    931		trace_nfsd_cb_setup_err(clp, PTR_ERR(client));
    932		return PTR_ERR(client);
    933	}
    934	cred = get_backchannel_cred(clp, client, ses);
    935	if (!cred) {
    936		trace_nfsd_cb_setup_err(clp, -ENOMEM);
    937		rpc_shutdown_client(client);
    938		return -ENOMEM;
    939	}
    940	clp->cl_cb_client = client;
    941	clp->cl_cb_cred = cred;
    942	rcu_read_lock();
    943	trace_nfsd_cb_setup(clp, rpc_peeraddr2str(client, RPC_DISPLAY_NETID),
    944			    args.authflavor);
    945	rcu_read_unlock();
    946	return 0;
    947}
    948
    949static void nfsd4_mark_cb_state(struct nfs4_client *clp, int newstate)
    950{
    951	if (clp->cl_cb_state != newstate) {
    952		clp->cl_cb_state = newstate;
    953		trace_nfsd_cb_state(clp);
    954	}
    955}
    956
    957static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason)
    958{
    959	if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags))
    960		return;
    961	nfsd4_mark_cb_state(clp, NFSD4_CB_DOWN);
    962}
    963
    964static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason)
    965{
    966	if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags))
    967		return;
    968	nfsd4_mark_cb_state(clp, NFSD4_CB_FAULT);
    969}
    970
    971static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
    972{
    973	struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
    974
    975	if (task->tk_status)
    976		nfsd4_mark_cb_down(clp, task->tk_status);
    977	else
    978		nfsd4_mark_cb_state(clp, NFSD4_CB_UP);
    979}
    980
    981static void nfsd4_cb_probe_release(void *calldata)
    982{
    983	struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
    984
    985	nfsd41_cb_inflight_end(clp);
    986
    987}
    988
    989static const struct rpc_call_ops nfsd4_cb_probe_ops = {
    990	/* XXX: release method to ensure we set the cb channel down if
    991	 * necessary on early failure? */
    992	.rpc_call_done = nfsd4_cb_probe_done,
    993	.rpc_release = nfsd4_cb_probe_release,
    994};
    995
    996/*
    997 * Poke the callback thread to process any updates to the callback
    998 * parameters, and send a null probe.
    999 */
   1000void nfsd4_probe_callback(struct nfs4_client *clp)
   1001{
   1002	trace_nfsd_cb_probe(clp);
   1003	nfsd4_mark_cb_state(clp, NFSD4_CB_UNKNOWN);
   1004	set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags);
   1005	nfsd4_run_cb(&clp->cl_cb_null);
   1006}
   1007
   1008void nfsd4_probe_callback_sync(struct nfs4_client *clp)
   1009{
   1010	nfsd4_probe_callback(clp);
   1011	flush_workqueue(callback_wq);
   1012}
   1013
   1014void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
   1015{
   1016	nfsd4_mark_cb_state(clp, NFSD4_CB_UNKNOWN);
   1017	spin_lock(&clp->cl_lock);
   1018	memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn));
   1019	spin_unlock(&clp->cl_lock);
   1020}
   1021
   1022/*
   1023 * There's currently a single callback channel slot.
   1024 * If the slot is available, then mark it busy.  Otherwise, set the
   1025 * thread for sleeping on the callback RPC wait queue.
   1026 */
   1027static bool nfsd41_cb_get_slot(struct nfsd4_callback *cb, struct rpc_task *task)
   1028{
   1029	struct nfs4_client *clp = cb->cb_clp;
   1030
   1031	if (!cb->cb_holds_slot &&
   1032	    test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
   1033		rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
   1034		/* Race breaker */
   1035		if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
   1036			dprintk("%s slot is busy\n", __func__);
   1037			return false;
   1038		}
   1039		rpc_wake_up_queued_task(&clp->cl_cb_waitq, task);
   1040	}
   1041	cb->cb_holds_slot = true;
   1042	return true;
   1043}
   1044
   1045static void nfsd41_cb_release_slot(struct nfsd4_callback *cb)
   1046{
   1047	struct nfs4_client *clp = cb->cb_clp;
   1048
   1049	if (cb->cb_holds_slot) {
   1050		cb->cb_holds_slot = false;
   1051		clear_bit(0, &clp->cl_cb_slot_busy);
   1052		rpc_wake_up_next(&clp->cl_cb_waitq);
   1053	}
   1054}
   1055
   1056static void nfsd41_destroy_cb(struct nfsd4_callback *cb)
   1057{
   1058	struct nfs4_client *clp = cb->cb_clp;
   1059
   1060	nfsd41_cb_release_slot(cb);
   1061	if (cb->cb_ops && cb->cb_ops->release)
   1062		cb->cb_ops->release(cb);
   1063	nfsd41_cb_inflight_end(clp);
   1064}
   1065
   1066/*
   1067 * TODO: cb_sequence should support referring call lists, cachethis, multiple
   1068 * slots, and mark callback channel down on communication errors.
   1069 */
   1070static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
   1071{
   1072	struct nfsd4_callback *cb = calldata;
   1073	struct nfs4_client *clp = cb->cb_clp;
   1074	u32 minorversion = clp->cl_minorversion;
   1075
   1076	/*
   1077	 * cb_seq_status is only set in decode_cb_sequence4res,
   1078	 * and so will remain 1 if an rpc level failure occurs.
   1079	 */
   1080	cb->cb_seq_status = 1;
   1081	cb->cb_status = 0;
   1082	if (minorversion && !nfsd41_cb_get_slot(cb, task))
   1083		return;
   1084	rpc_call_start(task);
   1085}
   1086
   1087static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback *cb)
   1088{
   1089	struct nfs4_client *clp = cb->cb_clp;
   1090	struct nfsd4_session *session = clp->cl_cb_session;
   1091	bool ret = true;
   1092
   1093	if (!clp->cl_minorversion) {
   1094		/*
   1095		 * If the backchannel connection was shut down while this
   1096		 * task was queued, we need to resubmit it after setting up
   1097		 * a new backchannel connection.
   1098		 *
   1099		 * Note that if we lost our callback connection permanently
   1100		 * the submission code will error out, so we don't need to
   1101		 * handle that case here.
   1102		 */
   1103		if (RPC_SIGNALLED(task))
   1104			goto need_restart;
   1105
   1106		return true;
   1107	}
   1108
   1109	if (!cb->cb_holds_slot)
   1110		goto need_restart;
   1111
   1112	switch (cb->cb_seq_status) {
   1113	case 0:
   1114		/*
   1115		 * No need for lock, access serialized in nfsd4_cb_prepare
   1116		 *
   1117		 * RFC5661 20.9.3
   1118		 * If CB_SEQUENCE returns an error, then the state of the slot
   1119		 * (sequence ID, cached reply) MUST NOT change.
   1120		 */
   1121		++session->se_cb_seq_nr;
   1122		break;
   1123	case -ESERVERFAULT:
   1124		++session->se_cb_seq_nr;
   1125		fallthrough;
   1126	case 1:
   1127	case -NFS4ERR_BADSESSION:
   1128		nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status);
   1129		ret = false;
   1130		break;
   1131	case -NFS4ERR_DELAY:
   1132		if (!rpc_restart_call(task))
   1133			goto out;
   1134
   1135		rpc_delay(task, 2 * HZ);
   1136		return false;
   1137	case -NFS4ERR_BADSLOT:
   1138		goto retry_nowait;
   1139	case -NFS4ERR_SEQ_MISORDERED:
   1140		if (session->se_cb_seq_nr != 1) {
   1141			session->se_cb_seq_nr = 1;
   1142			goto retry_nowait;
   1143		}
   1144		break;
   1145	default:
   1146		nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status);
   1147		dprintk("%s: unprocessed error %d\n", __func__,
   1148			cb->cb_seq_status);
   1149	}
   1150
   1151	nfsd41_cb_release_slot(cb);
   1152	dprintk("%s: freed slot, new seqid=%d\n", __func__,
   1153		clp->cl_cb_session->se_cb_seq_nr);
   1154
   1155	if (RPC_SIGNALLED(task))
   1156		goto need_restart;
   1157out:
   1158	return ret;
   1159retry_nowait:
   1160	if (rpc_restart_call_prepare(task))
   1161		ret = false;
   1162	goto out;
   1163need_restart:
   1164	if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) {
   1165		task->tk_status = 0;
   1166		cb->cb_need_restart = true;
   1167	}
   1168	return false;
   1169}
   1170
   1171static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
   1172{
   1173	struct nfsd4_callback *cb = calldata;
   1174	struct nfs4_client *clp = cb->cb_clp;
   1175
   1176	if (!nfsd4_cb_sequence_done(task, cb))
   1177		return;
   1178
   1179	if (cb->cb_status) {
   1180		WARN_ON_ONCE(task->tk_status);
   1181		task->tk_status = cb->cb_status;
   1182	}
   1183
   1184	switch (cb->cb_ops->done(cb, task)) {
   1185	case 0:
   1186		task->tk_status = 0;
   1187		rpc_restart_call_prepare(task);
   1188		return;
   1189	case 1:
   1190		switch (task->tk_status) {
   1191		case -EIO:
   1192		case -ETIMEDOUT:
   1193		case -EACCES:
   1194			nfsd4_mark_cb_down(clp, task->tk_status);
   1195		}
   1196		break;
   1197	default:
   1198		BUG();
   1199	}
   1200}
   1201
   1202static void nfsd4_cb_release(void *calldata)
   1203{
   1204	struct nfsd4_callback *cb = calldata;
   1205
   1206	if (cb->cb_need_restart)
   1207		nfsd4_queue_cb(cb);
   1208	else
   1209		nfsd41_destroy_cb(cb);
   1210
   1211}
   1212
   1213static const struct rpc_call_ops nfsd4_cb_ops = {
   1214	.rpc_call_prepare = nfsd4_cb_prepare,
   1215	.rpc_call_done = nfsd4_cb_done,
   1216	.rpc_release = nfsd4_cb_release,
   1217};
   1218
   1219int nfsd4_create_callback_queue(void)
   1220{
   1221	callback_wq = alloc_ordered_workqueue("nfsd4_callbacks", 0);
   1222	if (!callback_wq)
   1223		return -ENOMEM;
   1224	return 0;
   1225}
   1226
   1227void nfsd4_destroy_callback_queue(void)
   1228{
   1229	destroy_workqueue(callback_wq);
   1230}
   1231
   1232/* must be called under the state lock */
   1233void nfsd4_shutdown_callback(struct nfs4_client *clp)
   1234{
   1235	if (clp->cl_cb_state != NFSD4_CB_UNKNOWN)
   1236		trace_nfsd_cb_shutdown(clp);
   1237
   1238	set_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags);
   1239	/*
   1240	 * Note this won't actually result in a null callback;
   1241	 * instead, nfsd4_run_cb_null() will detect the killed
   1242	 * client, destroy the rpc client, and stop:
   1243	 */
   1244	nfsd4_run_cb(&clp->cl_cb_null);
   1245	flush_workqueue(callback_wq);
   1246	nfsd41_cb_inflight_wait_complete(clp);
   1247}
   1248
   1249/* requires cl_lock: */
   1250static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp)
   1251{
   1252	struct nfsd4_session *s;
   1253	struct nfsd4_conn *c;
   1254
   1255	list_for_each_entry(s, &clp->cl_sessions, se_perclnt) {
   1256		list_for_each_entry(c, &s->se_conns, cn_persession) {
   1257			if (c->cn_flags & NFS4_CDFC4_BACK)
   1258				return c;
   1259		}
   1260	}
   1261	return NULL;
   1262}
   1263
   1264/*
   1265 * Note there isn't a lot of locking in this code; instead we depend on
   1266 * the fact that it is run from the callback_wq, which won't run two
   1267 * work items at once.  So, for example, callback_wq handles all access
   1268 * of cl_cb_client and all calls to rpc_create or rpc_shutdown_client.
   1269 */
   1270static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
   1271{
   1272	struct nfs4_cb_conn conn;
   1273	struct nfs4_client *clp = cb->cb_clp;
   1274	struct nfsd4_session *ses = NULL;
   1275	struct nfsd4_conn *c;
   1276	int err;
   1277
   1278	/*
   1279	 * This is either an update, or the client dying; in either case,
   1280	 * kill the old client:
   1281	 */
   1282	if (clp->cl_cb_client) {
   1283		rpc_shutdown_client(clp->cl_cb_client);
   1284		clp->cl_cb_client = NULL;
   1285		put_cred(clp->cl_cb_cred);
   1286		clp->cl_cb_cred = NULL;
   1287	}
   1288	if (clp->cl_cb_conn.cb_xprt) {
   1289		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
   1290		clp->cl_cb_conn.cb_xprt = NULL;
   1291	}
   1292	if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags))
   1293		return;
   1294	spin_lock(&clp->cl_lock);
   1295	/*
   1296	 * Only serialized callback code is allowed to clear these
   1297	 * flags; main nfsd code can only set them:
   1298	 */
   1299	BUG_ON(!(clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK));
   1300	clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags);
   1301	memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn));
   1302	c = __nfsd4_find_backchannel(clp);
   1303	if (c) {
   1304		svc_xprt_get(c->cn_xprt);
   1305		conn.cb_xprt = c->cn_xprt;
   1306		ses = c->cn_session;
   1307	}
   1308	spin_unlock(&clp->cl_lock);
   1309
   1310	err = setup_callback_client(clp, &conn, ses);
   1311	if (err) {
   1312		nfsd4_mark_cb_down(clp, err);
   1313		if (c)
   1314			svc_xprt_put(c->cn_xprt);
   1315		return;
   1316	}
   1317}
   1318
   1319static void
   1320nfsd4_run_cb_work(struct work_struct *work)
   1321{
   1322	struct nfsd4_callback *cb =
   1323		container_of(work, struct nfsd4_callback, cb_work);
   1324	struct nfs4_client *clp = cb->cb_clp;
   1325	struct rpc_clnt *clnt;
   1326	int flags;
   1327
   1328	if (cb->cb_need_restart) {
   1329		cb->cb_need_restart = false;
   1330	} else {
   1331		if (cb->cb_ops && cb->cb_ops->prepare)
   1332			cb->cb_ops->prepare(cb);
   1333	}
   1334
   1335	if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)
   1336		nfsd4_process_cb_update(cb);
   1337
   1338	clnt = clp->cl_cb_client;
   1339	if (!clnt) {
   1340		/* Callback channel broken, or client killed; give up: */
   1341		nfsd41_destroy_cb(cb);
   1342		return;
   1343	}
   1344
   1345	/*
   1346	 * Don't send probe messages for 4.1 or later.
   1347	 */
   1348	if (!cb->cb_ops && clp->cl_minorversion) {
   1349		nfsd4_mark_cb_state(clp, NFSD4_CB_UP);
   1350		nfsd41_destroy_cb(cb);
   1351		return;
   1352	}
   1353
   1354	cb->cb_msg.rpc_cred = clp->cl_cb_cred;
   1355	flags = clp->cl_minorversion ? RPC_TASK_NOCONNECT : RPC_TASK_SOFTCONN;
   1356	rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags,
   1357			cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb);
   1358}
   1359
   1360void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
   1361		const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op)
   1362{
   1363	cb->cb_clp = clp;
   1364	cb->cb_msg.rpc_proc = &nfs4_cb_procedures[op];
   1365	cb->cb_msg.rpc_argp = cb;
   1366	cb->cb_msg.rpc_resp = cb;
   1367	cb->cb_ops = ops;
   1368	INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
   1369	cb->cb_seq_status = 1;
   1370	cb->cb_status = 0;
   1371	cb->cb_need_restart = false;
   1372	cb->cb_holds_slot = false;
   1373}
   1374
   1375void nfsd4_run_cb(struct nfsd4_callback *cb)
   1376{
   1377	struct nfs4_client *clp = cb->cb_clp;
   1378
   1379	nfsd41_cb_inflight_begin(clp);
   1380	if (!nfsd4_queue_cb(cb))
   1381		nfsd41_cb_inflight_end(clp);
   1382}