cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

dir.c (84854B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 *  linux/fs/nfs/dir.c
      4 *
      5 *  Copyright (C) 1992  Rick Sladkey
      6 *
      7 *  nfs directory handling functions
      8 *
      9 * 10 Apr 1996	Added silly rename for unlink	--okir
     10 * 28 Sep 1996	Improved directory cache --okir
     11 * 23 Aug 1997  Claus Heine claus@momo.math.rwth-aachen.de 
     12 *              Re-implemented silly rename for unlink, newly implemented
     13 *              silly rename for nfs_rename() following the suggestions
     14 *              of Olaf Kirch (okir) found in this file.
     15 *              Following Linus' comments on my original hack, this version
     16 *              depends only on the dcache stuff and doesn't touch the inode
     17 *              layer (iput() and friends).
     18 *  6 Jun 1999	Cache readdir lookups in the page cache. -DaveM
     19 */
     20
     21#include <linux/compat.h>
     22#include <linux/module.h>
     23#include <linux/time.h>
     24#include <linux/errno.h>
     25#include <linux/stat.h>
     26#include <linux/fcntl.h>
     27#include <linux/string.h>
     28#include <linux/kernel.h>
     29#include <linux/slab.h>
     30#include <linux/mm.h>
     31#include <linux/sunrpc/clnt.h>
     32#include <linux/nfs_fs.h>
     33#include <linux/nfs_mount.h>
     34#include <linux/pagemap.h>
     35#include <linux/pagevec.h>
     36#include <linux/namei.h>
     37#include <linux/mount.h>
     38#include <linux/swap.h>
     39#include <linux/sched.h>
     40#include <linux/kmemleak.h>
     41#include <linux/xattr.h>
     42#include <linux/hash.h>
     43
     44#include "delegation.h"
     45#include "iostat.h"
     46#include "internal.h"
     47#include "fscache.h"
     48
     49#include "nfstrace.h"
     50
     51/* #define NFS_DEBUG_VERBOSE 1 */
     52
     53static int nfs_opendir(struct inode *, struct file *);
     54static int nfs_closedir(struct inode *, struct file *);
     55static int nfs_readdir(struct file *, struct dir_context *);
     56static int nfs_fsync_dir(struct file *, loff_t, loff_t, int);
     57static loff_t nfs_llseek_dir(struct file *, loff_t, int);
     58static void nfs_readdir_free_folio(struct folio *);
     59
     60const struct file_operations nfs_dir_operations = {
     61	.llseek		= nfs_llseek_dir,
     62	.read		= generic_read_dir,
     63	.iterate_shared	= nfs_readdir,
     64	.open		= nfs_opendir,
     65	.release	= nfs_closedir,
     66	.fsync		= nfs_fsync_dir,
     67};
     68
     69const struct address_space_operations nfs_dir_aops = {
     70	.free_folio = nfs_readdir_free_folio,
     71};
     72
     73#define NFS_INIT_DTSIZE PAGE_SIZE
     74
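/*
 * Allocate the per-open readdir context: snapshot the directory's
 * attribute generation count and cookie verifier, and link the context
 * into nfsi->open_files under dir->i_lock. If data invalidation had
 * been deferred and there are no other readers, mark the directory
 * data invalid now.
 */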
     75static struct nfs_open_dir_context *
     76alloc_nfs_open_dir_context(struct inode *dir)
     77{
     78	struct nfs_inode *nfsi = NFS_I(dir);
     79	struct nfs_open_dir_context *ctx;
     80
     81	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
     82	if (ctx != NULL) {
     83		ctx->attr_gencount = nfsi->attr_gencount;
     84		ctx->dtsize = NFS_INIT_DTSIZE;
     85		spin_lock(&dir->i_lock);
     86		if (list_empty(&nfsi->open_files) &&
     87		    (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
     88			nfs_set_cache_invalid(dir,
     89					      NFS_INO_INVALID_DATA |
     90						      NFS_INO_REVAL_FORCED);
     91		list_add_tail_rcu(&ctx->list, &nfsi->open_files);
     92		memcpy(ctx->verf, nfsi->cookieverf, sizeof(ctx->verf));
     93		spin_unlock(&dir->i_lock);
     94		return ctx;
     95	}
     96	return  ERR_PTR(-ENOMEM);
     97}
     98
     99static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_context *ctx)
    100{
    101	spin_lock(&dir->i_lock);
    102	list_del_rcu(&ctx->list);
    103	spin_unlock(&dir->i_lock);
    104	kfree_rcu(ctx, rcu_head);
    105}
    106
    107/*
    108 * Open file
    109 */
    110static int
    111nfs_opendir(struct inode *inode, struct file *filp)
    112{
    113	int res = 0;
    114	struct nfs_open_dir_context *ctx;
    115
    116	dfprintk(FILE, "NFS: open dir(%pD2)\n", filp);
    117
    118	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
    119
    120	ctx = alloc_nfs_open_dir_context(inode);
    121	if (IS_ERR(ctx)) {
    122		res = PTR_ERR(ctx);
    123		goto out;
    124	}
    125	filp->private_data = ctx;
    126out:
    127	return res;
    128}
    129
    130static int
    131nfs_closedir(struct inode *inode, struct file *filp)
    132{
    133	put_nfs_open_dir_context(file_inode(filp), filp->private_data);
    134	return 0;
    135}
    136
    137struct nfs_cache_array_entry {
    138	u64 cookie;
    139	u64 ino;
    140	const char *name;
    141	unsigned int name_len;
    142	unsigned char d_type;
    143};
    144
    145struct nfs_cache_array {
    146	u64 change_attr;
    147	u64 last_cookie;
    148	unsigned int size;
    149	unsigned char page_full : 1,
    150		      page_is_eof : 1,
    151		      cookies_are_ordered : 1;
    152	struct nfs_cache_array_entry array[];
    153};
    154
    155struct nfs_readdir_descriptor {
    156	struct file	*file;
    157	struct page	*page;
    158	struct dir_context *ctx;
    159	pgoff_t		page_index;
    160	pgoff_t		page_index_max;
    161	u64		dir_cookie;
    162	u64		last_cookie;
    163	loff_t		current_index;
    164
    165	__be32		verf[NFS_DIR_VERIFIER_SIZE];
    166	unsigned long	dir_verifier;
    167	unsigned long	timestamp;
    168	unsigned long	gencount;
    169	unsigned long	attr_gencount;
    170	unsigned int	cache_entry_index;
    171	unsigned int	buffer_fills;
    172	unsigned int	dtsize;
    173	bool clear_cache;
    174	bool plus;
    175	bool eob;
    176	bool eof;
    177};
    178
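/*
 * Clamp the readdir request size to the server's advertised dtsize on
 * the high end and NFS_MIN_FILE_IO_SIZE on the low end. The grow and
 * shrink helpers below double or halve the current value within those
 * bounds.
 */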
    179static void nfs_set_dtsize(struct nfs_readdir_descriptor *desc, unsigned int sz)
    180{
    181	struct nfs_server *server = NFS_SERVER(file_inode(desc->file));
    182	unsigned int maxsize = server->dtsize;
    183
    184	if (sz > maxsize)
    185		sz = maxsize;
    186	if (sz < NFS_MIN_FILE_IO_SIZE)
    187		sz = NFS_MIN_FILE_IO_SIZE;
    188	desc->dtsize = sz;
    189}
    190
    191static void nfs_shrink_dtsize(struct nfs_readdir_descriptor *desc)
    192{
    193	nfs_set_dtsize(desc, desc->dtsize >> 1);
    194}
    195
    196static void nfs_grow_dtsize(struct nfs_readdir_descriptor *desc)
    197{
    198	nfs_set_dtsize(desc, desc->dtsize << 1);
    199}
    200
    201static void nfs_readdir_page_init_array(struct page *page, u64 last_cookie,
    202					u64 change_attr)
    203{
    204	struct nfs_cache_array *array;
    205
    206	array = kmap_atomic(page);
    207	array->change_attr = change_attr;
    208	array->last_cookie = last_cookie;
    209	array->size = 0;
    210	array->page_full = 0;
    211	array->page_is_eof = 0;
    212	array->cookies_are_ordered = 1;
    213	kunmap_atomic(array);
    214}
    215
    216/*
     217 * we are freeing strings created by nfs_readdir_page_array_append()
    218 */
    219static void nfs_readdir_clear_array(struct page *page)
    220{
    221	struct nfs_cache_array *array;
    222	unsigned int i;
    223
    224	array = kmap_atomic(page);
    225	for (i = 0; i < array->size; i++)
    226		kfree(array->array[i].name);
    227	array->size = 0;
    228	kunmap_atomic(array);
    229}
    230
    231static void nfs_readdir_free_folio(struct folio *folio)
    232{
    233	nfs_readdir_clear_array(&folio->page);
    234}
    235
    236static void nfs_readdir_page_reinit_array(struct page *page, u64 last_cookie,
    237					  u64 change_attr)
    238{
    239	nfs_readdir_clear_array(page);
    240	nfs_readdir_page_init_array(page, last_cookie, change_attr);
    241}
    242
    243static struct page *
    244nfs_readdir_page_array_alloc(u64 last_cookie, gfp_t gfp_flags)
    245{
    246	struct page *page = alloc_page(gfp_flags);
    247	if (page)
    248		nfs_readdir_page_init_array(page, last_cookie, 0);
    249	return page;
    250}
    251
    252static void nfs_readdir_page_array_free(struct page *page)
    253{
    254	if (page) {
    255		nfs_readdir_clear_array(page);
    256		put_page(page);
    257	}
    258}
    259
    260static u64 nfs_readdir_array_index_cookie(struct nfs_cache_array *array)
    261{
    262	return array->size == 0 ? array->last_cookie : array->array[0].cookie;
    263}
    264
    265static void nfs_readdir_array_set_eof(struct nfs_cache_array *array)
    266{
    267	array->page_is_eof = 1;
    268	array->page_full = 1;
    269}
    270
    271static bool nfs_readdir_array_is_full(struct nfs_cache_array *array)
    272{
    273	return array->page_full;
    274}
    275
    276/*
     277 * The caller is responsible for freeing the returned name string.
     278 * When called from nfs_readdir_page_array_append(), the strings will be
     279 * freed in nfs_readdir_clear_array().
    280 */
    281static const char *nfs_readdir_copy_name(const char *name, unsigned int len)
    282{
    283	const char *ret = kmemdup_nul(name, len, GFP_KERNEL);
    284
    285	/*
    286	 * Avoid a kmemleak false positive. The pointer to the name is stored
    287	 * in a page cache page which kmemleak does not scan.
    288	 */
    289	if (ret != NULL)
    290		kmemleak_not_leak(ret);
    291	return ret;
    292}
    293
    294static size_t nfs_readdir_array_maxentries(void)
    295{
    296	return (PAGE_SIZE - sizeof(struct nfs_cache_array)) /
    297	       sizeof(struct nfs_cache_array_entry);
    298}
    299
    300/*
    301 * Check that the next array entry lies entirely within the page bounds
    302 */
    303static int nfs_readdir_array_can_expand(struct nfs_cache_array *array)
    304{
    305	if (array->page_full)
    306		return -ENOSPC;
    307	if (array->size == nfs_readdir_array_maxentries()) {
    308		array->page_full = 1;
    309		return -ENOSPC;
    310	}
    311	return 0;
    312}
    313
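/*
 * Append one decoded entry to the page's cache array. Each array entry
 * is keyed by the cookie that precedes it (the array's last_cookie),
 * and last_cookie is then advanced to the entry's own cookie. Returns
 * -ENOSPC once the page is full; *cookie always reports the cookie
 * from which the next fill should resume.
 */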
    314static int nfs_readdir_page_array_append(struct page *page,
    315					 const struct nfs_entry *entry,
    316					 u64 *cookie)
    317{
    318	struct nfs_cache_array *array;
    319	struct nfs_cache_array_entry *cache_entry;
    320	const char *name;
    321	int ret = -ENOMEM;
    322
    323	name = nfs_readdir_copy_name(entry->name, entry->len);
    324
    325	array = kmap_atomic(page);
    326	if (!name)
    327		goto out;
    328	ret = nfs_readdir_array_can_expand(array);
    329	if (ret) {
    330		kfree(name);
    331		goto out;
    332	}
    333
    334	cache_entry = &array->array[array->size];
    335	cache_entry->cookie = array->last_cookie;
    336	cache_entry->ino = entry->ino;
    337	cache_entry->d_type = entry->d_type;
    338	cache_entry->name_len = entry->len;
    339	cache_entry->name = name;
    340	array->last_cookie = entry->cookie;
    341	if (array->last_cookie <= cache_entry->cookie)
    342		array->cookies_are_ordered = 0;
    343	array->size++;
    344	if (entry->eof != 0)
    345		nfs_readdir_array_set_eof(array);
    346out:
    347	*cookie = array->last_cookie;
    348	kunmap_atomic(array);
    349	return ret;
    350}
    351
    352#define NFS_READDIR_COOKIE_MASK (U32_MAX >> 14)
    353/*
     354 * Hash algorithm allowing content addressable access to sequences
    355 * of directory cookies. Content is addressed by the value of the
    356 * cookie index of the first readdir entry in a page.
    357 *
    358 * We select only the first 18 bits to avoid issues with excessive
    359 * memory use for the page cache XArray. 18 bits should allow the caching
    360 * of 262144 pages of sequences of readdir entries. Since each page holds
    361 * 127 readdir entries for a typical 64-bit system, that works out to a
    362 * cache of ~ 33 million entries per directory.
    363 */
    364static pgoff_t nfs_readdir_page_cookie_hash(u64 cookie)
    365{
    366	if (cookie == 0)
    367		return 0;
    368	return hash_64(cookie, 18);
    369}
    370
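/*
 * A cached page may only be reused if its change attribute matches the
 * directory's current one and its first cookie matches the cookie we
 * are searching for; otherwise the caller reinitialises the array.
 */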
    371static bool nfs_readdir_page_validate(struct page *page, u64 last_cookie,
    372				      u64 change_attr)
    373{
    374	struct nfs_cache_array *array = kmap_atomic(page);
    375	int ret = true;
    376
    377	if (array->change_attr != change_attr)
    378		ret = false;
    379	if (nfs_readdir_array_index_cookie(array) != last_cookie)
    380		ret = false;
    381	kunmap_atomic(array);
    382	return ret;
    383}
    384
    385static void nfs_readdir_page_unlock_and_put(struct page *page)
    386{
    387	unlock_page(page);
    388	put_page(page);
    389}
    390
    391static void nfs_readdir_page_init_and_validate(struct page *page, u64 cookie,
    392					       u64 change_attr)
    393{
    394	if (PageUptodate(page)) {
    395		if (nfs_readdir_page_validate(page, cookie, change_attr))
    396			return;
    397		nfs_readdir_clear_array(page);
    398	}
    399	nfs_readdir_page_init_array(page, cookie, change_attr);
    400	SetPageUptodate(page);
    401}
    402
    403static struct page *nfs_readdir_page_get_locked(struct address_space *mapping,
    404						u64 cookie, u64 change_attr)
    405{
    406	pgoff_t index = nfs_readdir_page_cookie_hash(cookie);
    407	struct page *page;
    408
    409	page = grab_cache_page(mapping, index);
    410	if (!page)
    411		return NULL;
    412	nfs_readdir_page_init_and_validate(page, cookie, change_attr);
    413	return page;
    414}
    415
    416static u64 nfs_readdir_page_last_cookie(struct page *page)
    417{
    418	struct nfs_cache_array *array;
    419	u64 ret;
    420
    421	array = kmap_atomic(page);
    422	ret = array->last_cookie;
    423	kunmap_atomic(array);
    424	return ret;
    425}
    426
    427static bool nfs_readdir_page_needs_filling(struct page *page)
    428{
    429	struct nfs_cache_array *array;
    430	bool ret;
    431
    432	array = kmap_atomic(page);
    433	ret = !nfs_readdir_array_is_full(array);
    434	kunmap_atomic(array);
    435	return ret;
    436}
    437
    438static void nfs_readdir_page_set_eof(struct page *page)
    439{
    440	struct nfs_cache_array *array;
    441
    442	array = kmap_atomic(page);
    443	nfs_readdir_array_set_eof(array);
    444	kunmap_atomic(array);
    445}
    446
    447static struct page *nfs_readdir_page_get_next(struct address_space *mapping,
    448					      u64 cookie, u64 change_attr)
    449{
    450	pgoff_t index = nfs_readdir_page_cookie_hash(cookie);
    451	struct page *page;
    452
    453	page = grab_cache_page_nowait(mapping, index);
    454	if (!page)
    455		return NULL;
    456	nfs_readdir_page_init_and_validate(page, cookie, change_attr);
    457	if (nfs_readdir_page_last_cookie(page) != cookie)
    458		nfs_readdir_page_reinit_array(page, cookie, change_attr);
    459	return page;
    460}
    461
    462static inline
    463int is_32bit_api(void)
    464{
    465#ifdef CONFIG_COMPAT
    466	return in_compat_syscall();
    467#else
    468	return (BITS_PER_LONG == 32);
    469#endif
    470}
    471
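/*
 * Decide whether ctx->pos can carry the raw 64-bit readdir cookie, or
 * whether a 32-bit API caller (or FMODE_32BITHASH) forces us to expose
 * a simple entry index instead.
 */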
    472static
    473bool nfs_readdir_use_cookie(const struct file *filp)
    474{
    475	if ((filp->f_mode & FMODE_32BITHASH) ||
    476	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
    477		return false;
    478	return true;
    479}
    480
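/*
 * Advance the search to the next cache page: if the current array is
 * full, continue from its last cookie on the following page index;
 * otherwise restart the search at this array's first cookie.
 */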
    481static void nfs_readdir_seek_next_array(struct nfs_cache_array *array,
    482					struct nfs_readdir_descriptor *desc)
    483{
    484	if (array->page_full) {
    485		desc->last_cookie = array->last_cookie;
    486		desc->current_index += array->size;
    487		desc->cache_entry_index = 0;
    488		desc->page_index++;
    489	} else
    490		desc->last_cookie = nfs_readdir_array_index_cookie(array);
    491}
    492
    493static void nfs_readdir_rewind_search(struct nfs_readdir_descriptor *desc)
    494{
    495	desc->current_index = 0;
    496	desc->last_cookie = 0;
    497	desc->page_index = 0;
    498}
    499
    500static int nfs_readdir_search_for_pos(struct nfs_cache_array *array,
    501				      struct nfs_readdir_descriptor *desc)
    502{
    503	loff_t diff = desc->ctx->pos - desc->current_index;
    504	unsigned int index;
    505
    506	if (diff < 0)
    507		goto out_eof;
    508	if (diff >= array->size) {
    509		if (array->page_is_eof)
    510			goto out_eof;
    511		nfs_readdir_seek_next_array(array, desc);
    512		return -EAGAIN;
    513	}
    514
    515	index = (unsigned int)diff;
    516	desc->dir_cookie = array->array[index].cookie;
    517	desc->cache_entry_index = index;
    518	return 0;
    519out_eof:
    520	desc->eof = true;
    521	return -EBADCOOKIE;
    522}
    523
    524static bool nfs_readdir_array_cookie_in_range(struct nfs_cache_array *array,
    525					      u64 cookie)
    526{
    527	if (!array->cookies_are_ordered)
    528		return true;
    529	/* Optimisation for monotonically increasing cookies */
    530	if (cookie >= array->last_cookie)
    531		return false;
    532	if (array->size && cookie < array->array[0].cookie)
    533		return false;
    534	return true;
    535}
    536
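/*
 * Linear scan of the array for desc->dir_cookie. On a hit, set
 * ctx->pos (cookie or index form) and the cache entry index. On a
 * miss, return -EAGAIN to move on to the next page, or -EBADCOOKIE if
 * this page is marked EOF.
 */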
    537static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array,
    538					 struct nfs_readdir_descriptor *desc)
    539{
    540	unsigned int i;
    541	int status = -EAGAIN;
    542
    543	if (!nfs_readdir_array_cookie_in_range(array, desc->dir_cookie))
    544		goto check_eof;
    545
    546	for (i = 0; i < array->size; i++) {
    547		if (array->array[i].cookie == desc->dir_cookie) {
    548			if (nfs_readdir_use_cookie(desc->file))
    549				desc->ctx->pos = desc->dir_cookie;
    550			else
    551				desc->ctx->pos = desc->current_index + i;
    552			desc->cache_entry_index = i;
    553			return 0;
    554		}
    555	}
    556check_eof:
    557	if (array->page_is_eof) {
    558		status = -EBADCOOKIE;
    559		if (desc->dir_cookie == array->last_cookie)
    560			desc->eof = true;
    561	} else
    562		nfs_readdir_seek_next_array(array, desc);
    563	return status;
    564}
    565
    566static int nfs_readdir_search_array(struct nfs_readdir_descriptor *desc)
    567{
    568	struct nfs_cache_array *array;
    569	int status;
    570
    571	array = kmap_atomic(desc->page);
    572
    573	if (desc->dir_cookie == 0)
    574		status = nfs_readdir_search_for_pos(array, desc);
    575	else
    576		status = nfs_readdir_search_for_cookie(array, desc);
    577
    578	kunmap_atomic(array);
    579	return status;
    580}
    581
    582/* Fill a page with xdr information before transferring to the cache page */
    583static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc,
    584				  __be32 *verf, u64 cookie,
    585				  struct page **pages, size_t bufsize,
    586				  __be32 *verf_res)
    587{
    588	struct inode *inode = file_inode(desc->file);
    589	struct nfs_readdir_arg arg = {
    590		.dentry = file_dentry(desc->file),
    591		.cred = desc->file->f_cred,
    592		.verf = verf,
    593		.cookie = cookie,
    594		.pages = pages,
    595		.page_len = bufsize,
    596		.plus = desc->plus,
    597	};
    598	struct nfs_readdir_res res = {
    599		.verf = verf_res,
    600	};
    601	unsigned long	timestamp, gencount;
    602	int		error;
    603
    604 again:
    605	timestamp = jiffies;
    606	gencount = nfs_inc_attr_generation_counter();
    607	desc->dir_verifier = nfs_save_change_attribute(inode);
    608	error = NFS_PROTO(inode)->readdir(&arg, &res);
    609	if (error < 0) {
    610		/* We requested READDIRPLUS, but the server doesn't grok it */
    611		if (error == -ENOTSUPP && desc->plus) {
    612			NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
    613			desc->plus = arg.plus = false;
    614			goto again;
    615		}
    616		goto error;
    617	}
    618	desc->timestamp = timestamp;
    619	desc->gencount = gencount;
    620error:
    621	return error;
    622}
    623
    624static int xdr_decode(struct nfs_readdir_descriptor *desc,
    625		      struct nfs_entry *entry, struct xdr_stream *xdr)
    626{
    627	struct inode *inode = file_inode(desc->file);
    628	int error;
    629
    630	error = NFS_PROTO(inode)->decode_dirent(xdr, entry, desc->plus);
    631	if (error)
    632		return error;
    633	entry->fattr->time_start = desc->timestamp;
    634	entry->fattr->gencount = desc->gencount;
    635	return 0;
    636}
    637
    638/* Match file and dirent using either filehandle or fileid
    639 * Note: caller is responsible for checking the fsid
    640 */
    641static
    642int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
    643{
    644	struct inode *inode;
    645	struct nfs_inode *nfsi;
    646
    647	if (d_really_is_negative(dentry))
    648		return 0;
    649
    650	inode = d_inode(dentry);
    651	if (is_bad_inode(inode) || NFS_STALE(inode))
    652		return 0;
    653
    654	nfsi = NFS_I(inode);
    655	if (entry->fattr->fileid != nfsi->fileid)
    656		return 0;
    657	if (entry->fh->size && nfs_compare_fh(entry->fh, &nfsi->fh) != 0)
    658		return 0;
    659	return 1;
    660}
    661
    662#define NFS_READDIR_CACHE_USAGE_THRESHOLD (8UL)
    663
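/*
 * Heuristic for choosing READDIRPLUS: use it on the first read of a
 * directory, or once recent lookup/getattr activity (the cache hits
 * plus misses recorded below) crosses the usage threshold above, which
 * suggests an 'ls -l' style workload.
 */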
    664static bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx,
    665				unsigned int cache_hits,
    666				unsigned int cache_misses)
    667{
    668	if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
    669		return false;
    670	if (ctx->pos == 0 ||
    671	    cache_hits + cache_misses > NFS_READDIR_CACHE_USAGE_THRESHOLD)
    672		return true;
    673	return false;
    674}
    675
    676/*
    677 * This function is called by the getattr code to request the
    678 * use of readdirplus to accelerate any future lookups in the same
    679 * directory.
    680 */
    681void nfs_readdir_record_entry_cache_hit(struct inode *dir)
    682{
    683	struct nfs_inode *nfsi = NFS_I(dir);
    684	struct nfs_open_dir_context *ctx;
    685
    686	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
    687	    S_ISDIR(dir->i_mode)) {
    688		rcu_read_lock();
    689		list_for_each_entry_rcu (ctx, &nfsi->open_files, list)
    690			atomic_inc(&ctx->cache_hits);
    691		rcu_read_unlock();
    692	}
    693}
    694
    695/*
    696 * This function is mainly for use by nfs_getattr().
    697 *
    698 * If this is an 'ls -l', we want to force use of readdirplus.
    699 */
    700void nfs_readdir_record_entry_cache_miss(struct inode *dir)
    701{
    702	struct nfs_inode *nfsi = NFS_I(dir);
    703	struct nfs_open_dir_context *ctx;
    704
    705	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
    706	    S_ISDIR(dir->i_mode)) {
    707		rcu_read_lock();
    708		list_for_each_entry_rcu (ctx, &nfsi->open_files, list)
    709			atomic_inc(&ctx->cache_misses);
    710		rcu_read_unlock();
    711	}
    712}
    713
    714static void nfs_lookup_advise_force_readdirplus(struct inode *dir,
    715						unsigned int flags)
    716{
    717	if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
    718		return;
    719	if (flags & (LOOKUP_EXCL | LOOKUP_PARENT | LOOKUP_REVAL))
    720		return;
    721	nfs_readdir_record_entry_cache_miss(dir);
    722}
    723
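/*
 * Use the attributes returned by READDIRPLUS to instantiate or
 * revalidate the child dentry, so that subsequent lookups and stat()
 * calls can be served from the dcache without another round trip.
 */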
    724static
    725void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
    726		unsigned long dir_verifier)
    727{
    728	struct qstr filename = QSTR_INIT(entry->name, entry->len);
    729	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
    730	struct dentry *dentry;
    731	struct dentry *alias;
    732	struct inode *inode;
    733	int status;
    734
    735	if (!(entry->fattr->valid & NFS_ATTR_FATTR_FILEID))
    736		return;
    737	if (!(entry->fattr->valid & NFS_ATTR_FATTR_FSID))
    738		return;
    739	if (filename.len == 0)
    740		return;
    741	/* Validate that the name doesn't contain any illegal '\0' */
    742	if (strnlen(filename.name, filename.len) != filename.len)
    743		return;
    744	/* ...or '/' */
    745	if (strnchr(filename.name, filename.len, '/'))
    746		return;
    747	if (filename.name[0] == '.') {
    748		if (filename.len == 1)
    749			return;
    750		if (filename.len == 2 && filename.name[1] == '.')
    751			return;
    752	}
    753	filename.hash = full_name_hash(parent, filename.name, filename.len);
    754
    755	dentry = d_lookup(parent, &filename);
    756again:
    757	if (!dentry) {
    758		dentry = d_alloc_parallel(parent, &filename, &wq);
    759		if (IS_ERR(dentry))
    760			return;
    761	}
    762	if (!d_in_lookup(dentry)) {
    763		/* Is there a mountpoint here? If so, just exit */
    764		if (!nfs_fsid_equal(&NFS_SB(dentry->d_sb)->fsid,
    765					&entry->fattr->fsid))
    766			goto out;
    767		if (nfs_same_file(dentry, entry)) {
    768			if (!entry->fh->size)
    769				goto out;
    770			nfs_set_verifier(dentry, dir_verifier);
    771			status = nfs_refresh_inode(d_inode(dentry), entry->fattr);
    772			if (!status)
    773				nfs_setsecurity(d_inode(dentry), entry->fattr);
    774			trace_nfs_readdir_lookup_revalidate(d_inode(parent),
    775							    dentry, 0, status);
    776			goto out;
    777		} else {
    778			trace_nfs_readdir_lookup_revalidate_failed(
    779				d_inode(parent), dentry, 0);
    780			d_invalidate(dentry);
    781			dput(dentry);
    782			dentry = NULL;
    783			goto again;
    784		}
    785	}
    786	if (!entry->fh->size) {
    787		d_lookup_done(dentry);
    788		goto out;
    789	}
    790
    791	inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
    792	alias = d_splice_alias(inode, dentry);
    793	d_lookup_done(dentry);
    794	if (alias) {
    795		if (IS_ERR(alias))
    796			goto out;
    797		dput(dentry);
    798		dentry = alias;
    799	}
    800	nfs_set_verifier(dentry, dir_verifier);
    801	trace_nfs_readdir_lookup(d_inode(parent), dentry, 0);
    802out:
    803	dput(dentry);
    804}
    805
    806static int nfs_readdir_entry_decode(struct nfs_readdir_descriptor *desc,
    807				    struct nfs_entry *entry,
    808				    struct xdr_stream *stream)
    809{
    810	int ret;
    811
    812	if (entry->fattr->label)
    813		entry->fattr->label->len = NFS4_MAXLABELLEN;
    814	ret = xdr_decode(desc, entry, stream);
    815	if (ret || !desc->plus)
    816		return ret;
    817	nfs_prime_dcache(file_dentry(desc->file), entry, desc->dir_verifier);
    818	return 0;
    819}
    820
    821/* Perform conversion from xdr to cache array */
    822static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
    823				   struct nfs_entry *entry,
    824				   struct page **xdr_pages, unsigned int buflen,
    825				   struct page **arrays, size_t narrays,
    826				   u64 change_attr)
    827{
    828	struct address_space *mapping = desc->file->f_mapping;
    829	struct xdr_stream stream;
    830	struct xdr_buf buf;
    831	struct page *scratch, *new, *page = *arrays;
    832	u64 cookie;
    833	int status;
    834
    835	scratch = alloc_page(GFP_KERNEL);
    836	if (scratch == NULL)
    837		return -ENOMEM;
    838
    839	xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen);
    840	xdr_set_scratch_page(&stream, scratch);
    841
    842	do {
    843		status = nfs_readdir_entry_decode(desc, entry, &stream);
    844		if (status != 0)
    845			break;
    846
    847		status = nfs_readdir_page_array_append(page, entry, &cookie);
    848		if (status != -ENOSPC)
    849			continue;
    850
    851		if (page->mapping != mapping) {
    852			if (!--narrays)
    853				break;
    854			new = nfs_readdir_page_array_alloc(cookie, GFP_KERNEL);
    855			if (!new)
    856				break;
    857			arrays++;
    858			*arrays = page = new;
    859		} else {
    860			new = nfs_readdir_page_get_next(mapping, cookie,
    861							change_attr);
    862			if (!new)
    863				break;
    864			if (page != *arrays)
    865				nfs_readdir_page_unlock_and_put(page);
    866			page = new;
    867		}
    868		desc->page_index_max++;
    869		status = nfs_readdir_page_array_append(page, entry, &cookie);
    870	} while (!status && !entry->eof);
    871
    872	switch (status) {
    873	case -EBADCOOKIE:
    874		if (!entry->eof)
    875			break;
    876		nfs_readdir_page_set_eof(page);
    877		fallthrough;
    878	case -EAGAIN:
    879		status = 0;
    880		break;
    881	case -ENOSPC:
    882		status = 0;
    883		if (!desc->plus)
    884			break;
    885		while (!nfs_readdir_entry_decode(desc, entry, &stream))
    886			;
    887	}
    888
    889	if (page != *arrays)
    890		nfs_readdir_page_unlock_and_put(page);
    891
    892	put_page(scratch);
    893	return status;
    894}
    895
    896static void nfs_readdir_free_pages(struct page **pages, size_t npages)
    897{
    898	while (npages--)
    899		put_page(pages[npages]);
    900	kfree(pages);
    901}
    902
    903/*
    904 * nfs_readdir_alloc_pages() will allocate pages that must be freed with a call
    905 * to nfs_readdir_free_pages()
    906 */
    907static struct page **nfs_readdir_alloc_pages(size_t npages)
    908{
    909	struct page **pages;
    910	size_t i;
    911
    912	pages = kmalloc_array(npages, sizeof(*pages), GFP_KERNEL);
    913	if (!pages)
    914		return NULL;
    915	for (i = 0; i < npages; i++) {
    916		struct page *page = alloc_page(GFP_KERNEL);
    917		if (page == NULL)
    918			goto out_freepages;
    919		pages[i] = page;
    920	}
    921	return pages;
    922
    923out_freepages:
    924	nfs_readdir_free_pages(pages, i);
    925	return NULL;
    926}
    927
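/*
 * Fill the cache from the server: allocate temporary XDR buffer pages,
 * issue the READDIR(PLUS) call via nfs_readdir_xdr_filler(), then
 * decode the reply into one or more cache-array pages with
 * nfs_readdir_page_filler(). An empty reply marks the page as EOF.
 */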
    928static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
    929				    __be32 *verf_arg, __be32 *verf_res,
    930				    struct page **arrays, size_t narrays)
    931{
    932	u64 change_attr;
    933	struct page **pages;
    934	struct page *page = *arrays;
    935	struct nfs_entry *entry;
    936	size_t array_size;
    937	struct inode *inode = file_inode(desc->file);
    938	unsigned int dtsize = desc->dtsize;
    939	unsigned int pglen;
    940	int status = -ENOMEM;
    941
    942	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
    943	if (!entry)
    944		return -ENOMEM;
    945	entry->cookie = nfs_readdir_page_last_cookie(page);
    946	entry->fh = nfs_alloc_fhandle();
    947	entry->fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode));
    948	entry->server = NFS_SERVER(inode);
    949	if (entry->fh == NULL || entry->fattr == NULL)
    950		goto out;
    951
    952	array_size = (dtsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
    953	pages = nfs_readdir_alloc_pages(array_size);
    954	if (!pages)
    955		goto out;
    956
    957	change_attr = inode_peek_iversion_raw(inode);
    958	status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie, pages,
    959					dtsize, verf_res);
    960	if (status < 0)
    961		goto free_pages;
    962
    963	pglen = status;
    964	if (pglen != 0)
    965		status = nfs_readdir_page_filler(desc, entry, pages, pglen,
    966						 arrays, narrays, change_attr);
    967	else
    968		nfs_readdir_page_set_eof(page);
    969	desc->buffer_fills++;
    970
    971free_pages:
    972	nfs_readdir_free_pages(pages, array_size);
    973out:
    974	nfs_free_fattr(entry->fattr);
    975	nfs_free_fhandle(entry->fh);
    976	kfree(entry);
    977	return status;
    978}
    979
    980static void nfs_readdir_page_put(struct nfs_readdir_descriptor *desc)
    981{
    982	put_page(desc->page);
    983	desc->page = NULL;
    984}
    985
    986static void
    987nfs_readdir_page_unlock_and_put_cached(struct nfs_readdir_descriptor *desc)
    988{
    989	unlock_page(desc->page);
    990	nfs_readdir_page_put(desc);
    991}
    992
    993static struct page *
    994nfs_readdir_page_get_cached(struct nfs_readdir_descriptor *desc)
    995{
    996	struct address_space *mapping = desc->file->f_mapping;
    997	u64 change_attr = inode_peek_iversion_raw(mapping->host);
    998	u64 cookie = desc->last_cookie;
    999	struct page *page;
   1000
   1001	page = nfs_readdir_page_get_locked(mapping, cookie, change_attr);
   1002	if (!page)
   1003		return NULL;
   1004	if (desc->clear_cache && !nfs_readdir_page_needs_filling(page))
   1005		nfs_readdir_page_reinit_array(page, cookie, change_attr);
   1006	return page;
   1007}
   1008
   1009/*
   1010 * Returns 0 if desc->dir_cookie was found on page desc->page_index
   1011 * and locks the page to prevent removal from the page cache.
   1012 */
   1013static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
   1014{
   1015	struct inode *inode = file_inode(desc->file);
   1016	struct nfs_inode *nfsi = NFS_I(inode);
   1017	__be32 verf[NFS_DIR_VERIFIER_SIZE];
   1018	int res;
   1019
   1020	desc->page = nfs_readdir_page_get_cached(desc);
   1021	if (!desc->page)
   1022		return -ENOMEM;
   1023	if (nfs_readdir_page_needs_filling(desc->page)) {
   1024		/* Grow the dtsize if we had to go back for more pages */
   1025		if (desc->page_index == desc->page_index_max)
   1026			nfs_grow_dtsize(desc);
   1027		desc->page_index_max = desc->page_index;
   1028		trace_nfs_readdir_cache_fill(desc->file, nfsi->cookieverf,
   1029					     desc->last_cookie,
   1030					     desc->page->index, desc->dtsize);
   1031		res = nfs_readdir_xdr_to_array(desc, nfsi->cookieverf, verf,
   1032					       &desc->page, 1);
   1033		if (res < 0) {
   1034			nfs_readdir_page_unlock_and_put_cached(desc);
   1035			trace_nfs_readdir_cache_fill_done(inode, res);
   1036			if (res == -EBADCOOKIE || res == -ENOTSYNC) {
   1037				invalidate_inode_pages2(desc->file->f_mapping);
   1038				nfs_readdir_rewind_search(desc);
   1039				trace_nfs_readdir_invalidate_cache_range(
   1040					inode, 0, MAX_LFS_FILESIZE);
   1041				return -EAGAIN;
   1042			}
   1043			return res;
   1044		}
   1045		/*
   1046		 * Set the cookie verifier if the page cache was empty
   1047		 */
   1048		if (desc->last_cookie == 0 &&
   1049		    memcmp(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf))) {
   1050			memcpy(nfsi->cookieverf, verf,
   1051			       sizeof(nfsi->cookieverf));
   1052			invalidate_inode_pages2_range(desc->file->f_mapping, 1,
   1053						      -1);
   1054			trace_nfs_readdir_invalidate_cache_range(
   1055				inode, 1, MAX_LFS_FILESIZE);
   1056		}
   1057		desc->clear_cache = false;
   1058	}
   1059	res = nfs_readdir_search_array(desc);
   1060	if (res == 0)
   1061		return 0;
   1062	nfs_readdir_page_unlock_and_put_cached(desc);
   1063	return res;
   1064}
   1065
   1066/* Search for desc->dir_cookie from the beginning of the page cache */
   1067static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc)
   1068{
   1069	int res;
   1070
   1071	do {
   1072		res = find_and_lock_cache_page(desc);
   1073	} while (res == -EAGAIN);
   1074	return res;
   1075}
   1076
   1077/*
   1078 * Once we've found the start of the dirent within a page: fill 'er up...
   1079 */
   1080static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
   1081			   const __be32 *verf)
   1082{
   1083	struct file	*file = desc->file;
   1084	struct nfs_cache_array *array;
   1085	unsigned int i;
   1086
   1087	array = kmap(desc->page);
   1088	for (i = desc->cache_entry_index; i < array->size; i++) {
   1089		struct nfs_cache_array_entry *ent;
   1090
   1091		ent = &array->array[i];
   1092		if (!dir_emit(desc->ctx, ent->name, ent->name_len,
   1093		    nfs_compat_user_ino64(ent->ino), ent->d_type)) {
   1094			desc->eob = true;
   1095			break;
   1096		}
   1097		memcpy(desc->verf, verf, sizeof(desc->verf));
   1098		if (i == array->size - 1) {
   1099			desc->dir_cookie = array->last_cookie;
   1100			nfs_readdir_seek_next_array(array, desc);
   1101		} else {
   1102			desc->dir_cookie = array->array[i + 1].cookie;
   1103			desc->last_cookie = array->array[0].cookie;
   1104		}
   1105		if (nfs_readdir_use_cookie(file))
   1106			desc->ctx->pos = desc->dir_cookie;
   1107		else
   1108			desc->ctx->pos++;
   1109	}
   1110	if (array->page_is_eof)
   1111		desc->eof = !desc->eob;
   1112
   1113	kunmap(desc->page);
   1114	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %llu\n",
   1115			(unsigned long long)desc->dir_cookie);
   1116}
   1117
   1118/*
   1119 * If we cannot find a cookie in our cache, we suspect that this is
   1120 * because it points to a deleted file, so we ask the server to return
   1121 * whatever it thinks is the next entry. We then feed this to filldir.
   1122 * If all goes well, we should then be able to find our way round the
   1123 * cache on the next call to readdir_search_pagecache();
   1124 *
   1125 * NOTE: we cannot add the anonymous page to the pagecache because
   1126 *	 the data it contains might not be page aligned. Besides,
   1127 *	 we should already have a complete representation of the
   1128 *	 directory in the page cache by the time we get here.
   1129 */
   1130static int uncached_readdir(struct nfs_readdir_descriptor *desc)
   1131{
   1132	struct page	**arrays;
   1133	size_t		i, sz = 512;
   1134	__be32		verf[NFS_DIR_VERIFIER_SIZE];
   1135	int		status = -ENOMEM;
   1136
   1137	dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %llu\n",
   1138			(unsigned long long)desc->dir_cookie);
   1139
   1140	arrays = kcalloc(sz, sizeof(*arrays), GFP_KERNEL);
   1141	if (!arrays)
   1142		goto out;
   1143	arrays[0] = nfs_readdir_page_array_alloc(desc->dir_cookie, GFP_KERNEL);
   1144	if (!arrays[0])
   1145		goto out;
   1146
   1147	desc->page_index = 0;
   1148	desc->cache_entry_index = 0;
   1149	desc->last_cookie = desc->dir_cookie;
   1150	desc->page_index_max = 0;
   1151
   1152	trace_nfs_readdir_uncached(desc->file, desc->verf, desc->last_cookie,
   1153				   -1, desc->dtsize);
   1154
   1155	status = nfs_readdir_xdr_to_array(desc, desc->verf, verf, arrays, sz);
   1156	if (status < 0) {
   1157		trace_nfs_readdir_uncached_done(file_inode(desc->file), status);
   1158		goto out_free;
   1159	}
   1160
   1161	for (i = 0; !desc->eob && i < sz && arrays[i]; i++) {
   1162		desc->page = arrays[i];
   1163		nfs_do_filldir(desc, verf);
   1164	}
   1165	desc->page = NULL;
   1166
   1167	/*
   1168	 * Grow the dtsize if we have to go back for more pages,
   1169	 * or shrink it if we're reading too many.
   1170	 */
   1171	if (!desc->eof) {
   1172		if (!desc->eob)
   1173			nfs_grow_dtsize(desc);
   1174		else if (desc->buffer_fills == 1 &&
   1175			 i < (desc->page_index_max >> 1))
   1176			nfs_shrink_dtsize(desc);
   1177	}
   1178out_free:
   1179	for (i = 0; i < sz && arrays[i]; i++)
   1180		nfs_readdir_page_array_free(arrays[i]);
   1181out:
   1182	if (!nfs_readdir_use_cookie(desc->file))
   1183		nfs_readdir_rewind_search(desc);
   1184	desc->page_index_max = -1;
   1185	kfree(arrays);
   1186	dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status);
   1187	return status;
   1188}
   1189
   1190#define NFS_READDIR_CACHE_MISS_THRESHOLD (16UL)
   1191
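/*
 * Decide whether the readdir cache pages should be cleared and
 * refilled as we go: only relevant for READDIRPLUS reads that do not
 * start at offset zero, and only when the miss count exceeds the
 * threshold above or a clear was already forced.
 */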
   1192static bool nfs_readdir_handle_cache_misses(struct inode *inode,
   1193					    struct nfs_readdir_descriptor *desc,
   1194					    unsigned int cache_misses,
   1195					    bool force_clear)
   1196{
   1197	if (desc->ctx->pos == 0 || !desc->plus)
   1198		return false;
   1199	if (cache_misses <= NFS_READDIR_CACHE_MISS_THRESHOLD && !force_clear)
   1200		return false;
   1201	trace_nfs_readdir_force_readdirplus(inode);
   1202	return true;
   1203}
   1204
    1205/* The file offset position represents the dirent entry number.
    1206 * A last cookie cache takes care of the common case of reading the
    1207 * whole directory.
    1208 */
   1209static int nfs_readdir(struct file *file, struct dir_context *ctx)
   1210{
   1211	struct dentry	*dentry = file_dentry(file);
   1212	struct inode	*inode = d_inode(dentry);
   1213	struct nfs_inode *nfsi = NFS_I(inode);
   1214	struct nfs_open_dir_context *dir_ctx = file->private_data;
   1215	struct nfs_readdir_descriptor *desc;
   1216	unsigned int cache_hits, cache_misses;
   1217	bool force_clear;
   1218	int res;
   1219
   1220	dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
   1221			file, (long long)ctx->pos);
   1222	nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
   1223
   1224	/*
   1225	 * ctx->pos points to the dirent entry number.
    1226	 * desc->dir_cookie has the cookie for the next entry. We have
   1227	 * to either find the entry with the appropriate number or
   1228	 * revalidate the cookie.
   1229	 */
   1230	nfs_revalidate_mapping(inode, file->f_mapping);
   1231
   1232	res = -ENOMEM;
   1233	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
   1234	if (!desc)
   1235		goto out;
   1236	desc->file = file;
   1237	desc->ctx = ctx;
   1238	desc->page_index_max = -1;
   1239
   1240	spin_lock(&file->f_lock);
   1241	desc->dir_cookie = dir_ctx->dir_cookie;
   1242	desc->page_index = dir_ctx->page_index;
   1243	desc->last_cookie = dir_ctx->last_cookie;
   1244	desc->attr_gencount = dir_ctx->attr_gencount;
   1245	desc->eof = dir_ctx->eof;
   1246	nfs_set_dtsize(desc, dir_ctx->dtsize);
   1247	memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
   1248	cache_hits = atomic_xchg(&dir_ctx->cache_hits, 0);
   1249	cache_misses = atomic_xchg(&dir_ctx->cache_misses, 0);
   1250	force_clear = dir_ctx->force_clear;
   1251	spin_unlock(&file->f_lock);
   1252
   1253	if (desc->eof) {
   1254		res = 0;
   1255		goto out_free;
   1256	}
   1257
   1258	desc->plus = nfs_use_readdirplus(inode, ctx, cache_hits, cache_misses);
   1259	force_clear = nfs_readdir_handle_cache_misses(inode, desc, cache_misses,
   1260						      force_clear);
   1261	desc->clear_cache = force_clear;
   1262
   1263	do {
   1264		res = readdir_search_pagecache(desc);
   1265
   1266		if (res == -EBADCOOKIE) {
   1267			res = 0;
   1268			/* This means either end of directory */
   1269			if (desc->dir_cookie && !desc->eof) {
   1270				/* Or that the server has 'lost' a cookie */
   1271				res = uncached_readdir(desc);
   1272				if (res == 0)
   1273					continue;
   1274				if (res == -EBADCOOKIE || res == -ENOTSYNC)
   1275					res = 0;
   1276			}
   1277			break;
   1278		}
   1279		if (res == -ETOOSMALL && desc->plus) {
   1280			nfs_zap_caches(inode);
   1281			desc->plus = false;
   1282			desc->eof = false;
   1283			continue;
   1284		}
   1285		if (res < 0)
   1286			break;
   1287
   1288		nfs_do_filldir(desc, nfsi->cookieverf);
   1289		nfs_readdir_page_unlock_and_put_cached(desc);
   1290		if (desc->page_index == desc->page_index_max)
   1291			desc->clear_cache = force_clear;
   1292	} while (!desc->eob && !desc->eof);
   1293
   1294	spin_lock(&file->f_lock);
   1295	dir_ctx->dir_cookie = desc->dir_cookie;
   1296	dir_ctx->last_cookie = desc->last_cookie;
   1297	dir_ctx->attr_gencount = desc->attr_gencount;
   1298	dir_ctx->page_index = desc->page_index;
   1299	dir_ctx->force_clear = force_clear;
   1300	dir_ctx->eof = desc->eof;
   1301	dir_ctx->dtsize = desc->dtsize;
   1302	memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
   1303	spin_unlock(&file->f_lock);
   1304out_free:
   1305	kfree(desc);
   1306
   1307out:
   1308	dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
   1309	return res;
   1310}
   1311
   1312static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
   1313{
   1314	struct nfs_open_dir_context *dir_ctx = filp->private_data;
   1315
   1316	dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
   1317			filp, offset, whence);
   1318
   1319	switch (whence) {
   1320	default:
   1321		return -EINVAL;
   1322	case SEEK_SET:
   1323		if (offset < 0)
   1324			return -EINVAL;
   1325		spin_lock(&filp->f_lock);
   1326		break;
   1327	case SEEK_CUR:
   1328		if (offset == 0)
   1329			return filp->f_pos;
   1330		spin_lock(&filp->f_lock);
   1331		offset += filp->f_pos;
   1332		if (offset < 0) {
   1333			spin_unlock(&filp->f_lock);
   1334			return -EINVAL;
   1335		}
   1336	}
   1337	if (offset != filp->f_pos) {
   1338		filp->f_pos = offset;
   1339		dir_ctx->page_index = 0;
   1340		if (!nfs_readdir_use_cookie(filp)) {
   1341			dir_ctx->dir_cookie = 0;
   1342			dir_ctx->last_cookie = 0;
   1343		} else {
   1344			dir_ctx->dir_cookie = offset;
   1345			dir_ctx->last_cookie = offset;
   1346		}
   1347		dir_ctx->eof = false;
   1348	}
   1349	spin_unlock(&filp->f_lock);
   1350	return offset;
   1351}
   1352
   1353/*
   1354 * All directory operations under NFS are synchronous, so fsync()
   1355 * is a dummy operation.
   1356 */
   1357static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
   1358			 int datasync)
   1359{
   1360	dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync);
   1361
   1362	nfs_inc_stats(file_inode(filp), NFSIOS_VFSFSYNC);
   1363	return 0;
   1364}
   1365
   1366/**
   1367 * nfs_force_lookup_revalidate - Mark the directory as having changed
   1368 * @dir: pointer to directory inode
   1369 *
   1370 * This forces the revalidation code in nfs_lookup_revalidate() to do a
   1371 * full lookup on all child dentries of 'dir' whenever a change occurs
   1372 * on the server that might have invalidated our dcache.
   1373 *
   1374 * Note that we reserve bit '0' as a tag to let us know when a dentry
   1375 * was revalidated while holding a delegation on its inode.
   1376 *
   1377 * The caller should be holding dir->i_lock
   1378 */
   1379void nfs_force_lookup_revalidate(struct inode *dir)
   1380{
   1381	NFS_I(dir)->cache_change_attribute += 2;
   1382}
   1383EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
   1384
   1385/**
   1386 * nfs_verify_change_attribute - Detects NFS remote directory changes
   1387 * @dir: pointer to parent directory inode
   1388 * @verf: previously saved change attribute
   1389 *
    1390 * Return "false" if the verifier doesn't match the change attribute.
   1391 * This would usually indicate that the directory contents have changed on
   1392 * the server, and that any dentries need revalidating.
   1393 */
   1394static bool nfs_verify_change_attribute(struct inode *dir, unsigned long verf)
   1395{
   1396	return (verf & ~1UL) == nfs_save_change_attribute(dir);
   1397}
   1398
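/*
 * Bit 0 of the verifier cached in dentry->d_time is the "revalidated
 * while holding a delegation" tag; the helpers below set, clear and
 * test that bit.
 */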
   1399static void nfs_set_verifier_delegated(unsigned long *verf)
   1400{
   1401	*verf |= 1UL;
   1402}
   1403
   1404#if IS_ENABLED(CONFIG_NFS_V4)
   1405static void nfs_unset_verifier_delegated(unsigned long *verf)
   1406{
   1407	*verf &= ~1UL;
   1408}
   1409#endif /* IS_ENABLED(CONFIG_NFS_V4) */
   1410
   1411static bool nfs_test_verifier_delegated(unsigned long verf)
   1412{
   1413	return verf & 1;
   1414}
   1415
   1416static bool nfs_verifier_is_delegated(struct dentry *dentry)
   1417{
   1418	return nfs_test_verifier_delegated(dentry->d_time);
   1419}
   1420
   1421static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf)
   1422{
   1423	struct inode *inode = d_inode(dentry);
   1424	struct inode *dir = d_inode(dentry->d_parent);
   1425
   1426	if (!nfs_verify_change_attribute(dir, verf))
   1427		return;
   1428	if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
   1429		nfs_set_verifier_delegated(&verf);
   1430	dentry->d_time = verf;
   1431}
   1432
   1433/**
   1434 * nfs_set_verifier - save a parent directory verifier in the dentry
   1435 * @dentry: pointer to dentry
   1436 * @verf: verifier to save
   1437 *
   1438 * Saves the parent directory verifier in @dentry. If the inode has
   1439 * a delegation, we also tag the dentry as having been revalidated
   1440 * while holding a delegation so that we know we don't have to
   1441 * look it up again after a directory change.
   1442 */
   1443void nfs_set_verifier(struct dentry *dentry, unsigned long verf)
   1444{
   1445
   1446	spin_lock(&dentry->d_lock);
   1447	nfs_set_verifier_locked(dentry, verf);
   1448	spin_unlock(&dentry->d_lock);
   1449}
   1450EXPORT_SYMBOL_GPL(nfs_set_verifier);
   1451
   1452#if IS_ENABLED(CONFIG_NFS_V4)
   1453/**
   1454 * nfs_clear_verifier_delegated - clear the dir verifier delegation tag
   1455 * @inode: pointer to inode
   1456 *
   1457 * Iterates through the dentries in the inode alias list and clears
   1458 * the tag used to indicate that the dentry has been revalidated
   1459 * while holding a delegation.
   1460 * This function is intended for use when the delegation is being
   1461 * returned or revoked.
   1462 */
   1463void nfs_clear_verifier_delegated(struct inode *inode)
   1464{
   1465	struct dentry *alias;
   1466
   1467	if (!inode)
   1468		return;
   1469	spin_lock(&inode->i_lock);
   1470	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
   1471		spin_lock(&alias->d_lock);
   1472		nfs_unset_verifier_delegated(&alias->d_time);
   1473		spin_unlock(&alias->d_lock);
   1474	}
   1475	spin_unlock(&inode->i_lock);
   1476}
   1477EXPORT_SYMBOL_GPL(nfs_clear_verifier_delegated);
   1478#endif /* IS_ENABLED(CONFIG_NFS_V4) */
   1479
   1480static int nfs_dentry_verify_change(struct inode *dir, struct dentry *dentry)
   1481{
   1482	if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE) &&
   1483	    d_really_is_negative(dentry))
   1484		return dentry->d_time == inode_peek_iversion_raw(dir);
   1485	return nfs_verify_change_attribute(dir, dentry->d_time);
   1486}
   1487
   1488/*
   1489 * A check for whether or not the parent directory has changed.
   1490 * In the case it has, we assume that the dentries are untrustworthy
   1491 * and may need to be looked up again.
   1492 * If rcu_walk prevents us from performing a full check, return 0.
   1493 */
   1494static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
   1495			      int rcu_walk)
   1496{
   1497	if (IS_ROOT(dentry))
   1498		return 1;
   1499	if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
   1500		return 0;
   1501	if (!nfs_dentry_verify_change(dir, dentry))
   1502		return 0;
   1503	/* Revalidate nfsi->cache_change_attribute before we declare a match */
   1504	if (nfs_mapping_need_revalidate_inode(dir)) {
   1505		if (rcu_walk)
   1506			return 0;
   1507		if (__nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
   1508			return 0;
   1509	}
   1510	if (!nfs_dentry_verify_change(dir, dentry))
   1511		return 0;
   1512	return 1;
   1513}
   1514
   1515/*
   1516 * Use intent information to check whether or not we're going to do
   1517 * an O_EXCL create using this path component.
   1518 */
   1519static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags)
   1520{
   1521	if (NFS_PROTO(dir)->version == 2)
   1522		return 0;
   1523	return flags & LOOKUP_EXCL;
   1524}
   1525
   1526/*
   1527 * Inode and filehandle revalidation for lookups.
   1528 *
   1529 * We force revalidation in the cases where the VFS sets LOOKUP_REVAL,
   1530 * or if the intent information indicates that we're about to open this
   1531 * particular file and the "nocto" mount flag is not set.
   1532 *
   1533 */
   1534static
   1535int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags)
   1536{
   1537	struct nfs_server *server = NFS_SERVER(inode);
   1538	int ret;
   1539
   1540	if (IS_AUTOMOUNT(inode))
   1541		return 0;
   1542
   1543	if (flags & LOOKUP_OPEN) {
   1544		switch (inode->i_mode & S_IFMT) {
   1545		case S_IFREG:
   1546			/* A NFSv4 OPEN will revalidate later */
   1547			if (server->caps & NFS_CAP_ATOMIC_OPEN)
   1548				goto out;
   1549			fallthrough;
   1550		case S_IFDIR:
   1551			if (server->flags & NFS_MOUNT_NOCTO)
   1552				break;
   1553			/* NFS close-to-open cache consistency validation */
   1554			goto out_force;
   1555		}
   1556	}
   1557
   1558	/* VFS wants an on-the-wire revalidation */
   1559	if (flags & LOOKUP_REVAL)
   1560		goto out_force;
   1561out:
   1562	if (inode->i_nlink > 0 ||
   1563	    (inode->i_nlink == 0 &&
   1564	     test_bit(NFS_INO_PRESERVE_UNLINKED, &NFS_I(inode)->flags)))
   1565		return 0;
   1566	else
   1567		return -ESTALE;
   1568out_force:
   1569	if (flags & LOOKUP_RCU)
   1570		return -ECHILD;
   1571	ret = __nfs_revalidate_inode(server, inode);
   1572	if (ret != 0)
   1573		return ret;
   1574	goto out;
   1575}
   1576
   1577static void nfs_mark_dir_for_revalidate(struct inode *inode)
   1578{
   1579	spin_lock(&inode->i_lock);
   1580	nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE);
   1581	spin_unlock(&inode->i_lock);
   1582}
   1583
   1584/*
   1585 * We judge how long we want to trust negative
   1586 * dentries by looking at the parent inode mtime.
   1587 *
   1588 * If parent mtime has changed, we revalidate, else we wait for a
   1589 * period corresponding to the parent's attribute cache timeout value.
   1590 *
   1591 * If LOOKUP_RCU prevents us from performing a full check, return 1
   1592 * suggesting a reval is needed.
   1593 *
   1594 * Note that when creating a new file, or looking up a rename target,
    1595 * it shouldn't be necessary to revalidate a negative dentry.
   1596 */
   1597static inline
   1598int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
   1599		       unsigned int flags)
   1600{
   1601	if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
   1602		return 0;
   1603	if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
   1604		return 1;
   1605	/* Case insensitive server? Revalidate negative dentries */
   1606	if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
   1607		return 1;
   1608	return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
   1609}
   1610
   1611static int
   1612nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry,
   1613			   struct inode *inode, int error)
   1614{
   1615	switch (error) {
   1616	case 1:
   1617		break;
   1618	case 0:
   1619		/*
   1620		 * We can't d_drop the root of a disconnected tree:
   1621		 * its d_hash is on the s_anon list and d_drop() would hide
   1622		 * it from shrink_dcache_for_unmount(), leading to busy
   1623		 * inodes on unmount and further oopses.
   1624		 */
   1625		if (inode && IS_ROOT(dentry))
   1626			error = 1;
   1627		break;
   1628	}
   1629	trace_nfs_lookup_revalidate_exit(dir, dentry, 0, error);
   1630	return error;
   1631}
   1632
   1633static int
   1634nfs_lookup_revalidate_negative(struct inode *dir, struct dentry *dentry,
   1635			       unsigned int flags)
   1636{
   1637	int ret = 1;
   1638	if (nfs_neg_need_reval(dir, dentry, flags)) {
   1639		if (flags & LOOKUP_RCU)
   1640			return -ECHILD;
   1641		ret = 0;
   1642	}
   1643	return nfs_lookup_revalidate_done(dir, dentry, NULL, ret);
   1644}
   1645
   1646static int
   1647nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry,
   1648				struct inode *inode)
   1649{
   1650	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
   1651	return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
   1652}
   1653
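/*
 * Revalidate the dentry with an on-the-wire LOOKUP: compare the
 * returned filehandle and attributes against the cached inode. Returns
 * 1 if the dentry is still valid, 0 to invalidate it, or a negative
 * error; -ETIMEDOUT is treated as valid when the mount uses softreval.
 */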
   1654static int nfs_lookup_revalidate_dentry(struct inode *dir,
   1655					struct dentry *dentry,
   1656					struct inode *inode, unsigned int flags)
   1657{
   1658	struct nfs_fh *fhandle;
   1659	struct nfs_fattr *fattr;
   1660	unsigned long dir_verifier;
   1661	int ret;
   1662
   1663	trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
   1664
   1665	ret = -ENOMEM;
   1666	fhandle = nfs_alloc_fhandle();
   1667	fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode));
   1668	if (fhandle == NULL || fattr == NULL)
   1669		goto out;
   1670
   1671	dir_verifier = nfs_save_change_attribute(dir);
   1672	ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
   1673	if (ret < 0) {
   1674		switch (ret) {
   1675		case -ESTALE:
   1676		case -ENOENT:
   1677			ret = 0;
   1678			break;
   1679		case -ETIMEDOUT:
   1680			if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)
   1681				ret = 1;
   1682		}
   1683		goto out;
   1684	}
   1685
   1686	/* Request help from readdirplus */
   1687	nfs_lookup_advise_force_readdirplus(dir, flags);
   1688
   1689	ret = 0;
   1690	if (nfs_compare_fh(NFS_FH(inode), fhandle))
   1691		goto out;
   1692	if (nfs_refresh_inode(inode, fattr) < 0)
   1693		goto out;
   1694
   1695	nfs_setsecurity(inode, fattr);
   1696	nfs_set_verifier(dentry, dir_verifier);
   1697
   1698	ret = 1;
   1699out:
   1700	nfs_free_fattr(fattr);
   1701	nfs_free_fhandle(fhandle);
   1702
   1703	/*
   1704	 * If the lookup failed despite the dentry change attribute being
   1705	 * a match, then we should revalidate the directory cache.
   1706	 */
   1707	if (!ret && nfs_dentry_verify_change(dir, dentry))
   1708		nfs_mark_dir_for_revalidate(dir);
   1709	return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
   1710}
   1711
   1712/*
   1713 * This is called every time the dcache has a lookup hit,
   1714 * and we should check whether we can really trust that
   1715 * lookup.
   1716 *
   1717 * NOTE! The hit can be a negative hit too, don't assume
   1718 * we have an inode!
   1719 *
   1720 * If the parent directory is seen to have changed, we throw out the
   1721 * cached dentry and do a new lookup.
   1722 */
   1723static int
   1724nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
   1725			 unsigned int flags)
   1726{
   1727	struct inode *inode;
   1728	int error;
   1729
   1730	nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
   1731	inode = d_inode(dentry);
   1732
   1733	if (!inode)
   1734		return nfs_lookup_revalidate_negative(dir, dentry, flags);
   1735
   1736	if (is_bad_inode(inode)) {
   1737		dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
   1738				__func__, dentry);
   1739		goto out_bad;
   1740	}
   1741
   1742	if (nfs_verifier_is_delegated(dentry))
   1743		return nfs_lookup_revalidate_delegated(dir, dentry, inode);
   1744
   1745	/* Force a full look up iff the parent directory has changed */
   1746	if (!(flags & (LOOKUP_EXCL | LOOKUP_REVAL)) &&
   1747	    nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
   1748		error = nfs_lookup_verify_inode(inode, flags);
   1749		if (error) {
   1750			if (error == -ESTALE)
   1751				nfs_mark_dir_for_revalidate(dir);
   1752			goto out_bad;
   1753		}
   1754		goto out_valid;
   1755	}
   1756
   1757	if (flags & LOOKUP_RCU)
   1758		return -ECHILD;
   1759
   1760	if (NFS_STALE(inode))
   1761		goto out_bad;
   1762
   1763	return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags);
   1764out_valid:
   1765	return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
   1766out_bad:
   1767	if (flags & LOOKUP_RCU)
   1768		return -ECHILD;
   1769	return nfs_lookup_revalidate_done(dir, dentry, inode, 0);
   1770}
   1771
   1772static int
   1773__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
   1774			int (*reval)(struct inode *, struct dentry *, unsigned int))
   1775{
   1776	struct dentry *parent;
   1777	struct inode *dir;
   1778	int ret;
   1779
   1780	if (flags & LOOKUP_RCU) {
   1781		parent = READ_ONCE(dentry->d_parent);
   1782		dir = d_inode_rcu(parent);
   1783		if (!dir)
   1784			return -ECHILD;
   1785		ret = reval(dir, dentry, flags);
   1786		if (parent != READ_ONCE(dentry->d_parent))
   1787			return -ECHILD;
   1788	} else {
   1789		parent = dget_parent(dentry);
   1790		ret = reval(d_inode(parent), dentry, flags);
   1791		dput(parent);
   1792	}
   1793	return ret;
   1794}
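
        /*
         * Under LOOKUP_RCU no references may be taken, so the RCU branch
         * above samples dentry->d_parent before and after running the
         * revalidation callback and returns -ECHILD if it changed: a
         * concurrent rename may have moved the dentry, and a result that
         * was computed against the old parent must be thrown away.  The
         * non-RCU branch simply pins the parent with dget_parent()/dput().
         */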
   1795
   1796static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
   1797{
   1798	return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate);
   1799}
   1800
   1801/*
   1802 * A weaker form of d_revalidate for revalidating just the d_inode(dentry)
   1803 * when we don't really care about the dentry name. This is called when a
   1804 * pathwalk ends on a dentry that was not found via a normal lookup in the
   1805 * parent dir (e.g.: ".", "..", procfs symlinks or mountpoint traversals).
   1806 *
   1807 * In this situation, we just want to verify that the inode itself is OK
   1808 * since the dentry might have changed on the server.
   1809 */
   1810static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
   1811{
   1812	struct inode *inode = d_inode(dentry);
   1813	int error = 0;
   1814
   1815	/*
   1816	 * I believe we can only get a negative dentry here in the case of a
   1817	 * procfs-style symlink. Just assume it's correct for now, but we may
   1818	 * eventually need to do something more here.
   1819	 */
   1820	if (!inode) {
   1821		dfprintk(LOOKUPCACHE, "%s: %pd2 has negative inode\n",
   1822				__func__, dentry);
   1823		return 1;
   1824	}
   1825
   1826	if (is_bad_inode(inode)) {
   1827		dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
   1828				__func__, dentry);
   1829		return 0;
   1830	}
   1831
   1832	error = nfs_lookup_verify_inode(inode, flags);
   1833	dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n",
   1834			__func__, inode->i_ino, error ? "invalid" : "valid");
   1835	return !error;
   1836}
   1837
   1838/*
   1839 * This is called from dput() when d_count is going to 0.
   1840 */
   1841static int nfs_dentry_delete(const struct dentry *dentry)
   1842{
   1843	dfprintk(VFS, "NFS: dentry_delete(%pd2, %x)\n",
   1844		dentry, dentry->d_flags);
   1845
   1846	/* Unhash any dentry with a stale inode */
   1847	if (d_really_is_positive(dentry) && NFS_STALE(d_inode(dentry)))
   1848		return 1;
   1849
   1850	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
   1851		/* Unhash it, so that ->d_iput() would be called */
   1852		return 1;
   1853	}
   1854	if (!(dentry->d_sb->s_flags & SB_ACTIVE)) {
   1855		/* Unhash it, so that ancestors of killed async unlink
   1856		 * files will be cleaned up during umount */
   1857		return 1;
   1858	}
   1859	return 0;
   1860
   1861}
   1862
   1863/* Ensure that we revalidate inode->i_nlink */
   1864static void nfs_drop_nlink(struct inode *inode)
   1865{
   1866	spin_lock(&inode->i_lock);
   1867	/* drop the inode if we're reasonably sure this is the last link */
   1868	if (inode->i_nlink > 0)
   1869		drop_nlink(inode);
   1870	NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter();
   1871	nfs_set_cache_invalid(
   1872		inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME |
   1873			       NFS_INO_INVALID_NLINK);
   1874	spin_unlock(&inode->i_lock);
   1875}
   1876
   1877/*
   1878 * Called when the dentry loses inode.
   1879 * We use it to clean up silly-renamed files.
   1880 */
   1881static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
   1882{
   1883	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
   1884		nfs_complete_unlink(dentry, inode);
   1885		nfs_drop_nlink(inode);
   1886	}
   1887	iput(inode);
   1888}
   1889
   1890static void nfs_d_release(struct dentry *dentry)
   1891{
   1892	/* free cached devname value, if it survived that far */
   1893	if (unlikely(dentry->d_fsdata)) {
   1894		if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
   1895			WARN_ON(1);
   1896		else
   1897			kfree(dentry->d_fsdata);
   1898	}
   1899}
   1900
   1901const struct dentry_operations nfs_dentry_operations = {
   1902	.d_revalidate	= nfs_lookup_revalidate,
   1903	.d_weak_revalidate	= nfs_weak_revalidate,
   1904	.d_delete	= nfs_dentry_delete,
   1905	.d_iput		= nfs_dentry_iput,
   1906	.d_automount	= nfs_d_automount,
   1907	.d_release	= nfs_d_release,
   1908};
   1909EXPORT_SYMBOL_GPL(nfs_dentry_operations);
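
        /*
         * Nothing in this file calls d_set_d_op() directly; in mainline the
         * mount code points the superblock at the table exported by the
         * protocol version, roughly
         *
         *	sb->s_d_op = server->nfs_client->rpc_ops->dentry_ops;
         *
         * (see fs/nfs/super.c), so d_alloc() attaches these operations to
         * every dentry created on the mount.
         */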
   1910
   1911struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
   1912{
   1913	struct dentry *res;
   1914	struct inode *inode = NULL;
   1915	struct nfs_fh *fhandle = NULL;
   1916	struct nfs_fattr *fattr = NULL;
   1917	unsigned long dir_verifier;
   1918	int error;
   1919
   1920	dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry);
   1921	nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
   1922
   1923	if (unlikely(dentry->d_name.len > NFS_SERVER(dir)->namelen))
   1924		return ERR_PTR(-ENAMETOOLONG);
   1925
   1926	/*
   1927	 * If we're doing an exclusive create, optimize away the lookup
   1928	 * but don't hash the dentry.
   1929	 */
   1930	if (nfs_is_exclusive_create(dir, flags) || flags & LOOKUP_RENAME_TARGET)
   1931		return NULL;
   1932
   1933	res = ERR_PTR(-ENOMEM);
   1934	fhandle = nfs_alloc_fhandle();
   1935	fattr = nfs_alloc_fattr_with_label(NFS_SERVER(dir));
   1936	if (fhandle == NULL || fattr == NULL)
   1937		goto out;
   1938
   1939	dir_verifier = nfs_save_change_attribute(dir);
   1940	trace_nfs_lookup_enter(dir, dentry, flags);
   1941	error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
   1942	if (error == -ENOENT) {
   1943		if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
   1944			dir_verifier = inode_peek_iversion_raw(dir);
   1945		goto no_entry;
   1946	}
   1947	if (error < 0) {
   1948		res = ERR_PTR(error);
   1949		goto out;
   1950	}
   1951	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
   1952	res = ERR_CAST(inode);
   1953	if (IS_ERR(res))
   1954		goto out;
   1955
   1956	/* Notify readdir to use READDIRPLUS */
   1957	nfs_lookup_advise_force_readdirplus(dir, flags);
   1958
   1959no_entry:
   1960	res = d_splice_alias(inode, dentry);
   1961	if (res != NULL) {
   1962		if (IS_ERR(res))
   1963			goto out;
   1964		dentry = res;
   1965	}
   1966	nfs_set_verifier(dentry, dir_verifier);
   1967out:
   1968	trace_nfs_lookup_exit(dir, dentry, flags, PTR_ERR_OR_ZERO(res));
   1969	nfs_free_fattr(fattr);
   1970	nfs_free_fhandle(fhandle);
   1971	return res;
   1972}
   1973EXPORT_SYMBOL_GPL(nfs_lookup);
   1974
   1975void nfs_d_prune_case_insensitive_aliases(struct inode *inode)
   1976{
   1977	/* Case insensitive server? Revalidate dentries */
   1978	if (inode && nfs_server_capable(inode, NFS_CAP_CASE_INSENSITIVE))
   1979		d_prune_aliases(inode);
   1980}
   1981EXPORT_SYMBOL_GPL(nfs_d_prune_case_insensitive_aliases);
   1982
   1983#if IS_ENABLED(CONFIG_NFS_V4)
   1984static int nfs4_lookup_revalidate(struct dentry *, unsigned int);
   1985
   1986const struct dentry_operations nfs4_dentry_operations = {
   1987	.d_revalidate	= nfs4_lookup_revalidate,
   1988	.d_weak_revalidate	= nfs_weak_revalidate,
   1989	.d_delete	= nfs_dentry_delete,
   1990	.d_iput		= nfs_dentry_iput,
   1991	.d_automount	= nfs_d_automount,
   1992	.d_release	= nfs_d_release,
   1993};
   1994EXPORT_SYMBOL_GPL(nfs4_dentry_operations);
   1995
   1996static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags, struct file *filp)
   1997{
   1998	return alloc_nfs_open_context(dentry, flags_to_mode(open_flags), filp);
   1999}
   2000
   2001static int do_open(struct inode *inode, struct file *filp)
   2002{
   2003	nfs_fscache_open_file(inode, filp);
   2004	return 0;
   2005}
   2006
   2007static int nfs_finish_open(struct nfs_open_context *ctx,
   2008			   struct dentry *dentry,
   2009			   struct file *file, unsigned open_flags)
   2010{
   2011	int err;
   2012
   2013	err = finish_open(file, dentry, do_open);
   2014	if (err)
   2015		goto out;
   2016	if (S_ISREG(file->f_path.dentry->d_inode->i_mode))
   2017		nfs_file_set_open_context(file, ctx);
   2018	else
   2019		err = -EOPENSTALE;
   2020out:
   2021	return err;
   2022}
   2023
   2024int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
   2025		    struct file *file, unsigned open_flags,
   2026		    umode_t mode)
   2027{
   2028	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
   2029	struct nfs_open_context *ctx;
   2030	struct dentry *res;
   2031	struct iattr attr = { .ia_valid = ATTR_OPEN };
   2032	struct inode *inode;
   2033	unsigned int lookup_flags = 0;
   2034	unsigned long dir_verifier;
   2035	bool switched = false;
   2036	int created = 0;
   2037	int err;
   2038
   2039	/* Expect a negative dentry */
   2040	BUG_ON(d_inode(dentry));
   2041
   2042	dfprintk(VFS, "NFS: atomic_open(%s/%lu), %pd\n",
   2043			dir->i_sb->s_id, dir->i_ino, dentry);
   2044
   2045	err = nfs_check_flags(open_flags);
   2046	if (err)
   2047		return err;
   2048
   2049	/* NFS only supports OPEN on regular files */
   2050	if ((open_flags & O_DIRECTORY)) {
   2051		if (!d_in_lookup(dentry)) {
   2052			/*
   2053			 * Hashed negative dentry with O_DIRECTORY: dentry was
   2054			 * revalidated and is fine, no need to perform lookup
   2055			 * again
   2056			 */
   2057			return -ENOENT;
   2058		}
   2059		lookup_flags = LOOKUP_OPEN|LOOKUP_DIRECTORY;
   2060		goto no_open;
   2061	}
   2062
   2063	if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
   2064		return -ENAMETOOLONG;
   2065
   2066	if (open_flags & O_CREAT) {
   2067		struct nfs_server *server = NFS_SERVER(dir);
   2068
   2069		if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK))
   2070			mode &= ~current_umask();
   2071
   2072		attr.ia_valid |= ATTR_MODE;
   2073		attr.ia_mode = mode;
   2074	}
   2075	if (open_flags & O_TRUNC) {
   2076		attr.ia_valid |= ATTR_SIZE;
   2077		attr.ia_size = 0;
   2078	}
   2079
   2080	if (!(open_flags & O_CREAT) && !d_in_lookup(dentry)) {
   2081		d_drop(dentry);
   2082		switched = true;
   2083		dentry = d_alloc_parallel(dentry->d_parent,
   2084					  &dentry->d_name, &wq);
   2085		if (IS_ERR(dentry))
   2086			return PTR_ERR(dentry);
   2087		if (unlikely(!d_in_lookup(dentry)))
   2088			return finish_no_open(file, dentry);
   2089	}
   2090
   2091	ctx = create_nfs_open_context(dentry, open_flags, file);
   2092	err = PTR_ERR(ctx);
   2093	if (IS_ERR(ctx))
   2094		goto out;
   2095
   2096	trace_nfs_atomic_open_enter(dir, ctx, open_flags);
   2097	inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, &created);
   2098	if (created)
   2099		file->f_mode |= FMODE_CREATED;
   2100	if (IS_ERR(inode)) {
   2101		err = PTR_ERR(inode);
   2102		trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
   2103		put_nfs_open_context(ctx);
   2104		d_drop(dentry);
   2105		switch (err) {
   2106		case -ENOENT:
   2107			d_splice_alias(NULL, dentry);
   2108			if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
   2109				dir_verifier = inode_peek_iversion_raw(dir);
   2110			else
   2111				dir_verifier = nfs_save_change_attribute(dir);
   2112			nfs_set_verifier(dentry, dir_verifier);
   2113			break;
   2114		case -EISDIR:
   2115		case -ENOTDIR:
   2116			goto no_open;
   2117		case -ELOOP:
   2118			if (!(open_flags & O_NOFOLLOW))
   2119				goto no_open;
   2120			break;
   2121			/* case -EINVAL: */
   2122		default:
   2123			break;
   2124		}
   2125		goto out;
   2126	}
   2127	file->f_mode |= FMODE_CAN_ODIRECT;
   2128
   2129	err = nfs_finish_open(ctx, ctx->dentry, file, open_flags);
   2130	trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
   2131	put_nfs_open_context(ctx);
   2132out:
   2133	if (unlikely(switched)) {
   2134		d_lookup_done(dentry);
   2135		dput(dentry);
   2136	}
   2137	return err;
   2138
   2139no_open:
   2140	res = nfs_lookup(dir, dentry, lookup_flags);
   2141	if (!res) {
   2142		inode = d_inode(dentry);
   2143		if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
   2144		    !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)))
   2145			res = ERR_PTR(-ENOTDIR);
   2146		else if (inode && S_ISREG(inode->i_mode))
   2147			res = ERR_PTR(-EOPENSTALE);
   2148	} else if (!IS_ERR(res)) {
   2149		inode = d_inode(res);
   2150		if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
   2151		    !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) {
   2152			dput(res);
   2153			res = ERR_PTR(-ENOTDIR);
   2154		} else if (inode && S_ISREG(inode->i_mode)) {
   2155			dput(res);
   2156			res = ERR_PTR(-EOPENSTALE);
   2157		}
   2158	}
   2159	if (switched) {
   2160		d_lookup_done(dentry);
   2161		if (!res)
   2162			res = dentry;
   2163		else
   2164			dput(dentry);
   2165	}
   2166	if (IS_ERR(res))
   2167		return PTR_ERR(res);
   2168	return finish_no_open(file, res);
   2169}
   2170EXPORT_SYMBOL_GPL(nfs_atomic_open);
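
        /*
         * Rough shape of nfs_atomic_open() above: O_DIRECTORY opens never
         * reach the OPEN RPC (they return -ENOENT or fall back to
         * nfs_lookup()), O_CREAT/O_TRUNC are folded into the iattr handed
         * to ->open_context(), and that single call performs lookup, open
         * and, if needed, create on the server.  On failure, -ENOENT
         * instantiates a cached negative dentry, while -EISDIR/-ENOTDIR
         * and (when O_NOFOLLOW is clear) -ELOOP fall back to the ordinary
         * nfs_lookup() path via the no_open: label.
         */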
   2171
   2172static int
   2173nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
   2174			  unsigned int flags)
   2175{
   2176	struct inode *inode;
   2177
   2178	if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
   2179		goto full_reval;
   2180	if (d_mountpoint(dentry))
   2181		goto full_reval;
   2182
   2183	inode = d_inode(dentry);
   2184
    2185	/* We can't create new files from this revalidation path, so we
   2186	 * optimize away revalidation of negative dentries.
   2187	 */
   2188	if (inode == NULL)
   2189		goto full_reval;
   2190
   2191	if (nfs_verifier_is_delegated(dentry))
   2192		return nfs_lookup_revalidate_delegated(dir, dentry, inode);
   2193
   2194	/* NFS only supports OPEN on regular files */
   2195	if (!S_ISREG(inode->i_mode))
   2196		goto full_reval;
   2197
   2198	/* We cannot do exclusive creation on a positive dentry */
   2199	if (flags & (LOOKUP_EXCL | LOOKUP_REVAL))
   2200		goto reval_dentry;
   2201
   2202	/* Check if the directory changed */
   2203	if (!nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU))
   2204		goto reval_dentry;
   2205
   2206	/* Let f_op->open() actually open (and revalidate) the file */
   2207	return 1;
   2208reval_dentry:
   2209	if (flags & LOOKUP_RCU)
   2210		return -ECHILD;
   2211	return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags);
   2212
   2213full_reval:
   2214	return nfs_do_lookup_revalidate(dir, dentry, flags);
   2215}
   2216
   2217static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
   2218{
   2219	return __nfs_lookup_revalidate(dentry, flags,
   2220			nfs4_do_lookup_revalidate);
   2221}
   2222
    2223#endif /* CONFIG_NFS_V4 */
   2224
   2225struct dentry *
   2226nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle,
   2227				struct nfs_fattr *fattr)
   2228{
   2229	struct dentry *parent = dget_parent(dentry);
   2230	struct inode *dir = d_inode(parent);
   2231	struct inode *inode;
   2232	struct dentry *d;
   2233	int error;
   2234
   2235	d_drop(dentry);
   2236
   2237	if (fhandle->size == 0) {
   2238		error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
   2239		if (error)
   2240			goto out_error;
   2241	}
   2242	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
   2243	if (!(fattr->valid & NFS_ATTR_FATTR)) {
   2244		struct nfs_server *server = NFS_SB(dentry->d_sb);
   2245		error = server->nfs_client->rpc_ops->getattr(server, fhandle,
   2246				fattr, NULL);
   2247		if (error < 0)
   2248			goto out_error;
   2249	}
   2250	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
   2251	d = d_splice_alias(inode, dentry);
   2252out:
   2253	dput(parent);
   2254	return d;
   2255out_error:
   2256	d = ERR_PTR(error);
   2257	goto out;
   2258}
   2259EXPORT_SYMBOL_GPL(nfs_add_or_obtain);
   2260
   2261/*
   2262 * Code common to create, mkdir, and mknod.
   2263 */
   2264int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
   2265				struct nfs_fattr *fattr)
   2266{
   2267	struct dentry *d;
   2268
   2269	d = nfs_add_or_obtain(dentry, fhandle, fattr);
   2270	if (IS_ERR(d))
   2271		return PTR_ERR(d);
   2272
   2273	/* Callers don't care */
   2274	dput(d);
   2275	return 0;
   2276}
   2277EXPORT_SYMBOL_GPL(nfs_instantiate);
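
        /*
         * A typical caller is a per-version create routine: the server
         * replies to CREATE/MKDIR/MKNOD with a file handle plus attributes,
         * and the proc code hands both to nfs_instantiate().  Simplified
         * sketch of the NFSv3 path in fs/nfs/nfs3proc.c:
         *
         *	status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0);
         *	if (status == 0)
         *		status = nfs_instantiate(dentry, data->res.fh,
         *					 data->res.fattr);
         */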
   2278
   2279/*
   2280 * Following a failed create operation, we drop the dentry rather
   2281 * than retain a negative dentry. This avoids a problem in the event
   2282 * that the operation succeeded on the server, but an error in the
   2283 * reply path made it appear to have failed.
   2284 */
   2285int nfs_create(struct user_namespace *mnt_userns, struct inode *dir,
   2286	       struct dentry *dentry, umode_t mode, bool excl)
   2287{
   2288	struct iattr attr;
   2289	int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT;
   2290	int error;
   2291
   2292	dfprintk(VFS, "NFS: create(%s/%lu), %pd\n",
   2293			dir->i_sb->s_id, dir->i_ino, dentry);
   2294
   2295	attr.ia_mode = mode;
   2296	attr.ia_valid = ATTR_MODE;
   2297
   2298	trace_nfs_create_enter(dir, dentry, open_flags);
   2299	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
   2300	trace_nfs_create_exit(dir, dentry, open_flags, error);
   2301	if (error != 0)
   2302		goto out_err;
   2303	return 0;
   2304out_err:
   2305	d_drop(dentry);
   2306	return error;
   2307}
   2308EXPORT_SYMBOL_GPL(nfs_create);
   2309
   2310/*
   2311 * See comments for nfs_proc_create regarding failed operations.
   2312 */
   2313int
   2314nfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
   2315	  struct dentry *dentry, umode_t mode, dev_t rdev)
   2316{
   2317	struct iattr attr;
   2318	int status;
   2319
   2320	dfprintk(VFS, "NFS: mknod(%s/%lu), %pd\n",
   2321			dir->i_sb->s_id, dir->i_ino, dentry);
   2322
   2323	attr.ia_mode = mode;
   2324	attr.ia_valid = ATTR_MODE;
   2325
   2326	trace_nfs_mknod_enter(dir, dentry);
   2327	status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev);
   2328	trace_nfs_mknod_exit(dir, dentry, status);
   2329	if (status != 0)
   2330		goto out_err;
   2331	return 0;
   2332out_err:
   2333	d_drop(dentry);
   2334	return status;
   2335}
   2336EXPORT_SYMBOL_GPL(nfs_mknod);
   2337
   2338/*
   2339 * See comments for nfs_proc_create regarding failed operations.
   2340 */
   2341int nfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
   2342	      struct dentry *dentry, umode_t mode)
   2343{
   2344	struct iattr attr;
   2345	int error;
   2346
   2347	dfprintk(VFS, "NFS: mkdir(%s/%lu), %pd\n",
   2348			dir->i_sb->s_id, dir->i_ino, dentry);
   2349
   2350	attr.ia_valid = ATTR_MODE;
   2351	attr.ia_mode = mode | S_IFDIR;
   2352
   2353	trace_nfs_mkdir_enter(dir, dentry);
   2354	error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr);
   2355	trace_nfs_mkdir_exit(dir, dentry, error);
   2356	if (error != 0)
   2357		goto out_err;
   2358	return 0;
   2359out_err:
   2360	d_drop(dentry);
   2361	return error;
   2362}
   2363EXPORT_SYMBOL_GPL(nfs_mkdir);
   2364
   2365static void nfs_dentry_handle_enoent(struct dentry *dentry)
   2366{
   2367	if (simple_positive(dentry))
   2368		d_delete(dentry);
   2369}
   2370
   2371static void nfs_dentry_remove_handle_error(struct inode *dir,
   2372					   struct dentry *dentry, int error)
   2373{
   2374	switch (error) {
   2375	case -ENOENT:
   2376		d_delete(dentry);
   2377		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
   2378		break;
   2379	case 0:
   2380		nfs_d_prune_case_insensitive_aliases(d_inode(dentry));
   2381		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
   2382	}
   2383}
   2384
   2385int nfs_rmdir(struct inode *dir, struct dentry *dentry)
   2386{
   2387	int error;
   2388
   2389	dfprintk(VFS, "NFS: rmdir(%s/%lu), %pd\n",
   2390			dir->i_sb->s_id, dir->i_ino, dentry);
   2391
   2392	trace_nfs_rmdir_enter(dir, dentry);
   2393	if (d_really_is_positive(dentry)) {
   2394		down_write(&NFS_I(d_inode(dentry))->rmdir_sem);
   2395		error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
   2396		/* Ensure the VFS deletes this inode */
   2397		switch (error) {
   2398		case 0:
   2399			clear_nlink(d_inode(dentry));
   2400			break;
   2401		case -ENOENT:
   2402			nfs_dentry_handle_enoent(dentry);
   2403		}
   2404		up_write(&NFS_I(d_inode(dentry))->rmdir_sem);
   2405	} else
   2406		error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
   2407	nfs_dentry_remove_handle_error(dir, dentry, error);
   2408	trace_nfs_rmdir_exit(dir, dentry, error);
   2409
   2410	return error;
   2411}
   2412EXPORT_SYMBOL_GPL(nfs_rmdir);
   2413
   2414/*
   2415 * Remove a file after making sure there are no pending writes,
   2416 * and after checking that the file has only one user. 
   2417 *
   2418 * We invalidate the attribute cache and free the inode prior to the operation
   2419 * to avoid possible races if the server reuses the inode.
   2420 */
   2421static int nfs_safe_remove(struct dentry *dentry)
   2422{
   2423	struct inode *dir = d_inode(dentry->d_parent);
   2424	struct inode *inode = d_inode(dentry);
   2425	int error = -EBUSY;
    2426
   2427	dfprintk(VFS, "NFS: safe_remove(%pd2)\n", dentry);
   2428
   2429	/* If the dentry was sillyrenamed, we simply call d_delete() */
   2430	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
   2431		error = 0;
   2432		goto out;
   2433	}
   2434
   2435	trace_nfs_remove_enter(dir, dentry);
   2436	if (inode != NULL) {
   2437		error = NFS_PROTO(dir)->remove(dir, dentry);
   2438		if (error == 0)
   2439			nfs_drop_nlink(inode);
   2440	} else
   2441		error = NFS_PROTO(dir)->remove(dir, dentry);
   2442	if (error == -ENOENT)
   2443		nfs_dentry_handle_enoent(dentry);
   2444	trace_nfs_remove_exit(dir, dentry, error);
   2445out:
   2446	return error;
   2447}
   2448
    2449/*  We do silly rename when the target is still in use: if sillyrename()
    2450 *  returns -EBUSY, the inode belongs to an active ".nfs..." file and we
    2451 *  return -EBUSY; if it returns 0, the actual removal is deferred until
    2452 *  the last reference to the file goes away.  Otherwise we unlink directly.
    2453 */
   2454int nfs_unlink(struct inode *dir, struct dentry *dentry)
   2455{
   2456	int error;
   2457	int need_rehash = 0;
   2458
   2459	dfprintk(VFS, "NFS: unlink(%s/%lu, %pd)\n", dir->i_sb->s_id,
   2460		dir->i_ino, dentry);
   2461
   2462	trace_nfs_unlink_enter(dir, dentry);
   2463	spin_lock(&dentry->d_lock);
   2464	if (d_count(dentry) > 1 && !test_bit(NFS_INO_PRESERVE_UNLINKED,
   2465					     &NFS_I(d_inode(dentry))->flags)) {
   2466		spin_unlock(&dentry->d_lock);
   2467		/* Start asynchronous writeout of the inode */
   2468		write_inode_now(d_inode(dentry), 0);
   2469		error = nfs_sillyrename(dir, dentry);
   2470		goto out;
   2471	}
   2472	if (!d_unhashed(dentry)) {
   2473		__d_drop(dentry);
   2474		need_rehash = 1;
   2475	}
   2476	spin_unlock(&dentry->d_lock);
   2477	error = nfs_safe_remove(dentry);
   2478	nfs_dentry_remove_handle_error(dir, dentry, error);
   2479	if (need_rehash)
   2480		d_rehash(dentry);
   2481out:
   2482	trace_nfs_unlink_exit(dir, dentry, error);
   2483	return error;
   2484}
   2485EXPORT_SYMBOL_GPL(nfs_unlink);
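
        /*
         * When nfs_sillyrename() is chosen above, the file is renamed on
         * the server to a temporary ".nfs..." name (built in
         * fs/nfs/unlink.c from the fileid and a counter) and the dentry is
         * flagged DCACHE_NFSFS_RENAMED; the real REMOVE is only issued
         * from nfs_dentry_iput() once the last user goes away.  E.g.:
         *
         *	fd = open("dir/file", O_RDONLY);
         *	unlink("dir/file");	// becomes dir/.nfsXXXX... on the server
         *	read(fd, buf, len);	// the open descriptor keeps working
         *	close(fd);		// now the ".nfs" file is removed
         */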
   2486
   2487/*
   2488 * To create a symbolic link, most file systems instantiate a new inode,
   2489 * add a page to it containing the path, then write it out to the disk
   2490 * using prepare_write/commit_write.
   2491 *
   2492 * Unfortunately the NFS client can't create the in-core inode first
   2493 * because it needs a file handle to create an in-core inode (see
   2494 * fs/nfs/inode.c:nfs_fhget).  We only have a file handle *after* the
   2495 * symlink request has completed on the server.
   2496 *
   2497 * So instead we allocate a raw page, copy the symname into it, then do
   2498 * the SYMLINK request with the page as the buffer.  If it succeeds, we
   2499 * now have a new file handle and can instantiate an in-core NFS inode
   2500 * and move the raw page into its mapping.
   2501 */
   2502int nfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
   2503		struct dentry *dentry, const char *symname)
   2504{
   2505	struct page *page;
   2506	char *kaddr;
   2507	struct iattr attr;
   2508	unsigned int pathlen = strlen(symname);
   2509	int error;
   2510
   2511	dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s)\n", dir->i_sb->s_id,
   2512		dir->i_ino, dentry, symname);
   2513
   2514	if (pathlen > PAGE_SIZE)
   2515		return -ENAMETOOLONG;
   2516
   2517	attr.ia_mode = S_IFLNK | S_IRWXUGO;
   2518	attr.ia_valid = ATTR_MODE;
   2519
   2520	page = alloc_page(GFP_USER);
   2521	if (!page)
   2522		return -ENOMEM;
   2523
   2524	kaddr = page_address(page);
   2525	memcpy(kaddr, symname, pathlen);
   2526	if (pathlen < PAGE_SIZE)
   2527		memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
   2528
   2529	trace_nfs_symlink_enter(dir, dentry);
   2530	error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
   2531	trace_nfs_symlink_exit(dir, dentry, error);
   2532	if (error != 0) {
   2533		dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s) error %d\n",
   2534			dir->i_sb->s_id, dir->i_ino,
   2535			dentry, symname, error);
   2536		d_drop(dentry);
   2537		__free_page(page);
   2538		return error;
   2539	}
   2540
   2541	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
   2542
   2543	/*
   2544	 * No big deal if we can't add this page to the page cache here.
   2545	 * READLINK will get the missing page from the server if needed.
   2546	 */
   2547	if (!add_to_page_cache_lru(page, d_inode(dentry)->i_mapping, 0,
   2548							GFP_KERNEL)) {
   2549		SetPageUptodate(page);
   2550		unlock_page(page);
   2551		/*
   2552		 * add_to_page_cache_lru() grabs an extra page refcount.
   2553		 * Drop it here to avoid leaking this page later.
   2554		 */
   2555		put_page(page);
   2556	} else
   2557		__free_page(page);
   2558
   2559	return 0;
   2560}
   2561EXPORT_SYMBOL_GPL(nfs_symlink);
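
        /*
         * Because the symlink body is primed into the page cache above, a
         * readlink() issued right after symlink() is normally served from
         * that page: nfs_get_link() (fs/nfs/symlink.c) reads it from the
         * inode's mapping and only issues a READLINK RPC when the page is
         * missing.
         */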
   2562
   2563int
   2564nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
   2565{
   2566	struct inode *inode = d_inode(old_dentry);
   2567	int error;
   2568
   2569	dfprintk(VFS, "NFS: link(%pd2 -> %pd2)\n",
   2570		old_dentry, dentry);
   2571
   2572	trace_nfs_link_enter(inode, dir, dentry);
   2573	d_drop(dentry);
   2574	if (S_ISREG(inode->i_mode))
   2575		nfs_sync_inode(inode);
   2576	error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
   2577	if (error == 0) {
   2578		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
   2579		ihold(inode);
   2580		d_add(dentry, inode);
   2581	}
   2582	trace_nfs_link_exit(inode, dir, dentry, error);
   2583	return error;
   2584}
   2585EXPORT_SYMBOL_GPL(nfs_link);
   2586
   2587/*
   2588 * RENAME
   2589 * FIXME: Some nfsds, like the Linux user space nfsd, may generate a
   2590 * different file handle for the same inode after a rename (e.g. when
   2591 * moving to a different directory). A fail-safe method to do so would
   2592 * be to look up old_dir/old_name, create a link to new_dir/new_name and
   2593 * rename the old file using the sillyrename stuff. This way, the original
   2594 * file in old_dir will go away when the last process iput()s the inode.
   2595 *
    2596 * FIXED.
    2597 *
    2598 * It actually works quite well. One needs to allow for at least one
    2599 * ".nfs..." file in each directory the file ever gets moved or linked
    2600 * to, which happens automatically with the new implementation that
    2601 * depends only on the dcache machinery instead of going through the
    2602 * inode layer.
   2603 *
   2604 * Unfortunately, things are a little more complicated than indicated
   2605 * above. For a cross-directory move, we want to make sure we can get
   2606 * rid of the old inode after the operation.  This means there must be
   2607 * no pending writes (if it's a file), and the use count must be 1.
   2608 * If these conditions are met, we can drop the dentries before doing
   2609 * the rename.
   2610 */
   2611int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
   2612	       struct dentry *old_dentry, struct inode *new_dir,
   2613	       struct dentry *new_dentry, unsigned int flags)
   2614{
   2615	struct inode *old_inode = d_inode(old_dentry);
   2616	struct inode *new_inode = d_inode(new_dentry);
   2617	struct dentry *dentry = NULL, *rehash = NULL;
   2618	struct rpc_task *task;
   2619	int error = -EBUSY;
   2620
   2621	if (flags)
   2622		return -EINVAL;
   2623
   2624	dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n",
   2625		 old_dentry, new_dentry,
   2626		 d_count(new_dentry));
   2627
   2628	trace_nfs_rename_enter(old_dir, old_dentry, new_dir, new_dentry);
   2629	/*
   2630	 * For non-directories, check whether the target is busy and if so,
   2631	 * make a copy of the dentry and then do a silly-rename. If the
   2632	 * silly-rename succeeds, the copied dentry is hashed and becomes
   2633	 * the new target.
   2634	 */
   2635	if (new_inode && !S_ISDIR(new_inode->i_mode)) {
   2636		/*
   2637		 * To prevent any new references to the target during the
   2638		 * rename, we unhash the dentry in advance.
   2639		 */
   2640		if (!d_unhashed(new_dentry)) {
   2641			d_drop(new_dentry);
   2642			rehash = new_dentry;
   2643		}
   2644
   2645		if (d_count(new_dentry) > 2) {
   2646			int err;
   2647
   2648			/* copy the target dentry's name */
   2649			dentry = d_alloc(new_dentry->d_parent,
   2650					 &new_dentry->d_name);
   2651			if (!dentry)
   2652				goto out;
   2653
   2654			/* silly-rename the existing target ... */
   2655			err = nfs_sillyrename(new_dir, new_dentry);
   2656			if (err)
   2657				goto out;
   2658
   2659			new_dentry = dentry;
   2660			rehash = NULL;
   2661			new_inode = NULL;
   2662		}
   2663	}
   2664
   2665	if (S_ISREG(old_inode->i_mode))
   2666		nfs_sync_inode(old_inode);
   2667	task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
   2668	if (IS_ERR(task)) {
   2669		error = PTR_ERR(task);
   2670		goto out;
   2671	}
   2672
   2673	error = rpc_wait_for_completion_task(task);
   2674	if (error != 0) {
   2675		((struct nfs_renamedata *)task->tk_calldata)->cancelled = 1;
   2676		/* Paired with the atomic_dec_and_test() barrier in rpc_do_put_task() */
   2677		smp_wmb();
   2678	} else
   2679		error = task->tk_status;
   2680	rpc_put_task(task);
   2681	/* Ensure the inode attributes are revalidated */
   2682	if (error == 0) {
   2683		spin_lock(&old_inode->i_lock);
   2684		NFS_I(old_inode)->attr_gencount = nfs_inc_attr_generation_counter();
   2685		nfs_set_cache_invalid(old_inode, NFS_INO_INVALID_CHANGE |
   2686							 NFS_INO_INVALID_CTIME |
   2687							 NFS_INO_REVAL_FORCED);
   2688		spin_unlock(&old_inode->i_lock);
   2689	}
   2690out:
   2691	if (rehash)
   2692		d_rehash(rehash);
   2693	trace_nfs_rename_exit(old_dir, old_dentry,
   2694			new_dir, new_dentry, error);
   2695	if (!error) {
   2696		if (new_inode != NULL)
   2697			nfs_drop_nlink(new_inode);
   2698		/*
   2699		 * The d_move() should be here instead of in an async RPC completion
   2700		 * handler because we need the proper locks to move the dentry.  If
   2701		 * we're interrupted by a signal, the async RPC completion handler
   2702		 * should mark the directories for revalidation.
   2703		 */
   2704		d_move(old_dentry, new_dentry);
   2705		nfs_set_verifier(old_dentry,
   2706					nfs_save_change_attribute(new_dir));
   2707	} else if (error == -ENOENT)
   2708		nfs_dentry_handle_enoent(old_dentry);
   2709
   2710	/* new dentry created? */
   2711	if (dentry)
   2712		dput(dentry);
   2713	return error;
   2714}
   2715EXPORT_SYMBOL_GPL(nfs_rename);
   2716
   2717static DEFINE_SPINLOCK(nfs_access_lru_lock);
   2718static LIST_HEAD(nfs_access_lru_list);
   2719static atomic_long_t nfs_access_nr_entries;
   2720
   2721static unsigned long nfs_access_max_cachesize = 4*1024*1024;
   2722module_param(nfs_access_max_cachesize, ulong, 0644);
   2723MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache length");
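
        /*
         * The limit is a count of cached access entries, not bytes.  Since
         * the parameter is declared with mode 0644 it can also be tuned at
         * runtime via sysfs (path assumes the usual "nfs" module name):
         *
         *	echo 1048576 > /sys/module/nfs/parameters/nfs_access_max_cachesize
         */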
   2724
   2725static void nfs_access_free_entry(struct nfs_access_entry *entry)
   2726{
   2727	put_group_info(entry->group_info);
   2728	kfree_rcu(entry, rcu_head);
   2729	smp_mb__before_atomic();
   2730	atomic_long_dec(&nfs_access_nr_entries);
   2731	smp_mb__after_atomic();
   2732}
   2733
   2734static void nfs_access_free_list(struct list_head *head)
   2735{
   2736	struct nfs_access_entry *cache;
   2737
   2738	while (!list_empty(head)) {
   2739		cache = list_entry(head->next, struct nfs_access_entry, lru);
   2740		list_del(&cache->lru);
   2741		nfs_access_free_entry(cache);
   2742	}
   2743}
   2744
   2745static unsigned long
   2746nfs_do_access_cache_scan(unsigned int nr_to_scan)
   2747{
   2748	LIST_HEAD(head);
   2749	struct nfs_inode *nfsi, *next;
   2750	struct nfs_access_entry *cache;
   2751	long freed = 0;
   2752
   2753	spin_lock(&nfs_access_lru_lock);
   2754	list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
   2755		struct inode *inode;
   2756
   2757		if (nr_to_scan-- == 0)
   2758			break;
   2759		inode = &nfsi->vfs_inode;
   2760		spin_lock(&inode->i_lock);
   2761		if (list_empty(&nfsi->access_cache_entry_lru))
   2762			goto remove_lru_entry;
   2763		cache = list_entry(nfsi->access_cache_entry_lru.next,
   2764				struct nfs_access_entry, lru);
   2765		list_move(&cache->lru, &head);
   2766		rb_erase(&cache->rb_node, &nfsi->access_cache);
   2767		freed++;
   2768		if (!list_empty(&nfsi->access_cache_entry_lru))
   2769			list_move_tail(&nfsi->access_cache_inode_lru,
   2770					&nfs_access_lru_list);
   2771		else {
   2772remove_lru_entry:
   2773			list_del_init(&nfsi->access_cache_inode_lru);
   2774			smp_mb__before_atomic();
   2775			clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
   2776			smp_mb__after_atomic();
   2777		}
   2778		spin_unlock(&inode->i_lock);
   2779	}
   2780	spin_unlock(&nfs_access_lru_lock);
   2781	nfs_access_free_list(&head);
   2782	return freed;
   2783}
   2784
   2785unsigned long
   2786nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
   2787{
   2788	int nr_to_scan = sc->nr_to_scan;
   2789	gfp_t gfp_mask = sc->gfp_mask;
   2790
   2791	if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
   2792		return SHRINK_STOP;
   2793	return nfs_do_access_cache_scan(nr_to_scan);
   2794}
   2795
   2796
   2797unsigned long
   2798nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc)
   2799{
   2800	return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries));
   2801}
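
        /*
         * nfs_access_cache_count() and nfs_access_cache_scan() are the
         * ->count_objects and ->scan_objects halves of a memory shrinker;
         * in mainline they are wired up from fs/nfs/super.c roughly as
         *
         *	static struct shrinker acl_shrinker = {
         *		.count_objects	= nfs_access_cache_count,
         *		.scan_objects	= nfs_access_cache_scan,
         *		.seeks		= DEFAULT_SEEKS,
         *	};
         *
         * so the access cache also shrinks under memory pressure, on top of
         * the hard limit enforced by nfs_access_cache_enforce_limit().
         */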
   2802
   2803static void
   2804nfs_access_cache_enforce_limit(void)
   2805{
   2806	long nr_entries = atomic_long_read(&nfs_access_nr_entries);
   2807	unsigned long diff;
   2808	unsigned int nr_to_scan;
   2809
   2810	if (nr_entries < 0 || nr_entries <= nfs_access_max_cachesize)
   2811		return;
   2812	nr_to_scan = 100;
   2813	diff = nr_entries - nfs_access_max_cachesize;
   2814	if (diff < nr_to_scan)
   2815		nr_to_scan = diff;
   2816	nfs_do_access_cache_scan(nr_to_scan);
   2817}
   2818
   2819static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
   2820{
   2821	struct rb_root *root_node = &nfsi->access_cache;
   2822	struct rb_node *n;
   2823	struct nfs_access_entry *entry;
   2824
   2825	/* Unhook entries from the cache */
   2826	while ((n = rb_first(root_node)) != NULL) {
   2827		entry = rb_entry(n, struct nfs_access_entry, rb_node);
   2828		rb_erase(n, root_node);
   2829		list_move(&entry->lru, head);
   2830	}
   2831	nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
   2832}
   2833
   2834void nfs_access_zap_cache(struct inode *inode)
   2835{
   2836	LIST_HEAD(head);
   2837
   2838	if (test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags) == 0)
   2839		return;
    2840	/* Remove from the global LRU list */
   2841	spin_lock(&nfs_access_lru_lock);
   2842	if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
   2843		list_del_init(&NFS_I(inode)->access_cache_inode_lru);
   2844
   2845	spin_lock(&inode->i_lock);
   2846	__nfs_access_zap_cache(NFS_I(inode), &head);
   2847	spin_unlock(&inode->i_lock);
   2848	spin_unlock(&nfs_access_lru_lock);
   2849	nfs_access_free_list(&head);
   2850}
   2851EXPORT_SYMBOL_GPL(nfs_access_zap_cache);
   2852
   2853static int access_cmp(const struct cred *a, const struct nfs_access_entry *b)
   2854{
   2855	struct group_info *ga, *gb;
   2856	int g;
   2857
   2858	if (uid_lt(a->fsuid, b->fsuid))
   2859		return -1;
   2860	if (uid_gt(a->fsuid, b->fsuid))
   2861		return 1;
   2862
   2863	if (gid_lt(a->fsgid, b->fsgid))
   2864		return -1;
   2865	if (gid_gt(a->fsgid, b->fsgid))
   2866		return 1;
   2867
   2868	ga = a->group_info;
   2869	gb = b->group_info;
   2870	if (ga == gb)
   2871		return 0;
   2872	if (ga == NULL)
   2873		return -1;
   2874	if (gb == NULL)
   2875		return 1;
   2876	if (ga->ngroups < gb->ngroups)
   2877		return -1;
   2878	if (ga->ngroups > gb->ngroups)
   2879		return 1;
   2880
   2881	for (g = 0; g < ga->ngroups; g++) {
   2882		if (gid_lt(ga->gid[g], gb->gid[g]))
   2883			return -1;
   2884		if (gid_gt(ga->gid[g], gb->gid[g]))
   2885			return 1;
   2886	}
   2887	return 0;
   2888}
   2889
   2890static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, const struct cred *cred)
   2891{
   2892	struct rb_node *n = NFS_I(inode)->access_cache.rb_node;
   2893
   2894	while (n != NULL) {
   2895		struct nfs_access_entry *entry =
   2896			rb_entry(n, struct nfs_access_entry, rb_node);
   2897		int cmp = access_cmp(cred, entry);
   2898
   2899		if (cmp < 0)
   2900			n = n->rb_left;
   2901		else if (cmp > 0)
   2902			n = n->rb_right;
   2903		else
   2904			return entry;
   2905	}
   2906	return NULL;
   2907}
   2908
   2909static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, u32 *mask, bool may_block)
   2910{
   2911	struct nfs_inode *nfsi = NFS_I(inode);
   2912	struct nfs_access_entry *cache;
   2913	bool retry = true;
   2914	int err;
   2915
   2916	spin_lock(&inode->i_lock);
    2917	for (;;) {
   2918		if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
   2919			goto out_zap;
   2920		cache = nfs_access_search_rbtree(inode, cred);
   2921		err = -ENOENT;
   2922		if (cache == NULL)
   2923			goto out;
   2924		/* Found an entry, is our attribute cache valid? */
   2925		if (!nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
   2926			break;
   2927		if (!retry)
   2928			break;
   2929		err = -ECHILD;
   2930		if (!may_block)
   2931			goto out;
   2932		spin_unlock(&inode->i_lock);
   2933		err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
   2934		if (err)
   2935			return err;
   2936		spin_lock(&inode->i_lock);
   2937		retry = false;
   2938	}
   2939	*mask = cache->mask;
   2940	list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru);
   2941	err = 0;
   2942out:
   2943	spin_unlock(&inode->i_lock);
   2944	return err;
   2945out_zap:
   2946	spin_unlock(&inode->i_lock);
   2947	nfs_access_zap_cache(inode);
   2948	return -ENOENT;
   2949}
   2950
   2951static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, u32 *mask)
   2952{
   2953	/* Only check the most recently returned cache entry,
   2954	 * but do it without locking.
   2955	 */
   2956	struct nfs_inode *nfsi = NFS_I(inode);
   2957	struct nfs_access_entry *cache;
   2958	int err = -ECHILD;
   2959	struct list_head *lh;
   2960
   2961	rcu_read_lock();
   2962	if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
   2963		goto out;
   2964	lh = rcu_dereference(list_tail_rcu(&nfsi->access_cache_entry_lru));
   2965	cache = list_entry(lh, struct nfs_access_entry, lru);
   2966	if (lh == &nfsi->access_cache_entry_lru ||
   2967	    access_cmp(cred, cache) != 0)
   2968		cache = NULL;
   2969	if (cache == NULL)
   2970		goto out;
   2971	if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
   2972		goto out;
   2973	*mask = cache->mask;
   2974	err = 0;
   2975out:
   2976	rcu_read_unlock();
   2977	return err;
   2978}
   2979
   2980int nfs_access_get_cached(struct inode *inode, const struct cred *cred,
   2981			  u32 *mask, bool may_block)
   2982{
   2983	int status;
   2984
   2985	status = nfs_access_get_cached_rcu(inode, cred, mask);
   2986	if (status != 0)
   2987		status = nfs_access_get_cached_locked(inode, cred, mask,
   2988		    may_block);
   2989
   2990	return status;
   2991}
   2992EXPORT_SYMBOL_GPL(nfs_access_get_cached);
   2993
   2994static void nfs_access_add_rbtree(struct inode *inode,
   2995				  struct nfs_access_entry *set,
   2996				  const struct cred *cred)
   2997{
   2998	struct nfs_inode *nfsi = NFS_I(inode);
   2999	struct rb_root *root_node = &nfsi->access_cache;
   3000	struct rb_node **p = &root_node->rb_node;
   3001	struct rb_node *parent = NULL;
   3002	struct nfs_access_entry *entry;
   3003	int cmp;
   3004
   3005	spin_lock(&inode->i_lock);
   3006	while (*p != NULL) {
   3007		parent = *p;
   3008		entry = rb_entry(parent, struct nfs_access_entry, rb_node);
   3009		cmp = access_cmp(cred, entry);
   3010
   3011		if (cmp < 0)
   3012			p = &parent->rb_left;
   3013		else if (cmp > 0)
   3014			p = &parent->rb_right;
   3015		else
   3016			goto found;
   3017	}
   3018	rb_link_node(&set->rb_node, parent, p);
   3019	rb_insert_color(&set->rb_node, root_node);
   3020	list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
   3021	spin_unlock(&inode->i_lock);
   3022	return;
   3023found:
   3024	rb_replace_node(parent, &set->rb_node, root_node);
   3025	list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
   3026	list_del(&entry->lru);
   3027	spin_unlock(&inode->i_lock);
   3028	nfs_access_free_entry(entry);
   3029}
   3030
   3031void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set,
   3032			  const struct cred *cred)
   3033{
   3034	struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
   3035	if (cache == NULL)
   3036		return;
   3037	RB_CLEAR_NODE(&cache->rb_node);
   3038	cache->fsuid = cred->fsuid;
   3039	cache->fsgid = cred->fsgid;
   3040	cache->group_info = get_group_info(cred->group_info);
   3041	cache->mask = set->mask;
   3042
   3043	/* The above field assignments must be visible
   3044	 * before this item appears on the lru.  We cannot easily
   3045	 * use rcu_assign_pointer, so just force the memory barrier.
   3046	 */
   3047	smp_wmb();
   3048	nfs_access_add_rbtree(inode, cache, cred);
   3049
   3050	/* Update accounting */
   3051	smp_mb__before_atomic();
   3052	atomic_long_inc(&nfs_access_nr_entries);
   3053	smp_mb__after_atomic();
   3054
   3055	/* Add inode to global LRU list */
   3056	if (!test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
   3057		spin_lock(&nfs_access_lru_lock);
   3058		if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
   3059			list_add_tail(&NFS_I(inode)->access_cache_inode_lru,
   3060					&nfs_access_lru_list);
   3061		spin_unlock(&nfs_access_lru_lock);
   3062	}
   3063	nfs_access_cache_enforce_limit();
   3064}
   3065EXPORT_SYMBOL_GPL(nfs_access_add_cache);
   3066
   3067#define NFS_MAY_READ (NFS_ACCESS_READ)
   3068#define NFS_MAY_WRITE (NFS_ACCESS_MODIFY | \
   3069		NFS_ACCESS_EXTEND | \
   3070		NFS_ACCESS_DELETE)
   3071#define NFS_FILE_MAY_WRITE (NFS_ACCESS_MODIFY | \
   3072		NFS_ACCESS_EXTEND)
   3073#define NFS_DIR_MAY_WRITE NFS_MAY_WRITE
   3074#define NFS_MAY_LOOKUP (NFS_ACCESS_LOOKUP)
   3075#define NFS_MAY_EXECUTE (NFS_ACCESS_EXECUTE)
   3076static int
   3077nfs_access_calc_mask(u32 access_result, umode_t umode)
   3078{
   3079	int mask = 0;
   3080
   3081	if (access_result & NFS_MAY_READ)
   3082		mask |= MAY_READ;
   3083	if (S_ISDIR(umode)) {
   3084		if ((access_result & NFS_DIR_MAY_WRITE) == NFS_DIR_MAY_WRITE)
   3085			mask |= MAY_WRITE;
   3086		if ((access_result & NFS_MAY_LOOKUP) == NFS_MAY_LOOKUP)
   3087			mask |= MAY_EXEC;
   3088	} else if (S_ISREG(umode)) {
   3089		if ((access_result & NFS_FILE_MAY_WRITE) == NFS_FILE_MAY_WRITE)
   3090			mask |= MAY_WRITE;
   3091		if ((access_result & NFS_MAY_EXECUTE) == NFS_MAY_EXECUTE)
   3092			mask |= MAY_EXEC;
   3093	} else if (access_result & NFS_MAY_WRITE)
   3094			mask |= MAY_WRITE;
   3095	return mask;
   3096}
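
        /*
         * Worked example for a regular file: if the server grants
         * NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND but not
         * NFS_ACCESS_EXECUTE, the result is MAY_READ | MAY_WRITE.  A write
         * or exec bit is only reported when every NFS bit in its group is
         * present, which is why a directory needs NFS_ACCESS_LOOKUP for
         * MAY_EXEC while a regular file needs NFS_ACCESS_EXECUTE.
         */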
   3097
   3098void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result)
   3099{
   3100	entry->mask = access_result;
   3101}
   3102EXPORT_SYMBOL_GPL(nfs_access_set_mask);
   3103
   3104static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
   3105{
   3106	struct nfs_access_entry cache;
   3107	bool may_block = (mask & MAY_NOT_BLOCK) == 0;
   3108	int cache_mask = -1;
   3109	int status;
   3110
   3111	trace_nfs_access_enter(inode);
   3112
   3113	status = nfs_access_get_cached(inode, cred, &cache.mask, may_block);
   3114	if (status == 0)
   3115		goto out_cached;
   3116
   3117	status = -ECHILD;
   3118	if (!may_block)
   3119		goto out;
   3120
   3121	/*
   3122	 * Determine which access bits we want to ask for...
   3123	 */
   3124	cache.mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND |
   3125		     nfs_access_xattr_mask(NFS_SERVER(inode));
   3126	if (S_ISDIR(inode->i_mode))
   3127		cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP;
   3128	else
   3129		cache.mask |= NFS_ACCESS_EXECUTE;
   3130	status = NFS_PROTO(inode)->access(inode, &cache, cred);
   3131	if (status != 0) {
   3132		if (status == -ESTALE) {
   3133			if (!S_ISDIR(inode->i_mode))
   3134				nfs_set_inode_stale(inode);
   3135			else
   3136				nfs_zap_caches(inode);
   3137		}
   3138		goto out;
   3139	}
   3140	nfs_access_add_cache(inode, &cache, cred);
   3141out_cached:
   3142	cache_mask = nfs_access_calc_mask(cache.mask, inode->i_mode);
   3143	if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0)
   3144		status = -EACCES;
   3145out:
   3146	trace_nfs_access_exit(inode, mask, cache_mask, status);
   3147	return status;
   3148}
   3149
   3150static int nfs_open_permission_mask(int openflags)
   3151{
   3152	int mask = 0;
   3153
   3154	if (openflags & __FMODE_EXEC) {
   3155		/* ONLY check exec rights */
   3156		mask = MAY_EXEC;
   3157	} else {
   3158		if ((openflags & O_ACCMODE) != O_WRONLY)
   3159			mask |= MAY_READ;
   3160		if ((openflags & O_ACCMODE) != O_RDONLY)
   3161			mask |= MAY_WRITE;
   3162	}
   3163
   3164	return mask;
   3165}
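
        /*
         * Resulting checks for the common open modes:
         *
         *	O_RDONLY		-> MAY_READ
         *	O_WRONLY		-> MAY_WRITE
         *	O_RDWR			-> MAY_READ | MAY_WRITE
         *	__FMODE_EXEC (execve)	-> MAY_EXEC only
         */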
   3166
   3167int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags)
   3168{
   3169	return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags));
   3170}
   3171EXPORT_SYMBOL_GPL(nfs_may_open);
   3172
   3173static int nfs_execute_ok(struct inode *inode, int mask)
   3174{
   3175	struct nfs_server *server = NFS_SERVER(inode);
   3176	int ret = 0;
   3177
   3178	if (S_ISDIR(inode->i_mode))
   3179		return 0;
   3180	if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_MODE)) {
   3181		if (mask & MAY_NOT_BLOCK)
   3182			return -ECHILD;
   3183		ret = __nfs_revalidate_inode(server, inode);
   3184	}
   3185	if (ret == 0 && !execute_ok(inode))
   3186		ret = -EACCES;
   3187	return ret;
   3188}
   3189
   3190int nfs_permission(struct user_namespace *mnt_userns,
   3191		   struct inode *inode,
   3192		   int mask)
   3193{
   3194	const struct cred *cred = current_cred();
   3195	int res = 0;
   3196
   3197	nfs_inc_stats(inode, NFSIOS_VFSACCESS);
   3198
   3199	if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
   3200		goto out;
   3201	/* Is this sys_access() ? */
   3202	if (mask & (MAY_ACCESS | MAY_CHDIR))
   3203		goto force_lookup;
   3204
   3205	switch (inode->i_mode & S_IFMT) {
   3206		case S_IFLNK:
   3207			goto out;
   3208		case S_IFREG:
   3209			if ((mask & MAY_OPEN) &&
   3210			   nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN))
   3211				return 0;
   3212			break;
   3213		case S_IFDIR:
   3214			/*
   3215			 * Optimize away all write operations, since the server
   3216			 * will check permissions when we perform the op.
   3217			 */
   3218			if ((mask & MAY_WRITE) && !(mask & MAY_READ))
   3219				goto out;
   3220	}
   3221
   3222force_lookup:
   3223	if (!NFS_PROTO(inode)->access)
   3224		goto out_notsup;
   3225
   3226	res = nfs_do_access(inode, cred, mask);
   3227out:
   3228	if (!res && (mask & MAY_EXEC))
   3229		res = nfs_execute_ok(inode, mask);
   3230
   3231	dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n",
   3232		inode->i_sb->s_id, inode->i_ino, mask, res);
   3233	return res;
   3234out_notsup:
   3235	if (mask & MAY_NOT_BLOCK)
   3236		return -ECHILD;
   3237
   3238	res = nfs_revalidate_inode(inode, NFS_INO_INVALID_MODE |
   3239						  NFS_INO_INVALID_OTHER);
   3240	if (res == 0)
   3241		res = generic_permission(&init_user_ns, inode, mask);
   3242	goto out;
   3243}
   3244EXPORT_SYMBOL_GPL(nfs_permission);