filecache.c
/*
 * Open file cache.
 *
 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
 */

#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/list_lru.h>
#include <linux/fsnotify_backend.h>
#include <linux/fsnotify.h>
#include <linux/seq_file.h>

#include "vfs.h"
#include "nfsd.h"
#include "nfsfh.h"
#include "netns.h"
#include "filecache.h"
#include "trace.h"

#define NFSDDBG_FACILITY	NFSDDBG_FH

/* FIXME: dynamically size this for the machine somehow? */
#define NFSD_FILE_HASH_BITS	12
#define NFSD_FILE_HASH_SIZE	(1 << NFSD_FILE_HASH_BITS)
#define NFSD_LAUNDRETTE_DELAY	(2 * HZ)

#define NFSD_FILE_SHUTDOWN	(1)
#define NFSD_FILE_LRU_THRESHOLD	(4096UL)
#define NFSD_FILE_LRU_LIMIT	(NFSD_FILE_LRU_THRESHOLD << 2)

/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)

struct nfsd_fcache_bucket {
	struct hlist_head	nfb_head;
	spinlock_t		nfb_lock;
	unsigned int		nfb_count;
	unsigned int		nfb_maxcount;
};

static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);

struct nfsd_fcache_disposal {
	struct work_struct work;
	spinlock_t lock;
	struct list_head freeme;
};

static struct workqueue_struct *nfsd_filecache_wq __read_mostly;

static struct kmem_cache		*nfsd_file_slab;
static struct kmem_cache		*nfsd_file_mark_slab;
static struct nfsd_fcache_bucket	*nfsd_file_hashtbl;
static struct list_lru			nfsd_file_lru;
static long				nfsd_file_lru_flags;
static struct fsnotify_group		*nfsd_file_fsnotify_group;
static atomic_long_t			nfsd_filecache_count;
static struct delayed_work		nfsd_filecache_laundrette;

static void nfsd_file_gc(void);

static void
nfsd_file_schedule_laundrette(void)
{
	long count = atomic_long_read(&nfsd_filecache_count);

	if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
		return;

	queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
			   NFSD_LAUNDRETTE_DELAY);
}

static void
nfsd_file_slab_free(struct rcu_head *rcu)
{
	struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);

	put_cred(nf->nf_cred);
	kmem_cache_free(nfsd_file_slab, nf);
}

static void
nfsd_file_mark_free(struct fsnotify_mark *mark)
{
	struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
						  nfm_mark);

	kmem_cache_free(nfsd_file_mark_slab, nfm);
}

static struct nfsd_file_mark *
nfsd_file_mark_get(struct nfsd_file_mark *nfm)
{
	if (!refcount_inc_not_zero(&nfm->nfm_ref))
		return NULL;
	return nfm;
}

static void
nfsd_file_mark_put(struct nfsd_file_mark *nfm)
{
	if (refcount_dec_and_test(&nfm->nfm_ref)) {
		fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
		fsnotify_put_mark(&nfm->nfm_mark);
	}
}
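/*
 * Each cached inode carries one nfsd_file_mark so that fsnotify events
 * (unlink, attribute change) can flush stale entries out of the cache;
 * see nfsd_file_fsnotify_handle_event() below. The mark has two
 * reference counts: nfm_ref governs the cache's use of the structure,
 * while the embedded fsnotify_mark carries its own count owned by the
 * fsnotify core.
 */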
static struct nfsd_file_mark *
nfsd_file_mark_find_or_create(struct nfsd_file *nf)
{
	int err;
	struct fsnotify_mark *mark;
	struct nfsd_file_mark *nfm = NULL, *new;
	struct inode *inode = nf->nf_inode;

	do {
		fsnotify_group_lock(nfsd_file_fsnotify_group);
		mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
					  nfsd_file_fsnotify_group);
		if (mark) {
			nfm = nfsd_file_mark_get(container_of(mark,
						 struct nfsd_file_mark,
						 nfm_mark));
			fsnotify_group_unlock(nfsd_file_fsnotify_group);
			if (nfm) {
				fsnotify_put_mark(mark);
				break;
			}
			/* Avoid soft lockup race with nfsd_file_mark_put() */
			fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
			fsnotify_put_mark(mark);
		} else {
			fsnotify_group_unlock(nfsd_file_fsnotify_group);
		}

		/* allocate a new nfm */
		new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
		if (!new)
			return NULL;
		fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
		new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
		refcount_set(&new->nfm_ref, 1);

		err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);

		/*
		 * If the add was successful, then return the object.
		 * Otherwise, we need to put the reference we hold on the
		 * nfm_mark. The fsnotify code will take a reference and put
		 * it on failure, so we can't just free it directly. It's also
		 * not safe to call fsnotify_destroy_mark on it as the
		 * mark->group will be NULL. Thus, we can't let the nfm_ref
		 * counter drive the destruction at this point.
		 */
		if (likely(!err))
			nfm = new;
		else
			fsnotify_put_mark(&new->nfm_mark);
	} while (unlikely(err == -EEXIST));

	return nfm;
}

static struct nfsd_file *
nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
		struct net *net)
{
	struct nfsd_file *nf;

	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
	if (nf) {
		INIT_HLIST_NODE(&nf->nf_node);
		INIT_LIST_HEAD(&nf->nf_lru);
		nf->nf_file = NULL;
		nf->nf_cred = get_current_cred();
		nf->nf_net = net;
		nf->nf_flags = 0;
		nf->nf_inode = inode;
		nf->nf_hashval = hashval;
		refcount_set(&nf->nf_ref, 1);
		nf->nf_may = may & NFSD_FILE_MAY_MASK;
		if (may & NFSD_MAY_NOT_BREAK_LEASE) {
			if (may & NFSD_MAY_WRITE)
				__set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
			if (may & NFSD_MAY_READ)
				__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
		}
		nf->nf_mark = NULL;
		trace_nfsd_file_alloc(nf);
	}
	return nf;
}

static bool
nfsd_file_free(struct nfsd_file *nf)
{
	bool flush = false;

	trace_nfsd_file_put_final(nf);
	if (nf->nf_mark)
		nfsd_file_mark_put(nf->nf_mark);
	if (nf->nf_file) {
		get_file(nf->nf_file);
		filp_close(nf->nf_file, NULL);
		fput(nf->nf_file);
		flush = true;
	}
	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
	return flush;
}

static bool
nfsd_file_check_writeback(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;
	struct address_space *mapping;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return false;
	mapping = file->f_mapping;
	return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
		mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
}

static int
nfsd_file_check_write_error(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return 0;
	return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
}

static void
nfsd_file_flush(struct nfsd_file *nf)
{
	if (nf->nf_file && vfs_fsync(nf->nf_file, 1) != 0)
		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
}
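/*
 * Unhashing discipline: the hash table owns one reference to each
 * nfsd_file. The helpers below drop an entry's bucket accounting under
 * the bucket lock; what becomes of that final reference is up to the
 * caller (see nfsd_file_unhash_and_release_locked()).
 */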
static void
nfsd_file_do_unhash(struct nfsd_file *nf)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash(nf);

	if (nfsd_file_check_write_error(nf))
		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
	--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
	hlist_del_rcu(&nf->nf_node);
	atomic_long_dec(&nfsd_filecache_count);
}

static bool
nfsd_file_unhash(struct nfsd_file *nf)
{
	if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		nfsd_file_do_unhash(nf);
		if (!list_empty(&nf->nf_lru))
			list_lru_del(&nfsd_file_lru, &nf->nf_lru);
		return true;
	}
	return false;
}

/*
 * Return true if the file was unhashed.
 */
static bool
nfsd_file_unhash_and_release_locked(struct nfsd_file *nf,
				    struct list_head *dispose)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash_and_release_locked(nf);
	if (!nfsd_file_unhash(nf))
		return false;
	/* keep final reference for nfsd_file_lru_dispose */
	if (refcount_dec_not_one(&nf->nf_ref))
		return true;

	list_add(&nf->nf_lru, dispose);
	return true;
}

static void
nfsd_file_put_noref(struct nfsd_file *nf)
{
	trace_nfsd_file_put(nf);

	if (refcount_dec_and_test(&nf->nf_ref)) {
		WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
		nfsd_file_free(nf);
	}
}

void
nfsd_file_put(struct nfsd_file *nf)
{
	might_sleep();

	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
	if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
		nfsd_file_flush(nf);
		nfsd_file_put_noref(nf);
	} else if (nf->nf_file) {
		nfsd_file_put_noref(nf);
		nfsd_file_schedule_laundrette();
	} else
		nfsd_file_put_noref(nf);

	if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
		nfsd_file_gc();
}

struct nfsd_file *
nfsd_file_get(struct nfsd_file *nf)
{
	if (likely(refcount_inc_not_zero(&nf->nf_ref)))
		return nf;
	return NULL;
}

static void
nfsd_file_dispose_list(struct list_head *dispose)
{
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		nfsd_file_flush(nf);
		nfsd_file_put_noref(nf);
	}
}

static void
nfsd_file_dispose_list_sync(struct list_head *dispose)
{
	bool flush = false;
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		nfsd_file_flush(nf);
		if (!refcount_dec_and_test(&nf->nf_ref))
			continue;
		if (nfsd_file_free(nf))
			flush = true;
	}
	if (flush)
		flush_delayed_fput();
}

static void
nfsd_file_list_remove_disposal(struct list_head *dst,
			       struct nfsd_fcache_disposal *l)
{
	spin_lock(&l->lock);
	list_splice_init(&l->freeme, dst);
	spin_unlock(&l->lock);
}

static void
nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	struct nfsd_fcache_disposal *l = nn->fcache_disposal;

	spin_lock(&l->lock);
	list_splice_tail_init(files, &l->freeme);
	spin_unlock(&l->lock);
	queue_work(nfsd_filecache_wq, &l->work);
}

static void
nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
			  struct net *net)
{
	struct nfsd_file *nf, *tmp;

	list_for_each_entry_safe(nf, tmp, src, nf_lru) {
		if (nf->nf_net == net)
			list_move_tail(&nf->nf_lru, dst);
	}
}
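/*
 * Deferred disposal: entries that cannot be closed in the caller's
 * context are sorted by their network namespace, spliced onto that
 * namespace's fcache_disposal list, and handed to the filecache
 * workqueue, where nfsd_file_delayed_close() does the actual close.
 */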
static void
nfsd_file_dispose_list_delayed(struct list_head *dispose)
{
	LIST_HEAD(list);
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
		nfsd_file_list_add_disposal(&list, nf->nf_net);
	}
}

/*
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
		 spinlock_t *lock, void *arg)
	__releases(lock)
	__acquires(lock)
{
	struct list_head *head = arg;
	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);

	/*
	 * Do a lockless refcount check. The hashtable holds one reference, so
	 * we look to see if anything else has a reference, or if any have
	 * been put since the shrinker last ran. Those don't get unhashed and
	 * released.
	 *
	 * Note that in the put path, we set the flag and then decrement the
	 * counter. Here we check the counter and then test and clear the flag.
	 * That order is deliberate to ensure that we can do this locklessly.
	 */
	if (refcount_read(&nf->nf_ref) > 1)
		goto out_skip;

	/*
	 * Don't throw out files that are still undergoing I/O or
	 * that have uncleared errors pending.
	 */
	if (nfsd_file_check_writeback(nf))
		goto out_skip;

	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
		goto out_skip;

	if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
		goto out_skip;

	list_lru_isolate_move(lru, &nf->nf_lru, head);
	return LRU_REMOVED;
out_skip:
	return LRU_SKIP;
}

static unsigned long
nfsd_file_lru_walk_list(struct shrink_control *sc)
{
	LIST_HEAD(head);
	struct nfsd_file *nf;
	unsigned long ret;

	if (sc)
		ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
					   nfsd_file_lru_cb, &head);
	else
		ret = list_lru_walk(&nfsd_file_lru,
				    nfsd_file_lru_cb,
				    &head, LONG_MAX);
	list_for_each_entry(nf, &head, nf_lru) {
		spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
		nfsd_file_do_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
	}
	nfsd_file_dispose_list_delayed(&head);
	return ret;
}

static void
nfsd_file_gc(void)
{
	nfsd_file_lru_walk_list(NULL);
}

static void
nfsd_file_gc_worker(struct work_struct *work)
{
	nfsd_file_gc();
	nfsd_file_schedule_laundrette();
}

static unsigned long
nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
{
	return list_lru_count(&nfsd_file_lru);
}

static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
	return nfsd_file_lru_walk_list(sc);
}

static struct shrinker nfsd_file_shrinker = {
	.scan_objects = nfsd_file_lru_scan,
	.count_objects = nfsd_file_lru_count,
	.seeks = 1,
};

static void
__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
			struct list_head *dispose)
{
	struct nfsd_file *nf;
	struct hlist_node *tmp;

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
		if (inode == nf->nf_inode)
			nfsd_file_unhash_and_release_locked(nf, dispose);
	}
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
}
/**
 * nfsd_file_close_inode_sync - attempt to forcibly close an nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put. Also ensure that any of the
 * fputs also have their final __fput done as well.
 */
void
nfsd_file_close_inode_sync(struct inode *inode)
{
	unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_sync(&dispose);
}

/**
 * nfsd_file_close_inode - attempt a delayed close of an nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put.
 */
static void
nfsd_file_close_inode(struct inode *inode)
{
	unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_delayed(&dispose);
}

/**
 * nfsd_file_delayed_close - close unused nfsd_files
 * @work: dummy
 *
 * Pull the entries queued on this namespace's disposal list by
 * nfsd_file_list_add_disposal() and close them.
 *
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static void
nfsd_file_delayed_close(struct work_struct *work)
{
	LIST_HEAD(head);
	struct nfsd_fcache_disposal *l = container_of(work,
			struct nfsd_fcache_disposal, work);

	nfsd_file_list_remove_disposal(&head, l);
	nfsd_file_dispose_list(&head);
}

static int
nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
			      void *data)
{
	struct file_lock *fl = data;

	/* Only close files for F_SETLEASE leases */
	if (fl->fl_flags & FL_LEASE)
		nfsd_file_close_inode_sync(file_inode(fl->fl_file));
	return 0;
}

static struct notifier_block nfsd_file_lease_notifier = {
	.notifier_call = nfsd_file_lease_notifier_call,
};

static int
nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
				struct inode *inode, struct inode *dir,
				const struct qstr *name, u32 cookie)
{
	if (WARN_ON_ONCE(!inode))
		return 0;

	trace_nfsd_file_fsnotify_handle_event(inode, mask);

	/* Should be no marks on non-regular files */
	if (!S_ISREG(inode->i_mode)) {
		WARN_ON_ONCE(1);
		return 0;
	}

	/* don't close files if this was not the last link */
	if (mask & FS_ATTRIB) {
		if (inode->i_nlink)
			return 0;
	}

	nfsd_file_close_inode(inode);
	return 0;
}

static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
	.handle_inode_event = nfsd_file_fsnotify_handle_event,
	.free_mark = nfsd_file_mark_free,
};
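/*
 * Cache-wide setup: allocate the workqueue, hash table and slabs, then
 * register the LRU, shrinker, lease notifier and fsnotify group. The
 * error path unwinds in reverse order, and nfsd_file_cache_shutdown()
 * mirrors the same sequence at teardown.
 */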
int
nfsd_file_cache_init(void)
{
	int ret = -ENOMEM;
	unsigned int i;

	clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	if (nfsd_file_hashtbl)
		return 0;

	nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0);
	if (!nfsd_filecache_wq)
		goto out;

	nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE,
				     sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
	if (!nfsd_file_hashtbl) {
		pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
		goto out_err;
	}

	nfsd_file_slab = kmem_cache_create("nfsd_file",
				sizeof(struct nfsd_file), 0, 0, NULL);
	if (!nfsd_file_slab) {
		pr_err("nfsd: unable to create nfsd_file_slab\n");
		goto out_err;
	}

	nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
				sizeof(struct nfsd_file_mark), 0, 0, NULL);
	if (!nfsd_file_mark_slab) {
		pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
		goto out_err;
	}

	ret = list_lru_init(&nfsd_file_lru);
	if (ret) {
		pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
		goto out_err;
	}

	ret = register_shrinker(&nfsd_file_shrinker);
	if (ret) {
		pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
		goto out_lru;
	}

	ret = lease_register_notifier(&nfsd_file_lease_notifier);
	if (ret) {
		pr_err("nfsd: unable to register lease notifier: %d\n", ret);
		goto out_shrinker;
	}

	nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops,
							FSNOTIFY_GROUP_NOFS);
	if (IS_ERR(nfsd_file_fsnotify_group)) {
		pr_err("nfsd: unable to create fsnotify group: %ld\n",
			PTR_ERR(nfsd_file_fsnotify_group));
		ret = PTR_ERR(nfsd_file_fsnotify_group);
		nfsd_file_fsnotify_group = NULL;
		goto out_notifier;
	}

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
		spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
	}

	INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
out:
	return ret;
out_notifier:
	lease_unregister_notifier(&nfsd_file_lease_notifier);
out_shrinker:
	unregister_shrinker(&nfsd_file_shrinker);
out_lru:
	list_lru_destroy(&nfsd_file_lru);
out_err:
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kvfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	destroy_workqueue(nfsd_filecache_wq);
	nfsd_filecache_wq = NULL;
	goto out;
}
/*
 * Note this can deadlock with nfsd_file_lru_cb.
 */
void
nfsd_file_cache_purge(struct net *net)
{
	unsigned int i;
	struct nfsd_file *nf;
	struct hlist_node *next;
	LIST_HEAD(dispose);
	bool del;

	if (!nfsd_file_hashtbl)
		return;

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];

		spin_lock(&nfb->nfb_lock);
		hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
			if (net && nf->nf_net != net)
				continue;
			del = nfsd_file_unhash_and_release_locked(nf, &dispose);

			/*
			 * Deadlock detected! Something marked this entry as
			 * unhashed, but hasn't removed it from the hash list.
			 */
			WARN_ON_ONCE(!del);
		}
		spin_unlock(&nfb->nfb_lock);
		nfsd_file_dispose_list(&dispose);
	}
}

static struct nfsd_fcache_disposal *
nfsd_alloc_fcache_disposal(void)
{
	struct nfsd_fcache_disposal *l;

	l = kmalloc(sizeof(*l), GFP_KERNEL);
	if (!l)
		return NULL;
	INIT_WORK(&l->work, nfsd_file_delayed_close);
	spin_lock_init(&l->lock);
	INIT_LIST_HEAD(&l->freeme);
	return l;
}

static void
nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
{
	cancel_work_sync(&l->work);
	nfsd_file_dispose_list(&l->freeme);
	kfree(l);
}

static void
nfsd_free_fcache_disposal_net(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	struct nfsd_fcache_disposal *l = nn->fcache_disposal;

	nfsd_free_fcache_disposal(l);
}

int
nfsd_file_cache_start_net(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	nn->fcache_disposal = nfsd_alloc_fcache_disposal();
	return nn->fcache_disposal ? 0 : -ENOMEM;
}

void
nfsd_file_cache_shutdown_net(struct net *net)
{
	nfsd_file_cache_purge(net);
	nfsd_free_fcache_disposal_net(net);
}

void
nfsd_file_cache_shutdown(void)
{
	set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	lease_unregister_notifier(&nfsd_file_lease_notifier);
	unregister_shrinker(&nfsd_file_shrinker);
	/*
	 * make sure all callers of nfsd_file_lru_cb are done before
	 * calling nfsd_file_cache_purge
	 */
	cancel_delayed_work_sync(&nfsd_filecache_laundrette);
	nfsd_file_cache_purge(NULL);
	list_lru_destroy(&nfsd_file_lru);
	rcu_barrier();
	fsnotify_put_group(nfsd_file_fsnotify_group);
	nfsd_file_fsnotify_group = NULL;
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	fsnotify_wait_marks_destroyed();
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kvfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	destroy_workqueue(nfsd_filecache_wq);
	nfsd_filecache_wq = NULL;
}

static bool
nfsd_match_cred(const struct cred *c1, const struct cred *c2)
{
	int i;

	if (!uid_eq(c1->fsuid, c2->fsuid))
		return false;
	if (!gid_eq(c1->fsgid, c2->fsgid))
		return false;
	if (c1->group_info == NULL || c2->group_info == NULL)
		return c1->group_info == c2->group_info;
	if (c1->group_info->ngroups != c2->group_info->ngroups)
		return false;
	for (i = 0; i < c1->group_info->ngroups; i++) {
		if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
			return false;
	}
	return true;
}

static struct nfsd_file *
nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
		      unsigned int hashval, struct net *net)
{
	struct nfsd_file *nf;
	unsigned char need = may_flags & NFSD_FILE_MAY_MASK;

	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
			nf_node, lockdep_is_held(&nfsd_file_hashtbl[hashval].nfb_lock)) {
		if (nf->nf_may != need)
			continue;
		if (nf->nf_inode != inode)
			continue;
		if (nf->nf_net != net)
			continue;
		if (!nfsd_match_cred(nf->nf_cred, current_cred()))
			continue;
		if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags))
			continue;
		if (nfsd_file_get(nf) != NULL)
			return nf;
	}
	return NULL;
}
/**
 * nfsd_file_is_cached - are there any cached open files for this inode?
 * @inode: inode of the file to check
 *
 * Scan the hashtable for open files that match this inode. Returns true if
 * there are any, and false if not.
 */
bool
nfsd_file_is_cached(struct inode *inode)
{
	bool ret = false;
	struct nfsd_file *nf;
	unsigned int hashval;

	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);

	rcu_read_lock();
	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node) {
		if (inode == nf->nf_inode) {
			ret = true;
			break;
		}
	}
	rcu_read_unlock();
	trace_nfsd_file_is_cached(inode, hashval, (int)ret);
	return ret;
}
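/*
 * Acquire path, in outline: verify the filehandle, then try a lockless
 * RCU lookup in the hash bucket. On a miss, allocate a new nfsd_file,
 * retake the bucket lock and re-check for a racing insertion before
 * hashing the new entry with NFSD_FILE_PENDING set. Other threads that
 * find the entry wait on that bit until construction (the actual
 * nfsd_open_verified() call) has finished or failed.
 */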
static __be32
nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		     unsigned int may_flags, struct nfsd_file **pnf, bool open)
{
	__be32 status;
	struct net *net = SVC_NET(rqstp);
	struct nfsd_file *nf, *new;
	struct inode *inode;
	unsigned int hashval;
	bool retry = true;

	/* FIXME: skip this if fh_dentry is already set? */
	status = fh_verify(rqstp, fhp, S_IFREG,
			   may_flags|NFSD_MAY_OWNER_OVERRIDE);
	if (status != nfs_ok)
		return status;

	inode = d_inode(fhp->fh_dentry);
	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
retry:
	rcu_read_lock();
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	rcu_read_unlock();
	if (nf)
		goto wait_for_construction;

	new = nfsd_file_alloc(inode, may_flags, hashval, net);
	if (!new) {
		trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
					NULL, nfserr_jukebox);
		return nfserr_jukebox;
	}

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	if (nf == NULL)
		goto open_file;
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nfsd_file_slab_free(&new->nf_rcu);

wait_for_construction:
	wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);

	/* Did construction of this file fail? */
	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		if (!retry) {
			status = nfserr_jukebox;
			goto out;
		}
		retry = false;
		nfsd_file_put_noref(nf);
		goto retry;
	}

	this_cpu_inc(nfsd_file_cache_hits);

	if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
		bool write = (may_flags & NFSD_MAY_WRITE);

		if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
		    (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
			status = nfserrno(nfsd_open_break_lease(
					file_inode(nf->nf_file), may_flags));
			if (status == nfs_ok) {
				clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
				if (write)
					clear_bit(NFSD_FILE_BREAK_WRITE,
						  &nf->nf_flags);
			}
		}
	}
out:
	if (status == nfs_ok) {
		*pnf = nf;
	} else {
		nfsd_file_put(nf);
		nf = NULL;
	}

	trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
	return status;
open_file:
	nf = new;
	/* Take reference for the hashtable */
	refcount_inc(&nf->nf_ref);
	__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
	__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
	list_lru_add(&nfsd_file_lru, &nf->nf_lru);
	hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
	++nfsd_file_hashtbl[hashval].nfb_count;
	nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
			nfsd_file_hashtbl[hashval].nfb_count);
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD)
		nfsd_file_gc();

	nf->nf_mark = nfsd_file_mark_find_or_create(nf);
	if (nf->nf_mark) {
		if (open) {
			status = nfsd_open_verified(rqstp, fhp, may_flags,
						    &nf->nf_file);
			trace_nfsd_file_open(nf, status);
		} else
			status = nfs_ok;
	} else
		status = nfserr_jukebox;
	/*
	 * If construction failed, or we raced with a call to unlink()
	 * then unhash.
	 */
	if (status != nfs_ok || inode->i_nlink == 0) {
		bool do_free;

		spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
		do_free = nfsd_file_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
		if (do_free)
			nfsd_file_put_noref(nf);
	}
	clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
	smp_mb__after_atomic();
	wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
	goto out;
}

/**
 * nfsd_file_acquire - Get a struct nfsd_file with an open file
 * @rqstp: the RPC transaction being executed
 * @fhp: the NFS filehandle of the file to be opened
 * @may_flags: NFSD_MAY_ settings for the file
 * @pnf: OUT: new or found "struct nfsd_file" object
 *
 * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
 * network byte order is returned.
 */
__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		  unsigned int may_flags, struct nfsd_file **pnf)
{
	return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, true);
}
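/*
 * Typical caller pattern (a sketch, not taken verbatim from any one
 * call site): an NFSD operation brackets its I/O with an acquire and a
 * put of the cached open file, e.g.
 *
 *	struct nfsd_file *nf;
 *	__be32 status = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
 *	if (status != nfs_ok)
 *		return status;
 *	... do the read via nf->nf_file ...
 *	nfsd_file_put(nf);
 */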
/**
 * nfsd_file_create - Get a struct nfsd_file, do not open
 * @rqstp: the RPC transaction being executed
 * @fhp: the NFS filehandle of the file just created
 * @may_flags: NFSD_MAY_ settings for the file
 * @pnf: OUT: new or found "struct nfsd_file" object
 *
 * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
 * network byte order is returned.
 */
__be32
nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
		 unsigned int may_flags, struct nfsd_file **pnf)
{
	return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, false);
}

/*
 * Note that fields may be added, removed or reordered in the future. Programs
 * scraping this file for info should test the labels to ensure they're
 * getting the correct field.
 */
static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
{
	unsigned int i, count = 0, longest = 0;
	unsigned long hits = 0;

	/*
	 * No need for spinlocks here since we're not terribly interested in
	 * accuracy. We do take the nfsd_mutex simply to ensure that we
	 * don't end up racing with server shutdown.
	 */
	mutex_lock(&nfsd_mutex);
	if (nfsd_file_hashtbl) {
		for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
			count += nfsd_file_hashtbl[i].nfb_count;
			longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
		}
	}
	mutex_unlock(&nfsd_mutex);

	for_each_possible_cpu(i)
		hits += per_cpu(nfsd_file_cache_hits, i);

	seq_printf(m, "total entries: %u\n", count);
	seq_printf(m, "longest chain: %u\n", longest);
	seq_printf(m, "cache hits: %lu\n", hits);
	return 0;
}

int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, nfsd_file_cache_stats_show, NULL);
}