filecache.c
/*
 * Open file cache.
 *
 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
 */

#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/list_lru.h>
#include <linux/fsnotify_backend.h>
#include <linux/fsnotify.h>
#include <linux/seq_file.h>

#include "vfs.h"
#include "nfsd.h"
#include "nfsfh.h"
#include "netns.h"
#include "filecache.h"
#include "trace.h"

#define NFSDDBG_FACILITY	NFSDDBG_FH

/* FIXME: dynamically size this for the machine somehow? */
#define NFSD_FILE_HASH_BITS	12
#define NFSD_FILE_HASH_SIZE	(1 << NFSD_FILE_HASH_BITS)
#define NFSD_LAUNDRETTE_DELAY	(2 * HZ)

#define NFSD_FILE_SHUTDOWN	(1)
#define NFSD_FILE_LRU_THRESHOLD	(4096UL)
#define NFSD_FILE_LRU_LIMIT	(NFSD_FILE_LRU_THRESHOLD << 2)

/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)

struct nfsd_fcache_bucket {
	struct hlist_head	nfb_head;
	spinlock_t		nfb_lock;
	unsigned int		nfb_count;
	unsigned int		nfb_maxcount;
};

static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);

struct nfsd_fcache_disposal {
	struct work_struct work;
	spinlock_t lock;
	struct list_head freeme;
};

static struct workqueue_struct *nfsd_filecache_wq __read_mostly;

static struct kmem_cache		*nfsd_file_slab;
static struct kmem_cache		*nfsd_file_mark_slab;
static struct nfsd_fcache_bucket	*nfsd_file_hashtbl;
static struct list_lru			nfsd_file_lru;
static long				nfsd_file_lru_flags;
static struct fsnotify_group		*nfsd_file_fsnotify_group;
static atomic_long_t			nfsd_filecache_count;
static struct delayed_work		nfsd_filecache_laundrette;

static void nfsd_file_gc(void);

static void
nfsd_file_schedule_laundrette(void)
{
	long count = atomic_long_read(&nfsd_filecache_count);

	if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
		return;

	queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
			   NFSD_LAUNDRETTE_DELAY);
}

static void
nfsd_file_slab_free(struct rcu_head *rcu)
{
	struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);

	put_cred(nf->nf_cred);
	kmem_cache_free(nfsd_file_slab, nf);
}

static void
nfsd_file_mark_free(struct fsnotify_mark *mark)
{
	struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
						  nfm_mark);

	kmem_cache_free(nfsd_file_mark_slab, nfm);
}

static struct nfsd_file_mark *
nfsd_file_mark_get(struct nfsd_file_mark *nfm)
{
	if (!refcount_inc_not_zero(&nfm->nfm_ref))
		return NULL;
	return nfm;
}

static void
nfsd_file_mark_put(struct nfsd_file_mark *nfm)
{
	if (refcount_dec_and_test(&nfm->nfm_ref)) {
		fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
		fsnotify_put_mark(&nfm->nfm_mark);
	}
}
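/*
 * Each cached inode carries one nfsd_file_mark so that fsnotify events
 * (unlink, attribute change) can flush stale entries out of the cache;
 * see nfsd_file_fsnotify_handle_event() below. The mark has two
 * reference counts: nfm_ref governs the cache's use of the structure,
 * while the embedded fsnotify_mark carries its own count owned by the
 * fsnotify core.
 */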
static struct nfsd_file_mark *
nfsd_file_mark_find_or_create(struct nfsd_file *nf)
{
	int err;
	struct fsnotify_mark *mark;
	struct nfsd_file_mark *nfm = NULL, *new;
	struct inode *inode = nf->nf_inode;

	do {
		fsnotify_group_lock(nfsd_file_fsnotify_group);
		mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
					  nfsd_file_fsnotify_group);
		if (mark) {
			nfm = nfsd_file_mark_get(container_of(mark,
						 struct nfsd_file_mark,
						 nfm_mark));
			fsnotify_group_unlock(nfsd_file_fsnotify_group);
			if (nfm) {
				fsnotify_put_mark(mark);
				break;
			}
			/* Avoid soft lockup race with nfsd_file_mark_put() */
			fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
			fsnotify_put_mark(mark);
		} else {
			fsnotify_group_unlock(nfsd_file_fsnotify_group);
		}

		/* allocate a new nfm */
		new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
		if (!new)
			return NULL;
		fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
		new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
		refcount_set(&new->nfm_ref, 1);

		err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);

		/*
		 * If the add was successful, then return the object.
		 * Otherwise, we need to put the reference we hold on the
		 * nfm_mark. The fsnotify code will take a reference and put
		 * it on failure, so we can't just free it directly. It's also
		 * not safe to call fsnotify_destroy_mark on it as the
		 * mark->group will be NULL. Thus, we can't let the nfm_ref
		 * counter drive the destruction at this point.
		 */
		if (likely(!err))
			nfm = new;
		else
			fsnotify_put_mark(&new->nfm_mark);
	} while (unlikely(err == -EEXIST));

	return nfm;
}

static struct nfsd_file *
nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
		struct net *net)
{
	struct nfsd_file *nf;

	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
	if (nf) {
		INIT_HLIST_NODE(&nf->nf_node);
		INIT_LIST_HEAD(&nf->nf_lru);
		nf->nf_file = NULL;
		nf->nf_cred = get_current_cred();
		nf->nf_net = net;
		nf->nf_flags = 0;
		nf->nf_inode = inode;
		nf->nf_hashval = hashval;
		refcount_set(&nf->nf_ref, 1);
		nf->nf_may = may & NFSD_FILE_MAY_MASK;
		if (may & NFSD_MAY_NOT_BREAK_LEASE) {
			if (may & NFSD_MAY_WRITE)
				__set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
			if (may & NFSD_MAY_READ)
				__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
		}
		nf->nf_mark = NULL;
		trace_nfsd_file_alloc(nf);
	}
	return nf;
}

static bool
nfsd_file_free(struct nfsd_file *nf)
{
	bool flush = false;

	trace_nfsd_file_put_final(nf);
	if (nf->nf_mark)
		nfsd_file_mark_put(nf->nf_mark);
	if (nf->nf_file) {
		get_file(nf->nf_file);
		filp_close(nf->nf_file, NULL);
		fput(nf->nf_file);
		flush = true;
	}
	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
	return flush;
}

static bool
nfsd_file_check_writeback(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;
	struct address_space *mapping;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return false;
	mapping = file->f_mapping;
	return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
		mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
}

static int
nfsd_file_check_write_error(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return 0;
	return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
}

static void
nfsd_file_flush(struct nfsd_file *nf)
{
	if (nf->nf_file && vfs_fsync(nf->nf_file, 1) != 0)
		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
}
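/*
 * Unhashing discipline: the hash table owns one reference to each
 * nfsd_file. The helpers below drop an entry's bucket accounting under
 * the bucket lock; what becomes of that final reference is up to the
 * caller (see nfsd_file_unhash_and_release_locked()).
 */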
static void
nfsd_file_do_unhash(struct nfsd_file *nf)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash(nf);

	if (nfsd_file_check_write_error(nf))
		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
	--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
	hlist_del_rcu(&nf->nf_node);
	atomic_long_dec(&nfsd_filecache_count);
}

static bool
nfsd_file_unhash(struct nfsd_file *nf)
{
	if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		nfsd_file_do_unhash(nf);
		if (!list_empty(&nf->nf_lru))
			list_lru_del(&nfsd_file_lru, &nf->nf_lru);
		return true;
	}
	return false;
}

/*
 * Return true if the file was unhashed.
 */
static bool
nfsd_file_unhash_and_release_locked(struct nfsd_file *nf,
				    struct list_head *dispose)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash_and_release_locked(nf);
	if (!nfsd_file_unhash(nf))
		return false;
	/* keep final reference for nfsd_file_lru_dispose */
	if (refcount_dec_not_one(&nf->nf_ref))
		return true;

	list_add(&nf->nf_lru, dispose);
	return true;
}

static void
nfsd_file_put_noref(struct nfsd_file *nf)
{
	trace_nfsd_file_put(nf);

	if (refcount_dec_and_test(&nf->nf_ref)) {
		WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
		nfsd_file_free(nf);
	}
}

void
nfsd_file_put(struct nfsd_file *nf)
{
	might_sleep();

	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
	if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
		nfsd_file_flush(nf);
		nfsd_file_put_noref(nf);
	} else if (nf->nf_file) {
		nfsd_file_put_noref(nf);
		nfsd_file_schedule_laundrette();
	} else
		nfsd_file_put_noref(nf);

	if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
		nfsd_file_gc();
}

struct nfsd_file *
nfsd_file_get(struct nfsd_file *nf)
{
	if (likely(refcount_inc_not_zero(&nf->nf_ref)))
		return nf;
	return NULL;
}

static void
nfsd_file_dispose_list(struct list_head *dispose)
{
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		nfsd_file_flush(nf);
		nfsd_file_put_noref(nf);
	}
}

static void
nfsd_file_dispose_list_sync(struct list_head *dispose)
{
	bool flush = false;
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		nfsd_file_flush(nf);
		if (!refcount_dec_and_test(&nf->nf_ref))
			continue;
		if (nfsd_file_free(nf))
			flush = true;
	}
	if (flush)
		flush_delayed_fput();
}

static void
nfsd_file_list_remove_disposal(struct list_head *dst,
			       struct nfsd_fcache_disposal *l)
{
	spin_lock(&l->lock);
	list_splice_init(&l->freeme, dst);
	spin_unlock(&l->lock);
}

static void
nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	struct nfsd_fcache_disposal *l = nn->fcache_disposal;

	spin_lock(&l->lock);
	list_splice_tail_init(files, &l->freeme);
	spin_unlock(&l->lock);
	queue_work(nfsd_filecache_wq, &l->work);
}

static void
nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
			  struct net *net)
{
	struct nfsd_file *nf, *tmp;

	list_for_each_entry_safe(nf, tmp, src, nf_lru) {
		if (nf->nf_net == net)
			list_move_tail(&nf->nf_lru, dst);
	}
}
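/*
 * Deferred disposal: entries that cannot be closed in the caller's
 * context are sorted by their network namespace, spliced onto that
 * namespace's fcache_disposal list, and handed to the filecache
 * workqueue, where nfsd_file_delayed_close() does the actual close.
 */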
static void
nfsd_file_dispose_list_delayed(struct list_head *dispose)
{
	LIST_HEAD(list);
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
		nfsd_file_list_add_disposal(&list, nf->nf_net);
	}
}

/*
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
		 spinlock_t *lock, void *arg)
	__releases(lock)
	__acquires(lock)
{
	struct list_head *head = arg;
	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);

	/*
	 * Do a lockless refcount check. The hashtable holds one reference, so
	 * we look to see if anything else has a reference, or if any have
	 * been put since the shrinker last ran. Those don't get unhashed and
	 * released.
	 *
	 * Note that in the put path, we set the flag and then decrement the
	 * counter. Here we check the counter and then test and clear the flag.
	 * That order is deliberate to ensure that we can do this locklessly.
	 */
	if (refcount_read(&nf->nf_ref) > 1)
		goto out_skip;

	/*
	 * Don't throw out files that are still undergoing I/O or
	 * that have uncleared errors pending.
	 */
	if (nfsd_file_check_writeback(nf))
		goto out_skip;

	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
		goto out_skip;

	if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
		goto out_skip;

	list_lru_isolate_move(lru, &nf->nf_lru, head);
	return LRU_REMOVED;
out_skip:
	return LRU_SKIP;
}

static unsigned long
nfsd_file_lru_walk_list(struct shrink_control *sc)
{
	LIST_HEAD(head);
	struct nfsd_file *nf;
	unsigned long ret;

	if (sc)
		ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
					   nfsd_file_lru_cb, &head);
	else
		ret = list_lru_walk(&nfsd_file_lru,
				    nfsd_file_lru_cb,
				    &head, LONG_MAX);
	list_for_each_entry(nf, &head, nf_lru) {
		spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
		nfsd_file_do_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
	}
	nfsd_file_dispose_list_delayed(&head);
	return ret;
}

static void
nfsd_file_gc(void)
{
	nfsd_file_lru_walk_list(NULL);
}

static void
nfsd_file_gc_worker(struct work_struct *work)
{
	nfsd_file_gc();
	nfsd_file_schedule_laundrette();
}

static unsigned long
nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
{
	return list_lru_count(&nfsd_file_lru);
}

static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
	return nfsd_file_lru_walk_list(sc);
}

static struct shrinker nfsd_file_shrinker = {
	.scan_objects = nfsd_file_lru_scan,
	.count_objects = nfsd_file_lru_count,
	.seeks = 1,
};

static void
__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
			struct list_head *dispose)
{
	struct nfsd_file *nf;
	struct hlist_node *tmp;

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
		if (inode == nf->nf_inode)
			nfsd_file_unhash_and_release_locked(nf, dispose);
	}
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
}
/**
 * nfsd_file_close_inode_sync - attempt to forcibly close an nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put. Also ensure that any of the
 * fputs also have their final __fput done as well.
 */
void
nfsd_file_close_inode_sync(struct inode *inode)
{
	unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_sync(&dispose);
}

/**
 * nfsd_file_close_inode - attempt a delayed close of an nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put.
 */
static void
nfsd_file_close_inode(struct inode *inode)
{
	unsigned int hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_delayed(&dispose);
}

/**
 * nfsd_file_delayed_close - close unused nfsd_files
 * @work: dummy
 *
 * Pull the entries queued on this namespace's disposal list by
 * nfsd_file_list_add_disposal() and close them.
 *
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static void
nfsd_file_delayed_close(struct work_struct *work)
{
	LIST_HEAD(head);
	struct nfsd_fcache_disposal *l = container_of(work,
			struct nfsd_fcache_disposal, work);

	nfsd_file_list_remove_disposal(&head, l);
	nfsd_file_dispose_list(&head);
}

static int
nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
			      void *data)
{
	struct file_lock *fl = data;

	/* Only close files for F_SETLEASE leases */
	if (fl->fl_flags & FL_LEASE)
		nfsd_file_close_inode_sync(file_inode(fl->fl_file));
	return 0;
}

static struct notifier_block nfsd_file_lease_notifier = {
	.notifier_call = nfsd_file_lease_notifier_call,
};

static int
nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
				struct inode *inode, struct inode *dir,
				const struct qstr *name, u32 cookie)
{
	if (WARN_ON_ONCE(!inode))
		return 0;

	trace_nfsd_file_fsnotify_handle_event(inode, mask);

	/* Should be no marks on non-regular files */
	if (!S_ISREG(inode->i_mode)) {
		WARN_ON_ONCE(1);
		return 0;
	}

	/* don't close files if this was not the last link */
	if (mask & FS_ATTRIB) {
		if (inode->i_nlink)
			return 0;
	}

	nfsd_file_close_inode(inode);
	return 0;
}

static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
	.handle_inode_event = nfsd_file_fsnotify_handle_event,
	.free_mark = nfsd_file_mark_free,
};
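/*
 * Cache-wide setup: allocate the workqueue, hash table and slabs, then
 * register the LRU, shrinker, lease notifier and fsnotify group. The
 * error path unwinds in reverse order, and nfsd_file_cache_shutdown()
 * mirrors the same sequence at teardown.
 */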
int
nfsd_file_cache_init(void)
{
	int ret = -ENOMEM;
	unsigned int i;

	clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	if (nfsd_file_hashtbl)
		return 0;

	nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0);
	if (!nfsd_filecache_wq)
		goto out;

	nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE,
				     sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
	if (!nfsd_file_hashtbl) {
		pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
		goto out_err;
	}

	nfsd_file_slab = kmem_cache_create("nfsd_file",
				sizeof(struct nfsd_file), 0, 0, NULL);
	if (!nfsd_file_slab) {
		pr_err("nfsd: unable to create nfsd_file_slab\n");
		goto out_err;
	}

	nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
				sizeof(struct nfsd_file_mark), 0, 0, NULL);
	if (!nfsd_file_mark_slab) {
		pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
		goto out_err;
	}

	ret = list_lru_init(&nfsd_file_lru);
	if (ret) {
		pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
		goto out_err;
	}

	ret = register_shrinker(&nfsd_file_shrinker);
	if (ret) {
		pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
		goto out_lru;
	}

	ret = lease_register_notifier(&nfsd_file_lease_notifier);
	if (ret) {
		pr_err("nfsd: unable to register lease notifier: %d\n", ret);
		goto out_shrinker;
	}

	nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops,
							FSNOTIFY_GROUP_NOFS);
	if (IS_ERR(nfsd_file_fsnotify_group)) {
		pr_err("nfsd: unable to create fsnotify group: %ld\n",
			PTR_ERR(nfsd_file_fsnotify_group));
		ret = PTR_ERR(nfsd_file_fsnotify_group);
		nfsd_file_fsnotify_group = NULL;
		goto out_notifier;
	}

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
		spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
	}

	INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
out:
	return ret;
out_notifier:
	lease_unregister_notifier(&nfsd_file_lease_notifier);
out_shrinker:
	unregister_shrinker(&nfsd_file_shrinker);
out_lru:
	list_lru_destroy(&nfsd_file_lru);
out_err:
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kvfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	destroy_workqueue(nfsd_filecache_wq);
	nfsd_filecache_wq = NULL;
	goto out;
}
/*
 * Note this can deadlock with nfsd_file_lru_cb.
 */
void
nfsd_file_cache_purge(struct net *net)
{
	unsigned int i;
	struct nfsd_file *nf;
	struct hlist_node *next;
	LIST_HEAD(dispose);
	bool del;

	if (!nfsd_file_hashtbl)
		return;

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];

		spin_lock(&nfb->nfb_lock);
		hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
			if (net && nf->nf_net != net)
				continue;
			del = nfsd_file_unhash_and_release_locked(nf, &dispose);

			/*
			 * Deadlock detected! Something marked this entry as
			 * unhashed, but hasn't removed it from the hash list.
			 */
			WARN_ON_ONCE(!del);
		}
		spin_unlock(&nfb->nfb_lock);
		nfsd_file_dispose_list(&dispose);
	}
}

static struct nfsd_fcache_disposal *
nfsd_alloc_fcache_disposal(void)
{
	struct nfsd_fcache_disposal *l;

	l = kmalloc(sizeof(*l), GFP_KERNEL);
	if (!l)
		return NULL;
	INIT_WORK(&l->work, nfsd_file_delayed_close);
	spin_lock_init(&l->lock);
	INIT_LIST_HEAD(&l->freeme);
	return l;
}

static void
nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
{
	cancel_work_sync(&l->work);
	nfsd_file_dispose_list(&l->freeme);
	kfree(l);
}

static void
nfsd_free_fcache_disposal_net(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	struct nfsd_fcache_disposal *l = nn->fcache_disposal;

	nfsd_free_fcache_disposal(l);
}

int
nfsd_file_cache_start_net(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	nn->fcache_disposal = nfsd_alloc_fcache_disposal();
	return nn->fcache_disposal ? 0 : -ENOMEM;
}

void
nfsd_file_cache_shutdown_net(struct net *net)
{
	nfsd_file_cache_purge(net);
	nfsd_free_fcache_disposal_net(net);
}

void
nfsd_file_cache_shutdown(void)
{
	set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	lease_unregister_notifier(&nfsd_file_lease_notifier);
	unregister_shrinker(&nfsd_file_shrinker);
	/*
	 * make sure all callers of nfsd_file_lru_cb are done before
	 * calling nfsd_file_cache_purge
	 */
	cancel_delayed_work_sync(&nfsd_filecache_laundrette);
	nfsd_file_cache_purge(NULL);
	list_lru_destroy(&nfsd_file_lru);
	rcu_barrier();
	fsnotify_put_group(nfsd_file_fsnotify_group);
	nfsd_file_fsnotify_group = NULL;
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	fsnotify_wait_marks_destroyed();
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kvfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	destroy_workqueue(nfsd_filecache_wq);
	nfsd_filecache_wq = NULL;
}

static bool
nfsd_match_cred(const struct cred *c1, const struct cred *c2)
{
	int i;

	if (!uid_eq(c1->fsuid, c2->fsuid))
		return false;
	if (!gid_eq(c1->fsgid, c2->fsgid))
		return false;
	if (c1->group_info == NULL || c2->group_info == NULL)
		return c1->group_info == c2->group_info;
	if (c1->group_info->ngroups != c2->group_info->ngroups)
		return false;
	for (i = 0; i < c1->group_info->ngroups; i++) {
		if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
			return false;
	}
	return true;
}

static struct nfsd_file *
nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
		      unsigned int hashval, struct net *net)
{
	struct nfsd_file *nf;
	unsigned char need = may_flags & NFSD_FILE_MAY_MASK;

	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
			nf_node, lockdep_is_held(&nfsd_file_hashtbl[hashval].nfb_lock)) {
		if (nf->nf_may != need)
			continue;
		if (nf->nf_inode != inode)
			continue;
		if (nf->nf_net != net)
			continue;
		if (!nfsd_match_cred(nf->nf_cred, current_cred()))
			continue;
		if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags))
			continue;
		if (nfsd_file_get(nf) != NULL)
			return nf;
	}
	return NULL;
}
/**
 * nfsd_file_is_cached - are there any cached open files for this inode?
 * @inode: inode of the file to check
 *
 * Scan the hashtable for open files that match this inode. Returns true if
 * there are any, and false if not.
 */
bool
nfsd_file_is_cached(struct inode *inode)
{
	bool ret = false;
	struct nfsd_file *nf;
	unsigned int hashval;

	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);

	rcu_read_lock();
	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node) {
		if (inode == nf->nf_inode) {
			ret = true;
			break;
		}
	}
	rcu_read_unlock();
	trace_nfsd_file_is_cached(inode, hashval, (int)ret);
	return ret;
}
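/*
 * Acquire path, in outline: verify the filehandle, then try a lockless
 * RCU lookup in the hash bucket. On a miss, allocate a new nfsd_file,
 * retake the bucket lock and re-check for a racing insertion before
 * hashing the new entry with NFSD_FILE_PENDING set. Other threads that
 * find the entry wait on that bit until construction (the actual
 * nfsd_open_verified() call) has finished or failed.
 */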
static __be32
nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		     unsigned int may_flags, struct nfsd_file **pnf, bool open)
{
	__be32 status;
	struct net *net = SVC_NET(rqstp);
	struct nfsd_file *nf, *new;
	struct inode *inode;
	unsigned int hashval;
	bool retry = true;

	/* FIXME: skip this if fh_dentry is already set? */
	status = fh_verify(rqstp, fhp, S_IFREG,
			   may_flags|NFSD_MAY_OWNER_OVERRIDE);
	if (status != nfs_ok)
		return status;

	inode = d_inode(fhp->fh_dentry);
	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
retry:
	rcu_read_lock();
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	rcu_read_unlock();
	if (nf)
		goto wait_for_construction;

	new = nfsd_file_alloc(inode, may_flags, hashval, net);
	if (!new) {
		trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
					NULL, nfserr_jukebox);
		return nfserr_jukebox;
	}

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	if (nf == NULL)
		goto open_file;
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nfsd_file_slab_free(&new->nf_rcu);

wait_for_construction:
	wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);

	/* Did construction of this file fail? */
	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		if (!retry) {
			status = nfserr_jukebox;
			goto out;
		}
		retry = false;
		nfsd_file_put_noref(nf);
		goto retry;
	}

	this_cpu_inc(nfsd_file_cache_hits);

	if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
		bool write = (may_flags & NFSD_MAY_WRITE);

		if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
		    (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
			status = nfserrno(nfsd_open_break_lease(
					file_inode(nf->nf_file), may_flags));
			if (status == nfs_ok) {
				clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
				if (write)
					clear_bit(NFSD_FILE_BREAK_WRITE,
						  &nf->nf_flags);
			}
		}
	}
out:
	if (status == nfs_ok) {
		*pnf = nf;
	} else {
		nfsd_file_put(nf);
		nf = NULL;
	}

	trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
	return status;
open_file:
	nf = new;
	/* Take reference for the hashtable */
	refcount_inc(&nf->nf_ref);
	__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
	__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
	list_lru_add(&nfsd_file_lru, &nf->nf_lru);
	hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
	++nfsd_file_hashtbl[hashval].nfb_count;
	nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
			nfsd_file_hashtbl[hashval].nfb_count);
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD)
		nfsd_file_gc();

	nf->nf_mark = nfsd_file_mark_find_or_create(nf);
	if (nf->nf_mark) {
		if (open) {
			status = nfsd_open_verified(rqstp, fhp, may_flags,
						    &nf->nf_file);
			trace_nfsd_file_open(nf, status);
		} else
			status = nfs_ok;
	} else
		status = nfserr_jukebox;
	/*
	 * If construction failed, or we raced with a call to unlink()
	 * then unhash.
	 */
	if (status != nfs_ok || inode->i_nlink == 0) {
		bool do_free;

		spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
		do_free = nfsd_file_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
		if (do_free)
			nfsd_file_put_noref(nf);
	}
	clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
	smp_mb__after_atomic();
	wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
	goto out;
}

/**
 * nfsd_file_acquire - Get a struct nfsd_file with an open file
 * @rqstp: the RPC transaction being executed
 * @fhp: the NFS filehandle of the file to be opened
 * @may_flags: NFSD_MAY_ settings for the file
 * @pnf: OUT: new or found "struct nfsd_file" object
 *
 * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
 * network byte order is returned.
 */
__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		  unsigned int may_flags, struct nfsd_file **pnf)
{
	return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, true);
}
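/*
 * Typical caller pattern (a sketch, not taken verbatim from any one
 * call site): an NFSD operation brackets its I/O with an acquire and a
 * put of the cached open file, e.g.
 *
 *	struct nfsd_file *nf;
 *	__be32 status = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
 *	if (status != nfs_ok)
 *		return status;
 *	... do the read via nf->nf_file ...
 *	nfsd_file_put(nf);
 */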
/**
 * nfsd_file_create - Get a struct nfsd_file, do not open
 * @rqstp: the RPC transaction being executed
 * @fhp: the NFS filehandle of the file just created
 * @may_flags: NFSD_MAY_ settings for the file
 * @pnf: OUT: new or found "struct nfsd_file" object
 *
 * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
 * network byte order is returned.
 */
__be32
nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
		 unsigned int may_flags, struct nfsd_file **pnf)
{
	return nfsd_do_file_acquire(rqstp, fhp, may_flags, pnf, false);
}

/*
 * Note that fields may be added, removed or reordered in the future. Programs
 * scraping this file for info should test the labels to ensure they're
 * getting the correct field.
 */
static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
{
	unsigned int i, count = 0, longest = 0;
	unsigned long hits = 0;

	/*
	 * No need for spinlocks here since we're not terribly interested in
	 * accuracy. We do take the nfsd_mutex simply to ensure that we
	 * don't end up racing with server shutdown.
	 */
	mutex_lock(&nfsd_mutex);
	if (nfsd_file_hashtbl) {
		for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
			count += nfsd_file_hashtbl[i].nfb_count;
			longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
		}
	}
	mutex_unlock(&nfsd_mutex);

	for_each_possible_cpu(i)
		hits += per_cpu(nfsd_file_cache_hits, i);

	seq_printf(m, "total entries: %u\n", count);
	seq_printf(m, "longest chain: %u\n", longest);
	seq_printf(m, "cache hits: %lu\n", hits);
	return 0;
}

int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, nfsd_file_cache_stats_show, NULL);
}