expfs.c (15092B)
1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * Copyright (C) Neil Brown 2002 4 * Copyright (C) Christoph Hellwig 2007 5 * 6 * This file contains the code mapping from inodes to NFS file handles, 7 * and for mapping back from file handles to dentries. 8 * 9 * For details on why we do all the strange and hairy things in here 10 * take a look at Documentation/filesystems/nfs/exporting.rst. 11 */ 12#include <linux/exportfs.h> 13#include <linux/fs.h> 14#include <linux/file.h> 15#include <linux/module.h> 16#include <linux/mount.h> 17#include <linux/namei.h> 18#include <linux/sched.h> 19#include <linux/cred.h> 20 21#define dprintk(fmt, args...) do{}while(0) 22 23 24static int get_name(const struct path *path, char *name, struct dentry *child); 25 26 27static int exportfs_get_name(struct vfsmount *mnt, struct dentry *dir, 28 char *name, struct dentry *child) 29{ 30 const struct export_operations *nop = dir->d_sb->s_export_op; 31 struct path path = {.mnt = mnt, .dentry = dir}; 32 33 if (nop->get_name) 34 return nop->get_name(dir, name, child); 35 else 36 return get_name(&path, name, child); 37} 38 39/* 40 * Check if the dentry or any of it's aliases is acceptable. 41 */ 42static struct dentry * 43find_acceptable_alias(struct dentry *result, 44 int (*acceptable)(void *context, struct dentry *dentry), 45 void *context) 46{ 47 struct dentry *dentry, *toput = NULL; 48 struct inode *inode; 49 50 if (acceptable(context, result)) 51 return result; 52 53 inode = result->d_inode; 54 spin_lock(&inode->i_lock); 55 hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { 56 dget(dentry); 57 spin_unlock(&inode->i_lock); 58 if (toput) 59 dput(toput); 60 if (dentry != result && acceptable(context, dentry)) { 61 dput(result); 62 return dentry; 63 } 64 spin_lock(&inode->i_lock); 65 toput = dentry; 66 } 67 spin_unlock(&inode->i_lock); 68 69 if (toput) 70 dput(toput); 71 return NULL; 72} 73 74static bool dentry_connected(struct dentry *dentry) 75{ 76 dget(dentry); 77 while (dentry->d_flags & DCACHE_DISCONNECTED) { 78 struct dentry *parent = dget_parent(dentry); 79 80 dput(dentry); 81 if (dentry == parent) { 82 dput(parent); 83 return false; 84 } 85 dentry = parent; 86 } 87 dput(dentry); 88 return true; 89} 90 91static void clear_disconnected(struct dentry *dentry) 92{ 93 dget(dentry); 94 while (dentry->d_flags & DCACHE_DISCONNECTED) { 95 struct dentry *parent = dget_parent(dentry); 96 97 WARN_ON_ONCE(IS_ROOT(dentry)); 98 99 spin_lock(&dentry->d_lock); 100 dentry->d_flags &= ~DCACHE_DISCONNECTED; 101 spin_unlock(&dentry->d_lock); 102 103 dput(dentry); 104 dentry = parent; 105 } 106 dput(dentry); 107} 108 109/* 110 * Reconnect a directory dentry with its parent. 111 * 112 * This can return a dentry, or NULL, or an error. 113 * 114 * In the first case the returned dentry is the parent of the given 115 * dentry, and may itself need to be reconnected to its parent. 116 * 117 * In the NULL case, a concurrent VFS operation has either renamed or 118 * removed this directory. The concurrent operation has reconnected our 119 * dentry, so we no longer need to. 120 */ 121static struct dentry *reconnect_one(struct vfsmount *mnt, 122 struct dentry *dentry, char *nbuf) 123{ 124 struct dentry *parent; 125 struct dentry *tmp; 126 int err; 127 128 parent = ERR_PTR(-EACCES); 129 inode_lock(dentry->d_inode); 130 if (mnt->mnt_sb->s_export_op->get_parent) 131 parent = mnt->mnt_sb->s_export_op->get_parent(dentry); 132 inode_unlock(dentry->d_inode); 133 134 if (IS_ERR(parent)) { 135 dprintk("%s: get_parent of %ld failed, err %d\n", 136 __func__, dentry->d_inode->i_ino, PTR_ERR(parent)); 137 return parent; 138 } 139 140 dprintk("%s: find name of %lu in %lu\n", __func__, 141 dentry->d_inode->i_ino, parent->d_inode->i_ino); 142 err = exportfs_get_name(mnt, parent, nbuf, dentry); 143 if (err == -ENOENT) 144 goto out_reconnected; 145 if (err) 146 goto out_err; 147 dprintk("%s: found name: %s\n", __func__, nbuf); 148 tmp = lookup_one_unlocked(mnt_user_ns(mnt), nbuf, parent, strlen(nbuf)); 149 if (IS_ERR(tmp)) { 150 dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp)); 151 err = PTR_ERR(tmp); 152 goto out_err; 153 } 154 if (tmp != dentry) { 155 /* 156 * Somebody has renamed it since exportfs_get_name(); 157 * great, since it could've only been renamed if it 158 * got looked up and thus connected, and it would 159 * remain connected afterwards. We are done. 160 */ 161 dput(tmp); 162 goto out_reconnected; 163 } 164 dput(tmp); 165 if (IS_ROOT(dentry)) { 166 err = -ESTALE; 167 goto out_err; 168 } 169 return parent; 170 171out_err: 172 dput(parent); 173 return ERR_PTR(err); 174out_reconnected: 175 dput(parent); 176 /* 177 * Someone must have renamed our entry into another parent, in 178 * which case it has been reconnected by the rename. 179 * 180 * Or someone removed it entirely, in which case filehandle 181 * lookup will succeed but the directory is now IS_DEAD and 182 * subsequent operations on it will fail. 183 * 184 * Alternatively, maybe there was no race at all, and the 185 * filesystem is just corrupt and gave us a parent that doesn't 186 * actually contain any entry pointing to this inode. So, 187 * double check that this worked and return -ESTALE if not: 188 */ 189 if (!dentry_connected(dentry)) 190 return ERR_PTR(-ESTALE); 191 return NULL; 192} 193 194/* 195 * Make sure target_dir is fully connected to the dentry tree. 196 * 197 * On successful return, DCACHE_DISCONNECTED will be cleared on 198 * target_dir, and target_dir->d_parent->...->d_parent will reach the 199 * root of the filesystem. 200 * 201 * Whenever DCACHE_DISCONNECTED is unset, target_dir is fully connected. 202 * But the converse is not true: target_dir may have DCACHE_DISCONNECTED 203 * set but already be connected. In that case we'll verify the 204 * connection to root and then clear the flag. 205 * 206 * Note that target_dir could be removed by a concurrent operation. In 207 * that case reconnect_path may still succeed with target_dir fully 208 * connected, but further operations using the filehandle will fail when 209 * necessary (due to S_DEAD being set on the directory). 210 */ 211static int 212reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf) 213{ 214 struct dentry *dentry, *parent; 215 216 dentry = dget(target_dir); 217 218 while (dentry->d_flags & DCACHE_DISCONNECTED) { 219 BUG_ON(dentry == mnt->mnt_sb->s_root); 220 221 if (IS_ROOT(dentry)) 222 parent = reconnect_one(mnt, dentry, nbuf); 223 else 224 parent = dget_parent(dentry); 225 226 if (!parent) 227 break; 228 dput(dentry); 229 if (IS_ERR(parent)) 230 return PTR_ERR(parent); 231 dentry = parent; 232 } 233 dput(dentry); 234 clear_disconnected(target_dir); 235 return 0; 236} 237 238struct getdents_callback { 239 struct dir_context ctx; 240 char *name; /* name that was found. It already points to a 241 buffer NAME_MAX+1 is size */ 242 u64 ino; /* the inum we are looking for */ 243 int found; /* inode matched? */ 244 int sequence; /* sequence counter */ 245}; 246 247/* 248 * A rather strange filldir function to capture 249 * the name matching the specified inode number. 250 */ 251static int filldir_one(struct dir_context *ctx, const char *name, int len, 252 loff_t pos, u64 ino, unsigned int d_type) 253{ 254 struct getdents_callback *buf = 255 container_of(ctx, struct getdents_callback, ctx); 256 int result = 0; 257 258 buf->sequence++; 259 if (buf->ino == ino && len <= NAME_MAX) { 260 memcpy(buf->name, name, len); 261 buf->name[len] = '\0'; 262 buf->found = 1; 263 result = -1; 264 } 265 return result; 266} 267 268/** 269 * get_name - default export_operations->get_name function 270 * @path: the directory in which to find a name 271 * @name: a pointer to a %NAME_MAX+1 char buffer to store the name 272 * @child: the dentry for the child directory. 273 * 274 * calls readdir on the parent until it finds an entry with 275 * the same inode number as the child, and returns that. 276 */ 277static int get_name(const struct path *path, char *name, struct dentry *child) 278{ 279 const struct cred *cred = current_cred(); 280 struct inode *dir = path->dentry->d_inode; 281 int error; 282 struct file *file; 283 struct kstat stat; 284 struct path child_path = { 285 .mnt = path->mnt, 286 .dentry = child, 287 }; 288 struct getdents_callback buffer = { 289 .ctx.actor = filldir_one, 290 .name = name, 291 }; 292 293 error = -ENOTDIR; 294 if (!dir || !S_ISDIR(dir->i_mode)) 295 goto out; 296 error = -EINVAL; 297 if (!dir->i_fop) 298 goto out; 299 /* 300 * inode->i_ino is unsigned long, kstat->ino is u64, so the 301 * former would be insufficient on 32-bit hosts when the 302 * filesystem supports 64-bit inode numbers. So we need to 303 * actually call ->getattr, not just read i_ino: 304 */ 305 error = vfs_getattr_nosec(&child_path, &stat, 306 STATX_INO, AT_STATX_SYNC_AS_STAT); 307 if (error) 308 return error; 309 buffer.ino = stat.ino; 310 /* 311 * Open the directory ... 312 */ 313 file = dentry_open(path, O_RDONLY, cred); 314 error = PTR_ERR(file); 315 if (IS_ERR(file)) 316 goto out; 317 318 error = -EINVAL; 319 if (!file->f_op->iterate && !file->f_op->iterate_shared) 320 goto out_close; 321 322 buffer.sequence = 0; 323 while (1) { 324 int old_seq = buffer.sequence; 325 326 error = iterate_dir(file, &buffer.ctx); 327 if (buffer.found) { 328 error = 0; 329 break; 330 } 331 332 if (error < 0) 333 break; 334 335 error = -ENOENT; 336 if (old_seq == buffer.sequence) 337 break; 338 } 339 340out_close: 341 fput(file); 342out: 343 return error; 344} 345 346/** 347 * export_encode_fh - default export_operations->encode_fh function 348 * @inode: the object to encode 349 * @fid: where to store the file handle fragment 350 * @max_len: maximum length to store there 351 * @parent: parent directory inode, if wanted 352 * 353 * This default encode_fh function assumes that the 32 inode number 354 * is suitable for locating an inode, and that the generation number 355 * can be used to check that it is still valid. It places them in the 356 * filehandle fragment where export_decode_fh expects to find them. 357 */ 358static int export_encode_fh(struct inode *inode, struct fid *fid, 359 int *max_len, struct inode *parent) 360{ 361 int len = *max_len; 362 int type = FILEID_INO32_GEN; 363 364 if (parent && (len < 4)) { 365 *max_len = 4; 366 return FILEID_INVALID; 367 } else if (len < 2) { 368 *max_len = 2; 369 return FILEID_INVALID; 370 } 371 372 len = 2; 373 fid->i32.ino = inode->i_ino; 374 fid->i32.gen = inode->i_generation; 375 if (parent) { 376 fid->i32.parent_ino = parent->i_ino; 377 fid->i32.parent_gen = parent->i_generation; 378 len = 4; 379 type = FILEID_INO32_GEN_PARENT; 380 } 381 *max_len = len; 382 return type; 383} 384 385int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, 386 int *max_len, struct inode *parent) 387{ 388 const struct export_operations *nop = inode->i_sb->s_export_op; 389 390 if (nop && nop->encode_fh) 391 return nop->encode_fh(inode, fid->raw, max_len, parent); 392 393 return export_encode_fh(inode, fid, max_len, parent); 394} 395EXPORT_SYMBOL_GPL(exportfs_encode_inode_fh); 396 397int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, 398 int connectable) 399{ 400 int error; 401 struct dentry *p = NULL; 402 struct inode *inode = dentry->d_inode, *parent = NULL; 403 404 if (connectable && !S_ISDIR(inode->i_mode)) { 405 p = dget_parent(dentry); 406 /* 407 * note that while p might've ceased to be our parent already, 408 * it's still pinned by and still positive. 409 */ 410 parent = p->d_inode; 411 } 412 413 error = exportfs_encode_inode_fh(inode, fid, max_len, parent); 414 dput(p); 415 416 return error; 417} 418EXPORT_SYMBOL_GPL(exportfs_encode_fh); 419 420struct dentry * 421exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, 422 int fileid_type, 423 int (*acceptable)(void *, struct dentry *), 424 void *context) 425{ 426 const struct export_operations *nop = mnt->mnt_sb->s_export_op; 427 struct dentry *result, *alias; 428 char nbuf[NAME_MAX+1]; 429 int err; 430 431 /* 432 * Try to get any dentry for the given file handle from the filesystem. 433 */ 434 if (!nop || !nop->fh_to_dentry) 435 return ERR_PTR(-ESTALE); 436 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); 437 if (IS_ERR_OR_NULL(result)) 438 return result; 439 440 /* 441 * If no acceptance criteria was specified by caller, a disconnected 442 * dentry is also accepatable. Callers may use this mode to query if 443 * file handle is stale or to get a reference to an inode without 444 * risking the high overhead caused by directory reconnect. 445 */ 446 if (!acceptable) 447 return result; 448 449 if (d_is_dir(result)) { 450 /* 451 * This request is for a directory. 452 * 453 * On the positive side there is only one dentry for each 454 * directory inode. On the negative side this implies that we 455 * to ensure our dentry is connected all the way up to the 456 * filesystem root. 457 */ 458 if (result->d_flags & DCACHE_DISCONNECTED) { 459 err = reconnect_path(mnt, result, nbuf); 460 if (err) 461 goto err_result; 462 } 463 464 if (!acceptable(context, result)) { 465 err = -EACCES; 466 goto err_result; 467 } 468 469 return result; 470 } else { 471 /* 472 * It's not a directory. Life is a little more complicated. 473 */ 474 struct dentry *target_dir, *nresult; 475 476 /* 477 * See if either the dentry we just got from the filesystem 478 * or any alias for it is acceptable. This is always true 479 * if this filesystem is exported without the subtreecheck 480 * option. If the filesystem is exported with the subtree 481 * check option there's a fair chance we need to look at 482 * the parent directory in the file handle and make sure 483 * it's connected to the filesystem root. 484 */ 485 alias = find_acceptable_alias(result, acceptable, context); 486 if (alias) 487 return alias; 488 489 /* 490 * Try to extract a dentry for the parent directory from the 491 * file handle. If this fails we'll have to give up. 492 */ 493 err = -ESTALE; 494 if (!nop->fh_to_parent) 495 goto err_result; 496 497 target_dir = nop->fh_to_parent(mnt->mnt_sb, fid, 498 fh_len, fileid_type); 499 if (!target_dir) 500 goto err_result; 501 err = PTR_ERR(target_dir); 502 if (IS_ERR(target_dir)) 503 goto err_result; 504 505 /* 506 * And as usual we need to make sure the parent directory is 507 * connected to the filesystem root. The VFS really doesn't 508 * like disconnected directories.. 509 */ 510 err = reconnect_path(mnt, target_dir, nbuf); 511 if (err) { 512 dput(target_dir); 513 goto err_result; 514 } 515 516 /* 517 * Now that we've got both a well-connected parent and a 518 * dentry for the inode we're after, make sure that our 519 * inode is actually connected to the parent. 520 */ 521 err = exportfs_get_name(mnt, target_dir, nbuf, result); 522 if (err) { 523 dput(target_dir); 524 goto err_result; 525 } 526 527 inode_lock(target_dir->d_inode); 528 nresult = lookup_one(mnt_user_ns(mnt), nbuf, 529 target_dir, strlen(nbuf)); 530 if (!IS_ERR(nresult)) { 531 if (unlikely(nresult->d_inode != result->d_inode)) { 532 dput(nresult); 533 nresult = ERR_PTR(-ESTALE); 534 } 535 } 536 inode_unlock(target_dir->d_inode); 537 /* 538 * At this point we are done with the parent, but it's pinned 539 * by the child dentry anyway. 540 */ 541 dput(target_dir); 542 543 if (IS_ERR(nresult)) { 544 err = PTR_ERR(nresult); 545 goto err_result; 546 } 547 dput(result); 548 result = nresult; 549 550 /* 551 * And finally make sure the dentry is actually acceptable 552 * to NFSD. 553 */ 554 alias = find_acceptable_alias(result, acceptable, context); 555 if (!alias) { 556 err = -EACCES; 557 goto err_result; 558 } 559 560 return alias; 561 } 562 563 err_result: 564 dput(result); 565 return ERR_PTR(err); 566} 567EXPORT_SYMBOL_GPL(exportfs_decode_fh_raw); 568 569struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, 570 int fh_len, int fileid_type, 571 int (*acceptable)(void *, struct dentry *), 572 void *context) 573{ 574 struct dentry *ret; 575 576 ret = exportfs_decode_fh_raw(mnt, fid, fh_len, fileid_type, 577 acceptable, context); 578 if (IS_ERR_OR_NULL(ret)) { 579 if (ret == ERR_PTR(-ENOMEM)) 580 return ret; 581 return ERR_PTR(-ESTALE); 582 } 583 return ret; 584} 585EXPORT_SYMBOL_GPL(exportfs_decode_fh); 586 587MODULE_LICENSE("GPL");