cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

expfs.c (15092B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Copyright (C) Neil Brown 2002
      4 * Copyright (C) Christoph Hellwig 2007
      5 *
      6 * This file contains the code mapping from inodes to NFS file handles,
      7 * and for mapping back from file handles to dentries.
      8 *
      9 * For details on why we do all the strange and hairy things in here
     10 * take a look at Documentation/filesystems/nfs/exporting.rst.
     11 */
     12#include <linux/exportfs.h>
     13#include <linux/fs.h>
     14#include <linux/file.h>
     15#include <linux/module.h>
     16#include <linux/mount.h>
     17#include <linux/namei.h>
     18#include <linux/sched.h>
     19#include <linux/cred.h>
     20
/*
 * Debug tracing hook.  It expands to an empty statement, so every
 * dprintk() call in this file generates no code and its arguments are
 * neither evaluated nor checked against the format string.
 */
#define dprintk(fmt, args...) do{}while(0)
     22
     23
     24static int get_name(const struct path *path, char *name, struct dentry *child);
     25
     26
     27static int exportfs_get_name(struct vfsmount *mnt, struct dentry *dir,
     28		char *name, struct dentry *child)
     29{
     30	const struct export_operations *nop = dir->d_sb->s_export_op;
     31	struct path path = {.mnt = mnt, .dentry = dir};
     32
     33	if (nop->get_name)
     34		return nop->get_name(dir, name, child);
     35	else
     36		return get_name(&path, name, child);
     37}
     38
/*
 * Check if the dentry or any of its aliases is acceptable.
 *
 * @result:     dentry to test first; the aliases of its inode are tried next
 * @acceptable: caller-supplied predicate, invoked with @context
 * @context:    opaque argument handed through to @acceptable
 *
 * Returns @result itself when @acceptable approves of it.  Otherwise
 * returns the first approved alias, with a reference taken on the alias
 * and the reference on @result dropped.  Returns NULL when nothing is
 * acceptable; the reference on @result is left untouched in that case.
 */
static struct dentry *
find_acceptable_alias(struct dentry *result,
		int (*acceptable)(void *context, struct dentry *dentry),
		void *context)
{
	struct dentry *dentry, *toput = NULL;
	struct inode *inode;

	if (acceptable(context, result))
		return result;

	inode = result->d_inode;
	spin_lock(&inode->i_lock);
	hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
		/*
		 * Take a reference before dropping i_lock: acceptable() is
		 * called with the lock released.
		 */
		dget(dentry);
		spin_unlock(&inode->i_lock);
		/* The previous alias is only released once i_lock is dropped. */
		if (toput)
			dput(toput);
		if (dentry != result && acceptable(context, dentry)) {
			dput(result);
			return dentry;
		}
		/* Retake i_lock before the iterator advances to the next alias. */
		spin_lock(&inode->i_lock);
		toput = dentry;
	}
	spin_unlock(&inode->i_lock);

	/* Drop the reference still held on the last alias visited, if any. */
	if (toput)
		dput(toput);
	return NULL;
}
     73
     74static bool dentry_connected(struct dentry *dentry)
     75{
     76	dget(dentry);
     77	while (dentry->d_flags & DCACHE_DISCONNECTED) {
     78		struct dentry *parent = dget_parent(dentry);
     79
     80		dput(dentry);
     81		if (dentry == parent) {
     82			dput(parent);
     83			return false;
     84		}
     85		dentry = parent;
     86	}
     87	dput(dentry);
     88	return true;
     89}
     90
     91static void clear_disconnected(struct dentry *dentry)
     92{
     93	dget(dentry);
     94	while (dentry->d_flags & DCACHE_DISCONNECTED) {
     95		struct dentry *parent = dget_parent(dentry);
     96
     97		WARN_ON_ONCE(IS_ROOT(dentry));
     98
     99		spin_lock(&dentry->d_lock);
    100		dentry->d_flags &= ~DCACHE_DISCONNECTED;
    101		spin_unlock(&dentry->d_lock);
    102
    103		dput(dentry);
    104		dentry = parent;
    105	}
    106	dput(dentry);
    107}
    108
    109/*
    110 * Reconnect a directory dentry with its parent.
    111 *
    112 * This can return a dentry, or NULL, or an error.
    113 *
    114 * In the first case the returned dentry is the parent of the given
    115 * dentry, and may itself need to be reconnected to its parent.
    116 *
    117 * In the NULL case, a concurrent VFS operation has either renamed or
    118 * removed this directory.  The concurrent operation has reconnected our
    119 * dentry, so we no longer need to.
    120 */
    121static struct dentry *reconnect_one(struct vfsmount *mnt,
    122		struct dentry *dentry, char *nbuf)
    123{
    124	struct dentry *parent;
    125	struct dentry *tmp;
    126	int err;
    127
    128	parent = ERR_PTR(-EACCES);
    129	inode_lock(dentry->d_inode);
    130	if (mnt->mnt_sb->s_export_op->get_parent)
    131		parent = mnt->mnt_sb->s_export_op->get_parent(dentry);
    132	inode_unlock(dentry->d_inode);
    133
    134	if (IS_ERR(parent)) {
    135		dprintk("%s: get_parent of %ld failed, err %d\n",
    136			__func__, dentry->d_inode->i_ino, PTR_ERR(parent));
    137		return parent;
    138	}
    139
    140	dprintk("%s: find name of %lu in %lu\n", __func__,
    141		dentry->d_inode->i_ino, parent->d_inode->i_ino);
    142	err = exportfs_get_name(mnt, parent, nbuf, dentry);
    143	if (err == -ENOENT)
    144		goto out_reconnected;
    145	if (err)
    146		goto out_err;
    147	dprintk("%s: found name: %s\n", __func__, nbuf);
    148	tmp = lookup_one_unlocked(mnt_user_ns(mnt), nbuf, parent, strlen(nbuf));
    149	if (IS_ERR(tmp)) {
    150		dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp));
    151		err = PTR_ERR(tmp);
    152		goto out_err;
    153	}
    154	if (tmp != dentry) {
    155		/*
    156		 * Somebody has renamed it since exportfs_get_name();
    157		 * great, since it could've only been renamed if it
    158		 * got looked up and thus connected, and it would
    159		 * remain connected afterwards.  We are done.
    160		 */
    161		dput(tmp);
    162		goto out_reconnected;
    163	}
    164	dput(tmp);
    165	if (IS_ROOT(dentry)) {
    166		err = -ESTALE;
    167		goto out_err;
    168	}
    169	return parent;
    170
    171out_err:
    172	dput(parent);
    173	return ERR_PTR(err);
    174out_reconnected:
    175	dput(parent);
    176	/*
    177	 * Someone must have renamed our entry into another parent, in
    178	 * which case it has been reconnected by the rename.
    179	 *
    180	 * Or someone removed it entirely, in which case filehandle
    181	 * lookup will succeed but the directory is now IS_DEAD and
    182	 * subsequent operations on it will fail.
    183	 *
    184	 * Alternatively, maybe there was no race at all, and the
    185	 * filesystem is just corrupt and gave us a parent that doesn't
    186	 * actually contain any entry pointing to this inode.  So,
    187	 * double check that this worked and return -ESTALE if not:
    188	 */
    189	if (!dentry_connected(dentry))
    190		return ERR_PTR(-ESTALE);
    191	return NULL;
    192}
    193
    194/*
    195 * Make sure target_dir is fully connected to the dentry tree.
    196 *
    197 * On successful return, DCACHE_DISCONNECTED will be cleared on
    198 * target_dir, and target_dir->d_parent->...->d_parent will reach the
    199 * root of the filesystem.
    200 *
    201 * Whenever DCACHE_DISCONNECTED is unset, target_dir is fully connected.
    202 * But the converse is not true: target_dir may have DCACHE_DISCONNECTED
    203 * set but already be connected.  In that case we'll verify the
    204 * connection to root and then clear the flag.
    205 *
    206 * Note that target_dir could be removed by a concurrent operation.  In
    207 * that case reconnect_path may still succeed with target_dir fully
    208 * connected, but further operations using the filehandle will fail when
    209 * necessary (due to S_DEAD being set on the directory).
    210 */
    211static int
    212reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
    213{
    214	struct dentry *dentry, *parent;
    215
    216	dentry = dget(target_dir);
    217
    218	while (dentry->d_flags & DCACHE_DISCONNECTED) {
    219		BUG_ON(dentry == mnt->mnt_sb->s_root);
    220
    221		if (IS_ROOT(dentry))
    222			parent = reconnect_one(mnt, dentry, nbuf);
    223		else
    224			parent = dget_parent(dentry);
    225
    226		if (!parent)
    227			break;
    228		dput(dentry);
    229		if (IS_ERR(parent))
    230			return PTR_ERR(parent);
    231		dentry = parent;
    232	}
    233	dput(dentry);
    234	clear_disconnected(target_dir);
    235	return 0;
    236}
    237
/*
 * State shared between get_name() and its filldir_one() actor while a
 * directory is scanned for the entry with one particular inode number.
 */
struct getdents_callback {
	struct dir_context ctx;
	char *name;		/* name that was found. It already points to a
				   buffer NAME_MAX+1 in size */
	u64 ino;		/* the inum we are looking for */
	int found;		/* inode matched? */
	int sequence;		/* sequence counter, bumped by filldir_one()
				   for every entry it is shown */
};
    246
    247/*
    248 * A rather strange filldir function to capture
    249 * the name matching the specified inode number.
    250 */
    251static int filldir_one(struct dir_context *ctx, const char *name, int len,
    252			loff_t pos, u64 ino, unsigned int d_type)
    253{
    254	struct getdents_callback *buf =
    255		container_of(ctx, struct getdents_callback, ctx);
    256	int result = 0;
    257
    258	buf->sequence++;
    259	if (buf->ino == ino && len <= NAME_MAX) {
    260		memcpy(buf->name, name, len);
    261		buf->name[len] = '\0';
    262		buf->found = 1;
    263		result = -1;
    264	}
    265	return result;
    266}
    267
    268/**
    269 * get_name - default export_operations->get_name function
    270 * @path:   the directory in which to find a name
    271 * @name:   a pointer to a %NAME_MAX+1 char buffer to store the name
    272 * @child:  the dentry for the child directory.
    273 *
    274 * calls readdir on the parent until it finds an entry with
    275 * the same inode number as the child, and returns that.
    276 */
    277static int get_name(const struct path *path, char *name, struct dentry *child)
    278{
    279	const struct cred *cred = current_cred();
    280	struct inode *dir = path->dentry->d_inode;
    281	int error;
    282	struct file *file;
    283	struct kstat stat;
    284	struct path child_path = {
    285		.mnt = path->mnt,
    286		.dentry = child,
    287	};
    288	struct getdents_callback buffer = {
    289		.ctx.actor = filldir_one,
    290		.name = name,
    291	};
    292
    293	error = -ENOTDIR;
    294	if (!dir || !S_ISDIR(dir->i_mode))
    295		goto out;
    296	error = -EINVAL;
    297	if (!dir->i_fop)
    298		goto out;
    299	/*
    300	 * inode->i_ino is unsigned long, kstat->ino is u64, so the
    301	 * former would be insufficient on 32-bit hosts when the
    302	 * filesystem supports 64-bit inode numbers.  So we need to
    303	 * actually call ->getattr, not just read i_ino:
    304	 */
    305	error = vfs_getattr_nosec(&child_path, &stat,
    306				  STATX_INO, AT_STATX_SYNC_AS_STAT);
    307	if (error)
    308		return error;
    309	buffer.ino = stat.ino;
    310	/*
    311	 * Open the directory ...
    312	 */
    313	file = dentry_open(path, O_RDONLY, cred);
    314	error = PTR_ERR(file);
    315	if (IS_ERR(file))
    316		goto out;
    317
    318	error = -EINVAL;
    319	if (!file->f_op->iterate && !file->f_op->iterate_shared)
    320		goto out_close;
    321
    322	buffer.sequence = 0;
    323	while (1) {
    324		int old_seq = buffer.sequence;
    325
    326		error = iterate_dir(file, &buffer.ctx);
    327		if (buffer.found) {
    328			error = 0;
    329			break;
    330		}
    331
    332		if (error < 0)
    333			break;
    334
    335		error = -ENOENT;
    336		if (old_seq == buffer.sequence)
    337			break;
    338	}
    339
    340out_close:
    341	fput(file);
    342out:
    343	return error;
    344}
    345
    346/**
    347 * export_encode_fh - default export_operations->encode_fh function
    348 * @inode:   the object to encode
    349 * @fid:     where to store the file handle fragment
    350 * @max_len: maximum length to store there
    351 * @parent:  parent directory inode, if wanted
    352 *
    353 * This default encode_fh function assumes that the 32 inode number
    354 * is suitable for locating an inode, and that the generation number
    355 * can be used to check that it is still valid.  It places them in the
    356 * filehandle fragment where export_decode_fh expects to find them.
    357 */
    358static int export_encode_fh(struct inode *inode, struct fid *fid,
    359		int *max_len, struct inode *parent)
    360{
    361	int len = *max_len;
    362	int type = FILEID_INO32_GEN;
    363
    364	if (parent && (len < 4)) {
    365		*max_len = 4;
    366		return FILEID_INVALID;
    367	} else if (len < 2) {
    368		*max_len = 2;
    369		return FILEID_INVALID;
    370	}
    371
    372	len = 2;
    373	fid->i32.ino = inode->i_ino;
    374	fid->i32.gen = inode->i_generation;
    375	if (parent) {
    376		fid->i32.parent_ino = parent->i_ino;
    377		fid->i32.parent_gen = parent->i_generation;
    378		len = 4;
    379		type = FILEID_INO32_GEN_PARENT;
    380	}
    381	*max_len = len;
    382	return type;
    383}
    384
    385int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid,
    386			     int *max_len, struct inode *parent)
    387{
    388	const struct export_operations *nop = inode->i_sb->s_export_op;
    389
    390	if (nop && nop->encode_fh)
    391		return nop->encode_fh(inode, fid->raw, max_len, parent);
    392
    393	return export_encode_fh(inode, fid, max_len, parent);
    394}
    395EXPORT_SYMBOL_GPL(exportfs_encode_inode_fh);
    396
    397int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len,
    398		int connectable)
    399{
    400	int error;
    401	struct dentry *p = NULL;
    402	struct inode *inode = dentry->d_inode, *parent = NULL;
    403
    404	if (connectable && !S_ISDIR(inode->i_mode)) {
    405		p = dget_parent(dentry);
    406		/*
    407		 * note that while p might've ceased to be our parent already,
    408		 * it's still pinned by and still positive.
    409		 */
    410		parent = p->d_inode;
    411	}
    412
    413	error = exportfs_encode_inode_fh(inode, fid, max_len, parent);
    414	dput(p);
    415
    416	return error;
    417}
    418EXPORT_SYMBOL_GPL(exportfs_encode_fh);
    419
/**
 * exportfs_decode_fh_raw - turn a file handle fragment into a dentry
 * @mnt:         mount whose superblock supplies the export operations
 * @fid:         file handle fragment, passed on to the filesystem
 * @fh_len:      length of @fid, passed on to the filesystem
 * @fileid_type: type of @fid, passed on to the filesystem
 * @acceptable:  optional predicate deciding whether a dentry may be
 *               returned; when NULL the dentry obtained from
 *               ->fh_to_dentry is returned as is
 * @context:     opaque argument handed through to @acceptable
 *
 * Returns a dentry on success.  A NULL or ERR_PTR() result from
 * ->fh_to_dentry is passed back unchanged.  Otherwise failures are
 * reported as ERR_PTR(): -ESTALE when the filesystem lacks the needed
 * export operations or the parent cannot be obtained, -EACCES when no
 * acceptable dentry exists, or the error from reconnecting or looking
 * up the name.
 */
struct dentry *
exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len,
		       int fileid_type,
		       int (*acceptable)(void *, struct dentry *),
		       void *context)
{
	const struct export_operations *nop = mnt->mnt_sb->s_export_op;
	struct dentry *result, *alias;
	char nbuf[NAME_MAX+1];
	int err;

	/*
	 * Try to get any dentry for the given file handle from the filesystem.
	 */
	if (!nop || !nop->fh_to_dentry)
		return ERR_PTR(-ESTALE);
	result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
	if (IS_ERR_OR_NULL(result))
		return result;

	/*
	 * If no acceptance criteria was specified by caller, a disconnected
	 * dentry is also acceptable. Callers may use this mode to query if
	 * file handle is stale or to get a reference to an inode without
	 * risking the high overhead caused by directory reconnect.
	 */
	if (!acceptable)
		return result;

	if (d_is_dir(result)) {
		/*
		 * This request is for a directory.
		 *
		 * On the positive side there is only one dentry for each
		 * directory inode.  On the negative side this implies that we
		 * have to ensure our dentry is connected all the way up to the
		 * filesystem root.
		 */
		if (result->d_flags & DCACHE_DISCONNECTED) {
			err = reconnect_path(mnt, result, nbuf);
			if (err)
				goto err_result;
		}

		if (!acceptable(context, result)) {
			err = -EACCES;
			goto err_result;
		}

		return result;
	} else {
		/*
		 * It's not a directory.  Life is a little more complicated.
		 */
		struct dentry *target_dir, *nresult;

		/*
		 * See if either the dentry we just got from the filesystem
		 * or any alias for it is acceptable.  This is always true
		 * if this filesystem is exported without the subtreecheck
		 * option.  If the filesystem is exported with the subtree
		 * check option there's a fair chance we need to look at
		 * the parent directory in the file handle and make sure
		 * it's connected to the filesystem root.
		 */
		alias = find_acceptable_alias(result, acceptable, context);
		if (alias)
			return alias;

		/*
		 * Try to extract a dentry for the parent directory from the
		 * file handle.  If this fails we'll have to give up.
		 */
		err = -ESTALE;
		if (!nop->fh_to_parent)
			goto err_result;

		target_dir = nop->fh_to_parent(mnt->mnt_sb, fid,
				fh_len, fileid_type);
		/* A NULL parent is reported with the -ESTALE set above. */
		if (!target_dir)
			goto err_result;
		err = PTR_ERR(target_dir);
		if (IS_ERR(target_dir))
			goto err_result;

		/*
		 * And as usual we need to make sure the parent directory is
		 * connected to the filesystem root.  The VFS really doesn't
		 * like disconnected directories..
		 */
		err = reconnect_path(mnt, target_dir, nbuf);
		if (err) {
			dput(target_dir);
			goto err_result;
		}

		/*
		 * Now that we've got both a well-connected parent and a
		 * dentry for the inode we're after, make sure that our
		 * inode is actually connected to the parent.
		 */
		err = exportfs_get_name(mnt, target_dir, nbuf, result);
		if (err) {
			dput(target_dir);
			goto err_result;
		}

		/* The lookup by name runs with the parent's inode locked. */
		inode_lock(target_dir->d_inode);
		nresult = lookup_one(mnt_user_ns(mnt), nbuf,
				     target_dir, strlen(nbuf));
		if (!IS_ERR(nresult)) {
			/* The name now resolves to a different inode. */
			if (unlikely(nresult->d_inode != result->d_inode)) {
				dput(nresult);
				nresult = ERR_PTR(-ESTALE);
			}
		}
		inode_unlock(target_dir->d_inode);
		/*
		 * At this point we are done with the parent, but it's pinned
		 * by the child dentry anyway.
		 */
		dput(target_dir);

		if (IS_ERR(nresult)) {
			err = PTR_ERR(nresult);
			goto err_result;
		}
		/* Continue with the dentry found through the parent. */
		dput(result);
		result = nresult;

		/*
		 * And finally make sure the dentry is actually acceptable
		 * to NFSD.
		 */
		alias = find_acceptable_alias(result, acceptable, context);
		if (!alias) {
			err = -EACCES;
			goto err_result;
		}

		return alias;
	}

 err_result:
	/* Common failure exit: drop the reference held on result. */
	dput(result);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(exportfs_decode_fh_raw);
    568
    569struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
    570				  int fh_len, int fileid_type,
    571				  int (*acceptable)(void *, struct dentry *),
    572				  void *context)
    573{
    574	struct dentry *ret;
    575
    576	ret = exportfs_decode_fh_raw(mnt, fid, fh_len, fileid_type,
    577				     acceptable, context);
    578	if (IS_ERR_OR_NULL(ret)) {
    579		if (ret == ERR_PTR(-ENOMEM))
    580			return ret;
    581		return ERR_PTR(-ESTALE);
    582	}
    583	return ret;
    584}
    585EXPORT_SYMBOL_GPL(exportfs_decode_fh);
    586
    587MODULE_LICENSE("GPL");