cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

namei.c (29676B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * Copyright (C) 2011 Novell Inc.
      4 * Copyright (C) 2016 Red Hat, Inc.
      5 */
      6
      7#include <linux/fs.h>
      8#include <linux/cred.h>
      9#include <linux/ctype.h>
     10#include <linux/namei.h>
     11#include <linux/xattr.h>
     12#include <linux/ratelimit.h>
     13#include <linux/mount.h>
     14#include <linux/exportfs.h>
     15#include "overlayfs.h"
     16
/*
 * State carried through the lookup of one name across the overlay layers.
 * It is filled in and updated by the ovl_lookup_*() helpers below.
 */
struct ovl_lookup_data {
	/* Overlay superblock; used to reach the ovl_fs via OVL_FS() */
	struct super_block *sb;
	/* Mount of the layer that is currently being searched */
	struct vfsmount *mnt;
	/* Name to look up; replaced by the redirect when one is found */
	struct qstr name;
	/* The last path element was found and is a directory */
	bool is_dir;
	/* A whiteout, or an opaque directory as last element, was found */
	bool opaque;
	/* Do not continue the lookup in further (lower) layers */
	bool stop;
	/* The layer being searched is the last one that will be searched */
	bool last;
	/* Redirect path buffer; freed when it is replaced by a newer one */
	char *redirect;
	/* Result of the metacopy xattr check on the last non-dir found */
	bool metacopy;
};
     28
     29static int ovl_check_redirect(struct path *path, struct ovl_lookup_data *d,
     30			      size_t prelen, const char *post)
     31{
     32	int res;
     33	char *buf;
     34	struct ovl_fs *ofs = OVL_FS(d->sb);
     35
     36	buf = ovl_get_redirect_xattr(ofs, path, prelen + strlen(post));
     37	if (IS_ERR_OR_NULL(buf))
     38		return PTR_ERR(buf);
     39
     40	if (buf[0] == '/') {
     41		/*
     42		 * One of the ancestor path elements in an absolute path
     43		 * lookup in ovl_lookup_layer() could have been opaque and
     44		 * that will stop further lookup in lower layers (d->stop=true)
     45		 * But we have found an absolute redirect in decendant path
     46		 * element and that should force continue lookup in lower
     47		 * layers (reset d->stop).
     48		 */
     49		d->stop = false;
     50	} else {
     51		res = strlen(buf) + 1;
     52		memmove(buf + prelen, buf, res);
     53		memcpy(buf, d->name.name, prelen);
     54	}
     55
     56	strcat(buf, post);
     57	kfree(d->redirect);
     58	d->redirect = buf;
     59	d->name.name = d->redirect;
     60	d->name.len = strlen(d->redirect);
     61
     62	return 0;
     63}
     64
     65static int ovl_acceptable(void *ctx, struct dentry *dentry)
     66{
     67	/*
     68	 * A non-dir origin may be disconnected, which is fine, because
     69	 * we only need it for its unique inode number.
     70	 */
     71	if (!d_is_dir(dentry))
     72		return 1;
     73
     74	/* Don't decode a deleted empty directory */
     75	if (d_unhashed(dentry))
     76		return 0;
     77
     78	/* Check if directory belongs to the layer we are decoding from */
     79	return is_subdir(dentry, ((struct vfsmount *)ctx)->mnt_root);
     80}
     81
     82/*
     83 * Check validity of an overlay file handle buffer.
     84 *
     85 * Return 0 for a valid file handle.
     86 * Return -ENODATA for "origin unknown".
     87 * Return <0 for an invalid file handle.
     88 */
     89int ovl_check_fb_len(struct ovl_fb *fb, int fb_len)
     90{
     91	if (fb_len < sizeof(struct ovl_fb) || fb_len < fb->len)
     92		return -EINVAL;
     93
     94	if (fb->magic != OVL_FH_MAGIC)
     95		return -EINVAL;
     96
     97	/* Treat larger version and unknown flags as "origin unknown" */
     98	if (fb->version > OVL_FH_VERSION || fb->flags & ~OVL_FH_FLAG_ALL)
     99		return -ENODATA;
    100
    101	/* Treat endianness mismatch as "origin unknown" */
    102	if (!(fb->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
    103	    (fb->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
    104		return -ENODATA;
    105
    106	return 0;
    107}
    108
    109static struct ovl_fh *ovl_get_fh(struct ovl_fs *ofs, struct dentry *upperdentry,
    110				 enum ovl_xattr ox)
    111{
    112	int res, err;
    113	struct ovl_fh *fh = NULL;
    114
    115	res = ovl_getxattr_upper(ofs, upperdentry, ox, NULL, 0);
    116	if (res < 0) {
    117		if (res == -ENODATA || res == -EOPNOTSUPP)
    118			return NULL;
    119		goto fail;
    120	}
    121	/* Zero size value means "copied up but origin unknown" */
    122	if (res == 0)
    123		return NULL;
    124
    125	fh = kzalloc(res + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
    126	if (!fh)
    127		return ERR_PTR(-ENOMEM);
    128
    129	res = ovl_getxattr_upper(ofs, upperdentry, ox, fh->buf, res);
    130	if (res < 0)
    131		goto fail;
    132
    133	err = ovl_check_fb_len(&fh->fb, res);
    134	if (err < 0) {
    135		if (err == -ENODATA)
    136			goto out;
    137		goto invalid;
    138	}
    139
    140	return fh;
    141
    142out:
    143	kfree(fh);
    144	return NULL;
    145
    146fail:
    147	pr_warn_ratelimited("failed to get origin (%i)\n", res);
    148	goto out;
    149invalid:
    150	pr_warn_ratelimited("invalid origin (%*phN)\n", res, fh);
    151	goto out;
    152}
    153
    154struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
    155				  struct vfsmount *mnt, bool connected)
    156{
    157	struct dentry *real;
    158	int bytes;
    159
    160	if (!capable(CAP_DAC_READ_SEARCH))
    161		return NULL;
    162
    163	/*
    164	 * Make sure that the stored uuid matches the uuid of the lower
    165	 * layer where file handle will be decoded.
    166	 * In case of uuid=off option just make sure that stored uuid is null.
    167	 */
    168	if (ofs->config.uuid ? !uuid_equal(&fh->fb.uuid, &mnt->mnt_sb->s_uuid) :
    169			      !uuid_is_null(&fh->fb.uuid))
    170		return NULL;
    171
    172	bytes = (fh->fb.len - offsetof(struct ovl_fb, fid));
    173	real = exportfs_decode_fh(mnt, (struct fid *)fh->fb.fid,
    174				  bytes >> 2, (int)fh->fb.type,
    175				  connected ? ovl_acceptable : NULL, mnt);
    176	if (IS_ERR(real)) {
    177		/*
    178		 * Treat stale file handle to lower file as "origin unknown".
    179		 * upper file handle could become stale when upper file is
    180		 * unlinked and this information is needed to handle stale
    181		 * index entries correctly.
    182		 */
    183		if (real == ERR_PTR(-ESTALE) &&
    184		    !(fh->fb.flags & OVL_FH_FLAG_PATH_UPPER))
    185			real = NULL;
    186		return real;
    187	}
    188
    189	if (ovl_dentry_weird(real)) {
    190		dput(real);
    191		return NULL;
    192	}
    193
    194	return real;
    195}
    196
/*
 * Report whether the directory at @path carries the OVL_XATTR_OPAQUE
 * xattr, as determined by ovl_path_check_dir_xattr().
 */
static bool ovl_is_opaquedir(struct ovl_fs *ofs, struct path *path)
{
	return ovl_path_check_dir_xattr(ofs, path, OVL_XATTR_OPAQUE);
}
    201
    202static struct dentry *ovl_lookup_positive_unlocked(struct ovl_lookup_data *d,
    203						   const char *name,
    204						   struct dentry *base, int len,
    205						   bool drop_negative)
    206{
    207	struct dentry *ret = lookup_one_unlocked(mnt_user_ns(d->mnt), name, base, len);
    208
    209	if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
    210		if (drop_negative && ret->d_lockref.count == 1) {
    211			spin_lock(&ret->d_lock);
    212			/* Recheck condition under lock */
    213			if (d_is_negative(ret) && ret->d_lockref.count == 1)
    214				__d_drop(ret);
    215			spin_unlock(&ret->d_lock);
    216		}
    217		dput(ret);
    218		ret = ERR_PTR(-ENOENT);
    219	}
    220	return ret;
    221}
    222
/*
 * Look up a single path element @name (of length @namelen) under @base in
 * the layer described by @d, and update the lookup state in @d according
 * to what was found (d->stop, d->opaque, d->is_dir, d->metacopy and, via
 * ovl_check_redirect(), d->redirect and d->name).
 *
 * @prelen is the length of the path that precedes @name and @post is the
 * part of the path that follows it; an empty @post means that @name is the
 * last element.  Both are handed on to ovl_check_redirect().
 *
 * Returns 0 and stores the found dentry in *@ret, or NULL when nothing
 * usable was found in this layer.  Returns a negative error otherwise, in
 * which case *@ret is left untouched.
 */
static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
			     const char *name, unsigned int namelen,
			     size_t prelen, const char *post,
			     struct dentry **ret, bool drop_negative)
{
	struct dentry *this;
	struct path path;
	int err;
	bool last_element = !post[0];

	this = ovl_lookup_positive_unlocked(d, name, base, namelen, drop_negative);
	if (IS_ERR(this)) {
		err = PTR_ERR(this);
		this = NULL;
		/* "Not found" and "name too long" just mean no entry here */
		if (err == -ENOENT || err == -ENAMETOOLONG)
			goto out;
		goto out_err;
	}

	if (ovl_dentry_weird(this)) {
		/* Don't support traversing automounts and other weirdness */
		err = -EREMOTE;
		goto out_err;
	}
	/* A whiteout hides everything below it: stop and report "opaque" */
	if (ovl_is_whiteout(this)) {
		d->stop = d->opaque = true;
		goto put_and_out;
	}
	/*
	 * This dentry should be a regular file if previous layer lookup
	 * found a metacopy dentry.
	 */
	if (last_element && d->metacopy && !d_is_reg(this)) {
		d->stop = true;
		goto put_and_out;
	}

	path.dentry = this;
	path.mnt = d->mnt;
	if (!d_can_lookup(this)) {
		/*
		 * Found a non-directory: it is of no use if a directory was
		 * found in a previous layer or if more path elements follow.
		 */
		if (d->is_dir || !last_element) {
			d->stop = true;
			goto put_and_out;
		}
		err = ovl_check_metacopy_xattr(OVL_FS(d->sb), &path);
		if (err < 0)
			goto out_err;

		/* Only a metacopy file lets the lookup continue further down */
		d->metacopy = err;
		d->stop = !d->metacopy;
		if (!d->metacopy || d->last)
			goto out;
	} else {
		if (ovl_lookup_trap_inode(d->sb, this)) {
			/* Caught in a trap of overlapping layers */
			err = -ELOOP;
			goto out_err;
		}

		if (last_element)
			d->is_dir = true;
		/* No further layer will be searched: skip opaque/redirect checks */
		if (d->last)
			goto out;

		if (ovl_is_opaquedir(OVL_FS(d->sb), &path)) {
			d->stop = true;
			if (last_element)
				d->opaque = true;
			goto out;
		}
	}
	err = ovl_check_redirect(&path, d, prelen, post);
	if (err)
		goto out_err;
out:
	/* Success: hand the dentry (possibly NULL) to the caller */
	*ret = this;
	return 0;

put_and_out:
	/* Found something, but it must not be used: report "nothing found" */
	dput(this);
	this = NULL;
	goto out;

out_err:
	dput(this);
	return err;
}
    310
    311static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
    312			    struct dentry **ret, bool drop_negative)
    313{
    314	/* Counting down from the end, since the prefix can change */
    315	size_t rem = d->name.len - 1;
    316	struct dentry *dentry = NULL;
    317	int err;
    318
    319	if (d->name.name[0] != '/')
    320		return ovl_lookup_single(base, d, d->name.name, d->name.len,
    321					 0, "", ret, drop_negative);
    322
    323	while (!IS_ERR_OR_NULL(base) && d_can_lookup(base)) {
    324		const char *s = d->name.name + d->name.len - rem;
    325		const char *next = strchrnul(s, '/');
    326		size_t thislen = next - s;
    327		bool end = !next[0];
    328
    329		/* Verify we did not go off the rails */
    330		if (WARN_ON(s[-1] != '/'))
    331			return -EIO;
    332
    333		err = ovl_lookup_single(base, d, s, thislen,
    334					d->name.len - rem, next, &base,
    335					drop_negative);
    336		dput(dentry);
    337		if (err)
    338			return err;
    339		dentry = base;
    340		if (end)
    341			break;
    342
    343		rem -= thislen + 1;
    344
    345		if (WARN_ON(rem >= d->name.len))
    346			return -EIO;
    347	}
    348	*ret = dentry;
    349	return 0;
    350}
    351
    352
    353int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
    354			struct dentry *upperdentry, struct ovl_path **stackp)
    355{
    356	struct dentry *origin = NULL;
    357	int i;
    358
    359	for (i = 1; i < ofs->numlayer; i++) {
    360		/*
    361		 * If lower fs uuid is not unique among lower fs we cannot match
    362		 * fh->uuid to layer.
    363		 */
    364		if (ofs->layers[i].fsid &&
    365		    ofs->layers[i].fs->bad_uuid)
    366			continue;
    367
    368		origin = ovl_decode_real_fh(ofs, fh, ofs->layers[i].mnt,
    369					    connected);
    370		if (origin)
    371			break;
    372	}
    373
    374	if (!origin)
    375		return -ESTALE;
    376	else if (IS_ERR(origin))
    377		return PTR_ERR(origin);
    378
    379	if (upperdentry && !ovl_is_whiteout(upperdentry) &&
    380	    inode_wrong_type(d_inode(upperdentry), d_inode(origin)->i_mode))
    381		goto invalid;
    382
    383	if (!*stackp)
    384		*stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL);
    385	if (!*stackp) {
    386		dput(origin);
    387		return -ENOMEM;
    388	}
    389	**stackp = (struct ovl_path){
    390		.dentry = origin,
    391		.layer = &ofs->layers[i]
    392	};
    393
    394	return 0;
    395
    396invalid:
    397	pr_warn_ratelimited("invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
    398			    upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
    399			    d_inode(origin)->i_mode & S_IFMT);
    400	dput(origin);
    401	return -ESTALE;
    402}
    403
    404static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
    405			    struct ovl_path **stackp)
    406{
    407	struct ovl_fh *fh = ovl_get_fh(ofs, upperdentry, OVL_XATTR_ORIGIN);
    408	int err;
    409
    410	if (IS_ERR_OR_NULL(fh))
    411		return PTR_ERR(fh);
    412
    413	err = ovl_check_origin_fh(ofs, fh, false, upperdentry, stackp);
    414	kfree(fh);
    415
    416	if (err) {
    417		if (err == -ESTALE)
    418			return 0;
    419		return err;
    420	}
    421
    422	return 0;
    423}
    424
    425/*
    426 * Verify that @fh matches the file handle stored in xattr @name.
    427 * Return 0 on match, -ESTALE on mismatch, < 0 on error.
    428 */
    429static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry,
    430			 enum ovl_xattr ox, const struct ovl_fh *fh)
    431{
    432	struct ovl_fh *ofh = ovl_get_fh(ofs, dentry, ox);
    433	int err = 0;
    434
    435	if (!ofh)
    436		return -ENODATA;
    437
    438	if (IS_ERR(ofh))
    439		return PTR_ERR(ofh);
    440
    441	if (fh->fb.len != ofh->fb.len || memcmp(&fh->fb, &ofh->fb, fh->fb.len))
    442		err = -ESTALE;
    443
    444	kfree(ofh);
    445	return err;
    446}
    447
    448/*
    449 * Verify that @real dentry matches the file handle stored in xattr @name.
    450 *
    451 * If @set is true and there is no stored file handle, encode @real and store
    452 * file handle in xattr @name.
    453 *
    454 * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
    455 */
    456int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
    457		      enum ovl_xattr ox, struct dentry *real, bool is_upper,
    458		      bool set)
    459{
    460	struct inode *inode;
    461	struct ovl_fh *fh;
    462	int err;
    463
    464	fh = ovl_encode_real_fh(ofs, real, is_upper);
    465	err = PTR_ERR(fh);
    466	if (IS_ERR(fh)) {
    467		fh = NULL;
    468		goto fail;
    469	}
    470
    471	err = ovl_verify_fh(ofs, dentry, ox, fh);
    472	if (set && err == -ENODATA)
    473		err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len);
    474	if (err)
    475		goto fail;
    476
    477out:
    478	kfree(fh);
    479	return err;
    480
    481fail:
    482	inode = d_inode(real);
    483	pr_warn_ratelimited("failed to verify %s (%pd2, ino=%lu, err=%i)\n",
    484			    is_upper ? "upper" : "origin", real,
    485			    inode ? inode->i_ino : 0, err);
    486	goto out;
    487}
    488
    489/* Get upper dentry from index */
    490struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
    491{
    492	struct ovl_fh *fh;
    493	struct dentry *upper;
    494
    495	if (!d_is_dir(index))
    496		return dget(index);
    497
    498	fh = ovl_get_fh(ofs, index, OVL_XATTR_UPPER);
    499	if (IS_ERR_OR_NULL(fh))
    500		return ERR_CAST(fh);
    501
    502	upper = ovl_decode_real_fh(ofs, fh, ovl_upper_mnt(ofs), true);
    503	kfree(fh);
    504
    505	if (IS_ERR_OR_NULL(upper))
    506		return upper ?: ERR_PTR(-ESTALE);
    507
    508	if (!d_is_dir(upper)) {
    509		pr_warn_ratelimited("invalid index upper (%pd2, upper=%pd2).\n",
    510				    index, upper);
    511		dput(upper);
    512		return ERR_PTR(-EIO);
    513	}
    514
    515	return upper;
    516}
    517
/*
 * Verify that an index entry name matches the origin file handle stored in
 * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
 *
 * The name of @index is the hex encoding of a file handle; it is decoded
 * back into a file handle first and that handle is then compared with the
 * one stored on the upper inode that the index refers to.
 *
 * Return 0 on match (also for a negative @index, for a whiteout index and
 * for a directory index when NFS export is disabled), -ESTALE on mismatch
 * or stale origin, -ENOENT for an orphan index entry, < 0 on error.
 */
int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
{
	struct ovl_fh *fh = NULL;
	size_t len;
	struct ovl_path origin = { };
	struct ovl_path *stack = &origin;
	struct dentry *upper = NULL;
	int err;

	/* Nothing to verify for a negative dentry */
	if (!d_inode(index))
		return 0;

	/* Two hex digits per byte: the name must hold at least a header */
	err = -EINVAL;
	if (index->d_name.len < sizeof(struct ovl_fb)*2)
		goto fail;

	err = -ENOMEM;
	len = index->d_name.len / 2;
	fh = kzalloc(len + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
	if (!fh)
		goto fail;

	err = -EINVAL;
	if (hex2bin(fh->buf, index->d_name.name, len))
		goto fail;

	err = ovl_check_fb_len(&fh->fb, len);
	if (err)
		goto fail;

	/*
	 * Whiteout index entries are used as an indication that an exported
	 * overlay file handle should be treated as stale (i.e. after unlink
	 * of the overlay inode). These entries contain no origin xattr.
	 */
	if (ovl_is_whiteout(index))
		goto out;

	/*
	 * Verifying directory index entries are not stale is expensive, so
	 * only verify stale dir index if NFS export is enabled.
	 */
	if (d_is_dir(index) && !ofs->config.nfs_export)
		goto out;

	/*
	 * Directory index entries should have 'upper' xattr pointing to the
	 * real upper dir. Non-dir index entries are hardlinks to the upper
	 * real inode. For non-dir index, we can read the copy up origin xattr
	 * directly from the index dentry, but for dir index we first need to
	 * decode the upper directory.
	 */
	upper = ovl_index_upper(ofs, index);
	if (IS_ERR_OR_NULL(upper)) {
		err = PTR_ERR(upper);
		/*
		 * Directory index entries with no 'upper' xattr need to be
		 * removed. When dir index entry has a stale 'upper' xattr,
		 * we assume that upper dir was removed and we treat the dir
		 * index as orphan entry that needs to be whited out.
		 */
		if (err == -ESTALE)
			goto orphan;
		else if (!err)
			err = -ESTALE;
		goto fail;
	}

	err = ovl_verify_fh(ofs, upper, OVL_XATTR_ORIGIN, fh);
	dput(upper);
	if (err)
		goto fail;

	/* Check if non-dir index is orphan and don't warn before cleaning it */
	if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) {
		/* On success the decoded origin is stored in @origin */
		err = ovl_check_origin_fh(ofs, fh, false, index, &stack);
		if (err)
			goto fail;

		if (ovl_get_nlink(ofs, origin.dentry, index, 0) == 0)
			goto orphan;
	}

out:
	/* Common exit: origin.dentry is still NULL unless it was decoded */
	dput(origin.dentry);
	kfree(fh);
	return err;

fail:
	pr_warn_ratelimited("failed to verify index (%pd2, ftype=%x, err=%i)\n",
			    index, d_inode(index)->i_mode & S_IFMT, err);
	goto out;

orphan:
	pr_warn_ratelimited("orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
			    index, d_inode(index)->i_mode & S_IFMT,
			    d_inode(index)->i_nlink);
	err = -ENOENT;
	goto out;
}
    623
    624static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
    625{
    626	char *n, *s;
    627
    628	n = kcalloc(fh->fb.len, 2, GFP_KERNEL);
    629	if (!n)
    630		return -ENOMEM;
    631
    632	s  = bin2hex(n, fh->buf, fh->fb.len);
    633	*name = (struct qstr) QSTR_INIT(n, s - n);
    634
    635	return 0;
    636
    637}
    638
    639/*
    640 * Lookup in indexdir for the index entry of a lower real inode or a copy up
    641 * origin inode. The index entry name is the hex representation of the lower
    642 * inode file handle.
    643 *
    644 * If the index dentry in negative, then either no lower aliases have been
    645 * copied up yet, or aliases have been copied up in older kernels and are
    646 * not indexed.
    647 *
    648 * If the index dentry for a copy up origin inode is positive, but points
    649 * to an inode different than the upper inode, then either the upper inode
    650 * has been copied up and not indexed or it was indexed, but since then
    651 * index dir was cleared. Either way, that index cannot be used to indentify
    652 * the overlay inode.
    653 */
    654int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
    655		       struct qstr *name)
    656{
    657	struct ovl_fh *fh;
    658	int err;
    659
    660	fh = ovl_encode_real_fh(ofs, origin, false);
    661	if (IS_ERR(fh))
    662		return PTR_ERR(fh);
    663
    664	err = ovl_get_index_name_fh(fh, name);
    665
    666	kfree(fh);
    667	return err;
    668}
    669
    670/* Lookup index by file handle for NFS export */
    671struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
    672{
    673	struct dentry *index;
    674	struct qstr name;
    675	int err;
    676
    677	err = ovl_get_index_name_fh(fh, &name);
    678	if (err)
    679		return ERR_PTR(err);
    680
    681	index = lookup_positive_unlocked(name.name, ofs->indexdir, name.len);
    682	kfree(name.name);
    683	if (IS_ERR(index)) {
    684		if (PTR_ERR(index) == -ENOENT)
    685			index = NULL;
    686		return index;
    687	}
    688
    689	if (ovl_is_whiteout(index))
    690		err = -ESTALE;
    691	else if (ovl_dentry_weird(index))
    692		err = -EIO;
    693	else
    694		return index;
    695
    696	dput(index);
    697	return ERR_PTR(err);
    698}
    699
/*
 * Look up the index entry for the lower inode of @origin and check that it
 * can be used for the overlay inode made of @upper (may be NULL) and
 * @origin.
 *
 * With @verify set, a whiteout index is reported as a bad index and a
 * directory index must carry an 'upper' xattr that points to @upper.
 *
 * Returns the index dentry, NULL when there is no index entry or when the
 * index of a non-dir points to an inode other than the inode of @upper,
 * ERR_PTR(-ESTALE) for a whiteout index with !@verify, ERR_PTR(-EIO) for a
 * bad index, or the ERR_PTR() of a failed name encoding or lookup.
 */
struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
				struct dentry *origin, bool verify)
{
	struct dentry *index;
	struct inode *inode;
	struct qstr name;
	bool is_dir = d_is_dir(origin);
	int err;

	/* Allocates name.name, which is freed at the "out" label */
	err = ovl_get_index_name(ofs, origin, &name);
	if (err)
		return ERR_PTR(err);

	index = lookup_one_positive_unlocked(ovl_upper_mnt_userns(ofs), name.name,
					     ofs->indexdir, name.len);
	if (IS_ERR(index)) {
		err = PTR_ERR(index);
		/* No index entry is a valid result and not worth a warning */
		if (err == -ENOENT) {
			index = NULL;
			goto out;
		}
		pr_warn_ratelimited("failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
				    "overlayfs: mount with '-o index=off' to disable inodes index.\n",
				    d_inode(origin)->i_ino, name.len, name.name,
				    err);
		goto out;
	}

	inode = d_inode(index);
	if (ovl_is_whiteout(index) && !verify) {
		/*
		 * When index lookup is called with !verify for decoding an
		 * overlay file handle, a whiteout index implies that decode
		 * should treat file handle as stale and no need to print a
		 * warning about it.
		 */
		dput(index);
		index = ERR_PTR(-ESTALE);
		goto out;
	} else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
		   inode_wrong_type(inode, d_inode(origin)->i_mode)) {
		/*
		 * Index should always be of the same file type as origin
		 * except for the case of a whiteout index. A whiteout
		 * index should only exist if all lower aliases have been
		 * unlinked, which means that finding a lower origin on lookup
		 * whose index is a whiteout should be treated as an error.
		 */
		pr_warn_ratelimited("bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
				    index, d_inode(index)->i_mode & S_IFMT,
				    d_inode(origin)->i_mode & S_IFMT);
		goto fail;
	} else if (is_dir && verify) {
		if (!upper) {
			pr_warn_ratelimited("suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
					    origin, index);
			goto fail;
		}

		/* Verify that dir index 'upper' xattr points to upper dir */
		err = ovl_verify_upper(ofs, index, upper, false);
		if (err) {
			if (err == -ESTALE) {
				pr_warn_ratelimited("suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
						    upper, origin, index);
			}
			goto fail;
		}
	} else if (upper && d_inode(upper) != inode) {
		/* The index belongs to another inode: behave as if not found */
		goto out_dput;
	}
out:
	kfree(name.name);
	return index;

out_dput:
	dput(index);
	index = NULL;
	goto out;

fail:
	/* Every bad index is reported to the caller as -EIO */
	dput(index);
	index = ERR_PTR(-EIO);
	goto out;
}
    785
    786/*
    787 * Returns next layer in stack starting from top.
    788 * Returns -1 if this is the last layer.
    789 */
    790int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
    791{
    792	struct ovl_entry *oe = dentry->d_fsdata;
    793
    794	BUG_ON(idx < 0);
    795	if (idx == 0) {
    796		ovl_path_upper(dentry, path);
    797		if (path->dentry)
    798			return oe->numlower ? 1 : -1;
    799		idx++;
    800	}
    801	BUG_ON(idx > oe->numlower);
    802	path->dentry = oe->lowerstack[idx - 1].dentry;
    803	path->mnt = oe->lowerstack[idx - 1].layer->mnt;
    804
    805	return (idx < oe->numlower) ? idx + 1 : -1;
    806}
    807
    808/* Fix missing 'origin' xattr */
    809static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry,
    810			  struct dentry *lower, struct dentry *upper)
    811{
    812	int err;
    813
    814	if (ovl_check_origin_xattr(ofs, upper))
    815		return 0;
    816
    817	err = ovl_want_write(dentry);
    818	if (err)
    819		return err;
    820
    821	err = ovl_set_origin(ofs, lower, upper);
    822	if (!err)
    823		err = ovl_set_impure(dentry->d_parent, upper->d_parent);
    824
    825	ovl_drop_write(dentry);
    826	return err;
    827}
    828
    829struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
    830			  unsigned int flags)
    831{
    832	struct ovl_entry *oe;
    833	const struct cred *old_cred;
    834	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
    835	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
    836	struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
    837	struct ovl_path *stack = NULL, *origin_path = NULL;
    838	struct dentry *upperdir, *upperdentry = NULL;
    839	struct dentry *origin = NULL;
    840	struct dentry *index = NULL;
    841	unsigned int ctr = 0;
    842	struct inode *inode = NULL;
    843	bool upperopaque = false;
    844	char *upperredirect = NULL;
    845	struct dentry *this;
    846	unsigned int i;
    847	int err;
    848	bool uppermetacopy = false;
    849	struct ovl_lookup_data d = {
    850		.sb = dentry->d_sb,
    851		.name = dentry->d_name,
    852		.is_dir = false,
    853		.opaque = false,
    854		.stop = false,
    855		.last = ofs->config.redirect_follow ? false : !poe->numlower,
    856		.redirect = NULL,
    857		.metacopy = false,
    858	};
    859
    860	if (dentry->d_name.len > ofs->namelen)
    861		return ERR_PTR(-ENAMETOOLONG);
    862
    863	old_cred = ovl_override_creds(dentry->d_sb);
    864	upperdir = ovl_dentry_upper(dentry->d_parent);
    865	if (upperdir) {
    866		d.mnt = ovl_upper_mnt(ofs);
    867		err = ovl_lookup_layer(upperdir, &d, &upperdentry, true);
    868		if (err)
    869			goto out;
    870
    871		if (upperdentry && upperdentry->d_flags & DCACHE_OP_REAL) {
    872			dput(upperdentry);
    873			err = -EREMOTE;
    874			goto out;
    875		}
    876		if (upperdentry && !d.is_dir) {
    877			/*
    878			 * Lookup copy up origin by decoding origin file handle.
    879			 * We may get a disconnected dentry, which is fine,
    880			 * because we only need to hold the origin inode in
    881			 * cache and use its inode number.  We may even get a
    882			 * connected dentry, that is not under any of the lower
    883			 * layers root.  That is also fine for using it's inode
    884			 * number - it's the same as if we held a reference
    885			 * to a dentry in lower layer that was moved under us.
    886			 */
    887			err = ovl_check_origin(ofs, upperdentry, &origin_path);
    888			if (err)
    889				goto out_put_upper;
    890
    891			if (d.metacopy)
    892				uppermetacopy = true;
    893		}
    894
    895		if (d.redirect) {
    896			err = -ENOMEM;
    897			upperredirect = kstrdup(d.redirect, GFP_KERNEL);
    898			if (!upperredirect)
    899				goto out_put_upper;
    900			if (d.redirect[0] == '/')
    901				poe = roe;
    902		}
    903		upperopaque = d.opaque;
    904	}
    905
    906	if (!d.stop && poe->numlower) {
    907		err = -ENOMEM;
    908		stack = kcalloc(ofs->numlayer - 1, sizeof(struct ovl_path),
    909				GFP_KERNEL);
    910		if (!stack)
    911			goto out_put_upper;
    912	}
    913
    914	for (i = 0; !d.stop && i < poe->numlower; i++) {
    915		struct ovl_path lower = poe->lowerstack[i];
    916
    917		if (!ofs->config.redirect_follow)
    918			d.last = i == poe->numlower - 1;
    919		else
    920			d.last = lower.layer->idx == roe->numlower;
    921
    922		d.mnt = lower.layer->mnt;
    923		err = ovl_lookup_layer(lower.dentry, &d, &this, false);
    924		if (err)
    925			goto out_put;
    926
    927		if (!this)
    928			continue;
    929
    930		if ((uppermetacopy || d.metacopy) && !ofs->config.metacopy) {
    931			dput(this);
    932			err = -EPERM;
    933			pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", dentry);
    934			goto out_put;
    935		}
    936
    937		/*
    938		 * If no origin fh is stored in upper of a merge dir, store fh
    939		 * of lower dir and set upper parent "impure".
    940		 */
    941		if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) {
    942			err = ovl_fix_origin(ofs, dentry, this, upperdentry);
    943			if (err) {
    944				dput(this);
    945				goto out_put;
    946			}
    947		}
    948
    949		/*
    950		 * When "verify_lower" feature is enabled, do not merge with a
    951		 * lower dir that does not match a stored origin xattr. In any
    952		 * case, only verified origin is used for index lookup.
    953		 *
    954		 * For non-dir dentry, if index=on, then ensure origin
    955		 * matches the dentry found using path based lookup,
    956		 * otherwise error out.
    957		 */
    958		if (upperdentry && !ctr &&
    959		    ((d.is_dir && ovl_verify_lower(dentry->d_sb)) ||
    960		     (!d.is_dir && ofs->config.index && origin_path))) {
    961			err = ovl_verify_origin(ofs, upperdentry, this, false);
    962			if (err) {
    963				dput(this);
    964				if (d.is_dir)
    965					break;
    966				goto out_put;
    967			}
    968			origin = this;
    969		}
    970
    971		if (d.metacopy && ctr) {
    972			/*
    973			 * Do not store intermediate metacopy dentries in
    974			 * lower chain, except top most lower metacopy dentry.
    975			 * Continue the loop so that if there is an absolute
    976			 * redirect on this dentry, poe can be reset to roe.
    977			 */
    978			dput(this);
    979			this = NULL;
    980		} else {
    981			stack[ctr].dentry = this;
    982			stack[ctr].layer = lower.layer;
    983			ctr++;
    984		}
    985
    986		/*
    987		 * Following redirects can have security consequences: it's like
    988		 * a symlink into the lower layer without the permission checks.
    989		 * This is only a problem if the upper layer is untrusted (e.g
    990		 * comes from an USB drive).  This can allow a non-readable file
    991		 * or directory to become readable.
    992		 *
    993		 * Only following redirects when redirects are enabled disables
    994		 * this attack vector when not necessary.
    995		 */
    996		err = -EPERM;
    997		if (d.redirect && !ofs->config.redirect_follow) {
    998			pr_warn_ratelimited("refusing to follow redirect for (%pd2)\n",
    999					    dentry);
   1000			goto out_put;
   1001		}
   1002
   1003		if (d.stop)
   1004			break;
   1005
   1006		if (d.redirect && d.redirect[0] == '/' && poe != roe) {
   1007			poe = roe;
   1008			/* Find the current layer on the root dentry */
   1009			i = lower.layer->idx - 1;
   1010		}
   1011	}
   1012
   1013	/*
   1014	 * For regular non-metacopy upper dentries, there is no lower
   1015	 * path based lookup, hence ctr will be zero. If a dentry is found
   1016	 * using ORIGIN xattr on upper, install it in stack.
   1017	 *
   1018	 * For metacopy dentry, path based lookup will find lower dentries.
   1019	 * Just make sure a corresponding data dentry has been found.
   1020	 */
   1021	if (d.metacopy || (uppermetacopy && !ctr)) {
   1022		pr_warn_ratelimited("metacopy with no lower data found - abort lookup (%pd2)\n",
   1023				    dentry);
   1024		err = -EIO;
   1025		goto out_put;
   1026	} else if (!d.is_dir && upperdentry && !ctr && origin_path) {
   1027		if (WARN_ON(stack != NULL)) {
   1028			err = -EIO;
   1029			goto out_put;
   1030		}
   1031		stack = origin_path;
   1032		ctr = 1;
   1033		origin = origin_path->dentry;
   1034		origin_path = NULL;
   1035	}
   1036
   1037	/*
   1038	 * Always lookup index if there is no-upperdentry.
   1039	 *
   1040	 * For the case of upperdentry, we have set origin by now if it
   1041	 * needed to be set. There are basically three cases.
   1042	 *
   1043	 * For directories, lookup index by lower inode and verify it matches
   1044	 * upper inode. We only trust dir index if we verified that lower dir
   1045	 * matches origin, otherwise dir index entries may be inconsistent
   1046	 * and we ignore them.
   1047	 *
   1048	 * For regular upper, we already set origin if upper had ORIGIN
   1049	 * xattr. There is no verification though as there is no path
   1050	 * based dentry lookup in lower in this case.
   1051	 *
   1052	 * For metacopy upper, we set a verified origin already if index
   1053	 * is enabled and if upper had an ORIGIN xattr.
   1054	 *
   1055	 */
   1056	if (!upperdentry && ctr)
   1057		origin = stack[0].dentry;
   1058
   1059	if (origin && ovl_indexdir(dentry->d_sb) &&
   1060	    (!d.is_dir || ovl_index_all(dentry->d_sb))) {
   1061		index = ovl_lookup_index(ofs, upperdentry, origin, true);
   1062		if (IS_ERR(index)) {
   1063			err = PTR_ERR(index);
   1064			index = NULL;
   1065			goto out_put;
   1066		}
   1067	}
   1068
   1069	oe = ovl_alloc_entry(ctr);
   1070	err = -ENOMEM;
   1071	if (!oe)
   1072		goto out_put;
   1073
   1074	memcpy(oe->lowerstack, stack, sizeof(struct ovl_path) * ctr);
   1075	dentry->d_fsdata = oe;
   1076
   1077	if (upperopaque)
   1078		ovl_dentry_set_opaque(dentry);
   1079
   1080	if (upperdentry)
   1081		ovl_dentry_set_upper_alias(dentry);
   1082	else if (index) {
   1083		struct path upperpath = {
   1084			.dentry = upperdentry = dget(index),
   1085			.mnt = ovl_upper_mnt(ofs),
   1086		};
   1087
   1088		upperredirect = ovl_get_redirect_xattr(ofs, &upperpath, 0);
   1089		if (IS_ERR(upperredirect)) {
   1090			err = PTR_ERR(upperredirect);
   1091			upperredirect = NULL;
   1092			goto out_free_oe;
   1093		}
   1094		err = ovl_check_metacopy_xattr(ofs, &upperpath);
   1095		if (err < 0)
   1096			goto out_free_oe;
   1097		uppermetacopy = err;
   1098	}
   1099
   1100	if (upperdentry || ctr) {
   1101		struct ovl_inode_params oip = {
   1102			.upperdentry = upperdentry,
   1103			.lowerpath = stack,
   1104			.index = index,
   1105			.numlower = ctr,
   1106			.redirect = upperredirect,
   1107			.lowerdata = (ctr > 1 && !d.is_dir) ?
   1108				      stack[ctr - 1].dentry : NULL,
   1109		};
   1110
   1111		inode = ovl_get_inode(dentry->d_sb, &oip);
   1112		err = PTR_ERR(inode);
   1113		if (IS_ERR(inode))
   1114			goto out_free_oe;
   1115		if (upperdentry && !uppermetacopy)
   1116			ovl_set_flag(OVL_UPPERDATA, inode);
   1117	}
   1118
   1119	ovl_dentry_update_reval(dentry, upperdentry,
   1120			DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE);
   1121
   1122	revert_creds(old_cred);
   1123	if (origin_path) {
   1124		dput(origin_path->dentry);
   1125		kfree(origin_path);
   1126	}
   1127	dput(index);
   1128	kfree(stack);
   1129	kfree(d.redirect);
   1130	return d_splice_alias(inode, dentry);
   1131
   1132out_free_oe:
   1133	dentry->d_fsdata = NULL;
   1134	kfree(oe);
   1135out_put:
   1136	dput(index);
   1137	for (i = 0; i < ctr; i++)
   1138		dput(stack[i].dentry);
   1139	kfree(stack);
   1140out_put_upper:
   1141	if (origin_path) {
   1142		dput(origin_path->dentry);
   1143		kfree(origin_path);
   1144	}
   1145	dput(upperdentry);
   1146	kfree(upperredirect);
   1147out:
   1148	kfree(d.redirect);
   1149	revert_creds(old_cred);
   1150	return ERR_PTR(err);
   1151}
   1152
   1153bool ovl_lower_positive(struct dentry *dentry)
   1154{
   1155	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
   1156	const struct qstr *name = &dentry->d_name;
   1157	const struct cred *old_cred;
   1158	unsigned int i;
   1159	bool positive = false;
   1160	bool done = false;
   1161
   1162	/*
   1163	 * If dentry is negative, then lower is positive iff this is a
   1164	 * whiteout.
   1165	 */
   1166	if (!dentry->d_inode)
   1167		return ovl_dentry_is_opaque(dentry);
   1168
   1169	/* Negative upper -> positive lower */
   1170	if (!ovl_dentry_upper(dentry))
   1171		return true;
   1172
   1173	old_cred = ovl_override_creds(dentry->d_sb);
   1174	/* Positive upper -> have to look up lower to see whether it exists */
   1175	for (i = 0; !done && !positive && i < poe->numlower; i++) {
   1176		struct dentry *this;
   1177		struct dentry *lowerdir = poe->lowerstack[i].dentry;
   1178
   1179		this = lookup_one_positive_unlocked(mnt_user_ns(poe->lowerstack[i].layer->mnt),
   1180						   name->name, lowerdir, name->len);
   1181		if (IS_ERR(this)) {
   1182			switch (PTR_ERR(this)) {
   1183			case -ENOENT:
   1184			case -ENAMETOOLONG:
   1185				break;
   1186
   1187			default:
   1188				/*
   1189				 * Assume something is there, we just couldn't
   1190				 * access it.
   1191				 */
   1192				positive = true;
   1193				break;
   1194			}
   1195		} else {
   1196			positive = !ovl_is_whiteout(this);
   1197			done = true;
   1198			dput(this);
   1199		}
   1200	}
   1201	revert_creds(old_cred);
   1202
   1203	return positive;
   1204}