cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

dir.c (30913B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 *
      4 * Copyright (C) 2011 Novell Inc.
      5 */
      6
      7#include <linux/fs.h>
      8#include <linux/namei.h>
      9#include <linux/xattr.h>
     10#include <linux/security.h>
     11#include <linux/cred.h>
     12#include <linux/module.h>
     13#include <linux/posix_acl.h>
     14#include <linux/posix_acl_xattr.h>
     15#include <linux/atomic.h>
     16#include <linux/ratelimit.h>
     17#include "overlayfs.h"
     18
/*
 * Upper bound on the length of an absolute redirect xattr value.  It also
 * sizes the scratch buffer used by ovl_get_redirect() when building an
 * absolute redirect path.  Exposed as the module parameter "redirect_max"
 * with permissions 0644.
 */
static unsigned short ovl_redirect_max = 256;
module_param_named(redirect_max, ovl_redirect_max, ushort, 0644);
MODULE_PARM_DESC(redirect_max,
		 "Maximum length of absolute redirect xattr value");

/* Forward declaration: used by ovl_set_link_redirect() before its definition */
static int ovl_set_redirect(struct dentry *dentry, bool samedir);
     25
     26int ovl_cleanup(struct ovl_fs *ofs, struct inode *wdir, struct dentry *wdentry)
     27{
     28	int err;
     29
     30	dget(wdentry);
     31	if (d_is_dir(wdentry))
     32		err = ovl_do_rmdir(ofs, wdir, wdentry);
     33	else
     34		err = ovl_do_unlink(ofs, wdir, wdentry);
     35	dput(wdentry);
     36
     37	if (err) {
     38		pr_err("cleanup of '%pd2' failed (%i)\n",
     39		       wdentry, err);
     40	}
     41
     42	return err;
     43}
     44
     45struct dentry *ovl_lookup_temp(struct ovl_fs *ofs, struct dentry *workdir)
     46{
     47	struct dentry *temp;
     48	char name[20];
     49	static atomic_t temp_id = ATOMIC_INIT(0);
     50
     51	/* counter is allowed to wrap, since temp dentries are ephemeral */
     52	snprintf(name, sizeof(name), "#%x", atomic_inc_return(&temp_id));
     53
     54	temp = ovl_lookup_upper(ofs, name, workdir, strlen(name));
     55	if (!IS_ERR(temp) && temp->d_inode) {
     56		pr_err("workdir/%s already exists\n", name);
     57		dput(temp);
     58		temp = ERR_PTR(-EIO);
     59	}
     60
     61	return temp;
     62}
     63
/*
 * Hand out a whiteout dentry that lives in ofs->workdir.
 *
 * One whiteout is kept cached in ofs->whiteout and is created on demand.
 * While ofs->share_whiteout is set, a new temp name is hard-linked to the
 * cached whiteout and that link is returned, leaving the cache populated.
 * If sharing is off, or the link attempt fails, the cached dentry itself is
 * returned and ofs->whiteout is reset to NULL.
 *
 * Returns the whiteout dentry, or an ERR_PTR() if a temp lookup or the
 * whiteout creation fails.
 *
 * caller holds i_mutex on workdir
 */
static struct dentry *ovl_whiteout(struct ovl_fs *ofs)
{
	int err;
	struct dentry *whiteout;
	struct dentry *workdir = ofs->workdir;
	struct inode *wdir = workdir->d_inode;

	/* Populate the cache if it is empty */
	if (!ofs->whiteout) {
		whiteout = ovl_lookup_temp(ofs, workdir);
		if (IS_ERR(whiteout))
			goto out;

		err = ovl_do_whiteout(ofs, wdir, whiteout);
		if (err) {
			dput(whiteout);
			whiteout = ERR_PTR(err);
			goto out;
		}
		ofs->whiteout = whiteout;
	}

	if (ofs->share_whiteout) {
		whiteout = ovl_lookup_temp(ofs, workdir);
		if (IS_ERR(whiteout))
			goto out;

		/* Success: return the new link, the cached whiteout stays */
		err = ovl_do_link(ofs, ofs->whiteout, wdir, whiteout);
		if (!err)
			goto out;

		/* -EMLINK does not disable sharing; any other error does */
		if (err != -EMLINK) {
			pr_warn("Failed to link whiteout - disabling whiteout inode sharing(nlink=%u, err=%i)\n",
				ofs->whiteout->d_inode->i_nlink, err);
			ofs->share_whiteout = false;
		}
		dput(whiteout);
	}
	/* Give away the cached whiteout itself and empty the cache */
	whiteout = ofs->whiteout;
	ofs->whiteout = NULL;
out:
	return whiteout;
}
    107
    108/* Caller must hold i_mutex on both workdir and dir */
    109int ovl_cleanup_and_whiteout(struct ovl_fs *ofs, struct inode *dir,
    110			     struct dentry *dentry)
    111{
    112	struct inode *wdir = ofs->workdir->d_inode;
    113	struct dentry *whiteout;
    114	int err;
    115	int flags = 0;
    116
    117	whiteout = ovl_whiteout(ofs);
    118	err = PTR_ERR(whiteout);
    119	if (IS_ERR(whiteout))
    120		return err;
    121
    122	if (d_is_dir(dentry))
    123		flags = RENAME_EXCHANGE;
    124
    125	err = ovl_do_rename(ofs, wdir, whiteout, dir, dentry, flags);
    126	if (err)
    127		goto kill_whiteout;
    128	if (flags)
    129		ovl_cleanup(ofs, wdir, dentry);
    130
    131out:
    132	dput(whiteout);
    133	return err;
    134
    135kill_whiteout:
    136	ovl_cleanup(ofs, wdir, whiteout);
    137	goto out;
    138}
    139
    140int ovl_mkdir_real(struct ovl_fs *ofs, struct inode *dir,
    141		   struct dentry **newdentry, umode_t mode)
    142{
    143	int err;
    144	struct dentry *d, *dentry = *newdentry;
    145
    146	err = ovl_do_mkdir(ofs, dir, dentry, mode);
    147	if (err)
    148		return err;
    149
    150	if (likely(!d_unhashed(dentry)))
    151		return 0;
    152
    153	/*
    154	 * vfs_mkdir() may succeed and leave the dentry passed
    155	 * to it unhashed and negative. If that happens, try to
    156	 * lookup a new hashed and positive dentry.
    157	 */
    158	d = ovl_lookup_upper(ofs, dentry->d_name.name, dentry->d_parent,
    159			     dentry->d_name.len);
    160	if (IS_ERR(d)) {
    161		pr_warn("failed lookup after mkdir (%pd2, err=%i).\n",
    162			dentry, err);
    163		return PTR_ERR(d);
    164	}
    165	dput(dentry);
    166	*newdentry = d;
    167
    168	return 0;
    169}
    170
    171struct dentry *ovl_create_real(struct ovl_fs *ofs, struct inode *dir,
    172			       struct dentry *newdentry, struct ovl_cattr *attr)
    173{
    174	int err;
    175
    176	if (IS_ERR(newdentry))
    177		return newdentry;
    178
    179	err = -ESTALE;
    180	if (newdentry->d_inode)
    181		goto out;
    182
    183	if (attr->hardlink) {
    184		err = ovl_do_link(ofs, attr->hardlink, dir, newdentry);
    185	} else {
    186		switch (attr->mode & S_IFMT) {
    187		case S_IFREG:
    188			err = ovl_do_create(ofs, dir, newdentry, attr->mode);
    189			break;
    190
    191		case S_IFDIR:
    192			/* mkdir is special... */
    193			err =  ovl_mkdir_real(ofs, dir, &newdentry, attr->mode);
    194			break;
    195
    196		case S_IFCHR:
    197		case S_IFBLK:
    198		case S_IFIFO:
    199		case S_IFSOCK:
    200			err = ovl_do_mknod(ofs, dir, newdentry, attr->mode,
    201					   attr->rdev);
    202			break;
    203
    204		case S_IFLNK:
    205			err = ovl_do_symlink(ofs, dir, newdentry, attr->link);
    206			break;
    207
    208		default:
    209			err = -EPERM;
    210		}
    211	}
    212	if (!err && WARN_ON(!newdentry->d_inode)) {
    213		/*
    214		 * Not quite sure if non-instantiated dentry is legal or not.
    215		 * VFS doesn't seem to care so check and warn here.
    216		 */
    217		err = -EIO;
    218	}
    219out:
    220	if (err) {
    221		dput(newdentry);
    222		return ERR_PTR(err);
    223	}
    224	return newdentry;
    225}
    226
    227struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir,
    228			       struct ovl_cattr *attr)
    229{
    230	return ovl_create_real(ofs, d_inode(workdir),
    231			       ovl_lookup_temp(ofs, workdir), attr);
    232}
    233
    234static int ovl_set_opaque_xerr(struct dentry *dentry, struct dentry *upper,
    235			       int xerr)
    236{
    237	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
    238	int err;
    239
    240	err = ovl_check_setxattr(ofs, upper, OVL_XATTR_OPAQUE, "y", 1, xerr);
    241	if (!err)
    242		ovl_dentry_set_opaque(dentry);
    243
    244	return err;
    245}
    246
    247static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry)
    248{
    249	/*
    250	 * Fail with -EIO when trying to create opaque dir and upper doesn't
    251	 * support xattrs. ovl_rename() calls ovl_set_opaque_xerr(-EXDEV) to
    252	 * return a specific error for noxattr case.
    253	 */
    254	return ovl_set_opaque_xerr(dentry, upperdentry, -EIO);
    255}
    256
/*
 * Common operations required to be done after creation of file on upper.
 * If @hardlink is false, then @inode is a pre-allocated inode, we may or
 * may not use to instantiate the new dentry.
 *
 * @dentry:    overlay dentry being instantiated
 * @inode:     pre-allocated overlay inode (!@hardlink) or the existing
 *             overlay inode that is being linked (@hardlink)
 * @newdentry: the newly created real upper dentry
 * @hardlink:  true when a hard link was created
 *
 * Returns 0 on success or the error from ovl_get_inode().
 */
static int ovl_instantiate(struct dentry *dentry, struct inode *inode,
			   struct dentry *newdentry, bool hardlink)
{
	struct ovl_inode_params oip = {
		.upperdentry = newdentry,
		.newinode = inode,
	};

	ovl_dir_modified(dentry->d_parent, false);
	ovl_dentry_set_upper_alias(dentry);
	ovl_dentry_update_reval(dentry, newdentry,
			DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE);

	if (!hardlink) {
		/*
		 * ovl_obtain_alias() can be called after ovl_create_real()
		 * and before we get here, so we may get an inode from cache
		 * with the same real upperdentry that is not the inode we
		 * pre-allocated.  In this case we will use the cached inode
		 * to instantiate the new dentry.
		 *
		 * XXX: if we ever use ovl_obtain_alias() to decode directory
		 * file handles, need to use ovl_get_inode_locked() and
		 * d_instantiate_new() here to prevent from creating two
		 * hashed directory inode aliases.
		 */
		inode = ovl_get_inode(dentry->d_sb, &oip);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
		/* Only flag the inode when our pre-allocated one was used */
		if (inode == oip.newinode)
			ovl_set_flag(OVL_UPPERDATA, inode);
	} else {
		/* The linked inode is expected to already map to this real inode */
		WARN_ON(ovl_inode_real(inode) != d_inode(newdentry));
		dput(newdentry);
		inc_nlink(inode);
	}

	d_instantiate(dentry, inode);
	/* Can only differ in the !hardlink case, where a cached inode was found */
	if (inode != oip.newinode) {
		pr_warn_ratelimited("newly created inode found in cache (%pd2)\n",
				    dentry);
	}

	/* Force lookup of new upper hardlink to find its lower */
	if (hardlink)
		d_drop(dentry);

	return 0;
}
    311
    312static bool ovl_type_merge(struct dentry *dentry)
    313{
    314	return OVL_TYPE_MERGE(ovl_path_type(dentry));
    315}
    316
    317static bool ovl_type_origin(struct dentry *dentry)
    318{
    319	return OVL_TYPE_ORIGIN(ovl_path_type(dentry));
    320}
    321
/*
 * Create a new object directly in the upper directory of @dentry's parent
 * and instantiate the overlay @dentry with it.
 *
 * @inode: overlay inode passed on to ovl_instantiate()
 * @attr:  creation attributes; @attr->mode may be masked with the umask here
 *
 * Returns 0 on success or a negative error code.  If instantiation fails the
 * freshly created upper object is removed again.
 */
static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
			    struct ovl_cattr *attr)
{
	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
	struct inode *udir = upperdir->d_inode;
	struct dentry *newdentry;
	int err;

	/* Apply the umask here unless hard-linking or udir has IS_POSIXACL set */
	if (!attr->hardlink && !IS_POSIXACL(udir))
		attr->mode &= ~current_umask();

	inode_lock_nested(udir, I_MUTEX_PARENT);
	/* ovl_create_real() passes an ERR_PTR() from the lookup straight through */
	newdentry = ovl_create_real(ofs, udir,
				    ovl_lookup_upper(ofs, dentry->d_name.name,
						     upperdir, dentry->d_name.len),
				    attr);
	err = PTR_ERR(newdentry);
	if (IS_ERR(newdentry))
		goto out_unlock;

	if (ovl_type_merge(dentry->d_parent) && d_is_dir(newdentry) &&
	    !ovl_allow_offline_changes(ofs)) {
		/* Setting opaque here is just an optimization, allow to fail */
		ovl_set_opaque(dentry, newdentry);
	}

	err = ovl_instantiate(dentry, inode, newdentry, !!attr->hardlink);
	if (err)
		goto out_cleanup;
out_unlock:
	inode_unlock(udir);
	return err;

out_cleanup:
	/* Undo the creation and drop our reference before unlocking */
	ovl_cleanup(ofs, udir, newdentry);
	dput(newdentry);
	goto out_unlock;
}
    361
    362static struct dentry *ovl_clear_empty(struct dentry *dentry,
    363				      struct list_head *list)
    364{
    365	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
    366	struct dentry *workdir = ovl_workdir(dentry);
    367	struct inode *wdir = workdir->d_inode;
    368	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
    369	struct inode *udir = upperdir->d_inode;
    370	struct path upperpath;
    371	struct dentry *upper;
    372	struct dentry *opaquedir;
    373	struct kstat stat;
    374	int err;
    375
    376	if (WARN_ON(!workdir))
    377		return ERR_PTR(-EROFS);
    378
    379	err = ovl_lock_rename_workdir(workdir, upperdir);
    380	if (err)
    381		goto out;
    382
    383	ovl_path_upper(dentry, &upperpath);
    384	err = vfs_getattr(&upperpath, &stat,
    385			  STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
    386	if (err)
    387		goto out_unlock;
    388
    389	err = -ESTALE;
    390	if (!S_ISDIR(stat.mode))
    391		goto out_unlock;
    392	upper = upperpath.dentry;
    393	if (upper->d_parent->d_inode != udir)
    394		goto out_unlock;
    395
    396	opaquedir = ovl_create_temp(ofs, workdir, OVL_CATTR(stat.mode));
    397	err = PTR_ERR(opaquedir);
    398	if (IS_ERR(opaquedir))
    399		goto out_unlock;
    400
    401	err = ovl_copy_xattr(dentry->d_sb, &upperpath, opaquedir);
    402	if (err)
    403		goto out_cleanup;
    404
    405	err = ovl_set_opaque(dentry, opaquedir);
    406	if (err)
    407		goto out_cleanup;
    408
    409	inode_lock(opaquedir->d_inode);
    410	err = ovl_set_attr(ofs, opaquedir, &stat);
    411	inode_unlock(opaquedir->d_inode);
    412	if (err)
    413		goto out_cleanup;
    414
    415	err = ovl_do_rename(ofs, wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
    416	if (err)
    417		goto out_cleanup;
    418
    419	ovl_cleanup_whiteouts(ofs, upper, list);
    420	ovl_cleanup(ofs, wdir, upper);
    421	unlock_rename(workdir, upperdir);
    422
    423	/* dentry's upper doesn't match now, get rid of it */
    424	d_drop(dentry);
    425
    426	return opaquedir;
    427
    428out_cleanup:
    429	ovl_cleanup(ofs, wdir, opaquedir);
    430	dput(opaquedir);
    431out_unlock:
    432	unlock_rename(workdir, upperdir);
    433out:
    434	return ERR_PTR(err);
    435}
    436
    437static int ovl_set_upper_acl(struct ovl_fs *ofs, struct dentry *upperdentry,
    438			     const char *name, const struct posix_acl *acl)
    439{
    440	void *buffer;
    441	size_t size;
    442	int err;
    443
    444	if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !acl)
    445		return 0;
    446
    447	size = posix_acl_xattr_size(acl->a_count);
    448	buffer = kmalloc(size, GFP_KERNEL);
    449	if (!buffer)
    450		return -ENOMEM;
    451
    452	err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
    453	if (err < 0)
    454		goto out_free;
    455
    456	err = ovl_do_setxattr(ofs, upperdentry, name, buffer, size, XATTR_CREATE);
    457out_free:
    458	kfree(buffer);
    459	return err;
    460}
    461
    462static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
    463				    struct ovl_cattr *cattr)
    464{
    465	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
    466	struct dentry *workdir = ovl_workdir(dentry);
    467	struct inode *wdir = workdir->d_inode;
    468	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
    469	struct inode *udir = upperdir->d_inode;
    470	struct dentry *upper;
    471	struct dentry *newdentry;
    472	int err;
    473	struct posix_acl *acl, *default_acl;
    474	bool hardlink = !!cattr->hardlink;
    475
    476	if (WARN_ON(!workdir))
    477		return -EROFS;
    478
    479	if (!hardlink) {
    480		err = posix_acl_create(dentry->d_parent->d_inode,
    481				       &cattr->mode, &default_acl, &acl);
    482		if (err)
    483			return err;
    484	}
    485
    486	err = ovl_lock_rename_workdir(workdir, upperdir);
    487	if (err)
    488		goto out;
    489
    490	upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir,
    491				 dentry->d_name.len);
    492	err = PTR_ERR(upper);
    493	if (IS_ERR(upper))
    494		goto out_unlock;
    495
    496	err = -ESTALE;
    497	if (d_is_negative(upper) || !IS_WHITEOUT(d_inode(upper)))
    498		goto out_dput;
    499
    500	newdentry = ovl_create_temp(ofs, workdir, cattr);
    501	err = PTR_ERR(newdentry);
    502	if (IS_ERR(newdentry))
    503		goto out_dput;
    504
    505	/*
    506	 * mode could have been mutilated due to umask (e.g. sgid directory)
    507	 */
    508	if (!hardlink &&
    509	    !S_ISLNK(cattr->mode) &&
    510	    newdentry->d_inode->i_mode != cattr->mode) {
    511		struct iattr attr = {
    512			.ia_valid = ATTR_MODE,
    513			.ia_mode = cattr->mode,
    514		};
    515		inode_lock(newdentry->d_inode);
    516		err = ovl_do_notify_change(ofs, newdentry, &attr);
    517		inode_unlock(newdentry->d_inode);
    518		if (err)
    519			goto out_cleanup;
    520	}
    521	if (!hardlink) {
    522		err = ovl_set_upper_acl(ofs, newdentry,
    523					XATTR_NAME_POSIX_ACL_ACCESS, acl);
    524		if (err)
    525			goto out_cleanup;
    526
    527		err = ovl_set_upper_acl(ofs, newdentry,
    528					XATTR_NAME_POSIX_ACL_DEFAULT, default_acl);
    529		if (err)
    530			goto out_cleanup;
    531	}
    532
    533	if (!hardlink && S_ISDIR(cattr->mode)) {
    534		err = ovl_set_opaque(dentry, newdentry);
    535		if (err)
    536			goto out_cleanup;
    537
    538		err = ovl_do_rename(ofs, wdir, newdentry, udir, upper,
    539				    RENAME_EXCHANGE);
    540		if (err)
    541			goto out_cleanup;
    542
    543		ovl_cleanup(ofs, wdir, upper);
    544	} else {
    545		err = ovl_do_rename(ofs, wdir, newdentry, udir, upper, 0);
    546		if (err)
    547			goto out_cleanup;
    548	}
    549	err = ovl_instantiate(dentry, inode, newdentry, hardlink);
    550	if (err) {
    551		ovl_cleanup(ofs, udir, newdentry);
    552		dput(newdentry);
    553	}
    554out_dput:
    555	dput(upper);
    556out_unlock:
    557	unlock_rename(workdir, upperdir);
    558out:
    559	if (!hardlink) {
    560		posix_acl_release(acl);
    561		posix_acl_release(default_acl);
    562	}
    563	return err;
    564
    565out_cleanup:
    566	ovl_cleanup(ofs, wdir, newdentry);
    567	dput(newdentry);
    568	goto out_dput;
    569}
    570
/*
 * Common back end for creating a new object or a hard link at @dentry.
 *
 * Copies up the parent directory, then, under overridden credentials,
 * creates the object either directly in the upper dir or over an existing
 * whiteout.
 *
 * @inode:  overlay inode for the new dentry; its i_uid/i_gid become the
 *          fsuid/fsgid used for the creation
 * @attr:   creation attributes; @attr->hardlink set means "link"
 * @origin: if true, mark the parent directory impure first
 *
 * Returns 0 on success or a negative error code.
 */
static int ovl_create_or_link(struct dentry *dentry, struct inode *inode,
			      struct ovl_cattr *attr, bool origin)
{
	int err;
	const struct cred *old_cred;
	struct cred *override_cred;
	struct dentry *parent = dentry->d_parent;

	err = ovl_copy_up(parent);
	if (err)
		return err;

	old_cred = ovl_override_creds(dentry->d_sb);

	/*
	 * When linking a file with copy up origin into a new parent, mark the
	 * new parent dir "impure".
	 */
	if (origin) {
		err = ovl_set_impure(parent, ovl_dentry_upper(parent));
		if (err)
			goto out_revert_creds;
	}

	/* Preset the result for the case that prepare_creds() returns NULL */
	err = -ENOMEM;
	override_cred = prepare_creds();
	if (override_cred) {
		override_cred->fsuid = inode->i_uid;
		override_cred->fsgid = inode->i_gid;
		if (!attr->hardlink) {
			err = security_dentry_create_files_as(dentry,
					attr->mode, &dentry->d_name, old_cred,
					override_cred);
			if (err) {
				put_cred(override_cred);
				goto out_revert_creds;
			}
		}
		/*
		 * NOTE(review): presumably override_creds() returns the
		 * previously active creds (the ones installed by
		 * ovl_override_creds() above), whose reference is dropped
		 * here along with our own on override_cred - confirm.
		 */
		put_cred(override_creds(override_cred));
		put_cred(override_cred);

		if (!ovl_dentry_is_whiteout(dentry))
			err = ovl_create_upper(dentry, inode, attr);
		else
			err = ovl_create_over_whiteout(dentry, inode, attr);
	}
out_revert_creds:
	revert_creds(old_cred);
	return err;
}
    621
    622static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
    623			     const char *link)
    624{
    625	int err;
    626	struct inode *inode;
    627	struct ovl_cattr attr = {
    628		.rdev = rdev,
    629		.link = link,
    630	};
    631
    632	err = ovl_want_write(dentry);
    633	if (err)
    634		goto out;
    635
    636	/* Preallocate inode to be used by ovl_get_inode() */
    637	err = -ENOMEM;
    638	inode = ovl_new_inode(dentry->d_sb, mode, rdev);
    639	if (!inode)
    640		goto out_drop_write;
    641
    642	spin_lock(&inode->i_lock);
    643	inode->i_state |= I_CREATING;
    644	spin_unlock(&inode->i_lock);
    645
    646	inode_init_owner(&init_user_ns, inode, dentry->d_parent->d_inode, mode);
    647	attr.mode = inode->i_mode;
    648
    649	err = ovl_create_or_link(dentry, inode, &attr, false);
    650	/* Did we end up using the preallocated inode? */
    651	if (inode != d_inode(dentry))
    652		iput(inode);
    653
    654out_drop_write:
    655	ovl_drop_write(dentry);
    656out:
    657	return err;
    658}
    659
    660static int ovl_create(struct user_namespace *mnt_userns, struct inode *dir,
    661		      struct dentry *dentry, umode_t mode, bool excl)
    662{
    663	return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
    664}
    665
    666static int ovl_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
    667		     struct dentry *dentry, umode_t mode)
    668{
    669	return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
    670}
    671
    672static int ovl_mknod(struct user_namespace *mnt_userns, struct inode *dir,
    673		     struct dentry *dentry, umode_t mode, dev_t rdev)
    674{
    675	/* Don't allow creation of "whiteout" on overlay */
    676	if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
    677		return -EPERM;
    678
    679	return ovl_create_object(dentry, mode, rdev, NULL);
    680}
    681
    682static int ovl_symlink(struct user_namespace *mnt_userns, struct inode *dir,
    683		       struct dentry *dentry, const char *link)
    684{
    685	return ovl_create_object(dentry, S_IFLNK, 0, link);
    686}
    687
    688static int ovl_set_link_redirect(struct dentry *dentry)
    689{
    690	const struct cred *old_cred;
    691	int err;
    692
    693	old_cred = ovl_override_creds(dentry->d_sb);
    694	err = ovl_set_redirect(dentry, false);
    695	revert_creds(old_cred);
    696
    697	return err;
    698}
    699
    700static int ovl_link(struct dentry *old, struct inode *newdir,
    701		    struct dentry *new)
    702{
    703	int err;
    704	struct inode *inode;
    705
    706	err = ovl_want_write(old);
    707	if (err)
    708		goto out;
    709
    710	err = ovl_copy_up(old);
    711	if (err)
    712		goto out_drop_write;
    713
    714	err = ovl_copy_up(new->d_parent);
    715	if (err)
    716		goto out_drop_write;
    717
    718	if (ovl_is_metacopy_dentry(old)) {
    719		err = ovl_set_link_redirect(old);
    720		if (err)
    721			goto out_drop_write;
    722	}
    723
    724	err = ovl_nlink_start(old);
    725	if (err)
    726		goto out_drop_write;
    727
    728	inode = d_inode(old);
    729	ihold(inode);
    730
    731	err = ovl_create_or_link(new, inode,
    732			&(struct ovl_cattr) {.hardlink = ovl_dentry_upper(old)},
    733			ovl_type_origin(old));
    734	if (err)
    735		iput(inode);
    736
    737	ovl_nlink_end(old);
    738out_drop_write:
    739	ovl_drop_write(old);
    740out:
    741	return err;
    742}
    743
    744static bool ovl_matches_upper(struct dentry *dentry, struct dentry *upper)
    745{
    746	return d_inode(ovl_dentry_upper(dentry)) == d_inode(upper);
    747}
    748
/*
 * Remove @dentry by putting a whiteout in its place in the upper dir.
 *
 * @list: entries collected for a directory being removed; if non-empty the
 *        upper directory is first replaced via ovl_clear_empty()
 *
 * Returns 0 on success or a negative error code; -ESTALE if the upper entry
 * found under the lock is not the expected one.
 */
static int ovl_remove_and_whiteout(struct dentry *dentry,
				   struct list_head *list)
{
	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
	struct dentry *workdir = ovl_workdir(dentry);
	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
	struct dentry *upper;
	struct dentry *opaquedir = NULL;
	int err;

	if (WARN_ON(!workdir))
		return -EROFS;

	if (!list_empty(list)) {
		opaquedir = ovl_clear_empty(dentry, list);
		err = PTR_ERR(opaquedir);
		if (IS_ERR(opaquedir))
			goto out;
	}

	err = ovl_lock_rename_workdir(workdir, upperdir);
	if (err)
		goto out_dput;

	upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir,
				 dentry->d_name.len);
	err = PTR_ERR(upper);
	if (IS_ERR(upper))
		goto out_unlock;

	/*
	 * The looked-up entry must be the directory created by
	 * ovl_clear_empty(), or else match the known upper dentry (if any).
	 */
	err = -ESTALE;
	if ((opaquedir && upper != opaquedir) ||
	    (!opaquedir && ovl_dentry_upper(dentry) &&
	     !ovl_matches_upper(dentry, upper))) {
		goto out_dput_upper;
	}

	err = ovl_cleanup_and_whiteout(ofs, d_inode(upperdir), upper);
	if (err)
		goto out_d_drop;

	ovl_dir_modified(dentry->d_parent, true);
out_d_drop:
	/* Reached on success and when ovl_cleanup_and_whiteout() fails */
	d_drop(dentry);
out_dput_upper:
	dput(upper);
out_unlock:
	unlock_rename(workdir, upperdir);
out_dput:
	/* opaquedir is NULL when ovl_clear_empty() was not called */
	dput(opaquedir);
out:
	return err;
}
    802
/*
 * Remove @dentry's upper object directly (rmdir or unlink), without leaving
 * a whiteout behind.
 *
 * @is_dir: selects rmdir over unlink
 * @list:   entries collected for a directory being removed; if non-empty the
 *          upper directory is first replaced via ovl_clear_empty()
 *
 * Returns 0 on success or a negative error code; -ESTALE if the upper entry
 * found under the lock is not the expected one.
 */
static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
			    struct list_head *list)
{
	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
	struct inode *dir = upperdir->d_inode;
	struct dentry *upper;
	struct dentry *opaquedir = NULL;
	int err;

	if (!list_empty(list)) {
		opaquedir = ovl_clear_empty(dentry, list);
		err = PTR_ERR(opaquedir);
		if (IS_ERR(opaquedir))
			goto out;
	}

	inode_lock_nested(dir, I_MUTEX_PARENT);
	upper = ovl_lookup_upper(ofs, dentry->d_name.name, upperdir,
				 dentry->d_name.len);
	err = PTR_ERR(upper);
	if (IS_ERR(upper))
		goto out_unlock;

	/*
	 * The looked-up entry must be the directory created by
	 * ovl_clear_empty(), or else match the known upper dentry.
	 */
	err = -ESTALE;
	if ((opaquedir && upper != opaquedir) ||
	    (!opaquedir && !ovl_matches_upper(dentry, upper)))
		goto out_dput_upper;

	if (is_dir)
		err = ovl_do_rmdir(ofs, dir, upper);
	else
		err = ovl_do_unlink(ofs, dir, upper);
	/* Called regardless of whether the removal succeeded */
	ovl_dir_modified(dentry->d_parent, ovl_type_origin(dentry));

	/*
	 * Keeping this dentry hashed would mean having to release
	 * upperpath/lowerpath, which could only be done if we are the
	 * sole user of this dentry.  Too tricky...  Just unhash for
	 * now.
	 */
	if (!err)
		d_drop(dentry);
out_dput_upper:
	dput(upper);
out_unlock:
	inode_unlock(dir);
	/* opaquedir is NULL when ovl_clear_empty() was not called */
	dput(opaquedir);
out:
	return err;
}
    854
    855static bool ovl_pure_upper(struct dentry *dentry)
    856{
    857	return !ovl_dentry_lower(dentry) &&
    858	       !ovl_test_flag(OVL_WHITEOUTS, d_inode(dentry));
    859}
    860
/*
 * Decrement the link count of @dentry's inode after an unlink, but keep it
 * at 1 or above while another hashed alias of the inode still exists.
 */
static void ovl_drop_nlink(struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);
	struct dentry *alias;

	/* Try to find another, hashed alias */
	spin_lock(&inode->i_lock);
	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
		if (alias != dentry && !d_unhashed(alias))
			break;
	}
	spin_unlock(&inode->i_lock);

	/*
	 * Changes to underlying layers may cause i_nlink to lose sync with
	 * reality.  In this case prevent the link count from going to zero
	 * prematurely.
	 *
	 * NOTE(review): relies on hlist_for_each_entry() leaving 'alias' NULL
	 * when the loop finishes without a break, so that !!alias is 1 only
	 * if another hashed alias was found - confirm against list.h.
	 */
	if (inode->i_nlink > !!alias)
		drop_nlink(inode);
}
    882
/*
 * Common implementation of unlink and rmdir.
 *
 * @is_dir: true for rmdir, false for unlink
 *
 * If ovl_lower_positive() is false for @dentry the upper object is simply
 * removed; otherwise it is replaced by a whiteout.  On success the overlay
 * inode's link count is cleared (directory) or dropped (non-directory).
 *
 * Returns 0 on success or a negative error code.
 */
static int ovl_do_remove(struct dentry *dentry, bool is_dir)
{
	int err;
	const struct cred *old_cred;
	bool lower_positive = ovl_lower_positive(dentry);
	LIST_HEAD(list);

	/* No need to clean pure upper removed by vfs_rmdir() */
	if (is_dir && (lower_positive || !ovl_pure_upper(dentry))) {
		err = ovl_check_empty_dir(dentry, &list);
		if (err)
			goto out;
	}

	err = ovl_want_write(dentry);
	if (err)
		goto out;

	err = ovl_copy_up(dentry->d_parent);
	if (err)
		goto out_drop_write;

	err = ovl_nlink_start(dentry);
	if (err)
		goto out_drop_write;

	/* The actual removal runs with the credentials from ovl_override_creds() */
	old_cred = ovl_override_creds(dentry->d_sb);
	if (!lower_positive)
		err = ovl_remove_upper(dentry, is_dir, &list);
	else
		err = ovl_remove_and_whiteout(dentry, &list);
	revert_creds(old_cred);
	if (!err) {
		if (is_dir)
			clear_nlink(dentry->d_inode);
		else
			ovl_drop_nlink(dentry);
	}
	ovl_nlink_end(dentry);

	/*
	 * Copy ctime
	 *
	 * Note: we fail to update ctime if there was no copy-up, only a
	 * whiteout
	 */
	if (ovl_dentry_upper(dentry))
		ovl_copyattr(d_inode(dentry));

out_drop_write:
	ovl_drop_write(dentry);
out:
	/* Free whatever ovl_check_empty_dir() put on the list */
	ovl_cache_free(&list);
	return err;
}
    938
    939static int ovl_unlink(struct inode *dir, struct dentry *dentry)
    940{
    941	return ovl_do_remove(dentry, false);
    942}
    943
    944static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
    945{
    946	return ovl_do_remove(dentry, true);
    947}
    948
    949static bool ovl_type_merge_or_lower(struct dentry *dentry)
    950{
    951	enum ovl_path_type type = ovl_path_type(dentry);
    952
    953	return OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type);
    954}
    955
    956static bool ovl_can_move(struct dentry *dentry)
    957{
    958	return ovl_redirect_dir(dentry->d_sb) ||
    959		!d_is_dir(dentry) || !ovl_type_merge_or_lower(dentry);
    960}
    961
/*
 * Build the redirect string for @dentry.
 *
 * @abs_redirect: if false, the result is just a copy of @dentry's name;
 *                if true, a path is assembled by walking up the parents
 *
 * Returns a newly allocated string (to be freed with kfree()), or
 * ERR_PTR(-ENOMEM) if an allocation failed, or ERR_PTR(-EXDEV) if the path
 * does not fit into ovl_redirect_max bytes.
 */
static char *ovl_get_redirect(struct dentry *dentry, bool abs_redirect)
{
	char *buf, *ret;
	struct dentry *d, *tmp;
	int buflen = ovl_redirect_max + 1;

	if (!abs_redirect) {
		ret = kstrndup(dentry->d_name.name, dentry->d_name.len,
			       GFP_KERNEL);
		goto out;
	}

	buf = ret = kmalloc(buflen, GFP_KERNEL);
	if (!buf)
		goto out;

	/*
	 * The path is assembled back to front: buflen is the index of the
	 * first used byte and moves towards 0 as components are prepended.
	 */
	buflen--;
	buf[buflen] = '\0';
	for (d = dget(dentry); !IS_ROOT(d);) {
		const char *name;
		int thislen;

		spin_lock(&d->d_lock);
		/* Prefer an existing redirect over the dentry's own name */
		name = ovl_dentry_get_redirect(d);
		if (name) {
			thislen = strlen(name);
		} else {
			name = d->d_name.name;
			thislen = d->d_name.len;
		}

		/* If path is too long, fall back to userspace move */
		if (thislen + (name[0] != '/') > buflen) {
			ret = ERR_PTR(-EXDEV);
			spin_unlock(&d->d_lock);
			goto out_put;
		}

		buflen -= thislen;
		memcpy(&buf[buflen], name, thislen);
		spin_unlock(&d->d_lock);
		tmp = dget_parent(d);

		dput(d);
		d = tmp;

		/* Absolute redirect: finished */
		if (buf[buflen] == '/')
			break;
		/* Otherwise prepend the separator and continue with the parent */
		buflen--;
		buf[buflen] = '/';
	}
	/* Return a right-sized copy; the scratch buffer is freed below */
	ret = kstrdup(&buf[buflen], GFP_KERNEL);
out_put:
	dput(d);
	kfree(buf);
out:
	/* A NULL ret means one of the allocations above failed */
	return ret ? ret : ERR_PTR(-ENOMEM);
}
   1021
   1022static bool ovl_need_absolute_redirect(struct dentry *dentry, bool samedir)
   1023{
   1024	struct dentry *lowerdentry;
   1025
   1026	if (!samedir)
   1027		return true;
   1028
   1029	if (d_is_dir(dentry))
   1030		return false;
   1031
   1032	/*
   1033	 * For non-dir hardlinked files, we need absolute redirects
   1034	 * in general as two upper hardlinks could be in different
   1035	 * dirs. We could put a relative redirect now and convert
   1036	 * it to absolute redirect later. But when nlink > 1 and
   1037	 * indexing is on, that means relative redirect needs to be
   1038	 * converted to absolute during copy up of another lower
   1039	 * hardllink as well.
   1040	 *
   1041	 * So without optimizing too much, just check if lower is
   1042	 * a hard link or not. If lower is hard link, put absolute
   1043	 * redirect.
   1044	 */
   1045	lowerdentry = ovl_dentry_lower(dentry);
   1046	return (d_inode(lowerdentry)->i_nlink > 1);
   1047}
   1048
   1049static int ovl_set_redirect(struct dentry *dentry, bool samedir)
   1050{
   1051	int err;
   1052	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
   1053	const char *redirect = ovl_dentry_get_redirect(dentry);
   1054	bool absolute_redirect = ovl_need_absolute_redirect(dentry, samedir);
   1055
   1056	if (redirect && (!absolute_redirect || redirect[0] == '/'))
   1057		return 0;
   1058
   1059	redirect = ovl_get_redirect(dentry, absolute_redirect);
   1060	if (IS_ERR(redirect))
   1061		return PTR_ERR(redirect);
   1062
   1063	err = ovl_check_setxattr(ofs, ovl_dentry_upper(dentry),
   1064				 OVL_XATTR_REDIRECT,
   1065				 redirect, strlen(redirect), -EXDEV);
   1066	if (!err) {
   1067		spin_lock(&dentry->d_lock);
   1068		ovl_dentry_set_redirect(dentry, redirect);
   1069		spin_unlock(&dentry->d_lock);
   1070	} else {
   1071		kfree(redirect);
   1072		pr_warn_ratelimited("failed to set redirect (%i)\n",
   1073				    err);
   1074		/* Fall back to userspace copy-up */
   1075		err = -EXDEV;
   1076	}
   1077	return err;
   1078}
   1079
/*
 * Rename overlay dentry @old to @new by renaming the matching dentries in
 * the upper layer.
 *
 * @mnt_userns: not referenced in this function
 * @olddir:     inode of the directory containing @old
 * @old:        overlay dentry being renamed
 * @newdir:     inode of the directory containing @new
 * @new:        overlay dentry of the target name
 * @flags:      only RENAME_EXCHANGE and RENAME_NOREPLACE are accepted
 *
 * Returns 0 on success or a negative errno:
 *   -EINVAL  @flags contains an unsupported bit
 *   -EXDEV   ovl_can_move() refused @old (or @new for an exchange); also
 *            the fallback error passed to the redirect/opaque helpers
 *   -ESTALE  the dentries found in the upper directories do not match what
 *            the overlay dentries expect
 * plus whatever the copy-up, lookup, xattr and rename helpers return.
 *
 * Resources are released through the label chain at the end in the
 * reverse order of acquisition.
 */
static int ovl_rename(struct user_namespace *mnt_userns, struct inode *olddir,
		      struct dentry *old, struct inode *newdir,
		      struct dentry *new, unsigned int flags)
{
	int err;
	struct dentry *old_upperdir;
	struct dentry *new_upperdir;
	struct dentry *olddentry;
	struct dentry *newdentry;
	struct dentry *trap;
	bool old_opaque;
	bool new_opaque;
	/* set when a whiteout swapped into the old dir must be removed */
	bool cleanup_whiteout = false;
	/* set once ovl_nlink_start(new) succeeded; pairs with ovl_nlink_end() */
	bool update_nlink = false;
	/* every rename that is not an exchange is treated as an overwrite */
	bool overwrite = !(flags & RENAME_EXCHANGE);
	bool is_dir = d_is_dir(old);
	bool new_is_dir = d_is_dir(new);
	bool samedir = olddir == newdir;
	/* dentry returned by ovl_clear_empty(), released at "out" */
	struct dentry *opaquedir = NULL;
	const struct cred *old_cred = NULL;
	struct ovl_fs *ofs = OVL_FS(old->d_sb);
	/* filled by ovl_check_empty_dir(), freed at "out" */
	LIST_HEAD(list);

	err = -EINVAL;
	if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
		goto out;

	/*
	 * RENAME_NOREPLACE is not passed on to the upper rename.
	 * NOTE(review): presumably the caller already enforced it - confirm.
	 */
	flags &= ~RENAME_NOREPLACE;

	/* Don't copy up directory trees */
	err = -EXDEV;
	if (!ovl_can_move(old))
		goto out;
	if (!overwrite && !ovl_can_move(new))
		goto out;

	/*
	 * A directory target that is not pure upper is checked with
	 * ovl_check_empty_dir(); @list is handed to ovl_clear_empty() below.
	 */
	if (overwrite && new_is_dir && !ovl_pure_upper(new)) {
		err = ovl_check_empty_dir(new, &list);
		if (err)
			goto out;
	}

	/* Pick the extra flags the upper-layer rename needs */
	if (overwrite) {
		if (ovl_lower_positive(old)) {
			if (!ovl_dentry_is_whiteout(new)) {
				/* Whiteout source */
				flags |= RENAME_WHITEOUT;
			} else {
				/* Switch whiteouts */
				flags |= RENAME_EXCHANGE;
			}
		} else if (is_dir && ovl_dentry_is_whiteout(new)) {
			/*
			 * Exchange the directory with the whiteout, then
			 * remove the whiteout from the old directory after
			 * the rename (see cleanup_whiteout below).
			 */
			flags |= RENAME_EXCHANGE;
			cleanup_whiteout = true;
		}
	}

	err = ovl_want_write(old);
	if (err)
		goto out;

	/* Copy up the source, the target's parent and, for an exchange, the target */
	err = ovl_copy_up(old);
	if (err)
		goto out_drop_write;

	err = ovl_copy_up(new->d_parent);
	if (err)
		goto out_drop_write;
	if (!overwrite) {
		err = ovl_copy_up(new);
		if (err)
			goto out_drop_write;
	} else if (d_inode(new)) {
		/* Overwriting an existing target: its nlink is adjusted after the rename */
		err = ovl_nlink_start(new);
		if (err)
			goto out_drop_write;

		update_nlink = true;
	}

	old_cred = ovl_override_creds(old->d_sb);

	/* Non-empty @list means ovl_check_empty_dir() above recorded entries */
	if (!list_empty(&list)) {
		opaquedir = ovl_clear_empty(new, &list);
		err = PTR_ERR(opaquedir);
		if (IS_ERR(opaquedir)) {
			/* keep the dput() at "out" away from the error pointer */
			opaquedir = NULL;
			goto out_revert_creds;
		}
	}

	old_upperdir = ovl_dentry_upper(old->d_parent);
	new_upperdir = ovl_dentry_upper(new->d_parent);

	if (!samedir) {
		/*
		 * When moving a merge dir or non-dir with copy up origin into
		 * a new parent, we are marking the new parent dir "impure".
		 * When ovl_iterate() iterates an "impure" upper dir, it will
		 * lookup the origin inodes of the entries to fill d_ino.
		 */
		if (ovl_type_origin(old)) {
			err = ovl_set_impure(new->d_parent, new_upperdir);
			if (err)
				goto out_revert_creds;
		}
		if (!overwrite && ovl_type_origin(new)) {
			err = ovl_set_impure(old->d_parent, old_upperdir);
			if (err)
				goto out_revert_creds;
		}
	}

	/* Lock both upper parents; "trap" is compared with both lookups below */
	trap = lock_rename(new_upperdir, old_upperdir);

	olddentry = ovl_lookup_upper(ofs, old->d_name.name, old_upperdir,
				     old->d_name.len);
	err = PTR_ERR(olddentry);
	if (IS_ERR(olddentry))
		goto out_unlock;

	/* The upper dentry found by name must be the one @old refers to */
	err = -ESTALE;
	if (!ovl_matches_upper(old, olddentry))
		goto out_dput_old;

	newdentry = ovl_lookup_upper(ofs, new->d_name.name, new_upperdir,
				     new->d_name.len);
	err = PTR_ERR(newdentry);
	if (IS_ERR(newdentry))
		goto out_dput_old;

	old_opaque = ovl_dentry_is_opaque(old);
	new_opaque = ovl_dentry_is_opaque(new);

	/* Every bail-out from here until "err = 0" below returns -ESTALE */
	err = -ESTALE;
	if (d_inode(new) && ovl_dentry_upper(new)) {
		/* Positive target with an upper: the lookup must match it */
		if (opaquedir) {
			if (newdentry != opaquedir)
				goto out_dput;
		} else {
			if (!ovl_matches_upper(new, newdentry))
				goto out_dput;
		}
	} else {
		/*
		 * Otherwise a positive upper dentry is only accepted when it
		 * is a whiteout and @new is opaque, and a negative one cannot
		 * take part in an exchange.
		 */
		if (!d_is_negative(newdentry)) {
			if (!new_opaque || !ovl_is_whiteout(newdentry))
				goto out_dput;
		} else {
			if (flags & RENAME_EXCHANGE)
				goto out_dput;
		}
	}

	if (olddentry == trap)
		goto out_dput;
	if (newdentry == trap)
		goto out_dput;

	/* Both names resolving to the same upper inode is rejected as well */
	if (olddentry->d_inode == newdentry->d_inode)
		goto out_dput;

	/* Set the redirect or opaque marking on the source before the rename */
	err = 0;
	if (ovl_type_merge_or_lower(old))
		err = ovl_set_redirect(old, samedir);
	else if (is_dir && !old_opaque && ovl_type_merge(new->d_parent))
		err = ovl_set_opaque_xerr(old, olddentry, -EXDEV);
	if (err)
		goto out_dput;

	/* Same for the target, which only moves in the exchange case */
	if (!overwrite && ovl_type_merge_or_lower(new))
		err = ovl_set_redirect(new, samedir);
	else if (!overwrite && new_is_dir && !new_opaque &&
		 ovl_type_merge(old->d_parent))
		err = ovl_set_opaque_xerr(new, newdentry, -EXDEV);
	if (err)
		goto out_dput;

	err = ovl_do_rename(ofs, old_upperdir->d_inode, olddentry,
			    new_upperdir->d_inode, newdentry, flags);
	if (err)
		goto out_dput;

	/* Return value ignored: a failed cleanup does not fail the rename */
	if (cleanup_whiteout)
		ovl_cleanup(ofs, old_upperdir->d_inode, newdentry);

	/* The overwritten target lost a name: adjust its overlay link count */
	if (overwrite && d_inode(new)) {
		if (new_is_dir)
			clear_nlink(d_inode(new));
		else
			ovl_drop_nlink(new);
	}

	ovl_dir_modified(old->d_parent, ovl_type_origin(old) ||
			 (!overwrite && ovl_type_origin(new)));
	ovl_dir_modified(new->d_parent, ovl_type_origin(old) ||
			 (d_inode(new) && ovl_type_origin(new)));

	/* copy ctime: */
	ovl_copyattr(d_inode(old));
	if (d_inode(new) && ovl_dentry_upper(new))
		ovl_copyattr(d_inode(new));

	/* Success falls through the labels too; err is 0 at this point */
out_dput:
	dput(newdentry);
out_dput_old:
	dput(olddentry);
out_unlock:
	unlock_rename(new_upperdir, old_upperdir);
out_revert_creds:
	revert_creds(old_cred);
	if (update_nlink)
		ovl_nlink_end(new);
out_drop_write:
	ovl_drop_write(old);
out:
	dput(opaquedir);
	ovl_cache_free(&list);
	return err;
}
   1299
   1300const struct inode_operations ovl_dir_inode_operations = {
   1301	.lookup		= ovl_lookup,
   1302	.mkdir		= ovl_mkdir,
   1303	.symlink	= ovl_symlink,
   1304	.unlink		= ovl_unlink,
   1305	.rmdir		= ovl_rmdir,
   1306	.rename		= ovl_rename,
   1307	.link		= ovl_link,
   1308	.setattr	= ovl_setattr,
   1309	.create		= ovl_create,
   1310	.mknod		= ovl_mknod,
   1311	.permission	= ovl_permission,
   1312	.getattr	= ovl_getattr,
   1313	.listxattr	= ovl_listxattr,
   1314	.get_acl	= ovl_get_acl,
   1315	.update_time	= ovl_update_time,
   1316	.fileattr_get	= ovl_fileattr_get,
   1317	.fileattr_set	= ovl_fileattr_set,
   1318};