cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

readdir.c (28335B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 *
      4 * Copyright (C) 2011 Novell Inc.
      5 */
      6
      7#include <linux/fs.h>
      8#include <linux/slab.h>
      9#include <linux/namei.h>
     10#include <linux/file.h>
     11#include <linux/xattr.h>
     12#include <linux/rbtree.h>
     13#include <linux/security.h>
     14#include <linux/cred.h>
     15#include <linux/ratelimit.h>
     16#include "overlayfs.h"
     17
     18struct ovl_cache_entry {
     19	unsigned int len;
     20	unsigned int type;
     21	u64 real_ino;
     22	u64 ino;
     23	struct list_head l_node;
     24	struct rb_node node;
     25	struct ovl_cache_entry *next_maybe_whiteout;
     26	bool is_upper;
     27	bool is_whiteout;
     28	char name[];
     29};
     30
     31struct ovl_dir_cache {
     32	long refcount;
     33	u64 version;
     34	struct list_head entries;
     35	struct rb_root root;
     36};
     37
     38struct ovl_readdir_data {
     39	struct dir_context ctx;
     40	struct dentry *dentry;
     41	bool is_lowest;
     42	struct rb_root *root;
     43	struct list_head *list;
     44	struct list_head middle;
     45	struct ovl_cache_entry *first_maybe_whiteout;
     46	int count;
     47	int err;
     48	bool is_upper;
     49	bool d_type_supported;
     50};
     51
     52struct ovl_dir_file {
     53	bool is_real;
     54	bool is_upper;
     55	struct ovl_dir_cache *cache;
     56	struct list_head *cursor;
     57	struct file *realfile;
     58	struct file *upperfile;
     59};
     60
     61static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
     62{
     63	return rb_entry(n, struct ovl_cache_entry, node);
     64}
     65
     66static bool ovl_cache_entry_find_link(const char *name, int len,
     67				      struct rb_node ***link,
     68				      struct rb_node **parent)
     69{
     70	bool found = false;
     71	struct rb_node **newp = *link;
     72
     73	while (!found && *newp) {
     74		int cmp;
     75		struct ovl_cache_entry *tmp;
     76
     77		*parent = *newp;
     78		tmp = ovl_cache_entry_from_node(*newp);
     79		cmp = strncmp(name, tmp->name, len);
     80		if (cmp > 0)
     81			newp = &tmp->node.rb_right;
     82		else if (cmp < 0 || len < tmp->len)
     83			newp = &tmp->node.rb_left;
     84		else
     85			found = true;
     86	}
     87	*link = newp;
     88
     89	return found;
     90}
     91
     92static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
     93						    const char *name, int len)
     94{
     95	struct rb_node *node = root->rb_node;
     96	int cmp;
     97
     98	while (node) {
     99		struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
    100
    101		cmp = strncmp(name, p->name, len);
    102		if (cmp > 0)
    103			node = p->node.rb_right;
    104		else if (cmp < 0 || len < p->len)
    105			node = p->node.rb_left;
    106		else
    107			return p;
    108	}
    109
    110	return NULL;
    111}
    112
    113static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
    114			   struct ovl_cache_entry *p)
    115{
    116	/* Don't care if not doing ovl_iter() */
    117	if (!rdd->dentry)
    118		return false;
    119
    120	/* Always recalc d_ino when remapping lower inode numbers */
    121	if (ovl_xino_bits(rdd->dentry->d_sb))
    122		return true;
    123
    124	/* Always recalc d_ino for parent */
    125	if (strcmp(p->name, "..") == 0)
    126		return true;
    127
    128	/* If this is lower, then native d_ino will do */
    129	if (!rdd->is_upper)
    130		return false;
    131
    132	/*
    133	 * Recalc d_ino for '.' and for all entries if dir is impure (contains
    134	 * copied up entries)
    135	 */
    136	if ((p->name[0] == '.' && p->len == 1) ||
    137	    ovl_test_flag(OVL_IMPURE, d_inode(rdd->dentry)))
    138		return true;
    139
    140	return false;
    141}
    142
    143static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
    144						   const char *name, int len,
    145						   u64 ino, unsigned int d_type)
    146{
    147	struct ovl_cache_entry *p;
    148	size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);
    149
    150	p = kmalloc(size, GFP_KERNEL);
    151	if (!p)
    152		return NULL;
    153
    154	memcpy(p->name, name, len);
    155	p->name[len] = '\0';
    156	p->len = len;
    157	p->type = d_type;
    158	p->real_ino = ino;
    159	p->ino = ino;
    160	/* Defer setting d_ino for upper entry to ovl_iterate() */
    161	if (ovl_calc_d_ino(rdd, p))
    162		p->ino = 0;
    163	p->is_upper = rdd->is_upper;
    164	p->is_whiteout = false;
    165
    166	if (d_type == DT_CHR) {
    167		p->next_maybe_whiteout = rdd->first_maybe_whiteout;
    168		rdd->first_maybe_whiteout = p;
    169	}
    170	return p;
    171}
    172
    173static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
    174				  const char *name, int len, u64 ino,
    175				  unsigned int d_type)
    176{
    177	struct rb_node **newp = &rdd->root->rb_node;
    178	struct rb_node *parent = NULL;
    179	struct ovl_cache_entry *p;
    180
    181	if (ovl_cache_entry_find_link(name, len, &newp, &parent))
    182		return 0;
    183
    184	p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
    185	if (p == NULL) {
    186		rdd->err = -ENOMEM;
    187		return -ENOMEM;
    188	}
    189
    190	list_add_tail(&p->l_node, rdd->list);
    191	rb_link_node(&p->node, parent, newp);
    192	rb_insert_color(&p->node, rdd->root);
    193
    194	return 0;
    195}
    196
    197static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
    198			   const char *name, int namelen,
    199			   loff_t offset, u64 ino, unsigned int d_type)
    200{
    201	struct ovl_cache_entry *p;
    202
    203	p = ovl_cache_entry_find(rdd->root, name, namelen);
    204	if (p) {
    205		list_move_tail(&p->l_node, &rdd->middle);
    206	} else {
    207		p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
    208		if (p == NULL)
    209			rdd->err = -ENOMEM;
    210		else
    211			list_add_tail(&p->l_node, &rdd->middle);
    212	}
    213
    214	return rdd->err;
    215}
    216
    217void ovl_cache_free(struct list_head *list)
    218{
    219	struct ovl_cache_entry *p;
    220	struct ovl_cache_entry *n;
    221
    222	list_for_each_entry_safe(p, n, list, l_node)
    223		kfree(p);
    224
    225	INIT_LIST_HEAD(list);
    226}
    227
    228void ovl_dir_cache_free(struct inode *inode)
    229{
    230	struct ovl_dir_cache *cache = ovl_dir_cache(inode);
    231
    232	if (cache) {
    233		ovl_cache_free(&cache->entries);
    234		kfree(cache);
    235	}
    236}
    237
    238static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
    239{
    240	struct ovl_dir_cache *cache = od->cache;
    241
    242	WARN_ON(cache->refcount <= 0);
    243	cache->refcount--;
    244	if (!cache->refcount) {
    245		if (ovl_dir_cache(d_inode(dentry)) == cache)
    246			ovl_set_dir_cache(d_inode(dentry), NULL);
    247
    248		ovl_cache_free(&cache->entries);
    249		kfree(cache);
    250	}
    251}
    252
    253static int ovl_fill_merge(struct dir_context *ctx, const char *name,
    254			  int namelen, loff_t offset, u64 ino,
    255			  unsigned int d_type)
    256{
    257	struct ovl_readdir_data *rdd =
    258		container_of(ctx, struct ovl_readdir_data, ctx);
    259
    260	rdd->count++;
    261	if (!rdd->is_lowest)
    262		return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
    263	else
    264		return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
    265}
    266
    267static int ovl_check_whiteouts(struct path *path, struct ovl_readdir_data *rdd)
    268{
    269	int err;
    270	struct ovl_cache_entry *p;
    271	struct dentry *dentry, *dir = path->dentry;
    272	const struct cred *old_cred;
    273
    274	old_cred = ovl_override_creds(rdd->dentry->d_sb);
    275
    276	err = down_write_killable(&dir->d_inode->i_rwsem);
    277	if (!err) {
    278		while (rdd->first_maybe_whiteout) {
    279			p = rdd->first_maybe_whiteout;
    280			rdd->first_maybe_whiteout = p->next_maybe_whiteout;
    281			dentry = lookup_one(mnt_user_ns(path->mnt), p->name, dir, p->len);
    282			if (!IS_ERR(dentry)) {
    283				p->is_whiteout = ovl_is_whiteout(dentry);
    284				dput(dentry);
    285			}
    286		}
    287		inode_unlock(dir->d_inode);
    288	}
    289	revert_creds(old_cred);
    290
    291	return err;
    292}
    293
    294static inline int ovl_dir_read(struct path *realpath,
    295			       struct ovl_readdir_data *rdd)
    296{
    297	struct file *realfile;
    298	int err;
    299
    300	realfile = ovl_path_open(realpath, O_RDONLY | O_LARGEFILE);
    301	if (IS_ERR(realfile))
    302		return PTR_ERR(realfile);
    303
    304	rdd->first_maybe_whiteout = NULL;
    305	rdd->ctx.pos = 0;
    306	do {
    307		rdd->count = 0;
    308		rdd->err = 0;
    309		err = iterate_dir(realfile, &rdd->ctx);
    310		if (err >= 0)
    311			err = rdd->err;
    312	} while (!err && rdd->count);
    313
    314	if (!err && rdd->first_maybe_whiteout && rdd->dentry)
    315		err = ovl_check_whiteouts(realpath, rdd);
    316
    317	fput(realfile);
    318
    319	return err;
    320}
    321
    322static void ovl_dir_reset(struct file *file)
    323{
    324	struct ovl_dir_file *od = file->private_data;
    325	struct ovl_dir_cache *cache = od->cache;
    326	struct dentry *dentry = file->f_path.dentry;
    327	bool is_real;
    328
    329	if (cache && ovl_dentry_version_get(dentry) != cache->version) {
    330		ovl_cache_put(od, dentry);
    331		od->cache = NULL;
    332		od->cursor = NULL;
    333	}
    334	is_real = ovl_dir_is_real(dentry);
    335	if (od->is_real != is_real) {
    336		/* is_real can only become false when dir is copied up */
    337		if (WARN_ON(is_real))
    338			return;
    339		od->is_real = false;
    340	}
    341}
    342
    343static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
    344	struct rb_root *root)
    345{
    346	int err;
    347	struct path realpath;
    348	struct ovl_readdir_data rdd = {
    349		.ctx.actor = ovl_fill_merge,
    350		.dentry = dentry,
    351		.list = list,
    352		.root = root,
    353		.is_lowest = false,
    354	};
    355	int idx, next;
    356
    357	for (idx = 0; idx != -1; idx = next) {
    358		next = ovl_path_next(idx, dentry, &realpath);
    359		rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
    360
    361		if (next != -1) {
    362			err = ovl_dir_read(&realpath, &rdd);
    363			if (err)
    364				break;
    365		} else {
    366			/*
    367			 * Insert lowest layer entries before upper ones, this
    368			 * allows offsets to be reasonably constant
    369			 */
    370			list_add(&rdd.middle, rdd.list);
    371			rdd.is_lowest = true;
    372			err = ovl_dir_read(&realpath, &rdd);
    373			list_del(&rdd.middle);
    374		}
    375	}
    376	return err;
    377}
    378
    379static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
    380{
    381	struct list_head *p;
    382	loff_t off = 0;
    383
    384	list_for_each(p, &od->cache->entries) {
    385		if (off >= pos)
    386			break;
    387		off++;
    388	}
    389	/* Cursor is safe since the cache is stable */
    390	od->cursor = p;
    391}
    392
    393static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
    394{
    395	int res;
    396	struct ovl_dir_cache *cache;
    397
    398	cache = ovl_dir_cache(d_inode(dentry));
    399	if (cache && ovl_dentry_version_get(dentry) == cache->version) {
    400		WARN_ON(!cache->refcount);
    401		cache->refcount++;
    402		return cache;
    403	}
    404	ovl_set_dir_cache(d_inode(dentry), NULL);
    405
    406	cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
    407	if (!cache)
    408		return ERR_PTR(-ENOMEM);
    409
    410	cache->refcount = 1;
    411	INIT_LIST_HEAD(&cache->entries);
    412	cache->root = RB_ROOT;
    413
    414	res = ovl_dir_read_merged(dentry, &cache->entries, &cache->root);
    415	if (res) {
    416		ovl_cache_free(&cache->entries);
    417		kfree(cache);
    418		return ERR_PTR(res);
    419	}
    420
    421	cache->version = ovl_dentry_version_get(dentry);
    422	ovl_set_dir_cache(d_inode(dentry), cache);
    423
    424	return cache;
    425}
    426
    427/* Map inode number to lower fs unique range */
    428static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
    429			       const char *name, int namelen, bool warn)
    430{
    431	unsigned int xinoshift = 64 - xinobits;
    432
    433	if (unlikely(ino >> xinoshift)) {
    434		if (warn) {
    435			pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
    436					    namelen, name, ino, xinobits);
    437		}
    438		return ino;
    439	}
    440
    441	/*
    442	 * The lowest xinobit is reserved for mapping the non-peresistent inode
    443	 * numbers range, but this range is only exposed via st_ino, not here.
    444	 */
    445	return ino | ((u64)fsid) << (xinoshift + 1);
    446}
    447
    448/*
    449 * Set d_ino for upper entries. Non-upper entries should always report
    450 * the uppermost real inode ino and should not call this function.
    451 *
    452 * When not all layer are on same fs, report real ino also for upper.
    453 *
    454 * When all layers are on the same fs, and upper has a reference to
    455 * copy up origin, call vfs_getattr() on the overlay entry to make
    456 * sure that d_ino will be consistent with st_ino from stat(2).
    457 */
    458static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p)
    459
    460{
    461	struct dentry *dir = path->dentry;
    462	struct dentry *this = NULL;
    463	enum ovl_path_type type;
    464	u64 ino = p->real_ino;
    465	int xinobits = ovl_xino_bits(dir->d_sb);
    466	int err = 0;
    467
    468	if (!ovl_same_dev(dir->d_sb))
    469		goto out;
    470
    471	if (p->name[0] == '.') {
    472		if (p->len == 1) {
    473			this = dget(dir);
    474			goto get;
    475		}
    476		if (p->len == 2 && p->name[1] == '.') {
    477			/* we shall not be moved */
    478			this = dget(dir->d_parent);
    479			goto get;
    480		}
    481	}
    482	this = lookup_one(mnt_user_ns(path->mnt), p->name, dir, p->len);
    483	if (IS_ERR_OR_NULL(this) || !this->d_inode) {
    484		/* Mark a stale entry */
    485		p->is_whiteout = true;
    486		if (IS_ERR(this)) {
    487			err = PTR_ERR(this);
    488			this = NULL;
    489			goto fail;
    490		}
    491		goto out;
    492	}
    493
    494get:
    495	type = ovl_path_type(this);
    496	if (OVL_TYPE_ORIGIN(type)) {
    497		struct kstat stat;
    498		struct path statpath = *path;
    499
    500		statpath.dentry = this;
    501		err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
    502		if (err)
    503			goto fail;
    504
    505		/*
    506		 * Directory inode is always on overlay st_dev.
    507		 * Non-dir with ovl_same_dev() could be on pseudo st_dev in case
    508		 * of xino bits overflow.
    509		 */
    510		WARN_ON_ONCE(S_ISDIR(stat.mode) &&
    511			     dir->d_sb->s_dev != stat.dev);
    512		ino = stat.ino;
    513	} else if (xinobits && !OVL_TYPE_UPPER(type)) {
    514		ino = ovl_remap_lower_ino(ino, xinobits,
    515					  ovl_layer_lower(this)->fsid,
    516					  p->name, p->len,
    517					  ovl_xino_warn(dir->d_sb));
    518	}
    519
    520out:
    521	p->ino = ino;
    522	dput(this);
    523	return err;
    524
    525fail:
    526	pr_warn_ratelimited("failed to look up (%s) for ino (%i)\n",
    527			    p->name, err);
    528	goto out;
    529}
    530
    531static int ovl_fill_plain(struct dir_context *ctx, const char *name,
    532			  int namelen, loff_t offset, u64 ino,
    533			  unsigned int d_type)
    534{
    535	struct ovl_cache_entry *p;
    536	struct ovl_readdir_data *rdd =
    537		container_of(ctx, struct ovl_readdir_data, ctx);
    538
    539	rdd->count++;
    540	p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
    541	if (p == NULL) {
    542		rdd->err = -ENOMEM;
    543		return -ENOMEM;
    544	}
    545	list_add_tail(&p->l_node, rdd->list);
    546
    547	return 0;
    548}
    549
    550static int ovl_dir_read_impure(struct path *path,  struct list_head *list,
    551			       struct rb_root *root)
    552{
    553	int err;
    554	struct path realpath;
    555	struct ovl_cache_entry *p, *n;
    556	struct ovl_readdir_data rdd = {
    557		.ctx.actor = ovl_fill_plain,
    558		.list = list,
    559		.root = root,
    560	};
    561
    562	INIT_LIST_HEAD(list);
    563	*root = RB_ROOT;
    564	ovl_path_upper(path->dentry, &realpath);
    565
    566	err = ovl_dir_read(&realpath, &rdd);
    567	if (err)
    568		return err;
    569
    570	list_for_each_entry_safe(p, n, list, l_node) {
    571		if (strcmp(p->name, ".") != 0 &&
    572		    strcmp(p->name, "..") != 0) {
    573			err = ovl_cache_update_ino(path, p);
    574			if (err)
    575				return err;
    576		}
    577		if (p->ino == p->real_ino) {
    578			list_del(&p->l_node);
    579			kfree(p);
    580		} else {
    581			struct rb_node **newp = &root->rb_node;
    582			struct rb_node *parent = NULL;
    583
    584			if (WARN_ON(ovl_cache_entry_find_link(p->name, p->len,
    585							      &newp, &parent)))
    586				return -EIO;
    587
    588			rb_link_node(&p->node, parent, newp);
    589			rb_insert_color(&p->node, root);
    590		}
    591	}
    592	return 0;
    593}
    594
    595static struct ovl_dir_cache *ovl_cache_get_impure(struct path *path)
    596{
    597	int res;
    598	struct dentry *dentry = path->dentry;
    599	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
    600	struct ovl_dir_cache *cache;
    601
    602	cache = ovl_dir_cache(d_inode(dentry));
    603	if (cache && ovl_dentry_version_get(dentry) == cache->version)
    604		return cache;
    605
    606	/* Impure cache is not refcounted, free it here */
    607	ovl_dir_cache_free(d_inode(dentry));
    608	ovl_set_dir_cache(d_inode(dentry), NULL);
    609
    610	cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
    611	if (!cache)
    612		return ERR_PTR(-ENOMEM);
    613
    614	res = ovl_dir_read_impure(path, &cache->entries, &cache->root);
    615	if (res) {
    616		ovl_cache_free(&cache->entries);
    617		kfree(cache);
    618		return ERR_PTR(res);
    619	}
    620	if (list_empty(&cache->entries)) {
    621		/*
    622		 * A good opportunity to get rid of an unneeded "impure" flag.
    623		 * Removing the "impure" xattr is best effort.
    624		 */
    625		if (!ovl_want_write(dentry)) {
    626			ovl_removexattr(ofs, ovl_dentry_upper(dentry),
    627					OVL_XATTR_IMPURE);
    628			ovl_drop_write(dentry);
    629		}
    630		ovl_clear_flag(OVL_IMPURE, d_inode(dentry));
    631		kfree(cache);
    632		return NULL;
    633	}
    634
    635	cache->version = ovl_dentry_version_get(dentry);
    636	ovl_set_dir_cache(d_inode(dentry), cache);
    637
    638	return cache;
    639}
    640
    641struct ovl_readdir_translate {
    642	struct dir_context *orig_ctx;
    643	struct ovl_dir_cache *cache;
    644	struct dir_context ctx;
    645	u64 parent_ino;
    646	int fsid;
    647	int xinobits;
    648	bool xinowarn;
    649};
    650
    651static int ovl_fill_real(struct dir_context *ctx, const char *name,
    652			   int namelen, loff_t offset, u64 ino,
    653			   unsigned int d_type)
    654{
    655	struct ovl_readdir_translate *rdt =
    656		container_of(ctx, struct ovl_readdir_translate, ctx);
    657	struct dir_context *orig_ctx = rdt->orig_ctx;
    658
    659	if (rdt->parent_ino && strcmp(name, "..") == 0) {
    660		ino = rdt->parent_ino;
    661	} else if (rdt->cache) {
    662		struct ovl_cache_entry *p;
    663
    664		p = ovl_cache_entry_find(&rdt->cache->root, name, namelen);
    665		if (p)
    666			ino = p->ino;
    667	} else if (rdt->xinobits) {
    668		ino = ovl_remap_lower_ino(ino, rdt->xinobits, rdt->fsid,
    669					  name, namelen, rdt->xinowarn);
    670	}
    671
    672	return orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
    673}
    674
    675static bool ovl_is_impure_dir(struct file *file)
    676{
    677	struct ovl_dir_file *od = file->private_data;
    678	struct inode *dir = d_inode(file->f_path.dentry);
    679
    680	/*
    681	 * Only upper dir can be impure, but if we are in the middle of
    682	 * iterating a lower real dir, dir could be copied up and marked
    683	 * impure. We only want the impure cache if we started iterating
    684	 * a real upper dir to begin with.
    685	 */
    686	return od->is_upper && ovl_test_flag(OVL_IMPURE, dir);
    687
    688}
    689
    690static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
    691{
    692	int err;
    693	struct ovl_dir_file *od = file->private_data;
    694	struct dentry *dir = file->f_path.dentry;
    695	const struct ovl_layer *lower_layer = ovl_layer_lower(dir);
    696	struct ovl_readdir_translate rdt = {
    697		.ctx.actor = ovl_fill_real,
    698		.orig_ctx = ctx,
    699		.xinobits = ovl_xino_bits(dir->d_sb),
    700		.xinowarn = ovl_xino_warn(dir->d_sb),
    701	};
    702
    703	if (rdt.xinobits && lower_layer)
    704		rdt.fsid = lower_layer->fsid;
    705
    706	if (OVL_TYPE_MERGE(ovl_path_type(dir->d_parent))) {
    707		struct kstat stat;
    708		struct path statpath = file->f_path;
    709
    710		statpath.dentry = dir->d_parent;
    711		err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
    712		if (err)
    713			return err;
    714
    715		WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev);
    716		rdt.parent_ino = stat.ino;
    717	}
    718
    719	if (ovl_is_impure_dir(file)) {
    720		rdt.cache = ovl_cache_get_impure(&file->f_path);
    721		if (IS_ERR(rdt.cache))
    722			return PTR_ERR(rdt.cache);
    723	}
    724
    725	err = iterate_dir(od->realfile, &rdt.ctx);
    726	ctx->pos = rdt.ctx.pos;
    727
    728	return err;
    729}
    730
    731
    732static int ovl_iterate(struct file *file, struct dir_context *ctx)
    733{
    734	struct ovl_dir_file *od = file->private_data;
    735	struct dentry *dentry = file->f_path.dentry;
    736	struct ovl_cache_entry *p;
    737	const struct cred *old_cred;
    738	int err;
    739
    740	old_cred = ovl_override_creds(dentry->d_sb);
    741	if (!ctx->pos)
    742		ovl_dir_reset(file);
    743
    744	if (od->is_real) {
    745		/*
    746		 * If parent is merge, then need to adjust d_ino for '..', if
    747		 * dir is impure then need to adjust d_ino for copied up
    748		 * entries.
    749		 */
    750		if (ovl_xino_bits(dentry->d_sb) ||
    751		    (ovl_same_fs(dentry->d_sb) &&
    752		     (ovl_is_impure_dir(file) ||
    753		      OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent))))) {
    754			err = ovl_iterate_real(file, ctx);
    755		} else {
    756			err = iterate_dir(od->realfile, ctx);
    757		}
    758		goto out;
    759	}
    760
    761	if (!od->cache) {
    762		struct ovl_dir_cache *cache;
    763
    764		cache = ovl_cache_get(dentry);
    765		err = PTR_ERR(cache);
    766		if (IS_ERR(cache))
    767			goto out;
    768
    769		od->cache = cache;
    770		ovl_seek_cursor(od, ctx->pos);
    771	}
    772
    773	while (od->cursor != &od->cache->entries) {
    774		p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
    775		if (!p->is_whiteout) {
    776			if (!p->ino) {
    777				err = ovl_cache_update_ino(&file->f_path, p);
    778				if (err)
    779					goto out;
    780			}
    781		}
    782		/* ovl_cache_update_ino() sets is_whiteout on stale entry */
    783		if (!p->is_whiteout) {
    784			if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
    785				break;
    786		}
    787		od->cursor = p->l_node.next;
    788		ctx->pos++;
    789	}
    790	err = 0;
    791out:
    792	revert_creds(old_cred);
    793	return err;
    794}
    795
    796static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
    797{
    798	loff_t res;
    799	struct ovl_dir_file *od = file->private_data;
    800
    801	inode_lock(file_inode(file));
    802	if (!file->f_pos)
    803		ovl_dir_reset(file);
    804
    805	if (od->is_real) {
    806		res = vfs_llseek(od->realfile, offset, origin);
    807		file->f_pos = od->realfile->f_pos;
    808	} else {
    809		res = -EINVAL;
    810
    811		switch (origin) {
    812		case SEEK_CUR:
    813			offset += file->f_pos;
    814			break;
    815		case SEEK_SET:
    816			break;
    817		default:
    818			goto out_unlock;
    819		}
    820		if (offset < 0)
    821			goto out_unlock;
    822
    823		if (offset != file->f_pos) {
    824			file->f_pos = offset;
    825			if (od->cache)
    826				ovl_seek_cursor(od, offset);
    827		}
    828		res = offset;
    829	}
    830out_unlock:
    831	inode_unlock(file_inode(file));
    832
    833	return res;
    834}
    835
    836static struct file *ovl_dir_open_realfile(const struct file *file,
    837					  struct path *realpath)
    838{
    839	struct file *res;
    840	const struct cred *old_cred;
    841
    842	old_cred = ovl_override_creds(file_inode(file)->i_sb);
    843	res = ovl_path_open(realpath, O_RDONLY | (file->f_flags & O_LARGEFILE));
    844	revert_creds(old_cred);
    845
    846	return res;
    847}
    848
    849/*
    850 * Like ovl_real_fdget(), returns upperfile if dir was copied up since open.
    851 * Unlike ovl_real_fdget(), this caches upperfile in file->private_data.
    852 *
    853 * TODO: use same abstract type for file->private_data of dir and file so
    854 * upperfile could also be cached for files as well.
    855 */
    856struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
    857{
    858
    859	struct ovl_dir_file *od = file->private_data;
    860	struct dentry *dentry = file->f_path.dentry;
    861	struct file *old, *realfile = od->realfile;
    862
    863	if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
    864		return want_upper ? NULL : realfile;
    865
    866	/*
    867	 * Need to check if we started out being a lower dir, but got copied up
    868	 */
    869	if (!od->is_upper) {
    870		realfile = READ_ONCE(od->upperfile);
    871		if (!realfile) {
    872			struct path upperpath;
    873
    874			ovl_path_upper(dentry, &upperpath);
    875			realfile = ovl_dir_open_realfile(file, &upperpath);
    876			if (IS_ERR(realfile))
    877				return realfile;
    878
    879			old = cmpxchg_release(&od->upperfile, NULL, realfile);
    880			if (old) {
    881				fput(realfile);
    882				realfile = old;
    883			}
    884		}
    885	}
    886
    887	return realfile;
    888}
    889
    890static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
    891			 int datasync)
    892{
    893	struct file *realfile;
    894	int err;
    895
    896	err = ovl_sync_status(OVL_FS(file->f_path.dentry->d_sb));
    897	if (err <= 0)
    898		return err;
    899
    900	realfile = ovl_dir_real_file(file, true);
    901	err = PTR_ERR_OR_ZERO(realfile);
    902
    903	/* Nothing to sync for lower */
    904	if (!realfile || err)
    905		return err;
    906
    907	return vfs_fsync_range(realfile, start, end, datasync);
    908}
    909
    910static int ovl_dir_release(struct inode *inode, struct file *file)
    911{
    912	struct ovl_dir_file *od = file->private_data;
    913
    914	if (od->cache) {
    915		inode_lock(inode);
    916		ovl_cache_put(od, file->f_path.dentry);
    917		inode_unlock(inode);
    918	}
    919	fput(od->realfile);
    920	if (od->upperfile)
    921		fput(od->upperfile);
    922	kfree(od);
    923
    924	return 0;
    925}
    926
    927static int ovl_dir_open(struct inode *inode, struct file *file)
    928{
    929	struct path realpath;
    930	struct file *realfile;
    931	struct ovl_dir_file *od;
    932	enum ovl_path_type type;
    933
    934	od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
    935	if (!od)
    936		return -ENOMEM;
    937
    938	type = ovl_path_real(file->f_path.dentry, &realpath);
    939	realfile = ovl_dir_open_realfile(file, &realpath);
    940	if (IS_ERR(realfile)) {
    941		kfree(od);
    942		return PTR_ERR(realfile);
    943	}
    944	od->realfile = realfile;
    945	od->is_real = ovl_dir_is_real(file->f_path.dentry);
    946	od->is_upper = OVL_TYPE_UPPER(type);
    947	file->private_data = od;
    948
    949	return 0;
    950}
    951
    952const struct file_operations ovl_dir_operations = {
    953	.read		= generic_read_dir,
    954	.open		= ovl_dir_open,
    955	.iterate	= ovl_iterate,
    956	.llseek		= ovl_dir_llseek,
    957	.fsync		= ovl_dir_fsync,
    958	.release	= ovl_dir_release,
    959};
    960
    961int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
    962{
    963	int err;
    964	struct ovl_cache_entry *p, *n;
    965	struct rb_root root = RB_ROOT;
    966	const struct cred *old_cred;
    967
    968	old_cred = ovl_override_creds(dentry->d_sb);
    969	err = ovl_dir_read_merged(dentry, list, &root);
    970	revert_creds(old_cred);
    971	if (err)
    972		return err;
    973
    974	err = 0;
    975
    976	list_for_each_entry_safe(p, n, list, l_node) {
    977		/*
    978		 * Select whiteouts in upperdir, they should
    979		 * be cleared when deleting this directory.
    980		 */
    981		if (p->is_whiteout) {
    982			if (p->is_upper)
    983				continue;
    984			goto del_entry;
    985		}
    986
    987		if (p->name[0] == '.') {
    988			if (p->len == 1)
    989				goto del_entry;
    990			if (p->len == 2 && p->name[1] == '.')
    991				goto del_entry;
    992		}
    993		err = -ENOTEMPTY;
    994		break;
    995
    996del_entry:
    997		list_del(&p->l_node);
    998		kfree(p);
    999	}
   1000
   1001	return err;
   1002}
   1003
   1004void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper,
   1005			   struct list_head *list)
   1006{
   1007	struct ovl_cache_entry *p;
   1008
   1009	inode_lock_nested(upper->d_inode, I_MUTEX_CHILD);
   1010	list_for_each_entry(p, list, l_node) {
   1011		struct dentry *dentry;
   1012
   1013		if (WARN_ON(!p->is_whiteout || !p->is_upper))
   1014			continue;
   1015
   1016		dentry = ovl_lookup_upper(ofs, p->name, upper, p->len);
   1017		if (IS_ERR(dentry)) {
   1018			pr_err("lookup '%s/%.*s' failed (%i)\n",
   1019			       upper->d_name.name, p->len, p->name,
   1020			       (int) PTR_ERR(dentry));
   1021			continue;
   1022		}
   1023		if (dentry->d_inode)
   1024			ovl_cleanup(ofs, upper->d_inode, dentry);
   1025		dput(dentry);
   1026	}
   1027	inode_unlock(upper->d_inode);
   1028}
   1029
   1030static int ovl_check_d_type(struct dir_context *ctx, const char *name,
   1031			  int namelen, loff_t offset, u64 ino,
   1032			  unsigned int d_type)
   1033{
   1034	struct ovl_readdir_data *rdd =
   1035		container_of(ctx, struct ovl_readdir_data, ctx);
   1036
   1037	/* Even if d_type is not supported, DT_DIR is returned for . and .. */
   1038	if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen))
   1039		return 0;
   1040
   1041	if (d_type != DT_UNKNOWN)
   1042		rdd->d_type_supported = true;
   1043
   1044	return 0;
   1045}
   1046
   1047/*
   1048 * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
   1049 * if error is encountered.
   1050 */
   1051int ovl_check_d_type_supported(struct path *realpath)
   1052{
   1053	int err;
   1054	struct ovl_readdir_data rdd = {
   1055		.ctx.actor = ovl_check_d_type,
   1056		.d_type_supported = false,
   1057	};
   1058
   1059	err = ovl_dir_read(realpath, &rdd);
   1060	if (err)
   1061		return err;
   1062
   1063	return rdd.d_type_supported;
   1064}
   1065
   1066#define OVL_INCOMPATDIR_NAME "incompat"
   1067
   1068static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, struct path *path,
   1069				       int level)
   1070{
   1071	int err;
   1072	struct inode *dir = path->dentry->d_inode;
   1073	LIST_HEAD(list);
   1074	struct rb_root root = RB_ROOT;
   1075	struct ovl_cache_entry *p;
   1076	struct ovl_readdir_data rdd = {
   1077		.ctx.actor = ovl_fill_merge,
   1078		.dentry = NULL,
   1079		.list = &list,
   1080		.root = &root,
   1081		.is_lowest = false,
   1082	};
   1083	bool incompat = false;
   1084
   1085	/*
   1086	 * The "work/incompat" directory is treated specially - if it is not
   1087	 * empty, instead of printing a generic error and mounting read-only,
   1088	 * we will error about incompat features and fail the mount.
   1089	 *
   1090	 * When called from ovl_indexdir_cleanup(), path->dentry->d_name.name
   1091	 * starts with '#'.
   1092	 */
   1093	if (level == 2 &&
   1094	    !strcmp(path->dentry->d_name.name, OVL_INCOMPATDIR_NAME))
   1095		incompat = true;
   1096
   1097	err = ovl_dir_read(path, &rdd);
   1098	if (err)
   1099		goto out;
   1100
   1101	inode_lock_nested(dir, I_MUTEX_PARENT);
   1102	list_for_each_entry(p, &list, l_node) {
   1103		struct dentry *dentry;
   1104
   1105		if (p->name[0] == '.') {
   1106			if (p->len == 1)
   1107				continue;
   1108			if (p->len == 2 && p->name[1] == '.')
   1109				continue;
   1110		} else if (incompat) {
   1111			pr_err("overlay with incompat feature '%s' cannot be mounted\n",
   1112				p->name);
   1113			err = -EINVAL;
   1114			break;
   1115		}
   1116		dentry = ovl_lookup_upper(ofs, p->name, path->dentry, p->len);
   1117		if (IS_ERR(dentry))
   1118			continue;
   1119		if (dentry->d_inode)
   1120			err = ovl_workdir_cleanup(ofs, dir, path->mnt, dentry, level);
   1121		dput(dentry);
   1122		if (err)
   1123			break;
   1124	}
   1125	inode_unlock(dir);
   1126out:
   1127	ovl_cache_free(&list);
   1128	return err;
   1129}
   1130
   1131int ovl_workdir_cleanup(struct ovl_fs *ofs, struct inode *dir,
   1132			struct vfsmount *mnt, struct dentry *dentry, int level)
   1133{
   1134	int err;
   1135
   1136	if (!d_is_dir(dentry) || level > 1) {
   1137		return ovl_cleanup(ofs, dir, dentry);
   1138	}
   1139
   1140	err = ovl_do_rmdir(ofs, dir, dentry);
   1141	if (err) {
   1142		struct path path = { .mnt = mnt, .dentry = dentry };
   1143
   1144		inode_unlock(dir);
   1145		err = ovl_workdir_cleanup_recurse(ofs, &path, level + 1);
   1146		inode_lock_nested(dir, I_MUTEX_PARENT);
   1147		if (!err)
   1148			err = ovl_cleanup(ofs, dir, dentry);
   1149	}
   1150
   1151	return err;
   1152}
   1153
   1154int ovl_indexdir_cleanup(struct ovl_fs *ofs)
   1155{
   1156	int err;
   1157	struct dentry *indexdir = ofs->indexdir;
   1158	struct dentry *index = NULL;
   1159	struct inode *dir = indexdir->d_inode;
   1160	struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir };
   1161	LIST_HEAD(list);
   1162	struct rb_root root = RB_ROOT;
   1163	struct ovl_cache_entry *p;
   1164	struct ovl_readdir_data rdd = {
   1165		.ctx.actor = ovl_fill_merge,
   1166		.dentry = NULL,
   1167		.list = &list,
   1168		.root = &root,
   1169		.is_lowest = false,
   1170	};
   1171
   1172	err = ovl_dir_read(&path, &rdd);
   1173	if (err)
   1174		goto out;
   1175
   1176	inode_lock_nested(dir, I_MUTEX_PARENT);
   1177	list_for_each_entry(p, &list, l_node) {
   1178		if (p->name[0] == '.') {
   1179			if (p->len == 1)
   1180				continue;
   1181			if (p->len == 2 && p->name[1] == '.')
   1182				continue;
   1183		}
   1184		index = ovl_lookup_upper(ofs, p->name, indexdir, p->len);
   1185		if (IS_ERR(index)) {
   1186			err = PTR_ERR(index);
   1187			index = NULL;
   1188			break;
   1189		}
   1190		/* Cleanup leftover from index create/cleanup attempt */
   1191		if (index->d_name.name[0] == '#') {
   1192			err = ovl_workdir_cleanup(ofs, dir, path.mnt, index, 1);
   1193			if (err)
   1194				break;
   1195			goto next;
   1196		}
   1197		err = ovl_verify_index(ofs, index);
   1198		if (!err) {
   1199			goto next;
   1200		} else if (err == -ESTALE) {
   1201			/* Cleanup stale index entries */
   1202			err = ovl_cleanup(ofs, dir, index);
   1203		} else if (err != -ENOENT) {
   1204			/*
   1205			 * Abort mount to avoid corrupting the index if
   1206			 * an incompatible index entry was found or on out
   1207			 * of memory.
   1208			 */
   1209			break;
   1210		} else if (ofs->config.nfs_export) {
   1211			/*
   1212			 * Whiteout orphan index to block future open by
   1213			 * handle after overlay nlink dropped to zero.
   1214			 */
   1215			err = ovl_cleanup_and_whiteout(ofs, dir, index);
   1216		} else {
   1217			/* Cleanup orphan index entries */
   1218			err = ovl_cleanup(ofs, dir, index);
   1219		}
   1220
   1221		if (err)
   1222			break;
   1223
   1224next:
   1225		dput(index);
   1226		index = NULL;
   1227	}
   1228	dput(index);
   1229	inode_unlock(dir);
   1230out:
   1231	ovl_cache_free(&list);
   1232	if (err)
   1233		pr_err("failed index dir cleanup (%i)\n", err);
   1234	return err;
   1235}