cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

inode.c (98324B)


      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/**
      3 * inode.c - NTFS kernel inode handling.
      4 *
      5 * Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc.
      6 */
      7
      8#include <linux/buffer_head.h>
      9#include <linux/fs.h>
     10#include <linux/mm.h>
     11#include <linux/mount.h>
     12#include <linux/mutex.h>
     13#include <linux/pagemap.h>
     14#include <linux/quotaops.h>
     15#include <linux/slab.h>
     16#include <linux/log2.h>
     17
     18#include "aops.h"
     19#include "attrib.h"
     20#include "bitmap.h"
     21#include "dir.h"
     22#include "debug.h"
     23#include "inode.h"
     24#include "lcnalloc.h"
     25#include "malloc.h"
     26#include "mft.h"
     27#include "time.h"
     28#include "ntfs.h"
     29
     30/**
     31 * ntfs_test_inode - compare two (possibly fake) inodes for equality
     32 * @vi:		vfs inode which to test
     33 * @data:	data which is being tested with
     34 *
     35 * Compare the ntfs attribute embedded in the ntfs specific part of the vfs
     36 * inode @vi for equality with the ntfs attribute @data.
     37 *
     38 * If searching for the normal file/directory inode, set @na->type to AT_UNUSED.
     39 * @na->name and @na->name_len are then ignored.
     40 *
     41 * Return 1 if the attributes match and 0 if not.
     42 *
     43 * NOTE: This function runs with the inode_hash_lock spin lock held so it is not
     44 * allowed to sleep.
     45 */
     46int ntfs_test_inode(struct inode *vi, void *data)
     47{
     48	ntfs_attr *na = (ntfs_attr *)data;
     49	ntfs_inode *ni;
     50
     51	if (vi->i_ino != na->mft_no)
     52		return 0;
     53	ni = NTFS_I(vi);
     54	/* If !NInoAttr(ni), @vi is a normal file or directory inode. */
     55	if (likely(!NInoAttr(ni))) {
     56		/* If not looking for a normal inode this is a mismatch. */
     57		if (unlikely(na->type != AT_UNUSED))
     58			return 0;
     59	} else {
     60		/* A fake inode describing an attribute. */
     61		if (ni->type != na->type)
     62			return 0;
     63		if (ni->name_len != na->name_len)
     64			return 0;
     65		if (na->name_len && memcmp(ni->name, na->name,
     66				na->name_len * sizeof(ntfschar)))
     67			return 0;
     68	}
     69	/* Match! */
     70	return 1;
     71}
     72
     73/**
     74 * ntfs_init_locked_inode - initialize an inode
     75 * @vi:		vfs inode to initialize
     76 * @data:	data which to initialize @vi to
     77 *
     78 * Initialize the vfs inode @vi with the values from the ntfs attribute @data in
     79 * order to enable ntfs_test_inode() to do its work.
     80 *
     81 * If initializing the normal file/directory inode, set @na->type to AT_UNUSED.
     82 * In that case, @na->name and @na->name_len should be set to NULL and 0,
     83 * respectively. Although that is not strictly necessary as
     84 * ntfs_read_locked_inode() will fill them in later.
     85 *
     86 * Return 0 on success and -errno on error.
     87 *
     88 * NOTE: This function runs with the inode->i_lock spin lock held so it is not
     89 * allowed to sleep. (Hence the GFP_ATOMIC allocation.)
     90 */
     91static int ntfs_init_locked_inode(struct inode *vi, void *data)
     92{
     93	ntfs_attr *na = (ntfs_attr *)data;
     94	ntfs_inode *ni = NTFS_I(vi);
     95
     96	vi->i_ino = na->mft_no;
     97
     98	ni->type = na->type;
     99	if (na->type == AT_INDEX_ALLOCATION)
    100		NInoSetMstProtected(ni);
    101
    102	ni->name = na->name;
    103	ni->name_len = na->name_len;
    104
    105	/* If initializing a normal inode, we are done. */
    106	if (likely(na->type == AT_UNUSED)) {
    107		BUG_ON(na->name);
    108		BUG_ON(na->name_len);
    109		return 0;
    110	}
    111
    112	/* It is a fake inode. */
    113	NInoSetAttr(ni);
    114
    115	/*
    116	 * We have I30 global constant as an optimization as it is the name
    117	 * in >99.9% of named attributes! The other <0.1% incur a GFP_ATOMIC
    118	 * allocation but that is ok. And most attributes are unnamed anyway,
    119	 * thus the fraction of named attributes with name != I30 is actually
    120	 * absolutely tiny.
    121	 */
    122	if (na->name_len && na->name != I30) {
    123		unsigned int i;
    124
    125		BUG_ON(!na->name);
    126		i = na->name_len * sizeof(ntfschar);
    127		ni->name = kmalloc(i + sizeof(ntfschar), GFP_ATOMIC);
    128		if (!ni->name)
    129			return -ENOMEM;
    130		memcpy(ni->name, na->name, i);
    131		ni->name[na->name_len] = 0;
    132	}
    133	return 0;
    134}
    135
    136static int ntfs_read_locked_inode(struct inode *vi);
    137static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi);
    138static int ntfs_read_locked_index_inode(struct inode *base_vi,
    139		struct inode *vi);
    140
    141/**
    142 * ntfs_iget - obtain a struct inode corresponding to a specific normal inode
    143 * @sb:		super block of mounted volume
    144 * @mft_no:	mft record number / inode number to obtain
    145 *
    146 * Obtain the struct inode corresponding to a specific normal inode (i.e. a
    147 * file or directory).
    148 *
    149 * If the inode is in the cache, it is just returned with an increased
    150 * reference count. Otherwise, a new struct inode is allocated and initialized,
    151 * and finally ntfs_read_locked_inode() is called to read in the inode and
    152 * fill in the remainder of the inode structure.
    153 *
    154 * Return the struct inode on success. Check the return value with IS_ERR() and
    155 * if true, the function failed and the error code is obtained from PTR_ERR().
    156 */
    157struct inode *ntfs_iget(struct super_block *sb, unsigned long mft_no)
    158{
    159	struct inode *vi;
    160	int err;
    161	ntfs_attr na;
    162
    163	na.mft_no = mft_no;
    164	na.type = AT_UNUSED;
    165	na.name = NULL;
    166	na.name_len = 0;
    167
    168	vi = iget5_locked(sb, mft_no, ntfs_test_inode,
    169			ntfs_init_locked_inode, &na);
    170	if (unlikely(!vi))
    171		return ERR_PTR(-ENOMEM);
    172
    173	err = 0;
    174
    175	/* If this is a freshly allocated inode, need to read it now. */
    176	if (vi->i_state & I_NEW) {
    177		err = ntfs_read_locked_inode(vi);
    178		unlock_new_inode(vi);
    179	}
    180	/*
    181	 * There is no point in keeping bad inodes around if the failure was
    182	 * due to ENOMEM. We want to be able to retry again later.
    183	 */
    184	if (unlikely(err == -ENOMEM)) {
    185		iput(vi);
    186		vi = ERR_PTR(err);
    187	}
    188	return vi;
    189}
    190
    191/**
    192 * ntfs_attr_iget - obtain a struct inode corresponding to an attribute
    193 * @base_vi:	vfs base inode containing the attribute
    194 * @type:	attribute type
    195 * @name:	Unicode name of the attribute (NULL if unnamed)
    196 * @name_len:	length of @name in Unicode characters (0 if unnamed)
    197 *
    198 * Obtain the (fake) struct inode corresponding to the attribute specified by
    199 * @type, @name, and @name_len, which is present in the base mft record
    200 * specified by the vfs inode @base_vi.
    201 *
    202 * If the attribute inode is in the cache, it is just returned with an
    203 * increased reference count. Otherwise, a new struct inode is allocated and
    204 * initialized, and finally ntfs_read_locked_attr_inode() is called to read the
    205 * attribute and fill in the inode structure.
    206 *
    207 * Note, for index allocation attributes, you need to use ntfs_index_iget()
    208 * instead of ntfs_attr_iget() as working with indices is a lot more complex.
    209 *
    210 * Return the struct inode of the attribute inode on success. Check the return
    211 * value with IS_ERR() and if true, the function failed and the error code is
    212 * obtained from PTR_ERR().
    213 */
    214struct inode *ntfs_attr_iget(struct inode *base_vi, ATTR_TYPE type,
    215		ntfschar *name, u32 name_len)
    216{
    217	struct inode *vi;
    218	int err;
    219	ntfs_attr na;
    220
    221	/* Make sure no one calls ntfs_attr_iget() for indices. */
    222	BUG_ON(type == AT_INDEX_ALLOCATION);
    223
    224	na.mft_no = base_vi->i_ino;
    225	na.type = type;
    226	na.name = name;
    227	na.name_len = name_len;
    228
    229	vi = iget5_locked(base_vi->i_sb, na.mft_no, ntfs_test_inode,
    230			ntfs_init_locked_inode, &na);
    231	if (unlikely(!vi))
    232		return ERR_PTR(-ENOMEM);
    233
    234	err = 0;
    235
    236	/* If this is a freshly allocated inode, need to read it now. */
    237	if (vi->i_state & I_NEW) {
    238		err = ntfs_read_locked_attr_inode(base_vi, vi);
    239		unlock_new_inode(vi);
    240	}
    241	/*
    242	 * There is no point in keeping bad attribute inodes around. This also
    243	 * simplifies things in that we never need to check for bad attribute
    244	 * inodes elsewhere.
    245	 */
    246	if (unlikely(err)) {
    247		iput(vi);
    248		vi = ERR_PTR(err);
    249	}
    250	return vi;
    251}
    252
    253/**
    254 * ntfs_index_iget - obtain a struct inode corresponding to an index
    255 * @base_vi:	vfs base inode containing the index related attributes
    256 * @name:	Unicode name of the index
    257 * @name_len:	length of @name in Unicode characters
    258 *
    259 * Obtain the (fake) struct inode corresponding to the index specified by @name
    260 * and @name_len, which is present in the base mft record specified by the vfs
    261 * inode @base_vi.
    262 *
    263 * If the index inode is in the cache, it is just returned with an increased
    264 * reference count.  Otherwise, a new struct inode is allocated and
    265 * initialized, and finally ntfs_read_locked_index_inode() is called to read
    266 * the index related attributes and fill in the inode structure.
    267 *
    268 * Return the struct inode of the index inode on success. Check the return
    269 * value with IS_ERR() and if true, the function failed and the error code is
    270 * obtained from PTR_ERR().
    271 */
    272struct inode *ntfs_index_iget(struct inode *base_vi, ntfschar *name,
    273		u32 name_len)
    274{
    275	struct inode *vi;
    276	int err;
    277	ntfs_attr na;
    278
    279	na.mft_no = base_vi->i_ino;
    280	na.type = AT_INDEX_ALLOCATION;
    281	na.name = name;
    282	na.name_len = name_len;
    283
    284	vi = iget5_locked(base_vi->i_sb, na.mft_no, ntfs_test_inode,
    285			ntfs_init_locked_inode, &na);
    286	if (unlikely(!vi))
    287		return ERR_PTR(-ENOMEM);
    288
    289	err = 0;
    290
    291	/* If this is a freshly allocated inode, need to read it now. */
    292	if (vi->i_state & I_NEW) {
    293		err = ntfs_read_locked_index_inode(base_vi, vi);
    294		unlock_new_inode(vi);
    295	}
    296	/*
    297	 * There is no point in keeping bad index inodes around.  This also
    298	 * simplifies things in that we never need to check for bad index
    299	 * inodes elsewhere.
    300	 */
    301	if (unlikely(err)) {
    302		iput(vi);
    303		vi = ERR_PTR(err);
    304	}
    305	return vi;
    306}
    307
    308struct inode *ntfs_alloc_big_inode(struct super_block *sb)
    309{
    310	ntfs_inode *ni;
    311
    312	ntfs_debug("Entering.");
    313	ni = alloc_inode_sb(sb, ntfs_big_inode_cache, GFP_NOFS);
    314	if (likely(ni != NULL)) {
    315		ni->state = 0;
    316		return VFS_I(ni);
    317	}
    318	ntfs_error(sb, "Allocation of NTFS big inode structure failed.");
    319	return NULL;
    320}
    321
    322void ntfs_free_big_inode(struct inode *inode)
    323{
    324	kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
    325}
    326
    327static inline ntfs_inode *ntfs_alloc_extent_inode(void)
    328{
    329	ntfs_inode *ni;
    330
    331	ntfs_debug("Entering.");
    332	ni = kmem_cache_alloc(ntfs_inode_cache, GFP_NOFS);
    333	if (likely(ni != NULL)) {
    334		ni->state = 0;
    335		return ni;
    336	}
    337	ntfs_error(NULL, "Allocation of NTFS inode structure failed.");
    338	return NULL;
    339}
    340
    341static void ntfs_destroy_extent_inode(ntfs_inode *ni)
    342{
    343	ntfs_debug("Entering.");
    344	BUG_ON(ni->page);
    345	if (!atomic_dec_and_test(&ni->count))
    346		BUG();
    347	kmem_cache_free(ntfs_inode_cache, ni);
    348}
    349
    350/*
    351 * The attribute runlist lock has separate locking rules from the
    352 * normal runlist lock, so split the two lock-classes:
    353 */
    354static struct lock_class_key attr_list_rl_lock_class;
    355
    356/**
    357 * __ntfs_init_inode - initialize ntfs specific part of an inode
    358 * @sb:		super block of mounted volume
    359 * @ni:		freshly allocated ntfs inode which to initialize
    360 *
    361 * Initialize an ntfs inode to defaults.
    362 *
    363 * NOTE: ni->mft_no, ni->state, ni->type, ni->name, and ni->name_len are left
    364 * untouched. Make sure to initialize them elsewhere.
    365 *
    366 * Return zero on success and -ENOMEM on error.
    367 */
    368void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni)
    369{
    370	ntfs_debug("Entering.");
    371	rwlock_init(&ni->size_lock);
    372	ni->initialized_size = ni->allocated_size = 0;
    373	ni->seq_no = 0;
    374	atomic_set(&ni->count, 1);
    375	ni->vol = NTFS_SB(sb);
    376	ntfs_init_runlist(&ni->runlist);
    377	mutex_init(&ni->mrec_lock);
    378	ni->page = NULL;
    379	ni->page_ofs = 0;
    380	ni->attr_list_size = 0;
    381	ni->attr_list = NULL;
    382	ntfs_init_runlist(&ni->attr_list_rl);
    383	lockdep_set_class(&ni->attr_list_rl.lock,
    384				&attr_list_rl_lock_class);
    385	ni->itype.index.block_size = 0;
    386	ni->itype.index.vcn_size = 0;
    387	ni->itype.index.collation_rule = 0;
    388	ni->itype.index.block_size_bits = 0;
    389	ni->itype.index.vcn_size_bits = 0;
    390	mutex_init(&ni->extent_lock);
    391	ni->nr_extents = 0;
    392	ni->ext.base_ntfs_ino = NULL;
    393}
    394
    395/*
    396 * Extent inodes get MFT-mapped in a nested way, while the base inode
    397 * is still mapped. Teach this nesting to the lock validator by creating
    398 * a separate class for nested inode's mrec_lock's:
    399 */
    400static struct lock_class_key extent_inode_mrec_lock_key;
    401
    402inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb,
    403		unsigned long mft_no)
    404{
    405	ntfs_inode *ni = ntfs_alloc_extent_inode();
    406
    407	ntfs_debug("Entering.");
    408	if (likely(ni != NULL)) {
    409		__ntfs_init_inode(sb, ni);
    410		lockdep_set_class(&ni->mrec_lock, &extent_inode_mrec_lock_key);
    411		ni->mft_no = mft_no;
    412		ni->type = AT_UNUSED;
    413		ni->name = NULL;
    414		ni->name_len = 0;
    415	}
    416	return ni;
    417}
    418
    419/**
    420 * ntfs_is_extended_system_file - check if a file is in the $Extend directory
    421 * @ctx:	initialized attribute search context
    422 *
    423 * Search all file name attributes in the inode described by the attribute
    424 * search context @ctx and check if any of the names are in the $Extend system
    425 * directory.
    426 *
    427 * Return values:
    428 *	   1: file is in $Extend directory
    429 *	   0: file is not in $Extend directory
    430 *    -errno: failed to determine if the file is in the $Extend directory
    431 */
    432static int ntfs_is_extended_system_file(ntfs_attr_search_ctx *ctx)
    433{
    434	int nr_links, err;
    435
    436	/* Restart search. */
    437	ntfs_attr_reinit_search_ctx(ctx);
    438
    439	/* Get number of hard links. */
    440	nr_links = le16_to_cpu(ctx->mrec->link_count);
    441
    442	/* Loop through all hard links. */
    443	while (!(err = ntfs_attr_lookup(AT_FILE_NAME, NULL, 0, 0, 0, NULL, 0,
    444			ctx))) {
    445		FILE_NAME_ATTR *file_name_attr;
    446		ATTR_RECORD *attr = ctx->attr;
    447		u8 *p, *p2;
    448
    449		nr_links--;
    450		/*
    451		 * Maximum sanity checking as we are called on an inode that
    452		 * we suspect might be corrupt.
    453		 */
    454		p = (u8*)attr + le32_to_cpu(attr->length);
    455		if (p < (u8*)ctx->mrec || (u8*)p > (u8*)ctx->mrec +
    456				le32_to_cpu(ctx->mrec->bytes_in_use)) {
    457err_corrupt_attr:
    458			ntfs_error(ctx->ntfs_ino->vol->sb, "Corrupt file name "
    459					"attribute. You should run chkdsk.");
    460			return -EIO;
    461		}
    462		if (attr->non_resident) {
    463			ntfs_error(ctx->ntfs_ino->vol->sb, "Non-resident file "
    464					"name. You should run chkdsk.");
    465			return -EIO;
    466		}
    467		if (attr->flags) {
    468			ntfs_error(ctx->ntfs_ino->vol->sb, "File name with "
    469					"invalid flags. You should run "
    470					"chkdsk.");
    471			return -EIO;
    472		}
    473		if (!(attr->data.resident.flags & RESIDENT_ATTR_IS_INDEXED)) {
    474			ntfs_error(ctx->ntfs_ino->vol->sb, "Unindexed file "
    475					"name. You should run chkdsk.");
    476			return -EIO;
    477		}
    478		file_name_attr = (FILE_NAME_ATTR*)((u8*)attr +
    479				le16_to_cpu(attr->data.resident.value_offset));
    480		p2 = (u8 *)file_name_attr + le32_to_cpu(attr->data.resident.value_length);
    481		if (p2 < (u8*)attr || p2 > p)
    482			goto err_corrupt_attr;
    483		/* This attribute is ok, but is it in the $Extend directory? */
    484		if (MREF_LE(file_name_attr->parent_directory) == FILE_Extend)
    485			return 1;	/* YES, it's an extended system file. */
    486	}
    487	if (unlikely(err != -ENOENT))
    488		return err;
    489	if (unlikely(nr_links)) {
    490		ntfs_error(ctx->ntfs_ino->vol->sb, "Inode hard link count "
    491				"doesn't match number of name attributes. You "
    492				"should run chkdsk.");
    493		return -EIO;
    494	}
    495	return 0;	/* NO, it is not an extended system file. */
    496}
    497
    498/**
    499 * ntfs_read_locked_inode - read an inode from its device
    500 * @vi:		inode to read
    501 *
    502 * ntfs_read_locked_inode() is called from ntfs_iget() to read the inode
    503 * described by @vi into memory from the device.
    504 *
    505 * The only fields in @vi that we need to/can look at when the function is
    506 * called are i_sb, pointing to the mounted device's super block, and i_ino,
    507 * the number of the inode to load.
    508 *
    509 * ntfs_read_locked_inode() maps, pins and locks the mft record number i_ino
    510 * for reading and sets up the necessary @vi fields as well as initializing
    511 * the ntfs inode.
    512 *
    513 * Q: What locks are held when the function is called?
    514 * A: i_state has I_NEW set, hence the inode is locked, also
    515 *    i_count is set to 1, so it is not going to go away
    516 *    i_flags is set to 0 and we have no business touching it.  Only an ioctl()
    517 *    is allowed to write to them. We should of course be honouring them but
    518 *    we need to do that using the IS_* macros defined in include/linux/fs.h.
    519 *    In any case ntfs_read_locked_inode() has nothing to do with i_flags.
    520 *
    521 * Return 0 on success and -errno on error.  In the error case, the inode will
    522 * have had make_bad_inode() executed on it.
    523 */
    524static int ntfs_read_locked_inode(struct inode *vi)
    525{
    526	ntfs_volume *vol = NTFS_SB(vi->i_sb);
    527	ntfs_inode *ni;
    528	struct inode *bvi;
    529	MFT_RECORD *m;
    530	ATTR_RECORD *a;
    531	STANDARD_INFORMATION *si;
    532	ntfs_attr_search_ctx *ctx;
    533	int err = 0;
    534
    535	ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino);
    536
    537	/* Setup the generic vfs inode parts now. */
    538	vi->i_uid = vol->uid;
    539	vi->i_gid = vol->gid;
    540	vi->i_mode = 0;
    541
    542	/*
    543	 * Initialize the ntfs specific part of @vi special casing
    544	 * FILE_MFT which we need to do at mount time.
    545	 */
    546	if (vi->i_ino != FILE_MFT)
    547		ntfs_init_big_inode(vi);
    548	ni = NTFS_I(vi);
    549
    550	m = map_mft_record(ni);
    551	if (IS_ERR(m)) {
    552		err = PTR_ERR(m);
    553		goto err_out;
    554	}
    555	ctx = ntfs_attr_get_search_ctx(ni, m);
    556	if (!ctx) {
    557		err = -ENOMEM;
    558		goto unm_err_out;
    559	}
    560
    561	if (!(m->flags & MFT_RECORD_IN_USE)) {
    562		ntfs_error(vi->i_sb, "Inode is not in use!");
    563		goto unm_err_out;
    564	}
    565	if (m->base_mft_record) {
    566		ntfs_error(vi->i_sb, "Inode is an extent inode!");
    567		goto unm_err_out;
    568	}
    569
    570	/* Transfer information from mft record into vfs and ntfs inodes. */
    571	vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number);
    572
    573	/*
    574	 * FIXME: Keep in mind that link_count is two for files which have both
    575	 * a long file name and a short file name as separate entries, so if
    576	 * we are hiding short file names this will be too high. Either we need
    577	 * to account for the short file names by subtracting them or we need
    578	 * to make sure we delete files even though i_nlink is not zero which
    579	 * might be tricky due to vfs interactions. Need to think about this
    580	 * some more when implementing the unlink command.
    581	 */
    582	set_nlink(vi, le16_to_cpu(m->link_count));
    583	/*
    584	 * FIXME: Reparse points can have the directory bit set even though
    585	 * they would be S_IFLNK. Need to deal with this further below when we
    586	 * implement reparse points / symbolic links but it will do for now.
    587	 * Also if not a directory, it could be something else, rather than
    588	 * a regular file. But again, will do for now.
    589	 */
    590	/* Everyone gets all permissions. */
    591	vi->i_mode |= S_IRWXUGO;
    592	/* If read-only, no one gets write permissions. */
    593	if (IS_RDONLY(vi))
    594		vi->i_mode &= ~S_IWUGO;
    595	if (m->flags & MFT_RECORD_IS_DIRECTORY) {
    596		vi->i_mode |= S_IFDIR;
    597		/*
    598		 * Apply the directory permissions mask set in the mount
    599		 * options.
    600		 */
    601		vi->i_mode &= ~vol->dmask;
    602		/* Things break without this kludge! */
    603		if (vi->i_nlink > 1)
    604			set_nlink(vi, 1);
    605	} else {
    606		vi->i_mode |= S_IFREG;
    607		/* Apply the file permissions mask set in the mount options. */
    608		vi->i_mode &= ~vol->fmask;
    609	}
    610	/*
    611	 * Find the standard information attribute in the mft record. At this
    612	 * stage we haven't setup the attribute list stuff yet, so this could
    613	 * in fact fail if the standard information is in an extent record, but
    614	 * I don't think this actually ever happens.
    615	 */
    616	err = ntfs_attr_lookup(AT_STANDARD_INFORMATION, NULL, 0, 0, 0, NULL, 0,
    617			ctx);
    618	if (unlikely(err)) {
    619		if (err == -ENOENT) {
    620			/*
    621			 * TODO: We should be performing a hot fix here (if the
    622			 * recover mount option is set) by creating a new
    623			 * attribute.
    624			 */
    625			ntfs_error(vi->i_sb, "$STANDARD_INFORMATION attribute "
    626					"is missing.");
    627		}
    628		goto unm_err_out;
    629	}
    630	a = ctx->attr;
    631	/* Get the standard information attribute value. */
    632	if ((u8 *)a + le16_to_cpu(a->data.resident.value_offset)
    633			+ le32_to_cpu(a->data.resident.value_length) >
    634			(u8 *)ctx->mrec + vol->mft_record_size) {
    635		ntfs_error(vi->i_sb, "Corrupt standard information attribute in inode.");
    636		goto unm_err_out;
    637	}
    638	si = (STANDARD_INFORMATION*)((u8*)a +
    639			le16_to_cpu(a->data.resident.value_offset));
    640
    641	/* Transfer information from the standard information into vi. */
    642	/*
    643	 * Note: The i_?times do not quite map perfectly onto the NTFS times,
    644	 * but they are close enough, and in the end it doesn't really matter
    645	 * that much...
    646	 */
    647	/*
    648	 * mtime is the last change of the data within the file. Not changed
    649	 * when only metadata is changed, e.g. a rename doesn't affect mtime.
    650	 */
    651	vi->i_mtime = ntfs2utc(si->last_data_change_time);
    652	/*
    653	 * ctime is the last change of the metadata of the file. This obviously
    654	 * always changes, when mtime is changed. ctime can be changed on its
    655	 * own, mtime is then not changed, e.g. when a file is renamed.
    656	 */
    657	vi->i_ctime = ntfs2utc(si->last_mft_change_time);
    658	/*
    659	 * Last access to the data within the file. Not changed during a rename
    660	 * for example but changed whenever the file is written to.
    661	 */
    662	vi->i_atime = ntfs2utc(si->last_access_time);
    663
    664	/* Find the attribute list attribute if present. */
    665	ntfs_attr_reinit_search_ctx(ctx);
    666	err = ntfs_attr_lookup(AT_ATTRIBUTE_LIST, NULL, 0, 0, 0, NULL, 0, ctx);
    667	if (err) {
    668		if (unlikely(err != -ENOENT)) {
    669			ntfs_error(vi->i_sb, "Failed to lookup attribute list "
    670					"attribute.");
    671			goto unm_err_out;
    672		}
    673	} else /* if (!err) */ {
    674		if (vi->i_ino == FILE_MFT)
    675			goto skip_attr_list_load;
    676		ntfs_debug("Attribute list found in inode 0x%lx.", vi->i_ino);
    677		NInoSetAttrList(ni);
    678		a = ctx->attr;
    679		if (a->flags & ATTR_COMPRESSION_MASK) {
    680			ntfs_error(vi->i_sb, "Attribute list attribute is "
    681					"compressed.");
    682			goto unm_err_out;
    683		}
    684		if (a->flags & ATTR_IS_ENCRYPTED ||
    685				a->flags & ATTR_IS_SPARSE) {
    686			if (a->non_resident) {
    687				ntfs_error(vi->i_sb, "Non-resident attribute "
    688						"list attribute is encrypted/"
    689						"sparse.");
    690				goto unm_err_out;
    691			}
    692			ntfs_warning(vi->i_sb, "Resident attribute list "
    693					"attribute in inode 0x%lx is marked "
    694					"encrypted/sparse which is not true.  "
    695					"However, Windows allows this and "
    696					"chkdsk does not detect or correct it "
    697					"so we will just ignore the invalid "
    698					"flags and pretend they are not set.",
    699					vi->i_ino);
    700		}
    701		/* Now allocate memory for the attribute list. */
    702		ni->attr_list_size = (u32)ntfs_attr_size(a);
    703		ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size);
    704		if (!ni->attr_list) {
    705			ntfs_error(vi->i_sb, "Not enough memory to allocate "
    706					"buffer for attribute list.");
    707			err = -ENOMEM;
    708			goto unm_err_out;
    709		}
    710		if (a->non_resident) {
    711			NInoSetAttrListNonResident(ni);
    712			if (a->data.non_resident.lowest_vcn) {
    713				ntfs_error(vi->i_sb, "Attribute list has non "
    714						"zero lowest_vcn.");
    715				goto unm_err_out;
    716			}
    717			/*
    718			 * Setup the runlist. No need for locking as we have
    719			 * exclusive access to the inode at this time.
    720			 */
    721			ni->attr_list_rl.rl = ntfs_mapping_pairs_decompress(vol,
    722					a, NULL);
    723			if (IS_ERR(ni->attr_list_rl.rl)) {
    724				err = PTR_ERR(ni->attr_list_rl.rl);
    725				ni->attr_list_rl.rl = NULL;
    726				ntfs_error(vi->i_sb, "Mapping pairs "
    727						"decompression failed.");
    728				goto unm_err_out;
    729			}
    730			/* Now load the attribute list. */
    731			if ((err = load_attribute_list(vol, &ni->attr_list_rl,
    732					ni->attr_list, ni->attr_list_size,
    733					sle64_to_cpu(a->data.non_resident.
    734					initialized_size)))) {
    735				ntfs_error(vi->i_sb, "Failed to load "
    736						"attribute list attribute.");
    737				goto unm_err_out;
    738			}
    739		} else /* if (!a->non_resident) */ {
    740			if ((u8*)a + le16_to_cpu(a->data.resident.value_offset)
    741					+ le32_to_cpu(
    742					a->data.resident.value_length) >
    743					(u8*)ctx->mrec + vol->mft_record_size) {
    744				ntfs_error(vi->i_sb, "Corrupt attribute list "
    745						"in inode.");
    746				goto unm_err_out;
    747			}
    748			/* Now copy the attribute list. */
    749			memcpy(ni->attr_list, (u8*)a + le16_to_cpu(
    750					a->data.resident.value_offset),
    751					le32_to_cpu(
    752					a->data.resident.value_length));
    753		}
    754	}
    755skip_attr_list_load:
    756	/*
    757	 * If an attribute list is present we now have the attribute list value
    758	 * in ntfs_ino->attr_list and it is ntfs_ino->attr_list_size bytes.
    759	 */
    760	if (S_ISDIR(vi->i_mode)) {
    761		loff_t bvi_size;
    762		ntfs_inode *bni;
    763		INDEX_ROOT *ir;
    764		u8 *ir_end, *index_end;
    765
    766		/* It is a directory, find index root attribute. */
    767		ntfs_attr_reinit_search_ctx(ctx);
    768		err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE,
    769				0, NULL, 0, ctx);
    770		if (unlikely(err)) {
    771			if (err == -ENOENT) {
    772				// FIXME: File is corrupt! Hot-fix with empty
    773				// index root attribute if recovery option is
    774				// set.
    775				ntfs_error(vi->i_sb, "$INDEX_ROOT attribute "
    776						"is missing.");
    777			}
    778			goto unm_err_out;
    779		}
    780		a = ctx->attr;
    781		/* Set up the state. */
    782		if (unlikely(a->non_resident)) {
    783			ntfs_error(vol->sb, "$INDEX_ROOT attribute is not "
    784					"resident.");
    785			goto unm_err_out;
    786		}
    787		/* Ensure the attribute name is placed before the value. */
    788		if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
    789				le16_to_cpu(a->data.resident.value_offset)))) {
    790			ntfs_error(vol->sb, "$INDEX_ROOT attribute name is "
    791					"placed after the attribute value.");
    792			goto unm_err_out;
    793		}
    794		/*
    795		 * Compressed/encrypted index root just means that the newly
    796		 * created files in that directory should be created compressed/
    797		 * encrypted. However index root cannot be both compressed and
    798		 * encrypted.
    799		 */
    800		if (a->flags & ATTR_COMPRESSION_MASK)
    801			NInoSetCompressed(ni);
    802		if (a->flags & ATTR_IS_ENCRYPTED) {
    803			if (a->flags & ATTR_COMPRESSION_MASK) {
    804				ntfs_error(vi->i_sb, "Found encrypted and "
    805						"compressed attribute.");
    806				goto unm_err_out;
    807			}
    808			NInoSetEncrypted(ni);
    809		}
    810		if (a->flags & ATTR_IS_SPARSE)
    811			NInoSetSparse(ni);
    812		ir = (INDEX_ROOT*)((u8*)a +
    813				le16_to_cpu(a->data.resident.value_offset));
    814		ir_end = (u8*)ir + le32_to_cpu(a->data.resident.value_length);
    815		if (ir_end > (u8*)ctx->mrec + vol->mft_record_size) {
    816			ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is "
    817					"corrupt.");
    818			goto unm_err_out;
    819		}
    820		index_end = (u8*)&ir->index +
    821				le32_to_cpu(ir->index.index_length);
    822		if (index_end > ir_end) {
    823			ntfs_error(vi->i_sb, "Directory index is corrupt.");
    824			goto unm_err_out;
    825		}
    826		if (ir->type != AT_FILE_NAME) {
    827			ntfs_error(vi->i_sb, "Indexed attribute is not "
    828					"$FILE_NAME.");
    829			goto unm_err_out;
    830		}
    831		if (ir->collation_rule != COLLATION_FILE_NAME) {
    832			ntfs_error(vi->i_sb, "Index collation rule is not "
    833					"COLLATION_FILE_NAME.");
    834			goto unm_err_out;
    835		}
    836		ni->itype.index.collation_rule = ir->collation_rule;
    837		ni->itype.index.block_size = le32_to_cpu(ir->index_block_size);
    838		if (ni->itype.index.block_size &
    839				(ni->itype.index.block_size - 1)) {
    840			ntfs_error(vi->i_sb, "Index block size (%u) is not a "
    841					"power of two.",
    842					ni->itype.index.block_size);
    843			goto unm_err_out;
    844		}
    845		if (ni->itype.index.block_size > PAGE_SIZE) {
    846			ntfs_error(vi->i_sb, "Index block size (%u) > "
    847					"PAGE_SIZE (%ld) is not "
    848					"supported.  Sorry.",
    849					ni->itype.index.block_size,
    850					PAGE_SIZE);
    851			err = -EOPNOTSUPP;
    852			goto unm_err_out;
    853		}
    854		if (ni->itype.index.block_size < NTFS_BLOCK_SIZE) {
    855			ntfs_error(vi->i_sb, "Index block size (%u) < "
    856					"NTFS_BLOCK_SIZE (%i) is not "
    857					"supported.  Sorry.",
    858					ni->itype.index.block_size,
    859					NTFS_BLOCK_SIZE);
    860			err = -EOPNOTSUPP;
    861			goto unm_err_out;
    862		}
    863		ni->itype.index.block_size_bits =
    864				ffs(ni->itype.index.block_size) - 1;
    865		/* Determine the size of a vcn in the directory index. */
    866		if (vol->cluster_size <= ni->itype.index.block_size) {
    867			ni->itype.index.vcn_size = vol->cluster_size;
    868			ni->itype.index.vcn_size_bits = vol->cluster_size_bits;
    869		} else {
    870			ni->itype.index.vcn_size = vol->sector_size;
    871			ni->itype.index.vcn_size_bits = vol->sector_size_bits;
    872		}
    873
    874		/* Setup the index allocation attribute, even if not present. */
    875		NInoSetMstProtected(ni);
    876		ni->type = AT_INDEX_ALLOCATION;
    877		ni->name = I30;
    878		ni->name_len = 4;
    879
    880		if (!(ir->index.flags & LARGE_INDEX)) {
    881			/* No index allocation. */
    882			vi->i_size = ni->initialized_size =
    883					ni->allocated_size = 0;
    884			/* We are done with the mft record, so we release it. */
    885			ntfs_attr_put_search_ctx(ctx);
    886			unmap_mft_record(ni);
    887			m = NULL;
    888			ctx = NULL;
    889			goto skip_large_dir_stuff;
    890		} /* LARGE_INDEX: Index allocation present. Setup state. */
    891		NInoSetIndexAllocPresent(ni);
    892		/* Find index allocation attribute. */
    893		ntfs_attr_reinit_search_ctx(ctx);
    894		err = ntfs_attr_lookup(AT_INDEX_ALLOCATION, I30, 4,
    895				CASE_SENSITIVE, 0, NULL, 0, ctx);
    896		if (unlikely(err)) {
    897			if (err == -ENOENT)
    898				ntfs_error(vi->i_sb, "$INDEX_ALLOCATION "
    899						"attribute is not present but "
    900						"$INDEX_ROOT indicated it is.");
    901			else
    902				ntfs_error(vi->i_sb, "Failed to lookup "
    903						"$INDEX_ALLOCATION "
    904						"attribute.");
    905			goto unm_err_out;
    906		}
    907		a = ctx->attr;
    908		if (!a->non_resident) {
    909			ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
    910					"is resident.");
    911			goto unm_err_out;
    912		}
    913		/*
    914		 * Ensure the attribute name is placed before the mapping pairs
    915		 * array.
    916		 */
    917		if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
    918				le16_to_cpu(
    919				a->data.non_resident.mapping_pairs_offset)))) {
    920			ntfs_error(vol->sb, "$INDEX_ALLOCATION attribute name "
    921					"is placed after the mapping pairs "
    922					"array.");
    923			goto unm_err_out;
    924		}
    925		if (a->flags & ATTR_IS_ENCRYPTED) {
    926			ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
    927					"is encrypted.");
    928			goto unm_err_out;
    929		}
    930		if (a->flags & ATTR_IS_SPARSE) {
    931			ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
    932					"is sparse.");
    933			goto unm_err_out;
    934		}
    935		if (a->flags & ATTR_COMPRESSION_MASK) {
    936			ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
    937					"is compressed.");
    938			goto unm_err_out;
    939		}
    940		if (a->data.non_resident.lowest_vcn) {
    941			ntfs_error(vi->i_sb, "First extent of "
    942					"$INDEX_ALLOCATION attribute has non "
    943					"zero lowest_vcn.");
    944			goto unm_err_out;
    945		}
    946		vi->i_size = sle64_to_cpu(a->data.non_resident.data_size);
    947		ni->initialized_size = sle64_to_cpu(
    948				a->data.non_resident.initialized_size);
    949		ni->allocated_size = sle64_to_cpu(
    950				a->data.non_resident.allocated_size);
    951		/*
    952		 * We are done with the mft record, so we release it. Otherwise
    953		 * we would deadlock in ntfs_attr_iget().
    954		 */
    955		ntfs_attr_put_search_ctx(ctx);
    956		unmap_mft_record(ni);
    957		m = NULL;
    958		ctx = NULL;
    959		/* Get the index bitmap attribute inode. */
    960		bvi = ntfs_attr_iget(vi, AT_BITMAP, I30, 4);
    961		if (IS_ERR(bvi)) {
    962			ntfs_error(vi->i_sb, "Failed to get bitmap attribute.");
    963			err = PTR_ERR(bvi);
    964			goto unm_err_out;
    965		}
    966		bni = NTFS_I(bvi);
    967		if (NInoCompressed(bni) || NInoEncrypted(bni) ||
    968				NInoSparse(bni)) {
    969			ntfs_error(vi->i_sb, "$BITMAP attribute is compressed "
    970					"and/or encrypted and/or sparse.");
    971			goto iput_unm_err_out;
    972		}
    973		/* Consistency check bitmap size vs. index allocation size. */
    974		bvi_size = i_size_read(bvi);
    975		if ((bvi_size << 3) < (vi->i_size >>
    976				ni->itype.index.block_size_bits)) {
    977			ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) "
    978					"for index allocation (0x%llx).",
    979					bvi_size << 3, vi->i_size);
    980			goto iput_unm_err_out;
    981		}
    982		/* No longer need the bitmap attribute inode. */
    983		iput(bvi);
    984skip_large_dir_stuff:
    985		/* Setup the operations for this inode. */
    986		vi->i_op = &ntfs_dir_inode_ops;
    987		vi->i_fop = &ntfs_dir_ops;
    988		vi->i_mapping->a_ops = &ntfs_mst_aops;
    989	} else {
    990		/* It is a file. */
    991		ntfs_attr_reinit_search_ctx(ctx);
    992
    993		/* Setup the data attribute, even if not present. */
    994		ni->type = AT_DATA;
    995		ni->name = NULL;
    996		ni->name_len = 0;
    997
    998		/* Find first extent of the unnamed data attribute. */
    999		err = ntfs_attr_lookup(AT_DATA, NULL, 0, 0, 0, NULL, 0, ctx);
   1000		if (unlikely(err)) {
   1001			vi->i_size = ni->initialized_size =
   1002					ni->allocated_size = 0;
   1003			if (err != -ENOENT) {
   1004				ntfs_error(vi->i_sb, "Failed to lookup $DATA "
   1005						"attribute.");
   1006				goto unm_err_out;
   1007			}
   1008			/*
   1009			 * FILE_Secure does not have an unnamed $DATA
   1010			 * attribute, so we special case it here.
   1011			 */
   1012			if (vi->i_ino == FILE_Secure)
   1013				goto no_data_attr_special_case;
   1014			/*
   1015			 * Most if not all the system files in the $Extend
   1016			 * system directory do not have unnamed data
   1017			 * attributes so we need to check if the parent
   1018			 * directory of the file is FILE_Extend and if it is
   1019			 * ignore this error. To do this we need to get the
   1020			 * name of this inode from the mft record as the name
   1021			 * contains the back reference to the parent directory.
   1022			 */
   1023			if (ntfs_is_extended_system_file(ctx) > 0)
   1024				goto no_data_attr_special_case;
   1025			// FIXME: File is corrupt! Hot-fix with empty data
   1026			// attribute if recovery option is set.
   1027			ntfs_error(vi->i_sb, "$DATA attribute is missing.");
   1028			goto unm_err_out;
   1029		}
   1030		a = ctx->attr;
   1031		/* Setup the state. */
   1032		if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
   1033			if (a->flags & ATTR_COMPRESSION_MASK) {
   1034				NInoSetCompressed(ni);
   1035				if (vol->cluster_size > 4096) {
   1036					ntfs_error(vi->i_sb, "Found "
   1037							"compressed data but "
   1038							"compression is "
   1039							"disabled due to "
   1040							"cluster size (%i) > "
   1041							"4kiB.",
   1042							vol->cluster_size);
   1043					goto unm_err_out;
   1044				}
   1045				if ((a->flags & ATTR_COMPRESSION_MASK)
   1046						!= ATTR_IS_COMPRESSED) {
   1047					ntfs_error(vi->i_sb, "Found unknown "
   1048							"compression method "
   1049							"or corrupt file.");
   1050					goto unm_err_out;
   1051				}
   1052			}
   1053			if (a->flags & ATTR_IS_SPARSE)
   1054				NInoSetSparse(ni);
   1055		}
   1056		if (a->flags & ATTR_IS_ENCRYPTED) {
   1057			if (NInoCompressed(ni)) {
   1058				ntfs_error(vi->i_sb, "Found encrypted and "
   1059						"compressed data.");
   1060				goto unm_err_out;
   1061			}
   1062			NInoSetEncrypted(ni);
   1063		}
   1064		if (a->non_resident) {
   1065			NInoSetNonResident(ni);
   1066			if (NInoCompressed(ni) || NInoSparse(ni)) {
   1067				if (NInoCompressed(ni) && a->data.non_resident.
   1068						compression_unit != 4) {
   1069					ntfs_error(vi->i_sb, "Found "
   1070							"non-standard "
   1071							"compression unit (%u "
   1072							"instead of 4).  "
   1073							"Cannot handle this.",
   1074							a->data.non_resident.
   1075							compression_unit);
   1076					err = -EOPNOTSUPP;
   1077					goto unm_err_out;
   1078				}
   1079				if (a->data.non_resident.compression_unit) {
   1080					ni->itype.compressed.block_size = 1U <<
   1081							(a->data.non_resident.
   1082							compression_unit +
   1083							vol->cluster_size_bits);
   1084					ni->itype.compressed.block_size_bits =
   1085							ffs(ni->itype.
   1086							compressed.
   1087							block_size) - 1;
   1088					ni->itype.compressed.block_clusters =
   1089							1U << a->data.
   1090							non_resident.
   1091							compression_unit;
   1092				} else {
   1093					ni->itype.compressed.block_size = 0;
   1094					ni->itype.compressed.block_size_bits =
   1095							0;
   1096					ni->itype.compressed.block_clusters =
   1097							0;
   1098				}
   1099				ni->itype.compressed.size = sle64_to_cpu(
   1100						a->data.non_resident.
   1101						compressed_size);
   1102			}
   1103			if (a->data.non_resident.lowest_vcn) {
   1104				ntfs_error(vi->i_sb, "First extent of $DATA "
   1105						"attribute has non zero "
   1106						"lowest_vcn.");
   1107				goto unm_err_out;
   1108			}
   1109			vi->i_size = sle64_to_cpu(
   1110					a->data.non_resident.data_size);
   1111			ni->initialized_size = sle64_to_cpu(
   1112					a->data.non_resident.initialized_size);
   1113			ni->allocated_size = sle64_to_cpu(
   1114					a->data.non_resident.allocated_size);
   1115		} else { /* Resident attribute. */
   1116			vi->i_size = ni->initialized_size = le32_to_cpu(
   1117					a->data.resident.value_length);
   1118			ni->allocated_size = le32_to_cpu(a->length) -
   1119					le16_to_cpu(
   1120					a->data.resident.value_offset);
   1121			if (vi->i_size > ni->allocated_size) {
   1122				ntfs_error(vi->i_sb, "Resident data attribute "
   1123						"is corrupt (size exceeds "
   1124						"allocation).");
   1125				goto unm_err_out;
   1126			}
   1127		}
   1128no_data_attr_special_case:
   1129		/* We are done with the mft record, so we release it. */
   1130		ntfs_attr_put_search_ctx(ctx);
   1131		unmap_mft_record(ni);
   1132		m = NULL;
   1133		ctx = NULL;
   1134		/* Setup the operations for this inode. */
   1135		vi->i_op = &ntfs_file_inode_ops;
   1136		vi->i_fop = &ntfs_file_ops;
   1137		vi->i_mapping->a_ops = &ntfs_normal_aops;
   1138		if (NInoMstProtected(ni))
   1139			vi->i_mapping->a_ops = &ntfs_mst_aops;
   1140		else if (NInoCompressed(ni))
   1141			vi->i_mapping->a_ops = &ntfs_compressed_aops;
   1142	}
   1143	/*
   1144	 * The number of 512-byte blocks used on disk (for stat). This is in so
   1145	 * far inaccurate as it doesn't account for any named streams or other
   1146	 * special non-resident attributes, but that is how Windows works, too,
   1147	 * so we are at least consistent with Windows, if not entirely
   1148	 * consistent with the Linux Way. Doing it the Linux Way would cause a
   1149	 * significant slowdown as it would involve iterating over all
   1150	 * attributes in the mft record and adding the allocated/compressed
   1151	 * sizes of all non-resident attributes present to give us the Linux
   1152	 * correct size that should go into i_blocks (after division by 512).
   1153	 */
   1154	if (S_ISREG(vi->i_mode) && (NInoCompressed(ni) || NInoSparse(ni)))
   1155		vi->i_blocks = ni->itype.compressed.size >> 9;
   1156	else
   1157		vi->i_blocks = ni->allocated_size >> 9;
   1158	ntfs_debug("Done.");
   1159	return 0;
   1160iput_unm_err_out:
   1161	iput(bvi);
   1162unm_err_out:
   1163	if (!err)
   1164		err = -EIO;
   1165	if (ctx)
   1166		ntfs_attr_put_search_ctx(ctx);
   1167	if (m)
   1168		unmap_mft_record(ni);
   1169err_out:
   1170	ntfs_error(vol->sb, "Failed with error code %i.  Marking corrupt "
   1171			"inode 0x%lx as bad.  Run chkdsk.", err, vi->i_ino);
   1172	make_bad_inode(vi);
   1173	if (err != -EOPNOTSUPP && err != -ENOMEM)
   1174		NVolSetErrors(vol);
   1175	return err;
   1176}
   1177
   1178/**
   1179 * ntfs_read_locked_attr_inode - read an attribute inode from its base inode
   1180 * @base_vi:	base inode
   1181 * @vi:		attribute inode to read
   1182 *
   1183 * ntfs_read_locked_attr_inode() is called from ntfs_attr_iget() to read the
   1184 * attribute inode described by @vi into memory from the base mft record
   1185 * described by @base_ni.
   1186 *
   1187 * ntfs_read_locked_attr_inode() maps, pins and locks the base inode for
   1188 * reading and looks up the attribute described by @vi before setting up the
   1189 * necessary fields in @vi as well as initializing the ntfs inode.
   1190 *
   1191 * Q: What locks are held when the function is called?
   1192 * A: i_state has I_NEW set, hence the inode is locked, also
   1193 *    i_count is set to 1, so it is not going to go away
   1194 *
   1195 * Return 0 on success and -errno on error.  In the error case, the inode will
   1196 * have had make_bad_inode() executed on it.
   1197 *
   1198 * Note this cannot be called for AT_INDEX_ALLOCATION.
   1199 */
   1200static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
   1201{
   1202	ntfs_volume *vol = NTFS_SB(vi->i_sb);
   1203	ntfs_inode *ni, *base_ni;
   1204	MFT_RECORD *m;
   1205	ATTR_RECORD *a;
   1206	ntfs_attr_search_ctx *ctx;
   1207	int err = 0;
   1208
   1209	ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino);
   1210
   1211	ntfs_init_big_inode(vi);
   1212
   1213	ni	= NTFS_I(vi);
   1214	base_ni = NTFS_I(base_vi);
   1215
   1216	/* Just mirror the values from the base inode. */
   1217	vi->i_uid	= base_vi->i_uid;
   1218	vi->i_gid	= base_vi->i_gid;
   1219	set_nlink(vi, base_vi->i_nlink);
   1220	vi->i_mtime	= base_vi->i_mtime;
   1221	vi->i_ctime	= base_vi->i_ctime;
   1222	vi->i_atime	= base_vi->i_atime;
   1223	vi->i_generation = ni->seq_no = base_ni->seq_no;
   1224
   1225	/* Set inode type to zero but preserve permissions. */
   1226	vi->i_mode	= base_vi->i_mode & ~S_IFMT;
   1227
   1228	m = map_mft_record(base_ni);
   1229	if (IS_ERR(m)) {
   1230		err = PTR_ERR(m);
   1231		goto err_out;
   1232	}
   1233	ctx = ntfs_attr_get_search_ctx(base_ni, m);
   1234	if (!ctx) {
   1235		err = -ENOMEM;
   1236		goto unm_err_out;
   1237	}
   1238	/* Find the attribute. */
   1239	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
   1240			CASE_SENSITIVE, 0, NULL, 0, ctx);
   1241	if (unlikely(err))
   1242		goto unm_err_out;
   1243	a = ctx->attr;
   1244	if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_SPARSE)) {
   1245		if (a->flags & ATTR_COMPRESSION_MASK) {
   1246			NInoSetCompressed(ni);
   1247			if ((ni->type != AT_DATA) || (ni->type == AT_DATA &&
   1248					ni->name_len)) {
   1249				ntfs_error(vi->i_sb, "Found compressed "
   1250						"non-data or named data "
   1251						"attribute.  Please report "
   1252						"you saw this message to "
   1253						"linux-ntfs-dev@lists."
   1254						"sourceforge.net");
   1255				goto unm_err_out;
   1256			}
   1257			if (vol->cluster_size > 4096) {
   1258				ntfs_error(vi->i_sb, "Found compressed "
   1259						"attribute but compression is "
   1260						"disabled due to cluster size "
   1261						"(%i) > 4kiB.",
   1262						vol->cluster_size);
   1263				goto unm_err_out;
   1264			}
   1265			if ((a->flags & ATTR_COMPRESSION_MASK) !=
   1266					ATTR_IS_COMPRESSED) {
   1267				ntfs_error(vi->i_sb, "Found unknown "
   1268						"compression method.");
   1269				goto unm_err_out;
   1270			}
   1271		}
   1272		/*
   1273		 * The compressed/sparse flag set in an index root just means
   1274		 * to compress all files.
   1275		 */
   1276		if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
   1277			ntfs_error(vi->i_sb, "Found mst protected attribute "
   1278					"but the attribute is %s.  Please "
   1279					"report you saw this message to "
   1280					"linux-ntfs-dev@lists.sourceforge.net",
   1281					NInoCompressed(ni) ? "compressed" :
   1282					"sparse");
   1283			goto unm_err_out;
   1284		}
   1285		if (a->flags & ATTR_IS_SPARSE)
   1286			NInoSetSparse(ni);
   1287	}
   1288	if (a->flags & ATTR_IS_ENCRYPTED) {
   1289		if (NInoCompressed(ni)) {
   1290			ntfs_error(vi->i_sb, "Found encrypted and compressed "
   1291					"data.");
   1292			goto unm_err_out;
   1293		}
   1294		/*
   1295		 * The encryption flag set in an index root just means to
   1296		 * encrypt all files.
   1297		 */
   1298		if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
   1299			ntfs_error(vi->i_sb, "Found mst protected attribute "
   1300					"but the attribute is encrypted.  "
   1301					"Please report you saw this message "
   1302					"to linux-ntfs-dev@lists.sourceforge."
   1303					"net");
   1304			goto unm_err_out;
   1305		}
   1306		if (ni->type != AT_DATA) {
   1307			ntfs_error(vi->i_sb, "Found encrypted non-data "
   1308					"attribute.");
   1309			goto unm_err_out;
   1310		}
   1311		NInoSetEncrypted(ni);
   1312	}
   1313	if (!a->non_resident) {
   1314		/* Ensure the attribute name is placed before the value. */
   1315		if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
   1316				le16_to_cpu(a->data.resident.value_offset)))) {
   1317			ntfs_error(vol->sb, "Attribute name is placed after "
   1318					"the attribute value.");
   1319			goto unm_err_out;
   1320		}
   1321		if (NInoMstProtected(ni)) {
   1322			ntfs_error(vi->i_sb, "Found mst protected attribute "
   1323					"but the attribute is resident.  "
   1324					"Please report you saw this message to "
   1325					"linux-ntfs-dev@lists.sourceforge.net");
   1326			goto unm_err_out;
   1327		}
   1328		vi->i_size = ni->initialized_size = le32_to_cpu(
   1329				a->data.resident.value_length);
   1330		ni->allocated_size = le32_to_cpu(a->length) -
   1331				le16_to_cpu(a->data.resident.value_offset);
   1332		if (vi->i_size > ni->allocated_size) {
   1333			ntfs_error(vi->i_sb, "Resident attribute is corrupt "
   1334					"(size exceeds allocation).");
   1335			goto unm_err_out;
   1336		}
   1337	} else {
   1338		NInoSetNonResident(ni);
   1339		/*
   1340		 * Ensure the attribute name is placed before the mapping pairs
   1341		 * array.
   1342		 */
   1343		if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
   1344				le16_to_cpu(
   1345				a->data.non_resident.mapping_pairs_offset)))) {
   1346			ntfs_error(vol->sb, "Attribute name is placed after "
   1347					"the mapping pairs array.");
   1348			goto unm_err_out;
   1349		}
   1350		if (NInoCompressed(ni) || NInoSparse(ni)) {
   1351			if (NInoCompressed(ni) && a->data.non_resident.
   1352					compression_unit != 4) {
   1353				ntfs_error(vi->i_sb, "Found non-standard "
   1354						"compression unit (%u instead "
   1355						"of 4).  Cannot handle this.",
   1356						a->data.non_resident.
   1357						compression_unit);
   1358				err = -EOPNOTSUPP;
   1359				goto unm_err_out;
   1360			}
   1361			if (a->data.non_resident.compression_unit) {
   1362				ni->itype.compressed.block_size = 1U <<
   1363						(a->data.non_resident.
   1364						compression_unit +
   1365						vol->cluster_size_bits);
   1366				ni->itype.compressed.block_size_bits =
   1367						ffs(ni->itype.compressed.
   1368						block_size) - 1;
   1369				ni->itype.compressed.block_clusters = 1U <<
   1370						a->data.non_resident.
   1371						compression_unit;
   1372			} else {
   1373				ni->itype.compressed.block_size = 0;
   1374				ni->itype.compressed.block_size_bits = 0;
   1375				ni->itype.compressed.block_clusters = 0;
   1376			}
   1377			ni->itype.compressed.size = sle64_to_cpu(
   1378					a->data.non_resident.compressed_size);
   1379		}
   1380		if (a->data.non_resident.lowest_vcn) {
   1381			ntfs_error(vi->i_sb, "First extent of attribute has "
   1382					"non-zero lowest_vcn.");
   1383			goto unm_err_out;
   1384		}
   1385		vi->i_size = sle64_to_cpu(a->data.non_resident.data_size);
   1386		ni->initialized_size = sle64_to_cpu(
   1387				a->data.non_resident.initialized_size);
   1388		ni->allocated_size = sle64_to_cpu(
   1389				a->data.non_resident.allocated_size);
   1390	}
   1391	vi->i_mapping->a_ops = &ntfs_normal_aops;
   1392	if (NInoMstProtected(ni))
   1393		vi->i_mapping->a_ops = &ntfs_mst_aops;
   1394	else if (NInoCompressed(ni))
   1395		vi->i_mapping->a_ops = &ntfs_compressed_aops;
   1396	if ((NInoCompressed(ni) || NInoSparse(ni)) && ni->type != AT_INDEX_ROOT)
   1397		vi->i_blocks = ni->itype.compressed.size >> 9;
   1398	else
   1399		vi->i_blocks = ni->allocated_size >> 9;
   1400	/*
   1401	 * Make sure the base inode does not go away and attach it to the
   1402	 * attribute inode.
   1403	 */
   1404	igrab(base_vi);
   1405	ni->ext.base_ntfs_ino = base_ni;
   1406	ni->nr_extents = -1;
   1407
   1408	ntfs_attr_put_search_ctx(ctx);
   1409	unmap_mft_record(base_ni);
   1410
   1411	ntfs_debug("Done.");
   1412	return 0;
   1413
   1414unm_err_out:
   1415	if (!err)
   1416		err = -EIO;
   1417	if (ctx)
   1418		ntfs_attr_put_search_ctx(ctx);
   1419	unmap_mft_record(base_ni);
   1420err_out:
   1421	ntfs_error(vol->sb, "Failed with error code %i while reading attribute "
   1422			"inode (mft_no 0x%lx, type 0x%x, name_len %i).  "
   1423			"Marking corrupt inode and base inode 0x%lx as bad.  "
   1424			"Run chkdsk.", err, vi->i_ino, ni->type, ni->name_len,
   1425			base_vi->i_ino);
   1426	make_bad_inode(vi);
   1427	if (err != -ENOMEM)
   1428		NVolSetErrors(vol);
   1429	return err;
   1430}
   1431
   1432/**
   1433 * ntfs_read_locked_index_inode - read an index inode from its base inode
   1434 * @base_vi:	base inode
   1435 * @vi:		index inode to read
   1436 *
   1437 * ntfs_read_locked_index_inode() is called from ntfs_index_iget() to read the
   1438 * index inode described by @vi into memory from the base mft record described
   1439 * by @base_ni.
   1440 *
   1441 * ntfs_read_locked_index_inode() maps, pins and locks the base inode for
   1442 * reading and looks up the attributes relating to the index described by @vi
   1443 * before setting up the necessary fields in @vi as well as initializing the
   1444 * ntfs inode.
   1445 *
   1446 * Note, index inodes are essentially attribute inodes (NInoAttr() is true)
   1447 * with the attribute type set to AT_INDEX_ALLOCATION.  Apart from that, they
   1448 * are setup like directory inodes since directories are a special case of
   1449 * indices ao they need to be treated in much the same way.  Most importantly,
   1450 * for small indices the index allocation attribute might not actually exist.
   1451 * However, the index root attribute always exists but this does not need to
   1452 * have an inode associated with it and this is why we define a new inode type
   1453 * index.  Also, like for directories, we need to have an attribute inode for
   1454 * the bitmap attribute corresponding to the index allocation attribute and we
   1455 * can store this in the appropriate field of the inode, just like we do for
   1456 * normal directory inodes.
   1457 *
   1458 * Q: What locks are held when the function is called?
   1459 * A: i_state has I_NEW set, hence the inode is locked, also
   1460 *    i_count is set to 1, so it is not going to go away
   1461 *
   1462 * Return 0 on success and -errno on error.  In the error case, the inode will
   1463 * have had make_bad_inode() executed on it.
   1464 */
   1465static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
   1466{
   1467	loff_t bvi_size;
   1468	ntfs_volume *vol = NTFS_SB(vi->i_sb);
   1469	ntfs_inode *ni, *base_ni, *bni;
   1470	struct inode *bvi;
   1471	MFT_RECORD *m;
   1472	ATTR_RECORD *a;
   1473	ntfs_attr_search_ctx *ctx;
   1474	INDEX_ROOT *ir;
   1475	u8 *ir_end, *index_end;
   1476	int err = 0;
   1477
   1478	ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino);
   1479	ntfs_init_big_inode(vi);
   1480	ni	= NTFS_I(vi);
   1481	base_ni = NTFS_I(base_vi);
   1482	/* Just mirror the values from the base inode. */
   1483	vi->i_uid	= base_vi->i_uid;
   1484	vi->i_gid	= base_vi->i_gid;
   1485	set_nlink(vi, base_vi->i_nlink);
   1486	vi->i_mtime	= base_vi->i_mtime;
   1487	vi->i_ctime	= base_vi->i_ctime;
   1488	vi->i_atime	= base_vi->i_atime;
   1489	vi->i_generation = ni->seq_no = base_ni->seq_no;
   1490	/* Set inode type to zero but preserve permissions. */
   1491	vi->i_mode	= base_vi->i_mode & ~S_IFMT;
   1492	/* Map the mft record for the base inode. */
   1493	m = map_mft_record(base_ni);
   1494	if (IS_ERR(m)) {
   1495		err = PTR_ERR(m);
   1496		goto err_out;
   1497	}
   1498	ctx = ntfs_attr_get_search_ctx(base_ni, m);
   1499	if (!ctx) {
   1500		err = -ENOMEM;
   1501		goto unm_err_out;
   1502	}
   1503	/* Find the index root attribute. */
   1504	err = ntfs_attr_lookup(AT_INDEX_ROOT, ni->name, ni->name_len,
   1505			CASE_SENSITIVE, 0, NULL, 0, ctx);
   1506	if (unlikely(err)) {
   1507		if (err == -ENOENT)
   1508			ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is "
   1509					"missing.");
   1510		goto unm_err_out;
   1511	}
   1512	a = ctx->attr;
   1513	/* Set up the state. */
   1514	if (unlikely(a->non_resident)) {
   1515		ntfs_error(vol->sb, "$INDEX_ROOT attribute is not resident.");
   1516		goto unm_err_out;
   1517	}
   1518	/* Ensure the attribute name is placed before the value. */
   1519	if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
   1520			le16_to_cpu(a->data.resident.value_offset)))) {
   1521		ntfs_error(vol->sb, "$INDEX_ROOT attribute name is placed "
   1522				"after the attribute value.");
   1523		goto unm_err_out;
   1524	}
   1525	/*
   1526	 * Compressed/encrypted/sparse index root is not allowed, except for
   1527	 * directories of course but those are not dealt with here.
   1528	 */
   1529	if (a->flags & (ATTR_COMPRESSION_MASK | ATTR_IS_ENCRYPTED |
   1530			ATTR_IS_SPARSE)) {
   1531		ntfs_error(vi->i_sb, "Found compressed/encrypted/sparse index "
   1532				"root attribute.");
   1533		goto unm_err_out;
   1534	}
   1535	ir = (INDEX_ROOT*)((u8*)a + le16_to_cpu(a->data.resident.value_offset));
   1536	ir_end = (u8*)ir + le32_to_cpu(a->data.resident.value_length);
   1537	if (ir_end > (u8*)ctx->mrec + vol->mft_record_size) {
   1538		ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is corrupt.");
   1539		goto unm_err_out;
   1540	}
   1541	index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
   1542	if (index_end > ir_end) {
   1543		ntfs_error(vi->i_sb, "Index is corrupt.");
   1544		goto unm_err_out;
   1545	}
   1546	if (ir->type) {
   1547		ntfs_error(vi->i_sb, "Index type is not 0 (type is 0x%x).",
   1548				le32_to_cpu(ir->type));
   1549		goto unm_err_out;
   1550	}
   1551	ni->itype.index.collation_rule = ir->collation_rule;
   1552	ntfs_debug("Index collation rule is 0x%x.",
   1553			le32_to_cpu(ir->collation_rule));
   1554	ni->itype.index.block_size = le32_to_cpu(ir->index_block_size);
   1555	if (!is_power_of_2(ni->itype.index.block_size)) {
   1556		ntfs_error(vi->i_sb, "Index block size (%u) is not a power of "
   1557				"two.", ni->itype.index.block_size);
   1558		goto unm_err_out;
   1559	}
   1560	if (ni->itype.index.block_size > PAGE_SIZE) {
   1561		ntfs_error(vi->i_sb, "Index block size (%u) > PAGE_SIZE "
   1562				"(%ld) is not supported.  Sorry.",
   1563				ni->itype.index.block_size, PAGE_SIZE);
   1564		err = -EOPNOTSUPP;
   1565		goto unm_err_out;
   1566	}
   1567	if (ni->itype.index.block_size < NTFS_BLOCK_SIZE) {
   1568		ntfs_error(vi->i_sb, "Index block size (%u) < NTFS_BLOCK_SIZE "
   1569				"(%i) is not supported.  Sorry.",
   1570				ni->itype.index.block_size, NTFS_BLOCK_SIZE);
   1571		err = -EOPNOTSUPP;
   1572		goto unm_err_out;
   1573	}
   1574	ni->itype.index.block_size_bits = ffs(ni->itype.index.block_size) - 1;
   1575	/* Determine the size of a vcn in the index. */
   1576	if (vol->cluster_size <= ni->itype.index.block_size) {
   1577		ni->itype.index.vcn_size = vol->cluster_size;
   1578		ni->itype.index.vcn_size_bits = vol->cluster_size_bits;
   1579	} else {
   1580		ni->itype.index.vcn_size = vol->sector_size;
   1581		ni->itype.index.vcn_size_bits = vol->sector_size_bits;
   1582	}
   1583	/* Check for presence of index allocation attribute. */
   1584	if (!(ir->index.flags & LARGE_INDEX)) {
   1585		/* No index allocation. */
   1586		vi->i_size = ni->initialized_size = ni->allocated_size = 0;
   1587		/* We are done with the mft record, so we release it. */
   1588		ntfs_attr_put_search_ctx(ctx);
   1589		unmap_mft_record(base_ni);
   1590		m = NULL;
   1591		ctx = NULL;
   1592		goto skip_large_index_stuff;
   1593	} /* LARGE_INDEX:  Index allocation present.  Setup state. */
   1594	NInoSetIndexAllocPresent(ni);
   1595	/* Find index allocation attribute. */
   1596	ntfs_attr_reinit_search_ctx(ctx);
   1597	err = ntfs_attr_lookup(AT_INDEX_ALLOCATION, ni->name, ni->name_len,
   1598			CASE_SENSITIVE, 0, NULL, 0, ctx);
   1599	if (unlikely(err)) {
   1600		if (err == -ENOENT)
   1601			ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
   1602					"not present but $INDEX_ROOT "
   1603					"indicated it is.");
   1604		else
   1605			ntfs_error(vi->i_sb, "Failed to lookup "
   1606					"$INDEX_ALLOCATION attribute.");
   1607		goto unm_err_out;
   1608	}
   1609	a = ctx->attr;
   1610	if (!a->non_resident) {
   1611		ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
   1612				"resident.");
   1613		goto unm_err_out;
   1614	}
   1615	/*
   1616	 * Ensure the attribute name is placed before the mapping pairs array.
   1617	 */
   1618	if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
   1619			le16_to_cpu(
   1620			a->data.non_resident.mapping_pairs_offset)))) {
   1621		ntfs_error(vol->sb, "$INDEX_ALLOCATION attribute name is "
   1622				"placed after the mapping pairs array.");
   1623		goto unm_err_out;
   1624	}
   1625	if (a->flags & ATTR_IS_ENCRYPTED) {
   1626		ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
   1627				"encrypted.");
   1628		goto unm_err_out;
   1629	}
   1630	if (a->flags & ATTR_IS_SPARSE) {
   1631		ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is sparse.");
   1632		goto unm_err_out;
   1633	}
   1634	if (a->flags & ATTR_COMPRESSION_MASK) {
   1635		ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
   1636				"compressed.");
   1637		goto unm_err_out;
   1638	}
   1639	if (a->data.non_resident.lowest_vcn) {
   1640		ntfs_error(vi->i_sb, "First extent of $INDEX_ALLOCATION "
   1641				"attribute has non zero lowest_vcn.");
   1642		goto unm_err_out;
   1643	}
   1644	vi->i_size = sle64_to_cpu(a->data.non_resident.data_size);
   1645	ni->initialized_size = sle64_to_cpu(
   1646			a->data.non_resident.initialized_size);
   1647	ni->allocated_size = sle64_to_cpu(a->data.non_resident.allocated_size);
   1648	/*
   1649	 * We are done with the mft record, so we release it.  Otherwise
   1650	 * we would deadlock in ntfs_attr_iget().
   1651	 */
   1652	ntfs_attr_put_search_ctx(ctx);
   1653	unmap_mft_record(base_ni);
   1654	m = NULL;
   1655	ctx = NULL;
   1656	/* Get the index bitmap attribute inode. */
   1657	bvi = ntfs_attr_iget(base_vi, AT_BITMAP, ni->name, ni->name_len);
   1658	if (IS_ERR(bvi)) {
   1659		ntfs_error(vi->i_sb, "Failed to get bitmap attribute.");
   1660		err = PTR_ERR(bvi);
   1661		goto unm_err_out;
   1662	}
   1663	bni = NTFS_I(bvi);
   1664	if (NInoCompressed(bni) || NInoEncrypted(bni) ||
   1665			NInoSparse(bni)) {
   1666		ntfs_error(vi->i_sb, "$BITMAP attribute is compressed and/or "
   1667				"encrypted and/or sparse.");
   1668		goto iput_unm_err_out;
   1669	}
   1670	/* Consistency check bitmap size vs. index allocation size. */
   1671	bvi_size = i_size_read(bvi);
   1672	if ((bvi_size << 3) < (vi->i_size >> ni->itype.index.block_size_bits)) {
   1673		ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) for "
   1674				"index allocation (0x%llx).", bvi_size << 3,
   1675				vi->i_size);
   1676		goto iput_unm_err_out;
   1677	}
   1678	iput(bvi);
   1679skip_large_index_stuff:
   1680	/* Setup the operations for this index inode. */
   1681	vi->i_mapping->a_ops = &ntfs_mst_aops;
   1682	vi->i_blocks = ni->allocated_size >> 9;
   1683	/*
   1684	 * Make sure the base inode doesn't go away and attach it to the
   1685	 * index inode.
   1686	 */
   1687	igrab(base_vi);
   1688	ni->ext.base_ntfs_ino = base_ni;
   1689	ni->nr_extents = -1;
   1690
   1691	ntfs_debug("Done.");
   1692	return 0;
   1693iput_unm_err_out:
   1694	iput(bvi);
   1695unm_err_out:
   1696	if (!err)
   1697		err = -EIO;
   1698	if (ctx)
   1699		ntfs_attr_put_search_ctx(ctx);
   1700	if (m)
   1701		unmap_mft_record(base_ni);
   1702err_out:
   1703	ntfs_error(vi->i_sb, "Failed with error code %i while reading index "
   1704			"inode (mft_no 0x%lx, name_len %i.", err, vi->i_ino,
   1705			ni->name_len);
   1706	make_bad_inode(vi);
   1707	if (err != -EOPNOTSUPP && err != -ENOMEM)
   1708		NVolSetErrors(vol);
   1709	return err;
   1710}
   1711
   1712/*
   1713 * The MFT inode has special locking, so teach the lock validator
   1714 * about this by splitting off the locking rules of the MFT from
   1715 * the locking rules of other inodes. The MFT inode can never be
   1716 * accessed from the VFS side (or even internally), only by the
   1717 * map_mft functions.
   1718 */
   1719static struct lock_class_key mft_ni_runlist_lock_key, mft_ni_mrec_lock_key;
   1720
   1721/**
   1722 * ntfs_read_inode_mount - special read_inode for mount time use only
   1723 * @vi:		inode to read
   1724 *
   1725 * Read inode FILE_MFT at mount time, only called with super_block lock
   1726 * held from within the read_super() code path.
   1727 *
   1728 * This function exists because when it is called the page cache for $MFT/$DATA
   1729 * is not initialized and hence we cannot get at the contents of mft records
   1730 * by calling map_mft_record*().
   1731 *
   1732 * Further it needs to cope with the circular references problem, i.e. cannot
   1733 * load any attributes other than $ATTRIBUTE_LIST until $DATA is loaded, because
   1734 * we do not know where the other extent mft records are yet and again, because
   1735 * we cannot call map_mft_record*() yet.  Obviously this applies only when an
   1736 * attribute list is actually present in $MFT inode.
   1737 *
   1738 * We solve these problems by starting with the $DATA attribute before anything
   1739 * else and iterating using ntfs_attr_lookup($DATA) over all extents.  As each
   1740 * extent is found, we ntfs_mapping_pairs_decompress() including the implied
   1741 * ntfs_runlists_merge().  Each step of the iteration necessarily provides
   1742 * sufficient information for the next step to complete.
   1743 *
   1744 * This should work but there are two possible pit falls (see inline comments
   1745 * below), but only time will tell if they are real pits or just smoke...
   1746 */
   1747int ntfs_read_inode_mount(struct inode *vi)
   1748{
   1749	VCN next_vcn, last_vcn, highest_vcn;
   1750	s64 block;
   1751	struct super_block *sb = vi->i_sb;
   1752	ntfs_volume *vol = NTFS_SB(sb);
   1753	struct buffer_head *bh;
   1754	ntfs_inode *ni;
   1755	MFT_RECORD *m = NULL;
   1756	ATTR_RECORD *a;
   1757	ntfs_attr_search_ctx *ctx;
   1758	unsigned int i, nr_blocks;
   1759	int err;
   1760
   1761	ntfs_debug("Entering.");
   1762
   1763	/* Initialize the ntfs specific part of @vi. */
   1764	ntfs_init_big_inode(vi);
   1765
   1766	ni = NTFS_I(vi);
   1767
   1768	/* Setup the data attribute. It is special as it is mst protected. */
   1769	NInoSetNonResident(ni);
   1770	NInoSetMstProtected(ni);
   1771	NInoSetSparseDisabled(ni);
   1772	ni->type = AT_DATA;
   1773	ni->name = NULL;
   1774	ni->name_len = 0;
   1775	/*
   1776	 * This sets up our little cheat allowing us to reuse the async read io
   1777	 * completion handler for directories.
   1778	 */
   1779	ni->itype.index.block_size = vol->mft_record_size;
   1780	ni->itype.index.block_size_bits = vol->mft_record_size_bits;
   1781
   1782	/* Very important! Needed to be able to call map_mft_record*(). */
   1783	vol->mft_ino = vi;
   1784
   1785	/* Allocate enough memory to read the first mft record. */
   1786	if (vol->mft_record_size > 64 * 1024) {
   1787		ntfs_error(sb, "Unsupported mft record size %i (max 64kiB).",
   1788				vol->mft_record_size);
   1789		goto err_out;
   1790	}
   1791	i = vol->mft_record_size;
   1792	if (i < sb->s_blocksize)
   1793		i = sb->s_blocksize;
   1794	m = (MFT_RECORD*)ntfs_malloc_nofs(i);
   1795	if (!m) {
   1796		ntfs_error(sb, "Failed to allocate buffer for $MFT record 0.");
   1797		goto err_out;
   1798	}
   1799
   1800	/* Determine the first block of the $MFT/$DATA attribute. */
   1801	block = vol->mft_lcn << vol->cluster_size_bits >>
   1802			sb->s_blocksize_bits;
   1803	nr_blocks = vol->mft_record_size >> sb->s_blocksize_bits;
   1804	if (!nr_blocks)
   1805		nr_blocks = 1;
   1806
   1807	/* Load $MFT/$DATA's first mft record. */
   1808	for (i = 0; i < nr_blocks; i++) {
   1809		bh = sb_bread(sb, block++);
   1810		if (!bh) {
   1811			ntfs_error(sb, "Device read failed.");
   1812			goto err_out;
   1813		}
   1814		memcpy((char*)m + (i << sb->s_blocksize_bits), bh->b_data,
   1815				sb->s_blocksize);
   1816		brelse(bh);
   1817	}
   1818
   1819	if (le32_to_cpu(m->bytes_allocated) != vol->mft_record_size) {
   1820		ntfs_error(sb, "Incorrect mft record size %u in superblock, should be %u.",
   1821				le32_to_cpu(m->bytes_allocated), vol->mft_record_size);
   1822		goto err_out;
   1823	}
   1824
   1825	/* Apply the mst fixups. */
   1826	if (post_read_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size)) {
   1827		/* FIXME: Try to use the $MFTMirr now. */
   1828		ntfs_error(sb, "MST fixup failed. $MFT is corrupt.");
   1829		goto err_out;
   1830	}
   1831
   1832	/* Need this to sanity check attribute list references to $MFT. */
   1833	vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number);
   1834
   1835	/* Provides read_folio() for map_mft_record(). */
   1836	vi->i_mapping->a_ops = &ntfs_mst_aops;
   1837
   1838	ctx = ntfs_attr_get_search_ctx(ni, m);
   1839	if (!ctx) {
   1840		err = -ENOMEM;
   1841		goto err_out;
   1842	}
   1843
   1844	/* Find the attribute list attribute if present. */
   1845	err = ntfs_attr_lookup(AT_ATTRIBUTE_LIST, NULL, 0, 0, 0, NULL, 0, ctx);
   1846	if (err) {
   1847		if (unlikely(err != -ENOENT)) {
   1848			ntfs_error(sb, "Failed to lookup attribute list "
   1849					"attribute. You should run chkdsk.");
   1850			goto put_err_out;
   1851		}
   1852	} else /* if (!err) */ {
   1853		ATTR_LIST_ENTRY *al_entry, *next_al_entry;
   1854		u8 *al_end;
   1855		static const char *es = "  Not allowed.  $MFT is corrupt.  "
   1856				"You should run chkdsk.";
   1857
   1858		ntfs_debug("Attribute list attribute found in $MFT.");
   1859		NInoSetAttrList(ni);
   1860		a = ctx->attr;
   1861		if (a->flags & ATTR_COMPRESSION_MASK) {
   1862			ntfs_error(sb, "Attribute list attribute is "
   1863					"compressed.%s", es);
   1864			goto put_err_out;
   1865		}
   1866		if (a->flags & ATTR_IS_ENCRYPTED ||
   1867				a->flags & ATTR_IS_SPARSE) {
   1868			if (a->non_resident) {
   1869				ntfs_error(sb, "Non-resident attribute list "
   1870						"attribute is encrypted/"
   1871						"sparse.%s", es);
   1872				goto put_err_out;
   1873			}
   1874			ntfs_warning(sb, "Resident attribute list attribute "
   1875					"in $MFT system file is marked "
   1876					"encrypted/sparse which is not true.  "
   1877					"However, Windows allows this and "
   1878					"chkdsk does not detect or correct it "
   1879					"so we will just ignore the invalid "
   1880					"flags and pretend they are not set.");
   1881		}
   1882		/* Now allocate memory for the attribute list. */
   1883		ni->attr_list_size = (u32)ntfs_attr_size(a);
   1884		if (!ni->attr_list_size) {
   1885			ntfs_error(sb, "Attr_list_size is zero");
   1886			goto put_err_out;
   1887		}
   1888		ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size);
   1889		if (!ni->attr_list) {
   1890			ntfs_error(sb, "Not enough memory to allocate buffer "
   1891					"for attribute list.");
   1892			goto put_err_out;
   1893		}
   1894		if (a->non_resident) {
   1895			NInoSetAttrListNonResident(ni);
   1896			if (a->data.non_resident.lowest_vcn) {
   1897				ntfs_error(sb, "Attribute list has non zero "
   1898						"lowest_vcn. $MFT is corrupt. "
   1899						"You should run chkdsk.");
   1900				goto put_err_out;
   1901			}
   1902			/* Setup the runlist. */
   1903			ni->attr_list_rl.rl = ntfs_mapping_pairs_decompress(vol,
   1904					a, NULL);
   1905			if (IS_ERR(ni->attr_list_rl.rl)) {
   1906				err = PTR_ERR(ni->attr_list_rl.rl);
   1907				ni->attr_list_rl.rl = NULL;
   1908				ntfs_error(sb, "Mapping pairs decompression "
   1909						"failed with error code %i.",
   1910						-err);
   1911				goto put_err_out;
   1912			}
   1913			/* Now load the attribute list. */
   1914			if ((err = load_attribute_list(vol, &ni->attr_list_rl,
   1915					ni->attr_list, ni->attr_list_size,
   1916					sle64_to_cpu(a->data.
   1917					non_resident.initialized_size)))) {
   1918				ntfs_error(sb, "Failed to load attribute list "
   1919						"attribute with error code %i.",
   1920						-err);
   1921				goto put_err_out;
   1922			}
   1923		} else /* if (!ctx.attr->non_resident) */ {
   1924			if ((u8*)a + le16_to_cpu(
   1925					a->data.resident.value_offset) +
   1926					le32_to_cpu(
   1927					a->data.resident.value_length) >
   1928					(u8*)ctx->mrec + vol->mft_record_size) {
   1929				ntfs_error(sb, "Corrupt attribute list "
   1930						"attribute.");
   1931				goto put_err_out;
   1932			}
   1933			/* Now copy the attribute list. */
   1934			memcpy(ni->attr_list, (u8*)a + le16_to_cpu(
   1935					a->data.resident.value_offset),
   1936					le32_to_cpu(
   1937					a->data.resident.value_length));
   1938		}
   1939		/* The attribute list is now setup in memory. */
   1940		/*
   1941		 * FIXME: I don't know if this case is actually possible.
   1942		 * According to logic it is not possible but I have seen too
   1943		 * many weird things in MS software to rely on logic... Thus we
   1944		 * perform a manual search and make sure the first $MFT/$DATA
   1945		 * extent is in the base inode. If it is not we abort with an
   1946		 * error and if we ever see a report of this error we will need
   1947		 * to do some magic in order to have the necessary mft record
   1948		 * loaded and in the right place in the page cache. But
   1949		 * hopefully logic will prevail and this never happens...
   1950		 */
   1951		al_entry = (ATTR_LIST_ENTRY*)ni->attr_list;
   1952		al_end = (u8*)al_entry + ni->attr_list_size;
   1953		for (;; al_entry = next_al_entry) {
   1954			/* Out of bounds check. */
   1955			if ((u8*)al_entry < ni->attr_list ||
   1956					(u8*)al_entry > al_end)
   1957				goto em_put_err_out;
   1958			/* Catch the end of the attribute list. */
   1959			if ((u8*)al_entry == al_end)
   1960				goto em_put_err_out;
   1961			if (!al_entry->length)
   1962				goto em_put_err_out;
   1963			if ((u8*)al_entry + 6 > al_end || (u8*)al_entry +
   1964					le16_to_cpu(al_entry->length) > al_end)
   1965				goto em_put_err_out;
   1966			next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry +
   1967					le16_to_cpu(al_entry->length));
   1968			if (le32_to_cpu(al_entry->type) > le32_to_cpu(AT_DATA))
   1969				goto em_put_err_out;
   1970			if (AT_DATA != al_entry->type)
   1971				continue;
   1972			/* We want an unnamed attribute. */
   1973			if (al_entry->name_length)
   1974				goto em_put_err_out;
   1975			/* Want the first entry, i.e. lowest_vcn == 0. */
   1976			if (al_entry->lowest_vcn)
   1977				goto em_put_err_out;
   1978			/* First entry has to be in the base mft record. */
   1979			if (MREF_LE(al_entry->mft_reference) != vi->i_ino) {
   1980				/* MFT references do not match, logic fails. */
   1981				ntfs_error(sb, "BUG: The first $DATA extent "
   1982						"of $MFT is not in the base "
   1983						"mft record. Please report "
   1984						"you saw this message to "
   1985						"linux-ntfs-dev@lists."
   1986						"sourceforge.net");
   1987				goto put_err_out;
   1988			} else {
   1989				/* Sequence numbers must match. */
   1990				if (MSEQNO_LE(al_entry->mft_reference) !=
   1991						ni->seq_no)
   1992					goto em_put_err_out;
   1993				/* Got it. All is ok. We can stop now. */
   1994				break;
   1995			}
   1996		}
   1997	}
   1998
   1999	ntfs_attr_reinit_search_ctx(ctx);
   2000
   2001	/* Now load all attribute extents. */
   2002	a = NULL;
   2003	next_vcn = last_vcn = highest_vcn = 0;
   2004	while (!(err = ntfs_attr_lookup(AT_DATA, NULL, 0, 0, next_vcn, NULL, 0,
   2005			ctx))) {
   2006		runlist_element *nrl;
   2007
   2008		/* Cache the current attribute. */
   2009		a = ctx->attr;
   2010		/* $MFT must be non-resident. */
   2011		if (!a->non_resident) {
   2012			ntfs_error(sb, "$MFT must be non-resident but a "
   2013					"resident extent was found. $MFT is "
   2014					"corrupt. Run chkdsk.");
   2015			goto put_err_out;
   2016		}
   2017		/* $MFT must be uncompressed and unencrypted. */
   2018		if (a->flags & ATTR_COMPRESSION_MASK ||
   2019				a->flags & ATTR_IS_ENCRYPTED ||
   2020				a->flags & ATTR_IS_SPARSE) {
   2021			ntfs_error(sb, "$MFT must be uncompressed, "
   2022					"non-sparse, and unencrypted but a "
   2023					"compressed/sparse/encrypted extent "
   2024					"was found. $MFT is corrupt. Run "
   2025					"chkdsk.");
   2026			goto put_err_out;
   2027		}
   2028		/*
   2029		 * Decompress the mapping pairs array of this extent and merge
   2030		 * the result into the existing runlist. No need for locking
   2031		 * as we have exclusive access to the inode at this time and we
   2032		 * are a mount in progress task, too.
   2033		 */
   2034		nrl = ntfs_mapping_pairs_decompress(vol, a, ni->runlist.rl);
   2035		if (IS_ERR(nrl)) {
   2036			ntfs_error(sb, "ntfs_mapping_pairs_decompress() "
   2037					"failed with error code %ld.  $MFT is "
   2038					"corrupt.", PTR_ERR(nrl));
   2039			goto put_err_out;
   2040		}
   2041		ni->runlist.rl = nrl;
   2042
   2043		/* Are we in the first extent? */
   2044		if (!next_vcn) {
   2045			if (a->data.non_resident.lowest_vcn) {
   2046				ntfs_error(sb, "First extent of $DATA "
   2047						"attribute has non zero "
   2048						"lowest_vcn. $MFT is corrupt. "
   2049						"You should run chkdsk.");
   2050				goto put_err_out;
   2051			}
   2052			/* Get the last vcn in the $DATA attribute. */
   2053			last_vcn = sle64_to_cpu(
   2054					a->data.non_resident.allocated_size)
   2055					>> vol->cluster_size_bits;
   2056			/* Fill in the inode size. */
   2057			vi->i_size = sle64_to_cpu(
   2058					a->data.non_resident.data_size);
   2059			ni->initialized_size = sle64_to_cpu(
   2060					a->data.non_resident.initialized_size);
   2061			ni->allocated_size = sle64_to_cpu(
   2062					a->data.non_resident.allocated_size);
   2063			/*
   2064			 * Verify the number of mft records does not exceed
   2065			 * 2^32 - 1.
   2066			 */
   2067			if ((vi->i_size >> vol->mft_record_size_bits) >=
   2068					(1ULL << 32)) {
   2069				ntfs_error(sb, "$MFT is too big! Aborting.");
   2070				goto put_err_out;
   2071			}
   2072			/*
   2073			 * We have got the first extent of the runlist for
   2074			 * $MFT which means it is now relatively safe to call
   2075			 * the normal ntfs_read_inode() function.
   2076			 * Complete reading the inode, this will actually
   2077			 * re-read the mft record for $MFT, this time entering
   2078			 * it into the page cache with which we complete the
   2079			 * kick start of the volume. It should be safe to do
   2080			 * this now as the first extent of $MFT/$DATA is
   2081			 * already known and we would hope that we don't need
   2082			 * further extents in order to find the other
   2083			 * attributes belonging to $MFT. Only time will tell if
   2084			 * this is really the case. If not we will have to play
   2085			 * magic at this point, possibly duplicating a lot of
   2086			 * ntfs_read_inode() at this point. We will need to
   2087			 * ensure we do enough of its work to be able to call
   2088			 * ntfs_read_inode() on extents of $MFT/$DATA. But lets
   2089			 * hope this never happens...
   2090			 */
   2091			ntfs_read_locked_inode(vi);
   2092			if (is_bad_inode(vi)) {
   2093				ntfs_error(sb, "ntfs_read_inode() of $MFT "
   2094						"failed. BUG or corrupt $MFT. "
   2095						"Run chkdsk and if no errors "
   2096						"are found, please report you "
   2097						"saw this message to "
   2098						"linux-ntfs-dev@lists."
   2099						"sourceforge.net");
   2100				ntfs_attr_put_search_ctx(ctx);
   2101				/* Revert to the safe super operations. */
   2102				ntfs_free(m);
   2103				return -1;
   2104			}
   2105			/*
   2106			 * Re-initialize some specifics about $MFT's inode as
   2107			 * ntfs_read_inode() will have set up the default ones.
   2108			 */
   2109			/* Set uid and gid to root. */
   2110			vi->i_uid = GLOBAL_ROOT_UID;
   2111			vi->i_gid = GLOBAL_ROOT_GID;
   2112			/* Regular file. No access for anyone. */
   2113			vi->i_mode = S_IFREG;
   2114			/* No VFS initiated operations allowed for $MFT. */
   2115			vi->i_op = &ntfs_empty_inode_ops;
   2116			vi->i_fop = &ntfs_empty_file_ops;
   2117		}
   2118
   2119		/* Get the lowest vcn for the next extent. */
   2120		highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
   2121		next_vcn = highest_vcn + 1;
   2122
   2123		/* Only one extent or error, which we catch below. */
   2124		if (next_vcn <= 0)
   2125			break;
   2126
   2127		/* Avoid endless loops due to corruption. */
   2128		if (next_vcn < sle64_to_cpu(
   2129				a->data.non_resident.lowest_vcn)) {
   2130			ntfs_error(sb, "$MFT has corrupt attribute list "
   2131					"attribute. Run chkdsk.");
   2132			goto put_err_out;
   2133		}
   2134	}
   2135	if (err != -ENOENT) {
   2136		ntfs_error(sb, "Failed to lookup $MFT/$DATA attribute extent. "
   2137				"$MFT is corrupt. Run chkdsk.");
   2138		goto put_err_out;
   2139	}
   2140	if (!a) {
   2141		ntfs_error(sb, "$MFT/$DATA attribute not found. $MFT is "
   2142				"corrupt. Run chkdsk.");
   2143		goto put_err_out;
   2144	}
   2145	if (highest_vcn && highest_vcn != last_vcn - 1) {
   2146		ntfs_error(sb, "Failed to load the complete runlist for "
   2147				"$MFT/$DATA. Driver bug or corrupt $MFT. "
   2148				"Run chkdsk.");
   2149		ntfs_debug("highest_vcn = 0x%llx, last_vcn - 1 = 0x%llx",
   2150				(unsigned long long)highest_vcn,
   2151				(unsigned long long)last_vcn - 1);
   2152		goto put_err_out;
   2153	}
   2154	ntfs_attr_put_search_ctx(ctx);
   2155	ntfs_debug("Done.");
   2156	ntfs_free(m);
   2157
   2158	/*
   2159	 * Split the locking rules of the MFT inode from the
   2160	 * locking rules of other inodes:
   2161	 */
   2162	lockdep_set_class(&ni->runlist.lock, &mft_ni_runlist_lock_key);
   2163	lockdep_set_class(&ni->mrec_lock, &mft_ni_mrec_lock_key);
   2164
   2165	return 0;
   2166
   2167em_put_err_out:
   2168	ntfs_error(sb, "Couldn't find first extent of $DATA attribute in "
   2169			"attribute list. $MFT is corrupt. Run chkdsk.");
   2170put_err_out:
   2171	ntfs_attr_put_search_ctx(ctx);
   2172err_out:
   2173	ntfs_error(sb, "Failed. Marking inode as bad.");
   2174	make_bad_inode(vi);
   2175	ntfs_free(m);
   2176	return -1;
   2177}
   2178
   2179static void __ntfs_clear_inode(ntfs_inode *ni)
   2180{
   2181	/* Free all alocated memory. */
   2182	down_write(&ni->runlist.lock);
   2183	if (ni->runlist.rl) {
   2184		ntfs_free(ni->runlist.rl);
   2185		ni->runlist.rl = NULL;
   2186	}
   2187	up_write(&ni->runlist.lock);
   2188
   2189	if (ni->attr_list) {
   2190		ntfs_free(ni->attr_list);
   2191		ni->attr_list = NULL;
   2192	}
   2193
   2194	down_write(&ni->attr_list_rl.lock);
   2195	if (ni->attr_list_rl.rl) {
   2196		ntfs_free(ni->attr_list_rl.rl);
   2197		ni->attr_list_rl.rl = NULL;
   2198	}
   2199	up_write(&ni->attr_list_rl.lock);
   2200
   2201	if (ni->name_len && ni->name != I30) {
   2202		/* Catch bugs... */
   2203		BUG_ON(!ni->name);
   2204		kfree(ni->name);
   2205	}
   2206}
   2207
   2208void ntfs_clear_extent_inode(ntfs_inode *ni)
   2209{
   2210	ntfs_debug("Entering for inode 0x%lx.", ni->mft_no);
   2211
   2212	BUG_ON(NInoAttr(ni));
   2213	BUG_ON(ni->nr_extents != -1);
   2214
   2215#ifdef NTFS_RW
   2216	if (NInoDirty(ni)) {
   2217		if (!is_bad_inode(VFS_I(ni->ext.base_ntfs_ino)))
   2218			ntfs_error(ni->vol->sb, "Clearing dirty extent inode!  "
   2219					"Losing data!  This is a BUG!!!");
   2220		// FIXME:  Do something!!!
   2221	}
   2222#endif /* NTFS_RW */
   2223
   2224	__ntfs_clear_inode(ni);
   2225
   2226	/* Bye, bye... */
   2227	ntfs_destroy_extent_inode(ni);
   2228}
   2229
   2230/**
   2231 * ntfs_evict_big_inode - clean up the ntfs specific part of an inode
   2232 * @vi:		vfs inode pending annihilation
   2233 *
   2234 * When the VFS is going to remove an inode from memory, ntfs_clear_big_inode()
   2235 * is called, which deallocates all memory belonging to the NTFS specific part
   2236 * of the inode and returns.
   2237 *
   2238 * If the MFT record is dirty, we commit it before doing anything else.
   2239 */
   2240void ntfs_evict_big_inode(struct inode *vi)
   2241{
   2242	ntfs_inode *ni = NTFS_I(vi);
   2243
   2244	truncate_inode_pages_final(&vi->i_data);
   2245	clear_inode(vi);
   2246
   2247#ifdef NTFS_RW
   2248	if (NInoDirty(ni)) {
   2249		bool was_bad = (is_bad_inode(vi));
   2250
   2251		/* Committing the inode also commits all extent inodes. */
   2252		ntfs_commit_inode(vi);
   2253
   2254		if (!was_bad && (is_bad_inode(vi) || NInoDirty(ni))) {
   2255			ntfs_error(vi->i_sb, "Failed to commit dirty inode "
   2256					"0x%lx.  Losing data!", vi->i_ino);
   2257			// FIXME:  Do something!!!
   2258		}
   2259	}
   2260#endif /* NTFS_RW */
   2261
   2262	/* No need to lock at this stage as no one else has a reference. */
   2263	if (ni->nr_extents > 0) {
   2264		int i;
   2265
   2266		for (i = 0; i < ni->nr_extents; i++)
   2267			ntfs_clear_extent_inode(ni->ext.extent_ntfs_inos[i]);
   2268		kfree(ni->ext.extent_ntfs_inos);
   2269	}
   2270
   2271	__ntfs_clear_inode(ni);
   2272
   2273	if (NInoAttr(ni)) {
   2274		/* Release the base inode if we are holding it. */
   2275		if (ni->nr_extents == -1) {
   2276			iput(VFS_I(ni->ext.base_ntfs_ino));
   2277			ni->nr_extents = 0;
   2278			ni->ext.base_ntfs_ino = NULL;
   2279		}
   2280	}
   2281	BUG_ON(ni->page);
   2282	if (!atomic_dec_and_test(&ni->count))
   2283		BUG();
   2284	return;
   2285}
   2286
   2287/**
   2288 * ntfs_show_options - show mount options in /proc/mounts
   2289 * @sf:		seq_file in which to write our mount options
   2290 * @root:	root of the mounted tree whose mount options to display
   2291 *
   2292 * Called by the VFS once for each mounted ntfs volume when someone reads
   2293 * /proc/mounts in order to display the NTFS specific mount options of each
   2294 * mount. The mount options of fs specified by @root are written to the seq file
   2295 * @sf and success is returned.
   2296 */
   2297int ntfs_show_options(struct seq_file *sf, struct dentry *root)
   2298{
   2299	ntfs_volume *vol = NTFS_SB(root->d_sb);
   2300	int i;
   2301
   2302	seq_printf(sf, ",uid=%i", from_kuid_munged(&init_user_ns, vol->uid));
   2303	seq_printf(sf, ",gid=%i", from_kgid_munged(&init_user_ns, vol->gid));
   2304	if (vol->fmask == vol->dmask)
   2305		seq_printf(sf, ",umask=0%o", vol->fmask);
   2306	else {
   2307		seq_printf(sf, ",fmask=0%o", vol->fmask);
   2308		seq_printf(sf, ",dmask=0%o", vol->dmask);
   2309	}
   2310	seq_printf(sf, ",nls=%s", vol->nls_map->charset);
   2311	if (NVolCaseSensitive(vol))
   2312		seq_printf(sf, ",case_sensitive");
   2313	if (NVolShowSystemFiles(vol))
   2314		seq_printf(sf, ",show_sys_files");
   2315	if (!NVolSparseEnabled(vol))
   2316		seq_printf(sf, ",disable_sparse");
   2317	for (i = 0; on_errors_arr[i].val; i++) {
   2318		if (on_errors_arr[i].val & vol->on_errors)
   2319			seq_printf(sf, ",errors=%s", on_errors_arr[i].str);
   2320	}
   2321	seq_printf(sf, ",mft_zone_multiplier=%i", vol->mft_zone_multiplier);
   2322	return 0;
   2323}
   2324
   2325#ifdef NTFS_RW
   2326
   /*
    * Common message trailer for the NTFS_RW only code.
    * NOTE(review): its users lie further down in this file, presumably as the
    * "%s" suffix of error messages - confirm.
    */
   static const char *es = "  Leaving inconsistent metadata.  Unmount and run chkdsk.";
   2329
   2330/**
   2331 * ntfs_truncate - called when the i_size of an ntfs inode is changed
   2332 * @vi:		inode for which the i_size was changed
   2333 *
   2334 * We only support i_size changes for normal files at present, i.e. not
   2335 * compressed and not encrypted.  This is enforced in ntfs_setattr(), see
   2336 * below.
   2337 *
   2338 * The kernel guarantees that @vi is a regular file (S_ISREG() is true) and
   2339 * that the change is allowed.
   2340 *
   2341 * This implies for us that @vi is a file inode rather than a directory, index,
   2342 * or attribute inode as well as that @vi is a base inode.
   2343 *
   2344 * Returns 0 on success or -errno on error.
   2345 *
   2346 * Called with ->i_mutex held.
   2347 */
   2348int ntfs_truncate(struct inode *vi)
   2349{
   2350	s64 new_size, old_size, nr_freed, new_alloc_size, old_alloc_size;
   2351	VCN highest_vcn;
   2352	unsigned long flags;
   2353	ntfs_inode *base_ni, *ni = NTFS_I(vi);
   2354	ntfs_volume *vol = ni->vol;
   2355	ntfs_attr_search_ctx *ctx;
   2356	MFT_RECORD *m;
   2357	ATTR_RECORD *a;
   2358	const char *te = "  Leaving file length out of sync with i_size.";
   2359	int err, mp_size, size_change, alloc_change;
   2360
   2361	ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
   2362	BUG_ON(NInoAttr(ni));
   2363	BUG_ON(S_ISDIR(vi->i_mode));
   2364	BUG_ON(NInoMstProtected(ni));
   2365	BUG_ON(ni->nr_extents < 0);
   2366retry_truncate:
   2367	/*
   2368	 * Lock the runlist for writing and map the mft record to ensure it is
   2369	 * safe to mess with the attribute runlist and sizes.
   2370	 */
   2371	down_write(&ni->runlist.lock);
   2372	if (!NInoAttr(ni))
   2373		base_ni = ni;
   2374	else
   2375		base_ni = ni->ext.base_ntfs_ino;
   2376	m = map_mft_record(base_ni);
   2377	if (IS_ERR(m)) {
   2378		err = PTR_ERR(m);
   2379		ntfs_error(vi->i_sb, "Failed to map mft record for inode 0x%lx "
   2380				"(error code %d).%s", vi->i_ino, err, te);
   2381		ctx = NULL;
   2382		m = NULL;
   2383		goto old_bad_out;
   2384	}
   2385	ctx = ntfs_attr_get_search_ctx(base_ni, m);
   2386	if (unlikely(!ctx)) {
   2387		ntfs_error(vi->i_sb, "Failed to allocate a search context for "
   2388				"inode 0x%lx (not enough memory).%s",
   2389				vi->i_ino, te);
   2390		err = -ENOMEM;
   2391		goto old_bad_out;
   2392	}
   2393	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
   2394			CASE_SENSITIVE, 0, NULL, 0, ctx);
   2395	if (unlikely(err)) {
   2396		if (err == -ENOENT) {
   2397			ntfs_error(vi->i_sb, "Open attribute is missing from "
   2398					"mft record.  Inode 0x%lx is corrupt.  "
   2399					"Run chkdsk.%s", vi->i_ino, te);
   2400			err = -EIO;
   2401		} else
   2402			ntfs_error(vi->i_sb, "Failed to lookup attribute in "
   2403					"inode 0x%lx (error code %d).%s",
   2404					vi->i_ino, err, te);
   2405		goto old_bad_out;
   2406	}
   2407	m = ctx->mrec;
   2408	a = ctx->attr;
   2409	/*
   2410	 * The i_size of the vfs inode is the new size for the attribute value.
   2411	 */
   2412	new_size = i_size_read(vi);
   2413	/* The current size of the attribute value is the old size. */
   2414	old_size = ntfs_attr_size(a);
   2415	/* Calculate the new allocated size. */
   2416	if (NInoNonResident(ni))
   2417		new_alloc_size = (new_size + vol->cluster_size - 1) &
   2418				~(s64)vol->cluster_size_mask;
   2419	else
   2420		new_alloc_size = (new_size + 7) & ~7;
   2421	/* The current allocated size is the old allocated size. */
   2422	read_lock_irqsave(&ni->size_lock, flags);
   2423	old_alloc_size = ni->allocated_size;
   2424	read_unlock_irqrestore(&ni->size_lock, flags);
   2425	/*
   2426	 * The change in the file size.  This will be 0 if no change, >0 if the
   2427	 * size is growing, and <0 if the size is shrinking.
   2428	 */
   2429	size_change = -1;
   2430	if (new_size - old_size >= 0) {
   2431		size_change = 1;
   2432		if (new_size == old_size)
   2433			size_change = 0;
   2434	}
   2435	/* As above for the allocated size. */
   2436	alloc_change = -1;
   2437	if (new_alloc_size - old_alloc_size >= 0) {
   2438		alloc_change = 1;
   2439		if (new_alloc_size == old_alloc_size)
   2440			alloc_change = 0;
   2441	}
   2442	/*
   2443	 * If neither the size nor the allocation are being changed there is
   2444	 * nothing to do.
   2445	 */
   2446	if (!size_change && !alloc_change)
   2447		goto unm_done;
   2448	/* If the size is changing, check if new size is allowed in $AttrDef. */
   2449	if (size_change) {
   2450		err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
   2451		if (unlikely(err)) {
   2452			if (err == -ERANGE) {
   2453				ntfs_error(vol->sb, "Truncate would cause the "
   2454						"inode 0x%lx to %simum size "
   2455						"for its attribute type "
   2456						"(0x%x).  Aborting truncate.",
   2457						vi->i_ino,
   2458						new_size > old_size ? "exceed "
   2459						"the max" : "go under the min",
   2460						le32_to_cpu(ni->type));
   2461				err = -EFBIG;
   2462			} else {
   2463				ntfs_error(vol->sb, "Inode 0x%lx has unknown "
   2464						"attribute type 0x%x.  "
   2465						"Aborting truncate.",
   2466						vi->i_ino,
   2467						le32_to_cpu(ni->type));
   2468				err = -EIO;
   2469			}
   2470			/* Reset the vfs inode size to the old size. */
   2471			i_size_write(vi, old_size);
   2472			goto err_out;
   2473		}
   2474	}
   2475	if (NInoCompressed(ni) || NInoEncrypted(ni)) {
   2476		ntfs_warning(vi->i_sb, "Changes in inode size are not "
   2477				"supported yet for %s files, ignoring.",
   2478				NInoCompressed(ni) ? "compressed" :
   2479				"encrypted");
   2480		err = -EOPNOTSUPP;
   2481		goto bad_out;
   2482	}
   2483	if (a->non_resident)
   2484		goto do_non_resident_truncate;
   2485	BUG_ON(NInoNonResident(ni));
   2486	/* Resize the attribute record to best fit the new attribute size. */
   2487	if (new_size < vol->mft_record_size &&
   2488			!ntfs_resident_attr_value_resize(m, a, new_size)) {
   2489		/* The resize succeeded! */
   2490		flush_dcache_mft_record_page(ctx->ntfs_ino);
   2491		mark_mft_record_dirty(ctx->ntfs_ino);
   2492		write_lock_irqsave(&ni->size_lock, flags);
   2493		/* Update the sizes in the ntfs inode and all is done. */
   2494		ni->allocated_size = le32_to_cpu(a->length) -
   2495				le16_to_cpu(a->data.resident.value_offset);
   2496		/*
   2497		 * Note ntfs_resident_attr_value_resize() has already done any
   2498		 * necessary data clearing in the attribute record.  When the
   2499		 * file is being shrunk vmtruncate() will already have cleared
   2500		 * the top part of the last partial page, i.e. since this is
   2501		 * the resident case this is the page with index 0.  However,
   2502		 * when the file is being expanded, the page cache page data
   2503		 * between the old data_size, i.e. old_size, and the new_size
   2504		 * has not been zeroed.  Fortunately, we do not need to zero it
   2505		 * either since on one hand it will either already be zero due
   2506		 * to both read_folio and writepage clearing partial page data
   2507		 * beyond i_size in which case there is nothing to do or in the
   2508		 * case of the file being mmap()ped at the same time, POSIX
   2509		 * specifies that the behaviour is unspecified thus we do not
   2510		 * have to do anything.  This means that in our implementation
   2511		 * in the rare case that the file is mmap()ped and a write
   2512		 * occurred into the mmap()ped region just beyond the file size
   2513		 * and writepage has not yet been called to write out the page
   2514		 * (which would clear the area beyond the file size) and we now
   2515		 * extend the file size to incorporate this dirty region
   2516		 * outside the file size, a write of the page would result in
   2517		 * this data being written to disk instead of being cleared.
   2518		 * Given both POSIX and the Linux mmap(2) man page specify that
   2519		 * this corner case is undefined, we choose to leave it like
   2520		 * that as this is much simpler for us as we cannot lock the
   2521		 * relevant page now since we are holding too many ntfs locks
   2522		 * which would result in a lock reversal deadlock.
   2523		 */
   2524		ni->initialized_size = new_size;
   2525		write_unlock_irqrestore(&ni->size_lock, flags);
   2526		goto unm_done;
   2527	}
   2528	/* If the above resize failed, this must be an attribute extension. */
   2529	BUG_ON(size_change < 0);
   2530	/*
   2531	 * We have to drop all the locks so we can call
   2532	 * ntfs_attr_make_non_resident().  This could be optimised by try-
   2533	 * locking the first page cache page and only if that fails dropping
   2534	 * the locks, locking the page, and redoing all the locking and
   2535	 * lookups.  While this would be a huge optimisation, it is not worth
   2536	 * it as this is definitely a slow code path as it only ever can happen
   2537	 * once for any given file.
   2538	 */
   2539	ntfs_attr_put_search_ctx(ctx);
   2540	unmap_mft_record(base_ni);
   2541	up_write(&ni->runlist.lock);
   2542	/*
   2543	 * Not enough space in the mft record, try to make the attribute
   2544	 * non-resident and if successful restart the truncation process.
   2545	 */
   2546	err = ntfs_attr_make_non_resident(ni, old_size);
   2547	if (likely(!err))
   2548		goto retry_truncate;
   2549	/*
   2550	 * Could not make non-resident.  If this is due to this not being
   2551	 * permitted for this attribute type or there not being enough space,
   2552	 * try to make other attributes non-resident.  Otherwise fail.
   2553	 */
   2554	if (unlikely(err != -EPERM && err != -ENOSPC)) {
   2555		ntfs_error(vol->sb, "Cannot truncate inode 0x%lx, attribute "
   2556				"type 0x%x, because the conversion from "
   2557				"resident to non-resident attribute failed "
   2558				"with error code %i.", vi->i_ino,
   2559				(unsigned)le32_to_cpu(ni->type), err);
   2560		if (err != -ENOMEM)
   2561			err = -EIO;
   2562		goto conv_err_out;
   2563	}
   2564	/* TODO: Not implemented from here, abort. */
   2565	if (err == -ENOSPC)
   2566		ntfs_error(vol->sb, "Not enough space in the mft record/on "
   2567				"disk for the non-resident attribute value.  "
   2568				"This case is not implemented yet.");
   2569	else /* if (err == -EPERM) */
   2570		ntfs_error(vol->sb, "This attribute type may not be "
   2571				"non-resident.  This case is not implemented "
   2572				"yet.");
   2573	err = -EOPNOTSUPP;
   2574	goto conv_err_out;
   2575#if 0
   2576	// TODO: Attempt to make other attributes non-resident.
   2577	if (!err)
   2578		goto do_resident_extend;
   2579	/*
   2580	 * Both the attribute list attribute and the standard information
   2581	 * attribute must remain in the base inode.  Thus, if this is one of
   2582	 * these attributes, we have to try to move other attributes out into
   2583	 * extent mft records instead.
   2584	 */
   2585	if (ni->type == AT_ATTRIBUTE_LIST ||
   2586			ni->type == AT_STANDARD_INFORMATION) {
   2587		// TODO: Attempt to move other attributes into extent mft
   2588		// records.
   2589		err = -EOPNOTSUPP;
   2590		if (!err)
   2591			goto do_resident_extend;
   2592		goto err_out;
   2593	}
   2594	// TODO: Attempt to move this attribute to an extent mft record, but
   2595	// only if it is not already the only attribute in an mft record in
   2596	// which case there would be nothing to gain.
   2597	err = -EOPNOTSUPP;
   2598	if (!err)
   2599		goto do_resident_extend;
   2600	/* There is nothing we can do to make enough space. )-: */
   2601	goto err_out;
   2602#endif
   2603do_non_resident_truncate:
   2604	BUG_ON(!NInoNonResident(ni));
   2605	if (alloc_change < 0) {
   2606		highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
   2607		if (highest_vcn > 0 &&
   2608				old_alloc_size >> vol->cluster_size_bits >
   2609				highest_vcn + 1) {
   2610			/*
   2611			 * This attribute has multiple extents.  Not yet
   2612			 * supported.
   2613			 */
   2614			ntfs_error(vol->sb, "Cannot truncate inode 0x%lx, "
   2615					"attribute type 0x%x, because the "
   2616					"attribute is highly fragmented (it "
   2617					"consists of multiple extents) and "
   2618					"this case is not implemented yet.",
   2619					vi->i_ino,
   2620					(unsigned)le32_to_cpu(ni->type));
   2621			err = -EOPNOTSUPP;
   2622			goto bad_out;
   2623		}
   2624	}
   2625	/*
   2626	 * If the size is shrinking, need to reduce the initialized_size and
   2627	 * the data_size before reducing the allocation.
   2628	 */
   2629	if (size_change < 0) {
   2630		/*
   2631		 * Make the valid size smaller (i_size is already up-to-date).
   2632		 */
   2633		write_lock_irqsave(&ni->size_lock, flags);
   2634		if (new_size < ni->initialized_size) {
   2635			ni->initialized_size = new_size;
   2636			a->data.non_resident.initialized_size =
   2637					cpu_to_sle64(new_size);
   2638		}
   2639		a->data.non_resident.data_size = cpu_to_sle64(new_size);
   2640		write_unlock_irqrestore(&ni->size_lock, flags);
   2641		flush_dcache_mft_record_page(ctx->ntfs_ino);
   2642		mark_mft_record_dirty(ctx->ntfs_ino);
   2643		/* If the allocated size is not changing, we are done. */
   2644		if (!alloc_change)
   2645			goto unm_done;
   2646		/*
   2647		 * If the size is shrinking it makes no sense for the
   2648		 * allocation to be growing.
   2649		 */
   2650		BUG_ON(alloc_change > 0);
   2651	} else /* if (size_change >= 0) */ {
   2652		/*
   2653		 * The file size is growing or staying the same but the
   2654		 * allocation can be shrinking, growing or staying the same.
   2655		 */
   2656		if (alloc_change > 0) {
   2657			/*
   2658			 * We need to extend the allocation and possibly update
   2659			 * the data size.  If we are updating the data size,
   2660			 * since we are not touching the initialized_size we do
   2661			 * not need to worry about the actual data on disk.
   2662			 * And as far as the page cache is concerned, there
   2663			 * will be no pages beyond the old data size and any
   2664			 * partial region in the last page between the old and
   2665			 * new data size (or the end of the page if the new
   2666			 * data size is outside the page) does not need to be
   2667			 * modified as explained above for the resident
   2668			 * attribute truncate case.  To do this, we simply drop
   2669			 * the locks we hold and leave all the work to our
   2670			 * friendly helper ntfs_attr_extend_allocation().
   2671			 */
   2672			ntfs_attr_put_search_ctx(ctx);
   2673			unmap_mft_record(base_ni);
   2674			up_write(&ni->runlist.lock);
   2675			err = ntfs_attr_extend_allocation(ni, new_size,
   2676					size_change > 0 ? new_size : -1, -1);
   2677			/*
   2678			 * ntfs_attr_extend_allocation() will have done error
   2679			 * output already.
   2680			 */
   2681			goto done;
   2682		}
   2683		if (!alloc_change)
   2684			goto alloc_done;
   2685	}
   2686	/* alloc_change < 0 */
   2687	/* Free the clusters. */
   2688	nr_freed = ntfs_cluster_free(ni, new_alloc_size >>
   2689			vol->cluster_size_bits, -1, ctx);
   2690	m = ctx->mrec;
   2691	a = ctx->attr;
   2692	if (unlikely(nr_freed < 0)) {
   2693		ntfs_error(vol->sb, "Failed to release cluster(s) (error code "
   2694				"%lli).  Unmount and run chkdsk to recover "
   2695				"the lost cluster(s).", (long long)nr_freed);
   2696		NVolSetErrors(vol);
   2697		nr_freed = 0;
   2698	}
   2699	/* Truncate the runlist. */
   2700	err = ntfs_rl_truncate_nolock(vol, &ni->runlist,
   2701			new_alloc_size >> vol->cluster_size_bits);
   2702	/*
   2703	 * If the runlist truncation failed and/or the search context is no
   2704	 * longer valid, we cannot resize the attribute record or build the
   2705	 * mapping pairs array thus we mark the inode bad so that no access to
   2706	 * the freed clusters can happen.
   2707	 */
   2708	if (unlikely(err || IS_ERR(m))) {
   2709		ntfs_error(vol->sb, "Failed to %s (error code %li).%s",
   2710				IS_ERR(m) ?
   2711				"restore attribute search context" :
   2712				"truncate attribute runlist",
   2713				IS_ERR(m) ? PTR_ERR(m) : err, es);
   2714		err = -EIO;
   2715		goto bad_out;
   2716	}
   2717	/* Get the size for the shrunk mapping pairs array for the runlist. */
   2718	mp_size = ntfs_get_size_for_mapping_pairs(vol, ni->runlist.rl, 0, -1);
   2719	if (unlikely(mp_size <= 0)) {
   2720		ntfs_error(vol->sb, "Cannot shrink allocation of inode 0x%lx, "
   2721				"attribute type 0x%x, because determining the "
   2722				"size for the mapping pairs failed with error "
   2723				"code %i.%s", vi->i_ino,
   2724				(unsigned)le32_to_cpu(ni->type), mp_size, es);
   2725		err = -EIO;
   2726		goto bad_out;
   2727	}
   2728	/*
   2729	 * Shrink the attribute record for the new mapping pairs array.  Note,
   2730	 * this cannot fail since we are making the attribute smaller thus by
   2731	 * definition there is enough space to do so.
   2732	 */
   2733	err = ntfs_attr_record_resize(m, a, mp_size +
   2734			le16_to_cpu(a->data.non_resident.mapping_pairs_offset));
   2735	BUG_ON(err);
   2736	/*
   2737	 * Generate the mapping pairs array directly into the attribute record.
   2738	 */
   2739	err = ntfs_mapping_pairs_build(vol, (u8*)a +
   2740			le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
   2741			mp_size, ni->runlist.rl, 0, -1, NULL);
   2742	if (unlikely(err)) {
   2743		ntfs_error(vol->sb, "Cannot shrink allocation of inode 0x%lx, "
   2744				"attribute type 0x%x, because building the "
   2745				"mapping pairs failed with error code %i.%s",
   2746				vi->i_ino, (unsigned)le32_to_cpu(ni->type),
   2747				err, es);
   2748		err = -EIO;
   2749		goto bad_out;
   2750	}
   2751	/* Update the allocated/compressed size as well as the highest vcn. */
   2752	a->data.non_resident.highest_vcn = cpu_to_sle64((new_alloc_size >>
   2753			vol->cluster_size_bits) - 1);
   2754	write_lock_irqsave(&ni->size_lock, flags);
   2755	ni->allocated_size = new_alloc_size;
   2756	a->data.non_resident.allocated_size = cpu_to_sle64(new_alloc_size);
   2757	if (NInoSparse(ni) || NInoCompressed(ni)) {
   2758		if (nr_freed) {
   2759			ni->itype.compressed.size -= nr_freed <<
   2760					vol->cluster_size_bits;
   2761			BUG_ON(ni->itype.compressed.size < 0);
   2762			a->data.non_resident.compressed_size = cpu_to_sle64(
   2763					ni->itype.compressed.size);
   2764			vi->i_blocks = ni->itype.compressed.size >> 9;
   2765		}
   2766	} else
   2767		vi->i_blocks = new_alloc_size >> 9;
   2768	write_unlock_irqrestore(&ni->size_lock, flags);
   2769	/*
   2770	 * We have shrunk the allocation.  If this is a shrinking truncate we
   2771	 * have already dealt with the initialized_size and the data_size above
   2772	 * and we are done.  If the truncate is only changing the allocation
   2773	 * and not the data_size, we are also done.  If this is an extending
   2774	 * truncate, need to extend the data_size now which is ensured by the
   2775	 * fact that @size_change is positive.
   2776	 */
   2777alloc_done:
   2778	/*
   2779	 * If the size is growing, need to update it now.  If it is shrinking,
   2780	 * we have already updated it above (before the allocation change).
   2781	 */
   2782	if (size_change > 0)
   2783		a->data.non_resident.data_size = cpu_to_sle64(new_size);
   2784	/* Ensure the modified mft record is written out. */
   2785	flush_dcache_mft_record_page(ctx->ntfs_ino);
   2786	mark_mft_record_dirty(ctx->ntfs_ino);
   2787unm_done:
   2788	ntfs_attr_put_search_ctx(ctx);
   2789	unmap_mft_record(base_ni);
   2790	up_write(&ni->runlist.lock);
   2791done:
   2792	/* Update the mtime and ctime on the base inode. */
   2793	/* normally ->truncate shouldn't update ctime or mtime,
   2794	 * but ntfs did before so it got a copy & paste version
   2795	 * of file_update_time.  one day someone should fix this
   2796	 * for real.
   2797	 */
   2798	if (!IS_NOCMTIME(VFS_I(base_ni)) && !IS_RDONLY(VFS_I(base_ni))) {
   2799		struct timespec64 now = current_time(VFS_I(base_ni));
   2800		int sync_it = 0;
   2801
   2802		if (!timespec64_equal(&VFS_I(base_ni)->i_mtime, &now) ||
   2803		    !timespec64_equal(&VFS_I(base_ni)->i_ctime, &now))
   2804			sync_it = 1;
   2805		VFS_I(base_ni)->i_mtime = now;
   2806		VFS_I(base_ni)->i_ctime = now;
   2807
   2808		if (sync_it)
   2809			mark_inode_dirty_sync(VFS_I(base_ni));
   2810	}
   2811
   2812	if (likely(!err)) {
   2813		NInoClearTruncateFailed(ni);
   2814		ntfs_debug("Done.");
   2815	}
   2816	return err;
   2817old_bad_out:
   2818	old_size = -1;
   2819bad_out:
   2820	if (err != -ENOMEM && err != -EOPNOTSUPP)
   2821		NVolSetErrors(vol);
   2822	if (err != -EOPNOTSUPP)
   2823		NInoSetTruncateFailed(ni);
   2824	else if (old_size >= 0)
   2825		i_size_write(vi, old_size);
   2826err_out:
   2827	if (ctx)
   2828		ntfs_attr_put_search_ctx(ctx);
   2829	if (m)
   2830		unmap_mft_record(base_ni);
   2831	up_write(&ni->runlist.lock);
   2832out:
   2833	ntfs_debug("Failed.  Returning error code %i.", err);
   2834	return err;
   2835conv_err_out:
   2836	if (err != -ENOMEM && err != -EOPNOTSUPP)
   2837		NVolSetErrors(vol);
   2838	if (err != -EOPNOTSUPP)
   2839		NInoSetTruncateFailed(ni);
   2840	else
   2841		i_size_write(vi, old_size);
   2842	goto out;
   2843}
   2844
   2845/**
   2846 * ntfs_truncate_vfs - wrapper for ntfs_truncate() that has no return value
   2847 * @vi:		inode for which the i_size was changed
   2848 *
   2849 * Wrapper for ntfs_truncate() that has no return value.
   2850 *
   2851 * See ntfs_truncate() description above for details.
   2852 */
   2853#ifdef NTFS_RW
   2854void ntfs_truncate_vfs(struct inode *vi) {
   2855	ntfs_truncate(vi);
   2856}
   2857#endif
   2858
   2859/**
   2860 * ntfs_setattr - called from notify_change() when an attribute is being changed
   2861 * @mnt_userns:	user namespace of the mount the inode was found from
   2862 * @dentry:	dentry whose attributes to change
   2863 * @attr:	structure describing the attributes and the changes
   2864 *
   2865 * We have to trap VFS attempts to truncate the file described by @dentry as
   2866 * soon as possible, because we do not implement changes in i_size yet.  So we
   2867 * abort all i_size changes here.
   2868 *
   2869 * We also abort all changes of user, group, and mode as we do not implement
   2870 * the NTFS ACLs yet.
   2871 *
   2872 * Called with ->i_mutex held.
   2873 */
   2874int ntfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
   2875		 struct iattr *attr)
   2876{
   2877	struct inode *vi = d_inode(dentry);
   2878	int err;
   2879	unsigned int ia_valid = attr->ia_valid;
   2880
   2881	err = setattr_prepare(&init_user_ns, dentry, attr);
   2882	if (err)
   2883		goto out;
   2884	/* We do not support NTFS ACLs yet. */
   2885	if (ia_valid & (ATTR_UID | ATTR_GID | ATTR_MODE)) {
   2886		ntfs_warning(vi->i_sb, "Changes in user/group/mode are not "
   2887				"supported yet, ignoring.");
   2888		err = -EOPNOTSUPP;
   2889		goto out;
   2890	}
   2891	if (ia_valid & ATTR_SIZE) {
   2892		if (attr->ia_size != i_size_read(vi)) {
   2893			ntfs_inode *ni = NTFS_I(vi);
   2894			/*
   2895			 * FIXME: For now we do not support resizing of
   2896			 * compressed or encrypted files yet.
   2897			 */
   2898			if (NInoCompressed(ni) || NInoEncrypted(ni)) {
   2899				ntfs_warning(vi->i_sb, "Changes in inode size "
   2900						"are not supported yet for "
   2901						"%s files, ignoring.",
   2902						NInoCompressed(ni) ?
   2903						"compressed" : "encrypted");
   2904				err = -EOPNOTSUPP;
   2905			} else {
   2906				truncate_setsize(vi, attr->ia_size);
   2907				ntfs_truncate_vfs(vi);
   2908			}
   2909			if (err || ia_valid == ATTR_SIZE)
   2910				goto out;
   2911		} else {
   2912			/*
   2913			 * We skipped the truncate but must still update
   2914			 * timestamps.
   2915			 */
   2916			ia_valid |= ATTR_MTIME | ATTR_CTIME;
   2917		}
   2918	}
   2919	if (ia_valid & ATTR_ATIME)
   2920		vi->i_atime = attr->ia_atime;
   2921	if (ia_valid & ATTR_MTIME)
   2922		vi->i_mtime = attr->ia_mtime;
   2923	if (ia_valid & ATTR_CTIME)
   2924		vi->i_ctime = attr->ia_ctime;
   2925	mark_inode_dirty(vi);
   2926out:
   2927	return err;
   2928}
   2929
   2930/**
   2931 * ntfs_write_inode - write out a dirty inode
   2932 * @vi:		inode to write out
   2933 * @sync:	if true, write out synchronously
   2934 *
   2935 * Write out a dirty inode to disk including any extent inodes if present.
   2936 *
   2937 * If @sync is true, commit the inode to disk and wait for io completion.  This
   2938 * is done using write_mft_record().
   2939 *
   2940 * If @sync is false, just schedule the write to happen but do not wait for i/o
   2941 * completion.  In 2.6 kernels, scheduling usually happens just by virtue of
   2942 * marking the page (and in this case mft record) dirty but we do not implement
   2943 * this yet as write_mft_record() largely ignores the @sync parameter and
   2944 * always performs synchronous writes.
   2945 *
   2946 * Return 0 on success and -errno on error.
   2947 */
   2948int __ntfs_write_inode(struct inode *vi, int sync)
   2949{
   2950	sle64 nt;
   2951	ntfs_inode *ni = NTFS_I(vi);
   2952	ntfs_attr_search_ctx *ctx;
   2953	MFT_RECORD *m;
   2954	STANDARD_INFORMATION *si;
   2955	int err = 0;
   2956	bool modified = false;
   2957
   2958	ntfs_debug("Entering for %sinode 0x%lx.", NInoAttr(ni) ? "attr " : "",
   2959			vi->i_ino);
   2960	/*
   2961	 * Dirty attribute inodes are written via their real inodes so just
   2962	 * clean them here.  Access time updates are taken care off when the
   2963	 * real inode is written.
   2964	 */
   2965	if (NInoAttr(ni)) {
   2966		NInoClearDirty(ni);
   2967		ntfs_debug("Done.");
   2968		return 0;
   2969	}
   2970	/* Map, pin, and lock the mft record belonging to the inode. */
   2971	m = map_mft_record(ni);
   2972	if (IS_ERR(m)) {
   2973		err = PTR_ERR(m);
   2974		goto err_out;
   2975	}
   2976	/* Update the access times in the standard information attribute. */
   2977	ctx = ntfs_attr_get_search_ctx(ni, m);
   2978	if (unlikely(!ctx)) {
   2979		err = -ENOMEM;
   2980		goto unm_err_out;
   2981	}
   2982	err = ntfs_attr_lookup(AT_STANDARD_INFORMATION, NULL, 0,
   2983			CASE_SENSITIVE, 0, NULL, 0, ctx);
   2984	if (unlikely(err)) {
   2985		ntfs_attr_put_search_ctx(ctx);
   2986		goto unm_err_out;
   2987	}
   2988	si = (STANDARD_INFORMATION*)((u8*)ctx->attr +
   2989			le16_to_cpu(ctx->attr->data.resident.value_offset));
   2990	/* Update the access times if they have changed. */
   2991	nt = utc2ntfs(vi->i_mtime);
   2992	if (si->last_data_change_time != nt) {
   2993		ntfs_debug("Updating mtime for inode 0x%lx: old = 0x%llx, "
   2994				"new = 0x%llx", vi->i_ino, (long long)
   2995				sle64_to_cpu(si->last_data_change_time),
   2996				(long long)sle64_to_cpu(nt));
   2997		si->last_data_change_time = nt;
   2998		modified = true;
   2999	}
   3000	nt = utc2ntfs(vi->i_ctime);
   3001	if (si->last_mft_change_time != nt) {
   3002		ntfs_debug("Updating ctime for inode 0x%lx: old = 0x%llx, "
   3003				"new = 0x%llx", vi->i_ino, (long long)
   3004				sle64_to_cpu(si->last_mft_change_time),
   3005				(long long)sle64_to_cpu(nt));
   3006		si->last_mft_change_time = nt;
   3007		modified = true;
   3008	}
   3009	nt = utc2ntfs(vi->i_atime);
   3010	if (si->last_access_time != nt) {
   3011		ntfs_debug("Updating atime for inode 0x%lx: old = 0x%llx, "
   3012				"new = 0x%llx", vi->i_ino,
   3013				(long long)sle64_to_cpu(si->last_access_time),
   3014				(long long)sle64_to_cpu(nt));
   3015		si->last_access_time = nt;
   3016		modified = true;
   3017	}
   3018	/*
   3019	 * If we just modified the standard information attribute we need to
   3020	 * mark the mft record it is in dirty.  We do this manually so that
   3021	 * mark_inode_dirty() is not called which would redirty the inode and
   3022	 * hence result in an infinite loop of trying to write the inode.
   3023	 * There is no need to mark the base inode nor the base mft record
   3024	 * dirty, since we are going to write this mft record below in any case
   3025	 * and the base mft record may actually not have been modified so it
   3026	 * might not need to be written out.
   3027	 * NOTE: It is not a problem when the inode for $MFT itself is being
   3028	 * written out as mark_ntfs_record_dirty() will only set I_DIRTY_PAGES
   3029	 * on the $MFT inode and hence ntfs_write_inode() will not be
   3030	 * re-invoked because of it which in turn is ok since the dirtied mft
   3031	 * record will be cleaned and written out to disk below, i.e. before
   3032	 * this function returns.
   3033	 */
   3034	if (modified) {
   3035		flush_dcache_mft_record_page(ctx->ntfs_ino);
   3036		if (!NInoTestSetDirty(ctx->ntfs_ino))
   3037			mark_ntfs_record_dirty(ctx->ntfs_ino->page,
   3038					ctx->ntfs_ino->page_ofs);
   3039	}
   3040	ntfs_attr_put_search_ctx(ctx);
   3041	/* Now the access times are updated, write the base mft record. */
   3042	if (NInoDirty(ni))
   3043		err = write_mft_record(ni, m, sync);
   3044	/* Write all attached extent mft records. */
   3045	mutex_lock(&ni->extent_lock);
   3046	if (ni->nr_extents > 0) {
   3047		ntfs_inode **extent_nis = ni->ext.extent_ntfs_inos;
   3048		int i;
   3049
   3050		ntfs_debug("Writing %i extent inodes.", ni->nr_extents);
   3051		for (i = 0; i < ni->nr_extents; i++) {
   3052			ntfs_inode *tni = extent_nis[i];
   3053
   3054			if (NInoDirty(tni)) {
   3055				MFT_RECORD *tm = map_mft_record(tni);
   3056				int ret;
   3057
   3058				if (IS_ERR(tm)) {
   3059					if (!err || err == -ENOMEM)
   3060						err = PTR_ERR(tm);
   3061					continue;
   3062				}
   3063				ret = write_mft_record(tni, tm, sync);
   3064				unmap_mft_record(tni);
   3065				if (unlikely(ret)) {
   3066					if (!err || err == -ENOMEM)
   3067						err = ret;
   3068				}
   3069			}
   3070		}
   3071	}
   3072	mutex_unlock(&ni->extent_lock);
   3073	unmap_mft_record(ni);
   3074	if (unlikely(err))
   3075		goto err_out;
   3076	ntfs_debug("Done.");
   3077	return 0;
   3078unm_err_out:
   3079	unmap_mft_record(ni);
   3080err_out:
   3081	if (err == -ENOMEM) {
   3082		ntfs_warning(vi->i_sb, "Not enough memory to write inode.  "
   3083				"Marking the inode dirty again, so the VFS "
   3084				"retries later.");
   3085		mark_inode_dirty(vi);
   3086	} else {
   3087		ntfs_error(vi->i_sb, "Failed (error %i):  Run chkdsk.", -err);
   3088		NVolSetErrors(ni->vol);
   3089	}
   3090	return err;
   3091}
   3092
   3093#endif /* NTFS_RW */