cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

ioctl.c (41062B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * linux/fs/ext4/ioctl.c
      4 *
      5 * Copyright (C) 1993, 1994, 1995
      6 * Remy Card (card@masi.ibp.fr)
      7 * Laboratoire MASI - Institut Blaise Pascal
      8 * Universite Pierre et Marie Curie (Paris VI)
      9 */
     10
     11#include <linux/fs.h>
     12#include <linux/capability.h>
     13#include <linux/time.h>
     14#include <linux/compat.h>
     15#include <linux/mount.h>
     16#include <linux/file.h>
     17#include <linux/quotaops.h>
     18#include <linux/random.h>
     19#include <linux/uaccess.h>
     20#include <linux/delay.h>
     21#include <linux/iversion.h>
     22#include <linux/fileattr.h>
     23#include "ext4_jbd2.h"
     24#include "ext4.h"
     25#include <linux/fsmap.h>
     26#include "fsmap.h"
     27#include <trace/events/ext4.h>
     28
/*
 * Signature of a superblock modification callback: it is handed the
 * superblock to modify ('es') and an opaque, callback-specific
 * argument ('arg').
 */
typedef void ext4_update_sb_callback(struct ext4_super_block *es,
				       const void *arg);
     31
     32/*
     33 * Superblock modification callback function for changing file system
     34 * label
     35 */
     36static void ext4_sb_setlabel(struct ext4_super_block *es, const void *arg)
     37{
     38	/* Sanity check, this should never happen */
     39	BUILD_BUG_ON(sizeof(es->s_volume_name) < EXT4_LABEL_MAX);
     40
     41	memcpy(es->s_volume_name, (char *)arg, EXT4_LABEL_MAX);
     42}
     43
     44static
     45int ext4_update_primary_sb(struct super_block *sb, handle_t *handle,
     46			   ext4_update_sb_callback func,
     47			   const void *arg)
     48{
     49	int err = 0;
     50	struct ext4_sb_info *sbi = EXT4_SB(sb);
     51	struct buffer_head *bh = sbi->s_sbh;
     52	struct ext4_super_block *es = sbi->s_es;
     53
     54	trace_ext4_update_sb(sb, bh->b_blocknr, 1);
     55
     56	BUFFER_TRACE(bh, "get_write_access");
     57	err = ext4_journal_get_write_access(handle, sb,
     58					    bh,
     59					    EXT4_JTR_NONE);
     60	if (err)
     61		goto out_err;
     62
     63	lock_buffer(bh);
     64	func(es, arg);
     65	ext4_superblock_csum_set(sb);
     66	unlock_buffer(bh);
     67
     68	if (buffer_write_io_error(bh) || !buffer_uptodate(bh)) {
     69		ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to "
     70			 "superblock detected");
     71		clear_buffer_write_io_error(bh);
     72		set_buffer_uptodate(bh);
     73	}
     74
     75	err = ext4_handle_dirty_metadata(handle, NULL, bh);
     76	if (err)
     77		goto out_err;
     78	err = sync_dirty_buffer(bh);
     79out_err:
     80	ext4_std_error(sb, err);
     81	return err;
     82}
     83
     84/*
     85 * Update one backup superblock in the group 'grp' using the callback
     86 * function 'func' and argument 'arg'. If the handle is NULL the
     87 * modification is not journalled.
     88 *
     89 * Returns: 0 when no modification was done (no superblock in the group)
     90 *	    1 when the modification was successful
     91 *	   <0 on error
     92 */
     93static int ext4_update_backup_sb(struct super_block *sb,
     94				 handle_t *handle, ext4_group_t grp,
     95				 ext4_update_sb_callback func, const void *arg)
     96{
     97	int err = 0;
     98	ext4_fsblk_t sb_block;
     99	struct buffer_head *bh;
    100	unsigned long offset = 0;
    101	struct ext4_super_block *es;
    102
    103	if (!ext4_bg_has_super(sb, grp))
    104		return 0;
    105
    106	/*
    107	 * For the group 0 there is always 1k padding, so we have
    108	 * either adjust offset, or sb_block depending on blocksize
    109	 */
    110	if (grp == 0) {
    111		sb_block = 1 * EXT4_MIN_BLOCK_SIZE;
    112		offset = do_div(sb_block, sb->s_blocksize);
    113	} else {
    114		sb_block = ext4_group_first_block_no(sb, grp);
    115		offset = 0;
    116	}
    117
    118	trace_ext4_update_sb(sb, sb_block, handle ? 1 : 0);
    119
    120	bh = ext4_sb_bread(sb, sb_block, 0);
    121	if (IS_ERR(bh))
    122		return PTR_ERR(bh);
    123
    124	if (handle) {
    125		BUFFER_TRACE(bh, "get_write_access");
    126		err = ext4_journal_get_write_access(handle, sb,
    127						    bh,
    128						    EXT4_JTR_NONE);
    129		if (err)
    130			goto out_bh;
    131	}
    132
    133	es = (struct ext4_super_block *) (bh->b_data + offset);
    134	lock_buffer(bh);
    135	if (ext4_has_metadata_csum(sb) &&
    136	    es->s_checksum != ext4_superblock_csum(sb, es)) {
    137		ext4_msg(sb, KERN_ERR, "Invalid checksum for backup "
    138		"superblock %llu\n", sb_block);
    139		unlock_buffer(bh);
    140		err = -EFSBADCRC;
    141		goto out_bh;
    142	}
    143	func(es, arg);
    144	if (ext4_has_metadata_csum(sb))
    145		es->s_checksum = ext4_superblock_csum(sb, es);
    146	set_buffer_uptodate(bh);
    147	unlock_buffer(bh);
    148
    149	if (err)
    150		goto out_bh;
    151
    152	if (handle) {
    153		err = ext4_handle_dirty_metadata(handle, NULL, bh);
    154		if (err)
    155			goto out_bh;
    156	} else {
    157		BUFFER_TRACE(bh, "marking dirty");
    158		mark_buffer_dirty(bh);
    159	}
    160	err = sync_dirty_buffer(bh);
    161
    162out_bh:
    163	brelse(bh);
    164	ext4_std_error(sb, err);
    165	return (err) ? err : 1;
    166}
    167
    168/*
    169 * Update primary and backup superblocks using the provided function
    170 * func and argument arg.
    171 *
    172 * Only the primary superblock and at most two backup superblock
    173 * modifications are journalled; the rest is modified without journal.
    174 * This is safe because e2fsck will re-write them if there is a problem,
    175 * and we're very unlikely to ever need more than two backups.
    176 */
    177static
    178int ext4_update_superblocks_fn(struct super_block *sb,
    179			       ext4_update_sb_callback func,
    180			       const void *arg)
    181{
    182	handle_t *handle;
    183	ext4_group_t ngroups;
    184	unsigned int three = 1;
    185	unsigned int five = 5;
    186	unsigned int seven = 7;
    187	int err = 0, ret, i;
    188	ext4_group_t grp, primary_grp;
    189	struct ext4_sb_info *sbi = EXT4_SB(sb);
    190
    191	/*
    192	 * We can't update superblocks while the online resize is running
    193	 */
    194	if (test_and_set_bit_lock(EXT4_FLAGS_RESIZING,
    195				  &sbi->s_ext4_flags)) {
    196		ext4_msg(sb, KERN_ERR, "Can't modify superblock while"
    197			 "performing online resize");
    198		return -EBUSY;
    199	}
    200
    201	/*
    202	 * We're only going to update primary superblock and two
    203	 * backup superblocks in this transaction.
    204	 */
    205	handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 3);
    206	if (IS_ERR(handle)) {
    207		err = PTR_ERR(handle);
    208		goto out;
    209	}
    210
    211	/* Update primary superblock */
    212	err = ext4_update_primary_sb(sb, handle, func, arg);
    213	if (err) {
    214		ext4_msg(sb, KERN_ERR, "Failed to update primary "
    215			 "superblock");
    216		goto out_journal;
    217	}
    218
    219	primary_grp = ext4_get_group_number(sb, sbi->s_sbh->b_blocknr);
    220	ngroups = ext4_get_groups_count(sb);
    221
    222	/*
    223	 * Update backup superblocks. We have to start from group 0
    224	 * because it might not be where the primary superblock is
    225	 * if the fs is mounted with -o sb=<backup_sb_block>
    226	 */
    227	i = 0;
    228	grp = 0;
    229	while (grp < ngroups) {
    230		/* Skip primary superblock */
    231		if (grp == primary_grp)
    232			goto next_grp;
    233
    234		ret = ext4_update_backup_sb(sb, handle, grp, func, arg);
    235		if (ret < 0) {
    236			/* Ignore bad checksum; try to update next sb */
    237			if (ret == -EFSBADCRC)
    238				goto next_grp;
    239			err = ret;
    240			goto out_journal;
    241		}
    242
    243		i += ret;
    244		if (handle && i > 1) {
    245			/*
    246			 * We're only journalling primary superblock and
    247			 * two backup superblocks; the rest is not
    248			 * journalled.
    249			 */
    250			err = ext4_journal_stop(handle);
    251			if (err)
    252				goto out;
    253			handle = NULL;
    254		}
    255next_grp:
    256		grp = ext4_list_backups(sb, &three, &five, &seven);
    257	}
    258
    259out_journal:
    260	if (handle) {
    261		ret = ext4_journal_stop(handle);
    262		if (ret && !err)
    263			err = ret;
    264	}
    265out:
    266	clear_bit_unlock(EXT4_FLAGS_RESIZING, &sbi->s_ext4_flags);
    267	smp_mb__after_atomic();
    268	return err ? err : 0;
    269}
    270
    271/*
    272 * Swap memory between @a and @b for @len bytes.
    273 *
    274 * @a:          pointer to first memory area
    275 * @b:          pointer to second memory area
    276 * @len:        number of bytes to swap
    277 *
    278 */
    279static void memswap(void *a, void *b, size_t len)
    280{
    281	unsigned char *ap, *bp;
    282
    283	ap = (unsigned char *)a;
    284	bp = (unsigned char *)b;
    285	while (len-- > 0) {
    286		swap(*ap, *bp);
    287		ap++;
    288		bp++;
    289	}
    290}
    291
    292/*
    293 * Swap i_data and associated attributes between @inode1 and @inode2.
    294 * This function is used for the primary swap between inode1 and inode2
    295 * and also to revert this primary swap in case of errors.
    296 *
    297 * Therefore you have to make sure, that calling this method twice
    298 * will revert all changes.
    299 *
    300 * @inode1:     pointer to first inode
    301 * @inode2:     pointer to second inode
    302 */
    303static void swap_inode_data(struct inode *inode1, struct inode *inode2)
    304{
    305	loff_t isize;
    306	struct ext4_inode_info *ei1;
    307	struct ext4_inode_info *ei2;
    308	unsigned long tmp;
    309
    310	ei1 = EXT4_I(inode1);
    311	ei2 = EXT4_I(inode2);
    312
    313	swap(inode1->i_version, inode2->i_version);
    314	swap(inode1->i_atime, inode2->i_atime);
    315	swap(inode1->i_mtime, inode2->i_mtime);
    316
    317	memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data));
    318	tmp = ei1->i_flags & EXT4_FL_SHOULD_SWAP;
    319	ei1->i_flags = (ei2->i_flags & EXT4_FL_SHOULD_SWAP) |
    320		(ei1->i_flags & ~EXT4_FL_SHOULD_SWAP);
    321	ei2->i_flags = tmp | (ei2->i_flags & ~EXT4_FL_SHOULD_SWAP);
    322	swap(ei1->i_disksize, ei2->i_disksize);
    323	ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS);
    324	ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS);
    325
    326	isize = i_size_read(inode1);
    327	i_size_write(inode1, i_size_read(inode2));
    328	i_size_write(inode2, isize);
    329}
    330
    331void ext4_reset_inode_seed(struct inode *inode)
    332{
    333	struct ext4_inode_info *ei = EXT4_I(inode);
    334	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
    335	__le32 inum = cpu_to_le32(inode->i_ino);
    336	__le32 gen = cpu_to_le32(inode->i_generation);
    337	__u32 csum;
    338
    339	if (!ext4_has_metadata_csum(inode->i_sb))
    340		return;
    341
    342	csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum, sizeof(inum));
    343	ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen, sizeof(gen));
    344}
    345
/*
 * Swap the information from the given @inode and the inode
 * EXT4_BOOT_LOADER_INO. It will basically swap i_data and all other
 * important fields of the inodes.
 *
 * The request is rejected with -EINVAL unless @inode is a regular file
 * with exactly one link that is not a swap file, not encrypted, not
 * flagged for data journalling and has no inline data.  It is rejected
 * with -EPERM if @inode is read-only, append-only or immutable, or if
 * the caller fails either the inode_owner_or_capable() or the
 * CAP_SYS_ADMIN check.
 *
 * @sb:         the super block of the filesystem
 * @mnt_userns:	user namespace of the mount the inode was found from
 * @inode:      the inode to swap with EXT4_BOOT_LOADER_INO
 *
 * Returns a negative errno on failure.
 */
static long swap_inode_boot_loader(struct super_block *sb,
				struct user_namespace *mnt_userns,
				struct inode *inode)
{
	handle_t *handle;
	int err;
	struct inode *inode_bl;
	struct ext4_inode_info *ei_bl;
	qsize_t size, size_bl, diff;
	blkcnt_t blocks;
	unsigned short bytes;

	inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO, EXT4_IGET_SPECIAL);
	if (IS_ERR(inode_bl))
		return PTR_ERR(inode_bl);
	ei_bl = EXT4_I(inode_bl);

	/* Protect orig inodes against a truncate and make sure,
	 * that only 1 swap_inode_boot_loader is running. */
	lock_two_nondirectories(inode, inode_bl);

	if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode) ||
	    IS_SWAPFILE(inode) || IS_ENCRYPTED(inode) ||
	    (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) ||
	    ext4_has_inline_data(inode)) {
		err = -EINVAL;
		goto journal_err_out;
	}

	if (IS_RDONLY(inode) || IS_APPEND(inode) || IS_IMMUTABLE(inode) ||
	    !inode_owner_or_capable(mnt_userns, inode) ||
	    !capable(CAP_SYS_ADMIN)) {
		err = -EPERM;
		goto journal_err_out;
	}

	/* Write back both mappings before their page cache is dropped */
	filemap_invalidate_lock(inode->i_mapping);
	err = filemap_write_and_wait(inode->i_mapping);
	if (err)
		goto err_out;

	err = filemap_write_and_wait(inode_bl->i_mapping);
	if (err)
		goto err_out;

	/* Wait for all existing dio workers */
	inode_dio_wait(inode);
	inode_dio_wait(inode_bl);

	truncate_inode_pages(&inode->i_data, 0);
	truncate_inode_pages(&inode_bl->i_data, 0);

	handle = ext4_journal_start(inode_bl, EXT4_HT_MOVE_EXTENTS, 2);
	if (IS_ERR(handle)) {
		/*
		 * NOTE(review): the error carried by 'handle' is replaced
		 * with -EINVAL here rather than returned via PTR_ERR().
		 */
		err = -EINVAL;
		goto err_out;
	}
	ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT, handle);

	/* Protect extent tree against block allocations via delalloc */
	ext4_double_down_write_data_sem(inode, inode_bl);

	if (inode_bl->i_nlink == 0) {
		/* this inode has never been used as a BOOT_LOADER */
		set_nlink(inode_bl, 1);
		i_uid_write(inode_bl, 0);
		i_gid_write(inode_bl, 0);
		inode_bl->i_flags = 0;
		ei_bl->i_flags = 0;
		inode_set_iversion(inode_bl, 1);
		i_size_write(inode_bl, 0);
		inode_bl->i_mode = S_IFREG;
		if (ext4_has_feature_extents(sb)) {
			ext4_set_inode_flag(inode_bl, EXT4_INODE_EXTENTS);
			ext4_ext_tree_init(handle, inode_bl);
		} else
			memset(ei_bl->i_data, 0, sizeof(ei_bl->i_data));
	}

	err = dquot_initialize(inode);
	if (err)
		goto err_out1;

	/*
	 * Byte usage of both inodes before the swap (i_blocks * 512 +
	 * i_bytes); 'diff' drives the quota adjustment further down.
	 */
	size = (qsize_t)(inode->i_blocks) * (1 << 9) + inode->i_bytes;
	size_bl = (qsize_t)(inode_bl->i_blocks) * (1 << 9) + inode_bl->i_bytes;
	diff = size - size_bl;
	swap_inode_data(inode, inode_bl);

	inode->i_ctime = inode_bl->i_ctime = current_time(inode);

	/* New generations, so the checksum seeds must be recomputed too */
	inode->i_generation = prandom_u32();
	inode_bl->i_generation = prandom_u32();
	ext4_reset_inode_seed(inode);
	ext4_reset_inode_seed(inode_bl);

	ext4_discard_preallocations(inode, 0);

	err = ext4_mark_inode_dirty(handle, inode);
	if (err < 0) {
		/* No need to update quota information. */
		ext4_warning(inode->i_sb,
			"couldn't mark inode #%lu dirty (err %d)",
			inode->i_ino, err);
		/* Revert all changes: */
		swap_inode_data(inode, inode_bl);
		ext4_mark_inode_dirty(handle, inode);
		goto err_out1;
	}

	/* Remember the old counters so the revert path can restore them */
	blocks = inode_bl->i_blocks;
	bytes = inode_bl->i_bytes;
	inode_bl->i_blocks = inode->i_blocks;
	inode_bl->i_bytes = inode->i_bytes;
	err = ext4_mark_inode_dirty(handle, inode_bl);
	if (err < 0) {
		/* No need to update quota information. */
		ext4_warning(inode_bl->i_sb,
			"couldn't mark inode #%lu dirty (err %d)",
			inode_bl->i_ino, err);
		goto revert;
	}

	/* Bootloader inode should not be counted into quota information. */
	if (diff > 0)
		dquot_free_space(inode, diff);
	else
		err = dquot_alloc_space(inode, -1 * diff);

	if (err < 0) {
revert:
		/* Revert all changes: */
		inode_bl->i_blocks = blocks;
		inode_bl->i_bytes = bytes;
		swap_inode_data(inode, inode_bl);
		ext4_mark_inode_dirty(handle, inode);
		ext4_mark_inode_dirty(handle, inode_bl);
	}

	/* Unwind from here downwards on the exit paths */
err_out1:
	ext4_journal_stop(handle);
	ext4_double_up_write_data_sem(inode, inode_bl);

err_out:
	filemap_invalidate_unlock(inode->i_mapping);
journal_err_out:
	unlock_two_nondirectories(inode, inode_bl);
	iput(inode_bl);
	return err;
}
    505
    506/*
    507 * If immutable is set and we are not clearing it, we're not allowed to change
    508 * anything else in the inode.  Don't error out if we're only trying to set
    509 * immutable on an immutable file.
    510 */
    511static int ext4_ioctl_check_immutable(struct inode *inode, __u32 new_projid,
    512				      unsigned int flags)
    513{
    514	struct ext4_inode_info *ei = EXT4_I(inode);
    515	unsigned int oldflags = ei->i_flags;
    516
    517	if (!(oldflags & EXT4_IMMUTABLE_FL) || !(flags & EXT4_IMMUTABLE_FL))
    518		return 0;
    519
    520	if ((oldflags & ~EXT4_IMMUTABLE_FL) != (flags & ~EXT4_IMMUTABLE_FL))
    521		return -EPERM;
    522	if (ext4_has_feature_project(inode->i_sb) &&
    523	    __kprojid_val(ei->i_projid) != new_projid)
    524		return -EPERM;
    525
    526	return 0;
    527}
    528
    529static void ext4_dax_dontcache(struct inode *inode, unsigned int flags)
    530{
    531	struct ext4_inode_info *ei = EXT4_I(inode);
    532
    533	if (S_ISDIR(inode->i_mode))
    534		return;
    535
    536	if (test_opt2(inode->i_sb, DAX_NEVER) ||
    537	    test_opt(inode->i_sb, DAX_ALWAYS))
    538		return;
    539
    540	if ((ei->i_flags ^ flags) & EXT4_DAX_FL)
    541		d_mark_dontcache(inode);
    542}
    543
    544static bool dax_compatible(struct inode *inode, unsigned int oldflags,
    545			   unsigned int flags)
    546{
    547	/* Allow the DAX flag to be changed on inline directories */
    548	if (S_ISDIR(inode->i_mode)) {
    549		flags &= ~EXT4_INLINE_DATA_FL;
    550		oldflags &= ~EXT4_INLINE_DATA_FL;
    551	}
    552
    553	if (flags & EXT4_DAX_FL) {
    554		if ((oldflags & EXT4_DAX_MUT_EXCL) ||
    555		     ext4_test_inode_state(inode,
    556					  EXT4_STATE_VERITY_IN_PROGRESS)) {
    557			return false;
    558		}
    559	}
    560
    561	if ((flags & EXT4_DAX_MUT_EXCL) && (oldflags & EXT4_DAX_FL))
    562			return false;
    563
    564	return true;
    565}
    566
/*
 * Apply the flag set 'flags' to 'inode'.
 *
 * The request is validated first (quota files, capability for the
 * JOURNAL_DATA flag, DAX compatibility, casefold preconditions).  The
 * user-modifiable flags are then updated under a journal handle; the
 * JOURNAL_DATA and EXTENTS flags are skipped in that pass and handled
 * afterwards, once the handle has been stopped.
 *
 * Returns 0 on success or a negative errno.
 */
static int ext4_ioctl_setflags(struct inode *inode,
			       unsigned int flags)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	handle_t *handle = NULL;
	/* -EPERM is the result of the early permission-style rejections */
	int err = -EPERM, migrate = 0;
	struct ext4_iloc iloc;
	unsigned int oldflags, mask, i;
	struct super_block *sb = inode->i_sb;

	/* Is it quota file? Do not allow user to mess with it */
	if (ext4_is_quota_file(inode))
		goto flags_out;

	oldflags = ei->i_flags;
	/*
	 * The JOURNAL_DATA flag can only be changed by
	 * the relevant capability.
	 */
	if ((flags ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
		if (!capable(CAP_SYS_RESOURCE))
			goto flags_out;
	}

	if (!dax_compatible(inode, oldflags, flags)) {
		err = -EOPNOTSUPP;
		goto flags_out;
	}

	/* A change of the EXTENTS flag is carried out by a migration below */
	if ((flags ^ oldflags) & EXT4_EXTENTS_FL)
		migrate = 1;

	/* Casefold may only be toggled on an empty directory */
	if ((flags ^ oldflags) & EXT4_CASEFOLD_FL) {
		if (!ext4_has_feature_casefold(sb)) {
			err = -EOPNOTSUPP;
			goto flags_out;
		}

		if (!S_ISDIR(inode->i_mode)) {
			err = -ENOTDIR;
			goto flags_out;
		}

		if (!ext4_empty_dir(inode)) {
			err = -ENOTEMPTY;
			goto flags_out;
		}
	}

	/*
	 * Wait for all pending directio and then flush all the dirty pages
	 * for this file.  The flush marks all the pages readonly, so any
	 * subsequent attempt to write to the file (particularly mmap pages)
	 * will come through the filesystem and fail.
	 */
	if (S_ISREG(inode->i_mode) && !IS_IMMUTABLE(inode) &&
	    (flags & EXT4_IMMUTABLE_FL)) {
		inode_dio_wait(inode);
		err = filemap_write_and_wait(inode->i_mapping);
		if (err)
			goto flags_out;
	}

	handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		goto flags_out;
	}
	if (IS_SYNC(inode))
		ext4_handle_sync(handle);
	err = ext4_reserve_inode_write(handle, inode, &iloc);
	if (err)
		goto flags_err;

	ext4_dax_dontcache(inode, flags);

	/* Walk all 32 flag bits; 'i' is the bit number, 'mask' its value */
	for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
		if (!(mask & EXT4_FL_USER_MODIFIABLE))
			continue;
		/* These flags get special treatment later */
		if (mask == EXT4_JOURNAL_DATA_FL || mask == EXT4_EXTENTS_FL)
			continue;
		if (mask & flags)
			ext4_set_inode_flag(inode, i);
		else
			ext4_clear_inode_flag(inode, i);
	}

	ext4_set_inode_flags(inode, false);

	inode->i_ctime = current_time(inode);

	err = ext4_mark_iloc_dirty(handle, inode, &iloc);
flags_err:
	/* The handle is stopped on both the success and the error path */
	ext4_journal_stop(handle);
	if (err)
		goto flags_out;

	if ((flags ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
		/*
		 * Changes to the journaling mode can cause unsafe changes to
		 * S_DAX if the inode is DAX
		 */
		if (IS_DAX(inode)) {
			err = -EBUSY;
			goto flags_out;
		}

		err = ext4_change_inode_journal_flag(inode,
						     flags & EXT4_JOURNAL_DATA_FL);
		if (err)
			goto flags_out;
	}
	if (migrate) {
		if (flags & EXT4_EXTENTS_FL)
			err = ext4_ext_migrate(inode);
		else
			err = ext4_ind_migrate(inode);
	}

flags_out:
	return err;
}
    690
#ifdef CONFIG_QUOTA
/*
 * Set the project id of 'inode' to 'projid'.
 *
 * Without the project feature only EXT4_DEF_PROJID is accepted.  With
 * it, the inode must be larger than EXT4_GOOD_OLD_INODE_SIZE; the quota
 * usage is transferred to the new project (when its dquot can be
 * obtained) and the new id is stored in the inode under a journal
 * handle.
 *
 * Returns 0 on success (including when nothing needs to change) or a
 * negative errno.
 */
static int ext4_ioctl_setproject(struct inode *inode, __u32 projid)
{
	struct super_block *sb = inode->i_sb;
	struct ext4_inode_info *ei = EXT4_I(inode);
	int err, rc;
	handle_t *handle;
	kprojid_t kprojid;
	struct ext4_iloc iloc;
	struct ext4_inode *raw_inode;
	struct dquot *transfer_to[MAXQUOTAS] = { };

	if (!ext4_has_feature_project(sb)) {
		if (projid != EXT4_DEF_PROJID)
			return -EOPNOTSUPP;
		else
			return 0;
	}

	if (EXT4_INODE_SIZE(sb) <= EXT4_GOOD_OLD_INODE_SIZE)
		return -EOPNOTSUPP;

	kprojid = make_kprojid(&init_user_ns, (projid_t)projid);

	/* Nothing to do when the project id is unchanged */
	if (projid_eq(kprojid, EXT4_I(inode)->i_projid))
		return 0;

	err = -EPERM;
	/* Is it quota file? Do not allow user to mess with it */
	if (ext4_is_quota_file(inode))
		return err;

	err = ext4_get_inode_loc(inode, &iloc);
	if (err)
		return err;

	/* Grow the in-inode extra space if i_projid does not fit yet */
	raw_inode = ext4_raw_inode(&iloc);
	if (!EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) {
		err = ext4_expand_extra_isize(inode,
					      EXT4_SB(sb)->s_want_extra_isize,
					      &iloc);
		if (err)
			return err;
	} else {
		brelse(iloc.bh);
	}

	err = dquot_initialize(inode);
	if (err)
		return err;

	handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
		EXT4_QUOTA_INIT_BLOCKS(sb) +
		EXT4_QUOTA_DEL_BLOCKS(sb) + 3);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	err = ext4_reserve_inode_write(handle, inode, &iloc);
	if (err)
		goto out_stop;

	/*
	 * The quota transfer is only attempted when dqget() succeeds; a
	 * dqget() failure is not reported and the project id is still set.
	 */
	transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
	if (!IS_ERR(transfer_to[PRJQUOTA])) {

		/* __dquot_transfer() calls back ext4_get_inode_usage() which
		 * counts xattr inode references.
		 */
		down_read(&EXT4_I(inode)->xattr_sem);
		err = __dquot_transfer(inode, transfer_to);
		up_read(&EXT4_I(inode)->xattr_sem);
		dqput(transfer_to[PRJQUOTA]);
		if (err)
			goto out_dirty;
	}

	EXT4_I(inode)->i_projid = kprojid;
	inode->i_ctime = current_time(inode);
out_dirty:
	/* Reached on transfer failure too; the first error is kept */
	rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
	if (!err)
		err = rc;
out_stop:
	ext4_journal_stop(handle);
	return err;
}
#else
/*
 * Without CONFIG_QUOTA only the default project id is accepted; any
 * other value yields -EOPNOTSUPP.
 */
static int ext4_ioctl_setproject(struct inode *inode, __u32 projid)
{
	if (projid != EXT4_DEF_PROJID)
		return -EOPNOTSUPP;
	return 0;
}
#endif
    784
    785static int ext4_shutdown(struct super_block *sb, unsigned long arg)
    786{
    787	struct ext4_sb_info *sbi = EXT4_SB(sb);
    788	__u32 flags;
    789
    790	if (!capable(CAP_SYS_ADMIN))
    791		return -EPERM;
    792
    793	if (get_user(flags, (__u32 __user *)arg))
    794		return -EFAULT;
    795
    796	if (flags > EXT4_GOING_FLAGS_NOLOGFLUSH)
    797		return -EINVAL;
    798
    799	if (ext4_forced_shutdown(sbi))
    800		return 0;
    801
    802	ext4_msg(sb, KERN_ALERT, "shut down requested (%d)", flags);
    803	trace_ext4_shutdown(sb, flags);
    804
    805	switch (flags) {
    806	case EXT4_GOING_FLAGS_DEFAULT:
    807		freeze_bdev(sb->s_bdev);
    808		set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
    809		thaw_bdev(sb->s_bdev);
    810		break;
    811	case EXT4_GOING_FLAGS_LOGFLUSH:
    812		set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
    813		if (sbi->s_journal && !is_journal_aborted(sbi->s_journal)) {
    814			(void) ext4_force_commit(sb);
    815			jbd2_journal_abort(sbi->s_journal, -ESHUTDOWN);
    816		}
    817		break;
    818	case EXT4_GOING_FLAGS_NOLOGFLUSH:
    819		set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
    820		if (sbi->s_journal && !is_journal_aborted(sbi->s_journal))
    821			jbd2_journal_abort(sbi->s_journal, -ESHUTDOWN);
    822		break;
    823	default:
    824		return -EINVAL;
    825	}
    826	clear_opt(sb, DISCARD);
    827	return 0;
    828}
    829
/*
 * State passed through ext4_getfsmap() to ext4_getfsmap_format() while
 * records are copied out to user space.
 */
struct getfsmap_info {
	/* filesystem being queried */
	struct super_block	*gi_sb;
	/* user-space header whose fmh_recs[] array receives the records */
	struct fsmap_head __user *gi_data;
	/* index of the next fmh_recs[] slot to fill */
	unsigned int		gi_idx;
	/* fmr_flags of the most recently formatted record */
	__u32			gi_last_flags;
};
    836
    837static int ext4_getfsmap_format(struct ext4_fsmap *xfm, void *priv)
    838{
    839	struct getfsmap_info *info = priv;
    840	struct fsmap fm;
    841
    842	trace_ext4_getfsmap_mapping(info->gi_sb, xfm);
    843
    844	info->gi_last_flags = xfm->fmr_flags;
    845	ext4_fsmap_from_internal(info->gi_sb, &fm, xfm);
    846	if (copy_to_user(&info->gi_data->fmh_recs[info->gi_idx++], &fm,
    847			sizeof(struct fsmap)))
    848		return -EFAULT;
    849
    850	return 0;
    851}
    852
    853static int ext4_ioc_getfsmap(struct super_block *sb,
    854			     struct fsmap_head __user *arg)
    855{
    856	struct getfsmap_info info = { NULL };
    857	struct ext4_fsmap_head xhead = {0};
    858	struct fsmap_head head;
    859	bool aborted = false;
    860	int error;
    861
    862	if (copy_from_user(&head, arg, sizeof(struct fsmap_head)))
    863		return -EFAULT;
    864	if (memchr_inv(head.fmh_reserved, 0, sizeof(head.fmh_reserved)) ||
    865	    memchr_inv(head.fmh_keys[0].fmr_reserved, 0,
    866		       sizeof(head.fmh_keys[0].fmr_reserved)) ||
    867	    memchr_inv(head.fmh_keys[1].fmr_reserved, 0,
    868		       sizeof(head.fmh_keys[1].fmr_reserved)))
    869		return -EINVAL;
    870	/*
    871	 * ext4 doesn't report file extents at all, so the only valid
    872	 * file offsets are the magic ones (all zeroes or all ones).
    873	 */
    874	if (head.fmh_keys[0].fmr_offset ||
    875	    (head.fmh_keys[1].fmr_offset != 0 &&
    876	     head.fmh_keys[1].fmr_offset != -1ULL))
    877		return -EINVAL;
    878
    879	xhead.fmh_iflags = head.fmh_iflags;
    880	xhead.fmh_count = head.fmh_count;
    881	ext4_fsmap_to_internal(sb, &xhead.fmh_keys[0], &head.fmh_keys[0]);
    882	ext4_fsmap_to_internal(sb, &xhead.fmh_keys[1], &head.fmh_keys[1]);
    883
    884	trace_ext4_getfsmap_low_key(sb, &xhead.fmh_keys[0]);
    885	trace_ext4_getfsmap_high_key(sb, &xhead.fmh_keys[1]);
    886
    887	info.gi_sb = sb;
    888	info.gi_data = arg;
    889	error = ext4_getfsmap(sb, &xhead, ext4_getfsmap_format, &info);
    890	if (error == EXT4_QUERY_RANGE_ABORT)
    891		aborted = true;
    892	else if (error)
    893		return error;
    894
    895	/* If we didn't abort, set the "last" flag in the last fmx */
    896	if (!aborted && info.gi_idx) {
    897		info.gi_last_flags |= FMR_OF_LAST;
    898		if (copy_to_user(&info.gi_data->fmh_recs[info.gi_idx - 1].fmr_flags,
    899				 &info.gi_last_flags,
    900				 sizeof(info.gi_last_flags)))
    901			return -EFAULT;
    902	}
    903
    904	/* copy back header */
    905	head.fmh_entries = xhead.fmh_entries;
    906	head.fmh_oflags = xhead.fmh_oflags;
    907	if (copy_to_user(arg, &head, sizeof(struct fsmap_head)))
    908		return -EFAULT;
    909
    910	return 0;
    911}
    912
    913static long ext4_ioctl_group_add(struct file *file,
    914				 struct ext4_new_group_data *input)
    915{
    916	struct super_block *sb = file_inode(file)->i_sb;
    917	int err, err2=0;
    918
    919	err = ext4_resize_begin(sb);
    920	if (err)
    921		return err;
    922
    923	if (ext4_has_feature_bigalloc(sb)) {
    924		ext4_msg(sb, KERN_ERR,
    925			 "Online resizing not supported with bigalloc");
    926		err = -EOPNOTSUPP;
    927		goto group_add_out;
    928	}
    929
    930	err = mnt_want_write_file(file);
    931	if (err)
    932		goto group_add_out;
    933
    934	err = ext4_group_add(sb, input);
    935	if (EXT4_SB(sb)->s_journal) {
    936		jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
    937		err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
    938		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
    939	}
    940	if (err == 0)
    941		err = err2;
    942	mnt_drop_write_file(file);
    943	if (!err && ext4_has_group_desc_csum(sb) &&
    944	    test_opt(sb, INIT_INODE_TABLE))
    945		err = ext4_register_li_request(sb, input->group);
    946group_add_out:
    947	ext4_resize_end(sb);
    948	return err;
    949}
    950
    951int ext4_fileattr_get(struct dentry *dentry, struct fileattr *fa)
    952{
    953	struct inode *inode = d_inode(dentry);
    954	struct ext4_inode_info *ei = EXT4_I(inode);
    955	u32 flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
    956
    957	if (S_ISREG(inode->i_mode))
    958		flags &= ~FS_PROJINHERIT_FL;
    959
    960	fileattr_fill_flags(fa, flags);
    961	if (ext4_has_feature_project(inode->i_sb))
    962		fa->fsx_projid = from_kprojid(&init_user_ns, ei->i_projid);
    963
    964	return 0;
    965}
    966
    967int ext4_fileattr_set(struct user_namespace *mnt_userns,
    968		      struct dentry *dentry, struct fileattr *fa)
    969{
    970	struct inode *inode = d_inode(dentry);
    971	u32 flags = fa->flags;
    972	int err = -EOPNOTSUPP;
    973
    974	if (flags & ~EXT4_FL_USER_VISIBLE)
    975		goto out;
    976
    977	/*
    978	 * chattr(1) grabs flags via GETFLAGS, modifies the result and
    979	 * passes that to SETFLAGS. So we cannot easily make SETFLAGS
    980	 * more restrictive than just silently masking off visible but
    981	 * not settable flags as we always did.
    982	 */
    983	flags &= EXT4_FL_USER_MODIFIABLE;
    984	if (ext4_mask_flags(inode->i_mode, flags) != flags)
    985		goto out;
    986	err = ext4_ioctl_check_immutable(inode, fa->fsx_projid, flags);
    987	if (err)
    988		goto out;
    989	err = ext4_ioctl_setflags(inode, flags);
    990	if (err)
    991		goto out;
    992	err = ext4_ioctl_setproject(inode, fa->fsx_projid);
    993out:
    994	return err;
    995}
    996
/*
 * So that the fiemap access checks can't overflow on 32 bit machines.
 * This is the largest extent count whose total size, count multiplied by
 * sizeof(struct fiemap_extent), does not exceed UINT_MAX.
 */
#define FIEMAP_MAX_EXTENTS	(UINT_MAX / sizeof(struct fiemap_extent))
    999
   1000static int ext4_ioctl_get_es_cache(struct file *filp, unsigned long arg)
   1001{
   1002	struct fiemap fiemap;
   1003	struct fiemap __user *ufiemap = (struct fiemap __user *) arg;
   1004	struct fiemap_extent_info fieinfo = { 0, };
   1005	struct inode *inode = file_inode(filp);
   1006	int error;
   1007
   1008	if (copy_from_user(&fiemap, ufiemap, sizeof(fiemap)))
   1009		return -EFAULT;
   1010
   1011	if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS)
   1012		return -EINVAL;
   1013
   1014	fieinfo.fi_flags = fiemap.fm_flags;
   1015	fieinfo.fi_extents_max = fiemap.fm_extent_count;
   1016	fieinfo.fi_extents_start = ufiemap->fm_extents;
   1017
   1018	error = ext4_get_es_cache(inode, &fieinfo, fiemap.fm_start,
   1019			fiemap.fm_length);
   1020	fiemap.fm_flags = fieinfo.fi_flags;
   1021	fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped;
   1022	if (copy_to_user(ufiemap, &fiemap, sizeof(fiemap)))
   1023		error = -EFAULT;
   1024
   1025	return error;
   1026}
   1027
   1028static int ext4_ioctl_checkpoint(struct file *filp, unsigned long arg)
   1029{
   1030	int err = 0;
   1031	__u32 flags = 0;
   1032	unsigned int flush_flags = 0;
   1033	struct super_block *sb = file_inode(filp)->i_sb;
   1034
   1035	if (copy_from_user(&flags, (__u32 __user *)arg,
   1036				sizeof(__u32)))
   1037		return -EFAULT;
   1038
   1039	if (!capable(CAP_SYS_ADMIN))
   1040		return -EPERM;
   1041
   1042	/* check for invalid bits set */
   1043	if ((flags & ~EXT4_IOC_CHECKPOINT_FLAG_VALID) ||
   1044				((flags & JBD2_JOURNAL_FLUSH_DISCARD) &&
   1045				(flags & JBD2_JOURNAL_FLUSH_ZEROOUT)))
   1046		return -EINVAL;
   1047
   1048	if (!EXT4_SB(sb)->s_journal)
   1049		return -ENODEV;
   1050
   1051	if (flags & ~EXT4_IOC_CHECKPOINT_FLAG_VALID)
   1052		return -EINVAL;
   1053
   1054	if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) &&
   1055	    !bdev_max_discard_sectors(EXT4_SB(sb)->s_journal->j_dev))
   1056		return -EOPNOTSUPP;
   1057
   1058	if (flags & EXT4_IOC_CHECKPOINT_FLAG_DRY_RUN)
   1059		return 0;
   1060
   1061	if (flags & EXT4_IOC_CHECKPOINT_FLAG_DISCARD)
   1062		flush_flags |= JBD2_JOURNAL_FLUSH_DISCARD;
   1063
   1064	if (flags & EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT) {
   1065		flush_flags |= JBD2_JOURNAL_FLUSH_ZEROOUT;
   1066		pr_info_ratelimited("warning: checkpointing journal with EXT4_IOC_CHECKPOINT_FLAG_ZEROOUT can be slow");
   1067	}
   1068
   1069	jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
   1070	err = jbd2_journal_flush(EXT4_SB(sb)->s_journal, flush_flags);
   1071	jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
   1072
   1073	return err;
   1074}
   1075
   1076static int ext4_ioctl_setlabel(struct file *filp, const char __user *user_label)
   1077{
   1078	size_t len;
   1079	int ret = 0;
   1080	char new_label[EXT4_LABEL_MAX + 1];
   1081	struct super_block *sb = file_inode(filp)->i_sb;
   1082
   1083	if (!capable(CAP_SYS_ADMIN))
   1084		return -EPERM;
   1085
   1086	/*
   1087	 * Copy the maximum length allowed for ext4 label with one more to
   1088	 * find the required terminating null byte in order to test the
   1089	 * label length. The on disk label doesn't need to be null terminated.
   1090	 */
   1091	if (copy_from_user(new_label, user_label, EXT4_LABEL_MAX + 1))
   1092		return -EFAULT;
   1093
   1094	len = strnlen(new_label, EXT4_LABEL_MAX + 1);
   1095	if (len > EXT4_LABEL_MAX)
   1096		return -EINVAL;
   1097
   1098	/*
   1099	 * Clear the buffer after the new label
   1100	 */
   1101	memset(new_label + len, 0, EXT4_LABEL_MAX - len);
   1102
   1103	ret = mnt_want_write_file(filp);
   1104	if (ret)
   1105		return ret;
   1106
   1107	ret = ext4_update_superblocks_fn(sb, ext4_sb_setlabel, new_label);
   1108
   1109	mnt_drop_write_file(filp);
   1110	return ret;
   1111}
   1112
   1113static int ext4_ioctl_getlabel(struct ext4_sb_info *sbi, char __user *user_label)
   1114{
   1115	char label[EXT4_LABEL_MAX + 1];
   1116
   1117	/*
   1118	 * EXT4_LABEL_MAX must always be smaller than FSLABEL_MAX because
   1119	 * FSLABEL_MAX must include terminating null byte, while s_volume_name
   1120	 * does not have to.
   1121	 */
   1122	BUILD_BUG_ON(EXT4_LABEL_MAX >= FSLABEL_MAX);
   1123
   1124	memset(label, 0, sizeof(label));
   1125	lock_buffer(sbi->s_sbh);
   1126	strncpy(label, sbi->s_es->s_volume_name, EXT4_LABEL_MAX);
   1127	unlock_buffer(sbi->s_sbh);
   1128
   1129	if (copy_to_user(user_label, label, sizeof(label)))
   1130		return -EFAULT;
   1131	return 0;
   1132}
   1133
/*
 * Dispatch a single ext4 ioctl.
 *
 * @filp: open file the ioctl was issued on; the inode and superblock acted
 *        upon are derived from it
 * @cmd:  ioctl command number
 * @arg:  command-specific argument, in most cases a user-space pointer
 *
 * Every case either delegates to a helper or handles the command inline and
 * returns its result.  Commands not listed here return -ENOTTY.
 */
static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	struct inode *inode = file_inode(filp);
	struct super_block *sb = inode->i_sb;
	struct user_namespace *mnt_userns = file_mnt_user_ns(filp);

	ext4_debug("cmd = %u, arg = %lu\n", cmd, arg);

	switch (cmd) {
	case FS_IOC_GETFSMAP:
		return ext4_ioc_getfsmap(sb, (void __user *)arg);
	case EXT4_IOC_GETVERSION:
	case EXT4_IOC_GETVERSION_OLD:
		return put_user(inode->i_generation, (int __user *) arg);
	case EXT4_IOC_SETVERSION:
	case EXT4_IOC_SETVERSION_OLD: {
		handle_t *handle;
		struct ext4_iloc iloc;
		__u32 generation;
		int err;

		/* Only the owner or a suitably capable caller may proceed. */
		if (!inode_owner_or_capable(mnt_userns, inode))
			return -EPERM;

		if (ext4_has_metadata_csum(inode->i_sb)) {
			ext4_warning(sb, "Setting inode version is not "
				     "supported with metadata_csum enabled.");
			return -ENOTTY;
		}

		err = mnt_want_write_file(filp);
		if (err)
			return err;
		if (get_user(generation, (int __user *) arg)) {
			err = -EFAULT;
			goto setversion_out;
		}

		/* Update ctime and generation inside one journal handle. */
		inode_lock(inode);
		handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
		if (IS_ERR(handle)) {
			err = PTR_ERR(handle);
			goto unlock_out;
		}
		err = ext4_reserve_inode_write(handle, inode, &iloc);
		if (err == 0) {
			inode->i_ctime = current_time(inode);
			inode->i_generation = generation;
			err = ext4_mark_iloc_dirty(handle, inode, &iloc);
		}
		ext4_journal_stop(handle);

unlock_out:
		inode_unlock(inode);
setversion_out:
		mnt_drop_write_file(filp);
		return err;
	}
	case EXT4_IOC_GROUP_EXTEND: {
		ext4_fsblk_t n_blocks_count;
		int err, err2=0;

		err = ext4_resize_begin(sb);
		if (err)
			return err;

		/* The new block count is read from user space as a __u32. */
		if (get_user(n_blocks_count, (__u32 __user *)arg)) {
			err = -EFAULT;
			goto group_extend_out;
		}

		if (ext4_has_feature_bigalloc(sb)) {
			ext4_msg(sb, KERN_ERR,
				 "Online resizing not supported with bigalloc");
			err = -EOPNOTSUPP;
			goto group_extend_out;
		}

		err = mnt_want_write_file(filp);
		if (err)
			goto group_extend_out;

		err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
		/*
		 * The journal is flushed whether or not the extend succeeded;
		 * the flush error is reported only if the extend itself
		 * returned 0.
		 */
		if (EXT4_SB(sb)->s_journal) {
			jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
			err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
			jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
		}
		if (err == 0)
			err = err2;
		mnt_drop_write_file(filp);
group_extend_out:
		ext4_resize_end(sb);
		return err;
	}

	case EXT4_IOC_MOVE_EXT: {
		struct move_extent me;
		struct fd donor;
		int err;

		/* The file must be open for both reading and writing. */
		if (!(filp->f_mode & FMODE_READ) ||
		    !(filp->f_mode & FMODE_WRITE))
			return -EBADF;

		if (copy_from_user(&me,
			(struct move_extent __user *)arg, sizeof(me)))
			return -EFAULT;
		/*
		 * Reset moved_len before it is handed to ext4_move_extents();
		 * the structure is copied back to user space below.
		 */
		me.moved_len = 0;

		donor = fdget(me.donor_fd);
		if (!donor.file)
			return -EBADF;

		/* The donor file must be open for writing. */
		if (!(donor.file->f_mode & FMODE_WRITE)) {
			err = -EBADF;
			goto mext_out;
		}

		if (ext4_has_feature_bigalloc(sb)) {
			ext4_msg(sb, KERN_ERR,
				 "Online defrag not supported with bigalloc");
			err = -EOPNOTSUPP;
			goto mext_out;
		} else if (IS_DAX(inode)) {
			ext4_msg(sb, KERN_ERR,
				 "Online defrag not supported with DAX");
			err = -EOPNOTSUPP;
			goto mext_out;
		}

		err = mnt_want_write_file(filp);
		if (err)
			goto mext_out;

		err = ext4_move_extents(filp, donor.file, me.orig_start,
					me.donor_start, me.len, &me.moved_len);
		mnt_drop_write_file(filp);

		/* me is copied back even when the move returned an error. */
		if (copy_to_user((struct move_extent __user *)arg,
				 &me, sizeof(me)))
			err = -EFAULT;
mext_out:
		fdput(donor);
		return err;
	}

	case EXT4_IOC_GROUP_ADD: {
		struct ext4_new_group_data input;

		if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg,
				sizeof(input)))
			return -EFAULT;

		return ext4_ioctl_group_add(filp, &input);
	}

	case EXT4_IOC_MIGRATE:
	{
		int err;
		if (!inode_owner_or_capable(mnt_userns, inode))
			return -EACCES;

		err = mnt_want_write_file(filp);
		if (err)
			return err;
		/*
		 * inode_mutex prevent write and truncate on the file.
		 * Read still goes through. We take i_data_sem in
		 * ext4_ext_swap_inode_data before we switch the
		 * inode format to prevent read.
		 */
		inode_lock((inode));
		err = ext4_ext_migrate(inode);
		inode_unlock((inode));
		mnt_drop_write_file(filp);
		return err;
	}

	case EXT4_IOC_ALLOC_DA_BLKS:
	{
		int err;
		if (!inode_owner_or_capable(mnt_userns, inode))
			return -EACCES;

		err = mnt_want_write_file(filp);
		if (err)
			return err;
		err = ext4_alloc_da_blocks(inode);
		mnt_drop_write_file(filp);
		return err;
	}

	case EXT4_IOC_SWAP_BOOT:
	{
		int err;
		/* Requires a file descriptor opened for writing. */
		if (!(filp->f_mode & FMODE_WRITE))
			return -EBADF;
		err = mnt_want_write_file(filp);
		if (err)
			return err;
		err = swap_inode_boot_loader(sb, mnt_userns, inode);
		mnt_drop_write_file(filp);
		return err;
	}

	case EXT4_IOC_RESIZE_FS: {
		ext4_fsblk_t n_blocks_count;
		int err = 0, err2 = 0;
		/* Group count before the resize, compared against it below. */
		ext4_group_t o_group = EXT4_SB(sb)->s_groups_count;

		if (copy_from_user(&n_blocks_count, (__u64 __user *)arg,
				   sizeof(__u64))) {
			return -EFAULT;
		}

		err = ext4_resize_begin(sb);
		if (err)
			return err;

		err = mnt_want_write_file(filp);
		if (err)
			goto resizefs_out;

		err = ext4_resize_fs(sb, n_blocks_count);
		/*
		 * As for EXT4_IOC_GROUP_EXTEND, the journal is flushed
		 * regardless of the resize result and the flush error only
		 * matters if the resize returned 0.
		 */
		if (EXT4_SB(sb)->s_journal) {
			ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE, NULL);
			jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
			err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
			jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
		}
		if (err == 0)
			err = err2;
		mnt_drop_write_file(filp);
		/*
		 * If the group count grew, register a lazy-init request
		 * starting at the first new group (only with group descriptor
		 * checksums and the INIT_INODE_TABLE mount option).
		 */
		if (!err && (o_group < EXT4_SB(sb)->s_groups_count) &&
		    ext4_has_group_desc_csum(sb) &&
		    test_opt(sb, INIT_INODE_TABLE))
			err = ext4_register_li_request(sb, o_group);

resizefs_out:
		ext4_resize_end(sb);
		return err;
	}

	case FITRIM:
	{
		struct fstrim_range range;
		int ret = 0;

		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		if (!bdev_max_discard_sectors(sb->s_bdev))
			return -EOPNOTSUPP;

		/*
		 * We haven't replayed the journal, so we cannot use our
		 * block-bitmap-guided storage zapping commands.
		 */
		if (test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb))
			return -EROFS;

		if (copy_from_user(&range, (struct fstrim_range __user *)arg,
		    sizeof(range)))
			return -EFAULT;

		ret = ext4_trim_fs(sb, &range);
		if (ret < 0)
			return ret;

		/* The range is copied back only after a successful trim. */
		if (copy_to_user((struct fstrim_range __user *)arg, &range,
		    sizeof(range)))
			return -EFAULT;

		return 0;
	}
	case EXT4_IOC_PRECACHE_EXTENTS:
		return ext4_ext_precache(inode);

	/*
	 * fscrypt ioctls: apart from GET_ENCRYPTION_PWSALT, which goes to an
	 * ext4 helper, each one is refused with -EOPNOTSUPP here unless the
	 * filesystem has the encrypt feature.
	 */
	case FS_IOC_SET_ENCRYPTION_POLICY:
		if (!ext4_has_feature_encrypt(sb))
			return -EOPNOTSUPP;
		return fscrypt_ioctl_set_policy(filp, (const void __user *)arg);

	case FS_IOC_GET_ENCRYPTION_PWSALT:
		return ext4_ioctl_get_encryption_pwsalt(filp, (void __user *)arg);

	case FS_IOC_GET_ENCRYPTION_POLICY:
		if (!ext4_has_feature_encrypt(sb))
			return -EOPNOTSUPP;
		return fscrypt_ioctl_get_policy(filp, (void __user *)arg);

	case FS_IOC_GET_ENCRYPTION_POLICY_EX:
		if (!ext4_has_feature_encrypt(sb))
			return -EOPNOTSUPP;
		return fscrypt_ioctl_get_policy_ex(filp, (void __user *)arg);

	case FS_IOC_ADD_ENCRYPTION_KEY:
		if (!ext4_has_feature_encrypt(sb))
			return -EOPNOTSUPP;
		return fscrypt_ioctl_add_key(filp, (void __user *)arg);

	case FS_IOC_REMOVE_ENCRYPTION_KEY:
		if (!ext4_has_feature_encrypt(sb))
			return -EOPNOTSUPP;
		return fscrypt_ioctl_remove_key(filp, (void __user *)arg);

	case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
		if (!ext4_has_feature_encrypt(sb))
			return -EOPNOTSUPP;
		return fscrypt_ioctl_remove_key_all_users(filp,
							  (void __user *)arg);
	case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
		if (!ext4_has_feature_encrypt(sb))
			return -EOPNOTSUPP;
		return fscrypt_ioctl_get_key_status(filp, (void __user *)arg);

	case FS_IOC_GET_ENCRYPTION_NONCE:
		if (!ext4_has_feature_encrypt(sb))
			return -EOPNOTSUPP;
		return fscrypt_ioctl_get_nonce(filp, (void __user *)arg);

	case EXT4_IOC_CLEAR_ES_CACHE:
	{
		if (!inode_owner_or_capable(mnt_userns, inode))
			return -EACCES;
		ext4_clear_inode_es(inode);
		return 0;
	}

	case EXT4_IOC_GETSTATE:
	{
		__u32	state = 0;

		/*
		 * Translate selected in-kernel inode state bits into the
		 * EXT4_STATE_FLAG_* bits returned to user space.
		 */
		if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED))
			state |= EXT4_STATE_FLAG_EXT_PRECACHED;
		if (ext4_test_inode_state(inode, EXT4_STATE_NEW))
			state |= EXT4_STATE_FLAG_NEW;
		if (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY))
			state |= EXT4_STATE_FLAG_NEWENTRY;
		if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE))
			state |= EXT4_STATE_FLAG_DA_ALLOC_CLOSE;

		return put_user(state, (__u32 __user *) arg);
	}

	case EXT4_IOC_GET_ES_CACHE:
		return ext4_ioctl_get_es_cache(filp, arg);

	case EXT4_IOC_SHUTDOWN:
		return ext4_shutdown(sb, arg);

	/*
	 * fsverity ioctls: each one is refused with -EOPNOTSUPP unless the
	 * filesystem has the verity feature.
	 */
	case FS_IOC_ENABLE_VERITY:
		if (!ext4_has_feature_verity(sb))
			return -EOPNOTSUPP;
		return fsverity_ioctl_enable(filp, (const void __user *)arg);

	case FS_IOC_MEASURE_VERITY:
		if (!ext4_has_feature_verity(sb))
			return -EOPNOTSUPP;
		return fsverity_ioctl_measure(filp, (void __user *)arg);

	case FS_IOC_READ_VERITY_METADATA:
		if (!ext4_has_feature_verity(sb))
			return -EOPNOTSUPP;
		return fsverity_ioctl_read_metadata(filp,
						    (const void __user *)arg);

	case EXT4_IOC_CHECKPOINT:
		return ext4_ioctl_checkpoint(filp, arg);

	case FS_IOC_GETFSLABEL:
		return ext4_ioctl_getlabel(EXT4_SB(sb), (void __user *)arg);

	case FS_IOC_SETFSLABEL:
		return ext4_ioctl_setlabel(filp,
					   (const void __user *)arg);

	default:
		return -ENOTTY;
	}
}
   1516
   1517long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
   1518{
   1519	return __ext4_ioctl(filp, cmd, arg);
   1520}
   1521
   1522#ifdef CONFIG_COMPAT
   1523long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
   1524{
   1525	/* These are just misnamed, they actually get/put from/to user an int */
   1526	switch (cmd) {
   1527	case EXT4_IOC32_GETVERSION:
   1528		cmd = EXT4_IOC_GETVERSION;
   1529		break;
   1530	case EXT4_IOC32_SETVERSION:
   1531		cmd = EXT4_IOC_SETVERSION;
   1532		break;
   1533	case EXT4_IOC32_GROUP_EXTEND:
   1534		cmd = EXT4_IOC_GROUP_EXTEND;
   1535		break;
   1536	case EXT4_IOC32_GETVERSION_OLD:
   1537		cmd = EXT4_IOC_GETVERSION_OLD;
   1538		break;
   1539	case EXT4_IOC32_SETVERSION_OLD:
   1540		cmd = EXT4_IOC_SETVERSION_OLD;
   1541		break;
   1542	case EXT4_IOC32_GETRSVSZ:
   1543		cmd = EXT4_IOC_GETRSVSZ;
   1544		break;
   1545	case EXT4_IOC32_SETRSVSZ:
   1546		cmd = EXT4_IOC_SETRSVSZ;
   1547		break;
   1548	case EXT4_IOC32_GROUP_ADD: {
   1549		struct compat_ext4_new_group_input __user *uinput;
   1550		struct ext4_new_group_data input;
   1551		int err;
   1552
   1553		uinput = compat_ptr(arg);
   1554		err = get_user(input.group, &uinput->group);
   1555		err |= get_user(input.block_bitmap, &uinput->block_bitmap);
   1556		err |= get_user(input.inode_bitmap, &uinput->inode_bitmap);
   1557		err |= get_user(input.inode_table, &uinput->inode_table);
   1558		err |= get_user(input.blocks_count, &uinput->blocks_count);
   1559		err |= get_user(input.reserved_blocks,
   1560				&uinput->reserved_blocks);
   1561		if (err)
   1562			return -EFAULT;
   1563		return ext4_ioctl_group_add(file, &input);
   1564	}
   1565	case EXT4_IOC_MOVE_EXT:
   1566	case EXT4_IOC_RESIZE_FS:
   1567	case FITRIM:
   1568	case EXT4_IOC_PRECACHE_EXTENTS:
   1569	case FS_IOC_SET_ENCRYPTION_POLICY:
   1570	case FS_IOC_GET_ENCRYPTION_PWSALT:
   1571	case FS_IOC_GET_ENCRYPTION_POLICY:
   1572	case FS_IOC_GET_ENCRYPTION_POLICY_EX:
   1573	case FS_IOC_ADD_ENCRYPTION_KEY:
   1574	case FS_IOC_REMOVE_ENCRYPTION_KEY:
   1575	case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
   1576	case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
   1577	case FS_IOC_GET_ENCRYPTION_NONCE:
   1578	case EXT4_IOC_SHUTDOWN:
   1579	case FS_IOC_GETFSMAP:
   1580	case FS_IOC_ENABLE_VERITY:
   1581	case FS_IOC_MEASURE_VERITY:
   1582	case FS_IOC_READ_VERITY_METADATA:
   1583	case EXT4_IOC_CLEAR_ES_CACHE:
   1584	case EXT4_IOC_GETSTATE:
   1585	case EXT4_IOC_GET_ES_CACHE:
   1586	case EXT4_IOC_CHECKPOINT:
   1587	case FS_IOC_GETFSLABEL:
   1588	case FS_IOC_SETFSLABEL:
   1589		break;
   1590	default:
   1591		return -ENOIOCTLCMD;
   1592	}
   1593	return ext4_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
   1594}
   1595#endif
   1596
   1597static void set_overhead(struct ext4_super_block *es, const void *arg)
   1598{
   1599	es->s_overhead_clusters = cpu_to_le32(*((unsigned long *) arg));
   1600}
   1601
   1602int ext4_update_overhead(struct super_block *sb)
   1603{
   1604	struct ext4_sb_info *sbi = EXT4_SB(sb);
   1605
   1606	if (sb_rdonly(sb) || sbi->s_overhead == 0 ||
   1607	    sbi->s_overhead == le32_to_cpu(sbi->s_es->s_overhead_clusters))
   1608		return 0;
   1609
   1610	return ext4_update_superblocks_fn(sb, set_overhead, &sbi->s_overhead);
   1611}