cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

file.c (118101B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * fs/f2fs/file.c
      4 *
      5 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
      6 *             http://www.samsung.com/
      7 */
      8#include <linux/fs.h>
      9#include <linux/f2fs_fs.h>
     10#include <linux/stat.h>
     11#include <linux/buffer_head.h>
     12#include <linux/writeback.h>
     13#include <linux/blkdev.h>
     14#include <linux/falloc.h>
     15#include <linux/types.h>
     16#include <linux/compat.h>
     17#include <linux/uaccess.h>
     18#include <linux/mount.h>
     19#include <linux/pagevec.h>
     20#include <linux/uio.h>
     21#include <linux/uuid.h>
     22#include <linux/file.h>
     23#include <linux/nls.h>
     24#include <linux/sched/signal.h>
     25#include <linux/fileattr.h>
     26#include <linux/fadvise.h>
     27#include <linux/iomap.h>
     28
     29#include "f2fs.h"
     30#include "node.h"
     31#include "segment.h"
     32#include "xattr.h"
     33#include "acl.h"
     34#include "gc.h"
     35#include "iostat.h"
     36#include <trace/events/f2fs.h>
     37#include <uapi/linux/f2fs.h>
     38
     39static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
     40{
     41	struct inode *inode = file_inode(vmf->vma->vm_file);
     42	vm_fault_t ret;
     43
     44	ret = filemap_fault(vmf);
     45	if (!ret)
     46		f2fs_update_iostat(F2FS_I_SB(inode), APP_MAPPED_READ_IO,
     47							F2FS_BLKSIZE);
     48
     49	trace_f2fs_filemap_fault(inode, vmf->pgoff, (unsigned long)ret);
     50
     51	return ret;
     52}
     53
     54static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
     55{
     56	struct page *page = vmf->page;
     57	struct inode *inode = file_inode(vmf->vma->vm_file);
     58	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
     59	struct dnode_of_data dn;
     60	bool need_alloc = true;
     61	int err = 0;
     62
     63	if (unlikely(IS_IMMUTABLE(inode)))
     64		return VM_FAULT_SIGBUS;
     65
     66	if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
     67		return VM_FAULT_SIGBUS;
     68
     69	if (unlikely(f2fs_cp_error(sbi))) {
     70		err = -EIO;
     71		goto err;
     72	}
     73
     74	if (!f2fs_is_checkpoint_ready(sbi)) {
     75		err = -ENOSPC;
     76		goto err;
     77	}
     78
     79	err = f2fs_convert_inline_inode(inode);
     80	if (err)
     81		goto err;
     82
     83#ifdef CONFIG_F2FS_FS_COMPRESSION
     84	if (f2fs_compressed_file(inode)) {
     85		int ret = f2fs_is_compressed_cluster(inode, page->index);
     86
     87		if (ret < 0) {
     88			err = ret;
     89			goto err;
     90		} else if (ret) {
     91			need_alloc = false;
     92		}
     93	}
     94#endif
      95	/* this should be done outside of any locked page */
     96	if (need_alloc)
     97		f2fs_balance_fs(sbi, true);
     98
     99	sb_start_pagefault(inode->i_sb);
    100
    101	f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
    102
    103	file_update_time(vmf->vma->vm_file);
    104	filemap_invalidate_lock_shared(inode->i_mapping);
    105	lock_page(page);
    106	if (unlikely(page->mapping != inode->i_mapping ||
    107			page_offset(page) > i_size_read(inode) ||
    108			!PageUptodate(page))) {
    109		unlock_page(page);
    110		err = -EFAULT;
    111		goto out_sem;
    112	}
    113
    114	if (need_alloc) {
    115		/* block allocation */
    116		f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
    117		set_new_dnode(&dn, inode, NULL, NULL, 0);
    118		err = f2fs_get_block(&dn, page->index);
    119		f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
    120	}
    121
    122#ifdef CONFIG_F2FS_FS_COMPRESSION
    123	if (!need_alloc) {
    124		set_new_dnode(&dn, inode, NULL, NULL, 0);
    125		err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
    126		f2fs_put_dnode(&dn);
    127	}
    128#endif
    129	if (err) {
    130		unlock_page(page);
    131		goto out_sem;
    132	}
    133
    134	f2fs_wait_on_page_writeback(page, DATA, false, true);
    135
    136	/* wait for GCed page writeback via META_MAPPING */
    137	f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
    138
    139	/*
    140	 * check to see if the page is mapped already (no holes)
    141	 */
    142	if (PageMappedToDisk(page))
    143		goto out_sem;
    144
    145	/* page is wholly or partially inside EOF */
    146	if (((loff_t)(page->index + 1) << PAGE_SHIFT) >
    147						i_size_read(inode)) {
    148		loff_t offset;
    149
    150		offset = i_size_read(inode) & ~PAGE_MASK;
    151		zero_user_segment(page, offset, PAGE_SIZE);
    152	}
    153	set_page_dirty(page);
    154	if (!PageUptodate(page))
    155		SetPageUptodate(page);
    156
    157	f2fs_update_iostat(sbi, APP_MAPPED_IO, F2FS_BLKSIZE);
    158	f2fs_update_time(sbi, REQ_TIME);
    159
    160	trace_f2fs_vm_page_mkwrite(page, DATA);
    161out_sem:
    162	filemap_invalidate_unlock_shared(inode->i_mapping);
    163
    164	sb_end_pagefault(inode->i_sb);
    165err:
    166	return block_page_mkwrite_return(err);
    167}
    168
    169static const struct vm_operations_struct f2fs_file_vm_ops = {
    170	.fault		= f2fs_filemap_fault,
    171	.map_pages	= filemap_map_pages,
    172	.page_mkwrite	= f2fs_vm_page_mkwrite,
    173};
    174
    175static int get_parent_ino(struct inode *inode, nid_t *pino)
    176{
    177	struct dentry *dentry;
    178
    179	/*
    180	 * Make sure to get the non-deleted alias.  The alias associated with
    181	 * the open file descriptor being fsync()'ed may be deleted already.
    182	 */
    183	dentry = d_find_alias(inode);
    184	if (!dentry)
    185		return 0;
    186
    187	*pino = parent_ino(dentry);
    188	dput(dentry);
    189	return 1;
    190}
    191
    192static inline enum cp_reason_type need_do_checkpoint(struct inode *inode)
    193{
    194	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
    195	enum cp_reason_type cp_reason = CP_NO_NEEDED;
    196
    197	if (!S_ISREG(inode->i_mode))
    198		cp_reason = CP_NON_REGULAR;
    199	else if (f2fs_compressed_file(inode))
    200		cp_reason = CP_COMPRESSED;
    201	else if (inode->i_nlink != 1)
    202		cp_reason = CP_HARDLINK;
    203	else if (is_sbi_flag_set(sbi, SBI_NEED_CP))
    204		cp_reason = CP_SB_NEED_CP;
    205	else if (file_wrong_pino(inode))
    206		cp_reason = CP_WRONG_PINO;
    207	else if (!f2fs_space_for_roll_forward(sbi))
    208		cp_reason = CP_NO_SPC_ROLL;
    209	else if (!f2fs_is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
    210		cp_reason = CP_NODE_NEED_CP;
    211	else if (test_opt(sbi, FASTBOOT))
    212		cp_reason = CP_FASTBOOT_MODE;
    213	else if (F2FS_OPTION(sbi).active_logs == 2)
    214		cp_reason = CP_SPEC_LOG_NUM;
    215	else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT &&
    216		f2fs_need_dentry_mark(sbi, inode->i_ino) &&
    217		f2fs_exist_written_data(sbi, F2FS_I(inode)->i_pino,
    218							TRANS_DIR_INO))
    219		cp_reason = CP_RECOVER_DIR;
    220
    221	return cp_reason;
    222}
    223
    224static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino)
    225{
    226	struct page *i = find_get_page(NODE_MAPPING(sbi), ino);
    227	bool ret = false;
     228	/* but we still need to catch any pending inode updates */
    229	if ((i && PageDirty(i)) || f2fs_need_inode_block_update(sbi, ino))
    230		ret = true;
    231	f2fs_put_page(i, 0);
    232	return ret;
    233}
    234
    235static void try_to_fix_pino(struct inode *inode)
    236{
    237	struct f2fs_inode_info *fi = F2FS_I(inode);
    238	nid_t pino;
    239
    240	f2fs_down_write(&fi->i_sem);
    241	if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
    242			get_parent_ino(inode, &pino)) {
    243		f2fs_i_pino_write(inode, pino);
    244		file_got_pino(inode);
    245	}
    246	f2fs_up_write(&fi->i_sem);
    247}
    248
    249static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
    250						int datasync, bool atomic)
    251{
    252	struct inode *inode = file->f_mapping->host;
    253	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
    254	nid_t ino = inode->i_ino;
    255	int ret = 0;
    256	enum cp_reason_type cp_reason = 0;
    257	struct writeback_control wbc = {
    258		.sync_mode = WB_SYNC_ALL,
    259		.nr_to_write = LONG_MAX,
    260		.for_reclaim = 0,
    261	};
    262	unsigned int seq_id = 0;
    263
    264	if (unlikely(f2fs_readonly(inode->i_sb)))
    265		return 0;
    266
    267	trace_f2fs_sync_file_enter(inode);
    268
    269	if (S_ISDIR(inode->i_mode))
    270		goto go_write;
    271
    272	/* if fdatasync is triggered, let's do in-place-update */
    273	if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
    274		set_inode_flag(inode, FI_NEED_IPU);
    275	ret = file_write_and_wait_range(file, start, end);
    276	clear_inode_flag(inode, FI_NEED_IPU);
    277
    278	if (ret || is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
    279		trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
    280		return ret;
    281	}
    282
    283	/* if the inode is dirty, let's recover all the time */
    284	if (!f2fs_skip_inode_update(inode, datasync)) {
    285		f2fs_write_inode(inode, NULL);
    286		goto go_write;
    287	}
    288
    289	/*
     290	 * if there is no written data, don't waste time writing recovery info.
    291	 */
    292	if (!is_inode_flag_set(inode, FI_APPEND_WRITE) &&
    293			!f2fs_exist_written_data(sbi, ino, APPEND_INO)) {
    294
    295		/* it may call write_inode just prior to fsync */
    296		if (need_inode_page_update(sbi, ino))
    297			goto go_write;
    298
    299		if (is_inode_flag_set(inode, FI_UPDATE_WRITE) ||
    300				f2fs_exist_written_data(sbi, ino, UPDATE_INO))
    301			goto flush_out;
    302		goto out;
    303	} else {
    304		/*
     305		 * in the OPU case, during fsync(), the node can be persisted
     306		 * before the data when the lower device doesn't support write
     307		 * barriers, resulting in data corruption after SPO.
     308		 * So for strict fsync mode, force atomic write semantics to
     309		 * keep the write order between data/node and the last node, to
     310		 * avoid potential data corruption.
    311		 */
    312		if (F2FS_OPTION(sbi).fsync_mode ==
    313				FSYNC_MODE_STRICT && !atomic)
    314			atomic = true;
    315	}
    316go_write:
    317	/*
     318	 * Both fdatasync() and fsync() are recoverable from a
     319	 * sudden power-off.
    320	 */
    321	f2fs_down_read(&F2FS_I(inode)->i_sem);
    322	cp_reason = need_do_checkpoint(inode);
    323	f2fs_up_read(&F2FS_I(inode)->i_sem);
    324
    325	if (cp_reason) {
    326		/* all the dirty node pages should be flushed for POR */
    327		ret = f2fs_sync_fs(inode->i_sb, 1);
    328
    329		/*
     330		 * We've secured consistency through sync_fs. The following
     331		 * pino will be used only for fsynced inodes after checkpoint.
    332		 */
    333		try_to_fix_pino(inode);
    334		clear_inode_flag(inode, FI_APPEND_WRITE);
    335		clear_inode_flag(inode, FI_UPDATE_WRITE);
    336		goto out;
    337	}
    338sync_nodes:
    339	atomic_inc(&sbi->wb_sync_req[NODE]);
    340	ret = f2fs_fsync_node_pages(sbi, inode, &wbc, atomic, &seq_id);
    341	atomic_dec(&sbi->wb_sync_req[NODE]);
    342	if (ret)
    343		goto out;
    344
    345	/* if cp_error was enabled, we should avoid infinite loop */
    346	if (unlikely(f2fs_cp_error(sbi))) {
    347		ret = -EIO;
    348		goto out;
    349	}
    350
    351	if (f2fs_need_inode_block_update(sbi, ino)) {
    352		f2fs_mark_inode_dirty_sync(inode, true);
    353		f2fs_write_inode(inode, NULL);
    354		goto sync_nodes;
    355	}
    356
    357	/*
     358	 * If it's an atomic write, keeping write ordering is sufficient. So
     359	 * here we don't need to wait for node write completion, since we use
     360	 * the node chain which serializes node blocks. If one of the node
     361	 * writes is reordered, we simply see a broken chain, which stops
     362	 * roll-forward recovery. It means we'll recover all or none of the
     363	 * node blocks covered by the fsync mark.
    364	 */
    365	if (!atomic) {
    366		ret = f2fs_wait_on_node_pages_writeback(sbi, seq_id);
    367		if (ret)
    368			goto out;
    369	}
    370
     371	/* once recovery info is written, we don't need to track this */
    372	f2fs_remove_ino_entry(sbi, ino, APPEND_INO);
    373	clear_inode_flag(inode, FI_APPEND_WRITE);
    374flush_out:
    375	if ((!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) ||
    376	    (atomic && !test_opt(sbi, NOBARRIER) && f2fs_sb_has_blkzoned(sbi)))
    377		ret = f2fs_issue_flush(sbi, inode->i_ino);
    378	if (!ret) {
    379		f2fs_remove_ino_entry(sbi, ino, UPDATE_INO);
    380		clear_inode_flag(inode, FI_UPDATE_WRITE);
    381		f2fs_remove_ino_entry(sbi, ino, FLUSH_INO);
    382	}
    383	f2fs_update_time(sbi, REQ_TIME);
    384out:
    385	trace_f2fs_sync_file_exit(inode, cp_reason, datasync, ret);
    386	return ret;
    387}
    388
    389int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
    390{
    391	if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file)))))
    392		return -EIO;
    393	return f2fs_do_sync_file(file, start, end, datasync, false);
    394}
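
        /*
         * Illustrative userspace view (an assumption about callers, not part
         * of this file): the VFS invokes f2fs_sync_file() with datasync == 1
         * for fdatasync(2) and datasync == 0 for fsync(2). With datasync set,
         * the path above prefers in-place updates (FI_NEED_IPU) to keep the
         * roll-forward log small, e.g.:
         *
         *	write(fd, buf, len);
         *	fdatasync(fd);	// data + required node blocks
         *	fsync(fd);	// may additionally force a checkpoint,
         *			// see need_do_checkpoint()
         */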
    395
    396static bool __found_offset(struct address_space *mapping, block_t blkaddr,
    397				pgoff_t index, int whence)
    398{
    399	switch (whence) {
    400	case SEEK_DATA:
    401		if (__is_valid_data_blkaddr(blkaddr))
    402			return true;
    403		if (blkaddr == NEW_ADDR &&
    404		    xa_get_mark(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY))
    405			return true;
    406		break;
    407	case SEEK_HOLE:
    408		if (blkaddr == NULL_ADDR)
    409			return true;
    410		break;
    411	}
    412	return false;
    413}
    414
    415static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
    416{
    417	struct inode *inode = file->f_mapping->host;
    418	loff_t maxbytes = inode->i_sb->s_maxbytes;
    419	struct dnode_of_data dn;
    420	pgoff_t pgofs, end_offset;
    421	loff_t data_ofs = offset;
    422	loff_t isize;
    423	int err = 0;
    424
    425	inode_lock(inode);
    426
    427	isize = i_size_read(inode);
    428	if (offset >= isize)
    429		goto fail;
    430
    431	/* handle inline data case */
    432	if (f2fs_has_inline_data(inode)) {
    433		if (whence == SEEK_HOLE) {
    434			data_ofs = isize;
    435			goto found;
    436		} else if (whence == SEEK_DATA) {
    437			data_ofs = offset;
    438			goto found;
    439		}
    440	}
    441
    442	pgofs = (pgoff_t)(offset >> PAGE_SHIFT);
    443
    444	for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
    445		set_new_dnode(&dn, inode, NULL, NULL, 0);
    446		err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE);
    447		if (err && err != -ENOENT) {
    448			goto fail;
    449		} else if (err == -ENOENT) {
     450			/* direct node does not exist */
    451			if (whence == SEEK_DATA) {
    452				pgofs = f2fs_get_next_page_offset(&dn, pgofs);
    453				continue;
    454			} else {
    455				goto found;
    456			}
    457		}
    458
    459		end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
    460
    461		/* find data/hole in dnode block */
    462		for (; dn.ofs_in_node < end_offset;
    463				dn.ofs_in_node++, pgofs++,
    464				data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
    465			block_t blkaddr;
    466
    467			blkaddr = f2fs_data_blkaddr(&dn);
    468
    469			if (__is_valid_data_blkaddr(blkaddr) &&
    470				!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
    471					blkaddr, DATA_GENERIC_ENHANCE)) {
    472				f2fs_put_dnode(&dn);
    473				goto fail;
    474			}
    475
    476			if (__found_offset(file->f_mapping, blkaddr,
    477							pgofs, whence)) {
    478				f2fs_put_dnode(&dn);
    479				goto found;
    480			}
    481		}
    482		f2fs_put_dnode(&dn);
    483	}
    484
    485	if (whence == SEEK_DATA)
    486		goto fail;
    487found:
    488	if (whence == SEEK_HOLE && data_ofs > isize)
    489		data_ofs = isize;
    490	inode_unlock(inode);
    491	return vfs_setpos(file, data_ofs, maxbytes);
    492fail:
    493	inode_unlock(inode);
    494	return -ENXIO;
    495}
    496
    497static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
    498{
    499	struct inode *inode = file->f_mapping->host;
    500	loff_t maxbytes = inode->i_sb->s_maxbytes;
    501
    502	if (f2fs_compressed_file(inode))
    503		maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS;
    504
    505	switch (whence) {
    506	case SEEK_SET:
    507	case SEEK_CUR:
    508	case SEEK_END:
    509		return generic_file_llseek_size(file, offset, whence,
    510						maxbytes, i_size_read(inode));
    511	case SEEK_DATA:
    512	case SEEK_HOLE:
    513		if (offset < 0)
    514			return -ENXIO;
    515		return f2fs_seek_block(file, offset, whence);
    516	}
    517
    518	return -EINVAL;
    519}
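
        /*
         * Hypothetical userspace sketch of the SEEK_DATA/SEEK_HOLE cases
         * handled by f2fs_seek_block() above; per lseek(2), ENXIO means
         * there is no data (or hole) at or after the given offset:
         *
         *	off_t pos = lseek(fd, 0, SEEK_DATA);
         *	if (pos == (off_t)-1 && errno == ENXIO)
         *		;				// no data past offset 0
         *	else
         *		pos = lseek(fd, pos, SEEK_HOLE);	// extent end
         */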
    520
    521static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
    522{
    523	struct inode *inode = file_inode(file);
    524
    525	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
    526		return -EIO;
    527
    528	if (!f2fs_is_compress_backend_ready(inode))
    529		return -EOPNOTSUPP;
    530
    531	file_accessed(file);
    532	vma->vm_ops = &f2fs_file_vm_ops;
    533	set_inode_flag(inode, FI_MMAP_FILE);
    534	return 0;
    535}
    536
    537static int f2fs_file_open(struct inode *inode, struct file *filp)
    538{
    539	int err = fscrypt_file_open(inode, filp);
    540
    541	if (err)
    542		return err;
    543
    544	if (!f2fs_is_compress_backend_ready(inode))
    545		return -EOPNOTSUPP;
    546
    547	err = fsverity_file_open(inode, filp);
    548	if (err)
    549		return err;
    550
    551	filp->f_mode |= FMODE_NOWAIT;
    552
    553	return dquot_file_open(inode, filp);
    554}
    555
    556void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
    557{
    558	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
    559	struct f2fs_node *raw_node;
    560	int nr_free = 0, ofs = dn->ofs_in_node, len = count;
    561	__le32 *addr;
    562	int base = 0;
    563	bool compressed_cluster = false;
    564	int cluster_index = 0, valid_blocks = 0;
    565	int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
    566	bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks);
    567
    568	if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
    569		base = get_extra_isize(dn->inode);
    570
    571	raw_node = F2FS_NODE(dn->node_page);
    572	addr = blkaddr_in_node(raw_node) + base + ofs;
    573
     574	/* Assumption: truncation starts at a cluster boundary */
    575	for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) {
    576		block_t blkaddr = le32_to_cpu(*addr);
    577
    578		if (f2fs_compressed_file(dn->inode) &&
    579					!(cluster_index & (cluster_size - 1))) {
    580			if (compressed_cluster)
    581				f2fs_i_compr_blocks_update(dn->inode,
    582							valid_blocks, false);
    583			compressed_cluster = (blkaddr == COMPRESS_ADDR);
    584			valid_blocks = 0;
    585		}
    586
    587		if (blkaddr == NULL_ADDR)
    588			continue;
    589
    590		dn->data_blkaddr = NULL_ADDR;
    591		f2fs_set_data_blkaddr(dn);
    592
    593		if (__is_valid_data_blkaddr(blkaddr)) {
    594			if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
    595					DATA_GENERIC_ENHANCE))
    596				continue;
    597			if (compressed_cluster)
    598				valid_blocks++;
    599		}
    600
    601		if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
    602			clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN);
    603
    604		f2fs_invalidate_blocks(sbi, blkaddr);
    605
    606		if (!released || blkaddr != COMPRESS_ADDR)
    607			nr_free++;
    608	}
    609
    610	if (compressed_cluster)
    611		f2fs_i_compr_blocks_update(dn->inode, valid_blocks, false);
    612
    613	if (nr_free) {
    614		pgoff_t fofs;
    615		/*
     616		 * once we invalidate a valid blkaddr in the range [ofs, ofs + count],
     617		 * we invalidate every blkaddr in the whole range.
    618		 */
    619		fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page),
    620							dn->inode) + ofs;
    621		f2fs_update_extent_cache_range(dn, fofs, 0, len);
    622		dec_valid_block_count(sbi, dn->inode, nr_free);
    623	}
    624	dn->ofs_in_node = ofs;
    625
    626	f2fs_update_time(sbi, REQ_TIME);
    627	trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid,
    628					 dn->ofs_in_node, nr_free);
    629}
    630
    631void f2fs_truncate_data_blocks(struct dnode_of_data *dn)
    632{
    633	f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK(dn->inode));
    634}
    635
    636static int truncate_partial_data_page(struct inode *inode, u64 from,
    637								bool cache_only)
    638{
    639	loff_t offset = from & (PAGE_SIZE - 1);
    640	pgoff_t index = from >> PAGE_SHIFT;
    641	struct address_space *mapping = inode->i_mapping;
    642	struct page *page;
    643
    644	if (!offset && !cache_only)
    645		return 0;
    646
    647	if (cache_only) {
    648		page = find_lock_page(mapping, index);
    649		if (page && PageUptodate(page))
    650			goto truncate_out;
    651		f2fs_put_page(page, 1);
    652		return 0;
    653	}
    654
    655	page = f2fs_get_lock_data_page(inode, index, true);
    656	if (IS_ERR(page))
    657		return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page);
    658truncate_out:
    659	f2fs_wait_on_page_writeback(page, DATA, true, true);
    660	zero_user(page, offset, PAGE_SIZE - offset);
    661
    662	/* An encrypted inode should have a key and truncate the last page. */
    663	f2fs_bug_on(F2FS_I_SB(inode), cache_only && IS_ENCRYPTED(inode));
    664	if (!cache_only)
    665		set_page_dirty(page);
    666	f2fs_put_page(page, 1);
    667	return 0;
    668}
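
        /*
         * Worked example for the offset math above, assuming PAGE_SIZE == 4096:
         * from == 5000 gives offset == 904 and index == 1, so bytes [904, 4096)
         * of page 1 are zeroed while [0, 904) survive. A page-aligned `from`
         * (offset == 0) with cache_only == false returns early with no work.
         */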
    669
    670int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock)
    671{
    672	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
    673	struct dnode_of_data dn;
    674	pgoff_t free_from;
    675	int count = 0, err = 0;
    676	struct page *ipage;
    677	bool truncate_page = false;
    678
    679	trace_f2fs_truncate_blocks_enter(inode, from);
    680
    681	free_from = (pgoff_t)F2FS_BLK_ALIGN(from);
    682
    683	if (free_from >= max_file_blocks(inode))
    684		goto free_partial;
    685
    686	if (lock)
    687		f2fs_lock_op(sbi);
    688
    689	ipage = f2fs_get_node_page(sbi, inode->i_ino);
    690	if (IS_ERR(ipage)) {
    691		err = PTR_ERR(ipage);
    692		goto out;
    693	}
    694
    695	if (f2fs_has_inline_data(inode)) {
    696		f2fs_truncate_inline_inode(inode, ipage, from);
    697		f2fs_put_page(ipage, 1);
    698		truncate_page = true;
    699		goto out;
    700	}
    701
    702	set_new_dnode(&dn, inode, ipage, NULL, 0);
    703	err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA);
    704	if (err) {
    705		if (err == -ENOENT)
    706			goto free_next;
    707		goto out;
    708	}
    709
    710	count = ADDRS_PER_PAGE(dn.node_page, inode);
    711
    712	count -= dn.ofs_in_node;
    713	f2fs_bug_on(sbi, count < 0);
    714
    715	if (dn.ofs_in_node || IS_INODE(dn.node_page)) {
    716		f2fs_truncate_data_blocks_range(&dn, count);
    717		free_from += count;
    718	}
    719
    720	f2fs_put_dnode(&dn);
    721free_next:
    722	err = f2fs_truncate_inode_blocks(inode, free_from);
    723out:
    724	if (lock)
    725		f2fs_unlock_op(sbi);
    726free_partial:
    727	/* lastly zero out the first data page */
    728	if (!err)
    729		err = truncate_partial_data_page(inode, from, truncate_page);
    730
    731	trace_f2fs_truncate_blocks_exit(inode, err);
    732	return err;
    733}
    734
    735int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
    736{
    737	u64 free_from = from;
    738	int err;
    739
    740#ifdef CONFIG_F2FS_FS_COMPRESSION
    741	/*
     742	 * for compressed files, only cluster-size-aligned
     743	 * truncation is supported.
    744	 */
    745	if (f2fs_compressed_file(inode))
    746		free_from = round_up(from,
    747				F2FS_I(inode)->i_cluster_size << PAGE_SHIFT);
    748#endif
    749
    750	err = f2fs_do_truncate_blocks(inode, free_from, lock);
    751	if (err)
    752		return err;
    753
    754#ifdef CONFIG_F2FS_FS_COMPRESSION
    755	/*
     756	 * For a compressed file, direct write is disallowed after its compressed
     757	 * blocks are released, but should be allowed again after truncation to zero.
    758	 */
    759	if (f2fs_compressed_file(inode) && !free_from
    760			&& is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
    761		clear_inode_flag(inode, FI_COMPRESS_RELEASED);
    762
    763	if (from != free_from) {
    764		err = f2fs_truncate_partial_cluster(inode, from, lock);
    765		if (err)
    766			return err;
    767	}
    768#endif
    769
    770	return 0;
    771}
    772
    773int f2fs_truncate(struct inode *inode)
    774{
    775	int err;
    776
    777	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
    778		return -EIO;
    779
    780	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
    781				S_ISLNK(inode->i_mode)))
    782		return 0;
    783
    784	trace_f2fs_truncate(inode);
    785
    786	if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) {
    787		f2fs_show_injection_info(F2FS_I_SB(inode), FAULT_TRUNCATE);
    788		return -EIO;
    789	}
    790
    791	err = f2fs_dquot_initialize(inode);
    792	if (err)
    793		return err;
    794
    795	/* we should check inline_data size */
    796	if (!f2fs_may_inline_data(inode)) {
    797		err = f2fs_convert_inline_inode(inode);
    798		if (err)
    799			return err;
    800	}
    801
    802	err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
    803	if (err)
    804		return err;
    805
    806	inode->i_mtime = inode->i_ctime = current_time(inode);
    807	f2fs_mark_inode_dirty_sync(inode, false);
    808	return 0;
    809}
    810
    811int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path,
    812		 struct kstat *stat, u32 request_mask, unsigned int query_flags)
    813{
    814	struct inode *inode = d_inode(path->dentry);
    815	struct f2fs_inode_info *fi = F2FS_I(inode);
    816	struct f2fs_inode *ri = NULL;
    817	unsigned int flags;
    818
    819	if (f2fs_has_extra_attr(inode) &&
    820			f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) &&
    821			F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
    822		stat->result_mask |= STATX_BTIME;
    823		stat->btime.tv_sec = fi->i_crtime.tv_sec;
    824		stat->btime.tv_nsec = fi->i_crtime.tv_nsec;
    825	}
    826
    827	flags = fi->i_flags;
    828	if (flags & F2FS_COMPR_FL)
    829		stat->attributes |= STATX_ATTR_COMPRESSED;
    830	if (flags & F2FS_APPEND_FL)
    831		stat->attributes |= STATX_ATTR_APPEND;
    832	if (IS_ENCRYPTED(inode))
    833		stat->attributes |= STATX_ATTR_ENCRYPTED;
    834	if (flags & F2FS_IMMUTABLE_FL)
    835		stat->attributes |= STATX_ATTR_IMMUTABLE;
    836	if (flags & F2FS_NODUMP_FL)
    837		stat->attributes |= STATX_ATTR_NODUMP;
    838	if (IS_VERITY(inode))
    839		stat->attributes |= STATX_ATTR_VERITY;
    840
    841	stat->attributes_mask |= (STATX_ATTR_COMPRESSED |
    842				  STATX_ATTR_APPEND |
    843				  STATX_ATTR_ENCRYPTED |
    844				  STATX_ATTR_IMMUTABLE |
    845				  STATX_ATTR_NODUMP |
    846				  STATX_ATTR_VERITY);
    847
    848	generic_fillattr(mnt_userns, inode, stat);
    849
    850	/* we need to show initial sectors used for inline_data/dentries */
    851	if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) ||
    852					f2fs_has_inline_dentry(inode))
    853		stat->blocks += (stat->size + 511) >> 9;
    854
    855	return 0;
    856}
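
        /*
         * Hypothetical statx(2) usage matching the STATX_BTIME handling above;
         * btime is reported only when the on-disk inode stores i_crtime:
         *
         *	struct statx stx;
         *	statx(AT_FDCWD, path, 0, STATX_BTIME, &stx);
         *	if (stx.stx_mask & STATX_BTIME)
         *		printf("%lld\n", (long long)stx.stx_btime.tv_sec);
         */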
    857
    858#ifdef CONFIG_F2FS_FS_POSIX_ACL
    859static void __setattr_copy(struct user_namespace *mnt_userns,
    860			   struct inode *inode, const struct iattr *attr)
    861{
    862	unsigned int ia_valid = attr->ia_valid;
    863
    864	if (ia_valid & ATTR_UID)
    865		inode->i_uid = attr->ia_uid;
    866	if (ia_valid & ATTR_GID)
    867		inode->i_gid = attr->ia_gid;
    868	if (ia_valid & ATTR_ATIME)
    869		inode->i_atime = attr->ia_atime;
    870	if (ia_valid & ATTR_MTIME)
    871		inode->i_mtime = attr->ia_mtime;
    872	if (ia_valid & ATTR_CTIME)
    873		inode->i_ctime = attr->ia_ctime;
    874	if (ia_valid & ATTR_MODE) {
    875		umode_t mode = attr->ia_mode;
    876		kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);
    877
    878		if (!in_group_p(kgid) && !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
    879			mode &= ~S_ISGID;
    880		set_acl_inode(inode, mode);
    881	}
    882}
    883#else
    884#define __setattr_copy setattr_copy
    885#endif
    886
    887int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
    888		 struct iattr *attr)
    889{
    890	struct inode *inode = d_inode(dentry);
    891	int err;
    892
    893	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
    894		return -EIO;
    895
    896	if (unlikely(IS_IMMUTABLE(inode)))
    897		return -EPERM;
    898
    899	if (unlikely(IS_APPEND(inode) &&
    900			(attr->ia_valid & (ATTR_MODE | ATTR_UID |
    901				  ATTR_GID | ATTR_TIMES_SET))))
    902		return -EPERM;
    903
    904	if ((attr->ia_valid & ATTR_SIZE) &&
    905		!f2fs_is_compress_backend_ready(inode))
    906		return -EOPNOTSUPP;
    907
    908	err = setattr_prepare(mnt_userns, dentry, attr);
    909	if (err)
    910		return err;
    911
    912	err = fscrypt_prepare_setattr(dentry, attr);
    913	if (err)
    914		return err;
    915
    916	err = fsverity_prepare_setattr(dentry, attr);
    917	if (err)
    918		return err;
    919
    920	if (is_quota_modification(inode, attr)) {
    921		err = f2fs_dquot_initialize(inode);
    922		if (err)
    923			return err;
    924	}
    925	if ((attr->ia_valid & ATTR_UID &&
    926		!uid_eq(attr->ia_uid, inode->i_uid)) ||
    927		(attr->ia_valid & ATTR_GID &&
    928		!gid_eq(attr->ia_gid, inode->i_gid))) {
    929		f2fs_lock_op(F2FS_I_SB(inode));
    930		err = dquot_transfer(inode, attr);
    931		if (err) {
    932			set_sbi_flag(F2FS_I_SB(inode),
    933					SBI_QUOTA_NEED_REPAIR);
    934			f2fs_unlock_op(F2FS_I_SB(inode));
    935			return err;
    936		}
    937		/*
    938		 * update uid/gid under lock_op(), so that dquot and inode can
    939		 * be updated atomically.
    940		 */
    941		if (attr->ia_valid & ATTR_UID)
    942			inode->i_uid = attr->ia_uid;
    943		if (attr->ia_valid & ATTR_GID)
    944			inode->i_gid = attr->ia_gid;
    945		f2fs_mark_inode_dirty_sync(inode, true);
    946		f2fs_unlock_op(F2FS_I_SB(inode));
    947	}
    948
    949	if (attr->ia_valid & ATTR_SIZE) {
    950		loff_t old_size = i_size_read(inode);
    951
    952		if (attr->ia_size > MAX_INLINE_DATA(inode)) {
    953			/*
     954			 * convert the inline inode before i_size_write() so an
     955			 * inode keeping the inline flag stays within the inline_data size.
    956			 */
    957			err = f2fs_convert_inline_inode(inode);
    958			if (err)
    959				return err;
    960		}
    961
    962		f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
    963		filemap_invalidate_lock(inode->i_mapping);
    964
    965		truncate_setsize(inode, attr->ia_size);
    966
    967		if (attr->ia_size <= old_size)
    968			err = f2fs_truncate(inode);
    969		/*
    970		 * do not trim all blocks after i_size if target size is
    971		 * larger than i_size.
    972		 */
    973		filemap_invalidate_unlock(inode->i_mapping);
    974		f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
    975		if (err)
    976			return err;
    977
    978		spin_lock(&F2FS_I(inode)->i_size_lock);
    979		inode->i_mtime = inode->i_ctime = current_time(inode);
    980		F2FS_I(inode)->last_disk_size = i_size_read(inode);
    981		spin_unlock(&F2FS_I(inode)->i_size_lock);
    982	}
    983
    984	__setattr_copy(mnt_userns, inode, attr);
    985
    986	if (attr->ia_valid & ATTR_MODE) {
    987		err = posix_acl_chmod(mnt_userns, inode, f2fs_get_inode_mode(inode));
    988
    989		if (is_inode_flag_set(inode, FI_ACL_MODE)) {
    990			if (!err)
    991				inode->i_mode = F2FS_I(inode)->i_acl_mode;
    992			clear_inode_flag(inode, FI_ACL_MODE);
    993		}
    994	}
    995
     996	/* file size may have changed here */
    997	f2fs_mark_inode_dirty_sync(inode, true);
    998
    999	/* inode change will produce dirty node pages flushed by checkpoint */
   1000	f2fs_balance_fs(F2FS_I_SB(inode), true);
   1001
   1002	return err;
   1003}
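
        /*
         * Illustrative call path (VFS behaviour, assumed rather than defined
         * here): a plain ftruncate(fd, 0) reaches f2fs_setattr() with
         * ATTR_SIZE set in attr->ia_valid and, since the new size is not
         * larger than the old one, ends in f2fs_truncate() above.
         */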
   1004
   1005const struct inode_operations f2fs_file_inode_operations = {
   1006	.getattr	= f2fs_getattr,
   1007	.setattr	= f2fs_setattr,
   1008	.get_acl	= f2fs_get_acl,
   1009	.set_acl	= f2fs_set_acl,
   1010	.listxattr	= f2fs_listxattr,
   1011	.fiemap		= f2fs_fiemap,
   1012	.fileattr_get	= f2fs_fileattr_get,
   1013	.fileattr_set	= f2fs_fileattr_set,
   1014};
   1015
   1016static int fill_zero(struct inode *inode, pgoff_t index,
   1017					loff_t start, loff_t len)
   1018{
   1019	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   1020	struct page *page;
   1021
   1022	if (!len)
   1023		return 0;
   1024
   1025	f2fs_balance_fs(sbi, true);
   1026
   1027	f2fs_lock_op(sbi);
   1028	page = f2fs_get_new_data_page(inode, NULL, index, false);
   1029	f2fs_unlock_op(sbi);
   1030
   1031	if (IS_ERR(page))
   1032		return PTR_ERR(page);
   1033
   1034	f2fs_wait_on_page_writeback(page, DATA, true, true);
   1035	zero_user(page, start, len);
   1036	set_page_dirty(page);
   1037	f2fs_put_page(page, 1);
   1038	return 0;
   1039}
   1040
   1041int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
   1042{
   1043	int err;
   1044
   1045	while (pg_start < pg_end) {
   1046		struct dnode_of_data dn;
   1047		pgoff_t end_offset, count;
   1048
   1049		set_new_dnode(&dn, inode, NULL, NULL, 0);
   1050		err = f2fs_get_dnode_of_data(&dn, pg_start, LOOKUP_NODE);
   1051		if (err) {
   1052			if (err == -ENOENT) {
   1053				pg_start = f2fs_get_next_page_offset(&dn,
   1054								pg_start);
   1055				continue;
   1056			}
   1057			return err;
   1058		}
   1059
   1060		end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
   1061		count = min(end_offset - dn.ofs_in_node, pg_end - pg_start);
   1062
   1063		f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset);
   1064
   1065		f2fs_truncate_data_blocks_range(&dn, count);
   1066		f2fs_put_dnode(&dn);
   1067
   1068		pg_start += count;
   1069	}
   1070	return 0;
   1071}
   1072
   1073static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
   1074{
   1075	pgoff_t pg_start, pg_end;
   1076	loff_t off_start, off_end;
   1077	int ret;
   1078
   1079	ret = f2fs_convert_inline_inode(inode);
   1080	if (ret)
   1081		return ret;
   1082
   1083	pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
   1084	pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
   1085
   1086	off_start = offset & (PAGE_SIZE - 1);
   1087	off_end = (offset + len) & (PAGE_SIZE - 1);
   1088
   1089	if (pg_start == pg_end) {
   1090		ret = fill_zero(inode, pg_start, off_start,
   1091						off_end - off_start);
   1092		if (ret)
   1093			return ret;
   1094	} else {
   1095		if (off_start) {
   1096			ret = fill_zero(inode, pg_start++, off_start,
   1097						PAGE_SIZE - off_start);
   1098			if (ret)
   1099				return ret;
   1100		}
   1101		if (off_end) {
   1102			ret = fill_zero(inode, pg_end, 0, off_end);
   1103			if (ret)
   1104				return ret;
   1105		}
   1106
   1107		if (pg_start < pg_end) {
   1108			loff_t blk_start, blk_end;
   1109			struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   1110
   1111			f2fs_balance_fs(sbi, true);
   1112
   1113			blk_start = (loff_t)pg_start << PAGE_SHIFT;
   1114			blk_end = (loff_t)pg_end << PAGE_SHIFT;
   1115
   1116			f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
   1117			filemap_invalidate_lock(inode->i_mapping);
   1118
   1119			truncate_pagecache_range(inode, blk_start, blk_end - 1);
   1120
   1121			f2fs_lock_op(sbi);
   1122			ret = f2fs_truncate_hole(inode, pg_start, pg_end);
   1123			f2fs_unlock_op(sbi);
   1124
   1125			filemap_invalidate_unlock(inode->i_mapping);
   1126			f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
   1127		}
   1128	}
   1129
   1130	return ret;
   1131}
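
        /*
         * Worked example of the split above, assuming PAGE_SIZE == 4096:
         * punching offset == 1000, len == 10000 gives pg_start == 0,
         * pg_end == 2, off_start == 1000, off_end == 2808. Page 0 is zeroed
         * in [1000, 4096), page 2 in [0, 2808), and only the fully covered
         * page 1 has its blocks freed via f2fs_truncate_hole().
         */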
   1132
   1133static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr,
   1134				int *do_replace, pgoff_t off, pgoff_t len)
   1135{
   1136	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   1137	struct dnode_of_data dn;
   1138	int ret, done, i;
   1139
   1140next_dnode:
   1141	set_new_dnode(&dn, inode, NULL, NULL, 0);
   1142	ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
   1143	if (ret && ret != -ENOENT) {
   1144		return ret;
   1145	} else if (ret == -ENOENT) {
   1146		if (dn.max_level == 0)
   1147			return -ENOENT;
   1148		done = min((pgoff_t)ADDRS_PER_BLOCK(inode) -
   1149						dn.ofs_in_node, len);
   1150		blkaddr += done;
   1151		do_replace += done;
   1152		goto next;
   1153	}
   1154
   1155	done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, inode) -
   1156							dn.ofs_in_node, len);
   1157	for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) {
   1158		*blkaddr = f2fs_data_blkaddr(&dn);
   1159
   1160		if (__is_valid_data_blkaddr(*blkaddr) &&
   1161			!f2fs_is_valid_blkaddr(sbi, *blkaddr,
   1162					DATA_GENERIC_ENHANCE)) {
   1163			f2fs_put_dnode(&dn);
   1164			return -EFSCORRUPTED;
   1165		}
   1166
   1167		if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) {
   1168
   1169			if (f2fs_lfs_mode(sbi)) {
   1170				f2fs_put_dnode(&dn);
   1171				return -EOPNOTSUPP;
   1172			}
   1173
   1174			/* do not invalidate this block address */
   1175			f2fs_update_data_blkaddr(&dn, NULL_ADDR);
   1176			*do_replace = 1;
   1177		}
   1178	}
   1179	f2fs_put_dnode(&dn);
   1180next:
   1181	len -= done;
   1182	off += done;
   1183	if (len)
   1184		goto next_dnode;
   1185	return 0;
   1186}
   1187
   1188static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr,
   1189				int *do_replace, pgoff_t off, int len)
   1190{
   1191	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   1192	struct dnode_of_data dn;
   1193	int ret, i;
   1194
   1195	for (i = 0; i < len; i++, do_replace++, blkaddr++) {
   1196		if (*do_replace == 0)
   1197			continue;
   1198
   1199		set_new_dnode(&dn, inode, NULL, NULL, 0);
   1200		ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA);
   1201		if (ret) {
   1202			dec_valid_block_count(sbi, inode, 1);
   1203			f2fs_invalidate_blocks(sbi, *blkaddr);
   1204		} else {
   1205			f2fs_update_data_blkaddr(&dn, *blkaddr);
   1206		}
   1207		f2fs_put_dnode(&dn);
   1208	}
   1209	return 0;
   1210}
   1211
   1212static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
   1213			block_t *blkaddr, int *do_replace,
   1214			pgoff_t src, pgoff_t dst, pgoff_t len, bool full)
   1215{
   1216	struct f2fs_sb_info *sbi = F2FS_I_SB(src_inode);
   1217	pgoff_t i = 0;
   1218	int ret;
   1219
   1220	while (i < len) {
   1221		if (blkaddr[i] == NULL_ADDR && !full) {
   1222			i++;
   1223			continue;
   1224		}
   1225
   1226		if (do_replace[i] || blkaddr[i] == NULL_ADDR) {
   1227			struct dnode_of_data dn;
   1228			struct node_info ni;
   1229			size_t new_size;
   1230			pgoff_t ilen;
   1231
   1232			set_new_dnode(&dn, dst_inode, NULL, NULL, 0);
   1233			ret = f2fs_get_dnode_of_data(&dn, dst + i, ALLOC_NODE);
   1234			if (ret)
   1235				return ret;
   1236
   1237			ret = f2fs_get_node_info(sbi, dn.nid, &ni, false);
   1238			if (ret) {
   1239				f2fs_put_dnode(&dn);
   1240				return ret;
   1241			}
   1242
   1243			ilen = min((pgoff_t)
   1244				ADDRS_PER_PAGE(dn.node_page, dst_inode) -
   1245						dn.ofs_in_node, len - i);
   1246			do {
   1247				dn.data_blkaddr = f2fs_data_blkaddr(&dn);
   1248				f2fs_truncate_data_blocks_range(&dn, 1);
   1249
   1250				if (do_replace[i]) {
   1251					f2fs_i_blocks_write(src_inode,
   1252							1, false, false);
   1253					f2fs_i_blocks_write(dst_inode,
   1254							1, true, false);
   1255					f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
   1256					blkaddr[i], ni.version, true, false);
   1257
   1258					do_replace[i] = 0;
   1259				}
   1260				dn.ofs_in_node++;
   1261				i++;
   1262				new_size = (loff_t)(dst + i) << PAGE_SHIFT;
   1263				if (dst_inode->i_size < new_size)
   1264					f2fs_i_size_write(dst_inode, new_size);
   1265			} while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR));
   1266
   1267			f2fs_put_dnode(&dn);
   1268		} else {
   1269			struct page *psrc, *pdst;
   1270
   1271			psrc = f2fs_get_lock_data_page(src_inode,
   1272							src + i, true);
   1273			if (IS_ERR(psrc))
   1274				return PTR_ERR(psrc);
   1275			pdst = f2fs_get_new_data_page(dst_inode, NULL, dst + i,
   1276								true);
   1277			if (IS_ERR(pdst)) {
   1278				f2fs_put_page(psrc, 1);
   1279				return PTR_ERR(pdst);
   1280			}
   1281			f2fs_copy_page(psrc, pdst);
   1282			set_page_dirty(pdst);
   1283			f2fs_put_page(pdst, 1);
   1284			f2fs_put_page(psrc, 1);
   1285
   1286			ret = f2fs_truncate_hole(src_inode,
   1287						src + i, src + i + 1);
   1288			if (ret)
   1289				return ret;
   1290			i++;
   1291		}
   1292	}
   1293	return 0;
   1294}
   1295
   1296static int __exchange_data_block(struct inode *src_inode,
   1297			struct inode *dst_inode, pgoff_t src, pgoff_t dst,
   1298			pgoff_t len, bool full)
   1299{
   1300	block_t *src_blkaddr;
   1301	int *do_replace;
   1302	pgoff_t olen;
   1303	int ret;
   1304
   1305	while (len) {
   1306		olen = min((pgoff_t)4 * ADDRS_PER_BLOCK(src_inode), len);
   1307
   1308		src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode),
   1309					array_size(olen, sizeof(block_t)),
   1310					GFP_NOFS);
   1311		if (!src_blkaddr)
   1312			return -ENOMEM;
   1313
   1314		do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode),
   1315					array_size(olen, sizeof(int)),
   1316					GFP_NOFS);
   1317		if (!do_replace) {
   1318			kvfree(src_blkaddr);
   1319			return -ENOMEM;
   1320		}
   1321
   1322		ret = __read_out_blkaddrs(src_inode, src_blkaddr,
   1323					do_replace, src, olen);
   1324		if (ret)
   1325			goto roll_back;
   1326
   1327		ret = __clone_blkaddrs(src_inode, dst_inode, src_blkaddr,
   1328					do_replace, src, dst, olen, full);
   1329		if (ret)
   1330			goto roll_back;
   1331
   1332		src += olen;
   1333		dst += olen;
   1334		len -= olen;
   1335
   1336		kvfree(src_blkaddr);
   1337		kvfree(do_replace);
   1338	}
   1339	return 0;
   1340
   1341roll_back:
   1342	__roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, olen);
   1343	kvfree(src_blkaddr);
   1344	kvfree(do_replace);
   1345	return ret;
   1346}
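
        /*
         * Shape of the move protocol above, per chunk of at most
         * 4 * ADDRS_PER_BLOCK() addresses: (1) __read_out_blkaddrs() records
         * the source blkaddrs and detaches the non-checkpointed ones, (2)
         * __clone_blkaddrs() grafts or copies them into the destination, and
         * (3) on failure __roll_back_blkaddrs() reattaches what was detached,
         * so a partially moved chunk never leaks blocks.
         */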
   1347
   1348static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
   1349{
   1350	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   1351	pgoff_t nrpages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
   1352	pgoff_t start = offset >> PAGE_SHIFT;
   1353	pgoff_t end = (offset + len) >> PAGE_SHIFT;
   1354	int ret;
   1355
   1356	f2fs_balance_fs(sbi, true);
   1357
   1358	/* avoid gc operation during block exchange */
   1359	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
   1360	filemap_invalidate_lock(inode->i_mapping);
   1361
   1362	f2fs_lock_op(sbi);
   1363	f2fs_drop_extent_tree(inode);
   1364	truncate_pagecache(inode, offset);
   1365	ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true);
   1366	f2fs_unlock_op(sbi);
   1367
   1368	filemap_invalidate_unlock(inode->i_mapping);
   1369	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
   1370	return ret;
   1371}
   1372
   1373static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
   1374{
   1375	loff_t new_size;
   1376	int ret;
   1377
   1378	if (offset + len >= i_size_read(inode))
   1379		return -EINVAL;
   1380
   1381	/* collapse range should be aligned to block size of f2fs. */
   1382	if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
   1383		return -EINVAL;
   1384
   1385	ret = f2fs_convert_inline_inode(inode);
   1386	if (ret)
   1387		return ret;
   1388
   1389	/* write out all dirty pages from offset */
   1390	ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
   1391	if (ret)
   1392		return ret;
   1393
   1394	ret = f2fs_do_collapse(inode, offset, len);
   1395	if (ret)
   1396		return ret;
   1397
   1398	/* write out all moved pages, if possible */
   1399	filemap_invalidate_lock(inode->i_mapping);
   1400	filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
   1401	truncate_pagecache(inode, offset);
   1402
   1403	new_size = i_size_read(inode) - len;
   1404	ret = f2fs_truncate_blocks(inode, new_size, true);
   1405	filemap_invalidate_unlock(inode->i_mapping);
   1406	if (!ret)
   1407		f2fs_i_size_write(inode, new_size);
   1408	return ret;
   1409}
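
        /*
         * Example, assuming 4KiB blocks: collapsing offset == 4096,
         * len == 4096 in a 16KiB file shifts blocks [2..3] down to [1..2]
         * via f2fs_do_collapse() and truncates i_size from 16384 to 12288.
         */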
   1410
   1411static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
   1412								pgoff_t end)
   1413{
   1414	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
   1415	pgoff_t index = start;
   1416	unsigned int ofs_in_node = dn->ofs_in_node;
   1417	blkcnt_t count = 0;
   1418	int ret;
   1419
   1420	for (; index < end; index++, dn->ofs_in_node++) {
   1421		if (f2fs_data_blkaddr(dn) == NULL_ADDR)
   1422			count++;
   1423	}
   1424
   1425	dn->ofs_in_node = ofs_in_node;
   1426	ret = f2fs_reserve_new_blocks(dn, count);
   1427	if (ret)
   1428		return ret;
   1429
   1430	dn->ofs_in_node = ofs_in_node;
   1431	for (index = start; index < end; index++, dn->ofs_in_node++) {
   1432		dn->data_blkaddr = f2fs_data_blkaddr(dn);
   1433		/*
   1434		 * f2fs_reserve_new_blocks will not guarantee entire block
   1435		 * allocation.
   1436		 */
   1437		if (dn->data_blkaddr == NULL_ADDR) {
   1438			ret = -ENOSPC;
   1439			break;
   1440		}
   1441
   1442		if (dn->data_blkaddr == NEW_ADDR)
   1443			continue;
   1444
   1445		if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr,
   1446					DATA_GENERIC_ENHANCE)) {
   1447			ret = -EFSCORRUPTED;
   1448			break;
   1449		}
   1450
   1451		f2fs_invalidate_blocks(sbi, dn->data_blkaddr);
   1452		dn->data_blkaddr = NEW_ADDR;
   1453		f2fs_set_data_blkaddr(dn);
   1454	}
   1455
   1456	f2fs_update_extent_cache_range(dn, start, 0, index - start);
   1457
   1458	return ret;
   1459}
   1460
   1461static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
   1462								int mode)
   1463{
   1464	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   1465	struct address_space *mapping = inode->i_mapping;
   1466	pgoff_t index, pg_start, pg_end;
   1467	loff_t new_size = i_size_read(inode);
   1468	loff_t off_start, off_end;
   1469	int ret = 0;
   1470
   1471	ret = inode_newsize_ok(inode, (len + offset));
   1472	if (ret)
   1473		return ret;
   1474
   1475	ret = f2fs_convert_inline_inode(inode);
   1476	if (ret)
   1477		return ret;
   1478
   1479	ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
   1480	if (ret)
   1481		return ret;
   1482
   1483	pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
   1484	pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
   1485
   1486	off_start = offset & (PAGE_SIZE - 1);
   1487	off_end = (offset + len) & (PAGE_SIZE - 1);
   1488
   1489	if (pg_start == pg_end) {
   1490		ret = fill_zero(inode, pg_start, off_start,
   1491						off_end - off_start);
   1492		if (ret)
   1493			return ret;
   1494
   1495		new_size = max_t(loff_t, new_size, offset + len);
   1496	} else {
   1497		if (off_start) {
   1498			ret = fill_zero(inode, pg_start++, off_start,
   1499						PAGE_SIZE - off_start);
   1500			if (ret)
   1501				return ret;
   1502
   1503			new_size = max_t(loff_t, new_size,
   1504					(loff_t)pg_start << PAGE_SHIFT);
   1505		}
   1506
   1507		for (index = pg_start; index < pg_end;) {
   1508			struct dnode_of_data dn;
   1509			unsigned int end_offset;
   1510			pgoff_t end;
   1511
   1512			f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
   1513			filemap_invalidate_lock(mapping);
   1514
   1515			truncate_pagecache_range(inode,
   1516				(loff_t)index << PAGE_SHIFT,
   1517				((loff_t)pg_end << PAGE_SHIFT) - 1);
   1518
   1519			f2fs_lock_op(sbi);
   1520
   1521			set_new_dnode(&dn, inode, NULL, NULL, 0);
   1522			ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
   1523			if (ret) {
   1524				f2fs_unlock_op(sbi);
   1525				filemap_invalidate_unlock(mapping);
   1526				f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
   1527				goto out;
   1528			}
   1529
   1530			end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
   1531			end = min(pg_end, end_offset - dn.ofs_in_node + index);
   1532
   1533			ret = f2fs_do_zero_range(&dn, index, end);
   1534			f2fs_put_dnode(&dn);
   1535
   1536			f2fs_unlock_op(sbi);
   1537			filemap_invalidate_unlock(mapping);
   1538			f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
   1539
   1540			f2fs_balance_fs(sbi, dn.node_changed);
   1541
   1542			if (ret)
   1543				goto out;
   1544
   1545			index = end;
   1546			new_size = max_t(loff_t, new_size,
   1547					(loff_t)index << PAGE_SHIFT);
   1548		}
   1549
   1550		if (off_end) {
   1551			ret = fill_zero(inode, pg_end, 0, off_end);
   1552			if (ret)
   1553				goto out;
   1554
   1555			new_size = max_t(loff_t, new_size, offset + len);
   1556		}
   1557	}
   1558
   1559out:
   1560	if (new_size > i_size_read(inode)) {
   1561		if (mode & FALLOC_FL_KEEP_SIZE)
   1562			file_set_keep_isize(inode);
   1563		else
   1564			f2fs_i_size_write(inode, new_size);
   1565	}
   1566	return ret;
   1567}
   1568
   1569static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
   1570{
   1571	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   1572	struct address_space *mapping = inode->i_mapping;
   1573	pgoff_t nr, pg_start, pg_end, delta, idx;
   1574	loff_t new_size;
   1575	int ret = 0;
   1576
   1577	new_size = i_size_read(inode) + len;
   1578	ret = inode_newsize_ok(inode, new_size);
   1579	if (ret)
   1580		return ret;
   1581
   1582	if (offset >= i_size_read(inode))
   1583		return -EINVAL;
   1584
   1585	/* insert range should be aligned to block size of f2fs. */
   1586	if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
   1587		return -EINVAL;
   1588
   1589	ret = f2fs_convert_inline_inode(inode);
   1590	if (ret)
   1591		return ret;
   1592
   1593	f2fs_balance_fs(sbi, true);
   1594
   1595	filemap_invalidate_lock(mapping);
   1596	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
   1597	filemap_invalidate_unlock(mapping);
   1598	if (ret)
   1599		return ret;
   1600
   1601	/* write out all dirty pages from offset */
   1602	ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
   1603	if (ret)
   1604		return ret;
   1605
   1606	pg_start = offset >> PAGE_SHIFT;
   1607	pg_end = (offset + len) >> PAGE_SHIFT;
   1608	delta = pg_end - pg_start;
   1609	idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
   1610
   1611	/* avoid gc operation during block exchange */
   1612	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
   1613	filemap_invalidate_lock(mapping);
   1614	truncate_pagecache(inode, offset);
   1615
   1616	while (!ret && idx > pg_start) {
   1617		nr = idx - pg_start;
   1618		if (nr > delta)
   1619			nr = delta;
   1620		idx -= nr;
   1621
   1622		f2fs_lock_op(sbi);
   1623		f2fs_drop_extent_tree(inode);
   1624
   1625		ret = __exchange_data_block(inode, inode, idx,
   1626					idx + delta, nr, false);
   1627		f2fs_unlock_op(sbi);
   1628	}
   1629	filemap_invalidate_unlock(mapping);
   1630	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
   1631
   1632	/* write out all moved pages, if possible */
   1633	filemap_invalidate_lock(mapping);
   1634	filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
   1635	truncate_pagecache(inode, offset);
   1636	filemap_invalidate_unlock(mapping);
   1637
   1638	if (!ret)
   1639		f2fs_i_size_write(inode, new_size);
   1640	return ret;
   1641}
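
        /*
         * Example, assuming 4KiB blocks: inserting offset == 4096,
         * len == 4096 into a 12KiB file gives delta == 1 and idx == 3; the
         * loop above moves block 2 to 3, then block 1 to 2, leaving a hole
         * at block 1 and a new i_size of 16384.
         */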
   1642
   1643static int expand_inode_data(struct inode *inode, loff_t offset,
   1644					loff_t len, int mode)
   1645{
   1646	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   1647	struct f2fs_map_blocks map = { .m_next_pgofs = NULL,
   1648			.m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE,
   1649			.m_may_create = true };
   1650	struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
   1651			.init_gc_type = FG_GC,
   1652			.should_migrate_blocks = false,
   1653			.err_gc_skipped = true,
   1654			.nr_free_secs = 0 };
   1655	pgoff_t pg_start, pg_end;
   1656	loff_t new_size = i_size_read(inode);
   1657	loff_t off_end;
   1658	block_t expanded = 0;
   1659	int err;
   1660
   1661	err = inode_newsize_ok(inode, (len + offset));
   1662	if (err)
   1663		return err;
   1664
   1665	err = f2fs_convert_inline_inode(inode);
   1666	if (err)
   1667		return err;
   1668
   1669	f2fs_balance_fs(sbi, true);
   1670
   1671	pg_start = ((unsigned long long)offset) >> PAGE_SHIFT;
   1672	pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT;
   1673	off_end = (offset + len) & (PAGE_SIZE - 1);
   1674
   1675	map.m_lblk = pg_start;
   1676	map.m_len = pg_end - pg_start;
   1677	if (off_end)
   1678		map.m_len++;
   1679
   1680	if (!map.m_len)
   1681		return 0;
   1682
   1683	if (f2fs_is_pinned_file(inode)) {
   1684		block_t sec_blks = BLKS_PER_SEC(sbi);
   1685		block_t sec_len = roundup(map.m_len, sec_blks);
   1686
   1687		map.m_len = sec_blks;
   1688next_alloc:
   1689		if (has_not_enough_free_secs(sbi, 0,
   1690			GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) {
   1691			f2fs_down_write(&sbi->gc_lock);
   1692			err = f2fs_gc(sbi, &gc_control);
   1693			if (err && err != -ENODATA)
   1694				goto out_err;
   1695		}
   1696
   1697		f2fs_down_write(&sbi->pin_sem);
   1698
   1699		f2fs_lock_op(sbi);
   1700		f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
   1701		f2fs_unlock_op(sbi);
   1702
   1703		map.m_seg_type = CURSEG_COLD_DATA_PINNED;
   1704		err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
   1705		file_dont_truncate(inode);
   1706
   1707		f2fs_up_write(&sbi->pin_sem);
   1708
   1709		expanded += map.m_len;
   1710		sec_len -= map.m_len;
   1711		map.m_lblk += map.m_len;
   1712		if (!err && sec_len)
   1713			goto next_alloc;
   1714
   1715		map.m_len = expanded;
   1716	} else {
   1717		err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
   1718		expanded = map.m_len;
   1719	}
   1720out_err:
   1721	if (err) {
   1722		pgoff_t last_off;
   1723
   1724		if (!expanded)
   1725			return err;
   1726
   1727		last_off = pg_start + expanded - 1;
   1728
   1729		/* update new size to the failed position */
   1730		new_size = (last_off == pg_end) ? offset + len :
   1731					(loff_t)(last_off + 1) << PAGE_SHIFT;
   1732	} else {
   1733		new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end;
   1734	}
   1735
   1736	if (new_size > i_size_read(inode)) {
   1737		if (mode & FALLOC_FL_KEEP_SIZE)
   1738			file_set_keep_isize(inode);
   1739		else
   1740			f2fs_i_size_write(inode, new_size);
   1741	}
   1742
   1743	return err;
   1744}
   1745
   1746static long f2fs_fallocate(struct file *file, int mode,
   1747				loff_t offset, loff_t len)
   1748{
   1749	struct inode *inode = file_inode(file);
   1750	long ret = 0;
   1751
   1752	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
   1753		return -EIO;
   1754	if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode)))
   1755		return -ENOSPC;
   1756	if (!f2fs_is_compress_backend_ready(inode))
   1757		return -EOPNOTSUPP;
   1758
    1759	/* f2fs only supports ->fallocate for regular files */
   1760	if (!S_ISREG(inode->i_mode))
   1761		return -EINVAL;
   1762
   1763	if (IS_ENCRYPTED(inode) &&
   1764		(mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
   1765		return -EOPNOTSUPP;
   1766
   1767	/*
    1768	 * Pinned files should not support partial truncation, since their
    1769	 * blocks may be in use by applications.
   1770	 */
   1771	if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) &&
   1772		(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE |
   1773			FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE)))
   1774		return -EOPNOTSUPP;
   1775
   1776	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
   1777			FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
   1778			FALLOC_FL_INSERT_RANGE))
   1779		return -EOPNOTSUPP;
   1780
   1781	inode_lock(inode);
   1782
   1783	ret = file_modified(file);
   1784	if (ret)
   1785		goto out;
   1786
   1787	if (mode & FALLOC_FL_PUNCH_HOLE) {
   1788		if (offset >= inode->i_size)
   1789			goto out;
   1790
   1791		ret = punch_hole(inode, offset, len);
   1792	} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
   1793		ret = f2fs_collapse_range(inode, offset, len);
   1794	} else if (mode & FALLOC_FL_ZERO_RANGE) {
   1795		ret = f2fs_zero_range(inode, offset, len, mode);
   1796	} else if (mode & FALLOC_FL_INSERT_RANGE) {
   1797		ret = f2fs_insert_range(inode, offset, len);
   1798	} else {
   1799		ret = expand_inode_data(inode, offset, len, mode);
   1800	}
   1801
   1802	if (!ret) {
   1803		inode->i_mtime = inode->i_ctime = current_time(inode);
   1804		f2fs_mark_inode_dirty_sync(inode, false);
   1805		f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
   1806	}
   1807
   1808out:
   1809	inode_unlock(inode);
   1810
   1811	trace_f2fs_fallocate(inode, mode, offset, len, ret);
   1812	return ret;
   1813}
   1814
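/*
 * Usage sketch (editor's addition, not upstream code): how the mode
 * dispatch above is reached from userspace, assuming a regular file on an
 * f2fs mount; error handling omitted, _GNU_SOURCE needed for fallocate().
 *
 *	#include <fcntl.h>
 *	#include <linux/falloc.h>
 *
 *	int fd = open("/mnt/f2fs/file", O_RDWR);
 *	// hits punch_hole(); the VFS requires KEEP_SIZE with PUNCH_HOLE
 *	fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 4096);
 *	// no mode flags: hits expand_inode_data() (preallocation)
 *	fallocate(fd, 0, 0, 1 << 20);
 */
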
   1815static int f2fs_release_file(struct inode *inode, struct file *filp)
   1816{
   1817	/*
   1818	 * f2fs_release_file is called on every close. So we should not
   1819	 * drop any in-memory pages on a close issued by another process.
   1820	 */
   1821	if (!(filp->f_mode & FMODE_WRITE) ||
   1822			atomic_read(&inode->i_writecount) != 1)
   1823		return 0;
   1824
   1825	if (f2fs_is_atomic_file(inode))
   1826		f2fs_abort_atomic_write(inode, true);
   1827	return 0;
   1828}
   1829
   1830static int f2fs_file_flush(struct file *file, fl_owner_t id)
   1831{
   1832	struct inode *inode = file_inode(file);
   1833
   1834	/*
   1835	 * If the process doing a transaction crashes, we should roll it
   1836	 * back. Otherwise, other readers/writers can see a corrupted database
   1837	 * until all the writers close the file. Since this should be done
   1838	 * before dropping the file lock, it needs to happen in ->flush.
   1839	 */
   1840	if (f2fs_is_atomic_file(inode) &&
   1841			F2FS_I(inode)->atomic_write_task == current)
   1842		f2fs_abort_atomic_write(inode, true);
   1843	return 0;
   1844}
   1845
   1846static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
   1847{
   1848	struct f2fs_inode_info *fi = F2FS_I(inode);
   1849	u32 masked_flags = fi->i_flags & mask;
   1850
   1851	/* mask can be shrunk by flags_valid selector */
   1852	iflags &= mask;
   1853
   1854	/* Is it quota file? Do not allow user to mess with it */
   1855	if (IS_NOQUOTA(inode))
   1856		return -EPERM;
   1857
   1858	if ((iflags ^ masked_flags) & F2FS_CASEFOLD_FL) {
   1859		if (!f2fs_sb_has_casefold(F2FS_I_SB(inode)))
   1860			return -EOPNOTSUPP;
   1861		if (!f2fs_empty_dir(inode))
   1862			return -ENOTEMPTY;
   1863	}
   1864
   1865	if (iflags & (F2FS_COMPR_FL | F2FS_NOCOMP_FL)) {
   1866		if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
   1867			return -EOPNOTSUPP;
   1868		if ((iflags & F2FS_COMPR_FL) && (iflags & F2FS_NOCOMP_FL))
   1869			return -EINVAL;
   1870	}
   1871
   1872	if ((iflags ^ masked_flags) & F2FS_COMPR_FL) {
   1873		if (masked_flags & F2FS_COMPR_FL) {
   1874			if (!f2fs_disable_compressed_file(inode))
   1875				return -EINVAL;
   1876		}
   1877		if (iflags & F2FS_NOCOMP_FL)
   1878			return -EINVAL;
   1879		if (iflags & F2FS_COMPR_FL) {
   1880			if (!f2fs_may_compress(inode))
   1881				return -EINVAL;
   1882			if (S_ISREG(inode->i_mode) && inode->i_size)
   1883				return -EINVAL;
   1884
   1885			set_compress_context(inode);
   1886		}
   1887	}
   1888	if ((iflags ^ masked_flags) & F2FS_NOCOMP_FL) {
   1889		if (masked_flags & F2FS_COMPR_FL)
   1890			return -EINVAL;
   1891	}
   1892
   1893	fi->i_flags = iflags | (fi->i_flags & ~mask);
   1894	f2fs_bug_on(F2FS_I_SB(inode), (fi->i_flags & F2FS_COMPR_FL) &&
   1895					(fi->i_flags & F2FS_NOCOMP_FL));
   1896
   1897	if (fi->i_flags & F2FS_PROJINHERIT_FL)
   1898		set_inode_flag(inode, FI_PROJ_INHERIT);
   1899	else
   1900		clear_inode_flag(inode, FI_PROJ_INHERIT);
   1901
   1902	inode->i_ctime = current_time(inode);
   1903	f2fs_set_inode_flags(inode);
   1904	f2fs_mark_inode_dirty_sync(inode, true);
   1905	return 0;
   1906}
   1907
   1908/* FS_IOC_[GS]ETFLAGS and FS_IOC_FS[GS]ETXATTR support */
   1909
   1910/*
   1911 * To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry
   1912 * for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to
   1913 * F2FS_GETTABLE_FS_FL.  To also make it settable via FS_IOC_SETFLAGS, also add
   1914 * its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL.
   1915 *
   1916 * Translating flags to fsx_flags value used by FS_IOC_FSGETXATTR and
   1917 * FS_IOC_FSSETXATTR is done by the VFS.
   1918 */
   1919
   1920static const struct {
   1921	u32 iflag;
   1922	u32 fsflag;
   1923} f2fs_fsflags_map[] = {
   1924	{ F2FS_COMPR_FL,	FS_COMPR_FL },
   1925	{ F2FS_SYNC_FL,		FS_SYNC_FL },
   1926	{ F2FS_IMMUTABLE_FL,	FS_IMMUTABLE_FL },
   1927	{ F2FS_APPEND_FL,	FS_APPEND_FL },
   1928	{ F2FS_NODUMP_FL,	FS_NODUMP_FL },
   1929	{ F2FS_NOATIME_FL,	FS_NOATIME_FL },
   1930	{ F2FS_NOCOMP_FL,	FS_NOCOMP_FL },
   1931	{ F2FS_INDEX_FL,	FS_INDEX_FL },
   1932	{ F2FS_DIRSYNC_FL,	FS_DIRSYNC_FL },
   1933	{ F2FS_PROJINHERIT_FL,	FS_PROJINHERIT_FL },
   1934	{ F2FS_CASEFOLD_FL,	FS_CASEFOLD_FL },
   1935};
   1936
   1937#define F2FS_GETTABLE_FS_FL (		\
   1938		FS_COMPR_FL |		\
   1939		FS_SYNC_FL |		\
   1940		FS_IMMUTABLE_FL |	\
   1941		FS_APPEND_FL |		\
   1942		FS_NODUMP_FL |		\
   1943		FS_NOATIME_FL |		\
   1944		FS_NOCOMP_FL |		\
   1945		FS_INDEX_FL |		\
   1946		FS_DIRSYNC_FL |		\
   1947		FS_PROJINHERIT_FL |	\
   1948		FS_ENCRYPT_FL |		\
   1949		FS_INLINE_DATA_FL |	\
   1950		FS_NOCOW_FL |		\
   1951		FS_VERITY_FL |		\
   1952		FS_CASEFOLD_FL)
   1953
   1954#define F2FS_SETTABLE_FS_FL (		\
   1955		FS_COMPR_FL |		\
   1956		FS_SYNC_FL |		\
   1957		FS_IMMUTABLE_FL |	\
   1958		FS_APPEND_FL |		\
   1959		FS_NODUMP_FL |		\
   1960		FS_NOATIME_FL |		\
   1961		FS_NOCOMP_FL |		\
   1962		FS_DIRSYNC_FL |		\
   1963		FS_PROJINHERIT_FL |	\
   1964		FS_CASEFOLD_FL)
   1965
   1966/* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */
   1967static inline u32 f2fs_iflags_to_fsflags(u32 iflags)
   1968{
   1969	u32 fsflags = 0;
   1970	int i;
   1971
   1972	for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++)
   1973		if (iflags & f2fs_fsflags_map[i].iflag)
   1974			fsflags |= f2fs_fsflags_map[i].fsflag;
   1975
   1976	return fsflags;
   1977}
   1978
   1979/* Convert FS_IOC_{GET,SET}FLAGS flags to f2fs on-disk i_flags */
   1980static inline u32 f2fs_fsflags_to_iflags(u32 fsflags)
   1981{
   1982	u32 iflags = 0;
   1983	int i;
   1984
   1985	for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++)
   1986		if (fsflags & f2fs_fsflags_map[i].fsflag)
   1987			iflags |= f2fs_fsflags_map[i].iflag;
   1988
   1989	return iflags;
   1990}
   1991
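/*
 * Usage sketch (editor's addition, not upstream code): the round trip that
 * the two helpers above serve. FS_IOC_GETFLAGS/FS_IOC_SETFLAGS come from
 * <linux/fs.h> and operate on the FS_*_FL side of f2fs_fsflags_map[].
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/fs.h>
 *
 *	int attr;
 *	ioctl(fd, FS_IOC_GETFLAGS, &attr);   // FS_*_FL flags
 *	attr |= FS_NOATIME_FL;               // must be in F2FS_SETTABLE_FS_FL
 *	ioctl(fd, FS_IOC_SETFLAGS, &attr);
 */
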
   1992static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
   1993{
   1994	struct inode *inode = file_inode(filp);
   1995
   1996	return put_user(inode->i_generation, (int __user *)arg);
   1997}
   1998
   1999static int f2fs_ioc_start_atomic_write(struct file *filp)
   2000{
   2001	struct inode *inode = file_inode(filp);
   2002	struct user_namespace *mnt_userns = file_mnt_user_ns(filp);
   2003	struct f2fs_inode_info *fi = F2FS_I(inode);
   2004	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   2005	struct inode *pinode;
   2006	int ret;
   2007
   2008	if (!inode_owner_or_capable(mnt_userns, inode))
   2009		return -EACCES;
   2010
   2011	if (!S_ISREG(inode->i_mode))
   2012		return -EINVAL;
   2013
   2014	if (filp->f_flags & O_DIRECT)
   2015		return -EINVAL;
   2016
   2017	ret = mnt_want_write_file(filp);
   2018	if (ret)
   2019		return ret;
   2020
   2021	inode_lock(inode);
   2022
   2023	if (!f2fs_disable_compressed_file(inode)) {
   2024		ret = -EINVAL;
   2025		goto out;
   2026	}
   2027
   2028	if (f2fs_is_atomic_file(inode))
   2029		goto out;
   2030
   2031	ret = f2fs_convert_inline_inode(inode);
   2032	if (ret)
   2033		goto out;
   2034
   2035	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
   2036
   2037	/*
   2038	 * Should wait for end_io so that F2FS_WB_CP_DATA is counted
   2039	 * correctly by f2fs_is_atomic_file.
   2040	 */
   2041	if (get_dirty_pages(inode))
   2042		f2fs_warn(sbi, "Unexpected flush for atomic writes: ino=%lu, npages=%u",
   2043			  inode->i_ino, get_dirty_pages(inode));
   2044	ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
   2045	if (ret) {
   2046		f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
   2047		goto out;
   2048	}
   2049
   2050	/* Create a COW inode for atomic write */
   2051	pinode = f2fs_iget(inode->i_sb, fi->i_pino);
   2052	if (IS_ERR(pinode)) {
   2053		f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
   2054		ret = PTR_ERR(pinode);
   2055		goto out;
   2056	}
   2057
   2058	ret = f2fs_get_tmpfile(mnt_userns, pinode, &fi->cow_inode);
   2059	iput(pinode);
   2060	if (ret) {
   2061		f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
   2062		goto out;
   2063	}
   2064	f2fs_i_size_write(fi->cow_inode, i_size_read(inode));
   2065
   2066	spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
   2067	sbi->atomic_files++;
   2068	spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
   2069
   2070	set_inode_flag(inode, FI_ATOMIC_FILE);
   2071	set_inode_flag(fi->cow_inode, FI_ATOMIC_FILE);
   2072	clear_inode_flag(fi->cow_inode, FI_INLINE_DATA);
   2073	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
   2074
   2075	f2fs_update_time(sbi, REQ_TIME);
   2076	fi->atomic_write_task = current;
   2077	stat_update_max_atomic_write(inode);
   2078out:
   2079	inode_unlock(inode);
   2080	mnt_drop_write_file(filp);
   2081	return ret;
   2082}
   2083
   2084static int f2fs_ioc_commit_atomic_write(struct file *filp)
   2085{
   2086	struct inode *inode = file_inode(filp);
   2087	struct user_namespace *mnt_userns = file_mnt_user_ns(filp);
   2088	int ret;
   2089
   2090	if (!inode_owner_or_capable(mnt_userns, inode))
   2091		return -EACCES;
   2092
   2093	ret = mnt_want_write_file(filp);
   2094	if (ret)
   2095		return ret;
   2096
   2097	f2fs_balance_fs(F2FS_I_SB(inode), true);
   2098
   2099	inode_lock(inode);
   2100
   2101	if (f2fs_is_atomic_file(inode)) {
   2102		ret = f2fs_commit_atomic_write(inode);
   2103		if (ret)
   2104			goto unlock_out;
   2105
   2106		ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
   2107		if (!ret)
   2108			f2fs_abort_atomic_write(inode, false);
   2109	} else {
   2110		ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
   2111	}
   2112unlock_out:
   2113	inode_unlock(inode);
   2114	mnt_drop_write_file(filp);
   2115	return ret;
   2116}
   2117
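/*
 * Usage sketch (editor's addition, not upstream code): the atomic-write
 * protocol implemented by the two handlers above, in the SQLite style;
 * F2FS_IOC_* come from <linux/f2fs.h>, error handling omitted.
 *
 *	#include <unistd.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/f2fs.h>
 *
 *	ioctl(fd, F2FS_IOC_START_ATOMIC_WRITE);
 *	write(fd, buf, len);                       // staged in the COW inode
 *	ioctl(fd, F2FS_IOC_COMMIT_ATOMIC_WRITE);   // all-or-nothing commit
 */
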
   2118static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
   2119{
   2120	struct inode *inode = file_inode(filp);
   2121	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   2122	struct super_block *sb = sbi->sb;
   2123	__u32 in;
   2124	int ret = 0;
   2125
   2126	if (!capable(CAP_SYS_ADMIN))
   2127		return -EPERM;
   2128
   2129	if (get_user(in, (__u32 __user *)arg))
   2130		return -EFAULT;
   2131
   2132	if (in != F2FS_GOING_DOWN_FULLSYNC) {
   2133		ret = mnt_want_write_file(filp);
   2134		if (ret) {
   2135			if (ret == -EROFS) {
   2136				ret = 0;
   2137				f2fs_stop_checkpoint(sbi, false);
   2138				set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
   2139				trace_f2fs_shutdown(sbi, in, ret);
   2140			}
   2141			return ret;
   2142		}
   2143	}
   2144
   2145	switch (in) {
   2146	case F2FS_GOING_DOWN_FULLSYNC:
   2147		ret = freeze_bdev(sb->s_bdev);
   2148		if (ret)
   2149			goto out;
   2150		f2fs_stop_checkpoint(sbi, false);
   2151		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
   2152		thaw_bdev(sb->s_bdev);
   2153		break;
   2154	case F2FS_GOING_DOWN_METASYNC:
   2155		/* do checkpoint only */
   2156		ret = f2fs_sync_fs(sb, 1);
   2157		if (ret)
   2158			goto out;
   2159		f2fs_stop_checkpoint(sbi, false);
   2160		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
   2161		break;
   2162	case F2FS_GOING_DOWN_NOSYNC:
   2163		f2fs_stop_checkpoint(sbi, false);
   2164		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
   2165		break;
   2166	case F2FS_GOING_DOWN_METAFLUSH:
   2167		f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
   2168		f2fs_stop_checkpoint(sbi, false);
   2169		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
   2170		break;
   2171	case F2FS_GOING_DOWN_NEED_FSCK:
   2172		set_sbi_flag(sbi, SBI_NEED_FSCK);
   2173		set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
   2174		set_sbi_flag(sbi, SBI_IS_DIRTY);
   2175		/* do checkpoint only */
   2176		ret = f2fs_sync_fs(sb, 1);
   2177		goto out;
   2178	default:
   2179		ret = -EINVAL;
   2180		goto out;
   2181	}
   2182
   2183	f2fs_stop_gc_thread(sbi);
   2184	f2fs_stop_discard_thread(sbi);
   2185
   2186	f2fs_drop_discard_cmd(sbi);
   2187	clear_opt(sbi, DISCARD);
   2188
   2189	f2fs_update_time(sbi, REQ_TIME);
   2190out:
   2191	if (in != F2FS_GOING_DOWN_FULLSYNC)
   2192		mnt_drop_write_file(filp);
   2193
   2194	trace_f2fs_shutdown(sbi, in, ret);
   2195
   2196	return ret;
   2197}
   2198
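/*
 * Usage sketch (editor's addition, not upstream code): triggering the
 * switch above from userspace. F2FS_GOING_DOWN_METASYNC issues a
 * checkpoint first; F2FS_GOING_DOWN_NOSYNC stops the filesystem at once.
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/f2fs.h>
 *
 *	__u32 how = F2FS_GOING_DOWN_METASYNC;
 *	ioctl(fd, F2FS_IOC_SHUTDOWN, &how);   // fd: any file on the fs
 */
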
   2199static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
   2200{
   2201	struct inode *inode = file_inode(filp);
   2202	struct super_block *sb = inode->i_sb;
   2203	struct fstrim_range range;
   2204	int ret;
   2205
   2206	if (!capable(CAP_SYS_ADMIN))
   2207		return -EPERM;
   2208
   2209	if (!f2fs_hw_support_discard(F2FS_SB(sb)))
   2210		return -EOPNOTSUPP;
   2211
   2212	if (copy_from_user(&range, (struct fstrim_range __user *)arg,
   2213				sizeof(range)))
   2214		return -EFAULT;
   2215
   2216	ret = mnt_want_write_file(filp);
   2217	if (ret)
   2218		return ret;
   2219
   2220	range.minlen = max((unsigned int)range.minlen,
   2221			   bdev_discard_granularity(sb->s_bdev));
   2222	ret = f2fs_trim_fs(F2FS_SB(sb), &range);
   2223	mnt_drop_write_file(filp);
   2224	if (ret < 0)
   2225		return ret;
   2226
   2227	if (copy_to_user((struct fstrim_range __user *)arg, &range,
   2228				sizeof(range)))
   2229		return -EFAULT;
   2230	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
   2231	return 0;
   2232}
   2233
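/*
 * Usage sketch (editor's addition, not upstream code): FITRIM is the
 * generic discard ioctl from <linux/fs.h>; a minimal way to drive the
 * handler above, error handling omitted.
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/fs.h>
 *
 *	struct fstrim_range r = {
 *		.start = 0,
 *		.len = (__u64)-1,   // whole filesystem
 *		.minlen = 0,        // clamped to device granularity above
 *	};
 *	ioctl(fd, FITRIM, &r);      // r.len returns the trimmed byte count
 */
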
   2234static bool uuid_is_nonzero(__u8 u[16])
   2235{
   2236	int i;
   2237
   2238	for (i = 0; i < 16; i++)
   2239		if (u[i])
   2240			return true;
   2241	return false;
   2242}
   2243
   2244static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
   2245{
   2246	struct inode *inode = file_inode(filp);
   2247
   2248	if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode)))
   2249		return -EOPNOTSUPP;
   2250
   2251	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
   2252
   2253	return fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
   2254}
   2255
   2256static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
   2257{
   2258	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
   2259		return -EOPNOTSUPP;
   2260	return fscrypt_ioctl_get_policy(filp, (void __user *)arg);
   2261}
   2262
   2263static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
   2264{
   2265	struct inode *inode = file_inode(filp);
   2266	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   2267	int err;
   2268
   2269	if (!f2fs_sb_has_encrypt(sbi))
   2270		return -EOPNOTSUPP;
   2271
   2272	err = mnt_want_write_file(filp);
   2273	if (err)
   2274		return err;
   2275
   2276	f2fs_down_write(&sbi->sb_lock);
   2277
   2278	if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt))
   2279		goto got_it;
   2280
   2281	/* update superblock with uuid */
   2282	generate_random_uuid(sbi->raw_super->encrypt_pw_salt);
   2283
   2284	err = f2fs_commit_super(sbi, false);
   2285	if (err) {
   2286		/* undo new data */
   2287		memset(sbi->raw_super->encrypt_pw_salt, 0, 16);
   2288		goto out_err;
   2289	}
   2290got_it:
   2291	if (copy_to_user((__u8 __user *)arg, sbi->raw_super->encrypt_pw_salt,
   2292									16))
   2293		err = -EFAULT;
   2294out_err:
   2295	f2fs_up_write(&sbi->sb_lock);
   2296	mnt_drop_write_file(filp);
   2297	return err;
   2298}
   2299
   2300static int f2fs_ioc_get_encryption_policy_ex(struct file *filp,
   2301					     unsigned long arg)
   2302{
   2303	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
   2304		return -EOPNOTSUPP;
   2305
   2306	return fscrypt_ioctl_get_policy_ex(filp, (void __user *)arg);
   2307}
   2308
   2309static int f2fs_ioc_add_encryption_key(struct file *filp, unsigned long arg)
   2310{
   2311	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
   2312		return -EOPNOTSUPP;
   2313
   2314	return fscrypt_ioctl_add_key(filp, (void __user *)arg);
   2315}
   2316
   2317static int f2fs_ioc_remove_encryption_key(struct file *filp, unsigned long arg)
   2318{
   2319	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
   2320		return -EOPNOTSUPP;
   2321
   2322	return fscrypt_ioctl_remove_key(filp, (void __user *)arg);
   2323}
   2324
   2325static int f2fs_ioc_remove_encryption_key_all_users(struct file *filp,
   2326						    unsigned long arg)
   2327{
   2328	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
   2329		return -EOPNOTSUPP;
   2330
   2331	return fscrypt_ioctl_remove_key_all_users(filp, (void __user *)arg);
   2332}
   2333
   2334static int f2fs_ioc_get_encryption_key_status(struct file *filp,
   2335					      unsigned long arg)
   2336{
   2337	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
   2338		return -EOPNOTSUPP;
   2339
   2340	return fscrypt_ioctl_get_key_status(filp, (void __user *)arg);
   2341}
   2342
   2343static int f2fs_ioc_get_encryption_nonce(struct file *filp, unsigned long arg)
   2344{
   2345	if (!f2fs_sb_has_encrypt(F2FS_I_SB(file_inode(filp))))
   2346		return -EOPNOTSUPP;
   2347
   2348	return fscrypt_ioctl_get_nonce(filp, (void __user *)arg);
   2349}
   2350
   2351static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
   2352{
   2353	struct inode *inode = file_inode(filp);
   2354	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   2355	struct f2fs_gc_control gc_control = { .victim_segno = NULL_SEGNO,
   2356			.no_bg_gc = false,
   2357			.should_migrate_blocks = false,
   2358			.nr_free_secs = 0 };
   2359	__u32 sync;
   2360	int ret;
   2361
   2362	if (!capable(CAP_SYS_ADMIN))
   2363		return -EPERM;
   2364
   2365	if (get_user(sync, (__u32 __user *)arg))
   2366		return -EFAULT;
   2367
   2368	if (f2fs_readonly(sbi->sb))
   2369		return -EROFS;
   2370
   2371	ret = mnt_want_write_file(filp);
   2372	if (ret)
   2373		return ret;
   2374
   2375	if (!sync) {
   2376		if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
   2377			ret = -EBUSY;
   2378			goto out;
   2379		}
   2380	} else {
   2381		f2fs_down_write(&sbi->gc_lock);
   2382	}
   2383
   2384	gc_control.init_gc_type = sync ? FG_GC : BG_GC;
   2385	gc_control.err_gc_skipped = sync;
   2386	ret = f2fs_gc(sbi, &gc_control);
   2387out:
   2388	mnt_drop_write_file(filp);
   2389	return ret;
   2390}
   2391
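/*
 * Usage sketch (editor's addition, not upstream code): requesting GC via
 * the handler above; sync=1 maps to FG_GC and waits for the gc_lock,
 * while sync=0 tries BG_GC and fails with -EBUSY if the lock is held.
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/f2fs.h>
 *
 *	__u32 sync = 1;
 *	ioctl(fd, F2FS_IOC_GARBAGE_COLLECT, &sync);
 */
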
   2392static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range)
   2393{
   2394	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
   2395	struct f2fs_gc_control gc_control = {
   2396			.init_gc_type = range->sync ? FG_GC : BG_GC,
   2397			.no_bg_gc = false,
   2398			.should_migrate_blocks = false,
   2399			.err_gc_skipped = range->sync,
   2400			.nr_free_secs = 0 };
   2401	u64 end;
   2402	int ret;
   2403
   2404	if (!capable(CAP_SYS_ADMIN))
   2405		return -EPERM;
   2406	if (f2fs_readonly(sbi->sb))
   2407		return -EROFS;
   2408
   2409	end = range->start + range->len;
   2410	if (end < range->start || range->start < MAIN_BLKADDR(sbi) ||
   2411					end >= MAX_BLKADDR(sbi))
   2412		return -EINVAL;
   2413
   2414	ret = mnt_want_write_file(filp);
   2415	if (ret)
   2416		return ret;
   2417
   2418do_more:
   2419	if (!range->sync) {
   2420		if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
   2421			ret = -EBUSY;
   2422			goto out;
   2423		}
   2424	} else {
   2425		f2fs_down_write(&sbi->gc_lock);
   2426	}
   2427
   2428	gc_control.victim_segno = GET_SEGNO(sbi, range->start);
   2429	ret = f2fs_gc(sbi, &gc_control);
   2430	if (ret) {
   2431		if (ret == -EBUSY)
   2432			ret = -EAGAIN;
   2433		goto out;
   2434	}
   2435	range->start += BLKS_PER_SEC(sbi);
   2436	if (range->start <= end)
   2437		goto do_more;
   2438out:
   2439	mnt_drop_write_file(filp);
   2440	return ret;
   2441}
   2442
   2443static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
   2444{
   2445	struct f2fs_gc_range range;
   2446
   2447	if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg,
   2448							sizeof(range)))
   2449		return -EFAULT;
   2450	return __f2fs_ioc_gc_range(filp, &range);
   2451}
   2452
   2453static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
   2454{
   2455	struct inode *inode = file_inode(filp);
   2456	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   2457	int ret;
   2458
   2459	if (!capable(CAP_SYS_ADMIN))
   2460		return -EPERM;
   2461
   2462	if (f2fs_readonly(sbi->sb))
   2463		return -EROFS;
   2464
   2465	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
   2466		f2fs_info(sbi, "Skipping Checkpoint. Checkpoints currently disabled.");
   2467		return -EINVAL;
   2468	}
   2469
   2470	ret = mnt_want_write_file(filp);
   2471	if (ret)
   2472		return ret;
   2473
   2474	ret = f2fs_sync_fs(sbi->sb, 1);
   2475
   2476	mnt_drop_write_file(filp);
   2477	return ret;
   2478}
   2479
   2480static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
   2481					struct file *filp,
   2482					struct f2fs_defragment *range)
   2483{
   2484	struct inode *inode = file_inode(filp);
   2485	struct f2fs_map_blocks map = { .m_next_extent = NULL,
   2486					.m_seg_type = NO_CHECK_TYPE,
   2487					.m_may_create = false };
   2488	struct extent_info ei = {0, 0, 0};
   2489	pgoff_t pg_start, pg_end, next_pgofs;
   2490	unsigned int blk_per_seg = sbi->blocks_per_seg;
   2491	unsigned int total = 0, sec_num;
   2492	block_t blk_end = 0;
   2493	bool fragmented = false;
   2494	int err;
   2495
   2496	pg_start = range->start >> PAGE_SHIFT;
   2497	pg_end = (range->start + range->len) >> PAGE_SHIFT;
   2498
   2499	f2fs_balance_fs(sbi, true);
   2500
   2501	inode_lock(inode);
   2502
   2503	/* if in-place-update policy is enabled, don't waste time here */
   2504	set_inode_flag(inode, FI_OPU_WRITE);
   2505	if (f2fs_should_update_inplace(inode, NULL)) {
   2506		err = -EINVAL;
   2507		goto out;
   2508	}
   2509
   2510	/* writeback all dirty pages in the range */
   2511	err = filemap_write_and_wait_range(inode->i_mapping, range->start,
   2512						range->start + range->len - 1);
   2513	if (err)
   2514		goto out;
   2515
   2516	/*
   2517	 * lookup mapping info in the extent cache; skip defragmenting if the
   2518	 * physical block addresses are contiguous.
   2519	 */
   2520	if (f2fs_lookup_extent_cache(inode, pg_start, &ei)) {
   2521		if (ei.fofs + ei.len >= pg_end)
   2522			goto out;
   2523	}
   2524
   2525	map.m_lblk = pg_start;
   2526	map.m_next_pgofs = &next_pgofs;
   2527
   2528	/*
   2529	 * lookup mapping info in the dnode page cache; skip defragmenting if
   2530	 * all physical block addresses are contiguous even if there are holes
   2531	 * in the logical blocks.
   2532	 */
   2533	while (map.m_lblk < pg_end) {
   2534		map.m_len = pg_end - map.m_lblk;
   2535		err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
   2536		if (err)
   2537			goto out;
   2538
   2539		if (!(map.m_flags & F2FS_MAP_FLAGS)) {
   2540			map.m_lblk = next_pgofs;
   2541			continue;
   2542		}
   2543
   2544		if (blk_end && blk_end != map.m_pblk)
   2545			fragmented = true;
   2546
   2547		/* record the total count of blocks that we're going to move */
   2548		total += map.m_len;
   2549
   2550		blk_end = map.m_pblk + map.m_len;
   2551
   2552		map.m_lblk += map.m_len;
   2553	}
   2554
   2555	if (!fragmented) {
   2556		total = 0;
   2557		goto out;
   2558	}
   2559
   2560	sec_num = DIV_ROUND_UP(total, BLKS_PER_SEC(sbi));
   2561
   2562	/*
   2563	 * make sure there are enough free sections for LFS allocation; this
   2564	 * avoids defragmentation running in SSR mode when free sections are
   2565	 * being allocated intensively
   2566	 */
   2567	if (has_not_enough_free_secs(sbi, 0, sec_num)) {
   2568		err = -EAGAIN;
   2569		goto out;
   2570	}
   2571
   2572	map.m_lblk = pg_start;
   2573	map.m_len = pg_end - pg_start;
   2574	total = 0;
   2575
   2576	while (map.m_lblk < pg_end) {
   2577		pgoff_t idx;
   2578		int cnt = 0;
   2579
   2580do_map:
   2581		map.m_len = pg_end - map.m_lblk;
   2582		err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
   2583		if (err)
   2584			goto clear_out;
   2585
   2586		if (!(map.m_flags & F2FS_MAP_FLAGS)) {
   2587			map.m_lblk = next_pgofs;
   2588			goto check;
   2589		}
   2590
   2591		set_inode_flag(inode, FI_SKIP_WRITES);
   2592
   2593		idx = map.m_lblk;
   2594		while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) {
   2595			struct page *page;
   2596
   2597			page = f2fs_get_lock_data_page(inode, idx, true);
   2598			if (IS_ERR(page)) {
   2599				err = PTR_ERR(page);
   2600				goto clear_out;
   2601			}
   2602
   2603			set_page_dirty(page);
   2604			set_page_private_gcing(page);
   2605			f2fs_put_page(page, 1);
   2606
   2607			idx++;
   2608			cnt++;
   2609			total++;
   2610		}
   2611
   2612		map.m_lblk = idx;
   2613check:
   2614		if (map.m_lblk < pg_end && cnt < blk_per_seg)
   2615			goto do_map;
   2616
   2617		clear_inode_flag(inode, FI_SKIP_WRITES);
   2618
   2619		err = filemap_fdatawrite(inode->i_mapping);
   2620		if (err)
   2621			goto out;
   2622	}
   2623clear_out:
   2624	clear_inode_flag(inode, FI_SKIP_WRITES);
   2625out:
   2626	clear_inode_flag(inode, FI_OPU_WRITE);
   2627	inode_unlock(inode);
   2628	if (!err)
   2629		range->len = (u64)total << PAGE_SHIFT;
   2630	return err;
   2631}
   2632
   2633static int f2fs_ioc_defragment(struct file *filp, unsigned long arg)
   2634{
   2635	struct inode *inode = file_inode(filp);
   2636	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   2637	struct f2fs_defragment range;
   2638	int err;
   2639
   2640	if (!capable(CAP_SYS_ADMIN))
   2641		return -EPERM;
   2642
   2643	if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode))
   2644		return -EINVAL;
   2645
   2646	if (f2fs_readonly(sbi->sb))
   2647		return -EROFS;
   2648
   2649	if (copy_from_user(&range, (struct f2fs_defragment __user *)arg,
   2650							sizeof(range)))
   2651		return -EFAULT;
   2652
   2653	/* verify alignment of offset & size */
   2654	if (range.start & (F2FS_BLKSIZE - 1) || range.len & (F2FS_BLKSIZE - 1))
   2655		return -EINVAL;
   2656
   2657	if (unlikely((range.start + range.len) >> PAGE_SHIFT >
   2658					max_file_blocks(inode)))
   2659		return -EINVAL;
   2660
   2661	err = mnt_want_write_file(filp);
   2662	if (err)
   2663		return err;
   2664
   2665	err = f2fs_defragment_range(sbi, filp, &range);
   2666	mnt_drop_write_file(filp);
   2667
   2668	f2fs_update_time(sbi, REQ_TIME);
   2669	if (err < 0)
   2670		return err;
   2671
   2672	if (copy_to_user((struct f2fs_defragment __user *)arg, &range,
   2673							sizeof(range)))
   2674		return -EFAULT;
   2675
   2676	return 0;
   2677}
   2678
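/*
 * Usage sketch (editor's addition, not upstream code): invoking the
 * defragmenter above; start/len must be F2FS_BLKSIZE-aligned, and on
 * return range.len holds the number of bytes actually moved.
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/f2fs.h>
 *
 *	struct f2fs_defragment range = { .start = 0, .len = 64 << 20 };
 *	ioctl(fd, F2FS_IOC_DEFRAGMENT, &range);
 */
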
   2679static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
   2680			struct file *file_out, loff_t pos_out, size_t len)
   2681{
   2682	struct inode *src = file_inode(file_in);
   2683	struct inode *dst = file_inode(file_out);
   2684	struct f2fs_sb_info *sbi = F2FS_I_SB(src);
   2685	size_t olen = len, dst_max_i_size = 0;
   2686	size_t dst_osize;
   2687	int ret;
   2688
   2689	if (file_in->f_path.mnt != file_out->f_path.mnt ||
   2690				src->i_sb != dst->i_sb)
   2691		return -EXDEV;
   2692
   2693	if (unlikely(f2fs_readonly(src->i_sb)))
   2694		return -EROFS;
   2695
   2696	if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode))
   2697		return -EINVAL;
   2698
   2699	if (IS_ENCRYPTED(src) || IS_ENCRYPTED(dst))
   2700		return -EOPNOTSUPP;
   2701
   2702	if (pos_out < 0 || pos_in < 0)
   2703		return -EINVAL;
   2704
   2705	if (src == dst) {
   2706		if (pos_in == pos_out)
   2707			return 0;
   2708		if (pos_out > pos_in && pos_out < pos_in + len)
   2709			return -EINVAL;
   2710	}
   2711
   2712	inode_lock(src);
   2713	if (src != dst) {
   2714		ret = -EBUSY;
   2715		if (!inode_trylock(dst))
   2716			goto out;
   2717	}
   2718
   2719	ret = -EINVAL;
   2720	if (pos_in + len > src->i_size || pos_in + len < pos_in)
   2721		goto out_unlock;
   2722	if (len == 0)
   2723		olen = len = src->i_size - pos_in;
   2724	if (pos_in + len == src->i_size)
   2725		len = ALIGN(src->i_size, F2FS_BLKSIZE) - pos_in;
   2726	if (len == 0) {
   2727		ret = 0;
   2728		goto out_unlock;
   2729	}
   2730
   2731	dst_osize = dst->i_size;
   2732	if (pos_out + olen > dst->i_size)
   2733		dst_max_i_size = pos_out + olen;
   2734
   2735	/* verify the end result is block aligned */
   2736	if (!IS_ALIGNED(pos_in, F2FS_BLKSIZE) ||
   2737			!IS_ALIGNED(pos_in + len, F2FS_BLKSIZE) ||
   2738			!IS_ALIGNED(pos_out, F2FS_BLKSIZE))
   2739		goto out_unlock;
   2740
   2741	ret = f2fs_convert_inline_inode(src);
   2742	if (ret)
   2743		goto out_unlock;
   2744
   2745	ret = f2fs_convert_inline_inode(dst);
   2746	if (ret)
   2747		goto out_unlock;
   2748
   2749	/* write out all dirty pages from offset */
   2750	ret = filemap_write_and_wait_range(src->i_mapping,
   2751					pos_in, pos_in + len);
   2752	if (ret)
   2753		goto out_unlock;
   2754
   2755	ret = filemap_write_and_wait_range(dst->i_mapping,
   2756					pos_out, pos_out + len);
   2757	if (ret)
   2758		goto out_unlock;
   2759
   2760	f2fs_balance_fs(sbi, true);
   2761
   2762	f2fs_down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
   2763	if (src != dst) {
   2764		ret = -EBUSY;
   2765		if (!f2fs_down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE]))
   2766			goto out_src;
   2767	}
   2768
   2769	f2fs_lock_op(sbi);
   2770	ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS,
   2771				pos_out >> F2FS_BLKSIZE_BITS,
   2772				len >> F2FS_BLKSIZE_BITS, false);
   2773
   2774	if (!ret) {
   2775		if (dst_max_i_size)
   2776			f2fs_i_size_write(dst, dst_max_i_size);
   2777		else if (dst_osize != dst->i_size)
   2778			f2fs_i_size_write(dst, dst_osize);
   2779	}
   2780	f2fs_unlock_op(sbi);
   2781
   2782	if (src != dst)
   2783		f2fs_up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]);
   2784out_src:
   2785	f2fs_up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]);
   2786out_unlock:
   2787	if (src != dst)
   2788		inode_unlock(dst);
   2789out:
   2790	inode_unlock(src);
   2791	return ret;
   2792}
   2793
   2794static int __f2fs_ioc_move_range(struct file *filp,
   2795				struct f2fs_move_range *range)
   2796{
   2797	struct fd dst;
   2798	int err;
   2799
   2800	if (!(filp->f_mode & FMODE_READ) ||
   2801			!(filp->f_mode & FMODE_WRITE))
   2802		return -EBADF;
   2803
   2804	dst = fdget(range->dst_fd);
   2805	if (!dst.file)
   2806		return -EBADF;
   2807
   2808	if (!(dst.file->f_mode & FMODE_WRITE)) {
   2809		err = -EBADF;
   2810		goto err_out;
   2811	}
   2812
   2813	err = mnt_want_write_file(filp);
   2814	if (err)
   2815		goto err_out;
   2816
   2817	err = f2fs_move_file_range(filp, range->pos_in, dst.file,
   2818					range->pos_out, range->len);
   2819
   2820	mnt_drop_write_file(filp);
   2821err_out:
   2822	fdput(dst);
   2823	return err;
   2824}
   2825
   2826static int f2fs_ioc_move_range(struct file *filp, unsigned long arg)
   2827{
   2828	struct f2fs_move_range range;
   2829
   2830	if (copy_from_user(&range, (struct f2fs_move_range __user *)arg,
   2831							sizeof(range)))
   2832		return -EFAULT;
   2833	return __f2fs_ioc_move_range(filp, &range);
   2834}
   2835
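/*
 * Usage sketch (editor's addition, not upstream code): the block-exchange
 * ioctl above. Both descriptors must sit on the same f2fs mount and all
 * positions must be block-aligned, as checked in f2fs_move_file_range().
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/f2fs.h>
 *
 *	struct f2fs_move_range mr = {
 *		.dst_fd  = dst_fd,    // opened with write access
 *		.pos_in  = 0,
 *		.pos_out = 0,
 *		.len     = 1 << 20,
 *	};
 *	ioctl(src_fd, F2FS_IOC_MOVE_RANGE, &mr);
 */
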
   2836static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
   2837{
   2838	struct inode *inode = file_inode(filp);
   2839	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   2840	struct sit_info *sm = SIT_I(sbi);
   2841	unsigned int start_segno = 0, end_segno = 0;
   2842	unsigned int dev_start_segno = 0, dev_end_segno = 0;
   2843	struct f2fs_flush_device range;
   2844	struct f2fs_gc_control gc_control = {
   2845			.init_gc_type = FG_GC,
   2846			.should_migrate_blocks = true,
   2847			.err_gc_skipped = true,
   2848			.nr_free_secs = 0 };
   2849	int ret;
   2850
   2851	if (!capable(CAP_SYS_ADMIN))
   2852		return -EPERM;
   2853
   2854	if (f2fs_readonly(sbi->sb))
   2855		return -EROFS;
   2856
   2857	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
   2858		return -EINVAL;
   2859
   2860	if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
   2861							sizeof(range)))
   2862		return -EFAULT;
   2863
   2864	if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num ||
   2865			__is_large_section(sbi)) {
   2866		f2fs_warn(sbi, "Can't flush %u in %d for segs_per_sec %u != 1",
   2867			  range.dev_num, sbi->s_ndevs, sbi->segs_per_sec);
   2868		return -EINVAL;
   2869	}
   2870
   2871	ret = mnt_want_write_file(filp);
   2872	if (ret)
   2873		return ret;
   2874
   2875	if (range.dev_num != 0)
   2876		dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk);
   2877	dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk);
   2878
   2879	start_segno = sm->last_victim[FLUSH_DEVICE];
   2880	if (start_segno < dev_start_segno || start_segno >= dev_end_segno)
   2881		start_segno = dev_start_segno;
   2882	end_segno = min(start_segno + range.segments, dev_end_segno);
   2883
   2884	while (start_segno < end_segno) {
   2885		if (!f2fs_down_write_trylock(&sbi->gc_lock)) {
   2886			ret = -EBUSY;
   2887			goto out;
   2888		}
   2889		sm->last_victim[GC_CB] = end_segno + 1;
   2890		sm->last_victim[GC_GREEDY] = end_segno + 1;
   2891		sm->last_victim[ALLOC_NEXT] = end_segno + 1;
   2892
   2893		gc_control.victim_segno = start_segno;
   2894		ret = f2fs_gc(sbi, &gc_control);
   2895		if (ret == -EAGAIN)
   2896			ret = 0;
   2897		else if (ret < 0)
   2898			break;
   2899		start_segno++;
   2900	}
   2901out:
   2902	mnt_drop_write_file(filp);
   2903	return ret;
   2904}
   2905
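/*
 * Usage sketch (editor's addition, not upstream code): the multi-device
 * migration handler above; dev_num indexes FDEV() and segments bounds how
 * many segments are garbage-collected off that device per call.
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/f2fs.h>
 *
 *	struct f2fs_flush_device fr = { .dev_num = 1, .segments = 512 };
 *	ioctl(fd, F2FS_IOC_FLUSH_DEVICE, &fr);
 */
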
   2906static int f2fs_ioc_get_features(struct file *filp, unsigned long arg)
   2907{
   2908	struct inode *inode = file_inode(filp);
   2909	u32 sb_feature = le32_to_cpu(F2FS_I_SB(inode)->raw_super->feature);
   2910
   2911	/* Always advertise atomic-write support to match SQLite behavior in Android. */
   2912	sb_feature |= F2FS_FEATURE_ATOMIC_WRITE;
   2913
   2914	return put_user(sb_feature, (u32 __user *)arg);
   2915}
   2916
   2917#ifdef CONFIG_QUOTA
   2918int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid)
   2919{
   2920	struct dquot *transfer_to[MAXQUOTAS] = {};
   2921	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   2922	struct super_block *sb = sbi->sb;
   2923	int err = 0;
   2924
   2925	transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
   2926	if (!IS_ERR(transfer_to[PRJQUOTA])) {
   2927		err = __dquot_transfer(inode, transfer_to);
   2928		if (err)
   2929			set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
   2930		dqput(transfer_to[PRJQUOTA]);
   2931	}
   2932	return err;
   2933}
   2934
   2935static int f2fs_ioc_setproject(struct inode *inode, __u32 projid)
   2936{
   2937	struct f2fs_inode_info *fi = F2FS_I(inode);
   2938	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   2939	struct f2fs_inode *ri = NULL;
   2940	kprojid_t kprojid;
   2941	int err;
   2942
   2943	if (!f2fs_sb_has_project_quota(sbi)) {
   2944		if (projid != F2FS_DEF_PROJID)
   2945			return -EOPNOTSUPP;
   2946		else
   2947			return 0;
   2948	}
   2949
   2950	if (!f2fs_has_extra_attr(inode))
   2951		return -EOPNOTSUPP;
   2952
   2953	kprojid = make_kprojid(&init_user_ns, (projid_t)projid);
   2954
   2955	if (projid_eq(kprojid, fi->i_projid))
   2956		return 0;
   2957
   2958	err = -EPERM;
   2959	/* Is it quota file? Do not allow user to mess with it */
   2960	if (IS_NOQUOTA(inode))
   2961		return err;
   2962
   2963	if (!F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid))
   2964		return -EOVERFLOW;
   2965
   2966	err = f2fs_dquot_initialize(inode);
   2967	if (err)
   2968		return err;
   2969
   2970	f2fs_lock_op(sbi);
   2971	err = f2fs_transfer_project_quota(inode, kprojid);
   2972	if (err)
   2973		goto out_unlock;
   2974
   2975	fi->i_projid = kprojid;
   2976	inode->i_ctime = current_time(inode);
   2977	f2fs_mark_inode_dirty_sync(inode, true);
   2978out_unlock:
   2979	f2fs_unlock_op(sbi);
   2980	return err;
   2981}
   2982#else
   2983int f2fs_transfer_project_quota(struct inode *inode, kprojid_t kprojid)
   2984{
   2985	return 0;
   2986}
   2987
   2988static int f2fs_ioc_setproject(struct inode *inode, __u32 projid)
   2989{
   2990	if (projid != F2FS_DEF_PROJID)
   2991		return -EOPNOTSUPP;
   2992	return 0;
   2993}
   2994#endif
   2995
   2996int f2fs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
   2997{
   2998	struct inode *inode = d_inode(dentry);
   2999	struct f2fs_inode_info *fi = F2FS_I(inode);
   3000	u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags);
   3001
   3002	if (IS_ENCRYPTED(inode))
   3003		fsflags |= FS_ENCRYPT_FL;
   3004	if (IS_VERITY(inode))
   3005		fsflags |= FS_VERITY_FL;
   3006	if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
   3007		fsflags |= FS_INLINE_DATA_FL;
   3008	if (is_inode_flag_set(inode, FI_PIN_FILE))
   3009		fsflags |= FS_NOCOW_FL;
   3010
   3011	fileattr_fill_flags(fa, fsflags & F2FS_GETTABLE_FS_FL);
   3012
   3013	if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)))
   3014		fa->fsx_projid = from_kprojid(&init_user_ns, fi->i_projid);
   3015
   3016	return 0;
   3017}
   3018
   3019int f2fs_fileattr_set(struct user_namespace *mnt_userns,
   3020		      struct dentry *dentry, struct fileattr *fa)
   3021{
   3022	struct inode *inode = d_inode(dentry);
   3023	u32 fsflags = fa->flags, mask = F2FS_SETTABLE_FS_FL;
   3024	u32 iflags;
   3025	int err;
   3026
   3027	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
   3028		return -EIO;
   3029	if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode)))
   3030		return -ENOSPC;
   3031	if (fsflags & ~F2FS_GETTABLE_FS_FL)
   3032		return -EOPNOTSUPP;
   3033	fsflags &= F2FS_SETTABLE_FS_FL;
   3034	if (!fa->flags_valid)
   3035		mask &= FS_COMMON_FL;
   3036
   3037	iflags = f2fs_fsflags_to_iflags(fsflags);
   3038	if (f2fs_mask_flags(inode->i_mode, iflags) != iflags)
   3039		return -EOPNOTSUPP;
   3040
   3041	err = f2fs_setflags_common(inode, iflags, f2fs_fsflags_to_iflags(mask));
   3042	if (!err)
   3043		err = f2fs_ioc_setproject(inode, fa->fsx_projid);
   3044
   3045	return err;
   3046}
   3047
   3048int f2fs_pin_file_control(struct inode *inode, bool inc)
   3049{
   3050	struct f2fs_inode_info *fi = F2FS_I(inode);
   3051	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   3052
   3053	/* Use i_gc_failures of a normal file as a risk signal. */
   3054	if (inc)
   3055		f2fs_i_gc_failures_write(inode,
   3056				fi->i_gc_failures[GC_FAILURE_PIN] + 1);
   3057
   3058	if (fi->i_gc_failures[GC_FAILURE_PIN] > sbi->gc_pin_file_threshold) {
   3059		f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials",
   3060			  __func__, inode->i_ino,
   3061			  fi->i_gc_failures[GC_FAILURE_PIN]);
   3062		clear_inode_flag(inode, FI_PIN_FILE);
   3063		return -EAGAIN;
   3064	}
   3065	return 0;
   3066}
   3067
   3068static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
   3069{
   3070	struct inode *inode = file_inode(filp);
   3071	__u32 pin;
   3072	int ret = 0;
   3073
   3074	if (get_user(pin, (__u32 __user *)arg))
   3075		return -EFAULT;
   3076
   3077	if (!S_ISREG(inode->i_mode))
   3078		return -EINVAL;
   3079
   3080	if (f2fs_readonly(F2FS_I_SB(inode)->sb))
   3081		return -EROFS;
   3082
   3083	ret = mnt_want_write_file(filp);
   3084	if (ret)
   3085		return ret;
   3086
   3087	inode_lock(inode);
   3088
   3089	if (!pin) {
   3090		clear_inode_flag(inode, FI_PIN_FILE);
   3091		f2fs_i_gc_failures_write(inode, 0);
   3092		goto done;
   3093	}
   3094
   3095	if (f2fs_should_update_outplace(inode, NULL)) {
   3096		ret = -EINVAL;
   3097		goto out;
   3098	}
   3099
   3100	if (f2fs_pin_file_control(inode, false)) {
   3101		ret = -EAGAIN;
   3102		goto out;
   3103	}
   3104
   3105	ret = f2fs_convert_inline_inode(inode);
   3106	if (ret)
   3107		goto out;
   3108
   3109	if (!f2fs_disable_compressed_file(inode)) {
   3110		ret = -EOPNOTSUPP;
   3111		goto out;
   3112	}
   3113
   3114	set_inode_flag(inode, FI_PIN_FILE);
   3115	ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN];
   3116done:
   3117	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
   3118out:
   3119	inode_unlock(inode);
   3120	mnt_drop_write_file(filp);
   3121	return ret;
   3122}
   3123
   3124static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg)
   3125{
   3126	struct inode *inode = file_inode(filp);
   3127	__u32 pin = 0;
   3128
   3129	if (is_inode_flag_set(inode, FI_PIN_FILE))
   3130		pin = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN];
   3131	return put_user(pin, (u32 __user *)arg);
   3132}
   3133
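/*
 * Usage sketch (editor's addition, not upstream code): pinning a file so
 * its block addresses stay put. set_pin converts inline data and disables
 * compression first; get_pin reads back the GC-failure count (0 if the
 * file is not pinned).
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/f2fs.h>
 *
 *	__u32 pin = 1;
 *	ioctl(fd, F2FS_IOC_SET_PIN_FILE, &pin);
 *	ioctl(fd, F2FS_IOC_GET_PIN_FILE, &pin);   // current GC-failure count
 */
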
   3134int f2fs_precache_extents(struct inode *inode)
   3135{
   3136	struct f2fs_inode_info *fi = F2FS_I(inode);
   3137	struct f2fs_map_blocks map;
   3138	pgoff_t m_next_extent;
   3139	loff_t end;
   3140	int err;
   3141
   3142	if (is_inode_flag_set(inode, FI_NO_EXTENT))
   3143		return -EOPNOTSUPP;
   3144
   3145	map.m_lblk = 0;
   3146	map.m_next_pgofs = NULL;
   3147	map.m_next_extent = &m_next_extent;
   3148	map.m_seg_type = NO_CHECK_TYPE;
   3149	map.m_may_create = false;
   3150	end = max_file_blocks(inode);
   3151
   3152	while (map.m_lblk < end) {
   3153		map.m_len = end - map.m_lblk;
   3154
   3155		f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
   3156		err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_PRECACHE);
   3157		f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
   3158		if (err)
   3159			return err;
   3160
   3161		map.m_lblk = m_next_extent;
   3162	}
   3163
   3164	return 0;
   3165}
   3166
   3167static int f2fs_ioc_precache_extents(struct file *filp, unsigned long arg)
   3168{
   3169	return f2fs_precache_extents(file_inode(filp));
   3170}
   3171
   3172static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg)
   3173{
   3174	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
   3175	__u64 block_count;
   3176
   3177	if (!capable(CAP_SYS_ADMIN))
   3178		return -EPERM;
   3179
   3180	if (f2fs_readonly(sbi->sb))
   3181		return -EROFS;
   3182
   3183	if (copy_from_user(&block_count, (void __user *)arg,
   3184			   sizeof(block_count)))
   3185		return -EFAULT;
   3186
   3187	return f2fs_resize_fs(sbi, block_count);
   3188}
   3189
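/*
 * Usage sketch (editor's addition, not upstream code): the online-resize
 * ioctl above; the argument is the desired total block count, not a delta.
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/f2fs.h>
 *
 *	__u64 blocks = 4 << 20;   // 4M blocks = 16 GiB at 4 KiB per block
 *	ioctl(fd, F2FS_IOC_RESIZE_FS, &blocks);
 */
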
   3190static int f2fs_ioc_enable_verity(struct file *filp, unsigned long arg)
   3191{
   3192	struct inode *inode = file_inode(filp);
   3193
   3194	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
   3195
   3196	if (!f2fs_sb_has_verity(F2FS_I_SB(inode))) {
   3197		f2fs_warn(F2FS_I_SB(inode),
   3198			  "Can't enable fs-verity on inode %lu: the verity feature is not enabled on this filesystem",
   3199			  inode->i_ino);
   3200		return -EOPNOTSUPP;
   3201	}
   3202
   3203	return fsverity_ioctl_enable(filp, (const void __user *)arg);
   3204}
   3205
   3206static int f2fs_ioc_measure_verity(struct file *filp, unsigned long arg)
   3207{
   3208	if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp))))
   3209		return -EOPNOTSUPP;
   3210
   3211	return fsverity_ioctl_measure(filp, (void __user *)arg);
   3212}
   3213
   3214static int f2fs_ioc_read_verity_metadata(struct file *filp, unsigned long arg)
   3215{
   3216	if (!f2fs_sb_has_verity(F2FS_I_SB(file_inode(filp))))
   3217		return -EOPNOTSUPP;
   3218
   3219	return fsverity_ioctl_read_metadata(filp, (const void __user *)arg);
   3220}
   3221
   3222static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg)
   3223{
   3224	struct inode *inode = file_inode(filp);
   3225	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   3226	char *vbuf;
   3227	int count;
   3228	int err = 0;
   3229
   3230	vbuf = f2fs_kzalloc(sbi, MAX_VOLUME_NAME, GFP_KERNEL);
   3231	if (!vbuf)
   3232		return -ENOMEM;
   3233
   3234	f2fs_down_read(&sbi->sb_lock);
   3235	count = utf16s_to_utf8s(sbi->raw_super->volume_name,
   3236			ARRAY_SIZE(sbi->raw_super->volume_name),
   3237			UTF16_LITTLE_ENDIAN, vbuf, MAX_VOLUME_NAME);
   3238	f2fs_up_read(&sbi->sb_lock);
   3239
   3240	if (copy_to_user((char __user *)arg, vbuf,
   3241				min(FSLABEL_MAX, count)))
   3242		err = -EFAULT;
   3243
   3244	kfree(vbuf);
   3245	return err;
   3246}
   3247
   3248static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg)
   3249{
   3250	struct inode *inode = file_inode(filp);
   3251	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   3252	char *vbuf;
   3253	int err = 0;
   3254
   3255	if (!capable(CAP_SYS_ADMIN))
   3256		return -EPERM;
   3257
   3258	vbuf = strndup_user((const char __user *)arg, FSLABEL_MAX);
   3259	if (IS_ERR(vbuf))
   3260		return PTR_ERR(vbuf);
   3261
   3262	err = mnt_want_write_file(filp);
   3263	if (err)
   3264		goto out;
   3265
   3266	f2fs_down_write(&sbi->sb_lock);
   3267
   3268	memset(sbi->raw_super->volume_name, 0,
   3269			sizeof(sbi->raw_super->volume_name));
   3270	utf8s_to_utf16s(vbuf, strlen(vbuf), UTF16_LITTLE_ENDIAN,
   3271			sbi->raw_super->volume_name,
   3272			ARRAY_SIZE(sbi->raw_super->volume_name));
   3273
   3274	err = f2fs_commit_super(sbi, false);
   3275
   3276	f2fs_up_write(&sbi->sb_lock);
   3277
   3278	mnt_drop_write_file(filp);
   3279out:
   3280	kfree(vbuf);
   3281	return err;
   3282}
   3283
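/*
 * Usage sketch (editor's addition, not upstream code): the two handlers
 * above back the generic FS_IOC_{GET,SET}FSLABEL ioctls from <linux/fs.h>;
 * the on-disk label is UTF-16LE and converted to/from UTF-8 here.
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/fs.h>
 *
 *	char label[FSLABEL_MAX];
 *	ioctl(fd, FS_IOC_GETFSLABEL, label);
 *	ioctl(fd, FS_IOC_SETFSLABEL, "scratch");
 */
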
   3284static int f2fs_get_compress_blocks(struct file *filp, unsigned long arg)
   3285{
   3286	struct inode *inode = file_inode(filp);
   3287	__u64 blocks;
   3288
   3289	if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
   3290		return -EOPNOTSUPP;
   3291
   3292	if (!f2fs_compressed_file(inode))
   3293		return -EINVAL;
   3294
   3295	blocks = atomic_read(&F2FS_I(inode)->i_compr_blocks);
   3296	return put_user(blocks, (u64 __user *)arg);
   3297}
   3298
   3299static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
   3300{
   3301	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
   3302	unsigned int released_blocks = 0;
   3303	int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
   3304	block_t blkaddr;
   3305	int i;
   3306
   3307	for (i = 0; i < count; i++) {
   3308		blkaddr = data_blkaddr(dn->inode, dn->node_page,
   3309						dn->ofs_in_node + i);
   3310
   3311		if (!__is_valid_data_blkaddr(blkaddr))
   3312			continue;
   3313		if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
   3314					DATA_GENERIC_ENHANCE)))
   3315			return -EFSCORRUPTED;
   3316	}
   3317
   3318	while (count) {
   3319		int compr_blocks = 0;
   3320
   3321		for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
   3322			blkaddr = f2fs_data_blkaddr(dn);
   3323
   3324			if (i == 0) {
   3325				if (blkaddr == COMPRESS_ADDR)
   3326					continue;
   3327				dn->ofs_in_node += cluster_size;
   3328				goto next;
   3329			}
   3330
   3331			if (__is_valid_data_blkaddr(blkaddr))
   3332				compr_blocks++;
   3333
   3334			if (blkaddr != NEW_ADDR)
   3335				continue;
   3336
   3337			dn->data_blkaddr = NULL_ADDR;
   3338			f2fs_set_data_blkaddr(dn);
   3339		}
   3340
   3341		f2fs_i_compr_blocks_update(dn->inode, compr_blocks, false);
   3342		dec_valid_block_count(sbi, dn->inode,
   3343					cluster_size - compr_blocks);
   3344
   3345		released_blocks += cluster_size - compr_blocks;
   3346next:
   3347		count -= cluster_size;
   3348	}
   3349
   3350	return released_blocks;
   3351}
   3352
   3353static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
   3354{
   3355	struct inode *inode = file_inode(filp);
   3356	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   3357	pgoff_t page_idx = 0, last_idx;
   3358	unsigned int released_blocks = 0;
   3359	int ret;
   3360	int writecount;
   3361
   3362	if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
   3363		return -EOPNOTSUPP;
   3364
   3365	if (!f2fs_compressed_file(inode))
   3366		return -EINVAL;
   3367
   3368	if (f2fs_readonly(sbi->sb))
   3369		return -EROFS;
   3370
   3371	ret = mnt_want_write_file(filp);
   3372	if (ret)
   3373		return ret;
   3374
   3375	f2fs_balance_fs(F2FS_I_SB(inode), true);
   3376
   3377	inode_lock(inode);
   3378
   3379	writecount = atomic_read(&inode->i_writecount);
   3380	if ((filp->f_mode & FMODE_WRITE && writecount != 1) ||
   3381			(!(filp->f_mode & FMODE_WRITE) && writecount)) {
   3382		ret = -EBUSY;
   3383		goto out;
   3384	}
   3385
   3386	if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
   3387		ret = -EINVAL;
   3388		goto out;
   3389	}
   3390
   3391	ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
   3392	if (ret)
   3393		goto out;
   3394
   3395	set_inode_flag(inode, FI_COMPRESS_RELEASED);
   3396	inode->i_ctime = current_time(inode);
   3397	f2fs_mark_inode_dirty_sync(inode, true);
   3398
   3399	if (!atomic_read(&F2FS_I(inode)->i_compr_blocks))
   3400		goto out;
   3401
   3402	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
   3403	filemap_invalidate_lock(inode->i_mapping);
   3404
   3405	last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
   3406
   3407	while (page_idx < last_idx) {
   3408		struct dnode_of_data dn;
   3409		pgoff_t end_offset, count;
   3410
   3411		set_new_dnode(&dn, inode, NULL, NULL, 0);
   3412		ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE);
   3413		if (ret) {
   3414			if (ret == -ENOENT) {
   3415				page_idx = f2fs_get_next_page_offset(&dn,
   3416								page_idx);
   3417				ret = 0;
   3418				continue;
   3419			}
   3420			break;
   3421		}
   3422
   3423		end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
   3424		count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
   3425		count = round_up(count, F2FS_I(inode)->i_cluster_size);
   3426
   3427		ret = release_compress_blocks(&dn, count);
   3428
   3429		f2fs_put_dnode(&dn);
   3430
   3431		if (ret < 0)
   3432			break;
   3433
   3434		page_idx += count;
   3435		released_blocks += ret;
   3436	}
   3437
   3438	filemap_invalidate_unlock(inode->i_mapping);
   3439	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
   3440out:
   3441	inode_unlock(inode);
   3442
   3443	mnt_drop_write_file(filp);
   3444
   3445	if (ret >= 0) {
   3446		ret = put_user(released_blocks, (u64 __user *)arg);
   3447	} else if (released_blocks &&
   3448			atomic_read(&F2FS_I(inode)->i_compr_blocks)) {
   3449		set_sbi_flag(sbi, SBI_NEED_FSCK);
   3450		f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx "
   3451			"iblocks=%llu, released=%u, compr_blocks=%u, "
   3452			"run fsck to fix.",
   3453			__func__, inode->i_ino, inode->i_blocks,
   3454			released_blocks,
   3455			atomic_read(&F2FS_I(inode)->i_compr_blocks));
   3456	}
   3457
   3458	return ret;
   3459}
   3460
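/*
 * Usage sketch (editor's addition, not upstream code): releasing the space
 * saved by compression. The file must be compressed, the caller must hold
 * the only writable reference, and the released block count is returned
 * through the argument.
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/f2fs.h>
 *
 *	__u64 released;
 *	ioctl(fd, F2FS_IOC_RELEASE_COMPRESS_BLOCKS, &released);
 */
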
   3461static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
   3462{
   3463	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
   3464	unsigned int reserved_blocks = 0;
   3465	int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
   3466	block_t blkaddr;
   3467	int i;
   3468
   3469	for (i = 0; i < count; i++) {
   3470		blkaddr = data_blkaddr(dn->inode, dn->node_page,
   3471						dn->ofs_in_node + i);
   3472
   3473		if (!__is_valid_data_blkaddr(blkaddr))
   3474			continue;
   3475		if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
   3476					DATA_GENERIC_ENHANCE)))
   3477			return -EFSCORRUPTED;
   3478	}
   3479
   3480	while (count) {
   3481		int compr_blocks = 0;
   3482		blkcnt_t reserved;
   3483		int ret;
   3484
   3485		for (i = 0; i < cluster_size; i++, dn->ofs_in_node++) {
   3486			blkaddr = f2fs_data_blkaddr(dn);
   3487
   3488			if (i == 0) {
   3489				if (blkaddr == COMPRESS_ADDR)
   3490					continue;
   3491				dn->ofs_in_node += cluster_size;
   3492				goto next;
   3493			}
   3494
   3495			if (__is_valid_data_blkaddr(blkaddr)) {
   3496				compr_blocks++;
   3497				continue;
   3498			}
   3499
   3500			dn->data_blkaddr = NEW_ADDR;
   3501			f2fs_set_data_blkaddr(dn);
   3502		}
   3503
   3504		reserved = cluster_size - compr_blocks;
   3505		ret = inc_valid_block_count(sbi, dn->inode, &reserved);
   3506		if (ret)
   3507			return ret;
   3508
   3509		if (reserved != cluster_size - compr_blocks)
   3510			return -ENOSPC;
   3511
   3512		f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true);
   3513
   3514		reserved_blocks += reserved;
   3515next:
   3516		count -= cluster_size;
   3517	}
   3518
   3519	return reserved_blocks;
   3520}
   3521
   3522static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
   3523{
   3524	struct inode *inode = file_inode(filp);
   3525	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   3526	pgoff_t page_idx = 0, last_idx;
   3527	unsigned int reserved_blocks = 0;
   3528	int ret;
   3529
   3530	if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
   3531		return -EOPNOTSUPP;
   3532
   3533	if (!f2fs_compressed_file(inode))
   3534		return -EINVAL;
   3535
   3536	if (f2fs_readonly(sbi->sb))
   3537		return -EROFS;
   3538
   3539	ret = mnt_want_write_file(filp);
   3540	if (ret)
   3541		return ret;
   3542
   3543	if (atomic_read(&F2FS_I(inode)->i_compr_blocks))
   3544		goto out;
   3545
   3546	f2fs_balance_fs(F2FS_I_SB(inode), true);
   3547
   3548	inode_lock(inode);
   3549
   3550	if (!is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
   3551		ret = -EINVAL;
   3552		goto unlock_inode;
   3553	}
   3554
   3555	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
   3556	filemap_invalidate_lock(inode->i_mapping);
   3557
   3558	last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
   3559
   3560	while (page_idx < last_idx) {
   3561		struct dnode_of_data dn;
   3562		pgoff_t end_offset, count;
   3563
   3564		set_new_dnode(&dn, inode, NULL, NULL, 0);
   3565		ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE);
   3566		if (ret) {
   3567			if (ret == -ENOENT) {
   3568				page_idx = f2fs_get_next_page_offset(&dn,
   3569								page_idx);
   3570				ret = 0;
   3571				continue;
   3572			}
   3573			break;
   3574		}
   3575
   3576		end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
   3577		count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
   3578		count = round_up(count, F2FS_I(inode)->i_cluster_size);
   3579
   3580		ret = reserve_compress_blocks(&dn, count);
   3581
   3582		f2fs_put_dnode(&dn);
   3583
   3584		if (ret < 0)
   3585			break;
   3586
   3587		page_idx += count;
   3588		reserved_blocks += ret;
   3589	}
   3590
   3591	filemap_invalidate_unlock(inode->i_mapping);
   3592	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
   3593
   3594	if (ret >= 0) {
   3595		clear_inode_flag(inode, FI_COMPRESS_RELEASED);
   3596		inode->i_ctime = current_time(inode);
   3597		f2fs_mark_inode_dirty_sync(inode, true);
   3598	}
   3599unlock_inode:
   3600	inode_unlock(inode);
   3601out:
   3602	mnt_drop_write_file(filp);
   3603
   3604	if (ret >= 0) {
   3605		ret = put_user(reserved_blocks, (u64 __user *)arg);
   3606	} else if (reserved_blocks &&
   3607			atomic_read(&F2FS_I(inode)->i_compr_blocks)) {
   3608		set_sbi_flag(sbi, SBI_NEED_FSCK);
   3609		f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%lx "
   3610			"iblocks=%llu, reserved=%u, compr_blocks=%u, "
   3611			"run fsck to fix.",
   3612			__func__, inode->i_ino, inode->i_blocks,
   3613			reserved_blocks,
   3614			atomic_read(&F2FS_I(inode)->i_compr_blocks));
   3615	}
   3616
   3617	return ret;
   3618}
   3619
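/*
 * Usage sketch (editor's addition, not upstream code): the inverse of the
 * release path above, re-reserving blocks so the released file becomes
 * writable again; the reserved block count comes back via the argument.
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/f2fs.h>
 *
 *	__u64 reserved;
 *	ioctl(fd, F2FS_IOC_RESERVE_COMPRESS_BLOCKS, &reserved);
 */
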
   3620static int f2fs_secure_erase(struct block_device *bdev, struct inode *inode,
   3621		pgoff_t off, block_t block, block_t len, u32 flags)
   3622{
   3623	sector_t sector = SECTOR_FROM_BLOCK(block);
   3624	sector_t nr_sects = SECTOR_FROM_BLOCK(len);
   3625	int ret = 0;
   3626
   3627	if (flags & F2FS_TRIM_FILE_DISCARD) {
   3628		if (bdev_max_secure_erase_sectors(bdev))
   3629			ret = blkdev_issue_secure_erase(bdev, sector, nr_sects,
   3630					GFP_NOFS);
   3631		else
   3632			ret = blkdev_issue_discard(bdev, sector, nr_sects,
   3633					GFP_NOFS);
   3634	}
   3635
   3636	if (!ret && (flags & F2FS_TRIM_FILE_ZEROOUT)) {
   3637		if (IS_ENCRYPTED(inode))
   3638			ret = fscrypt_zeroout_range(inode, off, block, len);
   3639		else
   3640			ret = blkdev_issue_zeroout(bdev, sector, nr_sects,
   3641					GFP_NOFS, 0);
   3642	}
   3643
   3644	return ret;
   3645}
   3646
   3647static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
   3648{
   3649	struct inode *inode = file_inode(filp);
   3650	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   3651	struct address_space *mapping = inode->i_mapping;
   3652	struct block_device *prev_bdev = NULL;
   3653	struct f2fs_sectrim_range range;
   3654	pgoff_t index, pg_end, prev_index = 0;
   3655	block_t prev_block = 0, len = 0;
   3656	loff_t end_addr;
   3657	bool to_end = false;
   3658	int ret = 0;
   3659
   3660	if (!(filp->f_mode & FMODE_WRITE))
   3661		return -EBADF;
   3662
   3663	if (copy_from_user(&range, (struct f2fs_sectrim_range __user *)arg,
   3664				sizeof(range)))
   3665		return -EFAULT;
   3666
   3667	if (range.flags == 0 || (range.flags & ~F2FS_TRIM_FILE_MASK) ||
   3668			!S_ISREG(inode->i_mode))
   3669		return -EINVAL;
   3670
   3671	if (((range.flags & F2FS_TRIM_FILE_DISCARD) &&
   3672			!f2fs_hw_support_discard(sbi)) ||
   3673			((range.flags & F2FS_TRIM_FILE_ZEROOUT) &&
   3674			 IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi)))
   3675		return -EOPNOTSUPP;
   3676
   3677	file_start_write(filp);
   3678	inode_lock(inode);
   3679
   3680	if (f2fs_is_atomic_file(inode) || f2fs_compressed_file(inode) ||
   3681			range.start >= inode->i_size) {
   3682		ret = -EINVAL;
   3683		goto err;
   3684	}
   3685
   3686	if (range.len == 0)
   3687		goto err;
   3688
   3689	if (inode->i_size - range.start > range.len) {
   3690		end_addr = range.start + range.len;
   3691	} else {
   3692		end_addr = range.len == (u64)-1 ?
   3693			sbi->sb->s_maxbytes : inode->i_size;
   3694		to_end = true;
   3695	}
   3696
   3697	if (!IS_ALIGNED(range.start, F2FS_BLKSIZE) ||
   3698			(!to_end && !IS_ALIGNED(end_addr, F2FS_BLKSIZE))) {
   3699		ret = -EINVAL;
   3700		goto err;
   3701	}
   3702
   3703	index = F2FS_BYTES_TO_BLK(range.start);
   3704	pg_end = DIV_ROUND_UP(end_addr, F2FS_BLKSIZE);
   3705
   3706	ret = f2fs_convert_inline_inode(inode);
   3707	if (ret)
   3708		goto err;
   3709
   3710	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
   3711	filemap_invalidate_lock(mapping);
   3712
   3713	ret = filemap_write_and_wait_range(mapping, range.start,
   3714			to_end ? LLONG_MAX : end_addr - 1);
   3715	if (ret)
   3716		goto out;
   3717
   3718	truncate_inode_pages_range(mapping, range.start,
   3719			to_end ? -1 : end_addr - 1);
   3720
   3721	while (index < pg_end) {
   3722		struct dnode_of_data dn;
   3723		pgoff_t end_offset, count;
   3724		int i;
   3725
   3726		set_new_dnode(&dn, inode, NULL, NULL, 0);
   3727		ret = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
   3728		if (ret) {
   3729			if (ret == -ENOENT) {
   3730				index = f2fs_get_next_page_offset(&dn, index);
   3731				continue;
   3732			}
   3733			goto out;
   3734		}
   3735
   3736		end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
   3737		count = min(end_offset - dn.ofs_in_node, pg_end - index);
   3738		for (i = 0; i < count; i++, index++, dn.ofs_in_node++) {
   3739			struct block_device *cur_bdev;
   3740			block_t blkaddr = f2fs_data_blkaddr(&dn);
   3741
   3742			if (!__is_valid_data_blkaddr(blkaddr))
   3743				continue;
   3744
   3745			if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
   3746						DATA_GENERIC_ENHANCE)) {
   3747				ret = -EFSCORRUPTED;
   3748				f2fs_put_dnode(&dn);
   3749				goto out;
   3750			}
   3751
   3752			cur_bdev = f2fs_target_device(sbi, blkaddr, NULL);
   3753			if (f2fs_is_multi_device(sbi)) {
   3754				int di = f2fs_target_device_index(sbi, blkaddr);
   3755
   3756				blkaddr -= FDEV(di).start_blk;
   3757			}
   3758
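       			/*
       			 * Batch physically contiguous blocks into one erase:
       			 * the run [prev_block, prev_block + len) grows only
       			 * while the page index and block address advance in
       			 * lockstep; any discontinuity flushes the run first.
       			 */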
   3759			if (len) {
   3760				if (prev_bdev == cur_bdev &&
   3761						index == prev_index + len &&
   3762						blkaddr == prev_block + len) {
   3763					len++;
   3764				} else {
   3765					ret = f2fs_secure_erase(prev_bdev,
   3766						inode, prev_index, prev_block,
   3767						len, range.flags);
   3768					if (ret) {
   3769						f2fs_put_dnode(&dn);
   3770						goto out;
   3771					}
   3772
   3773					len = 0;
   3774				}
   3775			}
   3776
   3777			if (!len) {
   3778				prev_bdev = cur_bdev;
   3779				prev_index = index;
   3780				prev_block = blkaddr;
   3781				len = 1;
   3782			}
   3783		}
   3784
   3785		f2fs_put_dnode(&dn);
   3786
   3787		if (fatal_signal_pending(current)) {
   3788			ret = -EINTR;
   3789			goto out;
   3790		}
   3791		cond_resched();
   3792	}
   3793
   3794	if (len)
   3795		ret = f2fs_secure_erase(prev_bdev, inode, prev_index,
   3796				prev_block, len, range.flags);
   3797out:
   3798	filemap_invalidate_unlock(mapping);
   3799	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
   3800err:
   3801	inode_unlock(inode);
   3802	file_end_write(filp);
   3803
   3804	return ret;
   3805}
   3806
   3807static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg)
   3808{
   3809	struct inode *inode = file_inode(filp);
   3810	struct f2fs_comp_option option;
   3811
   3812	if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
   3813		return -EOPNOTSUPP;
   3814
   3815	inode_lock_shared(inode);
   3816
   3817	if (!f2fs_compressed_file(inode)) {
   3818		inode_unlock_shared(inode);
   3819		return -ENODATA;
   3820	}
   3821
   3822	option.algorithm = F2FS_I(inode)->i_compress_algorithm;
   3823	option.log_cluster_size = F2FS_I(inode)->i_log_cluster_size;
   3824
   3825	inode_unlock_shared(inode);
   3826
   3827	if (copy_to_user((struct f2fs_comp_option __user *)arg, &option,
   3828				sizeof(option)))
   3829		return -EFAULT;
   3830
   3831	return 0;
   3832}
   3833
   3834static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
   3835{
   3836	struct inode *inode = file_inode(filp);
   3837	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   3838	struct f2fs_comp_option option;
   3839	int ret = 0;
   3840
   3841	if (!f2fs_sb_has_compression(sbi))
   3842		return -EOPNOTSUPP;
   3843
   3844	if (!(filp->f_mode & FMODE_WRITE))
   3845		return -EBADF;
   3846
   3847	if (copy_from_user(&option, (struct f2fs_comp_option __user *)arg,
   3848				sizeof(option)))
   3849		return -EFAULT;
   3850
   3851	if (!f2fs_compressed_file(inode) ||
   3852			option.log_cluster_size < MIN_COMPRESS_LOG_SIZE ||
   3853			option.log_cluster_size > MAX_COMPRESS_LOG_SIZE ||
   3854			option.algorithm >= COMPRESS_MAX)
   3855		return -EINVAL;
   3856
   3857	file_start_write(filp);
   3858	inode_lock(inode);
   3859
   3860	if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) {
   3861		ret = -EBUSY;
   3862		goto out;
   3863	}
   3864
   3865	if (inode->i_size != 0) {
   3866		ret = -EFBIG;
   3867		goto out;
   3868	}
   3869
   3870	F2FS_I(inode)->i_compress_algorithm = option.algorithm;
   3871	F2FS_I(inode)->i_log_cluster_size = option.log_cluster_size;
   3872	F2FS_I(inode)->i_cluster_size = 1 << option.log_cluster_size;
   3873	f2fs_mark_inode_dirty_sync(inode, true);
   3874
   3875	if (!f2fs_is_compress_backend_ready(inode))
   3876		f2fs_warn(sbi, "compression algorithm is successfully set, "
   3877			"but current kernel doesn't support this algorithm.");
   3878out:
   3879	inode_unlock(inode);
   3880	file_end_write(filp);
   3881
   3882	return ret;
   3883}
   3884
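       /*
        * Pull a run of pages into the page cache and dirty them so that a
        * later writeback rewrites the data through the (de)compression path;
        * the compress/decompress ioctls below call this one cluster at a time.
        */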
   3885static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len)
   3886{
   3887	DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, page_idx);
   3888	struct address_space *mapping = inode->i_mapping;
   3889	struct page *page;
   3890	pgoff_t redirty_idx = page_idx;
   3891	int i, page_len = 0, ret = 0;
   3892
   3893	page_cache_ra_unbounded(&ractl, len, 0);
   3894
   3895	for (i = 0; i < len; i++, page_idx++) {
   3896		page = read_cache_page(mapping, page_idx, NULL, NULL);
   3897		if (IS_ERR(page)) {
   3898			ret = PTR_ERR(page);
   3899			break;
   3900		}
   3901		page_len++;
   3902	}
   3903
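       	/*
       	 * Second pass: the pages read above are cached and referenced; lock
       	 * and dirty each one, then drop both references (one taken by
       	 * read_cache_page(), one by find_lock_page()).
       	 */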
   3904	for (i = 0; i < page_len; i++, redirty_idx++) {
   3905		page = find_lock_page(mapping, redirty_idx);
   3906		if (!page) {
   3907			ret = -ENOMEM;
   3908			break;
   3909		}
   3910		set_page_dirty(page);
   3911		f2fs_put_page(page, 1);
   3912		f2fs_put_page(page, 0);
   3913	}
   3914
   3915	return ret;
   3916}
   3917
   3918static int f2fs_ioc_decompress_file(struct file *filp, unsigned long arg)
   3919{
   3920	struct inode *inode = file_inode(filp);
   3921	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   3922	struct f2fs_inode_info *fi = F2FS_I(inode);
   3923	pgoff_t page_idx = 0, last_idx;
   3924	unsigned int blk_per_seg = sbi->blocks_per_seg;
   3925	int cluster_size = fi->i_cluster_size;
   3926	int count, ret;
   3927
   3928	if (!f2fs_sb_has_compression(sbi) ||
   3929			F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER)
   3930		return -EOPNOTSUPP;
   3931
   3932	if (!(filp->f_mode & FMODE_WRITE))
   3933		return -EBADF;
   3934
   3935	if (!f2fs_compressed_file(inode))
   3936		return -EINVAL;
   3937
   3938	f2fs_balance_fs(F2FS_I_SB(inode), true);
   3939
   3940	file_start_write(filp);
   3941	inode_lock(inode);
   3942
   3943	if (!f2fs_is_compress_backend_ready(inode)) {
   3944		ret = -EOPNOTSUPP;
   3945		goto out;
   3946	}
   3947
   3948	ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
   3949	if (ret)
   3950		goto out;
   3951
   3952	if (!atomic_read(&fi->i_compr_blocks))
   3953		goto out;
   3954
   3955	last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
   3956
   3957	count = last_idx - page_idx;
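       	/*
       	 * Redirty one cluster at a time; once a segment's worth of pages is
       	 * dirty (blk_per_seg, i.e. 512 with the default 2MB segments), kick
       	 * writeback so the amount of dirty memory stays bounded.
       	 */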
   3958	while (count) {
   3959		int len = min(cluster_size, count);
   3960
   3961		ret = redirty_blocks(inode, page_idx, len);
   3962		if (ret < 0)
   3963			break;
   3964
   3965		if (get_dirty_pages(inode) >= blk_per_seg)
   3966			filemap_fdatawrite(inode->i_mapping);
   3967
   3968		count -= len;
   3969		page_idx += len;
   3970	}
   3971
   3972	if (!ret)
   3973		ret = filemap_write_and_wait_range(inode->i_mapping, 0,
   3974							LLONG_MAX);
   3975
   3976	if (ret)
   3977		f2fs_warn(sbi, "%s: The file might be partially decompressed (errno=%d). Please delete the file.",
   3978			  __func__, ret);
   3979out:
   3980	inode_unlock(inode);
   3981	file_end_write(filp);
   3982
   3983	return ret;
   3984}
   3985
   3986static int f2fs_ioc_compress_file(struct file *filp, unsigned long arg)
   3987{
   3988	struct inode *inode = file_inode(filp);
   3989	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   3990	pgoff_t page_idx = 0, last_idx;
   3991	unsigned int blk_per_seg = sbi->blocks_per_seg;
   3992	int cluster_size = F2FS_I(inode)->i_cluster_size;
   3993	int count, ret;
   3994
   3995	if (!f2fs_sb_has_compression(sbi) ||
   3996			F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER)
   3997		return -EOPNOTSUPP;
   3998
   3999	if (!(filp->f_mode & FMODE_WRITE))
   4000		return -EBADF;
   4001
   4002	if (!f2fs_compressed_file(inode))
   4003		return -EINVAL;
   4004
   4005	f2fs_balance_fs(F2FS_I_SB(inode), true);
   4006
   4007	file_start_write(filp);
   4008	inode_lock(inode);
   4009
   4010	if (!f2fs_is_compress_backend_ready(inode)) {
   4011		ret = -EOPNOTSUPP;
   4012		goto out;
   4013	}
   4014
   4015	ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
   4016	if (ret)
   4017		goto out;
   4018
   4019	set_inode_flag(inode, FI_ENABLE_COMPRESS);
   4020
   4021	last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
   4022
   4023	count = last_idx - page_idx;
   4024	while (count) {
   4025		int len = min(cluster_size, count);
   4026
   4027		ret = redirty_blocks(inode, page_idx, len);
   4028		if (ret < 0)
   4029			break;
   4030
   4031		if (get_dirty_pages(inode) >= blk_per_seg)
   4032			filemap_fdatawrite(inode->i_mapping);
   4033
   4034		count -= len;
   4035		page_idx += len;
   4036	}
   4037
   4038	if (!ret)
   4039		ret = filemap_write_and_wait_range(inode->i_mapping, 0,
   4040							LLONG_MAX);
   4041
   4042	clear_inode_flag(inode, FI_ENABLE_COMPRESS);
   4043
   4044	if (ret)
   4045		f2fs_warn(sbi, "%s: The file might be partially compressed (errno=%d). Please delete the file.",
   4046			  __func__, ret);
   4047out:
   4048	inode_unlock(inode);
   4049	file_end_write(filp);
   4050
   4051	return ret;
   4052}
   4053
   4054static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
   4055{
   4056	switch (cmd) {
   4057	case FS_IOC_GETVERSION:
   4058		return f2fs_ioc_getversion(filp, arg);
   4059	case F2FS_IOC_START_ATOMIC_WRITE:
   4060		return f2fs_ioc_start_atomic_write(filp);
   4061	case F2FS_IOC_COMMIT_ATOMIC_WRITE:
   4062		return f2fs_ioc_commit_atomic_write(filp);
   4063	case F2FS_IOC_START_VOLATILE_WRITE:
   4064	case F2FS_IOC_RELEASE_VOLATILE_WRITE:
   4065	case F2FS_IOC_ABORT_VOLATILE_WRITE:
   4066		return -EOPNOTSUPP;
   4067	case F2FS_IOC_SHUTDOWN:
   4068		return f2fs_ioc_shutdown(filp, arg);
   4069	case FITRIM:
   4070		return f2fs_ioc_fitrim(filp, arg);
   4071	case FS_IOC_SET_ENCRYPTION_POLICY:
   4072		return f2fs_ioc_set_encryption_policy(filp, arg);
   4073	case FS_IOC_GET_ENCRYPTION_POLICY:
   4074		return f2fs_ioc_get_encryption_policy(filp, arg);
   4075	case FS_IOC_GET_ENCRYPTION_PWSALT:
   4076		return f2fs_ioc_get_encryption_pwsalt(filp, arg);
   4077	case FS_IOC_GET_ENCRYPTION_POLICY_EX:
   4078		return f2fs_ioc_get_encryption_policy_ex(filp, arg);
   4079	case FS_IOC_ADD_ENCRYPTION_KEY:
   4080		return f2fs_ioc_add_encryption_key(filp, arg);
   4081	case FS_IOC_REMOVE_ENCRYPTION_KEY:
   4082		return f2fs_ioc_remove_encryption_key(filp, arg);
   4083	case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
   4084		return f2fs_ioc_remove_encryption_key_all_users(filp, arg);
   4085	case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
   4086		return f2fs_ioc_get_encryption_key_status(filp, arg);
   4087	case FS_IOC_GET_ENCRYPTION_NONCE:
   4088		return f2fs_ioc_get_encryption_nonce(filp, arg);
   4089	case F2FS_IOC_GARBAGE_COLLECT:
   4090		return f2fs_ioc_gc(filp, arg);
   4091	case F2FS_IOC_GARBAGE_COLLECT_RANGE:
   4092		return f2fs_ioc_gc_range(filp, arg);
   4093	case F2FS_IOC_WRITE_CHECKPOINT:
   4094		return f2fs_ioc_write_checkpoint(filp, arg);
   4095	case F2FS_IOC_DEFRAGMENT:
   4096		return f2fs_ioc_defragment(filp, arg);
   4097	case F2FS_IOC_MOVE_RANGE:
   4098		return f2fs_ioc_move_range(filp, arg);
   4099	case F2FS_IOC_FLUSH_DEVICE:
   4100		return f2fs_ioc_flush_device(filp, arg);
   4101	case F2FS_IOC_GET_FEATURES:
   4102		return f2fs_ioc_get_features(filp, arg);
   4103	case F2FS_IOC_GET_PIN_FILE:
   4104		return f2fs_ioc_get_pin_file(filp, arg);
   4105	case F2FS_IOC_SET_PIN_FILE:
   4106		return f2fs_ioc_set_pin_file(filp, arg);
   4107	case F2FS_IOC_PRECACHE_EXTENTS:
   4108		return f2fs_ioc_precache_extents(filp, arg);
   4109	case F2FS_IOC_RESIZE_FS:
   4110		return f2fs_ioc_resize_fs(filp, arg);
   4111	case FS_IOC_ENABLE_VERITY:
   4112		return f2fs_ioc_enable_verity(filp, arg);
   4113	case FS_IOC_MEASURE_VERITY:
   4114		return f2fs_ioc_measure_verity(filp, arg);
   4115	case FS_IOC_READ_VERITY_METADATA:
   4116		return f2fs_ioc_read_verity_metadata(filp, arg);
   4117	case FS_IOC_GETFSLABEL:
   4118		return f2fs_ioc_getfslabel(filp, arg);
   4119	case FS_IOC_SETFSLABEL:
   4120		return f2fs_ioc_setfslabel(filp, arg);
   4121	case F2FS_IOC_GET_COMPRESS_BLOCKS:
   4122		return f2fs_get_compress_blocks(filp, arg);
   4123	case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
   4124		return f2fs_release_compress_blocks(filp, arg);
   4125	case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
   4126		return f2fs_reserve_compress_blocks(filp, arg);
   4127	case F2FS_IOC_SEC_TRIM_FILE:
   4128		return f2fs_sec_trim_file(filp, arg);
   4129	case F2FS_IOC_GET_COMPRESS_OPTION:
   4130		return f2fs_ioc_get_compress_option(filp, arg);
   4131	case F2FS_IOC_SET_COMPRESS_OPTION:
   4132		return f2fs_ioc_set_compress_option(filp, arg);
   4133	case F2FS_IOC_DECOMPRESS_FILE:
   4134		return f2fs_ioc_decompress_file(filp, arg);
   4135	case F2FS_IOC_COMPRESS_FILE:
   4136		return f2fs_ioc_compress_file(filp, arg);
   4137	default:
   4138		return -ENOTTY;
   4139	}
   4140}
   4141
   4142long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
   4143{
   4144	if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
   4145		return -EIO;
   4146	if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp))))
   4147		return -ENOSPC;
   4148
   4149	return __f2fs_ioctl(filp, cmd, arg);
   4150}
   4151
   4152/*
   4153 * Return %true if the given read or write request should use direct I/O, or
   4154 * %false if it should use buffered I/O.
   4155 */
   4156static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb,
   4157				struct iov_iter *iter)
   4158{
   4159	unsigned int align;
   4160
   4161	if (!(iocb->ki_flags & IOCB_DIRECT))
   4162		return false;
   4163
   4164	if (f2fs_force_buffered_io(inode, iocb, iter))
   4165		return false;
   4166
   4167	/*
   4168	 * Direct I/O not aligned to the disk's logical_block_size will be
   4169	 * attempted, but will fail with -EINVAL.
   4170	 *
   4171	 * f2fs additionally requires that direct I/O be aligned to the
   4172	 * filesystem block size, which is often a stricter requirement.
   4173	 * However, f2fs traditionally falls back to buffered I/O on requests
   4174	 * that are logical_block_size-aligned but not fs-block aligned.
   4175	 *
   4176	 * The below logic implements this behavior.
   4177	 */
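       	/*
       	 * E.g., assuming 4KB fs blocks on a device with 512B logical
       	 * blocks: pos 512 with a 512B-aligned buffer is logical-block
       	 * aligned but not fs-block aligned, so the request is served as
       	 * buffered I/O instead of failing with -EINVAL.
       	 */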
   4178	align = iocb->ki_pos | iov_iter_alignment(iter);
   4179	if (!IS_ALIGNED(align, i_blocksize(inode)) &&
   4180	    IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev)))
   4181		return false;
   4182
   4183	return true;
   4184}
   4185
   4186static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error,
   4187				unsigned int flags)
   4188{
   4189	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
   4190
   4191	dec_page_count(sbi, F2FS_DIO_READ);
   4192	if (error)
   4193		return error;
   4194	f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, size);
   4195	return 0;
   4196}
   4197
   4198static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = {
   4199	.end_io = f2fs_dio_read_end_io,
   4200};
   4201
   4202static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
   4203{
   4204	struct file *file = iocb->ki_filp;
   4205	struct inode *inode = file_inode(file);
   4206	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   4207	struct f2fs_inode_info *fi = F2FS_I(inode);
   4208	const loff_t pos = iocb->ki_pos;
   4209	const size_t count = iov_iter_count(to);
   4210	struct iomap_dio *dio;
   4211	ssize_t ret;
   4212
   4213	if (count == 0)
   4214		return 0; /* skip atime update */
   4215
   4216	trace_f2fs_direct_IO_enter(inode, iocb, count, READ);
   4217
   4218	if (iocb->ki_flags & IOCB_NOWAIT) {
   4219		if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) {
   4220			ret = -EAGAIN;
   4221			goto out;
   4222		}
   4223	} else {
   4224		f2fs_down_read(&fi->i_gc_rwsem[READ]);
   4225	}
   4226
   4227	/*
   4228	 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
   4229	 * the higher-level function iomap_dio_rw() in order to ensure that the
   4230	 * F2FS_DIO_READ counter will be decremented correctly in all cases.
   4231	 */
   4232	inc_page_count(sbi, F2FS_DIO_READ);
   4233	dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops,
   4234			     &f2fs_iomap_dio_read_ops, 0, NULL, 0);
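       	/*
       	 * For async I/O still in flight, __iomap_dio_rw() returns
       	 * ERR_PTR(-EIOCBQUEUED) and ->end_io will drop the counter on
       	 * completion, so only drop it here on synchronous failure.
       	 */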
   4235	if (IS_ERR_OR_NULL(dio)) {
   4236		ret = PTR_ERR_OR_ZERO(dio);
   4237		if (ret != -EIOCBQUEUED)
   4238			dec_page_count(sbi, F2FS_DIO_READ);
   4239	} else {
   4240		ret = iomap_dio_complete(dio);
   4241	}
   4242
   4243	f2fs_up_read(&fi->i_gc_rwsem[READ]);
   4244
   4245	file_accessed(file);
   4246out:
   4247	trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret);
   4248	return ret;
   4249}
   4250
   4251static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
   4252{
   4253	struct inode *inode = file_inode(iocb->ki_filp);
   4254	const loff_t pos = iocb->ki_pos;
   4255	ssize_t ret;
   4256
   4257	if (!f2fs_is_compress_backend_ready(inode))
   4258		return -EOPNOTSUPP;
   4259
   4260	if (trace_f2fs_dataread_start_enabled()) {
   4261		char *p = f2fs_kmalloc(F2FS_I_SB(inode), PATH_MAX, GFP_KERNEL);
   4262		char *path;
   4263
   4264		if (!p)
   4265			goto skip_read_trace;
   4266
   4267		path = dentry_path_raw(file_dentry(iocb->ki_filp), p, PATH_MAX);
   4268		if (IS_ERR(path)) {
   4269			kfree(p);
   4270			goto skip_read_trace;
   4271		}
   4272
   4273		trace_f2fs_dataread_start(inode, pos, iov_iter_count(to),
   4274					current->pid, path, current->comm);
   4275		kfree(p);
   4276	}
   4277skip_read_trace:
   4278	if (f2fs_should_use_dio(inode, iocb, to)) {
   4279		ret = f2fs_dio_read_iter(iocb, to);
   4280	} else {
   4281		ret = filemap_read(iocb, to, 0);
   4282		if (ret > 0)
   4283			f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_READ_IO, ret);
   4284	}
   4285	if (trace_f2fs_dataread_end_enabled())
   4286		trace_f2fs_dataread_end(inode, pos, ret);
   4287	return ret;
   4288}
   4289
   4290static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
   4291{
   4292	struct file *file = iocb->ki_filp;
   4293	struct inode *inode = file_inode(file);
   4294	ssize_t count;
   4295	int err;
   4296
   4297	if (IS_IMMUTABLE(inode))
   4298		return -EPERM;
   4299
   4300	if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
   4301		return -EPERM;
   4302
   4303	count = generic_write_checks(iocb, from);
   4304	if (count <= 0)
   4305		return count;
   4306
   4307	err = file_modified(file);
   4308	if (err)
   4309		return err;
   4310	return count;
   4311}
   4312
   4313/*
   4314 * Preallocate blocks for a write request, if it is possible and helpful to do
   4315 * so.  Returns a positive number if blocks may have been preallocated, 0 if no
   4316 * blocks were preallocated, or a negative errno value if something went
   4317 * seriously wrong.  Also sets FI_PREALLOCATED_ALL on the inode if *all* the
   4318 * requested blocks (not just some of them) have been allocated.
   4319 */
   4320static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
   4321				   bool dio)
   4322{
   4323	struct inode *inode = file_inode(iocb->ki_filp);
   4324	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   4325	const loff_t pos = iocb->ki_pos;
   4326	const size_t count = iov_iter_count(iter);
   4327	struct f2fs_map_blocks map = {};
   4328	int flag;
   4329	int ret;
   4330
   4331	/* If it will be an out-of-place direct write, don't bother. */
   4332	if (dio && f2fs_lfs_mode(sbi))
   4333		return 0;
   4334	/*
   4335	 * Don't preallocate holes inside i_size: direct I/O that hits a hole
   4336	 * there falls back to buffered I/O anyway (DIO_SKIP_HOLES behavior).
   4337	 */
   4338	if (dio && i_size_read(inode) &&
   4339		(F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode))))
   4340		return 0;
   4341
   4342	/* No-wait I/O can't allocate blocks. */
   4343	if (iocb->ki_flags & IOCB_NOWAIT)
   4344		return 0;
   4345
   4346	/* If it will be a short write, don't bother. */
   4347	if (fault_in_iov_iter_readable(iter, count))
   4348		return 0;
   4349
   4350	if (f2fs_has_inline_data(inode)) {
   4351		/* If the data will fit inline, don't bother. */
   4352		if (pos + count <= MAX_INLINE_DATA(inode))
   4353			return 0;
   4354		ret = f2fs_convert_inline_inode(inode);
   4355		if (ret)
   4356			return ret;
   4357	}
   4358
   4359	/* Do not preallocate blocks that would only be partially written (sub-4KB). */
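       	/*
       	 * E.g., assuming 4KB blocks: pos 2048, count 6144 covers bytes
       	 * [2048, 8192); the first block is partial, so m_lblk = 1 and
       	 * m_len = 2 - 1 = 1, preallocating only the fully written block.
       	 */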
   4360	map.m_lblk = F2FS_BLK_ALIGN(pos);
   4361	map.m_len = F2FS_BYTES_TO_BLK(pos + count);
   4362	if (map.m_len > map.m_lblk)
   4363		map.m_len -= map.m_lblk;
   4364	else
   4365		map.m_len = 0;
   4366	map.m_may_create = true;
   4367	if (dio) {
   4368		map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
   4369		flag = F2FS_GET_BLOCK_PRE_DIO;
   4370	} else {
   4371		map.m_seg_type = NO_CHECK_TYPE;
   4372		flag = F2FS_GET_BLOCK_PRE_AIO;
   4373	}
   4374
   4375	ret = f2fs_map_blocks(inode, &map, 1, flag);
   4376	/* -ENOSPC|-EDQUOT are fine to report the number of allocated blocks. */
   4377	if (ret < 0 && !((ret == -ENOSPC || ret == -EDQUOT) && map.m_len > 0))
   4378		return ret;
   4379	if (ret == 0)
   4380		set_inode_flag(inode, FI_PREALLOCATED_ALL);
   4381	return map.m_len;
   4382}
   4383
   4384static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb,
   4385					struct iov_iter *from)
   4386{
   4387	struct file *file = iocb->ki_filp;
   4388	struct inode *inode = file_inode(file);
   4389	ssize_t ret;
   4390
   4391	if (iocb->ki_flags & IOCB_NOWAIT)
   4392		return -EOPNOTSUPP;
   4393
   4394	current->backing_dev_info = inode_to_bdi(inode);
   4395	ret = generic_perform_write(iocb, from);
   4396	current->backing_dev_info = NULL;
   4397
   4398	if (ret > 0) {
   4399		iocb->ki_pos += ret;
   4400		f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_IO, ret);
   4401	}
   4402	return ret;
   4403}
   4404
   4405static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error,
   4406				 unsigned int flags)
   4407{
   4408	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp));
   4409
   4410	dec_page_count(sbi, F2FS_DIO_WRITE);
   4411	if (error)
   4412		return error;
   4413	f2fs_update_iostat(sbi, APP_DIRECT_IO, size);
   4414	return 0;
   4415}
   4416
   4417static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = {
   4418	.end_io = f2fs_dio_write_end_io,
   4419};
   4420
   4421static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from,
   4422				   bool *may_need_sync)
   4423{
   4424	struct file *file = iocb->ki_filp;
   4425	struct inode *inode = file_inode(file);
   4426	struct f2fs_inode_info *fi = F2FS_I(inode);
   4427	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   4428	const bool do_opu = f2fs_lfs_mode(sbi);
   4429	const loff_t pos = iocb->ki_pos;
   4430	const ssize_t count = iov_iter_count(from);
   4431	unsigned int dio_flags;
   4432	struct iomap_dio *dio;
   4433	ssize_t ret;
   4434
   4435	trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE);
   4436
   4437	if (iocb->ki_flags & IOCB_NOWAIT) {
   4438		/* f2fs_convert_inline_inode() and block allocation can block */
   4439		if (f2fs_has_inline_data(inode) ||
   4440		    !f2fs_overwrite_io(inode, pos, count)) {
   4441			ret = -EAGAIN;
   4442			goto out;
   4443		}
   4444
   4445		if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[WRITE])) {
   4446			ret = -EAGAIN;
   4447			goto out;
   4448		}
   4449		if (do_opu && !f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) {
   4450			f2fs_up_read(&fi->i_gc_rwsem[WRITE]);
   4451			ret = -EAGAIN;
   4452			goto out;
   4453		}
   4454	} else {
   4455		ret = f2fs_convert_inline_inode(inode);
   4456		if (ret)
   4457			goto out;
   4458
   4459		f2fs_down_read(&fi->i_gc_rwsem[WRITE]);
   4460		if (do_opu)
   4461			f2fs_down_read(&fi->i_gc_rwsem[READ]);
   4462	}
   4463
   4464	/*
   4465	 * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
   4466	 * the higher-level function iomap_dio_rw() in order to ensure that the
   4467	 * F2FS_DIO_WRITE counter will be decremented correctly in all cases.
   4468	 */
   4469	inc_page_count(sbi, F2FS_DIO_WRITE);
   4470	dio_flags = 0;
   4471	if (pos + count > inode->i_size)
   4472		dio_flags |= IOMAP_DIO_FORCE_WAIT;
   4473	dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops,
   4474			     &f2fs_iomap_dio_write_ops, dio_flags, NULL, 0);
   4475	if (IS_ERR_OR_NULL(dio)) {
   4476		ret = PTR_ERR_OR_ZERO(dio);
   4477		if (ret == -ENOTBLK)
   4478			ret = 0;
   4479		if (ret != -EIOCBQUEUED)
   4480			dec_page_count(sbi, F2FS_DIO_WRITE);
   4481	} else {
   4482		ret = iomap_dio_complete(dio);
   4483	}
   4484
   4485	if (do_opu)
   4486		f2fs_up_read(&fi->i_gc_rwsem[READ]);
   4487	f2fs_up_read(&fi->i_gc_rwsem[WRITE]);
   4488
   4489	if (ret < 0)
   4490		goto out;
   4491	if (pos + ret > inode->i_size)
   4492		f2fs_i_size_write(inode, pos + ret);
   4493	if (!do_opu)
   4494		set_inode_flag(inode, FI_UPDATE_WRITE);
   4495
   4496	if (iov_iter_count(from)) {
   4497		ssize_t ret2;
   4498		loff_t bufio_start_pos = iocb->ki_pos;
   4499
   4500		/*
   4501		 * The direct write was partial, so we need to fall back to a
   4502		 * buffered write for the remainder.
   4503		 */
   4504
   4505		ret2 = f2fs_buffered_write_iter(iocb, from);
   4506		if (iov_iter_count(from))
   4507			f2fs_write_failed(inode, iocb->ki_pos);
   4508		if (ret2 < 0)
   4509			goto out;
   4510
   4511		/*
   4512		 * Ensure that the pagecache pages are written to disk and
   4513		 * invalidated to preserve the expected O_DIRECT semantics.
   4514		 */
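       		/*
       		 * invalidate_mapping_pages() takes inclusive page indexes,
       		 * hence both byte endpoints are shifted down to page units.
       		 */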
   4515		if (ret2 > 0) {
   4516			loff_t bufio_end_pos = bufio_start_pos + ret2 - 1;
   4517
   4518			ret += ret2;
   4519
   4520			ret2 = filemap_write_and_wait_range(file->f_mapping,
   4521							    bufio_start_pos,
   4522							    bufio_end_pos);
   4523			if (ret2 < 0)
   4524				goto out;
   4525			invalidate_mapping_pages(file->f_mapping,
   4526						 bufio_start_pos >> PAGE_SHIFT,
   4527						 bufio_end_pos >> PAGE_SHIFT);
   4528		}
   4529	} else {
   4530		/* iomap_dio_rw() already handled the generic_write_sync(). */
   4531		*may_need_sync = false;
   4532	}
   4533out:
   4534	trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret);
   4535	return ret;
   4536}
   4537
   4538static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
   4539{
   4540	struct inode *inode = file_inode(iocb->ki_filp);
   4541	const loff_t orig_pos = iocb->ki_pos;
   4542	const size_t orig_count = iov_iter_count(from);
   4543	loff_t target_size;
   4544	bool dio;
   4545	bool may_need_sync = true;
   4546	int preallocated;
   4547	ssize_t ret;
   4548
   4549	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
   4550		ret = -EIO;
   4551		goto out;
   4552	}
   4553
   4554	if (!f2fs_is_compress_backend_ready(inode)) {
   4555		ret = -EOPNOTSUPP;
   4556		goto out;
   4557	}
   4558
   4559	if (iocb->ki_flags & IOCB_NOWAIT) {
   4560		if (!inode_trylock(inode)) {
   4561			ret = -EAGAIN;
   4562			goto out;
   4563		}
   4564	} else {
   4565		inode_lock(inode);
   4566	}
   4567
   4568	ret = f2fs_write_checks(iocb, from);
   4569	if (ret <= 0)
   4570		goto out_unlock;
   4571
   4572	/* Determine whether we will do a direct write or a buffered write. */
   4573	dio = f2fs_should_use_dio(inode, iocb, from);
   4574
   4575	/* Possibly preallocate the blocks for the write. */
   4576	target_size = iocb->ki_pos + iov_iter_count(from);
   4577	preallocated = f2fs_preallocate_blocks(iocb, from, dio);
   4578	if (preallocated < 0) {
   4579		ret = preallocated;
   4580	} else {
   4581		if (trace_f2fs_datawrite_start_enabled()) {
   4582			char *p = f2fs_kmalloc(F2FS_I_SB(inode),
   4583						PATH_MAX, GFP_KERNEL);
   4584			char *path;
   4585
   4586			if (!p)
   4587				goto skip_write_trace;
   4588			path = dentry_path_raw(file_dentry(iocb->ki_filp),
   4589								p, PATH_MAX);
   4590			if (IS_ERR(path)) {
   4591				kfree(p);
   4592				goto skip_write_trace;
   4593			}
   4594			trace_f2fs_datawrite_start(inode, orig_pos, orig_count,
   4595					current->pid, path, current->comm);
   4596			kfree(p);
   4597		}
   4598skip_write_trace:
   4599		/* Do the actual write. */
   4600		ret = dio ?
   4601			f2fs_dio_write_iter(iocb, from, &may_need_sync) :
   4602			f2fs_buffered_write_iter(iocb, from);
   4603
   4604		if (trace_f2fs_datawrite_end_enabled())
   4605			trace_f2fs_datawrite_end(inode, orig_pos, ret);
   4606	}
   4607
   4608	/* Don't leave any preallocated blocks around past i_size. */
   4609	if (preallocated && i_size_read(inode) < target_size) {
   4610		f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
   4611		filemap_invalidate_lock(inode->i_mapping);
   4612		if (!f2fs_truncate(inode))
   4613			file_dont_truncate(inode);
   4614		filemap_invalidate_unlock(inode->i_mapping);
   4615		f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
   4616	} else {
   4617		file_dont_truncate(inode);
   4618	}
   4619
   4620	clear_inode_flag(inode, FI_PREALLOCATED_ALL);
   4621out_unlock:
   4622	inode_unlock(inode);
   4623out:
   4624	trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret);
   4625	if (ret > 0 && may_need_sync)
   4626		ret = generic_write_sync(iocb, ret);
   4627	return ret;
   4628}
   4629
   4630static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len,
   4631		int advice)
   4632{
   4633	struct address_space *mapping;
   4634	struct backing_dev_info *bdi;
   4635	struct inode *inode = file_inode(filp);
   4636	int err;
   4637
   4638	if (advice == POSIX_FADV_SEQUENTIAL) {
   4639		if (S_ISFIFO(inode->i_mode))
   4640			return -ESPIPE;
   4641
   4642		mapping = filp->f_mapping;
   4643		if (!mapping || len < 0)
   4644			return -EINVAL;
   4645
   4646		bdi = inode_to_bdi(mapping->host);
   4647		filp->f_ra.ra_pages = bdi->ra_pages *
   4648			F2FS_I_SB(inode)->seq_file_ra_mul;
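       		/*
       		 * E.g. a 512KB default window (128 pages) with a multiplier
       		 * of 2 gives sequential readers 1MB of readahead;
       		 * seq_file_ra_mul is the sysfs knob of the same name.
       		 */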
   4649		spin_lock(&filp->f_lock);
   4650		filp->f_mode &= ~FMODE_RANDOM;
   4651		spin_unlock(&filp->f_lock);
   4652		return 0;
   4653	}
   4654
   4655	err = generic_fadvise(filp, offset, len, advice);
   4656	if (!err && advice == POSIX_FADV_DONTNEED &&
   4657		test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) &&
   4658		f2fs_compressed_file(inode))
   4659		f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino);
   4660
   4661	return err;
   4662}
   4663
   4664#ifdef CONFIG_COMPAT
   4665struct compat_f2fs_gc_range {
   4666	u32 sync;
   4667	compat_u64 start;
   4668	compat_u64 len;
   4669};
   4670#define F2FS_IOC32_GARBAGE_COLLECT_RANGE	_IOW(F2FS_IOCTL_MAGIC, 11,\
   4671						struct compat_f2fs_gc_range)
   4672
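       /*
        * On 32-bit ABIs a u64 may be only 4-byte aligned, so the structure
        * layout (and with it the computed ioctl number) differs from the
        * native one; compat_u64 reproduces the 32-bit alignment and the
        * handler copies the fields into the native struct.
        */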
   4673static int f2fs_compat_ioc_gc_range(struct file *file, unsigned long arg)
   4674{
   4675	struct compat_f2fs_gc_range __user *urange;
   4676	struct f2fs_gc_range range;
   4677	int err;
   4678
   4679	urange = compat_ptr(arg);
   4680	err = get_user(range.sync, &urange->sync);
   4681	err |= get_user(range.start, &urange->start);
   4682	err |= get_user(range.len, &urange->len);
   4683	if (err)
   4684		return -EFAULT;
   4685
   4686	return __f2fs_ioc_gc_range(file, &range);
   4687}
   4688
   4689struct compat_f2fs_move_range {
   4690	u32 dst_fd;
   4691	compat_u64 pos_in;
   4692	compat_u64 pos_out;
   4693	compat_u64 len;
   4694};
   4695#define F2FS_IOC32_MOVE_RANGE		_IOWR(F2FS_IOCTL_MAGIC, 9,	\
   4696					struct compat_f2fs_move_range)
   4697
   4698static int f2fs_compat_ioc_move_range(struct file *file, unsigned long arg)
   4699{
   4700	struct compat_f2fs_move_range __user *urange;
   4701	struct f2fs_move_range range;
   4702	int err;
   4703
   4704	urange = compat_ptr(arg);
   4705	err = get_user(range.dst_fd, &urange->dst_fd);
   4706	err |= get_user(range.pos_in, &urange->pos_in);
   4707	err |= get_user(range.pos_out, &urange->pos_out);
   4708	err |= get_user(range.len, &urange->len);
   4709	if (err)
   4710		return -EFAULT;
   4711
   4712	return __f2fs_ioc_move_range(file, &range);
   4713}
   4714
   4715long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
   4716{
   4717	if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file)))))
   4718		return -EIO;
   4719	if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(file))))
   4720		return -ENOSPC;
   4721
   4722	switch (cmd) {
   4723	case FS_IOC32_GETVERSION:
   4724		cmd = FS_IOC_GETVERSION;
   4725		break;
   4726	case F2FS_IOC32_GARBAGE_COLLECT_RANGE:
   4727		return f2fs_compat_ioc_gc_range(file, arg);
   4728	case F2FS_IOC32_MOVE_RANGE:
   4729		return f2fs_compat_ioc_move_range(file, arg);
   4730	case F2FS_IOC_START_ATOMIC_WRITE:
   4731	case F2FS_IOC_COMMIT_ATOMIC_WRITE:
   4732	case F2FS_IOC_START_VOLATILE_WRITE:
   4733	case F2FS_IOC_RELEASE_VOLATILE_WRITE:
   4734	case F2FS_IOC_ABORT_VOLATILE_WRITE:
   4735	case F2FS_IOC_SHUTDOWN:
   4736	case FITRIM:
   4737	case FS_IOC_SET_ENCRYPTION_POLICY:
   4738	case FS_IOC_GET_ENCRYPTION_PWSALT:
   4739	case FS_IOC_GET_ENCRYPTION_POLICY:
   4740	case FS_IOC_GET_ENCRYPTION_POLICY_EX:
   4741	case FS_IOC_ADD_ENCRYPTION_KEY:
   4742	case FS_IOC_REMOVE_ENCRYPTION_KEY:
   4743	case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
   4744	case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
   4745	case FS_IOC_GET_ENCRYPTION_NONCE:
   4746	case F2FS_IOC_GARBAGE_COLLECT:
   4747	case F2FS_IOC_WRITE_CHECKPOINT:
   4748	case F2FS_IOC_DEFRAGMENT:
   4749	case F2FS_IOC_FLUSH_DEVICE:
   4750	case F2FS_IOC_GET_FEATURES:
   4751	case F2FS_IOC_GET_PIN_FILE:
   4752	case F2FS_IOC_SET_PIN_FILE:
   4753	case F2FS_IOC_PRECACHE_EXTENTS:
   4754	case F2FS_IOC_RESIZE_FS:
   4755	case FS_IOC_ENABLE_VERITY:
   4756	case FS_IOC_MEASURE_VERITY:
   4757	case FS_IOC_READ_VERITY_METADATA:
   4758	case FS_IOC_GETFSLABEL:
   4759	case FS_IOC_SETFSLABEL:
   4760	case F2FS_IOC_GET_COMPRESS_BLOCKS:
   4761	case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
   4762	case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
   4763	case F2FS_IOC_SEC_TRIM_FILE:
   4764	case F2FS_IOC_GET_COMPRESS_OPTION:
   4765	case F2FS_IOC_SET_COMPRESS_OPTION:
   4766	case F2FS_IOC_DECOMPRESS_FILE:
   4767	case F2FS_IOC_COMPRESS_FILE:
   4768		break;
   4769	default:
   4770		return -ENOIOCTLCMD;
   4771	}
   4772	return __f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
   4773}
   4774#endif
   4775
   4776const struct file_operations f2fs_file_operations = {
   4777	.llseek		= f2fs_llseek,
   4778	.read_iter	= f2fs_file_read_iter,
   4779	.write_iter	= f2fs_file_write_iter,
   4780	.open		= f2fs_file_open,
   4781	.release	= f2fs_release_file,
   4782	.mmap		= f2fs_file_mmap,
   4783	.flush		= f2fs_file_flush,
   4784	.fsync		= f2fs_sync_file,
   4785	.fallocate	= f2fs_fallocate,
   4786	.unlocked_ioctl	= f2fs_ioctl,
   4787#ifdef CONFIG_COMPAT
   4788	.compat_ioctl	= f2fs_compat_ioctl,
   4789#endif
   4790	.splice_read	= generic_file_splice_read,
   4791	.splice_write	= iter_file_splice_write,
   4792	.fadvise	= f2fs_file_fadvise,
   4793};