cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

data.c (102576B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * fs/f2fs/data.c
      4 *
      5 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
      6 *             http://www.samsung.com/
      7 */
      8#include <linux/fs.h>
      9#include <linux/f2fs_fs.h>
     10#include <linux/buffer_head.h>
     11#include <linux/sched/mm.h>
     12#include <linux/mpage.h>
     13#include <linux/writeback.h>
     14#include <linux/pagevec.h>
     15#include <linux/blkdev.h>
     16#include <linux/bio.h>
     17#include <linux/blk-crypto.h>
     18#include <linux/swap.h>
     19#include <linux/prefetch.h>
     20#include <linux/uio.h>
     21#include <linux/sched/signal.h>
     22#include <linux/fiemap.h>
     23#include <linux/iomap.h>
     24
     25#include "f2fs.h"
     26#include "node.h"
     27#include "segment.h"
     28#include "iostat.h"
     29#include <trace/events/f2fs.h>
     30
     31#define NUM_PREALLOC_POST_READ_CTXS	128
     32
     33static struct kmem_cache *bio_post_read_ctx_cache;
     34static struct kmem_cache *bio_entry_slab;
     35static mempool_t *bio_post_read_ctx_pool;
     36static struct bio_set f2fs_bioset;
     37
     38#define	F2FS_BIO_POOL_SIZE	NR_CURSEG_TYPE
     39
     40int __init f2fs_init_bioset(void)
     41{
     42	if (bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE,
     43					0, BIOSET_NEED_BVECS))
     44		return -ENOMEM;
     45	return 0;
     46}
     47
     48void f2fs_destroy_bioset(void)
     49{
     50	bioset_exit(&f2fs_bioset);
     51}
     52
     53static bool __is_cp_guaranteed(struct page *page)
     54{
     55	struct address_space *mapping = page->mapping;
     56	struct inode *inode;
     57	struct f2fs_sb_info *sbi;
     58
     59	if (!mapping)
     60		return false;
     61
     62	inode = mapping->host;
     63	sbi = F2FS_I_SB(inode);
     64
     65	if (inode->i_ino == F2FS_META_INO(sbi) ||
     66			inode->i_ino == F2FS_NODE_INO(sbi) ||
     67			S_ISDIR(inode->i_mode))
     68		return true;
     69
     70	if (f2fs_is_compressed_page(page))
     71		return false;
     72	if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
     73			page_private_gcing(page))
     74		return true;
     75	return false;
     76}
     77
     78static enum count_type __read_io_type(struct page *page)
     79{
     80	struct address_space *mapping = page_file_mapping(page);
     81
     82	if (mapping) {
     83		struct inode *inode = mapping->host;
     84		struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
     85
     86		if (inode->i_ino == F2FS_META_INO(sbi))
     87			return F2FS_RD_META;
     88
     89		if (inode->i_ino == F2FS_NODE_INO(sbi))
     90			return F2FS_RD_NODE;
     91	}
     92	return F2FS_RD_DATA;
     93}
     94
     95/* postprocessing steps for read bios */
     96enum bio_post_read_step {
     97#ifdef CONFIG_FS_ENCRYPTION
     98	STEP_DECRYPT	= 1 << 0,
     99#else
    100	STEP_DECRYPT	= 0,	/* compile out the decryption-related code */
    101#endif
    102#ifdef CONFIG_F2FS_FS_COMPRESSION
    103	STEP_DECOMPRESS	= 1 << 1,
    104#else
    105	STEP_DECOMPRESS	= 0,	/* compile out the decompression-related code */
    106#endif
    107#ifdef CONFIG_FS_VERITY
    108	STEP_VERITY	= 1 << 2,
    109#else
    110	STEP_VERITY	= 0,	/* compile out the verity-related code */
    111#endif
    112};
    113
    114struct bio_post_read_ctx {
    115	struct bio *bio;
    116	struct f2fs_sb_info *sbi;
    117	struct work_struct work;
    118	unsigned int enabled_steps;
    119	block_t fs_blkaddr;
    120};
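
/*
 * Example (see f2fs_grab_read_bio() below): a read of an fscrypt-encrypted,
 * fs-verity-protected regular file gets
 * ctx->enabled_steps = STEP_DECRYPT | STEP_VERITY, so completion decrypts on
 * the post_read workqueue and then hands the bio to the verity workqueue,
 * while an unencrypted, uncompressed file gets no ctx at all and the bio is
 * finished directly from f2fs_read_end_io().
 */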
    121
    122static void f2fs_finish_read_bio(struct bio *bio)
    123{
    124	struct bio_vec *bv;
    125	struct bvec_iter_all iter_all;
    126
    127	/*
    128	 * Update and unlock the bio's pagecache pages, and put the
    129	 * decompression context for any compressed pages.
    130	 */
    131	bio_for_each_segment_all(bv, bio, iter_all) {
    132		struct page *page = bv->bv_page;
    133
    134		if (f2fs_is_compressed_page(page)) {
    135			if (bio->bi_status)
    136				f2fs_end_read_compressed_page(page, true, 0);
    137			f2fs_put_page_dic(page);
    138			continue;
    139		}
    140
    141		/* PG_error was set if decryption or verity failed. */
    142		if (bio->bi_status || PageError(page)) {
    143			ClearPageUptodate(page);
     144			/* will be re-read later */
    145			ClearPageError(page);
    146		} else {
    147			SetPageUptodate(page);
    148		}
    149		dec_page_count(F2FS_P_SB(page), __read_io_type(page));
    150		unlock_page(page);
    151	}
    152
    153	if (bio->bi_private)
    154		mempool_free(bio->bi_private, bio_post_read_ctx_pool);
    155	bio_put(bio);
    156}
    157
    158static void f2fs_verify_bio(struct work_struct *work)
    159{
    160	struct bio_post_read_ctx *ctx =
    161		container_of(work, struct bio_post_read_ctx, work);
    162	struct bio *bio = ctx->bio;
    163	bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS);
    164
    165	/*
    166	 * fsverity_verify_bio() may call readahead() again, and while verity
    167	 * will be disabled for this, decryption and/or decompression may still
    168	 * be needed, resulting in another bio_post_read_ctx being allocated.
    169	 * So to prevent deadlocks we need to release the current ctx to the
    170	 * mempool first.  This assumes that verity is the last post-read step.
    171	 */
    172	mempool_free(ctx, bio_post_read_ctx_pool);
    173	bio->bi_private = NULL;
    174
    175	/*
    176	 * Verify the bio's pages with fs-verity.  Exclude compressed pages,
    177	 * as those were handled separately by f2fs_end_read_compressed_page().
    178	 */
    179	if (may_have_compressed_pages) {
    180		struct bio_vec *bv;
    181		struct bvec_iter_all iter_all;
    182
    183		bio_for_each_segment_all(bv, bio, iter_all) {
    184			struct page *page = bv->bv_page;
    185
    186			if (!f2fs_is_compressed_page(page) &&
    187			    !PageError(page) && !fsverity_verify_page(page))
    188				SetPageError(page);
    189		}
    190	} else {
    191		fsverity_verify_bio(bio);
    192	}
    193
    194	f2fs_finish_read_bio(bio);
    195}
    196
    197/*
    198 * If the bio's data needs to be verified with fs-verity, then enqueue the
    199 * verity work for the bio.  Otherwise finish the bio now.
    200 *
    201 * Note that to avoid deadlocks, the verity work can't be done on the
    202 * decryption/decompression workqueue.  This is because verifying the data pages
    203 * can involve reading verity metadata pages from the file, and these verity
    204 * metadata pages may be encrypted and/or compressed.
    205 */
    206static void f2fs_verify_and_finish_bio(struct bio *bio)
    207{
    208	struct bio_post_read_ctx *ctx = bio->bi_private;
    209
    210	if (ctx && (ctx->enabled_steps & STEP_VERITY)) {
    211		INIT_WORK(&ctx->work, f2fs_verify_bio);
    212		fsverity_enqueue_verify_work(&ctx->work);
    213	} else {
    214		f2fs_finish_read_bio(bio);
    215	}
    216}
    217
    218/*
    219 * Handle STEP_DECOMPRESS by decompressing any compressed clusters whose last
    220 * remaining page was read by @ctx->bio.
    221 *
    222 * Note that a bio may span clusters (even a mix of compressed and uncompressed
    223 * clusters) or be for just part of a cluster.  STEP_DECOMPRESS just indicates
    224 * that the bio includes at least one compressed page.  The actual decompression
    225 * is done on a per-cluster basis, not a per-bio basis.
    226 */
    227static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx)
    228{
    229	struct bio_vec *bv;
    230	struct bvec_iter_all iter_all;
    231	bool all_compressed = true;
    232	block_t blkaddr = ctx->fs_blkaddr;
    233
    234	bio_for_each_segment_all(bv, ctx->bio, iter_all) {
    235		struct page *page = bv->bv_page;
    236
    237		/* PG_error was set if decryption failed. */
    238		if (f2fs_is_compressed_page(page))
    239			f2fs_end_read_compressed_page(page, PageError(page),
    240						blkaddr);
    241		else
    242			all_compressed = false;
    243
    244		blkaddr++;
    245	}
    246
    247	/*
    248	 * Optimization: if all the bio's pages are compressed, then scheduling
    249	 * the per-bio verity work is unnecessary, as verity will be fully
    250	 * handled at the compression cluster level.
    251	 */
    252	if (all_compressed)
    253		ctx->enabled_steps &= ~STEP_VERITY;
    254}
    255
    256static void f2fs_post_read_work(struct work_struct *work)
    257{
    258	struct bio_post_read_ctx *ctx =
    259		container_of(work, struct bio_post_read_ctx, work);
    260
    261	if (ctx->enabled_steps & STEP_DECRYPT)
    262		fscrypt_decrypt_bio(ctx->bio);
    263
    264	if (ctx->enabled_steps & STEP_DECOMPRESS)
    265		f2fs_handle_step_decompress(ctx);
    266
    267	f2fs_verify_and_finish_bio(ctx->bio);
    268}
    269
    270static void f2fs_read_end_io(struct bio *bio)
    271{
    272	struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
    273	struct bio_post_read_ctx *ctx;
    274
    275	iostat_update_and_unbind_ctx(bio, 0);
    276	ctx = bio->bi_private;
    277
    278	if (time_to_inject(sbi, FAULT_READ_IO)) {
    279		f2fs_show_injection_info(sbi, FAULT_READ_IO);
    280		bio->bi_status = BLK_STS_IOERR;
    281	}
    282
    283	if (bio->bi_status) {
    284		f2fs_finish_read_bio(bio);
    285		return;
    286	}
    287
    288	if (ctx && (ctx->enabled_steps & (STEP_DECRYPT | STEP_DECOMPRESS))) {
    289		INIT_WORK(&ctx->work, f2fs_post_read_work);
    290		queue_work(ctx->sbi->post_read_wq, &ctx->work);
    291	} else {
    292		f2fs_verify_and_finish_bio(bio);
    293	}
    294}
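
/*
 * Read completion pipeline, with disabled steps skipped:
 * f2fs_read_end_io() -> f2fs_post_read_work() on sbi->post_read_wq
 * (STEP_DECRYPT and/or STEP_DECOMPRESS) -> f2fs_verify_and_finish_bio() ->
 * f2fs_verify_bio() on the fs-verity workqueue (STEP_VERITY) ->
 * f2fs_finish_read_bio(), which updates and unlocks the pagecache pages and
 * frees the bio.
 */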
    295
    296static void f2fs_write_end_io(struct bio *bio)
    297{
    298	struct f2fs_sb_info *sbi;
    299	struct bio_vec *bvec;
    300	struct bvec_iter_all iter_all;
    301
    302	iostat_update_and_unbind_ctx(bio, 1);
    303	sbi = bio->bi_private;
    304
    305	if (time_to_inject(sbi, FAULT_WRITE_IO)) {
    306		f2fs_show_injection_info(sbi, FAULT_WRITE_IO);
    307		bio->bi_status = BLK_STS_IOERR;
    308	}
    309
    310	bio_for_each_segment_all(bvec, bio, iter_all) {
    311		struct page *page = bvec->bv_page;
    312		enum count_type type = WB_DATA_TYPE(page);
    313
    314		if (page_private_dummy(page)) {
    315			clear_page_private_dummy(page);
    316			unlock_page(page);
    317			mempool_free(page, sbi->write_io_dummy);
    318
    319			if (unlikely(bio->bi_status))
    320				f2fs_stop_checkpoint(sbi, true);
    321			continue;
    322		}
    323
    324		fscrypt_finalize_bounce_page(&page);
    325
    326#ifdef CONFIG_F2FS_FS_COMPRESSION
    327		if (f2fs_is_compressed_page(page)) {
    328			f2fs_compress_write_end_io(bio, page);
    329			continue;
    330		}
    331#endif
    332
    333		if (unlikely(bio->bi_status)) {
    334			mapping_set_error(page->mapping, -EIO);
    335			if (type == F2FS_WB_CP_DATA)
    336				f2fs_stop_checkpoint(sbi, true);
    337		}
    338
    339		f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
    340					page->index != nid_of_node(page));
    341
    342		dec_page_count(sbi, type);
    343		if (f2fs_in_warm_node_list(sbi, page))
    344			f2fs_del_fsync_node_entry(sbi, page);
    345		clear_page_private_gcing(page);
    346		end_page_writeback(page);
    347	}
    348	if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
    349				wq_has_sleeper(&sbi->cp_wait))
    350		wake_up(&sbi->cp_wait);
    351
    352	bio_put(bio);
    353}
    354
    355struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
    356		block_t blk_addr, sector_t *sector)
    357{
    358	struct block_device *bdev = sbi->sb->s_bdev;
    359	int i;
    360
    361	if (f2fs_is_multi_device(sbi)) {
    362		for (i = 0; i < sbi->s_ndevs; i++) {
    363			if (FDEV(i).start_blk <= blk_addr &&
    364			    FDEV(i).end_blk >= blk_addr) {
    365				blk_addr -= FDEV(i).start_blk;
    366				bdev = FDEV(i).bdev;
    367				break;
    368			}
    369		}
    370	}
    371
    372	if (sector)
    373		*sector = SECTOR_FROM_BLOCK(blk_addr);
    374	return bdev;
    375}
    376
    377int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
    378{
    379	int i;
    380
    381	if (!f2fs_is_multi_device(sbi))
    382		return 0;
    383
    384	for (i = 0; i < sbi->s_ndevs; i++)
    385		if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
    386			return i;
    387	return 0;
    388}
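
/*
 * Hypothetical multi-device example: if FDEV(0) covers blocks 0..0x3ffff and
 * FDEV(1) covers blocks 0x40000..0x7ffff, then
 * f2fs_target_device(sbi, 0x40010, &sector) returns FDEV(1).bdev and sets
 * *sector to SECTOR_FROM_BLOCK(0x10): the block address is rebased to the
 * start of the chosen device before being converted to a sector, and
 * f2fs_target_device_index(sbi, 0x40010) returns 1.
 */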
    389
    390static unsigned int f2fs_io_flags(struct f2fs_io_info *fio)
    391{
    392	unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1;
    393	unsigned int fua_flag, meta_flag, io_flag;
    394	unsigned int op_flags = 0;
    395
    396	if (fio->op != REQ_OP_WRITE)
    397		return 0;
    398	if (fio->type == DATA)
    399		io_flag = fio->sbi->data_io_flag;
    400	else if (fio->type == NODE)
    401		io_flag = fio->sbi->node_io_flag;
    402	else
    403		return 0;
    404
    405	fua_flag = io_flag & temp_mask;
    406	meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;
    407
    408	/*
    409	 * data/node io flag bits per temp:
    410	 *      REQ_META     |      REQ_FUA      |
    411	 *    5 |    4 |   3 |    2 |    1 |   0 |
    412	 * Cold | Warm | Hot | Cold | Warm | Hot |
    413	 */
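	/*
	 * Worked example with a hypothetical value: since NR_TEMP_TYPE is 3,
	 * temp_mask is 0x7, so data_io_flag == 0x09 (0b001001) gives
	 * fua_flag == 0x1 and meta_flag == 0x1; a HOT data write
	 * (fio->temp == HOT, bit 0) then gets REQ_FUA | REQ_META, while WARM
	 * and COLD writes get neither flag.
	 */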
    414	if ((1 << fio->temp) & meta_flag)
    415		op_flags |= REQ_META;
    416	if ((1 << fio->temp) & fua_flag)
    417		op_flags |= REQ_FUA;
    418	return op_flags;
    419}
    420
    421static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
    422{
    423	struct f2fs_sb_info *sbi = fio->sbi;
    424	struct block_device *bdev;
    425	sector_t sector;
    426	struct bio *bio;
    427
    428	bdev = f2fs_target_device(sbi, fio->new_blkaddr, &sector);
    429	bio = bio_alloc_bioset(bdev, npages,
    430				fio->op | fio->op_flags | f2fs_io_flags(fio),
    431				GFP_NOIO, &f2fs_bioset);
    432	bio->bi_iter.bi_sector = sector;
    433	if (is_read_io(fio->op)) {
    434		bio->bi_end_io = f2fs_read_end_io;
    435		bio->bi_private = NULL;
    436	} else {
    437		bio->bi_end_io = f2fs_write_end_io;
    438		bio->bi_private = sbi;
    439	}
    440	iostat_alloc_and_bind_ctx(sbi, bio, NULL);
    441
    442	if (fio->io_wbc)
    443		wbc_init_bio(fio->io_wbc, bio);
    444
    445	return bio;
    446}
    447
    448static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
    449				  pgoff_t first_idx,
    450				  const struct f2fs_io_info *fio,
    451				  gfp_t gfp_mask)
    452{
    453	/*
    454	 * The f2fs garbage collector sets ->encrypted_page when it wants to
    455	 * read/write raw data without encryption.
    456	 */
    457	if (!fio || !fio->encrypted_page)
    458		fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask);
    459}
    460
    461static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode,
    462				     pgoff_t next_idx,
    463				     const struct f2fs_io_info *fio)
    464{
    465	/*
    466	 * The f2fs garbage collector sets ->encrypted_page when it wants to
    467	 * read/write raw data without encryption.
    468	 */
    469	if (fio && fio->encrypted_page)
    470		return !bio_has_crypt_ctx(bio);
    471
    472	return fscrypt_mergeable_bio(bio, inode, next_idx);
    473}
    474
    475static inline void __submit_bio(struct f2fs_sb_info *sbi,
    476				struct bio *bio, enum page_type type)
    477{
    478	if (!is_read_io(bio_op(bio))) {
    479		unsigned int start;
    480
    481		if (type != DATA && type != NODE)
    482			goto submit_io;
    483
    484		if (f2fs_lfs_mode(sbi) && current->plug)
    485			blk_finish_plug(current->plug);
    486
    487		if (!F2FS_IO_ALIGNED(sbi))
    488			goto submit_io;
    489
    490		start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
    491		start %= F2FS_IO_SIZE(sbi);
    492
    493		if (start == 0)
    494			goto submit_io;
    495
    496		/* fill dummy pages */
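		/*
		 * Example with hypothetical numbers: if F2FS_IO_SIZE(sbi) is
		 * 4 blocks and the bio currently holds 6 blocks, start becomes
		 * 6 % 4 = 2, so two zeroed dummy pages are appended to keep
		 * the submitted IO aligned to the IO size.
		 */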
    497		for (; start < F2FS_IO_SIZE(sbi); start++) {
    498			struct page *page =
    499				mempool_alloc(sbi->write_io_dummy,
    500					      GFP_NOIO | __GFP_NOFAIL);
    501			f2fs_bug_on(sbi, !page);
    502
    503			lock_page(page);
    504
    505			zero_user_segment(page, 0, PAGE_SIZE);
    506			set_page_private_dummy(page);
    507
    508			if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
    509				f2fs_bug_on(sbi, 1);
    510		}
    511		/*
     512		 * In the NODE case, we lose the next block address chain, so we
     513		 * need to do a checkpoint in f2fs_sync_file.
    514		 */
    515		if (type == NODE)
    516			set_sbi_flag(sbi, SBI_NEED_CP);
    517	}
    518submit_io:
    519	if (is_read_io(bio_op(bio)))
    520		trace_f2fs_submit_read_bio(sbi->sb, type, bio);
    521	else
    522		trace_f2fs_submit_write_bio(sbi->sb, type, bio);
    523
    524	iostat_update_submit_ctx(bio, type);
    525	submit_bio(bio);
    526}
    527
    528void f2fs_submit_bio(struct f2fs_sb_info *sbi,
    529				struct bio *bio, enum page_type type)
    530{
    531	__submit_bio(sbi, bio, type);
    532}
    533
    534static void __submit_merged_bio(struct f2fs_bio_info *io)
    535{
    536	struct f2fs_io_info *fio = &io->fio;
    537
    538	if (!io->bio)
    539		return;
    540
    541	if (is_read_io(fio->op))
    542		trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
    543	else
    544		trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
    545
    546	__submit_bio(io->sbi, io->bio, fio->type);
    547	io->bio = NULL;
    548}
    549
    550static bool __has_merged_page(struct bio *bio, struct inode *inode,
    551						struct page *page, nid_t ino)
    552{
    553	struct bio_vec *bvec;
    554	struct bvec_iter_all iter_all;
    555
    556	if (!bio)
    557		return false;
    558
    559	if (!inode && !page && !ino)
    560		return true;
    561
    562	bio_for_each_segment_all(bvec, bio, iter_all) {
    563		struct page *target = bvec->bv_page;
    564
    565		if (fscrypt_is_bounce_page(target)) {
    566			target = fscrypt_pagecache_page(target);
    567			if (IS_ERR(target))
    568				continue;
    569		}
    570		if (f2fs_is_compressed_page(target)) {
    571			target = f2fs_compress_control_page(target);
    572			if (IS_ERR(target))
    573				continue;
    574		}
    575
    576		if (inode && inode == target->mapping->host)
    577			return true;
    578		if (page && page == target)
    579			return true;
    580		if (ino && ino == ino_of_node(target))
    581			return true;
    582	}
    583
    584	return false;
    585}
    586
    587int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
    588{
    589	int i;
    590
    591	for (i = 0; i < NR_PAGE_TYPE; i++) {
    592		int n = (i == META) ? 1 : NR_TEMP_TYPE;
    593		int j;
    594
    595		sbi->write_io[i] = f2fs_kmalloc(sbi,
    596				array_size(n, sizeof(struct f2fs_bio_info)),
    597				GFP_KERNEL);
    598		if (!sbi->write_io[i])
    599			return -ENOMEM;
    600
    601		for (j = HOT; j < n; j++) {
    602			init_f2fs_rwsem(&sbi->write_io[i][j].io_rwsem);
    603			sbi->write_io[i][j].sbi = sbi;
    604			sbi->write_io[i][j].bio = NULL;
    605			spin_lock_init(&sbi->write_io[i][j].io_lock);
    606			INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
    607			INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
    608			init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
    609		}
    610	}
    611
    612	return 0;
    613}
    614
    615static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
    616				enum page_type type, enum temp_type temp)
    617{
    618	enum page_type btype = PAGE_TYPE_OF_BIO(type);
    619	struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
    620
    621	f2fs_down_write(&io->io_rwsem);
    622
    623	/* change META to META_FLUSH in the checkpoint procedure */
    624	if (type >= META_FLUSH) {
    625		io->fio.type = META_FLUSH;
    626		io->bio->bi_opf |= REQ_META | REQ_PRIO | REQ_SYNC;
    627		if (!test_opt(sbi, NOBARRIER))
    628			io->bio->bi_opf |= REQ_PREFLUSH | REQ_FUA;
    629	}
    630	__submit_merged_bio(io);
    631	f2fs_up_write(&io->io_rwsem);
    632}
    633
    634static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
    635				struct inode *inode, struct page *page,
    636				nid_t ino, enum page_type type, bool force)
    637{
    638	enum temp_type temp;
    639	bool ret = true;
    640
    641	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
    642		if (!force)	{
    643			enum page_type btype = PAGE_TYPE_OF_BIO(type);
    644			struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
    645
    646			f2fs_down_read(&io->io_rwsem);
    647			ret = __has_merged_page(io->bio, inode, page, ino);
    648			f2fs_up_read(&io->io_rwsem);
    649		}
    650		if (ret)
    651			__f2fs_submit_merged_write(sbi, type, temp);
    652
    653		/* TODO: use HOT temp only for meta pages now. */
    654		if (type >= META)
    655			break;
    656	}
    657}
    658
    659void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
    660{
    661	__submit_merged_write_cond(sbi, NULL, NULL, 0, type, true);
    662}
    663
    664void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
    665				struct inode *inode, struct page *page,
    666				nid_t ino, enum page_type type)
    667{
    668	__submit_merged_write_cond(sbi, inode, page, ino, type, false);
    669}
    670
    671void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
    672{
    673	f2fs_submit_merged_write(sbi, DATA);
    674	f2fs_submit_merged_write(sbi, NODE);
    675	f2fs_submit_merged_write(sbi, META);
    676}
    677
    678/*
     679 * Fill the locked page with data located at the block address.
    680 * A caller needs to unlock the page on failure.
    681 */
    682int f2fs_submit_page_bio(struct f2fs_io_info *fio)
    683{
    684	struct bio *bio;
    685	struct page *page = fio->encrypted_page ?
    686			fio->encrypted_page : fio->page;
    687
    688	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
    689			fio->is_por ? META_POR : (__is_meta_io(fio) ?
    690			META_GENERIC : DATA_GENERIC_ENHANCE)))
    691		return -EFSCORRUPTED;
    692
    693	trace_f2fs_submit_page_bio(page, fio);
    694
    695	/* Allocate a new bio */
    696	bio = __bio_alloc(fio, 1);
    697
    698	f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
    699			       fio->page->index, fio, GFP_NOIO);
    700
    701	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
    702		bio_put(bio);
    703		return -EFAULT;
    704	}
    705
    706	if (fio->io_wbc && !is_read_io(fio->op))
    707		wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
    708
    709	inc_page_count(fio->sbi, is_read_io(fio->op) ?
    710			__read_io_type(page): WB_DATA_TYPE(fio->page));
    711
    712	__submit_bio(fio->sbi, bio, fio->type);
    713	return 0;
    714}
    715
    716static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
    717				block_t last_blkaddr, block_t cur_blkaddr)
    718{
    719	if (unlikely(sbi->max_io_bytes &&
    720			bio->bi_iter.bi_size >= sbi->max_io_bytes))
    721		return false;
    722	if (last_blkaddr + 1 != cur_blkaddr)
    723		return false;
    724	return bio->bi_bdev == f2fs_target_device(sbi, cur_blkaddr, NULL);
    725}
    726
    727static bool io_type_is_mergeable(struct f2fs_bio_info *io,
    728						struct f2fs_io_info *fio)
    729{
    730	if (io->fio.op != fio->op)
    731		return false;
    732	return io->fio.op_flags == fio->op_flags;
    733}
    734
    735static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
    736					struct f2fs_bio_info *io,
    737					struct f2fs_io_info *fio,
    738					block_t last_blkaddr,
    739					block_t cur_blkaddr)
    740{
    741	if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) {
    742		unsigned int filled_blocks =
    743				F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size);
    744		unsigned int io_size = F2FS_IO_SIZE(sbi);
    745		unsigned int left_vecs = bio->bi_max_vecs - bio->bi_vcnt;
    746
     747		/* IOs in the bio are aligned and the remaining vector space is not enough */
    748		if (!(filled_blocks % io_size) && left_vecs < io_size)
    749			return false;
    750	}
    751	if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr))
    752		return false;
    753	return io_type_is_mergeable(io, fio);
    754}
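
/*
 * In short, a write is merged into an existing bio only when the target block
 * is physically contiguous with the last block in the bio, lives on the same
 * block device, does not push the bio past sbi->max_io_bytes, uses the same
 * op/op_flags, leaves enough bio vectors to preserve alignment when
 * F2FS_IO_ALIGNED(sbi), and (checked separately by the callers) has a
 * compatible fscrypt context; otherwise the pending bio is submitted first.
 */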
    755
    756static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
    757				struct page *page, enum temp_type temp)
    758{
    759	struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
    760	struct bio_entry *be;
    761
    762	be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS, true, NULL);
    763	be->bio = bio;
    764	bio_get(bio);
    765
    766	if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE)
    767		f2fs_bug_on(sbi, 1);
    768
    769	f2fs_down_write(&io->bio_list_lock);
    770	list_add_tail(&be->list, &io->bio_list);
    771	f2fs_up_write(&io->bio_list_lock);
    772}
    773
    774static void del_bio_entry(struct bio_entry *be)
    775{
    776	list_del(&be->list);
    777	kmem_cache_free(bio_entry_slab, be);
    778}
    779
    780static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
    781							struct page *page)
    782{
    783	struct f2fs_sb_info *sbi = fio->sbi;
    784	enum temp_type temp;
    785	bool found = false;
    786	int ret = -EAGAIN;
    787
    788	for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
    789		struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
    790		struct list_head *head = &io->bio_list;
    791		struct bio_entry *be;
    792
    793		f2fs_down_write(&io->bio_list_lock);
    794		list_for_each_entry(be, head, list) {
    795			if (be->bio != *bio)
    796				continue;
    797
    798			found = true;
    799
    800			f2fs_bug_on(sbi, !page_is_mergeable(sbi, *bio,
    801							    *fio->last_block,
    802							    fio->new_blkaddr));
    803			if (f2fs_crypt_mergeable_bio(*bio,
    804					fio->page->mapping->host,
    805					fio->page->index, fio) &&
    806			    bio_add_page(*bio, page, PAGE_SIZE, 0) ==
    807					PAGE_SIZE) {
    808				ret = 0;
    809				break;
    810			}
    811
    812			/* page can't be merged into bio; submit the bio */
    813			del_bio_entry(be);
    814			__submit_bio(sbi, *bio, DATA);
    815			break;
    816		}
    817		f2fs_up_write(&io->bio_list_lock);
    818	}
    819
    820	if (ret) {
    821		bio_put(*bio);
    822		*bio = NULL;
    823	}
    824
    825	return ret;
    826}
    827
    828void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
    829					struct bio **bio, struct page *page)
    830{
    831	enum temp_type temp;
    832	bool found = false;
    833	struct bio *target = bio ? *bio : NULL;
    834
    835	for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
    836		struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
    837		struct list_head *head = &io->bio_list;
    838		struct bio_entry *be;
    839
    840		if (list_empty(head))
    841			continue;
    842
    843		f2fs_down_read(&io->bio_list_lock);
    844		list_for_each_entry(be, head, list) {
    845			if (target)
    846				found = (target == be->bio);
    847			else
    848				found = __has_merged_page(be->bio, NULL,
    849								page, 0);
    850			if (found)
    851				break;
    852		}
    853		f2fs_up_read(&io->bio_list_lock);
    854
    855		if (!found)
    856			continue;
    857
    858		found = false;
    859
    860		f2fs_down_write(&io->bio_list_lock);
    861		list_for_each_entry(be, head, list) {
    862			if (target)
    863				found = (target == be->bio);
    864			else
    865				found = __has_merged_page(be->bio, NULL,
    866								page, 0);
    867			if (found) {
    868				target = be->bio;
    869				del_bio_entry(be);
    870				break;
    871			}
    872		}
    873		f2fs_up_write(&io->bio_list_lock);
    874	}
    875
    876	if (found)
    877		__submit_bio(sbi, target, DATA);
    878	if (bio && *bio) {
    879		bio_put(*bio);
    880		*bio = NULL;
    881	}
    882}
    883
    884int f2fs_merge_page_bio(struct f2fs_io_info *fio)
    885{
    886	struct bio *bio = *fio->bio;
    887	struct page *page = fio->encrypted_page ?
    888			fio->encrypted_page : fio->page;
    889
    890	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
    891			__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
    892		return -EFSCORRUPTED;
    893
    894	trace_f2fs_submit_page_bio(page, fio);
    895
    896	if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
    897						fio->new_blkaddr))
    898		f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL);
    899alloc_new:
    900	if (!bio) {
    901		bio = __bio_alloc(fio, BIO_MAX_VECS);
    902		f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
    903				       fio->page->index, fio, GFP_NOIO);
    904
    905		add_bio_entry(fio->sbi, bio, page, fio->temp);
    906	} else {
    907		if (add_ipu_page(fio, &bio, page))
    908			goto alloc_new;
    909	}
    910
    911	if (fio->io_wbc)
    912		wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);
    913
    914	inc_page_count(fio->sbi, WB_DATA_TYPE(page));
    915
    916	*fio->last_block = fio->new_blkaddr;
    917	*fio->bio = bio;
    918
    919	return 0;
    920}
    921
    922void f2fs_submit_page_write(struct f2fs_io_info *fio)
    923{
    924	struct f2fs_sb_info *sbi = fio->sbi;
    925	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
    926	struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
    927	struct page *bio_page;
    928
    929	f2fs_bug_on(sbi, is_read_io(fio->op));
    930
    931	f2fs_down_write(&io->io_rwsem);
    932next:
    933	if (fio->in_list) {
    934		spin_lock(&io->io_lock);
    935		if (list_empty(&io->io_list)) {
    936			spin_unlock(&io->io_lock);
    937			goto out;
    938		}
    939		fio = list_first_entry(&io->io_list,
    940						struct f2fs_io_info, list);
    941		list_del(&fio->list);
    942		spin_unlock(&io->io_lock);
    943	}
    944
    945	verify_fio_blkaddr(fio);
    946
    947	if (fio->encrypted_page)
    948		bio_page = fio->encrypted_page;
    949	else if (fio->compressed_page)
    950		bio_page = fio->compressed_page;
    951	else
    952		bio_page = fio->page;
    953
    954	/* set submitted = true as a return value */
    955	fio->submitted = true;
    956
    957	inc_page_count(sbi, WB_DATA_TYPE(bio_page));
    958
    959	if (io->bio &&
    960	    (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
    961			      fio->new_blkaddr) ||
    962	     !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host,
    963				       bio_page->index, fio)))
    964		__submit_merged_bio(io);
    965alloc_new:
    966	if (io->bio == NULL) {
    967		if (F2FS_IO_ALIGNED(sbi) &&
    968				(fio->type == DATA || fio->type == NODE) &&
    969				fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
    970			dec_page_count(sbi, WB_DATA_TYPE(bio_page));
    971			fio->retry = true;
    972			goto skip;
    973		}
    974		io->bio = __bio_alloc(fio, BIO_MAX_VECS);
    975		f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host,
    976				       bio_page->index, fio, GFP_NOIO);
    977		io->fio = *fio;
    978	}
    979
    980	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
    981		__submit_merged_bio(io);
    982		goto alloc_new;
    983	}
    984
    985	if (fio->io_wbc)
    986		wbc_account_cgroup_owner(fio->io_wbc, bio_page, PAGE_SIZE);
    987
    988	io->last_block_in_bio = fio->new_blkaddr;
    989
    990	trace_f2fs_submit_page_write(fio->page, fio);
    991skip:
    992	if (fio->in_list)
    993		goto next;
    994out:
    995	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
    996				!f2fs_is_checkpoint_ready(sbi))
    997		__submit_merged_bio(io);
    998	f2fs_up_write(&io->io_rwsem);
    999}
   1000
   1001static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
   1002				      unsigned nr_pages, unsigned op_flag,
   1003				      pgoff_t first_idx, bool for_write)
   1004{
   1005	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   1006	struct bio *bio;
   1007	struct bio_post_read_ctx *ctx = NULL;
   1008	unsigned int post_read_steps = 0;
   1009	sector_t sector;
   1010	struct block_device *bdev = f2fs_target_device(sbi, blkaddr, &sector);
   1011
   1012	bio = bio_alloc_bioset(bdev, bio_max_segs(nr_pages),
   1013			       REQ_OP_READ | op_flag,
   1014			       for_write ? GFP_NOIO : GFP_KERNEL, &f2fs_bioset);
   1015	if (!bio)
   1016		return ERR_PTR(-ENOMEM);
   1017	bio->bi_iter.bi_sector = sector;
   1018	f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS);
   1019	bio->bi_end_io = f2fs_read_end_io;
   1020
   1021	if (fscrypt_inode_uses_fs_layer_crypto(inode))
   1022		post_read_steps |= STEP_DECRYPT;
   1023
   1024	if (f2fs_need_verity(inode, first_idx))
   1025		post_read_steps |= STEP_VERITY;
   1026
   1027	/*
   1028	 * STEP_DECOMPRESS is handled specially, since a compressed file might
   1029	 * contain both compressed and uncompressed clusters.  We'll allocate a
   1030	 * bio_post_read_ctx if the file is compressed, but the caller is
   1031	 * responsible for enabling STEP_DECOMPRESS if it's actually needed.
   1032	 */
   1033
   1034	if (post_read_steps || f2fs_compressed_file(inode)) {
   1035		/* Due to the mempool, this never fails. */
   1036		ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
   1037		ctx->bio = bio;
   1038		ctx->sbi = sbi;
   1039		ctx->enabled_steps = post_read_steps;
   1040		ctx->fs_blkaddr = blkaddr;
   1041		bio->bi_private = ctx;
   1042	}
   1043	iostat_alloc_and_bind_ctx(sbi, bio, ctx);
   1044
   1045	return bio;
   1046}
   1047
    1048/* This can handle encryption stuff */
   1049static int f2fs_submit_page_read(struct inode *inode, struct page *page,
   1050				 block_t blkaddr, int op_flags, bool for_write)
   1051{
   1052	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   1053	struct bio *bio;
   1054
   1055	bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
   1056					page->index, for_write);
   1057	if (IS_ERR(bio))
   1058		return PTR_ERR(bio);
   1059
   1060	/* wait for GCed page writeback via META_MAPPING */
   1061	f2fs_wait_on_block_writeback(inode, blkaddr);
   1062
   1063	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
   1064		bio_put(bio);
   1065		return -EFAULT;
   1066	}
   1067	ClearPageError(page);
   1068	inc_page_count(sbi, F2FS_RD_DATA);
   1069	f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
   1070	__submit_bio(sbi, bio, DATA);
   1071	return 0;
   1072}
   1073
   1074static void __set_data_blkaddr(struct dnode_of_data *dn)
   1075{
   1076	struct f2fs_node *rn = F2FS_NODE(dn->node_page);
   1077	__le32 *addr_array;
   1078	int base = 0;
   1079
   1080	if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
   1081		base = get_extra_isize(dn->inode);
   1082
   1083	/* Get physical address of data block */
   1084	addr_array = blkaddr_in_node(rn);
   1085	addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
   1086}
   1087
   1088/*
   1089 * Lock ordering for the change of data block address:
   1090 * ->data_page
   1091 *  ->node_page
   1092 *    update block addresses in the node page
   1093 */
   1094void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
   1095{
   1096	f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
   1097	__set_data_blkaddr(dn);
   1098	if (set_page_dirty(dn->node_page))
   1099		dn->node_changed = true;
   1100}
   1101
   1102void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
   1103{
   1104	dn->data_blkaddr = blkaddr;
   1105	f2fs_set_data_blkaddr(dn);
   1106	f2fs_update_extent_cache(dn);
   1107}
   1108
   1109/* dn->ofs_in_node will be returned with up-to-date last block pointer */
   1110int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
   1111{
   1112	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
   1113	int err;
   1114
   1115	if (!count)
   1116		return 0;
   1117
   1118	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
   1119		return -EPERM;
   1120	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
   1121		return err;
   1122
   1123	trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
   1124						dn->ofs_in_node, count);
   1125
   1126	f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
   1127
   1128	for (; count > 0; dn->ofs_in_node++) {
   1129		block_t blkaddr = f2fs_data_blkaddr(dn);
   1130
   1131		if (blkaddr == NULL_ADDR) {
   1132			dn->data_blkaddr = NEW_ADDR;
   1133			__set_data_blkaddr(dn);
   1134			count--;
   1135		}
   1136	}
   1137
   1138	if (set_page_dirty(dn->node_page))
   1139		dn->node_changed = true;
   1140	return 0;
   1141}
   1142
   1143/* Should keep dn->ofs_in_node unchanged */
   1144int f2fs_reserve_new_block(struct dnode_of_data *dn)
   1145{
   1146	unsigned int ofs_in_node = dn->ofs_in_node;
   1147	int ret;
   1148
   1149	ret = f2fs_reserve_new_blocks(dn, 1);
   1150	dn->ofs_in_node = ofs_in_node;
   1151	return ret;
   1152}
   1153
   1154int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
   1155{
   1156	bool need_put = dn->inode_page ? false : true;
   1157	int err;
   1158
   1159	err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
   1160	if (err)
   1161		return err;
   1162
   1163	if (dn->data_blkaddr == NULL_ADDR)
   1164		err = f2fs_reserve_new_block(dn);
   1165	if (err || need_put)
   1166		f2fs_put_dnode(dn);
   1167	return err;
   1168}
   1169
   1170int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
   1171{
   1172	struct extent_info ei = {0, };
   1173	struct inode *inode = dn->inode;
   1174
   1175	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
   1176		dn->data_blkaddr = ei.blk + index - ei.fofs;
   1177		return 0;
   1178	}
   1179
   1180	return f2fs_reserve_block(dn, index);
   1181}
   1182
   1183struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
   1184						int op_flags, bool for_write)
   1185{
   1186	struct address_space *mapping = inode->i_mapping;
   1187	struct dnode_of_data dn;
   1188	struct page *page;
   1189	struct extent_info ei = {0, };
   1190	int err;
   1191
   1192	page = f2fs_grab_cache_page(mapping, index, for_write);
   1193	if (!page)
   1194		return ERR_PTR(-ENOMEM);
   1195
   1196	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
   1197		dn.data_blkaddr = ei.blk + index - ei.fofs;
   1198		if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
   1199						DATA_GENERIC_ENHANCE_READ)) {
   1200			err = -EFSCORRUPTED;
   1201			goto put_err;
   1202		}
   1203		goto got_it;
   1204	}
   1205
   1206	set_new_dnode(&dn, inode, NULL, NULL, 0);
   1207	err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
   1208	if (err)
   1209		goto put_err;
   1210	f2fs_put_dnode(&dn);
   1211
   1212	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
   1213		err = -ENOENT;
   1214		goto put_err;
   1215	}
   1216	if (dn.data_blkaddr != NEW_ADDR &&
   1217			!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
   1218						dn.data_blkaddr,
   1219						DATA_GENERIC_ENHANCE)) {
   1220		err = -EFSCORRUPTED;
   1221		goto put_err;
   1222	}
   1223got_it:
   1224	if (PageUptodate(page)) {
   1225		unlock_page(page);
   1226		return page;
   1227	}
   1228
   1229	/*
    1230	 * A new dentry page is allocated but cannot be written, since its
    1231	 * new inode page couldn't be allocated due to -ENOSPC.
    1232	 * In such a case, its blkaddr may remain NEW_ADDR.
    1233	 * See f2fs_add_link -> f2fs_get_new_data_page ->
   1234	 * f2fs_init_inode_metadata.
   1235	 */
   1236	if (dn.data_blkaddr == NEW_ADDR) {
   1237		zero_user_segment(page, 0, PAGE_SIZE);
   1238		if (!PageUptodate(page))
   1239			SetPageUptodate(page);
   1240		unlock_page(page);
   1241		return page;
   1242	}
   1243
   1244	err = f2fs_submit_page_read(inode, page, dn.data_blkaddr,
   1245						op_flags, for_write);
   1246	if (err)
   1247		goto put_err;
   1248	return page;
   1249
   1250put_err:
   1251	f2fs_put_page(page, 1);
   1252	return ERR_PTR(err);
   1253}
   1254
   1255struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index)
   1256{
   1257	struct address_space *mapping = inode->i_mapping;
   1258	struct page *page;
   1259
   1260	page = find_get_page(mapping, index);
   1261	if (page && PageUptodate(page))
   1262		return page;
   1263	f2fs_put_page(page, 0);
   1264
   1265	page = f2fs_get_read_data_page(inode, index, 0, false);
   1266	if (IS_ERR(page))
   1267		return page;
   1268
   1269	if (PageUptodate(page))
   1270		return page;
   1271
   1272	wait_on_page_locked(page);
   1273	if (unlikely(!PageUptodate(page))) {
   1274		f2fs_put_page(page, 0);
   1275		return ERR_PTR(-EIO);
   1276	}
   1277	return page;
   1278}
   1279
   1280/*
    1281 * If it tries to access a hole, return an error, because the callers
    1282 * (functions in dir.c and GC) should be able to know whether this
    1283 * page exists or not.
   1284 */
   1285struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
   1286							bool for_write)
   1287{
   1288	struct address_space *mapping = inode->i_mapping;
   1289	struct page *page;
   1290repeat:
   1291	page = f2fs_get_read_data_page(inode, index, 0, for_write);
   1292	if (IS_ERR(page))
   1293		return page;
   1294
   1295	/* wait for read completion */
   1296	lock_page(page);
   1297	if (unlikely(page->mapping != mapping)) {
   1298		f2fs_put_page(page, 1);
   1299		goto repeat;
   1300	}
   1301	if (unlikely(!PageUptodate(page))) {
   1302		f2fs_put_page(page, 1);
   1303		return ERR_PTR(-EIO);
   1304	}
   1305	return page;
   1306}
   1307
   1308/*
   1309 * Caller ensures that this data page is never allocated.
   1310 * A new zero-filled data page is allocated in the page cache.
   1311 *
   1312 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
   1313 * f2fs_unlock_op().
    1314 * Note that ipage is set only by make_empty_dir, and if any error occurs,
   1315 * ipage should be released by this function.
   1316 */
   1317struct page *f2fs_get_new_data_page(struct inode *inode,
   1318		struct page *ipage, pgoff_t index, bool new_i_size)
   1319{
   1320	struct address_space *mapping = inode->i_mapping;
   1321	struct page *page;
   1322	struct dnode_of_data dn;
   1323	int err;
   1324
   1325	page = f2fs_grab_cache_page(mapping, index, true);
   1326	if (!page) {
   1327		/*
   1328		 * before exiting, we should make sure ipage will be released
    1329		 * if any error occurs.
   1330		 */
   1331		f2fs_put_page(ipage, 1);
   1332		return ERR_PTR(-ENOMEM);
   1333	}
   1334
   1335	set_new_dnode(&dn, inode, ipage, NULL, 0);
   1336	err = f2fs_reserve_block(&dn, index);
   1337	if (err) {
   1338		f2fs_put_page(page, 1);
   1339		return ERR_PTR(err);
   1340	}
   1341	if (!ipage)
   1342		f2fs_put_dnode(&dn);
   1343
   1344	if (PageUptodate(page))
   1345		goto got_it;
   1346
   1347	if (dn.data_blkaddr == NEW_ADDR) {
   1348		zero_user_segment(page, 0, PAGE_SIZE);
   1349		if (!PageUptodate(page))
   1350			SetPageUptodate(page);
   1351	} else {
   1352		f2fs_put_page(page, 1);
   1353
   1354		/* if ipage exists, blkaddr should be NEW_ADDR */
   1355		f2fs_bug_on(F2FS_I_SB(inode), ipage);
   1356		page = f2fs_get_lock_data_page(inode, index, true);
   1357		if (IS_ERR(page))
   1358			return page;
   1359	}
   1360got_it:
   1361	if (new_i_size && i_size_read(inode) <
   1362				((loff_t)(index + 1) << PAGE_SHIFT))
   1363		f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
   1364	return page;
   1365}
   1366
   1367static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
   1368{
   1369	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
   1370	struct f2fs_summary sum;
   1371	struct node_info ni;
   1372	block_t old_blkaddr;
   1373	blkcnt_t count = 1;
   1374	int err;
   1375
   1376	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
   1377		return -EPERM;
   1378
   1379	err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
   1380	if (err)
   1381		return err;
   1382
   1383	dn->data_blkaddr = f2fs_data_blkaddr(dn);
   1384	if (dn->data_blkaddr != NULL_ADDR)
   1385		goto alloc;
   1386
   1387	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
   1388		return err;
   1389
   1390alloc:
   1391	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
   1392	old_blkaddr = dn->data_blkaddr;
   1393	f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
   1394				&sum, seg_type, NULL);
   1395	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
   1396		invalidate_mapping_pages(META_MAPPING(sbi),
   1397					old_blkaddr, old_blkaddr);
   1398		f2fs_invalidate_compress_page(sbi, old_blkaddr);
   1399	}
   1400	f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
   1401	return 0;
   1402}
   1403
   1404void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
   1405{
   1406	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
   1407		if (lock)
   1408			f2fs_down_read(&sbi->node_change);
   1409		else
   1410			f2fs_up_read(&sbi->node_change);
   1411	} else {
   1412		if (lock)
   1413			f2fs_lock_op(sbi);
   1414		else
   1415			f2fs_unlock_op(sbi);
   1416	}
   1417}
   1418
   1419/*
    1420 * f2fs_map_blocks() tries to find or build a mapping relationship that
    1421 * maps continuous logical blocks to physical blocks, and returns such
    1422 * info via the f2fs_map_blocks structure.
   1423 */
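
/*
 * Typical call pattern (simplified sketch): the caller fills in map->m_lblk,
 * map->m_len, map->m_seg_type and map->m_may_create for the logical range it
 * cares about; on return, map->m_pblk and map->m_len describe the first
 * physically contiguous extent found, m_flags carries F2FS_MAP_MAPPED,
 * F2FS_MAP_UNWRITTEN and/or F2FS_MAP_NEW, and m_next_pgofs / m_next_extent
 * (when non-NULL) point at the next offset worth probing.  See
 * f2fs_overwrite_io() below for a minimal caller that walks a range extent
 * by extent.
 */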
   1424int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
   1425						int create, int flag)
   1426{
   1427	unsigned int maxblocks = map->m_len;
   1428	struct dnode_of_data dn;
   1429	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   1430	int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
   1431	pgoff_t pgofs, end_offset, end;
   1432	int err = 0, ofs = 1;
   1433	unsigned int ofs_in_node, last_ofs_in_node;
   1434	blkcnt_t prealloc;
   1435	struct extent_info ei = {0, };
   1436	block_t blkaddr;
   1437	unsigned int start_pgofs;
   1438	int bidx = 0;
   1439
   1440	if (!maxblocks)
   1441		return 0;
   1442
   1443	map->m_bdev = inode->i_sb->s_bdev;
   1444	map->m_multidev_dio =
   1445		f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag);
   1446
   1447	map->m_len = 0;
   1448	map->m_flags = 0;
   1449
   1450	/* it only supports block size == page size */
   1451	pgofs =	(pgoff_t)map->m_lblk;
   1452	end = pgofs + maxblocks;
   1453
   1454	if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
   1455		if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
   1456							map->m_may_create)
   1457			goto next_dnode;
   1458
   1459		map->m_pblk = ei.blk + pgofs - ei.fofs;
   1460		map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
   1461		map->m_flags = F2FS_MAP_MAPPED;
   1462		if (map->m_next_extent)
   1463			*map->m_next_extent = pgofs + map->m_len;
   1464
    1465		/* for hardware encryption, but to avoid potential issues in the future */
   1466		if (flag == F2FS_GET_BLOCK_DIO)
   1467			f2fs_wait_on_block_writeback_range(inode,
   1468						map->m_pblk, map->m_len);
   1469
   1470		if (map->m_multidev_dio) {
   1471			block_t blk_addr = map->m_pblk;
   1472
   1473			bidx = f2fs_target_device_index(sbi, map->m_pblk);
   1474
   1475			map->m_bdev = FDEV(bidx).bdev;
   1476			map->m_pblk -= FDEV(bidx).start_blk;
   1477			map->m_len = min(map->m_len,
   1478				FDEV(bidx).end_blk + 1 - map->m_pblk);
   1479
   1480			if (map->m_may_create)
   1481				f2fs_update_device_state(sbi, inode->i_ino,
   1482							blk_addr, map->m_len);
   1483		}
   1484		goto out;
   1485	}
   1486
   1487next_dnode:
   1488	if (map->m_may_create)
   1489		f2fs_do_map_lock(sbi, flag, true);
   1490
   1491	/* When reading holes, we need its node page */
   1492	set_new_dnode(&dn, inode, NULL, NULL, 0);
   1493	err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
   1494	if (err) {
   1495		if (flag == F2FS_GET_BLOCK_BMAP)
   1496			map->m_pblk = 0;
   1497
   1498		if (err == -ENOENT) {
   1499			/*
    1500			 * There is one exceptional case where read_node_page()
    1501			 * may return -ENOENT because the filesystem has been
    1502			 * shut down or hit cp_error, so force the error
    1503			 * number to EIO in that case.
   1504			 */
   1505			if (map->m_may_create &&
   1506				(is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
   1507				f2fs_cp_error(sbi))) {
   1508				err = -EIO;
   1509				goto unlock_out;
   1510			}
   1511
   1512			err = 0;
   1513			if (map->m_next_pgofs)
   1514				*map->m_next_pgofs =
   1515					f2fs_get_next_page_offset(&dn, pgofs);
   1516			if (map->m_next_extent)
   1517				*map->m_next_extent =
   1518					f2fs_get_next_page_offset(&dn, pgofs);
   1519		}
   1520		goto unlock_out;
   1521	}
   1522
   1523	start_pgofs = pgofs;
   1524	prealloc = 0;
   1525	last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
   1526	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
   1527
   1528next_block:
   1529	blkaddr = f2fs_data_blkaddr(&dn);
   1530
   1531	if (__is_valid_data_blkaddr(blkaddr) &&
   1532		!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
   1533		err = -EFSCORRUPTED;
   1534		goto sync_out;
   1535	}
   1536
   1537	if (__is_valid_data_blkaddr(blkaddr)) {
    1538		/* use out-of-place update for direct IO under LFS mode */
   1539		if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
   1540							map->m_may_create) {
   1541			err = __allocate_data_block(&dn, map->m_seg_type);
   1542			if (err)
   1543				goto sync_out;
   1544			blkaddr = dn.data_blkaddr;
   1545			set_inode_flag(inode, FI_APPEND_WRITE);
   1546		}
   1547	} else {
   1548		if (create) {
   1549			if (unlikely(f2fs_cp_error(sbi))) {
   1550				err = -EIO;
   1551				goto sync_out;
   1552			}
   1553			if (flag == F2FS_GET_BLOCK_PRE_AIO) {
   1554				if (blkaddr == NULL_ADDR) {
   1555					prealloc++;
   1556					last_ofs_in_node = dn.ofs_in_node;
   1557				}
   1558			} else {
   1559				WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO &&
   1560					flag != F2FS_GET_BLOCK_DIO);
   1561				err = __allocate_data_block(&dn,
   1562							map->m_seg_type);
   1563				if (!err) {
   1564					if (flag == F2FS_GET_BLOCK_PRE_DIO)
   1565						file_need_truncate(inode);
   1566					set_inode_flag(inode, FI_APPEND_WRITE);
   1567				}
   1568			}
   1569			if (err)
   1570				goto sync_out;
   1571			map->m_flags |= F2FS_MAP_NEW;
   1572			blkaddr = dn.data_blkaddr;
   1573		} else {
   1574			if (f2fs_compressed_file(inode) &&
   1575					f2fs_sanity_check_cluster(&dn) &&
   1576					(flag != F2FS_GET_BLOCK_FIEMAP ||
   1577					IS_ENABLED(CONFIG_F2FS_CHECK_FS))) {
   1578				err = -EFSCORRUPTED;
   1579				goto sync_out;
   1580			}
   1581			if (flag == F2FS_GET_BLOCK_BMAP) {
   1582				map->m_pblk = 0;
   1583				goto sync_out;
   1584			}
   1585			if (flag == F2FS_GET_BLOCK_PRECACHE)
   1586				goto sync_out;
   1587			if (flag == F2FS_GET_BLOCK_FIEMAP &&
   1588						blkaddr == NULL_ADDR) {
   1589				if (map->m_next_pgofs)
   1590					*map->m_next_pgofs = pgofs + 1;
   1591				goto sync_out;
   1592			}
   1593			if (flag != F2FS_GET_BLOCK_FIEMAP) {
   1594				/* for defragment case */
   1595				if (map->m_next_pgofs)
   1596					*map->m_next_pgofs = pgofs + 1;
   1597				goto sync_out;
   1598			}
   1599		}
   1600	}
   1601
   1602	if (flag == F2FS_GET_BLOCK_PRE_AIO)
   1603		goto skip;
   1604
   1605	if (map->m_multidev_dio)
   1606		bidx = f2fs_target_device_index(sbi, blkaddr);
   1607
   1608	if (map->m_len == 0) {
   1609		/* preallocated unwritten block should be mapped for fiemap. */
   1610		if (blkaddr == NEW_ADDR)
   1611			map->m_flags |= F2FS_MAP_UNWRITTEN;
   1612		map->m_flags |= F2FS_MAP_MAPPED;
   1613
   1614		map->m_pblk = blkaddr;
   1615		map->m_len = 1;
   1616
   1617		if (map->m_multidev_dio)
   1618			map->m_bdev = FDEV(bidx).bdev;
   1619	} else if ((map->m_pblk != NEW_ADDR &&
   1620			blkaddr == (map->m_pblk + ofs)) ||
   1621			(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
   1622			flag == F2FS_GET_BLOCK_PRE_DIO) {
   1623		if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev)
   1624			goto sync_out;
   1625		ofs++;
   1626		map->m_len++;
   1627	} else {
   1628		goto sync_out;
   1629	}
   1630
   1631skip:
   1632	dn.ofs_in_node++;
   1633	pgofs++;
   1634
   1635	/* preallocate blocks in batch for one dnode page */
   1636	if (flag == F2FS_GET_BLOCK_PRE_AIO &&
   1637			(pgofs == end || dn.ofs_in_node == end_offset)) {
   1638
   1639		dn.ofs_in_node = ofs_in_node;
   1640		err = f2fs_reserve_new_blocks(&dn, prealloc);
   1641		if (err)
   1642			goto sync_out;
   1643
   1644		map->m_len += dn.ofs_in_node - ofs_in_node;
   1645		if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
   1646			err = -ENOSPC;
   1647			goto sync_out;
   1648		}
   1649		dn.ofs_in_node = end_offset;
   1650	}
   1651
   1652	if (pgofs >= end)
   1653		goto sync_out;
   1654	else if (dn.ofs_in_node < end_offset)
   1655		goto next_block;
   1656
   1657	if (flag == F2FS_GET_BLOCK_PRECACHE) {
   1658		if (map->m_flags & F2FS_MAP_MAPPED) {
   1659			unsigned int ofs = start_pgofs - map->m_lblk;
   1660
   1661			f2fs_update_extent_cache_range(&dn,
   1662				start_pgofs, map->m_pblk + ofs,
   1663				map->m_len - ofs);
   1664		}
   1665	}
   1666
   1667	f2fs_put_dnode(&dn);
   1668
   1669	if (map->m_may_create) {
   1670		f2fs_do_map_lock(sbi, flag, false);
   1671		f2fs_balance_fs(sbi, dn.node_changed);
   1672	}
   1673	goto next_dnode;
   1674
   1675sync_out:
   1676
   1677	if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) {
   1678		/*
    1679		 * for hardware encryption, but to avoid potential issues
    1680		 * in the future
   1681		 */
   1682		f2fs_wait_on_block_writeback_range(inode,
   1683						map->m_pblk, map->m_len);
   1684		invalidate_mapping_pages(META_MAPPING(sbi),
   1685						map->m_pblk, map->m_pblk);
   1686
   1687		if (map->m_multidev_dio) {
   1688			block_t blk_addr = map->m_pblk;
   1689
   1690			bidx = f2fs_target_device_index(sbi, map->m_pblk);
   1691
   1692			map->m_bdev = FDEV(bidx).bdev;
   1693			map->m_pblk -= FDEV(bidx).start_blk;
   1694
   1695			if (map->m_may_create)
   1696				f2fs_update_device_state(sbi, inode->i_ino,
   1697							blk_addr, map->m_len);
   1698
   1699			f2fs_bug_on(sbi, blk_addr + map->m_len >
   1700						FDEV(bidx).end_blk + 1);
   1701		}
   1702	}
   1703
   1704	if (flag == F2FS_GET_BLOCK_PRECACHE) {
   1705		if (map->m_flags & F2FS_MAP_MAPPED) {
   1706			unsigned int ofs = start_pgofs - map->m_lblk;
   1707
   1708			f2fs_update_extent_cache_range(&dn,
   1709				start_pgofs, map->m_pblk + ofs,
   1710				map->m_len - ofs);
   1711		}
   1712		if (map->m_next_extent)
   1713			*map->m_next_extent = pgofs + 1;
   1714	}
   1715	f2fs_put_dnode(&dn);
   1716unlock_out:
   1717	if (map->m_may_create) {
   1718		f2fs_do_map_lock(sbi, flag, false);
   1719		f2fs_balance_fs(sbi, dn.node_changed);
   1720	}
   1721out:
   1722	trace_f2fs_map_blocks(inode, map, create, flag, err);
   1723	return err;
   1724}
   1725
   1726bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
   1727{
   1728	struct f2fs_map_blocks map;
   1729	block_t last_lblk;
   1730	int err;
   1731
   1732	if (pos + len > i_size_read(inode))
   1733		return false;
   1734
   1735	map.m_lblk = F2FS_BYTES_TO_BLK(pos);
   1736	map.m_next_pgofs = NULL;
   1737	map.m_next_extent = NULL;
   1738	map.m_seg_type = NO_CHECK_TYPE;
   1739	map.m_may_create = false;
   1740	last_lblk = F2FS_BLK_ALIGN(pos + len);
   1741
   1742	while (map.m_lblk < last_lblk) {
   1743		map.m_len = last_lblk - map.m_lblk;
   1744		err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
   1745		if (err || map.m_len == 0)
   1746			return false;
   1747		map.m_lblk += map.m_len;
   1748	}
   1749	return true;
   1750}
   1751
   1752static inline u64 bytes_to_blks(struct inode *inode, u64 bytes)
   1753{
   1754	return (bytes >> inode->i_blkbits);
   1755}
   1756
   1757static inline u64 blks_to_bytes(struct inode *inode, u64 blks)
   1758{
   1759	return (blks << inode->i_blkbits);
   1760}
   1761
   1762static int f2fs_xattr_fiemap(struct inode *inode,
   1763				struct fiemap_extent_info *fieinfo)
   1764{
   1765	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   1766	struct page *page;
   1767	struct node_info ni;
   1768	__u64 phys = 0, len;
   1769	__u32 flags;
   1770	nid_t xnid = F2FS_I(inode)->i_xattr_nid;
   1771	int err = 0;
   1772
   1773	if (f2fs_has_inline_xattr(inode)) {
   1774		int offset;
   1775
   1776		page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
   1777						inode->i_ino, false);
   1778		if (!page)
   1779			return -ENOMEM;
   1780
   1781		err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
   1782		if (err) {
   1783			f2fs_put_page(page, 1);
   1784			return err;
   1785		}
   1786
   1787		phys = blks_to_bytes(inode, ni.blk_addr);
   1788		offset = offsetof(struct f2fs_inode, i_addr) +
   1789					sizeof(__le32) * (DEF_ADDRS_PER_INODE -
   1790					get_inline_xattr_addrs(inode));
   1791
   1792		phys += offset;
   1793		len = inline_xattr_size(inode);
   1794
   1795		f2fs_put_page(page, 1);
   1796
   1797		flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;
   1798
   1799		if (!xnid)
   1800			flags |= FIEMAP_EXTENT_LAST;
   1801
   1802		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
   1803		trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
   1804		if (err || err == 1)
   1805			return err;
   1806	}
   1807
   1808	if (xnid) {
   1809		page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
   1810		if (!page)
   1811			return -ENOMEM;
   1812
   1813		err = f2fs_get_node_info(sbi, xnid, &ni, false);
   1814		if (err) {
   1815			f2fs_put_page(page, 1);
   1816			return err;
   1817		}
   1818
   1819		phys = blks_to_bytes(inode, ni.blk_addr);
   1820		len = inode->i_sb->s_blocksize;
   1821
   1822		f2fs_put_page(page, 1);
   1823
   1824		flags = FIEMAP_EXTENT_LAST;
   1825	}
   1826
   1827	if (phys) {
   1828		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
   1829		trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
   1830	}
   1831
   1832	return (err < 0 ? err : 0);
   1833}
   1834
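        /*
         * Descriptive note (added): upper bound on the number of data blocks
         * addressable through an inode: the direct addresses in the inode plus
         * two direct, two indirect and one double-indirect node blocks. Used
         * by f2fs_fiemap() to decide when a trailing hole should be reported
         * with FIEMAP_EXTENT_LAST.
         */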
   1835static loff_t max_inode_blocks(struct inode *inode)
   1836{
   1837	loff_t result = ADDRS_PER_INODE(inode);
   1838	loff_t leaf_count = ADDRS_PER_BLOCK(inode);
   1839
   1840	/* two direct node blocks */
   1841	result += (leaf_count * 2);
   1842
   1843	/* two indirect node blocks */
   1844	leaf_count *= NIDS_PER_BLOCK;
   1845	result += (leaf_count * 2);
   1846
   1847	/* one double indirect node block */
   1848	leaf_count *= NIDS_PER_BLOCK;
   1849	result += leaf_count;
   1850
   1851	return result;
   1852}
   1853
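        /*
         * Descriptive note (added): ->fiemap handler. Xattr and inline-data
         * requests are answered directly; regular data is walked with
         * F2FS_GET_BLOCK_FIEMAP, and the blocks of a compressed cluster are
         * folded into a single extent reported as FIEMAP_EXTENT_ENCODED.
         */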
   1854int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
   1855		u64 start, u64 len)
   1856{
   1857	struct f2fs_map_blocks map;
   1858	sector_t start_blk, last_blk;
   1859	pgoff_t next_pgofs;
   1860	u64 logical = 0, phys = 0, size = 0;
   1861	u32 flags = 0;
   1862	int ret = 0;
   1863	bool compr_cluster = false, compr_appended;
   1864	unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
   1865	unsigned int count_in_cluster = 0;
   1866	loff_t maxbytes;
   1867
   1868	if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
   1869		ret = f2fs_precache_extents(inode);
   1870		if (ret)
   1871			return ret;
   1872	}
   1873
   1874	ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_XATTR);
   1875	if (ret)
   1876		return ret;
   1877
   1878	inode_lock(inode);
   1879
   1880	maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS;
   1881	if (start > maxbytes) {
   1882		ret = -EFBIG;
   1883		goto out;
   1884	}
   1885
   1886	if (len > maxbytes || (maxbytes - len) < start)
   1887		len = maxbytes - start;
   1888
   1889	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
   1890		ret = f2fs_xattr_fiemap(inode, fieinfo);
   1891		goto out;
   1892	}
   1893
   1894	if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
   1895		ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
   1896		if (ret != -EAGAIN)
   1897			goto out;
   1898	}
   1899
   1900	if (bytes_to_blks(inode, len) == 0)
   1901		len = blks_to_bytes(inode, 1);
   1902
   1903	start_blk = bytes_to_blks(inode, start);
   1904	last_blk = bytes_to_blks(inode, start + len - 1);
   1905
   1906next:
   1907	memset(&map, 0, sizeof(map));
   1908	map.m_lblk = start_blk;
   1909	map.m_len = bytes_to_blks(inode, len);
   1910	map.m_next_pgofs = &next_pgofs;
   1911	map.m_seg_type = NO_CHECK_TYPE;
   1912
   1913	if (compr_cluster) {
   1914		map.m_lblk += 1;
   1915		map.m_len = cluster_size - count_in_cluster;
   1916	}
   1917
   1918	ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
   1919	if (ret)
   1920		goto out;
   1921
   1922	/* HOLE */
   1923	if (!compr_cluster && !(map.m_flags & F2FS_MAP_FLAGS)) {
   1924		start_blk = next_pgofs;
   1925
   1926		if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode,
   1927						max_inode_blocks(inode)))
   1928			goto prep_next;
   1929
   1930		flags |= FIEMAP_EXTENT_LAST;
   1931	}
   1932
   1933	compr_appended = false;
    1934	/* In the case of a compressed cluster, append this to the last extent */
   1935	if (compr_cluster && ((map.m_flags & F2FS_MAP_UNWRITTEN) ||
   1936			!(map.m_flags & F2FS_MAP_FLAGS))) {
   1937		compr_appended = true;
   1938		goto skip_fill;
   1939	}
   1940
   1941	if (size) {
   1942		flags |= FIEMAP_EXTENT_MERGED;
   1943		if (IS_ENCRYPTED(inode))
   1944			flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;
   1945
   1946		ret = fiemap_fill_next_extent(fieinfo, logical,
   1947				phys, size, flags);
   1948		trace_f2fs_fiemap(inode, logical, phys, size, flags, ret);
   1949		if (ret)
   1950			goto out;
   1951		size = 0;
   1952	}
   1953
   1954	if (start_blk > last_blk)
   1955		goto out;
   1956
   1957skip_fill:
   1958	if (map.m_pblk == COMPRESS_ADDR) {
   1959		compr_cluster = true;
   1960		count_in_cluster = 1;
   1961	} else if (compr_appended) {
   1962		unsigned int appended_blks = cluster_size -
   1963						count_in_cluster + 1;
   1964		size += blks_to_bytes(inode, appended_blks);
   1965		start_blk += appended_blks;
   1966		compr_cluster = false;
   1967	} else {
   1968		logical = blks_to_bytes(inode, start_blk);
   1969		phys = __is_valid_data_blkaddr(map.m_pblk) ?
   1970			blks_to_bytes(inode, map.m_pblk) : 0;
   1971		size = blks_to_bytes(inode, map.m_len);
   1972		flags = 0;
   1973
   1974		if (compr_cluster) {
   1975			flags = FIEMAP_EXTENT_ENCODED;
   1976			count_in_cluster += map.m_len;
   1977			if (count_in_cluster == cluster_size) {
   1978				compr_cluster = false;
   1979				size += blks_to_bytes(inode, 1);
   1980			}
   1981		} else if (map.m_flags & F2FS_MAP_UNWRITTEN) {
   1982			flags = FIEMAP_EXTENT_UNWRITTEN;
   1983		}
   1984
   1985		start_blk += bytes_to_blks(inode, size);
   1986	}
   1987
   1988prep_next:
   1989	cond_resched();
   1990	if (fatal_signal_pending(current))
   1991		ret = -EINTR;
   1992	else
   1993		goto next;
   1994out:
   1995	if (ret == 1)
   1996		ret = 0;
   1997
   1998	inode_unlock(inode);
   1999	return ret;
   2000}
   2001
   2002static inline loff_t f2fs_readpage_limit(struct inode *inode)
   2003{
   2004	if (IS_ENABLED(CONFIG_FS_VERITY) &&
   2005	    (IS_VERITY(inode) || f2fs_verity_in_progress(inode)))
   2006		return inode->i_sb->s_maxbytes;
   2007
   2008	return i_size_read(inode);
   2009}
   2010
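        /*
         * Descriptive note (added): read one uncompressed page. The block is
         * mapped (reusing the caller's map when it still covers this index),
         * the page is zeroed if it is a hole or beyond EOF, and otherwise it
         * is added to *bio_ret, submitting and reallocating the bio whenever
         * the new block cannot be merged into it.
         */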
   2011static int f2fs_read_single_page(struct inode *inode, struct page *page,
   2012					unsigned nr_pages,
   2013					struct f2fs_map_blocks *map,
   2014					struct bio **bio_ret,
   2015					sector_t *last_block_in_bio,
   2016					bool is_readahead)
   2017{
   2018	struct bio *bio = *bio_ret;
   2019	const unsigned blocksize = blks_to_bytes(inode, 1);
   2020	sector_t block_in_file;
   2021	sector_t last_block;
   2022	sector_t last_block_in_file;
   2023	sector_t block_nr;
   2024	int ret = 0;
   2025
   2026	block_in_file = (sector_t)page_index(page);
   2027	last_block = block_in_file + nr_pages;
   2028	last_block_in_file = bytes_to_blks(inode,
   2029			f2fs_readpage_limit(inode) + blocksize - 1);
   2030	if (last_block > last_block_in_file)
   2031		last_block = last_block_in_file;
   2032
    2033	/* just zero out a page which is beyond EOF */
   2034	if (block_in_file >= last_block)
   2035		goto zero_out;
   2036	/*
   2037	 * Map blocks using the previous result first.
   2038	 */
   2039	if ((map->m_flags & F2FS_MAP_MAPPED) &&
   2040			block_in_file > map->m_lblk &&
   2041			block_in_file < (map->m_lblk + map->m_len))
   2042		goto got_it;
   2043
   2044	/*
   2045	 * Then do more f2fs_map_blocks() calls until we are
   2046	 * done with this page.
   2047	 */
   2048	map->m_lblk = block_in_file;
   2049	map->m_len = last_block - block_in_file;
   2050
   2051	ret = f2fs_map_blocks(inode, map, 0, F2FS_GET_BLOCK_DEFAULT);
   2052	if (ret)
   2053		goto out;
   2054got_it:
   2055	if ((map->m_flags & F2FS_MAP_MAPPED)) {
   2056		block_nr = map->m_pblk + block_in_file - map->m_lblk;
   2057		SetPageMappedToDisk(page);
   2058
   2059		if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
   2060						DATA_GENERIC_ENHANCE_READ)) {
   2061			ret = -EFSCORRUPTED;
   2062			goto out;
   2063		}
   2064	} else {
   2065zero_out:
   2066		zero_user_segment(page, 0, PAGE_SIZE);
   2067		if (f2fs_need_verity(inode, page->index) &&
   2068		    !fsverity_verify_page(page)) {
   2069			ret = -EIO;
   2070			goto out;
   2071		}
   2072		if (!PageUptodate(page))
   2073			SetPageUptodate(page);
   2074		unlock_page(page);
   2075		goto out;
   2076	}
   2077
   2078	/*
   2079	 * This page will go to BIO.  Do we need to send this
   2080	 * BIO off first?
   2081	 */
   2082	if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio,
   2083				       *last_block_in_bio, block_nr) ||
   2084		    !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
   2085submit_and_realloc:
   2086		__submit_bio(F2FS_I_SB(inode), bio, DATA);
   2087		bio = NULL;
   2088	}
   2089	if (bio == NULL) {
   2090		bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
   2091				is_readahead ? REQ_RAHEAD : 0, page->index,
   2092				false);
   2093		if (IS_ERR(bio)) {
   2094			ret = PTR_ERR(bio);
   2095			bio = NULL;
   2096			goto out;
   2097		}
   2098	}
   2099
   2100	/*
   2101	 * If the page is under writeback, we need to wait for
   2102	 * its completion to see the correct decrypted data.
   2103	 */
   2104	f2fs_wait_on_block_writeback(inode, block_nr);
   2105
   2106	if (bio_add_page(bio, page, blocksize, 0) < blocksize)
   2107		goto submit_and_realloc;
   2108
   2109	inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
   2110	f2fs_update_iostat(F2FS_I_SB(inode), FS_DATA_READ_IO, F2FS_BLKSIZE);
   2111	ClearPageError(page);
   2112	*last_block_in_bio = block_nr;
   2113	goto out;
   2114out:
   2115	*bio_ret = bio;
   2116	return ret;
   2117}
   2118
   2119#ifdef CONFIG_F2FS_FS_COMPRESSION
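        /*
         * Descriptive note (added): read the compressed pages of one cluster.
         * rpages beyond EOF are zeroed and dropped, the cluster's block
         * addresses are looked up from the extent cache or the dnode, a
         * decompress_io_ctx is allocated, and each valid cpage is queued into
         * the shared read bio with STEP_DECOMPRESS set, so decompression is
         * triggered from the read-completion path.
         */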
   2120int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
   2121				unsigned nr_pages, sector_t *last_block_in_bio,
   2122				bool is_readahead, bool for_write)
   2123{
   2124	struct dnode_of_data dn;
   2125	struct inode *inode = cc->inode;
   2126	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   2127	struct bio *bio = *bio_ret;
   2128	unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size;
   2129	sector_t last_block_in_file;
   2130	const unsigned blocksize = blks_to_bytes(inode, 1);
   2131	struct decompress_io_ctx *dic = NULL;
   2132	struct extent_info ei = {0, };
   2133	bool from_dnode = true;
   2134	int i;
   2135	int ret = 0;
   2136
   2137	f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc));
   2138
   2139	last_block_in_file = bytes_to_blks(inode,
   2140			f2fs_readpage_limit(inode) + blocksize - 1);
   2141
   2142	/* get rid of pages beyond EOF */
   2143	for (i = 0; i < cc->cluster_size; i++) {
   2144		struct page *page = cc->rpages[i];
   2145
   2146		if (!page)
   2147			continue;
   2148		if ((sector_t)page->index >= last_block_in_file) {
   2149			zero_user_segment(page, 0, PAGE_SIZE);
   2150			if (!PageUptodate(page))
   2151				SetPageUptodate(page);
   2152		} else if (!PageUptodate(page)) {
   2153			continue;
   2154		}
   2155		unlock_page(page);
   2156		if (for_write)
   2157			put_page(page);
   2158		cc->rpages[i] = NULL;
   2159		cc->nr_rpages--;
   2160	}
   2161
   2162	/* we are done since all pages are beyond EOF */
   2163	if (f2fs_cluster_is_empty(cc))
   2164		goto out;
   2165
   2166	if (f2fs_lookup_extent_cache(inode, start_idx, &ei))
   2167		from_dnode = false;
   2168
   2169	if (!from_dnode)
   2170		goto skip_reading_dnode;
   2171
   2172	set_new_dnode(&dn, inode, NULL, NULL, 0);
   2173	ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
   2174	if (ret)
   2175		goto out;
   2176
   2177	f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR);
   2178
   2179skip_reading_dnode:
   2180	for (i = 1; i < cc->cluster_size; i++) {
   2181		block_t blkaddr;
   2182
   2183		blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
   2184					dn.ofs_in_node + i) :
   2185					ei.blk + i - 1;
   2186
   2187		if (!__is_valid_data_blkaddr(blkaddr))
   2188			break;
   2189
   2190		if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
   2191			ret = -EFAULT;
   2192			goto out_put_dnode;
   2193		}
   2194		cc->nr_cpages++;
   2195
   2196		if (!from_dnode && i >= ei.c_len)
   2197			break;
   2198	}
   2199
   2200	/* nothing to decompress */
   2201	if (cc->nr_cpages == 0) {
   2202		ret = 0;
   2203		goto out_put_dnode;
   2204	}
   2205
   2206	dic = f2fs_alloc_dic(cc);
   2207	if (IS_ERR(dic)) {
   2208		ret = PTR_ERR(dic);
   2209		goto out_put_dnode;
   2210	}
   2211
   2212	for (i = 0; i < cc->nr_cpages; i++) {
   2213		struct page *page = dic->cpages[i];
   2214		block_t blkaddr;
   2215		struct bio_post_read_ctx *ctx;
   2216
   2217		blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
   2218					dn.ofs_in_node + i + 1) :
   2219					ei.blk + i;
   2220
   2221		f2fs_wait_on_block_writeback(inode, blkaddr);
   2222
   2223		if (f2fs_load_compressed_page(sbi, page, blkaddr)) {
   2224			if (atomic_dec_and_test(&dic->remaining_pages))
   2225				f2fs_decompress_cluster(dic);
   2226			continue;
   2227		}
   2228
   2229		if (bio && (!page_is_mergeable(sbi, bio,
   2230					*last_block_in_bio, blkaddr) ||
   2231		    !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
   2232submit_and_realloc:
   2233			__submit_bio(sbi, bio, DATA);
   2234			bio = NULL;
   2235		}
   2236
   2237		if (!bio) {
   2238			bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
   2239					is_readahead ? REQ_RAHEAD : 0,
   2240					page->index, for_write);
   2241			if (IS_ERR(bio)) {
   2242				ret = PTR_ERR(bio);
   2243				f2fs_decompress_end_io(dic, ret);
   2244				f2fs_put_dnode(&dn);
   2245				*bio_ret = NULL;
   2246				return ret;
   2247			}
   2248		}
   2249
   2250		if (bio_add_page(bio, page, blocksize, 0) < blocksize)
   2251			goto submit_and_realloc;
   2252
   2253		ctx = get_post_read_ctx(bio);
   2254		ctx->enabled_steps |= STEP_DECOMPRESS;
   2255		refcount_inc(&dic->refcnt);
   2256
   2257		inc_page_count(sbi, F2FS_RD_DATA);
   2258		f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
   2259		f2fs_update_iostat(sbi, FS_CDATA_READ_IO, F2FS_BLKSIZE);
   2260		ClearPageError(page);
   2261		*last_block_in_bio = blkaddr;
   2262	}
   2263
   2264	if (from_dnode)
   2265		f2fs_put_dnode(&dn);
   2266
   2267	*bio_ret = bio;
   2268	return 0;
   2269
   2270out_put_dnode:
   2271	if (from_dnode)
   2272		f2fs_put_dnode(&dn);
   2273out:
   2274	for (i = 0; i < cc->cluster_size; i++) {
   2275		if (cc->rpages[i]) {
   2276			ClearPageUptodate(cc->rpages[i]);
   2277			ClearPageError(cc->rpages[i]);
   2278			unlock_page(cc->rpages[i]);
   2279		}
   2280	}
   2281	*bio_ret = bio;
   2282	return ret;
   2283}
   2284#endif
   2285
   2286/*
   2287 * This function was originally taken from fs/mpage.c, and customized for f2fs.
    2288 * The major change is that f2fs uses block_size == page_size by default.
   2289 */
   2290static int f2fs_mpage_readpages(struct inode *inode,
   2291		struct readahead_control *rac, struct page *page)
   2292{
   2293	struct bio *bio = NULL;
   2294	sector_t last_block_in_bio = 0;
   2295	struct f2fs_map_blocks map;
   2296#ifdef CONFIG_F2FS_FS_COMPRESSION
   2297	struct compress_ctx cc = {
   2298		.inode = inode,
   2299		.log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
   2300		.cluster_size = F2FS_I(inode)->i_cluster_size,
   2301		.cluster_idx = NULL_CLUSTER,
   2302		.rpages = NULL,
   2303		.cpages = NULL,
   2304		.nr_rpages = 0,
   2305		.nr_cpages = 0,
   2306	};
   2307	pgoff_t nc_cluster_idx = NULL_CLUSTER;
   2308#endif
   2309	unsigned nr_pages = rac ? readahead_count(rac) : 1;
   2310	unsigned max_nr_pages = nr_pages;
   2311	int ret = 0;
   2312
   2313	map.m_pblk = 0;
   2314	map.m_lblk = 0;
   2315	map.m_len = 0;
   2316	map.m_flags = 0;
   2317	map.m_next_pgofs = NULL;
   2318	map.m_next_extent = NULL;
   2319	map.m_seg_type = NO_CHECK_TYPE;
   2320	map.m_may_create = false;
   2321
   2322	for (; nr_pages; nr_pages--) {
   2323		if (rac) {
   2324			page = readahead_page(rac);
   2325			prefetchw(&page->flags);
   2326		}
   2327
   2328#ifdef CONFIG_F2FS_FS_COMPRESSION
   2329		if (f2fs_compressed_file(inode)) {
    2330			/* there are remaining compressed pages, submit them */
   2331			if (!f2fs_cluster_can_merge_page(&cc, page->index)) {
   2332				ret = f2fs_read_multi_pages(&cc, &bio,
   2333							max_nr_pages,
   2334							&last_block_in_bio,
   2335							rac != NULL, false);
   2336				f2fs_destroy_compress_ctx(&cc, false);
   2337				if (ret)
   2338					goto set_error_page;
   2339			}
   2340			if (cc.cluster_idx == NULL_CLUSTER) {
   2341				if (nc_cluster_idx ==
   2342					page->index >> cc.log_cluster_size) {
   2343					goto read_single_page;
   2344				}
   2345
   2346				ret = f2fs_is_compressed_cluster(inode, page->index);
   2347				if (ret < 0)
   2348					goto set_error_page;
   2349				else if (!ret) {
   2350					nc_cluster_idx =
   2351						page->index >> cc.log_cluster_size;
   2352					goto read_single_page;
   2353				}
   2354
   2355				nc_cluster_idx = NULL_CLUSTER;
   2356			}
   2357			ret = f2fs_init_compress_ctx(&cc);
   2358			if (ret)
   2359				goto set_error_page;
   2360
   2361			f2fs_compress_ctx_add_page(&cc, page);
   2362
   2363			goto next_page;
   2364		}
   2365read_single_page:
   2366#endif
   2367
   2368		ret = f2fs_read_single_page(inode, page, max_nr_pages, &map,
   2369					&bio, &last_block_in_bio, rac);
   2370		if (ret) {
   2371#ifdef CONFIG_F2FS_FS_COMPRESSION
   2372set_error_page:
   2373#endif
   2374			SetPageError(page);
   2375			zero_user_segment(page, 0, PAGE_SIZE);
   2376			unlock_page(page);
   2377		}
   2378#ifdef CONFIG_F2FS_FS_COMPRESSION
   2379next_page:
   2380#endif
   2381		if (rac)
   2382			put_page(page);
   2383
   2384#ifdef CONFIG_F2FS_FS_COMPRESSION
   2385		if (f2fs_compressed_file(inode)) {
   2386			/* last page */
   2387			if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) {
   2388				ret = f2fs_read_multi_pages(&cc, &bio,
   2389							max_nr_pages,
   2390							&last_block_in_bio,
   2391							rac != NULL, false);
   2392				f2fs_destroy_compress_ctx(&cc, false);
   2393			}
   2394		}
   2395#endif
   2396	}
   2397	if (bio)
   2398		__submit_bio(F2FS_I_SB(inode), bio, DATA);
   2399	return ret;
   2400}
   2401
   2402static int f2fs_read_data_folio(struct file *file, struct folio *folio)
   2403{
   2404	struct page *page = &folio->page;
   2405	struct inode *inode = page_file_mapping(page)->host;
   2406	int ret = -EAGAIN;
   2407
   2408	trace_f2fs_readpage(page, DATA);
   2409
   2410	if (!f2fs_is_compress_backend_ready(inode)) {
   2411		unlock_page(page);
   2412		return -EOPNOTSUPP;
   2413	}
   2414
   2415	/* If the file has inline data, try to read it directly */
   2416	if (f2fs_has_inline_data(inode))
   2417		ret = f2fs_read_inline_data(inode, page);
   2418	if (ret == -EAGAIN)
   2419		ret = f2fs_mpage_readpages(inode, NULL, page);
   2420	return ret;
   2421}
   2422
   2423static void f2fs_readahead(struct readahead_control *rac)
   2424{
   2425	struct inode *inode = rac->mapping->host;
   2426
   2427	trace_f2fs_readpages(inode, readahead_index(rac), readahead_count(rac));
   2428
   2429	if (!f2fs_is_compress_backend_ready(inode))
   2430		return;
   2431
   2432	/* If the file has inline data, skip readahead */
   2433	if (f2fs_has_inline_data(inode))
   2434		return;
   2435
   2436	f2fs_mpage_readpages(inode, rac, NULL);
   2437}
   2438
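        /*
         * Descriptive note (added): encrypt one data page for write-out.
         * Inline (hardware) crypto needs no bounce page; otherwise the page is
         * encrypted into fio->encrypted_page, retrying with __GFP_NOFAIL after
         * flushing merged writes if the allocation hits -ENOMEM. Any cached
         * copy of the old block in META_MAPPING is refreshed with the new
         * ciphertext.
         */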
   2439int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
   2440{
   2441	struct inode *inode = fio->page->mapping->host;
   2442	struct page *mpage, *page;
   2443	gfp_t gfp_flags = GFP_NOFS;
   2444
   2445	if (!f2fs_encrypted_file(inode))
   2446		return 0;
   2447
   2448	page = fio->compressed_page ? fio->compressed_page : fio->page;
   2449
   2450	/* wait for GCed page writeback via META_MAPPING */
   2451	f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
   2452
   2453	if (fscrypt_inode_uses_inline_crypto(inode))
   2454		return 0;
   2455
   2456retry_encrypt:
   2457	fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page,
   2458					PAGE_SIZE, 0, gfp_flags);
   2459	if (IS_ERR(fio->encrypted_page)) {
   2460		/* flush pending IOs and wait for a while in the ENOMEM case */
   2461		if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
   2462			f2fs_flush_merged_writes(fio->sbi);
   2463			memalloc_retry_wait(GFP_NOFS);
   2464			gfp_flags |= __GFP_NOFAIL;
   2465			goto retry_encrypt;
   2466		}
   2467		return PTR_ERR(fio->encrypted_page);
   2468	}
   2469
   2470	mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
   2471	if (mpage) {
   2472		if (PageUptodate(mpage))
   2473			memcpy(page_address(mpage),
   2474				page_address(fio->encrypted_page), PAGE_SIZE);
   2475		f2fs_put_page(mpage, 1);
   2476	}
   2477	return 0;
   2478}
   2479
   2480static inline bool check_inplace_update_policy(struct inode *inode,
   2481				struct f2fs_io_info *fio)
   2482{
   2483	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   2484	unsigned int policy = SM_I(sbi)->ipu_policy;
   2485
   2486	if (policy & (0x1 << F2FS_IPU_HONOR_OPU_WRITE) &&
   2487			is_inode_flag_set(inode, FI_OPU_WRITE))
   2488		return false;
   2489	if (policy & (0x1 << F2FS_IPU_FORCE))
   2490		return true;
   2491	if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi))
   2492		return true;
   2493	if (policy & (0x1 << F2FS_IPU_UTIL) &&
   2494			utilization(sbi) > SM_I(sbi)->min_ipu_util)
   2495		return true;
   2496	if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && f2fs_need_SSR(sbi) &&
   2497			utilization(sbi) > SM_I(sbi)->min_ipu_util)
   2498		return true;
   2499
   2500	/*
    2501	 * use IPU to rewrite async pages
   2502	 */
   2503	if (policy & (0x1 << F2FS_IPU_ASYNC) &&
   2504			fio && fio->op == REQ_OP_WRITE &&
   2505			!(fio->op_flags & REQ_SYNC) &&
   2506			!IS_ENCRYPTED(inode))
   2507		return true;
   2508
   2509	/* this is only set during fdatasync */
   2510	if (policy & (0x1 << F2FS_IPU_FSYNC) &&
   2511			is_inode_flag_set(inode, FI_NEED_IPU))
   2512		return true;
   2513
   2514	if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
   2515			!f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
   2516		return true;
   2517
   2518	return false;
   2519}
   2520
   2521bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
   2522{
   2523	/* swap file is migrating in aligned write mode */
   2524	if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
   2525		return false;
   2526
   2527	if (f2fs_is_pinned_file(inode))
   2528		return true;
   2529
    2530	/* if this is a cold file, we should overwrite it to avoid fragmentation */
   2531	if (file_is_cold(inode))
   2532		return true;
   2533
   2534	return check_inplace_update_policy(inode, fio);
   2535}
   2536
   2537bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
   2538{
   2539	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   2540
   2541	/* The below cases were checked when setting it. */
   2542	if (f2fs_is_pinned_file(inode))
   2543		return false;
   2544	if (fio && is_sbi_flag_set(sbi, SBI_NEED_FSCK))
   2545		return true;
   2546	if (f2fs_lfs_mode(sbi))
   2547		return true;
   2548	if (S_ISDIR(inode->i_mode))
   2549		return true;
   2550	if (IS_NOQUOTA(inode))
   2551		return true;
   2552	if (f2fs_is_atomic_file(inode))
   2553		return true;
   2554
   2555	/* swap file is migrating in aligned write mode */
   2556	if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
   2557		return true;
   2558
   2559	if (is_inode_flag_set(inode, FI_OPU_WRITE))
   2560		return true;
   2561
   2562	if (fio) {
   2563		if (page_private_gcing(fio->page))
   2564			return true;
   2565		if (page_private_dummy(fio->page))
   2566			return true;
   2567		if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
   2568			f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
   2569			return true;
   2570	}
   2571	return false;
   2572}
   2573
   2574static inline bool need_inplace_update(struct f2fs_io_info *fio)
   2575{
   2576	struct inode *inode = fio->page->mapping->host;
   2577
   2578	if (f2fs_should_update_outplace(inode, fio))
   2579		return false;
   2580
   2581	return f2fs_should_update_inplace(inode, fio);
   2582}
   2583
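        /*
         * Descriptive note (added): write one data page, either in place (IPU)
         * when the policy allows it and the old block address is valid, or out
         * of place on the OPU/LFS path with a newly allocated block. Atomic
         * writes build the dnode from the COW inode.
         */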
   2584int f2fs_do_write_data_page(struct f2fs_io_info *fio)
   2585{
   2586	struct page *page = fio->page;
   2587	struct inode *inode = page->mapping->host;
   2588	struct dnode_of_data dn;
   2589	struct extent_info ei = {0, };
   2590	struct node_info ni;
   2591	bool ipu_force = false;
   2592	int err = 0;
   2593
   2594	/* Use COW inode to make dnode_of_data for atomic write */
   2595	if (f2fs_is_atomic_file(inode))
   2596		set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0);
   2597	else
   2598		set_new_dnode(&dn, inode, NULL, NULL, 0);
   2599
   2600	if (need_inplace_update(fio) &&
   2601			f2fs_lookup_extent_cache(inode, page->index, &ei)) {
   2602		fio->old_blkaddr = ei.blk + page->index - ei.fofs;
   2603
   2604		if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
   2605						DATA_GENERIC_ENHANCE))
   2606			return -EFSCORRUPTED;
   2607
   2608		ipu_force = true;
   2609		fio->need_lock = LOCK_DONE;
   2610		goto got_it;
   2611	}
   2612
    2613	/* Avoid deadlock between page->lock and f2fs_lock_op */
   2614	if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
   2615		return -EAGAIN;
   2616
   2617	err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
   2618	if (err)
   2619		goto out;
   2620
   2621	fio->old_blkaddr = dn.data_blkaddr;
   2622
   2623	/* This page is already truncated */
   2624	if (fio->old_blkaddr == NULL_ADDR) {
   2625		ClearPageUptodate(page);
   2626		clear_page_private_gcing(page);
   2627		goto out_writepage;
   2628	}
   2629got_it:
   2630	if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
   2631		!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
   2632						DATA_GENERIC_ENHANCE)) {
   2633		err = -EFSCORRUPTED;
   2634		goto out_writepage;
   2635	}
   2636
   2637	/*
    2638	 * If the current allocation needs SSR,
    2639	 * it is better to do in-place writes for updated data.
   2640	 */
   2641	if (ipu_force ||
   2642		(__is_valid_data_blkaddr(fio->old_blkaddr) &&
   2643					need_inplace_update(fio))) {
   2644		err = f2fs_encrypt_one_page(fio);
   2645		if (err)
   2646			goto out_writepage;
   2647
   2648		set_page_writeback(page);
   2649		ClearPageError(page);
   2650		f2fs_put_dnode(&dn);
   2651		if (fio->need_lock == LOCK_REQ)
   2652			f2fs_unlock_op(fio->sbi);
   2653		err = f2fs_inplace_write_data(fio);
   2654		if (err) {
   2655			if (fscrypt_inode_uses_fs_layer_crypto(inode))
   2656				fscrypt_finalize_bounce_page(&fio->encrypted_page);
   2657			if (PageWriteback(page))
   2658				end_page_writeback(page);
   2659		} else {
   2660			set_inode_flag(inode, FI_UPDATE_WRITE);
   2661		}
   2662		trace_f2fs_do_write_data_page(fio->page, IPU);
   2663		return err;
   2664	}
   2665
   2666	if (fio->need_lock == LOCK_RETRY) {
   2667		if (!f2fs_trylock_op(fio->sbi)) {
   2668			err = -EAGAIN;
   2669			goto out_writepage;
   2670		}
   2671		fio->need_lock = LOCK_REQ;
   2672	}
   2673
   2674	err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false);
   2675	if (err)
   2676		goto out_writepage;
   2677
   2678	fio->version = ni.version;
   2679
   2680	err = f2fs_encrypt_one_page(fio);
   2681	if (err)
   2682		goto out_writepage;
   2683
   2684	set_page_writeback(page);
   2685	ClearPageError(page);
   2686
   2687	if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR)
   2688		f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false);
   2689
   2690	/* LFS mode write path */
   2691	f2fs_outplace_write_data(&dn, fio);
   2692	trace_f2fs_do_write_data_page(page, OPU);
   2693	set_inode_flag(inode, FI_APPEND_WRITE);
   2694	if (page->index == 0)
   2695		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
   2696out_writepage:
   2697	f2fs_put_dnode(&dn);
   2698out:
   2699	if (fio->need_lock == LOCK_REQ)
   2700		f2fs_unlock_op(fio->sbi);
   2701	return err;
   2702}
   2703
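        /*
         * Descriptive note (added): ->writepage worker for a single data page.
         * It handles pages straddling i_size, directory/quota pages under
         * checkpoint control, inline data, and the redirty cases where
         * writeback must be retried.
         */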
   2704int f2fs_write_single_data_page(struct page *page, int *submitted,
   2705				struct bio **bio,
   2706				sector_t *last_block,
   2707				struct writeback_control *wbc,
   2708				enum iostat_type io_type,
   2709				int compr_blocks,
   2710				bool allow_balance)
   2711{
   2712	struct inode *inode = page->mapping->host;
   2713	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   2714	loff_t i_size = i_size_read(inode);
   2715	const pgoff_t end_index = ((unsigned long long)i_size)
   2716							>> PAGE_SHIFT;
   2717	loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT;
   2718	unsigned offset = 0;
   2719	bool need_balance_fs = false;
   2720	int err = 0;
   2721	struct f2fs_io_info fio = {
   2722		.sbi = sbi,
   2723		.ino = inode->i_ino,
   2724		.type = DATA,
   2725		.op = REQ_OP_WRITE,
   2726		.op_flags = wbc_to_write_flags(wbc),
   2727		.old_blkaddr = NULL_ADDR,
   2728		.page = page,
   2729		.encrypted_page = NULL,
   2730		.submitted = false,
   2731		.compr_blocks = compr_blocks,
   2732		.need_lock = LOCK_RETRY,
   2733		.io_type = io_type,
   2734		.io_wbc = wbc,
   2735		.bio = bio,
   2736		.last_block = last_block,
   2737	};
   2738
   2739	trace_f2fs_writepage(page, DATA);
   2740
    2741	/* we should bypass data pages to let the kworker jobs proceed */
   2742	if (unlikely(f2fs_cp_error(sbi))) {
   2743		mapping_set_error(page->mapping, -EIO);
   2744		/*
    2745		 * don't drop any dirty dentry pages, to keep the latest
    2746		 * directory structure.
   2747		 */
   2748		if (S_ISDIR(inode->i_mode))
   2749			goto redirty_out;
   2750		goto out;
   2751	}
   2752
   2753	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
   2754		goto redirty_out;
   2755
   2756	if (page->index < end_index ||
   2757			f2fs_verity_in_progress(inode) ||
   2758			compr_blocks)
   2759		goto write;
   2760
   2761	/*
    2762	 * If the offset is beyond the file size,
   2763	 * this page does not have to be written to disk.
   2764	 */
   2765	offset = i_size & (PAGE_SIZE - 1);
   2766	if ((page->index >= end_index + 1) || !offset)
   2767		goto out;
   2768
   2769	zero_user_segment(page, offset, PAGE_SIZE);
   2770write:
   2771	if (f2fs_is_drop_cache(inode))
   2772		goto out;
   2773
   2774	/* Dentry/quota blocks are controlled by checkpoint */
   2775	if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) {
   2776		/*
   2777		 * We need to wait for node_write to avoid block allocation during
    2778		 * checkpoint. This can only happen to quota writes, which can
    2779		 * cause the discard race condition below.
   2780		 */
   2781		if (IS_NOQUOTA(inode))
   2782			f2fs_down_read(&sbi->node_write);
   2783
   2784		fio.need_lock = LOCK_DONE;
   2785		err = f2fs_do_write_data_page(&fio);
   2786
   2787		if (IS_NOQUOTA(inode))
   2788			f2fs_up_read(&sbi->node_write);
   2789
   2790		goto done;
   2791	}
   2792
   2793	if (!wbc->for_reclaim)
   2794		need_balance_fs = true;
   2795	else if (has_not_enough_free_secs(sbi, 0, 0))
   2796		goto redirty_out;
   2797	else
   2798		set_inode_flag(inode, FI_HOT_DATA);
   2799
   2800	err = -EAGAIN;
   2801	if (f2fs_has_inline_data(inode)) {
   2802		err = f2fs_write_inline_data(inode, page);
   2803		if (!err)
   2804			goto out;
   2805	}
   2806
   2807	if (err == -EAGAIN) {
   2808		err = f2fs_do_write_data_page(&fio);
   2809		if (err == -EAGAIN) {
   2810			fio.need_lock = LOCK_REQ;
   2811			err = f2fs_do_write_data_page(&fio);
   2812		}
   2813	}
   2814
   2815	if (err) {
   2816		file_set_keep_isize(inode);
   2817	} else {
   2818		spin_lock(&F2FS_I(inode)->i_size_lock);
   2819		if (F2FS_I(inode)->last_disk_size < psize)
   2820			F2FS_I(inode)->last_disk_size = psize;
   2821		spin_unlock(&F2FS_I(inode)->i_size_lock);
   2822	}
   2823
   2824done:
   2825	if (err && err != -ENOENT)
   2826		goto redirty_out;
   2827
   2828out:
   2829	inode_dec_dirty_pages(inode);
   2830	if (err) {
   2831		ClearPageUptodate(page);
   2832		clear_page_private_gcing(page);
   2833	}
   2834
   2835	if (wbc->for_reclaim) {
   2836		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
   2837		clear_inode_flag(inode, FI_HOT_DATA);
   2838		f2fs_remove_dirty_inode(inode);
   2839		submitted = NULL;
   2840	}
   2841	unlock_page(page);
   2842	if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
   2843			!F2FS_I(inode)->cp_task && allow_balance)
   2844		f2fs_balance_fs(sbi, need_balance_fs);
   2845
   2846	if (unlikely(f2fs_cp_error(sbi))) {
   2847		f2fs_submit_merged_write(sbi, DATA);
   2848		f2fs_submit_merged_ipu_write(sbi, bio, NULL);
   2849		submitted = NULL;
   2850	}
   2851
   2852	if (submitted)
   2853		*submitted = fio.submitted ? 1 : 0;
   2854
   2855	return 0;
   2856
   2857redirty_out:
   2858	redirty_page_for_writepage(wbc, page);
   2859	/*
    2860	 * pageout() in MM translates EAGAIN, so it calls handle_write_error()
   2861	 * -> mapping_set_error() -> set_bit(AS_EIO, ...).
    2862	 * file_write_and_wait_range() will see the EIO error, which is critical
    2863	 * for fsync() to return the atomic_write failure to the user.
   2864	 */
   2865	if (!err || wbc->for_reclaim)
   2866		return AOP_WRITEPAGE_ACTIVATE;
   2867	unlock_page(page);
   2868	return err;
   2869}
   2870
   2871static int f2fs_write_data_page(struct page *page,
   2872					struct writeback_control *wbc)
   2873{
   2874#ifdef CONFIG_F2FS_FS_COMPRESSION
   2875	struct inode *inode = page->mapping->host;
   2876
   2877	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
   2878		goto out;
   2879
   2880	if (f2fs_compressed_file(inode)) {
   2881		if (f2fs_is_compressed_cluster(inode, page->index)) {
   2882			redirty_page_for_writepage(wbc, page);
   2883			return AOP_WRITEPAGE_ACTIVATE;
   2884		}
   2885	}
   2886out:
   2887#endif
   2888
   2889	return f2fs_write_single_data_page(page, NULL, NULL, NULL,
   2890						wbc, FS_DATA_IO, 0, true);
   2891}
   2892
   2893/*
    2894 * This function was copied from write_cache_pages() in mm/page-writeback.c.
    2895 * The major change is that the write step for cold data pages is separated
    2896 * from that for warm/hot data pages.
   2897 */
   2898static int f2fs_write_cache_pages(struct address_space *mapping,
   2899					struct writeback_control *wbc,
   2900					enum iostat_type io_type)
   2901{
   2902	int ret = 0;
   2903	int done = 0, retry = 0;
   2904	struct pagevec pvec;
   2905	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
   2906	struct bio *bio = NULL;
   2907	sector_t last_block;
   2908#ifdef CONFIG_F2FS_FS_COMPRESSION
   2909	struct inode *inode = mapping->host;
   2910	struct compress_ctx cc = {
   2911		.inode = inode,
   2912		.log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
   2913		.cluster_size = F2FS_I(inode)->i_cluster_size,
   2914		.cluster_idx = NULL_CLUSTER,
   2915		.rpages = NULL,
   2916		.nr_rpages = 0,
   2917		.cpages = NULL,
   2918		.valid_nr_cpages = 0,
   2919		.rbuf = NULL,
   2920		.cbuf = NULL,
   2921		.rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size,
   2922		.private = NULL,
   2923	};
   2924#endif
   2925	int nr_pages;
   2926	pgoff_t index;
   2927	pgoff_t end;		/* Inclusive */
   2928	pgoff_t done_index;
   2929	int range_whole = 0;
   2930	xa_mark_t tag;
   2931	int nwritten = 0;
   2932	int submitted = 0;
   2933	int i;
   2934
   2935	pagevec_init(&pvec);
   2936
   2937	if (get_dirty_pages(mapping->host) <=
   2938				SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
   2939		set_inode_flag(mapping->host, FI_HOT_DATA);
   2940	else
   2941		clear_inode_flag(mapping->host, FI_HOT_DATA);
   2942
   2943	if (wbc->range_cyclic) {
   2944		index = mapping->writeback_index; /* prev offset */
   2945		end = -1;
   2946	} else {
   2947		index = wbc->range_start >> PAGE_SHIFT;
   2948		end = wbc->range_end >> PAGE_SHIFT;
   2949		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
   2950			range_whole = 1;
   2951	}
   2952	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
   2953		tag = PAGECACHE_TAG_TOWRITE;
   2954	else
   2955		tag = PAGECACHE_TAG_DIRTY;
   2956retry:
   2957	retry = 0;
   2958	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
   2959		tag_pages_for_writeback(mapping, index, end);
   2960	done_index = index;
   2961	while (!done && !retry && (index <= end)) {
   2962		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
   2963				tag);
   2964		if (nr_pages == 0)
   2965			break;
   2966
   2967		for (i = 0; i < nr_pages; i++) {
   2968			struct page *page = pvec.pages[i];
   2969			bool need_readd;
   2970readd:
   2971			need_readd = false;
   2972#ifdef CONFIG_F2FS_FS_COMPRESSION
   2973			if (f2fs_compressed_file(inode)) {
   2974				void *fsdata = NULL;
   2975				struct page *pagep;
   2976				int ret2;
   2977
   2978				ret = f2fs_init_compress_ctx(&cc);
   2979				if (ret) {
   2980					done = 1;
   2981					break;
   2982				}
   2983
   2984				if (!f2fs_cluster_can_merge_page(&cc,
   2985								page->index)) {
   2986					ret = f2fs_write_multi_pages(&cc,
   2987						&submitted, wbc, io_type);
   2988					if (!ret)
   2989						need_readd = true;
   2990					goto result;
   2991				}
   2992
   2993				if (unlikely(f2fs_cp_error(sbi)))
   2994					goto lock_page;
   2995
   2996				if (!f2fs_cluster_is_empty(&cc))
   2997					goto lock_page;
   2998
   2999				ret2 = f2fs_prepare_compress_overwrite(
   3000							inode, &pagep,
   3001							page->index, &fsdata);
   3002				if (ret2 < 0) {
   3003					ret = ret2;
   3004					done = 1;
   3005					break;
   3006				} else if (ret2 &&
   3007					(!f2fs_compress_write_end(inode,
   3008						fsdata, page->index, 1) ||
   3009					 !f2fs_all_cluster_page_loaded(&cc,
   3010						&pvec, i, nr_pages))) {
   3011					retry = 1;
   3012					break;
   3013				}
   3014			}
   3015#endif
    3016			/* give priority to WB_SYNC threads */
   3017			if (atomic_read(&sbi->wb_sync_req[DATA]) &&
   3018					wbc->sync_mode == WB_SYNC_NONE) {
   3019				done = 1;
   3020				break;
   3021			}
   3022#ifdef CONFIG_F2FS_FS_COMPRESSION
   3023lock_page:
   3024#endif
   3025			done_index = page->index;
   3026retry_write:
   3027			lock_page(page);
   3028
   3029			if (unlikely(page->mapping != mapping)) {
   3030continue_unlock:
   3031				unlock_page(page);
   3032				continue;
   3033			}
   3034
   3035			if (!PageDirty(page)) {
   3036				/* someone wrote it for us */
   3037				goto continue_unlock;
   3038			}
   3039
   3040			if (PageWriteback(page)) {
   3041				if (wbc->sync_mode != WB_SYNC_NONE)
   3042					f2fs_wait_on_page_writeback(page,
   3043							DATA, true, true);
   3044				else
   3045					goto continue_unlock;
   3046			}
   3047
   3048			if (!clear_page_dirty_for_io(page))
   3049				goto continue_unlock;
   3050
   3051#ifdef CONFIG_F2FS_FS_COMPRESSION
   3052			if (f2fs_compressed_file(inode)) {
   3053				get_page(page);
   3054				f2fs_compress_ctx_add_page(&cc, page);
   3055				continue;
   3056			}
   3057#endif
   3058			ret = f2fs_write_single_data_page(page, &submitted,
   3059					&bio, &last_block, wbc, io_type,
   3060					0, true);
   3061			if (ret == AOP_WRITEPAGE_ACTIVATE)
   3062				unlock_page(page);
   3063#ifdef CONFIG_F2FS_FS_COMPRESSION
   3064result:
   3065#endif
   3066			nwritten += submitted;
   3067			wbc->nr_to_write -= submitted;
   3068
   3069			if (unlikely(ret)) {
   3070				/*
   3071				 * keep nr_to_write, since vfs uses this to
   3072				 * get # of written pages.
   3073				 */
   3074				if (ret == AOP_WRITEPAGE_ACTIVATE) {
   3075					ret = 0;
   3076					goto next;
   3077				} else if (ret == -EAGAIN) {
   3078					ret = 0;
   3079					if (wbc->sync_mode == WB_SYNC_ALL) {
   3080						f2fs_io_schedule_timeout(
   3081							DEFAULT_IO_TIMEOUT);
   3082						goto retry_write;
   3083					}
   3084					goto next;
   3085				}
   3086				done_index = page->index + 1;
   3087				done = 1;
   3088				break;
   3089			}
   3090
   3091			if (wbc->nr_to_write <= 0 &&
   3092					wbc->sync_mode == WB_SYNC_NONE) {
   3093				done = 1;
   3094				break;
   3095			}
   3096next:
   3097			if (need_readd)
   3098				goto readd;
   3099		}
   3100		pagevec_release(&pvec);
   3101		cond_resched();
   3102	}
   3103#ifdef CONFIG_F2FS_FS_COMPRESSION
    3104	/* flush remaining pages in the compress cluster */
   3105	if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) {
   3106		ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type);
   3107		nwritten += submitted;
   3108		wbc->nr_to_write -= submitted;
   3109		if (ret) {
   3110			done = 1;
   3111			retry = 0;
   3112		}
   3113	}
   3114	if (f2fs_compressed_file(inode))
   3115		f2fs_destroy_compress_ctx(&cc, false);
   3116#endif
   3117	if (retry) {
   3118		index = 0;
   3119		end = -1;
   3120		goto retry;
   3121	}
   3122	if (wbc->range_cyclic && !done)
   3123		done_index = 0;
   3124	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
   3125		mapping->writeback_index = done_index;
   3126
   3127	if (nwritten)
   3128		f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
   3129								NULL, 0, DATA);
   3130	/* submit cached bio of IPU write */
   3131	if (bio)
   3132		f2fs_submit_merged_ipu_write(sbi, &bio, NULL);
   3133
   3134	return ret;
   3135}
   3136
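        /*
         * Descriptive note (added): decide whether writeback of this inode
         * should be serialized under sbi->writepages, e.g. for compressed
         * data, WB_SYNC_NONE writeback, or inodes with many dirty pages. The
         * checkpoint flush task is excluded to avoid deadlock, and non-regular
         * and quota files are never serialized.
         */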
   3137static inline bool __should_serialize_io(struct inode *inode,
   3138					struct writeback_control *wbc)
   3139{
    3140	/* to avoid deadlock in the data flush path */
   3141	if (F2FS_I(inode)->cp_task)
   3142		return false;
   3143
   3144	if (!S_ISREG(inode->i_mode))
   3145		return false;
   3146	if (IS_NOQUOTA(inode))
   3147		return false;
   3148
   3149	if (f2fs_need_compress_data(inode))
   3150		return true;
   3151	if (wbc->sync_mode != WB_SYNC_ALL)
   3152		return true;
   3153	if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
   3154		return true;
   3155	return false;
   3156}
   3157
   3158static int __f2fs_write_data_pages(struct address_space *mapping,
   3159						struct writeback_control *wbc,
   3160						enum iostat_type io_type)
   3161{
   3162	struct inode *inode = mapping->host;
   3163	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   3164	struct blk_plug plug;
   3165	int ret;
   3166	bool locked = false;
   3167
    3168	/* deal with chardevs and other special files */
   3169	if (!mapping->a_ops->writepage)
   3170		return 0;
   3171
   3172	/* skip writing if there is no dirty page in this inode */
   3173	if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
   3174		return 0;
   3175
   3176	/* during POR, we don't need to trigger writepage at all. */
   3177	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
   3178		goto skip_write;
   3179
   3180	if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
   3181			wbc->sync_mode == WB_SYNC_NONE &&
   3182			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
   3183			f2fs_available_free_memory(sbi, DIRTY_DENTS))
   3184		goto skip_write;
   3185
    3186	/* skip writing during the file defragment preparation stage */
   3187	if (is_inode_flag_set(inode, FI_SKIP_WRITES))
   3188		goto skip_write;
   3189
   3190	trace_f2fs_writepages(mapping->host, wbc, DATA);
   3191
    3192	/* to avoid splitting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
   3193	if (wbc->sync_mode == WB_SYNC_ALL)
   3194		atomic_inc(&sbi->wb_sync_req[DATA]);
   3195	else if (atomic_read(&sbi->wb_sync_req[DATA])) {
   3196		/* to avoid potential deadlock */
   3197		if (current->plug)
   3198			blk_finish_plug(current->plug);
   3199		goto skip_write;
   3200	}
   3201
   3202	if (__should_serialize_io(inode, wbc)) {
   3203		mutex_lock(&sbi->writepages);
   3204		locked = true;
   3205	}
   3206
   3207	blk_start_plug(&plug);
   3208	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
   3209	blk_finish_plug(&plug);
   3210
   3211	if (locked)
   3212		mutex_unlock(&sbi->writepages);
   3213
   3214	if (wbc->sync_mode == WB_SYNC_ALL)
   3215		atomic_dec(&sbi->wb_sync_req[DATA]);
   3216	/*
    3217	 * if some pages were truncated, we cannot guarantee that mapping->host
    3218	 * can be used to detect pending bios.
   3219	 */
   3220
   3221	f2fs_remove_dirty_inode(inode);
   3222	return ret;
   3223
   3224skip_write:
   3225	wbc->pages_skipped += get_dirty_pages(inode);
   3226	trace_f2fs_writepages(mapping->host, wbc, DATA);
   3227	return 0;
   3228}
   3229
   3230static int f2fs_write_data_pages(struct address_space *mapping,
   3231			    struct writeback_control *wbc)
   3232{
   3233	struct inode *inode = mapping->host;
   3234
   3235	return __f2fs_write_data_pages(mapping, wbc,
   3236			F2FS_I(inode)->cp_task == current ?
   3237			FS_CP_DATA_IO : FS_DATA_IO);
   3238}
   3239
   3240void f2fs_write_failed(struct inode *inode, loff_t to)
   3241{
   3242	loff_t i_size = i_size_read(inode);
   3243
   3244	if (IS_NOQUOTA(inode))
   3245		return;
   3246
   3247	/* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
   3248	if (to > i_size && !f2fs_verity_in_progress(inode)) {
   3249		f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
   3250		filemap_invalidate_lock(inode->i_mapping);
   3251
   3252		truncate_pagecache(inode, i_size);
   3253		f2fs_truncate_blocks(inode, i_size, true);
   3254
   3255		filemap_invalidate_unlock(inode->i_mapping);
   3256		f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
   3257	}
   3258}
   3259
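        /*
         * Descriptive note (added): resolve the block address backing the page
         * in ->write_begin: convert or read inline data as needed, consult the
         * extent cache, and take f2fs_do_map_lock() when a new block may have
         * to be reserved.
         */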
   3260static int prepare_write_begin(struct f2fs_sb_info *sbi,
   3261			struct page *page, loff_t pos, unsigned len,
   3262			block_t *blk_addr, bool *node_changed)
   3263{
   3264	struct inode *inode = page->mapping->host;
   3265	pgoff_t index = page->index;
   3266	struct dnode_of_data dn;
   3267	struct page *ipage;
   3268	bool locked = false;
   3269	struct extent_info ei = {0, };
   3270	int err = 0;
   3271	int flag;
   3272
   3273	/*
   3274	 * If a whole page is being written and we already preallocated all the
   3275	 * blocks, then there is no need to get a block address now.
   3276	 */
   3277	if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL))
   3278		return 0;
   3279
   3280	/* f2fs_lock_op avoids race between write CP and convert_inline_page */
   3281	if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode))
   3282		flag = F2FS_GET_BLOCK_DEFAULT;
   3283	else
   3284		flag = F2FS_GET_BLOCK_PRE_AIO;
   3285
   3286	if (f2fs_has_inline_data(inode) ||
   3287			(pos & PAGE_MASK) >= i_size_read(inode)) {
   3288		f2fs_do_map_lock(sbi, flag, true);
   3289		locked = true;
   3290	}
   3291
   3292restart:
   3293	/* check inline_data */
   3294	ipage = f2fs_get_node_page(sbi, inode->i_ino);
   3295	if (IS_ERR(ipage)) {
   3296		err = PTR_ERR(ipage);
   3297		goto unlock_out;
   3298	}
   3299
   3300	set_new_dnode(&dn, inode, ipage, ipage, 0);
   3301
   3302	if (f2fs_has_inline_data(inode)) {
   3303		if (pos + len <= MAX_INLINE_DATA(inode)) {
   3304			f2fs_do_read_inline_data(page, ipage);
   3305			set_inode_flag(inode, FI_DATA_EXIST);
   3306			if (inode->i_nlink)
   3307				set_page_private_inline(ipage);
   3308		} else {
   3309			err = f2fs_convert_inline_page(&dn, page);
   3310			if (err)
   3311				goto out;
   3312			if (dn.data_blkaddr == NULL_ADDR)
   3313				err = f2fs_get_block(&dn, index);
   3314		}
   3315	} else if (locked) {
   3316		err = f2fs_get_block(&dn, index);
   3317	} else {
   3318		if (f2fs_lookup_extent_cache(inode, index, &ei)) {
   3319			dn.data_blkaddr = ei.blk + index - ei.fofs;
   3320		} else {
   3321			/* hole case */
   3322			err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
   3323			if (err || dn.data_blkaddr == NULL_ADDR) {
   3324				f2fs_put_dnode(&dn);
   3325				f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
   3326								true);
   3327				WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
   3328				locked = true;
   3329				goto restart;
   3330			}
   3331		}
   3332	}
   3333
   3334	/* convert_inline_page can make node_changed */
   3335	*blk_addr = dn.data_blkaddr;
   3336	*node_changed = dn.node_changed;
   3337out:
   3338	f2fs_put_dnode(&dn);
   3339unlock_out:
   3340	if (locked)
   3341		f2fs_do_map_lock(sbi, flag, false);
   3342	return err;
   3343}
   3344
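        /*
         * Descriptive note (added): look up the block address at @index
         * without allocating anything; a hole is reported as NULL_ADDR with a
         * zero return value.
         */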
   3345static int __find_data_block(struct inode *inode, pgoff_t index,
   3346				block_t *blk_addr)
   3347{
   3348	struct dnode_of_data dn;
   3349	struct page *ipage;
   3350	struct extent_info ei = {0, };
   3351	int err = 0;
   3352
   3353	ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
   3354	if (IS_ERR(ipage))
   3355		return PTR_ERR(ipage);
   3356
   3357	set_new_dnode(&dn, inode, ipage, ipage, 0);
   3358
   3359	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
   3360		dn.data_blkaddr = ei.blk + index - ei.fofs;
   3361	} else {
   3362		/* hole case */
   3363		err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
   3364		if (err) {
   3365			dn.data_blkaddr = NULL_ADDR;
   3366			err = 0;
   3367		}
   3368	}
   3369	*blk_addr = dn.data_blkaddr;
   3370	f2fs_put_dnode(&dn);
   3371	return err;
   3372}
   3373
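        /*
         * Descriptive note (added): reserve (allocating if necessary) a block
         * at @index under the F2FS_GET_BLOCK_PRE_AIO map lock and return its
         * address.
         */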
   3374static int __reserve_data_block(struct inode *inode, pgoff_t index,
   3375				block_t *blk_addr, bool *node_changed)
   3376{
   3377	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   3378	struct dnode_of_data dn;
   3379	struct page *ipage;
   3380	int err = 0;
   3381
   3382	f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
   3383
   3384	ipage = f2fs_get_node_page(sbi, inode->i_ino);
   3385	if (IS_ERR(ipage)) {
   3386		err = PTR_ERR(ipage);
   3387		goto unlock_out;
   3388	}
   3389	set_new_dnode(&dn, inode, ipage, ipage, 0);
   3390
   3391	err = f2fs_get_block(&dn, index);
   3392
   3393	*blk_addr = dn.data_blkaddr;
   3394	*node_changed = dn.node_changed;
   3395	f2fs_put_dnode(&dn);
   3396
   3397unlock_out:
   3398	f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
   3399	return err;
   3400}
   3401
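        /*
         * Descriptive note (added): ->write_begin helper for atomic writes.
         * Prefer a block already present in the COW inode; otherwise reserve
         * one there and, when the original inode already has data at this
         * index, return the original block address so the old contents are
         * read into the page first.
         */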
   3402static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi,
   3403			struct page *page, loff_t pos, unsigned int len,
   3404			block_t *blk_addr, bool *node_changed)
   3405{
   3406	struct inode *inode = page->mapping->host;
   3407	struct inode *cow_inode = F2FS_I(inode)->cow_inode;
   3408	pgoff_t index = page->index;
   3409	int err = 0;
   3410	block_t ori_blk_addr;
   3411
    3412	/* If pos is beyond the end of the file, reserve a new block in the COW inode */
   3413	if ((pos & PAGE_MASK) >= i_size_read(inode))
   3414		return __reserve_data_block(cow_inode, index, blk_addr,
   3415					node_changed);
   3416
   3417	/* Look for the block in COW inode first */
   3418	err = __find_data_block(cow_inode, index, blk_addr);
   3419	if (err)
   3420		return err;
   3421	else if (*blk_addr != NULL_ADDR)
   3422		return 0;
   3423
   3424	/* Look for the block in the original inode */
   3425	err = __find_data_block(inode, index, &ori_blk_addr);
   3426	if (err)
   3427		return err;
   3428
   3429	/* Finally, we should reserve a new block in COW inode for the update */
   3430	err = __reserve_data_block(cow_inode, index, blk_addr, node_changed);
   3431	if (err)
   3432		return err;
   3433
   3434	if (ori_blk_addr != NULL_ADDR)
   3435		*blk_addr = ori_blk_addr;
   3436	return 0;
   3437}
   3438
   3439static int f2fs_write_begin(struct file *file, struct address_space *mapping,
   3440		loff_t pos, unsigned len, struct page **pagep, void **fsdata)
   3441{
   3442	struct inode *inode = mapping->host;
   3443	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   3444	struct page *page = NULL;
   3445	pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
   3446	bool need_balance = false;
   3447	block_t blkaddr = NULL_ADDR;
   3448	int err = 0;
   3449
   3450	trace_f2fs_write_begin(inode, pos, len);
   3451
   3452	if (!f2fs_is_checkpoint_ready(sbi)) {
   3453		err = -ENOSPC;
   3454		goto fail;
   3455	}
   3456
   3457	/*
   3458	 * We should check this at this moment to avoid deadlock on inode page
   3459	 * and #0 page. The locking rule for inline_data conversion should be:
   3460	 * lock_page(page #0) -> lock_page(inode_page)
   3461	 */
   3462	if (index != 0) {
   3463		err = f2fs_convert_inline_inode(inode);
   3464		if (err)
   3465			goto fail;
   3466	}
   3467
   3468#ifdef CONFIG_F2FS_FS_COMPRESSION
   3469	if (f2fs_compressed_file(inode)) {
   3470		int ret;
   3471
   3472		*fsdata = NULL;
   3473
   3474		if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode)))
   3475			goto repeat;
   3476
   3477		ret = f2fs_prepare_compress_overwrite(inode, pagep,
   3478							index, fsdata);
   3479		if (ret < 0) {
   3480			err = ret;
   3481			goto fail;
   3482		} else if (ret) {
   3483			return 0;
   3484		}
   3485	}
   3486#endif
   3487
   3488repeat:
   3489	/*
   3490	 * Do not use grab_cache_page_write_begin() to avoid deadlock due to
    3491	 * wait_for_stable_page. We will do that wait below under our IO control.
   3492	 */
   3493	page = f2fs_pagecache_get_page(mapping, index,
   3494				FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
   3495	if (!page) {
   3496		err = -ENOMEM;
   3497		goto fail;
   3498	}
   3499
   3500	/* TODO: cluster can be compressed due to race with .writepage */
   3501
   3502	*pagep = page;
   3503
   3504	if (f2fs_is_atomic_file(inode))
   3505		err = prepare_atomic_write_begin(sbi, page, pos, len,
   3506					&blkaddr, &need_balance);
   3507	else
   3508		err = prepare_write_begin(sbi, page, pos, len,
   3509					&blkaddr, &need_balance);
   3510	if (err)
   3511		goto fail;
   3512
   3513	if (need_balance && !IS_NOQUOTA(inode) &&
   3514			has_not_enough_free_secs(sbi, 0, 0)) {
   3515		unlock_page(page);
   3516		f2fs_balance_fs(sbi, true);
   3517		lock_page(page);
   3518		if (page->mapping != mapping) {
   3519			/* The page got truncated from under us */
   3520			f2fs_put_page(page, 1);
   3521			goto repeat;
   3522		}
   3523	}
   3524
   3525	f2fs_wait_on_page_writeback(page, DATA, false, true);
   3526
   3527	if (len == PAGE_SIZE || PageUptodate(page))
   3528		return 0;
   3529
   3530	if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) &&
   3531	    !f2fs_verity_in_progress(inode)) {
   3532		zero_user_segment(page, len, PAGE_SIZE);
   3533		return 0;
   3534	}
   3535
   3536	if (blkaddr == NEW_ADDR) {
   3537		zero_user_segment(page, 0, PAGE_SIZE);
   3538		SetPageUptodate(page);
   3539	} else {
   3540		if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
   3541				DATA_GENERIC_ENHANCE_READ)) {
   3542			err = -EFSCORRUPTED;
   3543			goto fail;
   3544		}
   3545		err = f2fs_submit_page_read(inode, page, blkaddr, 0, true);
   3546		if (err)
   3547			goto fail;
   3548
   3549		lock_page(page);
   3550		if (unlikely(page->mapping != mapping)) {
   3551			f2fs_put_page(page, 1);
   3552			goto repeat;
   3553		}
   3554		if (unlikely(!PageUptodate(page))) {
   3555			err = -EIO;
   3556			goto fail;
   3557		}
   3558	}
   3559	return 0;
   3560
   3561fail:
   3562	f2fs_put_page(page, 1);
   3563	f2fs_write_failed(inode, pos + len);
   3564	return err;
   3565}
   3566
   3567static int f2fs_write_end(struct file *file,
   3568			struct address_space *mapping,
   3569			loff_t pos, unsigned len, unsigned copied,
   3570			struct page *page, void *fsdata)
   3571{
   3572	struct inode *inode = page->mapping->host;
   3573
   3574	trace_f2fs_write_end(inode, pos, len, copied);
   3575
   3576	/*
    3577	 * This should come from len == PAGE_SIZE, and we expect copied
    3578	 * to be PAGE_SIZE. Otherwise, we treat it as zero copied and
    3579	 * let generic_perform_write() try to copy the data again via copied=0.
   3580	 */
   3581	if (!PageUptodate(page)) {
   3582		if (unlikely(copied != len))
   3583			copied = 0;
   3584		else
   3585			SetPageUptodate(page);
   3586	}
   3587
   3588#ifdef CONFIG_F2FS_FS_COMPRESSION
   3589	/* overwrite compressed file */
   3590	if (f2fs_compressed_file(inode) && fsdata) {
   3591		f2fs_compress_write_end(inode, fsdata, page->index, copied);
   3592		f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
   3593
   3594		if (pos + copied > i_size_read(inode) &&
   3595				!f2fs_verity_in_progress(inode))
   3596			f2fs_i_size_write(inode, pos + copied);
   3597		return copied;
   3598	}
   3599#endif
   3600
   3601	if (!copied)
   3602		goto unlock_out;
   3603
   3604	set_page_dirty(page);
   3605
   3606	if (pos + copied > i_size_read(inode) &&
   3607	    !f2fs_verity_in_progress(inode)) {
   3608		f2fs_i_size_write(inode, pos + copied);
   3609		if (f2fs_is_atomic_file(inode))
   3610			f2fs_i_size_write(F2FS_I(inode)->cow_inode,
   3611					pos + copied);
   3612	}
   3613unlock_out:
   3614	f2fs_put_page(page, 1);
   3615	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
   3616	return copied;
   3617}
   3618
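       /*
        * ->invalidate_folio(): called when a folio is truncated or
        * invalidated; drop its dirty-page accounting and detach any
        * f2fs-private data attached to it.
        */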
   3619void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
   3620{
   3621	struct inode *inode = folio->mapping->host;
   3622	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   3623
   3624	if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
   3625				(offset || length != folio_size(folio)))
   3626		return;
   3627
   3628	if (folio_test_dirty(folio)) {
   3629		if (inode->i_ino == F2FS_META_INO(sbi)) {
   3630			dec_page_count(sbi, F2FS_DIRTY_META);
   3631		} else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
   3632			dec_page_count(sbi, F2FS_DIRTY_NODES);
   3633		} else {
   3634			inode_dec_dirty_pages(inode);
   3635			f2fs_remove_dirty_inode(inode);
   3636		}
   3637	}
   3638
   3639	clear_page_private_gcing(&folio->page);
   3640
   3641	if (test_opt(sbi, COMPRESS_CACHE) &&
   3642			inode->i_ino == F2FS_COMPRESS_INO(sbi))
   3643		clear_page_private_data(&folio->page);
   3644
   3645	folio_detach_private(folio);
   3646}
   3647
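       /*
        * ->release_folio(): dirty folios cannot be released; for clean ones,
        * drop the f2fs-private state so the folio can be freed.
        */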
   3648bool f2fs_release_folio(struct folio *folio, gfp_t wait)
   3649{
   3650	struct f2fs_sb_info *sbi;
   3651
   3652	/* If this is a dirty folio, keep its private data */
   3653	if (folio_test_dirty(folio))
   3654		return false;
   3655
   3656	sbi = F2FS_M_SB(folio->mapping);
   3657	if (test_opt(sbi, COMPRESS_CACHE)) {
   3658		struct inode *inode = folio->mapping->host;
   3659
   3660		if (inode->i_ino == F2FS_COMPRESS_INO(sbi))
   3661			clear_page_private_data(&folio->page);
   3662	}
   3663
   3664	clear_page_private_gcing(&folio->page);
   3665
   3666	folio_detach_private(folio);
   3667	return true;
   3668}
   3669
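       /*
        * ->dirty_folio() for data pages: mark the folio dirty and update the
        * per-inode dirty-page accounting; returns true only if the folio was
        * not already dirty.
        */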
   3670static bool f2fs_dirty_data_folio(struct address_space *mapping,
   3671		struct folio *folio)
   3672{
   3673	struct inode *inode = mapping->host;
   3674
   3675	trace_f2fs_set_page_dirty(&folio->page, DATA);
   3676
   3677	if (!folio_test_uptodate(folio))
   3678		folio_mark_uptodate(folio);
   3679	BUG_ON(folio_test_swapcache(folio));
   3680
   3681	if (!folio_test_dirty(folio)) {
   3682		filemap_dirty_folio(mapping, folio);
   3683		f2fs_update_dirty_folio(inode, folio);
   3684		return true;
   3685	}
   3686	return false;
   3687}
   3688
   3689
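       /*
        * Look up the on-disk block number of a logical block in a compressed
        * file.  Returns 0 if the cluster is stored compressed or the block
        * address is not valid.
        */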
   3690static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block)
   3691{
   3692#ifdef CONFIG_F2FS_FS_COMPRESSION
   3693	struct dnode_of_data dn;
   3694	sector_t start_idx, blknr = 0;
   3695	int ret;
   3696
   3697	start_idx = round_down(block, F2FS_I(inode)->i_cluster_size);
   3698
   3699	set_new_dnode(&dn, inode, NULL, NULL, 0);
   3700	ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
   3701	if (ret)
   3702		return 0;
   3703
   3704	if (dn.data_blkaddr != COMPRESS_ADDR) {
   3705		dn.ofs_in_node += block - start_idx;
   3706		blknr = f2fs_data_blkaddr(&dn);
   3707		if (!__is_valid_data_blkaddr(blknr))
   3708			blknr = 0;
   3709	}
   3710
   3711	f2fs_put_dnode(&dn);
   3712	return blknr;
   3713#else
   3714	return 0;
   3715#endif
   3716}
   3717
   3718
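       /*
        * ->bmap(): translate a logical block of the file into a physical
        * block number, flushing dirty pages first so the mapping is stable.
        * Returns 0 when no physical block can be reported.
        */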
   3719static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
   3720{
   3721	struct inode *inode = mapping->host;
   3722	sector_t blknr = 0;
   3723
   3724	if (f2fs_has_inline_data(inode))
   3725		goto out;
   3726
   3727	/* flush dirty pages so all blocks are allocated before mapping */
   3728	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
   3729		filemap_write_and_wait(mapping);
   3730
   3731	/* the block number must be within the file's maximum block count */
   3732	if (unlikely(block >= max_file_blocks(inode)))
   3733		goto out;
   3734
   3735	if (f2fs_compressed_file(inode)) {
   3736		blknr = f2fs_bmap_compress(inode, block);
   3737	} else {
   3738		struct f2fs_map_blocks map;
   3739
   3740		memset(&map, 0, sizeof(map));
   3741		map.m_lblk = block;
   3742		map.m_len = 1;
   3743		map.m_next_pgofs = NULL;
   3744		map.m_seg_type = NO_CHECK_TYPE;
   3745
   3746		if (!f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_BMAP))
   3747			blknr = map.m_pblk;
   3748	}
   3749out:
   3750	trace_f2fs_bmap(inode, block, blknr);
   3751	return blknr;
   3752}
   3753
   3754#ifdef CONFIG_MIGRATION
   3755#include <linux/migrate.h>
   3756
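       /*
        * Page-migration callback: move the page-cache slot and the
        * f2fs-private field from the old page to the new one, then copy the
        * page contents/state according to the migration mode.
        */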
   3757int f2fs_migrate_page(struct address_space *mapping,
   3758		struct page *newpage, struct page *page, enum migrate_mode mode)
   3759{
   3760	int rc, extra_count = 0;
   3761
   3762	BUG_ON(PageWriteback(page));
   3763
   3764	rc = migrate_page_move_mapping(mapping, newpage,
   3765				page, extra_count);
   3766	if (rc != MIGRATEPAGE_SUCCESS)
   3767		return rc;
   3768
   3769	/* make sure the new page starts with no stale private data */
   3770	set_page_private(newpage, 0);
   3771	if (PagePrivate(page)) {
   3772		set_page_private(newpage, page_private(page));
   3773		SetPagePrivate(newpage);
   3774		get_page(newpage);
   3775
   3776		set_page_private(page, 0);
   3777		ClearPagePrivate(page);
   3778		put_page(page);
   3779	}
   3780
   3781	if (mode != MIGRATE_SYNC_NO_COPY)
   3782		migrate_page_copy(newpage, page);
   3783	else
   3784		migrate_page_states(newpage, page);
   3785
   3786	return MIGRATEPAGE_SUCCESS;
   3787}
   3788#endif
   3789
   3790#ifdef CONFIG_SWAP
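       /*
        * Rewrite @blkcnt blocks starting at @start_blk so that each section
        * they cover is written out contiguously via pinned, out-of-place
        * writes; used below to realign swapfile extents to sections.
        */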
   3791static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
   3792							unsigned int blkcnt)
   3793{
   3794	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   3795	unsigned int blkofs;
   3796	unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
   3797	unsigned int secidx = start_blk / blk_per_sec;
   3798	unsigned int end_sec = secidx + blkcnt / blk_per_sec;
   3799	int ret = 0;
   3800
   3801	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
   3802	filemap_invalidate_lock(inode->i_mapping);
   3803
   3804	set_inode_flag(inode, FI_ALIGNED_WRITE);
   3805	set_inode_flag(inode, FI_OPU_WRITE);
   3806
   3807	for (; secidx < end_sec; secidx++) {
   3808		f2fs_down_write(&sbi->pin_sem);
   3809
   3810		f2fs_lock_op(sbi);
   3811		f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
   3812		f2fs_unlock_op(sbi);
   3813
   3814		set_inode_flag(inode, FI_SKIP_WRITES);
   3815
   3816		for (blkofs = 0; blkofs < blk_per_sec; blkofs++) {
   3817			struct page *page;
   3818			unsigned int blkidx = secidx * blk_per_sec + blkofs;
   3819
   3820			page = f2fs_get_lock_data_page(inode, blkidx, true);
   3821			if (IS_ERR(page)) {
   3822				f2fs_up_write(&sbi->pin_sem);
   3823				ret = PTR_ERR(page);
   3824				goto done;
   3825			}
   3826
   3827			set_page_dirty(page);
   3828			f2fs_put_page(page, 1);
   3829		}
   3830
   3831		clear_inode_flag(inode, FI_SKIP_WRITES);
   3832
   3833		ret = filemap_fdatawrite(inode->i_mapping);
   3834
   3835		f2fs_up_write(&sbi->pin_sem);
   3836
   3837		if (ret)
   3838			break;
   3839	}
   3840
   3841done:
   3842	clear_inode_flag(inode, FI_SKIP_WRITES);
   3843	clear_inode_flag(inode, FI_OPU_WRITE);
   3844	clear_inode_flag(inode, FI_ALIGNED_WRITE);
   3845
   3846	filemap_invalidate_unlock(inode->i_mapping);
   3847	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
   3848
   3849	return ret;
   3850}
   3851
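       /*
        * Walk the swapfile's block mapping: reject files with holes, migrate
        * extents that are not section-aligned, and register each contiguous
        * run with add_swap_extent().  Returns the number of extents or a
        * negative error.
        */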
   3852static int check_swap_activate(struct swap_info_struct *sis,
   3853				struct file *swap_file, sector_t *span)
   3854{
   3855	struct address_space *mapping = swap_file->f_mapping;
   3856	struct inode *inode = mapping->host;
   3857	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   3858	sector_t cur_lblock;
   3859	sector_t last_lblock;
   3860	sector_t pblock;
   3861	sector_t lowest_pblock = -1;
   3862	sector_t highest_pblock = 0;
   3863	int nr_extents = 0;
   3864	unsigned long nr_pblocks;
   3865	unsigned int blks_per_sec = BLKS_PER_SEC(sbi);
   3866	unsigned int sec_blks_mask = BLKS_PER_SEC(sbi) - 1;
   3867	unsigned int not_aligned = 0;
   3868	int ret = 0;
   3869
   3870	/*
   3871	 * Map all the blocks into the extent list.  This code doesn't try
   3872	 * to be very smart.
   3873	 */
   3874	cur_lblock = 0;
   3875	last_lblock = bytes_to_blks(inode, i_size_read(inode));
   3876
   3877	while (cur_lblock < last_lblock && cur_lblock < sis->max) {
   3878		struct f2fs_map_blocks map;
   3879retry:
   3880		cond_resched();
   3881
   3882		memset(&map, 0, sizeof(map));
   3883		map.m_lblk = cur_lblock;
   3884		map.m_len = last_lblock - cur_lblock;
   3885		map.m_next_pgofs = NULL;
   3886		map.m_next_extent = NULL;
   3887		map.m_seg_type = NO_CHECK_TYPE;
   3888		map.m_may_create = false;
   3889
   3890		ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
   3891		if (ret)
   3892			goto out;
   3893
   3894		/* hole */
   3895		if (!(map.m_flags & F2FS_MAP_FLAGS)) {
   3896			f2fs_err(sbi, "Swapfile has holes");
   3897			ret = -EINVAL;
   3898			goto out;
   3899		}
   3900
   3901		pblock = map.m_pblk;
   3902		nr_pblocks = map.m_len;
   3903
   3904		if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask ||
   3905				nr_pblocks & sec_blks_mask) {
   3906			not_aligned++;
   3907
   3908			nr_pblocks = roundup(nr_pblocks, blks_per_sec);
   3909			if (cur_lblock + nr_pblocks > sis->max)
   3910				nr_pblocks -= blks_per_sec;
   3911
   3912			if (!nr_pblocks) {
   3913			/* this extent is the last one */
   3914				nr_pblocks = map.m_len;
   3915				f2fs_warn(sbi, "Swapfile: last extent is not aligned to section");
   3916				goto next;
   3917			}
   3918
   3919			ret = f2fs_migrate_blocks(inode, cur_lblock,
   3920							nr_pblocks);
   3921			if (ret)
   3922				goto out;
   3923			goto retry;
   3924		}
   3925next:
   3926		if (cur_lblock + nr_pblocks >= sis->max)
   3927			nr_pblocks = sis->max - cur_lblock;
   3928
   3929		if (cur_lblock) {	/* exclude the header page */
   3930			if (pblock < lowest_pblock)
   3931				lowest_pblock = pblock;
   3932			if (pblock + nr_pblocks - 1 > highest_pblock)
   3933				highest_pblock = pblock + nr_pblocks - 1;
   3934		}
   3935
   3936		/*
   3937		 * Add this contiguous run of blocks to the swap extent list.
   3938		 */
   3939		ret = add_swap_extent(sis, cur_lblock, nr_pblocks, pblock);
   3940		if (ret < 0)
   3941			goto out;
   3942		nr_extents += ret;
   3943		cur_lblock += nr_pblocks;
   3944	}
   3945	ret = nr_extents;
   3946	*span = 1 + highest_pblock - lowest_pblock;
   3947	if (cur_lblock == 0)
   3948		cur_lblock = 1;	/* force Empty message */
   3949	sis->max = cur_lblock;
   3950	sis->pages = cur_lblock - 1;
   3951	sis->highest_bit = cur_lblock - 1;
   3952out:
   3953	if (not_aligned)
   3954		f2fs_warn(sbi, "Swapfile (%u) is not aligned to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%u * N)",
   3955			  not_aligned, blks_per_sec * F2FS_BLKSIZE);
   3956	return ret;
   3957}
   3958
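       /*
        * ->swap_activate(): check that the file can back swap (regular file,
        * writable fs, not LFS mode, no inline data or compression), build the
        * swap extent list and pin the file so its blocks stay in place.
        */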
   3959static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
   3960				sector_t *span)
   3961{
   3962	struct inode *inode = file_inode(file);
   3963	int ret;
   3964
   3965	if (!S_ISREG(inode->i_mode))
   3966		return -EINVAL;
   3967
   3968	if (f2fs_readonly(F2FS_I_SB(inode)->sb))
   3969		return -EROFS;
   3970
   3971	if (f2fs_lfs_mode(F2FS_I_SB(inode))) {
   3972		f2fs_err(F2FS_I_SB(inode),
   3973			"Swapfile not supported in LFS mode");
   3974		return -EINVAL;
   3975	}
   3976
   3977	ret = f2fs_convert_inline_inode(inode);
   3978	if (ret)
   3979		return ret;
   3980
   3981	if (!f2fs_disable_compressed_file(inode))
   3982		return -EINVAL;
   3983
   3984	f2fs_precache_extents(inode);
   3985
   3986	ret = check_swap_activate(sis, file, span);
   3987	if (ret < 0)
   3988		return ret;
   3989
   3990	set_inode_flag(inode, FI_PIN_FILE);
   3991	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
   3992	return ret;
   3993}
   3994
   3995static void f2fs_swap_deactivate(struct file *file)
   3996{
   3997	struct inode *inode = file_inode(file);
   3998
   3999	clear_inode_flag(inode, FI_PIN_FILE);
   4000}
   4001#else
   4002static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
   4003				sector_t *span)
   4004{
   4005	return -EOPNOTSUPP;
   4006}
   4007
   4008static void f2fs_swap_deactivate(struct file *file)
   4009{
   4010}
   4011#endif
   4012
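       /* address_space operations for data pages (as opposed to node/meta) */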
   4013const struct address_space_operations f2fs_dblock_aops = {
   4014	.read_folio	= f2fs_read_data_folio,
   4015	.readahead	= f2fs_readahead,
   4016	.writepage	= f2fs_write_data_page,
   4017	.writepages	= f2fs_write_data_pages,
   4018	.write_begin	= f2fs_write_begin,
   4019	.write_end	= f2fs_write_end,
   4020	.dirty_folio	= f2fs_dirty_data_folio,
   4021	.invalidate_folio = f2fs_invalidate_folio,
   4022	.release_folio	= f2fs_release_folio,
   4023	.direct_IO	= noop_direct_IO,
   4024	.bmap		= f2fs_bmap,
   4025	.swap_activate  = f2fs_swap_activate,
   4026	.swap_deactivate = f2fs_swap_deactivate,
   4027#ifdef CONFIG_MIGRATION
   4028	.migratepage    = f2fs_migrate_page,
   4029#endif
   4030};
   4031
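       /* clear the PAGECACHE_TAG_DIRTY tag of @page under the i_pages lock */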
   4032void f2fs_clear_page_cache_dirty_tag(struct page *page)
   4033{
   4034	struct address_space *mapping = page_mapping(page);
   4035	unsigned long flags;
   4036
   4037	xa_lock_irqsave(&mapping->i_pages, flags);
   4038	__xa_clear_mark(&mapping->i_pages, page_index(page),
   4039						PAGECACHE_TAG_DIRTY);
   4040	xa_unlock_irqrestore(&mapping->i_pages, flags);
   4041}
   4042
   4043int __init f2fs_init_post_read_processing(void)
   4044{
   4045	bio_post_read_ctx_cache =
   4046		kmem_cache_create("f2fs_bio_post_read_ctx",
   4047				  sizeof(struct bio_post_read_ctx), 0, 0, NULL);
   4048	if (!bio_post_read_ctx_cache)
   4049		goto fail;
   4050	bio_post_read_ctx_pool =
   4051		mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
   4052					 bio_post_read_ctx_cache);
   4053	if (!bio_post_read_ctx_pool)
   4054		goto fail_free_cache;
   4055	return 0;
   4056
   4057fail_free_cache:
   4058	kmem_cache_destroy(bio_post_read_ctx_cache);
   4059fail:
   4060	return -ENOMEM;
   4061}
   4062
   4063void f2fs_destroy_post_read_processing(void)
   4064{
   4065	mempool_destroy(bio_post_read_ctx_pool);
   4066	kmem_cache_destroy(bio_post_read_ctx_cache);
   4067}
   4068
   4069int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi)
   4070{
   4071	if (!f2fs_sb_has_encrypt(sbi) &&
   4072		!f2fs_sb_has_verity(sbi) &&
   4073		!f2fs_sb_has_compression(sbi))
   4074		return 0;
   4075
   4076	sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq",
   4077						 WQ_UNBOUND | WQ_HIGHPRI,
   4078						 num_online_cpus());
   4079	if (!sbi->post_read_wq)
   4080		return -ENOMEM;
   4081	return 0;
   4082}
   4083
   4084void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi)
   4085{
   4086	if (sbi->post_read_wq)
   4087		destroy_workqueue(sbi->post_read_wq);
   4088}
   4089
   4090int __init f2fs_init_bio_entry_cache(void)
   4091{
   4092	bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab",
   4093			sizeof(struct bio_entry));
   4094	if (!bio_entry_slab)
   4095		return -ENOMEM;
   4096	return 0;
   4097}
   4098
   4099void f2fs_destroy_bio_entry_cache(void)
   4100{
   4101	kmem_cache_destroy(bio_entry_slab);
   4102}
   4103
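       /*
        * ->iomap_begin() for the iomap direct I/O path: map the requested
        * byte range with f2fs_map_blocks() and describe it in @iomap as
        * mapped, unwritten or a hole.
        */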
   4104static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
   4105			    unsigned int flags, struct iomap *iomap,
   4106			    struct iomap *srcmap)
   4107{
   4108	struct f2fs_map_blocks map = {};
   4109	pgoff_t next_pgofs = 0;
   4110	int err;
   4111
   4112	map.m_lblk = bytes_to_blks(inode, offset);
   4113	map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1;
   4114	map.m_next_pgofs = &next_pgofs;
   4115	map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
   4116	if (flags & IOMAP_WRITE)
   4117		map.m_may_create = true;
   4118
   4119	err = f2fs_map_blocks(inode, &map, flags & IOMAP_WRITE,
   4120			      F2FS_GET_BLOCK_DIO);
   4121	if (err)
   4122		return err;
   4123
   4124	iomap->offset = blks_to_bytes(inode, map.m_lblk);
   4125
   4126	/*
   4127	 * When inline encryption is enabled, sometimes I/O to an encrypted file
   4128	 * has to be broken up to guarantee DUN contiguity.  Handle this by
   4129	 * limiting the length of the mapping returned.
   4130	 */
   4131	map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len);
   4132
   4133	if (map.m_flags & (F2FS_MAP_MAPPED | F2FS_MAP_UNWRITTEN)) {
   4134		iomap->length = blks_to_bytes(inode, map.m_len);
   4135		if (map.m_flags & F2FS_MAP_MAPPED) {
   4136			iomap->type = IOMAP_MAPPED;
   4137			iomap->flags |= IOMAP_F_MERGED;
   4138		} else {
   4139			iomap->type = IOMAP_UNWRITTEN;
   4140		}
   4141		if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk)))
   4142			return -EINVAL;
   4143
   4144		iomap->bdev = map.m_bdev;
   4145		iomap->addr = blks_to_bytes(inode, map.m_pblk);
   4146	} else {
   4147		iomap->length = blks_to_bytes(inode, next_pgofs) -
   4148				iomap->offset;
   4149		iomap->type = IOMAP_HOLE;
   4150		iomap->addr = IOMAP_NULL_ADDR;
   4151	}
   4152
   4153	if (map.m_flags & F2FS_MAP_NEW)
   4154		iomap->flags |= IOMAP_F_NEW;
   4155	if ((inode->i_state & I_DIRTY_DATASYNC) ||
   4156	    offset + length > i_size_read(inode))
   4157		iomap->flags |= IOMAP_F_DIRTY;
   4158
   4159	return 0;
   4160}
   4161
   4162const struct iomap_ops f2fs_iomap_ops = {
   4163	.iomap_begin	= f2fs_iomap_begin,
   4164};