cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

segment.c (136714B)


      1// SPDX-License-Identifier: GPL-2.0
      2/*
      3 * fs/f2fs/segment.c
      4 *
      5 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
      6 *             http://www.samsung.com/
      7 */
      8#include <linux/fs.h>
      9#include <linux/f2fs_fs.h>
     10#include <linux/bio.h>
     11#include <linux/blkdev.h>
     12#include <linux/sched/mm.h>
     13#include <linux/prefetch.h>
     14#include <linux/kthread.h>
     15#include <linux/swap.h>
     16#include <linux/timer.h>
     17#include <linux/freezer.h>
     18#include <linux/sched/signal.h>
     19#include <linux/random.h>
     20
     21#include "f2fs.h"
     22#include "segment.h"
     23#include "node.h"
     24#include "gc.h"
     25#include "iostat.h"
     26#include <trace/events/f2fs.h>
     27
     28#define __reverse_ffz(x) __reverse_ffs(~(x))
     29
     30static struct kmem_cache *discard_entry_slab;
     31static struct kmem_cache *discard_cmd_slab;
     32static struct kmem_cache *sit_entry_set_slab;
     33static struct kmem_cache *revoke_entry_slab;
     34
     35static unsigned long __reverse_ulong(unsigned char *str)
     36{
     37	unsigned long tmp = 0;
     38	int shift = 24, idx = 0;
     39
     40#if BITS_PER_LONG == 64
     41	shift = 56;
     42#endif
     43	while (shift >= 0) {
     44		tmp |= (unsigned long)str[idx++] << shift;
     45		shift -= BITS_PER_BYTE;
     46	}
     47	return tmp;
     48}
     49
     50/*
     51 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
     52 * MSB and LSB are reversed in a byte by f2fs_set_bit.
     53 */
     54static inline unsigned long __reverse_ffs(unsigned long word)
     55{
     56	int num = 0;
     57
     58#if BITS_PER_LONG == 64
     59	if ((word & 0xffffffff00000000UL) == 0)
     60		num += 32;
     61	else
     62		word >>= 32;
     63#endif
     64	if ((word & 0xffff0000) == 0)
     65		num += 16;
     66	else
     67		word >>= 16;
     68
     69	if ((word & 0xff00) == 0)
     70		num += 8;
     71	else
     72		word >>= 8;
     73
     74	if ((word & 0xf0) == 0)
     75		num += 4;
     76	else
     77		word >>= 4;
     78
     79	if ((word & 0xc) == 0)
     80		num += 2;
     81	else
     82		word >>= 2;
     83
     84	if ((word & 0x2) == 0)
     85		num += 1;
     86	return num;
     87}
     88
     89/*
     90 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
     91 * f2fs_set_bit makes MSB and LSB reversed in a byte.
     92 * @size must be an integral multiple of BITS_PER_LONG.
     93 * Example:
     94 *                             MSB <--> LSB
     95 *   f2fs_set_bit(0, bitmap) => 1000 0000
     96 *   f2fs_set_bit(7, bitmap) => 0000 0001
     97 */
     98static unsigned long __find_rev_next_bit(const unsigned long *addr,
     99			unsigned long size, unsigned long offset)
    100{
    101	const unsigned long *p = addr + BIT_WORD(offset);
    102	unsigned long result = size;
    103	unsigned long tmp;
    104
    105	if (offset >= size)
    106		return size;
    107
    108	size -= (offset & ~(BITS_PER_LONG - 1));
    109	offset %= BITS_PER_LONG;
    110
    111	while (1) {
    112		if (*p == 0)
    113			goto pass;
    114
    115		tmp = __reverse_ulong((unsigned char *)p);
    116
    117		tmp &= ~0UL >> offset;
    118		if (size < BITS_PER_LONG)
    119			tmp &= (~0UL << (BITS_PER_LONG - size));
    120		if (tmp)
    121			goto found;
    122pass:
    123		if (size <= BITS_PER_LONG)
    124			break;
    125		size -= BITS_PER_LONG;
    126		offset = 0;
    127		p++;
    128	}
    129	return result;
    130found:
    131	return result - size + __reverse_ffs(tmp);
    132}
    133
    134static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
    135			unsigned long size, unsigned long offset)
    136{
    137	const unsigned long *p = addr + BIT_WORD(offset);
    138	unsigned long result = size;
    139	unsigned long tmp;
    140
    141	if (offset >= size)
    142		return size;
    143
    144	size -= (offset & ~(BITS_PER_LONG - 1));
    145	offset %= BITS_PER_LONG;
    146
    147	while (1) {
    148		if (*p == ~0UL)
    149			goto pass;
    150
    151		tmp = __reverse_ulong((unsigned char *)p);
    152
    153		if (offset)
    154			tmp |= ~0UL << (BITS_PER_LONG - offset);
    155		if (size < BITS_PER_LONG)
    156			tmp |= ~0UL >> size;
    157		if (tmp != ~0UL)
    158			goto found;
    159pass:
    160		if (size <= BITS_PER_LONG)
    161			break;
    162		size -= BITS_PER_LONG;
    163		offset = 0;
    164		p++;
    165	}
    166	return result;
    167found:
    168	return result - size + __reverse_ffz(tmp);
    169}
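/*
 * A rough worked example of the reversed bit order above, for a
 * hypothetical segment bitmap "map" whose length in bits ("size", a
 * multiple of BITS_PER_LONG) has had only f2fs_set_bit(1, map) and
 * f2fs_set_bit(6, map) applied:
 *
 *   __find_rev_next_bit(map, size, 0)      returns 1
 *   __find_rev_next_bit(map, size, 2)      returns 6
 *   __find_rev_next_zero_bit(map, size, 2) returns 2
 */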
    170
    171bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
    172{
    173	int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
    174	int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
    175	int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
    176
    177	if (f2fs_lfs_mode(sbi))
    178		return false;
    179	if (sbi->gc_mode == GC_URGENT_HIGH)
    180		return true;
    181	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
    182		return true;
    183
    184	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
    185			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
    186}
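/*
 * In effect, outside LFS mode (and ignoring the urgent-GC and
 * CP-disabled early returns), SSR is requested once
 *
 *   free_sections(sbi) <= node_secs + 2 * dent_secs + imeta_secs +
 *                         min_ssr_sections + reserved_sections(sbi)
 *
 * i.e. when the dirty node/dentry/inode-meta footprint plus the
 * reserved and minimum-SSR sections can no longer fit in free sections.
 */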
    187
    188void f2fs_abort_atomic_write(struct inode *inode, bool clean)
    189{
    190	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
    191	struct f2fs_inode_info *fi = F2FS_I(inode);
    192
    193	if (f2fs_is_atomic_file(inode)) {
    194		if (clean)
    195			truncate_inode_pages_final(inode->i_mapping);
    196		clear_inode_flag(fi->cow_inode, FI_ATOMIC_FILE);
    197		iput(fi->cow_inode);
    198		fi->cow_inode = NULL;
    199		clear_inode_flag(inode, FI_ATOMIC_FILE);
    200
    201		spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
    202		sbi->atomic_files--;
    203		spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
    204	}
    205}
    206
    207static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
    208			block_t new_addr, block_t *old_addr, bool recover)
    209{
    210	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
    211	struct dnode_of_data dn;
    212	struct node_info ni;
    213	int err;
    214
    215retry:
    216	set_new_dnode(&dn, inode, NULL, NULL, 0);
    217	err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE_RA);
    218	if (err) {
    219		if (err == -ENOMEM) {
    220			f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
    221			goto retry;
    222		}
    223		return err;
    224	}
    225
    226	err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
    227	if (err) {
    228		f2fs_put_dnode(&dn);
    229		return err;
    230	}
    231
    232	if (recover) {
    233		/* dn.data_blkaddr is always valid */
    234		if (!__is_valid_data_blkaddr(new_addr)) {
    235			if (new_addr == NULL_ADDR)
    236				dec_valid_block_count(sbi, inode, 1);
    237			f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
    238			f2fs_update_data_blkaddr(&dn, new_addr);
    239		} else {
    240			f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
    241				new_addr, ni.version, true, true);
    242		}
    243	} else {
    244		blkcnt_t count = 1;
    245
    246		*old_addr = dn.data_blkaddr;
    247		f2fs_truncate_data_blocks_range(&dn, 1);
    248		dec_valid_block_count(sbi, F2FS_I(inode)->cow_inode, count);
    249		inc_valid_block_count(sbi, inode, &count);
    250		f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr,
    251					ni.version, true, false);
    252	}
    253
    254	f2fs_put_dnode(&dn);
    255	return 0;
    256}
    257
    258static void __complete_revoke_list(struct inode *inode, struct list_head *head,
    259					bool revoke)
    260{
    261	struct revoke_entry *cur, *tmp;
    262
    263	list_for_each_entry_safe(cur, tmp, head, list) {
    264		if (revoke)
    265			__replace_atomic_write_block(inode, cur->index,
    266						cur->old_addr, NULL, true);
    267		list_del(&cur->list);
    268		kmem_cache_free(revoke_entry_slab, cur);
    269	}
    270}
    271
    272static int __f2fs_commit_atomic_write(struct inode *inode)
    273{
    274	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
    275	struct f2fs_inode_info *fi = F2FS_I(inode);
    276	struct inode *cow_inode = fi->cow_inode;
    277	struct revoke_entry *new;
    278	struct list_head revoke_list;
    279	block_t blkaddr;
    280	struct dnode_of_data dn;
    281	pgoff_t len = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
    282	pgoff_t off = 0, blen, index;
    283	int ret = 0, i;
    284
    285	INIT_LIST_HEAD(&revoke_list);
    286
    287	while (len) {
    288		blen = min_t(pgoff_t, ADDRS_PER_BLOCK(cow_inode), len);
    289
    290		set_new_dnode(&dn, cow_inode, NULL, NULL, 0);
    291		ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
    292		if (ret && ret != -ENOENT) {
    293			goto out;
    294		} else if (ret == -ENOENT) {
    295			ret = 0;
    296			if (dn.max_level == 0)
    297				goto out;
    298			goto next;
    299		}
    300
    301		blen = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, cow_inode),
    302				len);
    303		index = off;
    304		for (i = 0; i < blen; i++, dn.ofs_in_node++, index++) {
    305			blkaddr = f2fs_data_blkaddr(&dn);
    306
    307			if (!__is_valid_data_blkaddr(blkaddr)) {
    308				continue;
    309			} else if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
    310					DATA_GENERIC_ENHANCE)) {
    311				f2fs_put_dnode(&dn);
    312				ret = -EFSCORRUPTED;
    313				goto out;
    314			}
    315
    316			new = f2fs_kmem_cache_alloc(revoke_entry_slab, GFP_NOFS,
    317							true, NULL);
    318
    319			ret = __replace_atomic_write_block(inode, index, blkaddr,
    320							&new->old_addr, false);
    321			if (ret) {
    322				f2fs_put_dnode(&dn);
    323				kmem_cache_free(revoke_entry_slab, new);
    324				goto out;
    325			}
    326
    327			f2fs_update_data_blkaddr(&dn, NULL_ADDR);
    328			new->index = index;
    329			list_add_tail(&new->list, &revoke_list);
    330		}
    331		f2fs_put_dnode(&dn);
    332next:
    333		off += blen;
    334		len -= blen;
    335	}
    336
    337out:
    338	__complete_revoke_list(inode, &revoke_list, ret ? true : false);
    339
    340	return ret;
    341}
    342
    343int f2fs_commit_atomic_write(struct inode *inode)
    344{
    345	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
    346	struct f2fs_inode_info *fi = F2FS_I(inode);
    347	int err;
    348
    349	err = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
    350	if (err)
    351		return err;
    352
    353	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
    354	f2fs_lock_op(sbi);
    355
    356	err = __f2fs_commit_atomic_write(inode);
    357
    358	f2fs_unlock_op(sbi);
    359	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
    360
    361	return err;
    362}
    363
    364/*
    365 * This function balances dirty node and dentry pages.
    366 * In addition, it controls garbage collection.
    367 */
    368void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
    369{
    370	if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
    371		f2fs_show_injection_info(sbi, FAULT_CHECKPOINT);
    372		f2fs_stop_checkpoint(sbi, false);
    373	}
    374
    375	/* balance_fs_bg() is allowed to remain pending */
    376	if (need && excess_cached_nats(sbi))
    377		f2fs_balance_fs_bg(sbi, false);
    378
    379	if (!f2fs_is_checkpoint_ready(sbi))
    380		return;
    381
    382	/*
    383	 * We should do GC, or end up with a checkpoint, if there are too many
    384	 * dirty dir/node pages and not enough free segments.
    385	 */
    386	if (has_not_enough_free_secs(sbi, 0, 0)) {
    387		if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
    388					sbi->gc_thread->f2fs_gc_task) {
    389			DEFINE_WAIT(wait);
    390
    391			prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
    392						TASK_UNINTERRUPTIBLE);
    393			wake_up(&sbi->gc_thread->gc_wait_queue_head);
    394			io_schedule();
    395			finish_wait(&sbi->gc_thread->fggc_wq, &wait);
    396		} else {
    397			struct f2fs_gc_control gc_control = {
    398				.victim_segno = NULL_SEGNO,
    399				.init_gc_type = BG_GC,
    400				.no_bg_gc = true,
    401				.should_migrate_blocks = false,
    402				.err_gc_skipped = false,
    403				.nr_free_secs = 1 };
    404			f2fs_down_write(&sbi->gc_lock);
    405			f2fs_gc(sbi, &gc_control);
    406		}
    407	}
    408}
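/*
 * Roughly, when free sections run short the function above takes one of
 * two paths: on GC_MERGE mounts with a live background GC task it wakes
 * that thread and sleeps on fggc_wq until woken; otherwise it runs a
 * synchronous f2fs_gc() call itself under gc_lock, asking for one free
 * section (nr_free_secs = 1).
 */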
    409
    410static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi)
    411{
    412	int factor = f2fs_rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2;
    413	unsigned int dents = get_pages(sbi, F2FS_DIRTY_DENTS);
    414	unsigned int qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
    415	unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES);
    416	unsigned int meta = get_pages(sbi, F2FS_DIRTY_META);
    417	unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
    418	unsigned int threshold = sbi->blocks_per_seg * factor *
    419					DEFAULT_DIRTY_THRESHOLD;
    420	unsigned int global_threshold = threshold * 3 / 2;
    421
    422	if (dents >= threshold || qdata >= threshold ||
    423		nodes >= threshold || meta >= threshold ||
    424		imeta >= threshold)
    425		return true;
    426	return dents + qdata + nodes + meta + imeta >  global_threshold;
    427}
    428
    429void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
    430{
    431	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
    432		return;
    433
    434	/* try to shrink the extent cache when there is not enough memory */
    435	if (!f2fs_available_free_memory(sbi, EXTENT_CACHE))
    436		f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);
    437
    438	/* check the # of cached NAT entries */
    439	if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
    440		f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
    441
    442	if (!f2fs_available_free_memory(sbi, FREE_NIDS))
    443		f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
    444	else
    445		f2fs_build_free_nids(sbi, false, false);
    446
    447	if (excess_dirty_nats(sbi) || excess_dirty_threshold(sbi) ||
    448		excess_prefree_segs(sbi) || !f2fs_space_for_roll_forward(sbi))
    449		goto do_sync;
    450
    451	/* there is in-flight background IO, or a foreground operation ran recently */
    452	if (is_inflight_io(sbi, REQ_TIME) ||
    453		(!f2fs_time_over(sbi, REQ_TIME) && f2fs_rwsem_is_locked(&sbi->cp_rwsem)))
    454		return;
    455
    456	/* the periodic checkpoint timeout threshold has been exceeded */
    457	if (f2fs_time_over(sbi, CP_TIME))
    458		goto do_sync;
    459
    460	/* checkpoint is the only way to shrink partial cached entries */
    461	if (f2fs_available_free_memory(sbi, NAT_ENTRIES) &&
    462		f2fs_available_free_memory(sbi, INO_ENTRIES))
    463		return;
    464
    465do_sync:
    466	if (test_opt(sbi, DATA_FLUSH) && from_bg) {
    467		struct blk_plug plug;
    468
    469		mutex_lock(&sbi->flush_lock);
    470
    471		blk_start_plug(&plug);
    472		f2fs_sync_dirty_inodes(sbi, FILE_INODE);
    473		blk_finish_plug(&plug);
    474
    475		mutex_unlock(&sbi->flush_lock);
    476	}
    477	f2fs_sync_fs(sbi->sb, true);
    478	stat_inc_bg_cp_count(sbi->stat_info);
    479}
    480
    481static int __submit_flush_wait(struct f2fs_sb_info *sbi,
    482				struct block_device *bdev)
    483{
    484	int ret = blkdev_issue_flush(bdev);
    485
    486	trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
    487				test_opt(sbi, FLUSH_MERGE), ret);
    488	return ret;
    489}
    490
    491static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
    492{
    493	int ret = 0;
    494	int i;
    495
    496	if (!f2fs_is_multi_device(sbi))
    497		return __submit_flush_wait(sbi, sbi->sb->s_bdev);
    498
    499	for (i = 0; i < sbi->s_ndevs; i++) {
    500		if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
    501			continue;
    502		ret = __submit_flush_wait(sbi, FDEV(i).bdev);
    503		if (ret)
    504			break;
    505	}
    506	return ret;
    507}
    508
    509static int issue_flush_thread(void *data)
    510{
    511	struct f2fs_sb_info *sbi = data;
    512	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
    513	wait_queue_head_t *q = &fcc->flush_wait_queue;
    514repeat:
    515	if (kthread_should_stop())
    516		return 0;
    517
    518	if (!llist_empty(&fcc->issue_list)) {
    519		struct flush_cmd *cmd, *next;
    520		int ret;
    521
    522		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
    523		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
    524
    525		cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);
    526
    527		ret = submit_flush_wait(sbi, cmd->ino);
    528		atomic_inc(&fcc->issued_flush);
    529
    530		llist_for_each_entry_safe(cmd, next,
    531					  fcc->dispatch_list, llnode) {
    532			cmd->ret = ret;
    533			complete(&cmd->wait);
    534		}
    535		fcc->dispatch_list = NULL;
    536	}
    537
    538	wait_event_interruptible(*q,
    539		kthread_should_stop() || !llist_empty(&fcc->issue_list));
    540	goto repeat;
    541}
    542
    543int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
    544{
    545	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
    546	struct flush_cmd cmd;
    547	int ret;
    548
    549	if (test_opt(sbi, NOBARRIER))
    550		return 0;
    551
    552	if (!test_opt(sbi, FLUSH_MERGE)) {
    553		atomic_inc(&fcc->queued_flush);
    554		ret = submit_flush_wait(sbi, ino);
    555		atomic_dec(&fcc->queued_flush);
    556		atomic_inc(&fcc->issued_flush);
    557		return ret;
    558	}
    559
    560	if (atomic_inc_return(&fcc->queued_flush) == 1 ||
    561	    f2fs_is_multi_device(sbi)) {
    562		ret = submit_flush_wait(sbi, ino);
    563		atomic_dec(&fcc->queued_flush);
    564
    565		atomic_inc(&fcc->issued_flush);
    566		return ret;
    567	}
    568
    569	cmd.ino = ino;
    570	init_completion(&cmd.wait);
    571
    572	llist_add(&cmd.llnode, &fcc->issue_list);
    573
    574	/*
    575	 * update issue_list before we wake up the issue_flush thread; this
    576	 * smp_mb() pairs with another barrier in ___wait_event(). See the
    577	 * comments of waitqueue_active() for more details.
    578	 */
    579	smp_mb();
    580
    581	if (waitqueue_active(&fcc->flush_wait_queue))
    582		wake_up(&fcc->flush_wait_queue);
    583
    584	if (fcc->f2fs_issue_flush) {
    585		wait_for_completion(&cmd.wait);
    586		atomic_dec(&fcc->queued_flush);
    587	} else {
    588		struct llist_node *list;
    589
    590		list = llist_del_all(&fcc->issue_list);
    591		if (!list) {
    592			wait_for_completion(&cmd.wait);
    593			atomic_dec(&fcc->queued_flush);
    594		} else {
    595			struct flush_cmd *tmp, *next;
    596
    597			ret = submit_flush_wait(sbi, ino);
    598
    599			llist_for_each_entry_safe(tmp, next, list, llnode) {
    600				if (tmp == &cmd) {
    601					cmd.ret = ret;
    602					atomic_dec(&fcc->queued_flush);
    603					continue;
    604				}
    605				tmp->ret = ret;
    606				complete(&tmp->wait);
    607			}
    608		}
    609	}
    610
    611	return cmd.ret;
    612}
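/*
 * Roughly, with FLUSH_MERGE the first queued waiter (or any waiter on a
 * multi-device fs) submits the flush directly; later callers enqueue a
 * flush_cmd and normally wait for the issue_flush thread to complete it.
 * If that thread has already gone away, one caller drains the llist and
 * completes the outstanding commands on behalf of the others.
 */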
    613
    614int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
    615{
    616	dev_t dev = sbi->sb->s_bdev->bd_dev;
    617	struct flush_cmd_control *fcc;
    618	int err = 0;
    619
    620	if (SM_I(sbi)->fcc_info) {
    621		fcc = SM_I(sbi)->fcc_info;
    622		if (fcc->f2fs_issue_flush)
    623			return err;
    624		goto init_thread;
    625	}
    626
    627	fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
    628	if (!fcc)
    629		return -ENOMEM;
    630	atomic_set(&fcc->issued_flush, 0);
    631	atomic_set(&fcc->queued_flush, 0);
    632	init_waitqueue_head(&fcc->flush_wait_queue);
    633	init_llist_head(&fcc->issue_list);
    634	SM_I(sbi)->fcc_info = fcc;
    635	if (!test_opt(sbi, FLUSH_MERGE))
    636		return err;
    637
    638init_thread:
    639	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
    640				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
    641	if (IS_ERR(fcc->f2fs_issue_flush)) {
    642		err = PTR_ERR(fcc->f2fs_issue_flush);
    643		kfree(fcc);
    644		SM_I(sbi)->fcc_info = NULL;
    645		return err;
    646	}
    647
    648	return err;
    649}
    650
    651void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
    652{
    653	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
    654
    655	if (fcc && fcc->f2fs_issue_flush) {
    656		struct task_struct *flush_thread = fcc->f2fs_issue_flush;
    657
    658		fcc->f2fs_issue_flush = NULL;
    659		kthread_stop(flush_thread);
    660	}
    661	if (free) {
    662		kfree(fcc);
    663		SM_I(sbi)->fcc_info = NULL;
    664	}
    665}
    666
    667int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
    668{
    669	int ret = 0, i;
    670
    671	if (!f2fs_is_multi_device(sbi))
    672		return 0;
    673
    674	if (test_opt(sbi, NOBARRIER))
    675		return 0;
    676
    677	for (i = 1; i < sbi->s_ndevs; i++) {
    678		int count = DEFAULT_RETRY_IO_COUNT;
    679
    680		if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
    681			continue;
    682
    683		do {
    684			ret = __submit_flush_wait(sbi, FDEV(i).bdev);
    685			if (ret)
    686				f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
    687		} while (ret && --count);
    688
    689		if (ret) {
    690			f2fs_stop_checkpoint(sbi, false);
    691			break;
    692		}
    693
    694		spin_lock(&sbi->dev_lock);
    695		f2fs_clear_bit(i, (char *)&sbi->dirty_device);
    696		spin_unlock(&sbi->dev_lock);
    697	}
    698
    699	return ret;
    700}
    701
    702static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
    703		enum dirty_type dirty_type)
    704{
    705	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
    706
    707	/* need not be added */
    708	if (IS_CURSEG(sbi, segno))
    709		return;
    710
    711	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
    712		dirty_i->nr_dirty[dirty_type]++;
    713
    714	if (dirty_type == DIRTY) {
    715		struct seg_entry *sentry = get_seg_entry(sbi, segno);
    716		enum dirty_type t = sentry->type;
    717
    718		if (unlikely(t >= DIRTY)) {
    719			f2fs_bug_on(sbi, 1);
    720			return;
    721		}
    722		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
    723			dirty_i->nr_dirty[t]++;
    724
    725		if (__is_large_section(sbi)) {
    726			unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
    727			block_t valid_blocks =
    728				get_valid_blocks(sbi, segno, true);
    729
    730			f2fs_bug_on(sbi, unlikely(!valid_blocks ||
    731					valid_blocks == BLKS_PER_SEC(sbi)));
    732
    733			if (!IS_CURSEC(sbi, secno))
    734				set_bit(secno, dirty_i->dirty_secmap);
    735		}
    736	}
    737}
    738
    739static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
    740		enum dirty_type dirty_type)
    741{
    742	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
    743	block_t valid_blocks;
    744
    745	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
    746		dirty_i->nr_dirty[dirty_type]--;
    747
    748	if (dirty_type == DIRTY) {
    749		struct seg_entry *sentry = get_seg_entry(sbi, segno);
    750		enum dirty_type t = sentry->type;
    751
    752		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
    753			dirty_i->nr_dirty[t]--;
    754
    755		valid_blocks = get_valid_blocks(sbi, segno, true);
    756		if (valid_blocks == 0) {
    757			clear_bit(GET_SEC_FROM_SEG(sbi, segno),
    758						dirty_i->victim_secmap);
    759#ifdef CONFIG_F2FS_CHECK_FS
    760			clear_bit(segno, SIT_I(sbi)->invalid_segmap);
    761#endif
    762		}
    763		if (__is_large_section(sbi)) {
    764			unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
    765
    766			if (!valid_blocks ||
    767					valid_blocks == BLKS_PER_SEC(sbi)) {
    768				clear_bit(secno, dirty_i->dirty_secmap);
    769				return;
    770			}
    771
    772			if (!IS_CURSEC(sbi, secno))
    773				set_bit(secno, dirty_i->dirty_secmap);
    774		}
    775	}
    776}
    777
    778/*
    779 * Errors such as -ENOMEM should not occur here:
    780 * adding a dirty entry to the seglist is not a critical operation.
    781 * If a given segment is one of the current working segments, it won't be added.
    782 */
    783static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
    784{
    785	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
    786	unsigned short valid_blocks, ckpt_valid_blocks;
    787	unsigned int usable_blocks;
    788
    789	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
    790		return;
    791
    792	usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
    793	mutex_lock(&dirty_i->seglist_lock);
    794
    795	valid_blocks = get_valid_blocks(sbi, segno, false);
    796	ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno, false);
    797
    798	if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
    799		ckpt_valid_blocks == usable_blocks)) {
    800		__locate_dirty_segment(sbi, segno, PRE);
    801		__remove_dirty_segment(sbi, segno, DIRTY);
    802	} else if (valid_blocks < usable_blocks) {
    803		__locate_dirty_segment(sbi, segno, DIRTY);
    804	} else {
    805		/* Recovery routine with SSR needs this */
    806		__remove_dirty_segment(sbi, segno, DIRTY);
    807	}
    808
    809	mutex_unlock(&dirty_i->seglist_lock);
    810}
    811
    812/* This moves currently empty dirty blocks to prefree. Must hold seglist_lock */
    813void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
    814{
    815	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
    816	unsigned int segno;
    817
    818	mutex_lock(&dirty_i->seglist_lock);
    819	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
    820		if (get_valid_blocks(sbi, segno, false))
    821			continue;
    822		if (IS_CURSEG(sbi, segno))
    823			continue;
    824		__locate_dirty_segment(sbi, segno, PRE);
    825		__remove_dirty_segment(sbi, segno, DIRTY);
    826	}
    827	mutex_unlock(&dirty_i->seglist_lock);
    828}
    829
    830block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
    831{
    832	int ovp_hole_segs =
    833		(overprovision_segments(sbi) - reserved_segments(sbi));
    834	block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg;
    835	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
    836	block_t holes[2] = {0, 0};	/* DATA and NODE */
    837	block_t unusable;
    838	struct seg_entry *se;
    839	unsigned int segno;
    840
    841	mutex_lock(&dirty_i->seglist_lock);
    842	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
    843		se = get_seg_entry(sbi, segno);
    844		if (IS_NODESEG(se->type))
    845			holes[NODE] += f2fs_usable_blks_in_seg(sbi, segno) -
    846							se->valid_blocks;
    847		else
    848			holes[DATA] += f2fs_usable_blks_in_seg(sbi, segno) -
    849							se->valid_blocks;
    850	}
    851	mutex_unlock(&dirty_i->seglist_lock);
    852
    853	unusable = holes[DATA] > holes[NODE] ? holes[DATA] : holes[NODE];
    854	if (unusable > ovp_holes)
    855		return unusable - ovp_holes;
    856	return 0;
    857}
    858
    859int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
    860{
    861	int ovp_hole_segs =
    862		(overprovision_segments(sbi) - reserved_segments(sbi));
    863	if (unusable > F2FS_OPTION(sbi).unusable_cap)
    864		return -EAGAIN;
    865	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
    866		dirty_segments(sbi) > ovp_hole_segs)
    867		return -EAGAIN;
    868	return 0;
    869}
    870
    871/* This is only used by SBI_CP_DISABLED */
    872static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
    873{
    874	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
    875	unsigned int segno = 0;
    876
    877	mutex_lock(&dirty_i->seglist_lock);
    878	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
    879		if (get_valid_blocks(sbi, segno, false))
    880			continue;
    881		if (get_ckpt_valid_blocks(sbi, segno, false))
    882			continue;
    883		mutex_unlock(&dirty_i->seglist_lock);
    884		return segno;
    885	}
    886	mutex_unlock(&dirty_i->seglist_lock);
    887	return NULL_SEGNO;
    888}
    889
    890static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
    891		struct block_device *bdev, block_t lstart,
    892		block_t start, block_t len)
    893{
    894	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
    895	struct list_head *pend_list;
    896	struct discard_cmd *dc;
    897
    898	f2fs_bug_on(sbi, !len);
    899
    900	pend_list = &dcc->pend_list[plist_idx(len)];
    901
    902	dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS, true, NULL);
    903	INIT_LIST_HEAD(&dc->list);
    904	dc->bdev = bdev;
    905	dc->lstart = lstart;
    906	dc->start = start;
    907	dc->len = len;
    908	dc->ref = 0;
    909	dc->state = D_PREP;
    910	dc->queued = 0;
    911	dc->error = 0;
    912	init_completion(&dc->wait);
    913	list_add_tail(&dc->list, pend_list);
    914	spin_lock_init(&dc->lock);
    915	dc->bio_ref = 0;
    916	atomic_inc(&dcc->discard_cmd_cnt);
    917	dcc->undiscard_blks += len;
    918
    919	return dc;
    920}
    921
    922static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
    923				struct block_device *bdev, block_t lstart,
    924				block_t start, block_t len,
    925				struct rb_node *parent, struct rb_node **p,
    926				bool leftmost)
    927{
    928	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
    929	struct discard_cmd *dc;
    930
    931	dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
    932
    933	rb_link_node(&dc->rb_node, parent, p);
    934	rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
    935
    936	return dc;
    937}
    938
    939static void __detach_discard_cmd(struct discard_cmd_control *dcc,
    940							struct discard_cmd *dc)
    941{
    942	if (dc->state == D_DONE)
    943		atomic_sub(dc->queued, &dcc->queued_discard);
    944
    945	list_del(&dc->list);
    946	rb_erase_cached(&dc->rb_node, &dcc->root);
    947	dcc->undiscard_blks -= dc->len;
    948
    949	kmem_cache_free(discard_cmd_slab, dc);
    950
    951	atomic_dec(&dcc->discard_cmd_cnt);
    952}
    953
    954static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
    955							struct discard_cmd *dc)
    956{
    957	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
    958	unsigned long flags;
    959
    960	trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);
    961
    962	spin_lock_irqsave(&dc->lock, flags);
    963	if (dc->bio_ref) {
    964		spin_unlock_irqrestore(&dc->lock, flags);
    965		return;
    966	}
    967	spin_unlock_irqrestore(&dc->lock, flags);
    968
    969	f2fs_bug_on(sbi, dc->ref);
    970
    971	if (dc->error == -EOPNOTSUPP)
    972		dc->error = 0;
    973
    974	if (dc->error)
    975		printk_ratelimited(
    976			"%sF2FS-fs (%s): Issue discard(%u, %u, %u) failed, ret: %d",
    977			KERN_INFO, sbi->sb->s_id,
    978			dc->lstart, dc->start, dc->len, dc->error);
    979	__detach_discard_cmd(dcc, dc);
    980}
    981
    982static void f2fs_submit_discard_endio(struct bio *bio)
    983{
    984	struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
    985	unsigned long flags;
    986
    987	spin_lock_irqsave(&dc->lock, flags);
    988	if (!dc->error)
    989		dc->error = blk_status_to_errno(bio->bi_status);
    990	dc->bio_ref--;
    991	if (!dc->bio_ref && dc->state == D_SUBMIT) {
    992		dc->state = D_DONE;
    993		complete_all(&dc->wait);
    994	}
    995	spin_unlock_irqrestore(&dc->lock, flags);
    996	bio_put(bio);
    997}
    998
    999static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
   1000				block_t start, block_t end)
   1001{
   1002#ifdef CONFIG_F2FS_CHECK_FS
   1003	struct seg_entry *sentry;
   1004	unsigned int segno;
   1005	block_t blk = start;
   1006	unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
   1007	unsigned long *map;
   1008
   1009	while (blk < end) {
   1010		segno = GET_SEGNO(sbi, blk);
   1011		sentry = get_seg_entry(sbi, segno);
   1012		offset = GET_BLKOFF_FROM_SEG0(sbi, blk);
   1013
   1014		if (end < START_BLOCK(sbi, segno + 1))
   1015			size = GET_BLKOFF_FROM_SEG0(sbi, end);
   1016		else
   1017			size = max_blocks;
   1018		map = (unsigned long *)(sentry->cur_valid_map);
   1019		offset = __find_rev_next_bit(map, size, offset);
   1020		f2fs_bug_on(sbi, offset != size);
   1021		blk = START_BLOCK(sbi, segno + 1);
   1022	}
   1023#endif
   1024}
   1025
   1026static void __init_discard_policy(struct f2fs_sb_info *sbi,
   1027				struct discard_policy *dpolicy,
   1028				int discard_type, unsigned int granularity)
   1029{
   1030	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
   1031
   1032	/* common policy */
   1033	dpolicy->type = discard_type;
   1034	dpolicy->sync = true;
   1035	dpolicy->ordered = false;
   1036	dpolicy->granularity = granularity;
   1037
   1038	dpolicy->max_requests = dcc->max_discard_request;
   1039	dpolicy->io_aware_gran = MAX_PLIST_NUM;
   1040	dpolicy->timeout = false;
   1041
   1042	if (discard_type == DPOLICY_BG) {
   1043		dpolicy->min_interval = dcc->min_discard_issue_time;
   1044		dpolicy->mid_interval = dcc->mid_discard_issue_time;
   1045		dpolicy->max_interval = dcc->max_discard_issue_time;
   1046		dpolicy->io_aware = true;
   1047		dpolicy->sync = false;
   1048		dpolicy->ordered = true;
   1049		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
   1050			dpolicy->granularity = 1;
   1051			if (atomic_read(&dcc->discard_cmd_cnt))
   1052				dpolicy->max_interval =
   1053					dcc->min_discard_issue_time;
   1054		}
   1055	} else if (discard_type == DPOLICY_FORCE) {
   1056		dpolicy->min_interval = dcc->min_discard_issue_time;
   1057		dpolicy->mid_interval = dcc->mid_discard_issue_time;
   1058		dpolicy->max_interval = dcc->max_discard_issue_time;
   1059		dpolicy->io_aware = false;
   1060	} else if (discard_type == DPOLICY_FSTRIM) {
   1061		dpolicy->io_aware = false;
   1062	} else if (discard_type == DPOLICY_UMOUNT) {
   1063		dpolicy->io_aware = false;
   1064		/* we need to issue all to keep CP_TRIMMED_FLAG */
   1065		dpolicy->granularity = 1;
   1066		dpolicy->timeout = true;
   1067	}
   1068}
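/*
 * In short: DPOLICY_BG is the io-aware asynchronous background policy
 * (dropping to granularity 1 once utilization passes
 * DEF_DISCARD_URGENT_UTIL), DPOLICY_FORCE and DPOLICY_FSTRIM ignore
 * device idleness, and DPOLICY_UMOUNT issues everything at granularity 1
 * under a timeout so that CP_TRIMMED_FLAG can be kept.
 */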
   1069
   1070static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
   1071				struct block_device *bdev, block_t lstart,
   1072				block_t start, block_t len);
   1073/* this function is copied from blkdev_issue_discard() in block/blk-lib.c */
   1074static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
   1075						struct discard_policy *dpolicy,
   1076						struct discard_cmd *dc,
   1077						unsigned int *issued)
   1078{
   1079	struct block_device *bdev = dc->bdev;
   1080	unsigned int max_discard_blocks =
   1081			SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
   1082	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
   1083	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
   1084					&(dcc->fstrim_list) : &(dcc->wait_list);
   1085	int flag = dpolicy->sync ? REQ_SYNC : 0;
   1086	block_t lstart, start, len, total_len;
   1087	int err = 0;
   1088
   1089	if (dc->state != D_PREP)
   1090		return 0;
   1091
   1092	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
   1093		return 0;
   1094
   1095	trace_f2fs_issue_discard(bdev, dc->start, dc->len);
   1096
   1097	lstart = dc->lstart;
   1098	start = dc->start;
   1099	len = dc->len;
   1100	total_len = len;
   1101
   1102	dc->len = 0;
   1103
   1104	while (total_len && *issued < dpolicy->max_requests && !err) {
   1105		struct bio *bio = NULL;
   1106		unsigned long flags;
   1107		bool last = true;
   1108
   1109		if (len > max_discard_blocks) {
   1110			len = max_discard_blocks;
   1111			last = false;
   1112		}
   1113
   1114		(*issued)++;
   1115		if (*issued == dpolicy->max_requests)
   1116			last = true;
   1117
   1118		dc->len += len;
   1119
   1120		if (time_to_inject(sbi, FAULT_DISCARD)) {
   1121			f2fs_show_injection_info(sbi, FAULT_DISCARD);
   1122			err = -EIO;
   1123			goto submit;
   1124		}
   1125		err = __blkdev_issue_discard(bdev,
   1126					SECTOR_FROM_BLOCK(start),
   1127					SECTOR_FROM_BLOCK(len),
   1128					GFP_NOFS, &bio);
   1129submit:
   1130		if (err) {
   1131			spin_lock_irqsave(&dc->lock, flags);
   1132			if (dc->state == D_PARTIAL)
   1133				dc->state = D_SUBMIT;
   1134			spin_unlock_irqrestore(&dc->lock, flags);
   1135
   1136			break;
   1137		}
   1138
   1139		f2fs_bug_on(sbi, !bio);
   1140
   1141		/*
   1142		 * this must be done before submission to avoid the command
   1143		 * reaching D_DONE right away
   1144		 */
   1145		spin_lock_irqsave(&dc->lock, flags);
   1146		if (last)
   1147			dc->state = D_SUBMIT;
   1148		else
   1149			dc->state = D_PARTIAL;
   1150		dc->bio_ref++;
   1151		spin_unlock_irqrestore(&dc->lock, flags);
   1152
   1153		atomic_inc(&dcc->queued_discard);
   1154		dc->queued++;
   1155		list_move_tail(&dc->list, wait_list);
   1156
   1157		/* sanity check on discard range */
   1158		__check_sit_bitmap(sbi, lstart, lstart + len);
   1159
   1160		bio->bi_private = dc;
   1161		bio->bi_end_io = f2fs_submit_discard_endio;
   1162		bio->bi_opf |= flag;
   1163		submit_bio(bio);
   1164
   1165		atomic_inc(&dcc->issued_discard);
   1166
   1167		f2fs_update_iostat(sbi, FS_DISCARD, 1);
   1168
   1169		lstart += len;
   1170		start += len;
   1171		total_len -= len;
   1172		len = total_len;
   1173	}
   1174
   1175	if (!err && len) {
   1176		dcc->undiscard_blks -= len;
   1177		__update_discard_tree_range(sbi, bdev, lstart, start, len);
   1178	}
   1179	return err;
   1180}
   1181
   1182static void __insert_discard_tree(struct f2fs_sb_info *sbi,
   1183				struct block_device *bdev, block_t lstart,
   1184				block_t start, block_t len,
   1185				struct rb_node **insert_p,
   1186				struct rb_node *insert_parent)
   1187{
   1188	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
   1189	struct rb_node **p;
   1190	struct rb_node *parent = NULL;
   1191	bool leftmost = true;
   1192
   1193	if (insert_p && insert_parent) {
   1194		parent = insert_parent;
   1195		p = insert_p;
   1196		goto do_insert;
   1197	}
   1198
   1199	p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent,
   1200							lstart, &leftmost);
   1201do_insert:
   1202	__attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
   1203								p, leftmost);
   1204}
   1205
   1206static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
   1207						struct discard_cmd *dc)
   1208{
   1209	list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]);
   1210}
   1211
   1212static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
   1213				struct discard_cmd *dc, block_t blkaddr)
   1214{
   1215	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
   1216	struct discard_info di = dc->di;
   1217	bool modified = false;
   1218
   1219	if (dc->state == D_DONE || dc->len == 1) {
   1220		__remove_discard_cmd(sbi, dc);
   1221		return;
   1222	}
   1223
   1224	dcc->undiscard_blks -= di.len;
   1225
   1226	if (blkaddr > di.lstart) {
   1227		dc->len = blkaddr - dc->lstart;
   1228		dcc->undiscard_blks += dc->len;
   1229		__relocate_discard_cmd(dcc, dc);
   1230		modified = true;
   1231	}
   1232
   1233	if (blkaddr < di.lstart + di.len - 1) {
   1234		if (modified) {
   1235			__insert_discard_tree(sbi, dc->bdev, blkaddr + 1,
   1236					di.start + blkaddr + 1 - di.lstart,
   1237					di.lstart + di.len - 1 - blkaddr,
   1238					NULL, NULL);
   1239		} else {
   1240			dc->lstart++;
   1241			dc->len--;
   1242			dc->start++;
   1243			dcc->undiscard_blks += dc->len;
   1244			__relocate_discard_cmd(dcc, dc);
   1245		}
   1246	}
   1247}
   1248
   1249static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
   1250				struct block_device *bdev, block_t lstart,
   1251				block_t start, block_t len)
   1252{
   1253	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
   1254	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
   1255	struct discard_cmd *dc;
   1256	struct discard_info di = {0};
   1257	struct rb_node **insert_p = NULL, *insert_parent = NULL;
   1258	unsigned int max_discard_blocks =
   1259			SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
   1260	block_t end = lstart + len;
   1261
   1262	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
   1263					NULL, lstart,
   1264					(struct rb_entry **)&prev_dc,
   1265					(struct rb_entry **)&next_dc,
   1266					&insert_p, &insert_parent, true, NULL);
   1267	if (dc)
   1268		prev_dc = dc;
   1269
   1270	if (!prev_dc) {
   1271		di.lstart = lstart;
   1272		di.len = next_dc ? next_dc->lstart - lstart : len;
   1273		di.len = min(di.len, len);
   1274		di.start = start;
   1275	}
   1276
   1277	while (1) {
   1278		struct rb_node *node;
   1279		bool merged = false;
   1280		struct discard_cmd *tdc = NULL;
   1281
   1282		if (prev_dc) {
   1283			di.lstart = prev_dc->lstart + prev_dc->len;
   1284			if (di.lstart < lstart)
   1285				di.lstart = lstart;
   1286			if (di.lstart >= end)
   1287				break;
   1288
   1289			if (!next_dc || next_dc->lstart > end)
   1290				di.len = end - di.lstart;
   1291			else
   1292				di.len = next_dc->lstart - di.lstart;
   1293			di.start = start + di.lstart - lstart;
   1294		}
   1295
   1296		if (!di.len)
   1297			goto next;
   1298
   1299		if (prev_dc && prev_dc->state == D_PREP &&
   1300			prev_dc->bdev == bdev &&
   1301			__is_discard_back_mergeable(&di, &prev_dc->di,
   1302							max_discard_blocks)) {
   1303			prev_dc->di.len += di.len;
   1304			dcc->undiscard_blks += di.len;
   1305			__relocate_discard_cmd(dcc, prev_dc);
   1306			di = prev_dc->di;
   1307			tdc = prev_dc;
   1308			merged = true;
   1309		}
   1310
   1311		if (next_dc && next_dc->state == D_PREP &&
   1312			next_dc->bdev == bdev &&
   1313			__is_discard_front_mergeable(&di, &next_dc->di,
   1314							max_discard_blocks)) {
   1315			next_dc->di.lstart = di.lstart;
   1316			next_dc->di.len += di.len;
   1317			next_dc->di.start = di.start;
   1318			dcc->undiscard_blks += di.len;
   1319			__relocate_discard_cmd(dcc, next_dc);
   1320			if (tdc)
   1321				__remove_discard_cmd(sbi, tdc);
   1322			merged = true;
   1323		}
   1324
   1325		if (!merged) {
   1326			__insert_discard_tree(sbi, bdev, di.lstart, di.start,
   1327							di.len, NULL, NULL);
   1328		}
   1329 next:
   1330		prev_dc = next_dc;
   1331		if (!prev_dc)
   1332			break;
   1333
   1334		node = rb_next(&prev_dc->rb_node);
   1335		next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
   1336	}
   1337}
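/*
 * Roughly, a new range is back-merged into the preceding command and/or
 * front-merged into the following one when those commands are still
 * D_PREP, target the same bdev and the __is_discard_*_mergeable()
 * helpers accept the combination (adjacent ranges within
 * max_discard_blocks); otherwise it is inserted as a fresh rb-tree node.
 */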
   1338
   1339static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
   1340		struct block_device *bdev, block_t blkstart, block_t blklen)
   1341{
   1342	block_t lblkstart = blkstart;
   1343
   1344	if (!f2fs_bdev_support_discard(bdev))
   1345		return 0;
   1346
   1347	trace_f2fs_queue_discard(bdev, blkstart, blklen);
   1348
   1349	if (f2fs_is_multi_device(sbi)) {
   1350		int devi = f2fs_target_device_index(sbi, blkstart);
   1351
   1352		blkstart -= FDEV(devi).start_blk;
   1353	}
   1354	mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
   1355	__update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
   1356	mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
   1357	return 0;
   1358}
   1359
   1360static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
   1361					struct discard_policy *dpolicy)
   1362{
   1363	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
   1364	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
   1365	struct rb_node **insert_p = NULL, *insert_parent = NULL;
   1366	struct discard_cmd *dc;
   1367	struct blk_plug plug;
   1368	unsigned int pos = dcc->next_pos;
   1369	unsigned int issued = 0;
   1370	bool io_interrupted = false;
   1371
   1372	mutex_lock(&dcc->cmd_lock);
   1373	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
   1374					NULL, pos,
   1375					(struct rb_entry **)&prev_dc,
   1376					(struct rb_entry **)&next_dc,
   1377					&insert_p, &insert_parent, true, NULL);
   1378	if (!dc)
   1379		dc = next_dc;
   1380
   1381	blk_start_plug(&plug);
   1382
   1383	while (dc) {
   1384		struct rb_node *node;
   1385		int err = 0;
   1386
   1387		if (dc->state != D_PREP)
   1388			goto next;
   1389
   1390		if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
   1391			io_interrupted = true;
   1392			break;
   1393		}
   1394
   1395		dcc->next_pos = dc->lstart + dc->len;
   1396		err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
   1397
   1398		if (issued >= dpolicy->max_requests)
   1399			break;
   1400next:
   1401		node = rb_next(&dc->rb_node);
   1402		if (err)
   1403			__remove_discard_cmd(sbi, dc);
   1404		dc = rb_entry_safe(node, struct discard_cmd, rb_node);
   1405	}
   1406
   1407	blk_finish_plug(&plug);
   1408
   1409	if (!dc)
   1410		dcc->next_pos = 0;
   1411
   1412	mutex_unlock(&dcc->cmd_lock);
   1413
   1414	if (!issued && io_interrupted)
   1415		issued = -1;
   1416
   1417	return issued;
   1418}
   1419static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
   1420					struct discard_policy *dpolicy);
   1421
   1422static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
   1423					struct discard_policy *dpolicy)
   1424{
   1425	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
   1426	struct list_head *pend_list;
   1427	struct discard_cmd *dc, *tmp;
   1428	struct blk_plug plug;
   1429	int i, issued;
   1430	bool io_interrupted = false;
   1431
   1432	if (dpolicy->timeout)
   1433		f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT);
   1434
   1435retry:
   1436	issued = 0;
   1437	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
   1438		if (dpolicy->timeout &&
   1439				f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
   1440			break;
   1441
   1442		if (i + 1 < dpolicy->granularity)
   1443			break;
   1444
   1445		if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
   1446			return __issue_discard_cmd_orderly(sbi, dpolicy);
   1447
   1448		pend_list = &dcc->pend_list[i];
   1449
   1450		mutex_lock(&dcc->cmd_lock);
   1451		if (list_empty(pend_list))
   1452			goto next;
   1453		if (unlikely(dcc->rbtree_check))
   1454			f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
   1455							&dcc->root, false));
   1456		blk_start_plug(&plug);
   1457		list_for_each_entry_safe(dc, tmp, pend_list, list) {
   1458			f2fs_bug_on(sbi, dc->state != D_PREP);
   1459
   1460			if (dpolicy->timeout &&
   1461				f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
   1462				break;
   1463
   1464			if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
   1465						!is_idle(sbi, DISCARD_TIME)) {
   1466				io_interrupted = true;
   1467				break;
   1468			}
   1469
   1470			__submit_discard_cmd(sbi, dpolicy, dc, &issued);
   1471
   1472			if (issued >= dpolicy->max_requests)
   1473				break;
   1474		}
   1475		blk_finish_plug(&plug);
   1476next:
   1477		mutex_unlock(&dcc->cmd_lock);
   1478
   1479		if (issued >= dpolicy->max_requests || io_interrupted)
   1480			break;
   1481	}
   1482
   1483	if (dpolicy->type == DPOLICY_UMOUNT && issued) {
   1484		__wait_all_discard_cmd(sbi, dpolicy);
   1485		goto retry;
   1486	}
   1487
   1488	if (!issued && io_interrupted)
   1489		issued = -1;
   1490
   1491	return issued;
   1492}
   1493
   1494static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
   1495{
   1496	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
   1497	struct list_head *pend_list;
   1498	struct discard_cmd *dc, *tmp;
   1499	int i;
   1500	bool dropped = false;
   1501
   1502	mutex_lock(&dcc->cmd_lock);
   1503	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
   1504		pend_list = &dcc->pend_list[i];
   1505		list_for_each_entry_safe(dc, tmp, pend_list, list) {
   1506			f2fs_bug_on(sbi, dc->state != D_PREP);
   1507			__remove_discard_cmd(sbi, dc);
   1508			dropped = true;
   1509		}
   1510	}
   1511	mutex_unlock(&dcc->cmd_lock);
   1512
   1513	return dropped;
   1514}
   1515
   1516void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
   1517{
   1518	__drop_discard_cmd(sbi);
   1519}
   1520
   1521static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
   1522							struct discard_cmd *dc)
   1523{
   1524	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
   1525	unsigned int len = 0;
   1526
   1527	wait_for_completion_io(&dc->wait);
   1528	mutex_lock(&dcc->cmd_lock);
   1529	f2fs_bug_on(sbi, dc->state != D_DONE);
   1530	dc->ref--;
   1531	if (!dc->ref) {
   1532		if (!dc->error)
   1533			len = dc->len;
   1534		__remove_discard_cmd(sbi, dc);
   1535	}
   1536	mutex_unlock(&dcc->cmd_lock);
   1537
   1538	return len;
   1539}
   1540
   1541static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
   1542						struct discard_policy *dpolicy,
   1543						block_t start, block_t end)
   1544{
   1545	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
   1546	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
   1547					&(dcc->fstrim_list) : &(dcc->wait_list);
   1548	struct discard_cmd *dc = NULL, *iter, *tmp;
   1549	unsigned int trimmed = 0;
   1550
   1551next:
   1552	dc = NULL;
   1553
   1554	mutex_lock(&dcc->cmd_lock);
   1555	list_for_each_entry_safe(iter, tmp, wait_list, list) {
   1556		if (iter->lstart + iter->len <= start || end <= iter->lstart)
   1557			continue;
   1558		if (iter->len < dpolicy->granularity)
   1559			continue;
   1560		if (iter->state == D_DONE && !iter->ref) {
   1561			wait_for_completion_io(&iter->wait);
   1562			if (!iter->error)
   1563				trimmed += iter->len;
   1564			__remove_discard_cmd(sbi, iter);
   1565		} else {
   1566			iter->ref++;
   1567			dc = iter;
   1568			break;
   1569		}
   1570	}
   1571	mutex_unlock(&dcc->cmd_lock);
   1572
   1573	if (dc) {
   1574		trimmed += __wait_one_discard_bio(sbi, dc);
   1575		goto next;
   1576	}
   1577
   1578	return trimmed;
   1579}
   1580
   1581static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
   1582						struct discard_policy *dpolicy)
   1583{
   1584	struct discard_policy dp;
   1585	unsigned int discard_blks;
   1586
   1587	if (dpolicy)
   1588		return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
   1589
   1590	/* wait all */
   1591	__init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
   1592	discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
   1593	__init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
   1594	discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
   1595
   1596	return discard_blks;
   1597}
   1598
   1599/* This should be covered by global mutex, &sit_i->sentry_lock */
   1600static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
   1601{
   1602	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
   1603	struct discard_cmd *dc;
   1604	bool need_wait = false;
   1605
   1606	mutex_lock(&dcc->cmd_lock);
   1607	dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root,
   1608							NULL, blkaddr);
   1609	if (dc) {
   1610		if (dc->state == D_PREP) {
   1611			__punch_discard_cmd(sbi, dc, blkaddr);
   1612		} else {
   1613			dc->ref++;
   1614			need_wait = true;
   1615		}
   1616	}
   1617	mutex_unlock(&dcc->cmd_lock);
   1618
   1619	if (need_wait)
   1620		__wait_one_discard_bio(sbi, dc);
   1621}
   1622
   1623void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
   1624{
   1625	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
   1626
   1627	if (dcc && dcc->f2fs_issue_discard) {
   1628		struct task_struct *discard_thread = dcc->f2fs_issue_discard;
   1629
   1630		dcc->f2fs_issue_discard = NULL;
   1631		kthread_stop(discard_thread);
   1632	}
   1633}
   1634
   1635/* This comes from f2fs_put_super */
   1636bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
   1637{
   1638	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
   1639	struct discard_policy dpolicy;
   1640	bool dropped;
   1641
   1642	__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
   1643					dcc->discard_granularity);
   1644	__issue_discard_cmd(sbi, &dpolicy);
   1645	dropped = __drop_discard_cmd(sbi);
   1646
   1647	/* just to make sure there are no pending discard commands */
   1648	__wait_all_discard_cmd(sbi, NULL);
   1649
   1650	f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
   1651	return dropped;
   1652}
   1653
   1654static int issue_discard_thread(void *data)
   1655{
   1656	struct f2fs_sb_info *sbi = data;
   1657	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
   1658	wait_queue_head_t *q = &dcc->discard_wait_queue;
   1659	struct discard_policy dpolicy;
   1660	unsigned int wait_ms = dcc->min_discard_issue_time;
   1661	int issued;
   1662
   1663	set_freezable();
   1664
   1665	do {
   1666		if (sbi->gc_mode == GC_URGENT_HIGH ||
   1667			!f2fs_available_free_memory(sbi, DISCARD_CACHE))
   1668			__init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
   1669		else
   1670			__init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
   1671						dcc->discard_granularity);
   1672
   1673		if (!atomic_read(&dcc->discard_cmd_cnt))
   1674		       wait_ms = dpolicy.max_interval;
   1675
   1676		wait_event_interruptible_timeout(*q,
   1677				kthread_should_stop() || freezing(current) ||
   1678				dcc->discard_wake,
   1679				msecs_to_jiffies(wait_ms));
   1680
   1681		if (dcc->discard_wake)
   1682			dcc->discard_wake = 0;
   1683
   1684		/* clean up pending candidates before going to sleep */
   1685		if (atomic_read(&dcc->queued_discard))
   1686			__wait_all_discard_cmd(sbi, NULL);
   1687
   1688		if (try_to_freeze())
   1689			continue;
   1690		if (f2fs_readonly(sbi->sb))
   1691			continue;
   1692		if (kthread_should_stop())
   1693			return 0;
   1694		if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
   1695			wait_ms = dpolicy.max_interval;
   1696			continue;
   1697		}
   1698		if (!atomic_read(&dcc->discard_cmd_cnt))
   1699			continue;
   1700
   1701		sb_start_intwrite(sbi->sb);
   1702
   1703		issued = __issue_discard_cmd(sbi, &dpolicy);
   1704		if (issued > 0) {
   1705			__wait_all_discard_cmd(sbi, &dpolicy);
   1706			wait_ms = dpolicy.min_interval;
   1707		} else if (issued == -1) {
   1708			wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
   1709			if (!wait_ms)
   1710				wait_ms = dpolicy.mid_interval;
   1711		} else {
   1712			wait_ms = dpolicy.max_interval;
   1713		}
   1714
   1715		sb_end_intwrite(sbi->sb);
   1716
   1717	} while (!kthread_should_stop());
   1718	return 0;
   1719}
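/*
 * Roughly, the sleep interval above adapts to each pass: min_interval
 * after commands were issued, mid_interval (or the remaining idle time)
 * when device activity interrupted issuing, and max_interval when there
 * was nothing to do or the filesystem needs fsck.
 */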
   1720
   1721#ifdef CONFIG_BLK_DEV_ZONED
   1722static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
   1723		struct block_device *bdev, block_t blkstart, block_t blklen)
   1724{
   1725	sector_t sector, nr_sects;
   1726	block_t lblkstart = blkstart;
   1727	int devi = 0;
   1728
   1729	if (f2fs_is_multi_device(sbi)) {
   1730		devi = f2fs_target_device_index(sbi, blkstart);
   1731		if (blkstart < FDEV(devi).start_blk ||
   1732		    blkstart > FDEV(devi).end_blk) {
   1733			f2fs_err(sbi, "Invalid block %x", blkstart);
   1734			return -EIO;
   1735		}
   1736		blkstart -= FDEV(devi).start_blk;
   1737	}
   1738
   1739	/* For sequential zones, reset the zone write pointer */
   1740	if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
   1741		sector = SECTOR_FROM_BLOCK(blkstart);
   1742		nr_sects = SECTOR_FROM_BLOCK(blklen);
   1743
   1744		if (sector & (bdev_zone_sectors(bdev) - 1) ||
   1745				nr_sects != bdev_zone_sectors(bdev)) {
   1746			f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
   1747				 devi, sbi->s_ndevs ? FDEV(devi).path : "",
   1748				 blkstart, blklen);
   1749			return -EIO;
   1750		}
   1751		trace_f2fs_issue_reset_zone(bdev, blkstart);
   1752		return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
   1753					sector, nr_sects, GFP_NOFS);
   1754	}
   1755
   1756	/* For conventional zones, use regular discard if supported */
   1757	return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
   1758}
   1759#endif
   1760
   1761static int __issue_discard_async(struct f2fs_sb_info *sbi,
   1762		struct block_device *bdev, block_t blkstart, block_t blklen)
   1763{
   1764#ifdef CONFIG_BLK_DEV_ZONED
   1765	if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
   1766		return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
   1767#endif
   1768	return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
   1769}
   1770
   1771static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
   1772				block_t blkstart, block_t blklen)
   1773{
   1774	sector_t start = blkstart, len = 0;
   1775	struct block_device *bdev;
   1776	struct seg_entry *se;
   1777	unsigned int offset;
   1778	block_t i;
   1779	int err = 0;
   1780
   1781	bdev = f2fs_target_device(sbi, blkstart, NULL);
   1782
   1783	for (i = blkstart; i < blkstart + blklen; i++, len++) {
   1784		if (i != start) {
   1785			struct block_device *bdev2 =
   1786				f2fs_target_device(sbi, i, NULL);
   1787
   1788			if (bdev2 != bdev) {
   1789				err = __issue_discard_async(sbi, bdev,
   1790						start, len);
   1791				if (err)
   1792					return err;
   1793				bdev = bdev2;
   1794				start = i;
   1795				len = 0;
   1796			}
   1797		}
   1798
   1799		se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
   1800		offset = GET_BLKOFF_FROM_SEG0(sbi, i);
   1801
   1802		if (f2fs_block_unit_discard(sbi) &&
   1803				!f2fs_test_and_set_bit(offset, se->discard_map))
   1804			sbi->discard_blks--;
   1805	}
   1806
   1807	if (len)
   1808		err = __issue_discard_async(sbi, bdev, start, len);
   1809	return err;
   1810}
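
/*
 * For illustration: on a multi-device setup, a range that straddles a device
 * boundary is split here, e.g. a 64-block discard whose last 16 blocks live
 * on the next device becomes one 48-block and one 16-block
 * __issue_discard_async() call, each against its own bdev.
 */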
   1811
   1812static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
   1813							bool check_only)
   1814{
   1815	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
   1816	int max_blocks = sbi->blocks_per_seg;
   1817	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
   1818	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
   1819	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
   1820	unsigned long *discard_map = (unsigned long *)se->discard_map;
   1821	unsigned long *dmap = SIT_I(sbi)->tmp_map;
   1822	unsigned int start = 0, end = -1;
   1823	bool force = (cpc->reason & CP_DISCARD);
   1824	struct discard_entry *de = NULL;
   1825	struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
   1826	int i;
   1827
   1828	if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi) ||
   1829			!f2fs_block_unit_discard(sbi))
   1830		return false;
   1831
   1832	if (!force) {
   1833		if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
   1834			SM_I(sbi)->dcc_info->nr_discards >=
   1835				SM_I(sbi)->dcc_info->max_discards)
   1836			return false;
   1837	}
   1838
    1839	/* SIT_VBLOCK_MAP_SIZE should be a multiple of sizeof(unsigned long) */
   1840	for (i = 0; i < entries; i++)
   1841		dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
   1842				(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
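
	/*
	 * A hypothetical 8-bit slice, MSB-first as f2fs_set_bit stores it:
	 *   cur_valid_map  1100 0000   ckpt_valid_map  1110 0000
	 *   discard_map    0000 0000
	 * Normal path: (cur ^ ckpt) & ckpt = 0010 0000, i.e. only blocks that
	 * were valid at the last checkpoint but have been freed since.
	 * CP_DISCARD (force): ~ckpt & ~discard = 0001 1111, i.e. everything
	 * that is neither checkpointed nor already discarded.
	 */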
   1843
   1844	while (force || SM_I(sbi)->dcc_info->nr_discards <=
   1845				SM_I(sbi)->dcc_info->max_discards) {
   1846		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
   1847		if (start >= max_blocks)
   1848			break;
   1849
   1850		end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
   1851		if (force && start && end != max_blocks
   1852					&& (end - start) < cpc->trim_minlen)
   1853			continue;
   1854
   1855		if (check_only)
   1856			return true;
   1857
   1858		if (!de) {
   1859			de = f2fs_kmem_cache_alloc(discard_entry_slab,
   1860						GFP_F2FS_ZERO, true, NULL);
   1861			de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
   1862			list_add_tail(&de->list, head);
   1863		}
   1864
   1865		for (i = start; i < end; i++)
   1866			__set_bit_le(i, (void *)de->discard_map);
   1867
   1868		SM_I(sbi)->dcc_info->nr_discards += end - start;
   1869	}
   1870	return false;
   1871}
   1872
   1873static void release_discard_addr(struct discard_entry *entry)
   1874{
   1875	list_del(&entry->list);
   1876	kmem_cache_free(discard_entry_slab, entry);
   1877}
   1878
   1879void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
   1880{
   1881	struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
   1882	struct discard_entry *entry, *this;
   1883
   1884	/* drop caches */
   1885	list_for_each_entry_safe(entry, this, head, list)
   1886		release_discard_addr(entry);
   1887}
   1888
   1889/*
   1890 * Should call f2fs_clear_prefree_segments after checkpoint is done.
   1891 */
   1892static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
   1893{
   1894	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
   1895	unsigned int segno;
   1896
   1897	mutex_lock(&dirty_i->seglist_lock);
   1898	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
   1899		__set_test_and_free(sbi, segno, false);
   1900	mutex_unlock(&dirty_i->seglist_lock);
   1901}
   1902
   1903void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
   1904						struct cp_control *cpc)
   1905{
   1906	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
   1907	struct list_head *head = &dcc->entry_list;
   1908	struct discard_entry *entry, *this;
   1909	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
   1910	unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
   1911	unsigned int start = 0, end = -1;
   1912	unsigned int secno, start_segno;
   1913	bool force = (cpc->reason & CP_DISCARD);
   1914	bool section_alignment = F2FS_OPTION(sbi).discard_unit ==
   1915						DISCARD_UNIT_SECTION;
   1916
   1917	if (f2fs_lfs_mode(sbi) && __is_large_section(sbi))
   1918		section_alignment = true;
   1919
   1920	mutex_lock(&dirty_i->seglist_lock);
   1921
   1922	while (1) {
   1923		int i;
   1924
   1925		if (section_alignment && end != -1)
   1926			end--;
   1927		start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
   1928		if (start >= MAIN_SEGS(sbi))
   1929			break;
   1930		end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
   1931								start + 1);
   1932
   1933		if (section_alignment) {
   1934			start = rounddown(start, sbi->segs_per_sec);
   1935			end = roundup(end, sbi->segs_per_sec);
   1936		}
   1937
   1938		for (i = start; i < end; i++) {
   1939			if (test_and_clear_bit(i, prefree_map))
   1940				dirty_i->nr_dirty[PRE]--;
   1941		}
   1942
   1943		if (!f2fs_realtime_discard_enable(sbi))
   1944			continue;
   1945
   1946		if (force && start >= cpc->trim_start &&
   1947					(end - 1) <= cpc->trim_end)
   1948				continue;
   1949
   1950		if (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi)) {
   1951			f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
   1952				(end - start) << sbi->log_blocks_per_seg);
   1953			continue;
   1954		}
   1955next:
   1956		secno = GET_SEC_FROM_SEG(sbi, start);
   1957		start_segno = GET_SEG_FROM_SEC(sbi, secno);
   1958		if (!IS_CURSEC(sbi, secno) &&
   1959			!get_valid_blocks(sbi, start, true))
   1960			f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
   1961				sbi->segs_per_sec << sbi->log_blocks_per_seg);
   1962
   1963		start = start_segno + sbi->segs_per_sec;
   1964		if (start < end)
   1965			goto next;
   1966		else
   1967			end = start - 1;
   1968	}
   1969	mutex_unlock(&dirty_i->seglist_lock);
   1970
   1971	if (!f2fs_block_unit_discard(sbi))
   1972		goto wakeup;
   1973
   1974	/* send small discards */
   1975	list_for_each_entry_safe(entry, this, head, list) {
   1976		unsigned int cur_pos = 0, next_pos, len, total_len = 0;
   1977		bool is_valid = test_bit_le(0, entry->discard_map);
   1978
   1979find_next:
   1980		if (is_valid) {
   1981			next_pos = find_next_zero_bit_le(entry->discard_map,
   1982					sbi->blocks_per_seg, cur_pos);
   1983			len = next_pos - cur_pos;
   1984
   1985			if (f2fs_sb_has_blkzoned(sbi) ||
   1986			    (force && len < cpc->trim_minlen))
   1987				goto skip;
   1988
   1989			f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
   1990									len);
   1991			total_len += len;
   1992		} else {
   1993			next_pos = find_next_bit_le(entry->discard_map,
   1994					sbi->blocks_per_seg, cur_pos);
   1995		}
   1996skip:
   1997		cur_pos = next_pos;
   1998		is_valid = !is_valid;
   1999
   2000		if (cur_pos < sbi->blocks_per_seg)
   2001			goto find_next;
   2002
   2003		release_discard_addr(entry);
   2004		dcc->nr_discards -= total_len;
   2005	}
   2006
   2007wakeup:
   2008	wake_up_discard_thread(sbi, false);
   2009}
   2010
   2011int f2fs_start_discard_thread(struct f2fs_sb_info *sbi)
   2012{
   2013	dev_t dev = sbi->sb->s_bdev->bd_dev;
   2014	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
   2015	int err = 0;
   2016
   2017	if (!f2fs_realtime_discard_enable(sbi))
   2018		return 0;
   2019
   2020	dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
   2021				"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
   2022	if (IS_ERR(dcc->f2fs_issue_discard))
   2023		err = PTR_ERR(dcc->f2fs_issue_discard);
   2024
   2025	return err;
   2026}
   2027
   2028static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
   2029{
   2030	struct discard_cmd_control *dcc;
   2031	int err = 0, i;
   2032
   2033	if (SM_I(sbi)->dcc_info) {
   2034		dcc = SM_I(sbi)->dcc_info;
   2035		goto init_thread;
   2036	}
   2037
   2038	dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
   2039	if (!dcc)
   2040		return -ENOMEM;
   2041
   2042	dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
   2043	if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
   2044		dcc->discard_granularity = sbi->blocks_per_seg;
   2045	else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
   2046		dcc->discard_granularity = BLKS_PER_SEC(sbi);
   2047
   2048	INIT_LIST_HEAD(&dcc->entry_list);
   2049	for (i = 0; i < MAX_PLIST_NUM; i++)
   2050		INIT_LIST_HEAD(&dcc->pend_list[i]);
   2051	INIT_LIST_HEAD(&dcc->wait_list);
   2052	INIT_LIST_HEAD(&dcc->fstrim_list);
   2053	mutex_init(&dcc->cmd_lock);
   2054	atomic_set(&dcc->issued_discard, 0);
   2055	atomic_set(&dcc->queued_discard, 0);
   2056	atomic_set(&dcc->discard_cmd_cnt, 0);
   2057	dcc->nr_discards = 0;
   2058	dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
   2059	dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST;
   2060	dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME;
   2061	dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME;
   2062	dcc->max_discard_issue_time = DEF_MAX_DISCARD_ISSUE_TIME;
   2063	dcc->undiscard_blks = 0;
   2064	dcc->next_pos = 0;
   2065	dcc->root = RB_ROOT_CACHED;
   2066	dcc->rbtree_check = false;
   2067
   2068	init_waitqueue_head(&dcc->discard_wait_queue);
   2069	SM_I(sbi)->dcc_info = dcc;
   2070init_thread:
   2071	err = f2fs_start_discard_thread(sbi);
   2072	if (err) {
   2073		kfree(dcc);
   2074		SM_I(sbi)->dcc_info = NULL;
   2075	}
   2076
   2077	return err;
   2078}
   2079
   2080static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
   2081{
   2082	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
   2083
   2084	if (!dcc)
   2085		return;
   2086
   2087	f2fs_stop_discard_thread(sbi);
   2088
   2089	/*
    2090	 * Recovery can cache discard commands, so in the error path of
    2091	 * fill_super(), give them a chance to be handled.
   2092	 */
   2093	if (unlikely(atomic_read(&dcc->discard_cmd_cnt)))
   2094		f2fs_issue_discard_timeout(sbi);
   2095
   2096	kfree(dcc);
   2097	SM_I(sbi)->dcc_info = NULL;
   2098}
   2099
   2100static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
   2101{
   2102	struct sit_info *sit_i = SIT_I(sbi);
   2103
   2104	if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
   2105		sit_i->dirty_sentries++;
   2106		return false;
   2107	}
   2108
   2109	return true;
   2110}
   2111
   2112static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
   2113					unsigned int segno, int modified)
   2114{
   2115	struct seg_entry *se = get_seg_entry(sbi, segno);
   2116
   2117	se->type = type;
   2118	if (modified)
   2119		__mark_sit_entry_dirty(sbi, segno);
   2120}
   2121
   2122static inline unsigned long long get_segment_mtime(struct f2fs_sb_info *sbi,
   2123								block_t blkaddr)
   2124{
   2125	unsigned int segno = GET_SEGNO(sbi, blkaddr);
   2126
   2127	if (segno == NULL_SEGNO)
   2128		return 0;
   2129	return get_seg_entry(sbi, segno)->mtime;
   2130}
   2131
   2132static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr,
   2133						unsigned long long old_mtime)
   2134{
   2135	struct seg_entry *se;
   2136	unsigned int segno = GET_SEGNO(sbi, blkaddr);
   2137	unsigned long long ctime = get_mtime(sbi, false);
   2138	unsigned long long mtime = old_mtime ? old_mtime : ctime;
   2139
   2140	if (segno == NULL_SEGNO)
   2141		return;
   2142
   2143	se = get_seg_entry(sbi, segno);
   2144
   2145	if (!se->mtime)
   2146		se->mtime = mtime;
   2147	else
   2148		se->mtime = div_u64(se->mtime * se->valid_blocks + mtime,
   2149						se->valid_blocks + 1);
   2150
   2151	if (ctime > SIT_I(sbi)->max_mtime)
   2152		SIT_I(sbi)->max_mtime = ctime;
   2153}
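
/*
 * The segment mtime above is a running average weighted by the valid block
 * count; a small worked example: with se->mtime = 100 over 3 valid blocks
 * and one more block written at mtime 200, the new value is
 * (100 * 3 + 200) / (3 + 1) = 125.
 */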
   2154
   2155static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
   2156{
   2157	struct seg_entry *se;
   2158	unsigned int segno, offset;
   2159	long int new_vblocks;
   2160	bool exist;
   2161#ifdef CONFIG_F2FS_CHECK_FS
   2162	bool mir_exist;
   2163#endif
   2164
   2165	segno = GET_SEGNO(sbi, blkaddr);
   2166
   2167	se = get_seg_entry(sbi, segno);
   2168	new_vblocks = se->valid_blocks + del;
   2169	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
   2170
   2171	f2fs_bug_on(sbi, (new_vblocks < 0 ||
   2172			(new_vblocks > f2fs_usable_blks_in_seg(sbi, segno))));
   2173
   2174	se->valid_blocks = new_vblocks;
   2175
   2176	/* Update valid block bitmap */
   2177	if (del > 0) {
   2178		exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
   2179#ifdef CONFIG_F2FS_CHECK_FS
   2180		mir_exist = f2fs_test_and_set_bit(offset,
   2181						se->cur_valid_map_mir);
   2182		if (unlikely(exist != mir_exist)) {
   2183			f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d",
   2184				 blkaddr, exist);
   2185			f2fs_bug_on(sbi, 1);
   2186		}
   2187#endif
   2188		if (unlikely(exist)) {
   2189			f2fs_err(sbi, "Bitmap was wrongly set, blk:%u",
   2190				 blkaddr);
   2191			f2fs_bug_on(sbi, 1);
   2192			se->valid_blocks--;
   2193			del = 0;
   2194		}
   2195
   2196		if (f2fs_block_unit_discard(sbi) &&
   2197				!f2fs_test_and_set_bit(offset, se->discard_map))
   2198			sbi->discard_blks--;
   2199
   2200		/*
    2201		 * SSR should never reuse a block which is checkpointed
   2202		 * or newly invalidated.
   2203		 */
   2204		if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
   2205			if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
   2206				se->ckpt_valid_blocks++;
   2207		}
   2208	} else {
   2209		exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
   2210#ifdef CONFIG_F2FS_CHECK_FS
   2211		mir_exist = f2fs_test_and_clear_bit(offset,
   2212						se->cur_valid_map_mir);
   2213		if (unlikely(exist != mir_exist)) {
   2214			f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d",
   2215				 blkaddr, exist);
   2216			f2fs_bug_on(sbi, 1);
   2217		}
   2218#endif
   2219		if (unlikely(!exist)) {
   2220			f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u",
   2221				 blkaddr);
   2222			f2fs_bug_on(sbi, 1);
   2223			se->valid_blocks++;
   2224			del = 0;
   2225		} else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
   2226			/*
   2227			 * If checkpoints are off, we must not reuse data that
   2228			 * was used in the previous checkpoint. If it was used
   2229			 * before, we must track that to know how much space we
   2230			 * really have.
   2231			 */
   2232			if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
   2233				spin_lock(&sbi->stat_lock);
   2234				sbi->unusable_block_count++;
   2235				spin_unlock(&sbi->stat_lock);
   2236			}
   2237		}
   2238
   2239		if (f2fs_block_unit_discard(sbi) &&
   2240			f2fs_test_and_clear_bit(offset, se->discard_map))
   2241			sbi->discard_blks++;
   2242	}
   2243	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
   2244		se->ckpt_valid_blocks += del;
   2245
   2246	__mark_sit_entry_dirty(sbi, segno);
   2247
   2248	/* update total number of valid blocks to be written in ckpt area */
   2249	SIT_I(sbi)->written_valid_blocks += del;
   2250
   2251	if (__is_large_section(sbi))
   2252		get_sec_entry(sbi, segno)->valid_blocks += del;
   2253}
   2254
   2255void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
   2256{
   2257	unsigned int segno = GET_SEGNO(sbi, addr);
   2258	struct sit_info *sit_i = SIT_I(sbi);
   2259
   2260	f2fs_bug_on(sbi, addr == NULL_ADDR);
   2261	if (addr == NEW_ADDR || addr == COMPRESS_ADDR)
   2262		return;
   2263
   2264	invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);
   2265	f2fs_invalidate_compress_page(sbi, addr);
   2266
   2267	/* add it into sit main buffer */
   2268	down_write(&sit_i->sentry_lock);
   2269
   2270	update_segment_mtime(sbi, addr, 0);
   2271	update_sit_entry(sbi, addr, -1);
   2272
   2273	/* add it into dirty seglist */
   2274	locate_dirty_segment(sbi, segno);
   2275
   2276	up_write(&sit_i->sentry_lock);
   2277}
   2278
   2279bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
   2280{
   2281	struct sit_info *sit_i = SIT_I(sbi);
   2282	unsigned int segno, offset;
   2283	struct seg_entry *se;
   2284	bool is_cp = false;
   2285
   2286	if (!__is_valid_data_blkaddr(blkaddr))
   2287		return true;
   2288
   2289	down_read(&sit_i->sentry_lock);
   2290
   2291	segno = GET_SEGNO(sbi, blkaddr);
   2292	se = get_seg_entry(sbi, segno);
   2293	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
   2294
   2295	if (f2fs_test_bit(offset, se->ckpt_valid_map))
   2296		is_cp = true;
   2297
   2298	up_read(&sit_i->sentry_lock);
   2299
   2300	return is_cp;
   2301}
   2302
   2303/*
    2304 * This function must be called while holding the curseg_mutex lock
   2305 */
   2306static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
   2307					struct f2fs_summary *sum)
   2308{
   2309	struct curseg_info *curseg = CURSEG_I(sbi, type);
   2310	void *addr = curseg->sum_blk;
   2311
   2312	addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
   2313	memcpy(addr, sum, sizeof(struct f2fs_summary));
   2314}
   2315
   2316/*
   2317 * Calculate the number of current summary pages for writing
   2318 */
   2319int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
   2320{
   2321	int valid_sum_count = 0;
   2322	int i, sum_in_page;
   2323
   2324	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
   2325		if (sbi->ckpt->alloc_type[i] == SSR)
   2326			valid_sum_count += sbi->blocks_per_seg;
   2327		else {
   2328			if (for_ra)
   2329				valid_sum_count += le16_to_cpu(
   2330					F2FS_CKPT(sbi)->cur_data_blkoff[i]);
   2331			else
   2332				valid_sum_count += curseg_blkoff(sbi, i);
   2333		}
   2334	}
   2335
   2336	sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
   2337			SUM_FOOTER_SIZE) / SUMMARY_SIZE;
   2338	if (valid_sum_count <= sum_in_page)
   2339		return 1;
   2340	else if ((valid_sum_count - sum_in_page) <=
   2341		(PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
   2342		return 2;
   2343	return 3;
   2344}
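
/*
 * A rough sizing example, assuming 4KB blocks and the usual 7-byte
 * f2fs_summary: one page holds (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
 * SUM_FOOTER_SIZE) / SUMMARY_SIZE = 512 entries, i.e. a full segment's
 * worth, which is why at most three pages are ever needed for the three
 * data logs.
 */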
   2345
   2346/*
   2347 * Caller should put this summary page
   2348 */
   2349struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
   2350{
   2351	if (unlikely(f2fs_cp_error(sbi)))
   2352		return ERR_PTR(-EIO);
   2353	return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno));
   2354}
   2355
   2356void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
   2357					void *src, block_t blk_addr)
   2358{
   2359	struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
   2360
   2361	memcpy(page_address(page), src, PAGE_SIZE);
   2362	set_page_dirty(page);
   2363	f2fs_put_page(page, 1);
   2364}
   2365
   2366static void write_sum_page(struct f2fs_sb_info *sbi,
   2367			struct f2fs_summary_block *sum_blk, block_t blk_addr)
   2368{
   2369	f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
   2370}
   2371
   2372static void write_current_sum_page(struct f2fs_sb_info *sbi,
   2373						int type, block_t blk_addr)
   2374{
   2375	struct curseg_info *curseg = CURSEG_I(sbi, type);
   2376	struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
   2377	struct f2fs_summary_block *src = curseg->sum_blk;
   2378	struct f2fs_summary_block *dst;
   2379
   2380	dst = (struct f2fs_summary_block *)page_address(page);
   2381	memset(dst, 0, PAGE_SIZE);
   2382
   2383	mutex_lock(&curseg->curseg_mutex);
   2384
   2385	down_read(&curseg->journal_rwsem);
   2386	memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
   2387	up_read(&curseg->journal_rwsem);
   2388
   2389	memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
   2390	memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
   2391
   2392	mutex_unlock(&curseg->curseg_mutex);
   2393
   2394	set_page_dirty(page);
   2395	f2fs_put_page(page, 1);
   2396}
   2397
   2398static int is_next_segment_free(struct f2fs_sb_info *sbi,
   2399				struct curseg_info *curseg, int type)
   2400{
   2401	unsigned int segno = curseg->segno + 1;
   2402	struct free_segmap_info *free_i = FREE_I(sbi);
   2403
   2404	if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
   2405		return !test_bit(segno, free_i->free_segmap);
   2406	return 0;
   2407}
   2408
   2409/*
    2410 * Find a new segment from the free segment bitmap, in the requested allocation direction.
    2411 * This function must always succeed; otherwise it is a BUG.
   2412 */
   2413static void get_new_segment(struct f2fs_sb_info *sbi,
   2414			unsigned int *newseg, bool new_sec, int dir)
   2415{
   2416	struct free_segmap_info *free_i = FREE_I(sbi);
   2417	unsigned int segno, secno, zoneno;
   2418	unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
   2419	unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
   2420	unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
   2421	unsigned int left_start = hint;
   2422	bool init = true;
   2423	int go_left = 0;
   2424	int i;
   2425
   2426	spin_lock(&free_i->segmap_lock);
   2427
   2428	if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
   2429		segno = find_next_zero_bit(free_i->free_segmap,
   2430			GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
   2431		if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
   2432			goto got_it;
   2433	}
   2434find_other_zone:
   2435	secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
   2436	if (secno >= MAIN_SECS(sbi)) {
   2437		if (dir == ALLOC_RIGHT) {
   2438			secno = find_first_zero_bit(free_i->free_secmap,
   2439							MAIN_SECS(sbi));
   2440			f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
   2441		} else {
   2442			go_left = 1;
   2443			left_start = hint - 1;
   2444		}
   2445	}
   2446	if (go_left == 0)
   2447		goto skip_left;
   2448
   2449	while (test_bit(left_start, free_i->free_secmap)) {
   2450		if (left_start > 0) {
   2451			left_start--;
   2452			continue;
   2453		}
   2454		left_start = find_first_zero_bit(free_i->free_secmap,
   2455							MAIN_SECS(sbi));
   2456		f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
   2457		break;
   2458	}
   2459	secno = left_start;
   2460skip_left:
   2461	segno = GET_SEG_FROM_SEC(sbi, secno);
   2462	zoneno = GET_ZONE_FROM_SEC(sbi, secno);
   2463
   2464	/* give up on finding another zone */
   2465	if (!init)
   2466		goto got_it;
   2467	if (sbi->secs_per_zone == 1)
   2468		goto got_it;
   2469	if (zoneno == old_zoneno)
   2470		goto got_it;
   2471	if (dir == ALLOC_LEFT) {
   2472		if (!go_left && zoneno + 1 >= total_zones)
   2473			goto got_it;
   2474		if (go_left && zoneno == 0)
   2475			goto got_it;
   2476	}
   2477	for (i = 0; i < NR_CURSEG_TYPE; i++)
   2478		if (CURSEG_I(sbi, i)->zone == zoneno)
   2479			break;
   2480
   2481	if (i < NR_CURSEG_TYPE) {
    2482		/* zone is in use, try another */
   2483		if (go_left)
   2484			hint = zoneno * sbi->secs_per_zone - 1;
   2485		else if (zoneno + 1 >= total_zones)
   2486			hint = 0;
   2487		else
   2488			hint = (zoneno + 1) * sbi->secs_per_zone;
   2489		init = false;
   2490		goto find_other_zone;
   2491	}
   2492got_it:
    2493	/* mark the segment as in-use in the free segmap */
   2494	f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
   2495	__set_inuse(sbi, segno);
   2496	*newseg = segno;
   2497	spin_unlock(&free_i->segmap_lock);
   2498}
   2499
   2500static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
   2501{
   2502	struct curseg_info *curseg = CURSEG_I(sbi, type);
   2503	struct summary_footer *sum_footer;
   2504	unsigned short seg_type = curseg->seg_type;
   2505
   2506	curseg->inited = true;
   2507	curseg->segno = curseg->next_segno;
   2508	curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
   2509	curseg->next_blkoff = 0;
   2510	curseg->next_segno = NULL_SEGNO;
   2511
   2512	sum_footer = &(curseg->sum_blk->footer);
   2513	memset(sum_footer, 0, sizeof(struct summary_footer));
   2514
   2515	sanity_check_seg_type(sbi, seg_type);
   2516
   2517	if (IS_DATASEG(seg_type))
   2518		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
   2519	if (IS_NODESEG(seg_type))
   2520		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
   2521	__set_sit_entry_type(sbi, seg_type, curseg->segno, modified);
   2522}
   2523
   2524static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
   2525{
   2526	struct curseg_info *curseg = CURSEG_I(sbi, type);
   2527	unsigned short seg_type = curseg->seg_type;
   2528
   2529	sanity_check_seg_type(sbi, seg_type);
   2530	if (f2fs_need_rand_seg(sbi))
   2531		return prandom_u32() % (MAIN_SECS(sbi) * sbi->segs_per_sec);
   2532
    2533	/* if segs_per_sec is larger than 1, we need to keep the original policy. */
   2534	if (__is_large_section(sbi))
   2535		return curseg->segno;
   2536
    2537	/* the in-memory log may not be located on any segment after mount */
   2538	if (!curseg->inited)
   2539		return 0;
   2540
   2541	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
   2542		return 0;
   2543
   2544	if (test_opt(sbi, NOHEAP) &&
   2545		(seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type)))
   2546		return 0;
   2547
   2548	if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
   2549		return SIT_I(sbi)->last_victim[ALLOC_NEXT];
   2550
   2551	/* find segments from 0 to reuse freed segments */
   2552	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
   2553		return 0;
   2554
   2555	return curseg->segno;
   2556}
   2557
   2558/*
   2559 * Allocate a current working segment.
   2560 * This function always allocates a free segment in LFS manner.
   2561 */
   2562static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
   2563{
   2564	struct curseg_info *curseg = CURSEG_I(sbi, type);
   2565	unsigned short seg_type = curseg->seg_type;
   2566	unsigned int segno = curseg->segno;
   2567	int dir = ALLOC_LEFT;
   2568
   2569	if (curseg->inited)
   2570		write_sum_page(sbi, curseg->sum_blk,
   2571				GET_SUM_BLOCK(sbi, segno));
   2572	if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
   2573		dir = ALLOC_RIGHT;
   2574
   2575	if (test_opt(sbi, NOHEAP))
   2576		dir = ALLOC_RIGHT;
   2577
   2578	segno = __get_next_segno(sbi, type);
   2579	get_new_segment(sbi, &segno, new_sec, dir);
   2580	curseg->next_segno = segno;
   2581	reset_curseg(sbi, type, 1);
   2582	curseg->alloc_type = LFS;
   2583	if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
   2584		curseg->fragment_remained_chunk =
   2585				prandom_u32() % sbi->max_fragment_chunk + 1;
   2586}
   2587
   2588static int __next_free_blkoff(struct f2fs_sb_info *sbi,
   2589					int segno, block_t start)
   2590{
   2591	struct seg_entry *se = get_seg_entry(sbi, segno);
   2592	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
   2593	unsigned long *target_map = SIT_I(sbi)->tmp_map;
   2594	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
   2595	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
   2596	int i;
   2597
   2598	for (i = 0; i < entries; i++)
   2599		target_map[i] = ckpt_map[i] | cur_map[i];
   2600
   2601	return __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
   2602}
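
/*
 * Illustration with a hypothetical 8-block segment: if cur_valid_map is
 * 1010 0000 and ckpt_valid_map is 1100 0000, target_map becomes 1110 0000,
 * so the first offset usable for SSR (free in both maps) from start 0 is 3.
 */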
   2603
   2604/*
    2605 * If a segment is written in LFS manner, the next block offset is simply obtained
    2606 * by increasing the current block offset. However, if a segment is written in
    2607 * SSR manner, the next block offset is obtained by calling __next_free_blkoff.
   2608 */
   2609static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
   2610				struct curseg_info *seg)
   2611{
   2612	if (seg->alloc_type == SSR) {
   2613		seg->next_blkoff =
   2614			__next_free_blkoff(sbi, seg->segno,
   2615						seg->next_blkoff + 1);
   2616	} else {
   2617		seg->next_blkoff++;
   2618		if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) {
    2619			/* To allocate block chunks of different sizes, use a random number */
   2620			if (--seg->fragment_remained_chunk <= 0) {
   2621				seg->fragment_remained_chunk =
   2622				   prandom_u32() % sbi->max_fragment_chunk + 1;
   2623				seg->next_blkoff +=
   2624				   prandom_u32() % sbi->max_fragment_hole + 1;
   2625			}
   2626		}
   2627	}
   2628}
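
/*
 * In FS_MODE_FRAGMENT_BLK, for example with max_fragment_chunk = 4 and
 * max_fragment_hole = 2, LFS allocation above advances through a randomly
 * sized chunk of 1..4 blocks and then skips a hole of 1..2 blocks, which is
 * intended to fragment the on-disk layout for testing.
 */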
   2629
   2630bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
   2631{
   2632	return __next_free_blkoff(sbi, segno, 0) < sbi->blocks_per_seg;
   2633}
   2634
   2635/*
    2636 * This function always allocates a used segment (from the dirty seglist) in SSR
    2637 * manner, so it should recover the existing segment information of valid blocks.
   2638 */
   2639static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush)
   2640{
   2641	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
   2642	struct curseg_info *curseg = CURSEG_I(sbi, type);
   2643	unsigned int new_segno = curseg->next_segno;
   2644	struct f2fs_summary_block *sum_node;
   2645	struct page *sum_page;
   2646
   2647	if (flush)
   2648		write_sum_page(sbi, curseg->sum_blk,
   2649					GET_SUM_BLOCK(sbi, curseg->segno));
   2650
   2651	__set_test_and_inuse(sbi, new_segno);
   2652
   2653	mutex_lock(&dirty_i->seglist_lock);
   2654	__remove_dirty_segment(sbi, new_segno, PRE);
   2655	__remove_dirty_segment(sbi, new_segno, DIRTY);
   2656	mutex_unlock(&dirty_i->seglist_lock);
   2657
   2658	reset_curseg(sbi, type, 1);
   2659	curseg->alloc_type = SSR;
   2660	curseg->next_blkoff = __next_free_blkoff(sbi, curseg->segno, 0);
   2661
   2662	sum_page = f2fs_get_sum_page(sbi, new_segno);
   2663	if (IS_ERR(sum_page)) {
   2664		/* GC won't be able to use stale summary pages by cp_error */
   2665		memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
   2666		return;
   2667	}
   2668	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
   2669	memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
   2670	f2fs_put_page(sum_page, 1);
   2671}
   2672
   2673static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
   2674				int alloc_mode, unsigned long long age);
   2675
   2676static void get_atssr_segment(struct f2fs_sb_info *sbi, int type,
   2677					int target_type, int alloc_mode,
   2678					unsigned long long age)
   2679{
   2680	struct curseg_info *curseg = CURSEG_I(sbi, type);
   2681
   2682	curseg->seg_type = target_type;
   2683
   2684	if (get_ssr_segment(sbi, type, alloc_mode, age)) {
   2685		struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno);
   2686
   2687		curseg->seg_type = se->type;
   2688		change_curseg(sbi, type, true);
   2689	} else {
   2690		/* allocate cold segment by default */
   2691		curseg->seg_type = CURSEG_COLD_DATA;
   2692		new_curseg(sbi, type, true);
   2693	}
   2694	stat_inc_seg_type(sbi, curseg);
   2695}
   2696
   2697static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
   2698{
   2699	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC);
   2700
   2701	if (!sbi->am.atgc_enabled)
   2702		return;
   2703
   2704	f2fs_down_read(&SM_I(sbi)->curseg_lock);
   2705
   2706	mutex_lock(&curseg->curseg_mutex);
   2707	down_write(&SIT_I(sbi)->sentry_lock);
   2708
   2709	get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, CURSEG_COLD_DATA, SSR, 0);
   2710
   2711	up_write(&SIT_I(sbi)->sentry_lock);
   2712	mutex_unlock(&curseg->curseg_mutex);
   2713
   2714	f2fs_up_read(&SM_I(sbi)->curseg_lock);
    2715}
    2716
   2717void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
   2718{
   2719	__f2fs_init_atgc_curseg(sbi);
   2720}
   2721
   2722static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
   2723{
   2724	struct curseg_info *curseg = CURSEG_I(sbi, type);
   2725
   2726	mutex_lock(&curseg->curseg_mutex);
   2727	if (!curseg->inited)
   2728		goto out;
   2729
   2730	if (get_valid_blocks(sbi, curseg->segno, false)) {
   2731		write_sum_page(sbi, curseg->sum_blk,
   2732				GET_SUM_BLOCK(sbi, curseg->segno));
   2733	} else {
   2734		mutex_lock(&DIRTY_I(sbi)->seglist_lock);
   2735		__set_test_and_free(sbi, curseg->segno, true);
   2736		mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
   2737	}
   2738out:
   2739	mutex_unlock(&curseg->curseg_mutex);
   2740}
   2741
   2742void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi)
   2743{
   2744	__f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
   2745
   2746	if (sbi->am.atgc_enabled)
   2747		__f2fs_save_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
   2748}
   2749
   2750static void __f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
   2751{
   2752	struct curseg_info *curseg = CURSEG_I(sbi, type);
   2753
   2754	mutex_lock(&curseg->curseg_mutex);
   2755	if (!curseg->inited)
   2756		goto out;
   2757	if (get_valid_blocks(sbi, curseg->segno, false))
   2758		goto out;
   2759
   2760	mutex_lock(&DIRTY_I(sbi)->seglist_lock);
   2761	__set_test_and_inuse(sbi, curseg->segno);
   2762	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
   2763out:
   2764	mutex_unlock(&curseg->curseg_mutex);
   2765}
   2766
   2767void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi)
   2768{
   2769	__f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
   2770
   2771	if (sbi->am.atgc_enabled)
   2772		__f2fs_restore_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
   2773}
   2774
   2775static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
   2776				int alloc_mode, unsigned long long age)
   2777{
   2778	struct curseg_info *curseg = CURSEG_I(sbi, type);
   2779	const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
   2780	unsigned segno = NULL_SEGNO;
   2781	unsigned short seg_type = curseg->seg_type;
   2782	int i, cnt;
   2783	bool reversed = false;
   2784
   2785	sanity_check_seg_type(sbi, seg_type);
   2786
    2787	/* f2fs_need_SSR() has already forced us to do this */
   2788	if (!v_ops->get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) {
   2789		curseg->next_segno = segno;
   2790		return 1;
   2791	}
   2792
   2793	/* For node segments, let's do SSR more intensively */
   2794	if (IS_NODESEG(seg_type)) {
   2795		if (seg_type >= CURSEG_WARM_NODE) {
   2796			reversed = true;
   2797			i = CURSEG_COLD_NODE;
   2798		} else {
   2799			i = CURSEG_HOT_NODE;
   2800		}
   2801		cnt = NR_CURSEG_NODE_TYPE;
   2802	} else {
   2803		if (seg_type >= CURSEG_WARM_DATA) {
   2804			reversed = true;
   2805			i = CURSEG_COLD_DATA;
   2806		} else {
   2807			i = CURSEG_HOT_DATA;
   2808		}
   2809		cnt = NR_CURSEG_DATA_TYPE;
   2810	}
   2811
   2812	for (; cnt-- > 0; reversed ? i-- : i++) {
   2813		if (i == seg_type)
   2814			continue;
   2815		if (!v_ops->get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) {
   2816			curseg->next_segno = segno;
   2817			return 1;
   2818		}
   2819	}
   2820
   2821	/* find valid_blocks=0 in dirty list */
   2822	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
   2823		segno = get_free_segment(sbi);
   2824		if (segno != NULL_SEGNO) {
   2825			curseg->next_segno = segno;
   2826			return 1;
   2827		}
   2828	}
   2829	return 0;
   2830}
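
/*
 * Fallback scan order above, by way of example: when allocating for
 * CURSEG_COLD_DATA the loop walks the data logs from COLD down to HOT, and
 * for CURSEG_HOT_DATA from HOT up to COLD, skipping the requested type
 * itself in both cases, so the nearest temperature is tried first.
 */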
   2831
   2832/*
    2833 * Flush out the current segment and replace it with a new segment.
    2834 * This function must always succeed; otherwise it is a BUG.
   2835 */
   2836static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
   2837						int type, bool force)
   2838{
   2839	struct curseg_info *curseg = CURSEG_I(sbi, type);
   2840
   2841	if (force)
   2842		new_curseg(sbi, type, true);
   2843	else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
   2844					curseg->seg_type == CURSEG_WARM_NODE)
   2845		new_curseg(sbi, type, false);
   2846	else if (curseg->alloc_type == LFS &&
   2847			is_next_segment_free(sbi, curseg, type) &&
   2848			likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
   2849		new_curseg(sbi, type, false);
   2850	else if (f2fs_need_SSR(sbi) &&
   2851			get_ssr_segment(sbi, type, SSR, 0))
   2852		change_curseg(sbi, type, true);
   2853	else
   2854		new_curseg(sbi, type, false);
   2855
   2856	stat_inc_seg_type(sbi, curseg);
   2857}
   2858
   2859void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
   2860					unsigned int start, unsigned int end)
   2861{
   2862	struct curseg_info *curseg = CURSEG_I(sbi, type);
   2863	unsigned int segno;
   2864
   2865	f2fs_down_read(&SM_I(sbi)->curseg_lock);
   2866	mutex_lock(&curseg->curseg_mutex);
   2867	down_write(&SIT_I(sbi)->sentry_lock);
   2868
   2869	segno = CURSEG_I(sbi, type)->segno;
   2870	if (segno < start || segno > end)
   2871		goto unlock;
   2872
   2873	if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0))
   2874		change_curseg(sbi, type, true);
   2875	else
   2876		new_curseg(sbi, type, true);
   2877
   2878	stat_inc_seg_type(sbi, curseg);
   2879
   2880	locate_dirty_segment(sbi, segno);
   2881unlock:
   2882	up_write(&SIT_I(sbi)->sentry_lock);
   2883
   2884	if (segno != curseg->segno)
   2885		f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u",
   2886			    type, segno, curseg->segno);
   2887
   2888	mutex_unlock(&curseg->curseg_mutex);
   2889	f2fs_up_read(&SM_I(sbi)->curseg_lock);
   2890}
   2891
   2892static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
   2893						bool new_sec, bool force)
   2894{
   2895	struct curseg_info *curseg = CURSEG_I(sbi, type);
   2896	unsigned int old_segno;
   2897
   2898	if (!curseg->inited)
   2899		goto alloc;
   2900
   2901	if (force || curseg->next_blkoff ||
   2902		get_valid_blocks(sbi, curseg->segno, new_sec))
   2903		goto alloc;
   2904
   2905	if (!get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
   2906		return;
   2907alloc:
   2908	old_segno = curseg->segno;
   2909	SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
   2910	locate_dirty_segment(sbi, old_segno);
   2911}
   2912
   2913static void __allocate_new_section(struct f2fs_sb_info *sbi,
   2914						int type, bool force)
   2915{
   2916	__allocate_new_segment(sbi, type, true, force);
   2917}
   2918
   2919void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
   2920{
   2921	f2fs_down_read(&SM_I(sbi)->curseg_lock);
   2922	down_write(&SIT_I(sbi)->sentry_lock);
   2923	__allocate_new_section(sbi, type, force);
   2924	up_write(&SIT_I(sbi)->sentry_lock);
   2925	f2fs_up_read(&SM_I(sbi)->curseg_lock);
   2926}
   2927
   2928void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
   2929{
   2930	int i;
   2931
   2932	f2fs_down_read(&SM_I(sbi)->curseg_lock);
   2933	down_write(&SIT_I(sbi)->sentry_lock);
   2934	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
   2935		__allocate_new_segment(sbi, i, false, false);
   2936	up_write(&SIT_I(sbi)->sentry_lock);
   2937	f2fs_up_read(&SM_I(sbi)->curseg_lock);
   2938}
   2939
   2940static const struct segment_allocation default_salloc_ops = {
   2941	.allocate_segment = allocate_segment_by_default,
   2942};
   2943
   2944bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
   2945						struct cp_control *cpc)
   2946{
   2947	__u64 trim_start = cpc->trim_start;
   2948	bool has_candidate = false;
   2949
   2950	down_write(&SIT_I(sbi)->sentry_lock);
   2951	for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
   2952		if (add_discard_addrs(sbi, cpc, true)) {
   2953			has_candidate = true;
   2954			break;
   2955		}
   2956	}
   2957	up_write(&SIT_I(sbi)->sentry_lock);
   2958
   2959	cpc->trim_start = trim_start;
   2960	return has_candidate;
   2961}
   2962
   2963static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
   2964					struct discard_policy *dpolicy,
   2965					unsigned int start, unsigned int end)
   2966{
   2967	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
   2968	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
   2969	struct rb_node **insert_p = NULL, *insert_parent = NULL;
   2970	struct discard_cmd *dc;
   2971	struct blk_plug plug;
   2972	int issued;
   2973	unsigned int trimmed = 0;
   2974
   2975next:
   2976	issued = 0;
   2977
   2978	mutex_lock(&dcc->cmd_lock);
   2979	if (unlikely(dcc->rbtree_check))
   2980		f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
   2981							&dcc->root, false));
   2982
   2983	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
   2984					NULL, start,
   2985					(struct rb_entry **)&prev_dc,
   2986					(struct rb_entry **)&next_dc,
   2987					&insert_p, &insert_parent, true, NULL);
   2988	if (!dc)
   2989		dc = next_dc;
   2990
   2991	blk_start_plug(&plug);
   2992
   2993	while (dc && dc->lstart <= end) {
   2994		struct rb_node *node;
   2995		int err = 0;
   2996
   2997		if (dc->len < dpolicy->granularity)
   2998			goto skip;
   2999
   3000		if (dc->state != D_PREP) {
   3001			list_move_tail(&dc->list, &dcc->fstrim_list);
   3002			goto skip;
   3003		}
   3004
   3005		err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
   3006
   3007		if (issued >= dpolicy->max_requests) {
   3008			start = dc->lstart + dc->len;
   3009
   3010			if (err)
   3011				__remove_discard_cmd(sbi, dc);
   3012
   3013			blk_finish_plug(&plug);
   3014			mutex_unlock(&dcc->cmd_lock);
   3015			trimmed += __wait_all_discard_cmd(sbi, NULL);
   3016			f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
   3017			goto next;
   3018		}
   3019skip:
   3020		node = rb_next(&dc->rb_node);
   3021		if (err)
   3022			__remove_discard_cmd(sbi, dc);
   3023		dc = rb_entry_safe(node, struct discard_cmd, rb_node);
   3024
   3025		if (fatal_signal_pending(current))
   3026			break;
   3027	}
   3028
   3029	blk_finish_plug(&plug);
   3030	mutex_unlock(&dcc->cmd_lock);
   3031
   3032	return trimmed;
   3033}
   3034
   3035int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
   3036{
   3037	__u64 start = F2FS_BYTES_TO_BLK(range->start);
   3038	__u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
   3039	unsigned int start_segno, end_segno;
   3040	block_t start_block, end_block;
   3041	struct cp_control cpc;
   3042	struct discard_policy dpolicy;
   3043	unsigned long long trimmed = 0;
   3044	int err = 0;
   3045	bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi);
   3046
   3047	if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
   3048		return -EINVAL;
   3049
   3050	if (end < MAIN_BLKADDR(sbi))
   3051		goto out;
   3052
   3053	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
   3054		f2fs_warn(sbi, "Found FS corruption, run fsck to fix.");
   3055		return -EFSCORRUPTED;
   3056	}
   3057
   3058	/* start/end segment number in main_area */
   3059	start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
   3060	end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
   3061						GET_SEGNO(sbi, end);
   3062	if (need_align) {
   3063		start_segno = rounddown(start_segno, sbi->segs_per_sec);
   3064		end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
   3065	}
   3066
   3067	cpc.reason = CP_DISCARD;
   3068	cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
   3069	cpc.trim_start = start_segno;
   3070	cpc.trim_end = end_segno;
   3071
   3072	if (sbi->discard_blks == 0)
   3073		goto out;
   3074
   3075	f2fs_down_write(&sbi->gc_lock);
   3076	err = f2fs_write_checkpoint(sbi, &cpc);
   3077	f2fs_up_write(&sbi->gc_lock);
   3078	if (err)
   3079		goto out;
   3080
   3081	/*
    3082	 * We filed discard candidates, but we don't actually need to wait for
    3083	 * all of them, since they'll be issued at idle time along with the runtime
    3084	 * discard option. The user configuration apparently relies on runtime discard
    3085	 * or periodic fstrim instead of waiting here.
   3086	 */
   3087	if (f2fs_realtime_discard_enable(sbi))
   3088		goto out;
   3089
   3090	start_block = START_BLOCK(sbi, start_segno);
   3091	end_block = START_BLOCK(sbi, end_segno + 1);
   3092
   3093	__init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
   3094	trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
   3095					start_block, end_block);
   3096
   3097	trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
   3098					start_block, end_block);
   3099out:
   3100	if (!err)
   3101		range->len = F2FS_BLK_TO_BYTES(trimmed);
   3102	return err;
   3103}
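
/*
 * This is the backend of the FITRIM ioctl (e.g. "fstrim /mnt" ends up here);
 * on success range->len reports how many bytes were actually trimmed, which
 * is zero when runtime discard is enabled since candidates are then left to
 * the discard thread.
 */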
   3104
   3105static bool __has_curseg_space(struct f2fs_sb_info *sbi,
   3106					struct curseg_info *curseg)
   3107{
   3108	return curseg->next_blkoff < f2fs_usable_blks_in_seg(sbi,
   3109							curseg->segno);
   3110}
   3111
   3112int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
   3113{
   3114	switch (hint) {
   3115	case WRITE_LIFE_SHORT:
   3116		return CURSEG_HOT_DATA;
   3117	case WRITE_LIFE_EXTREME:
   3118		return CURSEG_COLD_DATA;
   3119	default:
   3120		return CURSEG_WARM_DATA;
   3121	}
   3122}
   3123
   3124static int __get_segment_type_2(struct f2fs_io_info *fio)
   3125{
   3126	if (fio->type == DATA)
   3127		return CURSEG_HOT_DATA;
   3128	else
   3129		return CURSEG_HOT_NODE;
   3130}
   3131
   3132static int __get_segment_type_4(struct f2fs_io_info *fio)
   3133{
   3134	if (fio->type == DATA) {
   3135		struct inode *inode = fio->page->mapping->host;
   3136
   3137		if (S_ISDIR(inode->i_mode))
   3138			return CURSEG_HOT_DATA;
   3139		else
   3140			return CURSEG_COLD_DATA;
   3141	} else {
   3142		if (IS_DNODE(fio->page) && is_cold_node(fio->page))
   3143			return CURSEG_WARM_NODE;
   3144		else
   3145			return CURSEG_COLD_NODE;
   3146	}
   3147}
   3148
   3149static int __get_segment_type_6(struct f2fs_io_info *fio)
   3150{
   3151	if (fio->type == DATA) {
   3152		struct inode *inode = fio->page->mapping->host;
   3153
   3154		if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
   3155			return CURSEG_COLD_DATA_PINNED;
   3156
   3157		if (page_private_gcing(fio->page)) {
   3158			if (fio->sbi->am.atgc_enabled &&
   3159				(fio->io_type == FS_DATA_IO) &&
   3160				(fio->sbi->gc_mode != GC_URGENT_HIGH))
   3161				return CURSEG_ALL_DATA_ATGC;
   3162			else
   3163				return CURSEG_COLD_DATA;
   3164		}
   3165		if (file_is_cold(inode) || f2fs_need_compress_data(inode))
   3166			return CURSEG_COLD_DATA;
   3167		if (file_is_hot(inode) ||
   3168				is_inode_flag_set(inode, FI_HOT_DATA) ||
   3169				f2fs_is_atomic_file(inode))
   3170			return CURSEG_HOT_DATA;
   3171		return f2fs_rw_hint_to_seg_type(inode->i_write_hint);
   3172	} else {
   3173		if (IS_DNODE(fio->page))
   3174			return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
   3175						CURSEG_HOT_NODE;
   3176		return CURSEG_COLD_NODE;
   3177	}
   3178}
   3179
   3180static int __get_segment_type(struct f2fs_io_info *fio)
   3181{
   3182	int type = 0;
   3183
   3184	switch (F2FS_OPTION(fio->sbi).active_logs) {
   3185	case 2:
   3186		type = __get_segment_type_2(fio);
   3187		break;
   3188	case 4:
   3189		type = __get_segment_type_4(fio);
   3190		break;
   3191	case 6:
   3192		type = __get_segment_type_6(fio);
   3193		break;
   3194	default:
   3195		f2fs_bug_on(fio->sbi, true);
   3196	}
   3197
   3198	if (IS_HOT(type))
   3199		fio->temp = HOT;
   3200	else if (IS_WARM(type))
   3201		fio->temp = WARM;
   3202	else
   3203		fio->temp = COLD;
   3204	return type;
   3205}
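
/*
 * With the default 6 active logs the steering above works out roughly as:
 * hot/directory/atomic data -> HOT, cold/compressed/GC'ed data -> COLD (or
 * ATGC when enabled), remaining data by write hint -> HOT/WARM/COLD;
 * direct cold node blocks -> WARM, other direct nodes -> HOT, and indirect
 * nodes -> COLD.
 */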
   3206
   3207void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
   3208		block_t old_blkaddr, block_t *new_blkaddr,
   3209		struct f2fs_summary *sum, int type,
   3210		struct f2fs_io_info *fio)
   3211{
   3212	struct sit_info *sit_i = SIT_I(sbi);
   3213	struct curseg_info *curseg = CURSEG_I(sbi, type);
   3214	unsigned long long old_mtime;
   3215	bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
   3216	struct seg_entry *se = NULL;
   3217
   3218	f2fs_down_read(&SM_I(sbi)->curseg_lock);
   3219
   3220	mutex_lock(&curseg->curseg_mutex);
   3221	down_write(&sit_i->sentry_lock);
   3222
   3223	if (from_gc) {
   3224		f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO);
   3225		se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr));
   3226		sanity_check_seg_type(sbi, se->type);
   3227		f2fs_bug_on(sbi, IS_NODESEG(se->type));
   3228	}
   3229	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
   3230
   3231	f2fs_bug_on(sbi, curseg->next_blkoff >= sbi->blocks_per_seg);
   3232
   3233	f2fs_wait_discard_bio(sbi, *new_blkaddr);
   3234
   3235	/*
    3236	 * __add_sum_entry must be called while holding the curseg_mutex
    3237	 * because this function updates a summary entry in the
   3238	 * current summary block.
   3239	 */
   3240	__add_sum_entry(sbi, type, sum);
   3241
   3242	__refresh_next_blkoff(sbi, curseg);
   3243
   3244	stat_inc_block_count(sbi, curseg);
   3245
   3246	if (from_gc) {
   3247		old_mtime = get_segment_mtime(sbi, old_blkaddr);
   3248	} else {
   3249		update_segment_mtime(sbi, old_blkaddr, 0);
   3250		old_mtime = 0;
   3251	}
   3252	update_segment_mtime(sbi, *new_blkaddr, old_mtime);
   3253
   3254	/*
   3255	 * SIT information should be updated before segment allocation,
   3256	 * since SSR needs latest valid block information.
   3257	 */
   3258	update_sit_entry(sbi, *new_blkaddr, 1);
   3259	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
   3260		update_sit_entry(sbi, old_blkaddr, -1);
   3261
   3262	if (!__has_curseg_space(sbi, curseg)) {
   3263		if (from_gc)
   3264			get_atssr_segment(sbi, type, se->type,
   3265						AT_SSR, se->mtime);
   3266		else
   3267			sit_i->s_ops->allocate_segment(sbi, type, false);
   3268	}
   3269	/*
   3270	 * segment dirty status should be updated after segment allocation,
    3271	 * so we only need to update the status once, after the previous
    3272	 * segment has been closed.
   3273	 */
   3274	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
   3275	locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
   3276
   3277	up_write(&sit_i->sentry_lock);
   3278
   3279	if (page && IS_NODESEG(type)) {
   3280		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
   3281
   3282		f2fs_inode_chksum_set(sbi, page);
   3283	}
   3284
   3285	if (fio) {
   3286		struct f2fs_bio_info *io;
   3287
   3288		if (F2FS_IO_ALIGNED(sbi))
   3289			fio->retry = false;
   3290
   3291		INIT_LIST_HEAD(&fio->list);
   3292		fio->in_list = true;
   3293		io = sbi->write_io[fio->type] + fio->temp;
   3294		spin_lock(&io->io_lock);
   3295		list_add_tail(&fio->list, &io->io_list);
   3296		spin_unlock(&io->io_lock);
   3297	}
   3298
   3299	mutex_unlock(&curseg->curseg_mutex);
   3300
   3301	f2fs_up_read(&SM_I(sbi)->curseg_lock);
   3302}
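
/*
 * Rough order of operations above: pick the next free block of the current
 * log, record the summary entry, advance next_blkoff, update mtime and SIT
 * information for the new (and old) block address, open a new segment or
 * section once the log is full, and finally mark the affected segments
 * dirty.
 */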
   3303
   3304void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
   3305					block_t blkaddr, unsigned int blkcnt)
   3306{
   3307	if (!f2fs_is_multi_device(sbi))
   3308		return;
   3309
   3310	while (1) {
   3311		unsigned int devidx = f2fs_target_device_index(sbi, blkaddr);
   3312		unsigned int blks = FDEV(devidx).end_blk - blkaddr + 1;
   3313
   3314		/* update device state for fsync */
   3315		f2fs_set_dirty_device(sbi, ino, devidx, FLUSH_INO);
   3316
   3317		/* update device state for checkpoint */
   3318		if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
   3319			spin_lock(&sbi->dev_lock);
   3320			f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
   3321			spin_unlock(&sbi->dev_lock);
   3322		}
   3323
   3324		if (blkcnt <= blks)
   3325			break;
   3326		blkcnt -= blks;
   3327		blkaddr += blks;
   3328	}
   3329}
   3330
   3331static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
   3332{
   3333	int type = __get_segment_type(fio);
   3334	bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA);
   3335
   3336	if (keep_order)
   3337		f2fs_down_read(&fio->sbi->io_order_lock);
   3338reallocate:
   3339	f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
   3340			&fio->new_blkaddr, sum, type, fio);
   3341	if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) {
   3342		invalidate_mapping_pages(META_MAPPING(fio->sbi),
   3343					fio->old_blkaddr, fio->old_blkaddr);
   3344		f2fs_invalidate_compress_page(fio->sbi, fio->old_blkaddr);
   3345	}
   3346
   3347	/* writeout dirty page into bdev */
   3348	f2fs_submit_page_write(fio);
   3349	if (fio->retry) {
   3350		fio->old_blkaddr = fio->new_blkaddr;
   3351		goto reallocate;
   3352	}
   3353
   3354	f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
   3355
   3356	if (keep_order)
   3357		f2fs_up_read(&fio->sbi->io_order_lock);
   3358}
   3359
   3360void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
   3361					enum iostat_type io_type)
   3362{
   3363	struct f2fs_io_info fio = {
   3364		.sbi = sbi,
   3365		.type = META,
   3366		.temp = HOT,
   3367		.op = REQ_OP_WRITE,
   3368		.op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
   3369		.old_blkaddr = page->index,
   3370		.new_blkaddr = page->index,
   3371		.page = page,
   3372		.encrypted_page = NULL,
   3373		.in_list = false,
   3374	};
   3375
   3376	if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
   3377		fio.op_flags &= ~REQ_META;
   3378
   3379	set_page_writeback(page);
   3380	ClearPageError(page);
   3381	f2fs_submit_page_write(&fio);
   3382
   3383	stat_inc_meta_count(sbi, page->index);
   3384	f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
   3385}
   3386
   3387void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
   3388{
   3389	struct f2fs_summary sum;
   3390
   3391	set_summary(&sum, nid, 0, 0);
   3392	do_write_page(&sum, fio);
   3393
   3394	f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
   3395}
   3396
   3397void f2fs_outplace_write_data(struct dnode_of_data *dn,
   3398					struct f2fs_io_info *fio)
   3399{
   3400	struct f2fs_sb_info *sbi = fio->sbi;
   3401	struct f2fs_summary sum;
   3402
   3403	f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
   3404	set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
   3405	do_write_page(&sum, fio);
   3406	f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
   3407
   3408	f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE);
   3409}
   3410
   3411int f2fs_inplace_write_data(struct f2fs_io_info *fio)
   3412{
   3413	int err;
   3414	struct f2fs_sb_info *sbi = fio->sbi;
   3415	unsigned int segno;
   3416
   3417	fio->new_blkaddr = fio->old_blkaddr;
   3418	/* i/o temperature is needed for passing down write hints */
   3419	__get_segment_type(fio);
   3420
   3421	segno = GET_SEGNO(sbi, fio->new_blkaddr);
   3422
   3423	if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
   3424		set_sbi_flag(sbi, SBI_NEED_FSCK);
   3425		f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
   3426			  __func__, segno);
   3427		err = -EFSCORRUPTED;
   3428		goto drop_bio;
   3429	}
   3430
   3431	if (f2fs_cp_error(sbi)) {
   3432		err = -EIO;
   3433		goto drop_bio;
   3434	}
   3435
   3436	invalidate_mapping_pages(META_MAPPING(sbi),
   3437				fio->new_blkaddr, fio->new_blkaddr);
   3438
   3439	stat_inc_inplace_blocks(fio->sbi);
   3440
   3441	if (fio->bio && !(SM_I(sbi)->ipu_policy & (1 << F2FS_IPU_NOCACHE)))
   3442		err = f2fs_merge_page_bio(fio);
   3443	else
   3444		err = f2fs_submit_page_bio(fio);
   3445	if (!err) {
   3446		f2fs_update_device_state(fio->sbi, fio->ino,
   3447						fio->new_blkaddr, 1);
   3448		f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
   3449	}
   3450
   3451	return err;
   3452drop_bio:
   3453	if (fio->bio && *(fio->bio)) {
   3454		struct bio *bio = *(fio->bio);
   3455
   3456		bio->bi_status = BLK_STS_IOERR;
   3457		bio_endio(bio);
   3458		*(fio->bio) = NULL;
   3459	}
   3460	return err;
   3461}
   3462
   3463static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
   3464						unsigned int segno)
   3465{
   3466	int i;
   3467
   3468	for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
   3469		if (CURSEG_I(sbi, i)->segno == segno)
   3470			break;
   3471	}
   3472	return i;
   3473}
   3474
   3475void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
   3476				block_t old_blkaddr, block_t new_blkaddr,
   3477				bool recover_curseg, bool recover_newaddr,
   3478				bool from_gc)
   3479{
   3480	struct sit_info *sit_i = SIT_I(sbi);
   3481	struct curseg_info *curseg;
   3482	unsigned int segno, old_cursegno;
   3483	struct seg_entry *se;
   3484	int type;
   3485	unsigned short old_blkoff;
   3486	unsigned char old_alloc_type;
   3487
   3488	segno = GET_SEGNO(sbi, new_blkaddr);
   3489	se = get_seg_entry(sbi, segno);
   3490	type = se->type;
   3491
   3492	f2fs_down_write(&SM_I(sbi)->curseg_lock);
   3493
   3494	if (!recover_curseg) {
   3495		/* for recovery flow */
   3496		if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
   3497			if (old_blkaddr == NULL_ADDR)
   3498				type = CURSEG_COLD_DATA;
   3499			else
   3500				type = CURSEG_WARM_DATA;
   3501		}
   3502	} else {
   3503		if (IS_CURSEG(sbi, segno)) {
    3504			/* se->type is volatile due to SSR allocation */
   3505			type = __f2fs_get_curseg(sbi, segno);
   3506			f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
   3507		} else {
   3508			type = CURSEG_WARM_DATA;
   3509		}
   3510	}
   3511
   3512	f2fs_bug_on(sbi, !IS_DATASEG(type));
   3513	curseg = CURSEG_I(sbi, type);
   3514
   3515	mutex_lock(&curseg->curseg_mutex);
   3516	down_write(&sit_i->sentry_lock);
   3517
   3518	old_cursegno = curseg->segno;
   3519	old_blkoff = curseg->next_blkoff;
   3520	old_alloc_type = curseg->alloc_type;
   3521
   3522	/* change the current segment */
   3523	if (segno != curseg->segno) {
   3524		curseg->next_segno = segno;
   3525		change_curseg(sbi, type, true);
   3526	}
   3527
   3528	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
   3529	__add_sum_entry(sbi, type, sum);
   3530
   3531	if (!recover_curseg || recover_newaddr) {
   3532		if (!from_gc)
   3533			update_segment_mtime(sbi, new_blkaddr, 0);
   3534		update_sit_entry(sbi, new_blkaddr, 1);
   3535	}
   3536	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
   3537		invalidate_mapping_pages(META_MAPPING(sbi),
   3538					old_blkaddr, old_blkaddr);
   3539		f2fs_invalidate_compress_page(sbi, old_blkaddr);
   3540		if (!from_gc)
   3541			update_segment_mtime(sbi, old_blkaddr, 0);
   3542		update_sit_entry(sbi, old_blkaddr, -1);
   3543	}
   3544
   3545	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
   3546	locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
   3547
   3548	locate_dirty_segment(sbi, old_cursegno);
   3549
   3550	if (recover_curseg) {
   3551		if (old_cursegno != curseg->segno) {
   3552			curseg->next_segno = old_cursegno;
   3553			change_curseg(sbi, type, true);
   3554		}
   3555		curseg->next_blkoff = old_blkoff;
   3556		curseg->alloc_type = old_alloc_type;
   3557	}
   3558
   3559	up_write(&sit_i->sentry_lock);
   3560	mutex_unlock(&curseg->curseg_mutex);
   3561	f2fs_up_write(&SM_I(sbi)->curseg_lock);
   3562}
   3563
   3564void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
   3565				block_t old_addr, block_t new_addr,
   3566				unsigned char version, bool recover_curseg,
   3567				bool recover_newaddr)
   3568{
   3569	struct f2fs_summary sum;
   3570
   3571	set_summary(&sum, dn->nid, dn->ofs_in_node, version);
   3572
   3573	f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
   3574					recover_curseg, recover_newaddr, false);
   3575
   3576	f2fs_update_data_blkaddr(dn, new_addr);
   3577}
   3578
   3579void f2fs_wait_on_page_writeback(struct page *page,
   3580				enum page_type type, bool ordered, bool locked)
   3581{
   3582	if (PageWriteback(page)) {
   3583		struct f2fs_sb_info *sbi = F2FS_P_SB(page);
   3584
   3585		/* submit cached LFS IO */
   3586		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
    3587		/* submit cached IPU IO */
   3588		f2fs_submit_merged_ipu_write(sbi, NULL, page);
   3589		if (ordered) {
   3590			wait_on_page_writeback(page);
   3591			f2fs_bug_on(sbi, locked && PageWriteback(page));
   3592		} else {
   3593			wait_for_stable_page(page);
   3594		}
   3595	}
   3596}
   3597
   3598void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
   3599{
   3600	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
   3601	struct page *cpage;
   3602
   3603	if (!f2fs_post_read_required(inode))
   3604		return;
   3605
   3606	if (!__is_valid_data_blkaddr(blkaddr))
   3607		return;
   3608
   3609	cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
   3610	if (cpage) {
   3611		f2fs_wait_on_page_writeback(cpage, DATA, true, true);
   3612		f2fs_put_page(cpage, 1);
   3613	}
   3614}
   3615
   3616void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
   3617								block_t len)
   3618{
   3619	block_t i;
   3620
   3621	for (i = 0; i < len; i++)
   3622		f2fs_wait_on_block_writeback(inode, blkaddr + i);
   3623}
   3624
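        /*
         * Rebuild the NAT/SIT journals and the data-log summaries from the
         * compacted summary blocks written by the last checkpoint.
         */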
   3625static int read_compacted_summaries(struct f2fs_sb_info *sbi)
   3626{
   3627	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
   3628	struct curseg_info *seg_i;
   3629	unsigned char *kaddr;
   3630	struct page *page;
   3631	block_t start;
   3632	int i, j, offset;
   3633
   3634	start = start_sum_block(sbi);
   3635
   3636	page = f2fs_get_meta_page(sbi, start++);
   3637	if (IS_ERR(page))
   3638		return PTR_ERR(page);
   3639	kaddr = (unsigned char *)page_address(page);
   3640
   3641	/* Step 1: restore nat cache */
   3642	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
   3643	memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
   3644
   3645	/* Step 2: restore sit cache */
   3646	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
   3647	memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
   3648	offset = 2 * SUM_JOURNAL_SIZE;
   3649
   3650	/* Step 3: restore summary entries */
   3651	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
   3652		unsigned short blk_off;
   3653		unsigned int segno;
   3654
   3655		seg_i = CURSEG_I(sbi, i);
   3656		segno = le32_to_cpu(ckpt->cur_data_segno[i]);
   3657		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
   3658		seg_i->next_segno = segno;
   3659		reset_curseg(sbi, i, 0);
   3660		seg_i->alloc_type = ckpt->alloc_type[i];
   3661		seg_i->next_blkoff = blk_off;
   3662
   3663		if (seg_i->alloc_type == SSR)
   3664			blk_off = sbi->blocks_per_seg;
   3665
   3666		for (j = 0; j < blk_off; j++) {
   3667			struct f2fs_summary *s;
   3668
   3669			s = (struct f2fs_summary *)(kaddr + offset);
   3670			seg_i->sum_blk->entries[j] = *s;
   3671			offset += SUMMARY_SIZE;
   3672			if (offset + SUMMARY_SIZE <= PAGE_SIZE -
   3673						SUM_FOOTER_SIZE)
   3674				continue;
   3675
   3676			f2fs_put_page(page, 1);
   3677			page = NULL;
   3678
   3679			page = f2fs_get_meta_page(sbi, start++);
   3680			if (IS_ERR(page))
   3681				return PTR_ERR(page);
   3682			kaddr = (unsigned char *)page_address(page);
   3683			offset = 0;
   3684		}
   3685	}
   3686	f2fs_put_page(page, 1);
   3687	return 0;
   3688}
   3689
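        /*
         * Restore one current log (@type) from its on-disk summary block:
         * journal, summary entries, alloc_type and next_blkoff.
         */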
   3690static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
   3691{
   3692	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
   3693	struct f2fs_summary_block *sum;
   3694	struct curseg_info *curseg;
   3695	struct page *new;
   3696	unsigned short blk_off;
   3697	unsigned int segno = 0;
   3698	block_t blk_addr = 0;
   3699	int err = 0;
   3700
   3701	/* get segment number and block addr */
   3702	if (IS_DATASEG(type)) {
   3703		segno = le32_to_cpu(ckpt->cur_data_segno[type]);
   3704		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
   3705							CURSEG_HOT_DATA]);
   3706		if (__exist_node_summaries(sbi))
   3707			blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
   3708		else
   3709			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
   3710	} else {
   3711		segno = le32_to_cpu(ckpt->cur_node_segno[type -
   3712							CURSEG_HOT_NODE]);
   3713		blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
   3714							CURSEG_HOT_NODE]);
   3715		if (__exist_node_summaries(sbi))
   3716			blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
   3717							type - CURSEG_HOT_NODE);
   3718		else
   3719			blk_addr = GET_SUM_BLOCK(sbi, segno);
   3720	}
   3721
   3722	new = f2fs_get_meta_page(sbi, blk_addr);
   3723	if (IS_ERR(new))
   3724		return PTR_ERR(new);
   3725	sum = (struct f2fs_summary_block *)page_address(new);
   3726
   3727	if (IS_NODESEG(type)) {
   3728		if (__exist_node_summaries(sbi)) {
   3729			struct f2fs_summary *ns = &sum->entries[0];
   3730			int i;
   3731
   3732			for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
   3733				ns->version = 0;
   3734				ns->ofs_in_node = 0;
   3735			}
   3736		} else {
   3737			err = f2fs_restore_node_summary(sbi, segno, sum);
   3738			if (err)
   3739				goto out;
   3740		}
   3741	}
   3742
    3743	/* set the uncompleted segment as curseg */
   3744	curseg = CURSEG_I(sbi, type);
   3745	mutex_lock(&curseg->curseg_mutex);
   3746
   3747	/* update journal info */
   3748	down_write(&curseg->journal_rwsem);
   3749	memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
   3750	up_write(&curseg->journal_rwsem);
   3751
   3752	memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
   3753	memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
   3754	curseg->next_segno = segno;
   3755	reset_curseg(sbi, type, 0);
   3756	curseg->alloc_type = ckpt->alloc_type[type];
   3757	curseg->next_blkoff = blk_off;
   3758	mutex_unlock(&curseg->curseg_mutex);
   3759out:
   3760	f2fs_put_page(new, 1);
   3761	return err;
   3762}
   3763
   3764static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
   3765{
   3766	struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
   3767	struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
   3768	int type = CURSEG_HOT_DATA;
   3769	int err;
   3770
   3771	if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
   3772		int npages = f2fs_npages_for_summary_flush(sbi, true);
   3773
   3774		if (npages >= 2)
   3775			f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
   3776							META_CP, true);
   3777
   3778		/* restore for compacted data summary */
   3779		err = read_compacted_summaries(sbi);
   3780		if (err)
   3781			return err;
   3782		type = CURSEG_HOT_NODE;
   3783	}
   3784
   3785	if (__exist_node_summaries(sbi))
   3786		f2fs_ra_meta_pages(sbi,
   3787				sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
   3788				NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
   3789
   3790	for (; type <= CURSEG_COLD_NODE; type++) {
   3791		err = read_normal_summaries(sbi, type);
   3792		if (err)
   3793			return err;
   3794	}
   3795
   3796	/* sanity check for summary blocks */
   3797	if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
   3798			sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) {
   3799		f2fs_err(sbi, "invalid journal entries nats %u sits %u",
   3800			 nats_in_cursum(nat_j), sits_in_cursum(sit_j));
   3801		return -EINVAL;
   3802	}
   3803
   3804	return 0;
   3805}
   3806
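        /*
         * Pack the NAT/SIT journals and the data-log summaries into
         * consecutive meta pages starting at @blkaddr.
         */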
   3807static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
   3808{
   3809	struct page *page;
   3810	unsigned char *kaddr;
   3811	struct f2fs_summary *summary;
   3812	struct curseg_info *seg_i;
   3813	int written_size = 0;
   3814	int i, j;
   3815
   3816	page = f2fs_grab_meta_page(sbi, blkaddr++);
   3817	kaddr = (unsigned char *)page_address(page);
   3818	memset(kaddr, 0, PAGE_SIZE);
   3819
   3820	/* Step 1: write nat cache */
   3821	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
   3822	memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
   3823	written_size += SUM_JOURNAL_SIZE;
   3824
   3825	/* Step 2: write sit cache */
   3826	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
   3827	memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
   3828	written_size += SUM_JOURNAL_SIZE;
   3829
   3830	/* Step 3: write summary entries */
   3831	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
   3832		unsigned short blkoff;
   3833
   3834		seg_i = CURSEG_I(sbi, i);
   3835		if (sbi->ckpt->alloc_type[i] == SSR)
   3836			blkoff = sbi->blocks_per_seg;
   3837		else
   3838			blkoff = curseg_blkoff(sbi, i);
   3839
   3840		for (j = 0; j < blkoff; j++) {
   3841			if (!page) {
   3842				page = f2fs_grab_meta_page(sbi, blkaddr++);
   3843				kaddr = (unsigned char *)page_address(page);
   3844				memset(kaddr, 0, PAGE_SIZE);
   3845				written_size = 0;
   3846			}
   3847			summary = (struct f2fs_summary *)(kaddr + written_size);
   3848			*summary = seg_i->sum_blk->entries[j];
   3849			written_size += SUMMARY_SIZE;
   3850
   3851			if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
   3852							SUM_FOOTER_SIZE)
   3853				continue;
   3854
   3855			set_page_dirty(page);
   3856			f2fs_put_page(page, 1);
   3857			page = NULL;
   3858		}
   3859	}
   3860	if (page) {
   3861		set_page_dirty(page);
   3862		f2fs_put_page(page, 1);
   3863	}
   3864}
   3865
   3866static void write_normal_summaries(struct f2fs_sb_info *sbi,
   3867					block_t blkaddr, int type)
   3868{
   3869	int i, end;
   3870
   3871	if (IS_DATASEG(type))
   3872		end = type + NR_CURSEG_DATA_TYPE;
   3873	else
   3874		end = type + NR_CURSEG_NODE_TYPE;
   3875
   3876	for (i = type; i < end; i++)
   3877		write_current_sum_page(sbi, i, blkaddr + (i - type));
   3878}
   3879
   3880void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
   3881{
   3882	if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
   3883		write_compacted_summaries(sbi, start_blk);
   3884	else
   3885		write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
   3886}
   3887
   3888void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
   3889{
   3890	write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
   3891}
   3892
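        /*
         * Find @val (a nid for NAT_JOURNAL, a segno for SIT_JOURNAL) in the
         * in-summary journal; if @alloc is set and space remains, reserve a
         * new slot. Returns the entry index, or -1 on failure.
         */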
   3893int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
   3894					unsigned int val, int alloc)
   3895{
   3896	int i;
   3897
   3898	if (type == NAT_JOURNAL) {
   3899		for (i = 0; i < nats_in_cursum(journal); i++) {
   3900			if (le32_to_cpu(nid_in_journal(journal, i)) == val)
   3901				return i;
   3902		}
   3903		if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
   3904			return update_nats_in_cursum(journal, 1);
   3905	} else if (type == SIT_JOURNAL) {
   3906		for (i = 0; i < sits_in_cursum(journal); i++)
   3907			if (le32_to_cpu(segno_in_journal(journal, i)) == val)
   3908				return i;
   3909		if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
   3910			return update_sits_in_cursum(journal, 1);
   3911	}
   3912	return -1;
   3913}
   3914
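        /* read the SIT block that currently holds the on-disk entry for @segno */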
   3915static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
   3916					unsigned int segno)
   3917{
   3918	return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
   3919}
   3920
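        /*
         * Write the in-memory SIT info for @start's block into the alternate
         * SIT copy and flip the SIT bitmap so future lookups use it.
         */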
   3921static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
   3922					unsigned int start)
   3923{
   3924	struct sit_info *sit_i = SIT_I(sbi);
   3925	struct page *page;
   3926	pgoff_t src_off, dst_off;
   3927
   3928	src_off = current_sit_addr(sbi, start);
   3929	dst_off = next_sit_addr(sbi, src_off);
   3930
   3931	page = f2fs_grab_meta_page(sbi, dst_off);
   3932	seg_info_to_sit_page(sbi, page, start);
   3933
   3934	set_page_dirty(page);
   3935	set_to_next_sit(sit_i, start);
   3936
   3937	return page;
   3938}
   3939
   3940static struct sit_entry_set *grab_sit_entry_set(void)
   3941{
   3942	struct sit_entry_set *ses =
   3943			f2fs_kmem_cache_alloc(sit_entry_set_slab,
   3944						GFP_NOFS, true, NULL);
   3945
   3946	ses->entry_cnt = 0;
   3947	INIT_LIST_HEAD(&ses->set_list);
   3948	return ses;
   3949}
   3950
   3951static void release_sit_entry_set(struct sit_entry_set *ses)
   3952{
   3953	list_del(&ses->set_list);
   3954	kmem_cache_free(sit_entry_set_slab, ses);
   3955}
   3956
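        /* keep the set list sorted by ascending entry_cnt after @ses has grown */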
   3957static void adjust_sit_entry_set(struct sit_entry_set *ses,
   3958						struct list_head *head)
   3959{
   3960	struct sit_entry_set *next = ses;
   3961
   3962	if (list_is_last(&ses->set_list, head))
   3963		return;
   3964
   3965	list_for_each_entry_continue(next, head, set_list)
   3966		if (ses->entry_cnt <= next->entry_cnt) {
   3967			list_move_tail(&ses->set_list, &next->set_list);
   3968			return;
   3969		}
   3970
   3971	list_move_tail(&ses->set_list, head);
   3972}
   3973
   3974static void add_sit_entry(unsigned int segno, struct list_head *head)
   3975{
   3976	struct sit_entry_set *ses;
   3977	unsigned int start_segno = START_SEGNO(segno);
   3978
   3979	list_for_each_entry(ses, head, set_list) {
   3980		if (ses->start_segno == start_segno) {
   3981			ses->entry_cnt++;
   3982			adjust_sit_entry_set(ses, head);
   3983			return;
   3984		}
   3985	}
   3986
   3987	ses = grab_sit_entry_set();
   3988
   3989	ses->start_segno = start_segno;
   3990	ses->entry_cnt++;
   3991	list_add(&ses->set_list, head);
   3992}
   3993
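        /* group every dirty SIT entry into per-SIT-block entry sets */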
   3994static void add_sits_in_set(struct f2fs_sb_info *sbi)
   3995{
   3996	struct f2fs_sm_info *sm_info = SM_I(sbi);
   3997	struct list_head *set_list = &sm_info->sit_entry_set;
   3998	unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
   3999	unsigned int segno;
   4000
   4001	for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
   4002		add_sit_entry(segno, set_list);
   4003}
   4004
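        /*
         * Drop all SIT entries cached in the cold data journal and account
         * them as dirty in-memory entries instead.
         */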
   4005static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
   4006{
   4007	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
   4008	struct f2fs_journal *journal = curseg->journal;
   4009	int i;
   4010
   4011	down_write(&curseg->journal_rwsem);
   4012	for (i = 0; i < sits_in_cursum(journal); i++) {
   4013		unsigned int segno;
   4014		bool dirtied;
   4015
   4016		segno = le32_to_cpu(segno_in_journal(journal, i));
   4017		dirtied = __mark_sit_entry_dirty(sbi, segno);
   4018
   4019		if (!dirtied)
   4020			add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
   4021	}
   4022	update_sits_in_cursum(journal, -i);
   4023	up_write(&curseg->journal_rwsem);
   4024}
   4025
   4026/*
   4027 * CP calls this function, which flushes SIT entries including sit_journal,
   4028 * and moves prefree segs to free segs.
   4029 */
   4030void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
   4031{
   4032	struct sit_info *sit_i = SIT_I(sbi);
   4033	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
   4034	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
   4035	struct f2fs_journal *journal = curseg->journal;
   4036	struct sit_entry_set *ses, *tmp;
   4037	struct list_head *head = &SM_I(sbi)->sit_entry_set;
   4038	bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS);
   4039	struct seg_entry *se;
   4040
   4041	down_write(&sit_i->sentry_lock);
   4042
   4043	if (!sit_i->dirty_sentries)
   4044		goto out;
   4045
   4046	/*
    4047	 * add and account the sit entries from the dirty bitmap in
    4048	 * temporary sit entry sets
   4049	 */
   4050	add_sits_in_set(sbi);
   4051
   4052	/*
    4053	 * if there is not enough space in the journal to store dirty sit
   4054	 * entries, remove all entries from journal and add and account
   4055	 * them in sit entry set.
   4056	 */
   4057	if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) ||
   4058								!to_journal)
   4059		remove_sits_in_journal(sbi);
   4060
   4061	/*
   4062	 * there are two steps to flush sit entries:
   4063	 * #1, flush sit entries to journal in current cold data summary block.
   4064	 * #2, flush sit entries to sit page.
   4065	 */
   4066	list_for_each_entry_safe(ses, tmp, head, set_list) {
   4067		struct page *page = NULL;
   4068		struct f2fs_sit_block *raw_sit = NULL;
   4069		unsigned int start_segno = ses->start_segno;
   4070		unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
   4071						(unsigned long)MAIN_SEGS(sbi));
   4072		unsigned int segno = start_segno;
   4073
   4074		if (to_journal &&
   4075			!__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
   4076			to_journal = false;
   4077
   4078		if (to_journal) {
   4079			down_write(&curseg->journal_rwsem);
   4080		} else {
   4081			page = get_next_sit_page(sbi, start_segno);
   4082			raw_sit = page_address(page);
   4083		}
   4084
   4085		/* flush dirty sit entries in region of current sit set */
   4086		for_each_set_bit_from(segno, bitmap, end) {
   4087			int offset, sit_offset;
   4088
   4089			se = get_seg_entry(sbi, segno);
   4090#ifdef CONFIG_F2FS_CHECK_FS
   4091			if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
   4092						SIT_VBLOCK_MAP_SIZE))
   4093				f2fs_bug_on(sbi, 1);
   4094#endif
   4095
   4096			/* add discard candidates */
   4097			if (!(cpc->reason & CP_DISCARD)) {
   4098				cpc->trim_start = segno;
   4099				add_discard_addrs(sbi, cpc, false);
   4100			}
   4101
   4102			if (to_journal) {
   4103				offset = f2fs_lookup_journal_in_cursum(journal,
   4104							SIT_JOURNAL, segno, 1);
   4105				f2fs_bug_on(sbi, offset < 0);
   4106				segno_in_journal(journal, offset) =
   4107							cpu_to_le32(segno);
   4108				seg_info_to_raw_sit(se,
   4109					&sit_in_journal(journal, offset));
   4110				check_block_count(sbi, segno,
   4111					&sit_in_journal(journal, offset));
   4112			} else {
   4113				sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
   4114				seg_info_to_raw_sit(se,
   4115						&raw_sit->entries[sit_offset]);
   4116				check_block_count(sbi, segno,
   4117						&raw_sit->entries[sit_offset]);
   4118			}
   4119
   4120			__clear_bit(segno, bitmap);
   4121			sit_i->dirty_sentries--;
   4122			ses->entry_cnt--;
   4123		}
   4124
   4125		if (to_journal)
   4126			up_write(&curseg->journal_rwsem);
   4127		else
   4128			f2fs_put_page(page, 1);
   4129
   4130		f2fs_bug_on(sbi, ses->entry_cnt);
   4131		release_sit_entry_set(ses);
   4132	}
   4133
   4134	f2fs_bug_on(sbi, !list_empty(head));
   4135	f2fs_bug_on(sbi, sit_i->dirty_sentries);
   4136out:
   4137	if (cpc->reason & CP_DISCARD) {
   4138		__u64 trim_start = cpc->trim_start;
   4139
   4140		for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
   4141			add_discard_addrs(sbi, cpc, false);
   4142
   4143		cpc->trim_start = trim_start;
   4144	}
   4145	up_write(&sit_i->sentry_lock);
   4146
   4147	set_prefree_as_free_segments(sbi);
   4148}
   4149
   4150static int build_sit_info(struct f2fs_sb_info *sbi)
   4151{
   4152	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
   4153	struct sit_info *sit_i;
   4154	unsigned int sit_segs, start;
   4155	char *src_bitmap, *bitmap;
   4156	unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size;
   4157	unsigned int discard_map = f2fs_block_unit_discard(sbi) ? 1 : 0;
   4158
   4159	/* allocate memory for SIT information */
   4160	sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
   4161	if (!sit_i)
   4162		return -ENOMEM;
   4163
   4164	SM_I(sbi)->sit_info = sit_i;
   4165
   4166	sit_i->sentries =
   4167		f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
   4168					      MAIN_SEGS(sbi)),
   4169			      GFP_KERNEL);
   4170	if (!sit_i->sentries)
   4171		return -ENOMEM;
   4172
   4173	main_bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
   4174	sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, main_bitmap_size,
   4175								GFP_KERNEL);
   4176	if (!sit_i->dirty_sentries_bitmap)
   4177		return -ENOMEM;
   4178
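        	/*
        	 * One contiguous buffer backs every per-segment bitmap:
        	 * cur_valid_map, ckpt_valid_map, an optional mirror under
        	 * CONFIG_F2FS_CHECK_FS and an optional discard_map.
        	 */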
   4179#ifdef CONFIG_F2FS_CHECK_FS
   4180	bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (3 + discard_map);
   4181#else
   4182	bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (2 + discard_map);
   4183#endif
   4184	sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
   4185	if (!sit_i->bitmap)
   4186		return -ENOMEM;
   4187
   4188	bitmap = sit_i->bitmap;
   4189
   4190	for (start = 0; start < MAIN_SEGS(sbi); start++) {
   4191		sit_i->sentries[start].cur_valid_map = bitmap;
   4192		bitmap += SIT_VBLOCK_MAP_SIZE;
   4193
   4194		sit_i->sentries[start].ckpt_valid_map = bitmap;
   4195		bitmap += SIT_VBLOCK_MAP_SIZE;
   4196
   4197#ifdef CONFIG_F2FS_CHECK_FS
   4198		sit_i->sentries[start].cur_valid_map_mir = bitmap;
   4199		bitmap += SIT_VBLOCK_MAP_SIZE;
   4200#endif
   4201
   4202		if (discard_map) {
   4203			sit_i->sentries[start].discard_map = bitmap;
   4204			bitmap += SIT_VBLOCK_MAP_SIZE;
   4205		}
   4206	}
   4207
   4208	sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
   4209	if (!sit_i->tmp_map)
   4210		return -ENOMEM;
   4211
   4212	if (__is_large_section(sbi)) {
   4213		sit_i->sec_entries =
   4214			f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
   4215						      MAIN_SECS(sbi)),
   4216				      GFP_KERNEL);
   4217		if (!sit_i->sec_entries)
   4218			return -ENOMEM;
   4219	}
   4220
    4221	/* get information related to SIT */
   4222	sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
   4223
    4224	/* set up the SIT bitmap from the checkpoint pack */
   4225	sit_bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
   4226	src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
   4227
   4228	sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL);
   4229	if (!sit_i->sit_bitmap)
   4230		return -ENOMEM;
   4231
   4232#ifdef CONFIG_F2FS_CHECK_FS
   4233	sit_i->sit_bitmap_mir = kmemdup(src_bitmap,
   4234					sit_bitmap_size, GFP_KERNEL);
   4235	if (!sit_i->sit_bitmap_mir)
   4236		return -ENOMEM;
   4237
   4238	sit_i->invalid_segmap = f2fs_kvzalloc(sbi,
   4239					main_bitmap_size, GFP_KERNEL);
   4240	if (!sit_i->invalid_segmap)
   4241		return -ENOMEM;
   4242#endif
   4243
   4244	/* init SIT information */
   4245	sit_i->s_ops = &default_salloc_ops;
   4246
   4247	sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
   4248	sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
   4249	sit_i->written_valid_blocks = 0;
   4250	sit_i->bitmap_size = sit_bitmap_size;
   4251	sit_i->dirty_sentries = 0;
   4252	sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
   4253	sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
   4254	sit_i->mounted_time = ktime_get_boottime_seconds();
   4255	init_rwsem(&sit_i->sentry_lock);
   4256	return 0;
   4257}
   4258
   4259static int build_free_segmap(struct f2fs_sb_info *sbi)
   4260{
   4261	struct free_segmap_info *free_i;
   4262	unsigned int bitmap_size, sec_bitmap_size;
   4263
   4264	/* allocate memory for free segmap information */
   4265	free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
   4266	if (!free_i)
   4267		return -ENOMEM;
   4268
   4269	SM_I(sbi)->free_info = free_i;
   4270
   4271	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
   4272	free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
   4273	if (!free_i->free_segmap)
   4274		return -ENOMEM;
   4275
   4276	sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
   4277	free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
   4278	if (!free_i->free_secmap)
   4279		return -ENOMEM;
   4280
   4281	/* set all segments as dirty temporarily */
   4282	memset(free_i->free_segmap, 0xff, bitmap_size);
   4283	memset(free_i->free_secmap, 0xff, sec_bitmap_size);
   4284
   4285	/* init free segmap information */
   4286	free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
   4287	free_i->free_segments = 0;
   4288	free_i->free_sections = 0;
   4289	spin_lock_init(&free_i->segmap_lock);
   4290	return 0;
   4291}
   4292
   4293static int build_curseg(struct f2fs_sb_info *sbi)
   4294{
   4295	struct curseg_info *array;
   4296	int i;
   4297
   4298	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
   4299					sizeof(*array)), GFP_KERNEL);
   4300	if (!array)
   4301		return -ENOMEM;
   4302
   4303	SM_I(sbi)->curseg_array = array;
   4304
   4305	for (i = 0; i < NO_CHECK_TYPE; i++) {
   4306		mutex_init(&array[i].curseg_mutex);
   4307		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
   4308		if (!array[i].sum_blk)
   4309			return -ENOMEM;
   4310		init_rwsem(&array[i].journal_rwsem);
   4311		array[i].journal = f2fs_kzalloc(sbi,
   4312				sizeof(struct f2fs_journal), GFP_KERNEL);
   4313		if (!array[i].journal)
   4314			return -ENOMEM;
   4315		if (i < NR_PERSISTENT_LOG)
   4316			array[i].seg_type = CURSEG_HOT_DATA + i;
   4317		else if (i == CURSEG_COLD_DATA_PINNED)
   4318			array[i].seg_type = CURSEG_COLD_DATA;
   4319		else if (i == CURSEG_ALL_DATA_ATGC)
   4320			array[i].seg_type = CURSEG_COLD_DATA;
   4321		array[i].segno = NULL_SEGNO;
   4322		array[i].next_blkoff = 0;
   4323		array[i].inited = false;
   4324	}
   4325	return restore_curseg_summaries(sbi);
   4326}
   4327
   4328static int build_sit_entries(struct f2fs_sb_info *sbi)
   4329{
   4330	struct sit_info *sit_i = SIT_I(sbi);
   4331	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
   4332	struct f2fs_journal *journal = curseg->journal;
   4333	struct seg_entry *se;
   4334	struct f2fs_sit_entry sit;
   4335	int sit_blk_cnt = SIT_BLK_CNT(sbi);
   4336	unsigned int i, start, end;
   4337	unsigned int readed, start_blk = 0;
   4338	int err = 0;
   4339	block_t sit_valid_blocks[2] = {0, 0};
   4340
   4341	do {
   4342		readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_VECS,
   4343							META_SIT, true);
   4344
   4345		start = start_blk * sit_i->sents_per_block;
   4346		end = (start_blk + readed) * sit_i->sents_per_block;
   4347
   4348		for (; start < end && start < MAIN_SEGS(sbi); start++) {
   4349			struct f2fs_sit_block *sit_blk;
   4350			struct page *page;
   4351
   4352			se = &sit_i->sentries[start];
   4353			page = get_current_sit_page(sbi, start);
   4354			if (IS_ERR(page))
   4355				return PTR_ERR(page);
   4356			sit_blk = (struct f2fs_sit_block *)page_address(page);
   4357			sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
   4358			f2fs_put_page(page, 1);
   4359
   4360			err = check_block_count(sbi, start, &sit);
   4361			if (err)
   4362				return err;
   4363			seg_info_from_raw_sit(se, &sit);
   4364
   4365			sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
   4366
   4367			if (f2fs_block_unit_discard(sbi)) {
   4368				/* build discard map only one time */
   4369				if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
   4370					memset(se->discard_map, 0xff,
   4371						SIT_VBLOCK_MAP_SIZE);
   4372				} else {
   4373					memcpy(se->discard_map,
   4374						se->cur_valid_map,
   4375						SIT_VBLOCK_MAP_SIZE);
   4376					sbi->discard_blks +=
   4377						sbi->blocks_per_seg -
   4378						se->valid_blocks;
   4379				}
   4380			}
   4381
   4382			if (__is_large_section(sbi))
   4383				get_sec_entry(sbi, start)->valid_blocks +=
   4384							se->valid_blocks;
   4385		}
   4386		start_blk += readed;
   4387	} while (start_blk < sit_blk_cnt);
   4388
   4389	down_read(&curseg->journal_rwsem);
   4390	for (i = 0; i < sits_in_cursum(journal); i++) {
   4391		unsigned int old_valid_blocks;
   4392
   4393		start = le32_to_cpu(segno_in_journal(journal, i));
   4394		if (start >= MAIN_SEGS(sbi)) {
   4395			f2fs_err(sbi, "Wrong journal entry on segno %u",
   4396				 start);
   4397			err = -EFSCORRUPTED;
   4398			break;
   4399		}
   4400
   4401		se = &sit_i->sentries[start];
   4402		sit = sit_in_journal(journal, i);
   4403
   4404		old_valid_blocks = se->valid_blocks;
   4405
   4406		sit_valid_blocks[SE_PAGETYPE(se)] -= old_valid_blocks;
   4407
   4408		err = check_block_count(sbi, start, &sit);
   4409		if (err)
   4410			break;
   4411		seg_info_from_raw_sit(se, &sit);
   4412
   4413		sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
   4414
   4415		if (f2fs_block_unit_discard(sbi)) {
   4416			if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
   4417				memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
   4418			} else {
   4419				memcpy(se->discard_map, se->cur_valid_map,
   4420							SIT_VBLOCK_MAP_SIZE);
   4421				sbi->discard_blks += old_valid_blocks;
   4422				sbi->discard_blks -= se->valid_blocks;
   4423			}
   4424		}
   4425
   4426		if (__is_large_section(sbi)) {
   4427			get_sec_entry(sbi, start)->valid_blocks +=
   4428							se->valid_blocks;
   4429			get_sec_entry(sbi, start)->valid_blocks -=
   4430							old_valid_blocks;
   4431		}
   4432	}
   4433	up_read(&curseg->journal_rwsem);
   4434
   4435	if (err)
   4436		return err;
   4437
   4438	if (sit_valid_blocks[NODE] != valid_node_count(sbi)) {
   4439		f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
   4440			 sit_valid_blocks[NODE], valid_node_count(sbi));
   4441		return -EFSCORRUPTED;
   4442	}
   4443
   4444	if (sit_valid_blocks[DATA] + sit_valid_blocks[NODE] >
   4445				valid_user_blocks(sbi)) {
   4446		f2fs_err(sbi, "SIT is corrupted data# %u %u vs %u",
   4447			 sit_valid_blocks[DATA], sit_valid_blocks[NODE],
   4448			 valid_user_blocks(sbi));
   4449		return -EFSCORRUPTED;
   4450	}
   4451
   4452	return 0;
   4453}
   4454
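        /*
         * Mark segments with no valid blocks as free, account written blocks
         * for the rest, and reserve the segments the current logs occupy.
         */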
   4455static void init_free_segmap(struct f2fs_sb_info *sbi)
   4456{
   4457	unsigned int start;
   4458	int type;
   4459	struct seg_entry *sentry;
   4460
   4461	for (start = 0; start < MAIN_SEGS(sbi); start++) {
   4462		if (f2fs_usable_blks_in_seg(sbi, start) == 0)
   4463			continue;
   4464		sentry = get_seg_entry(sbi, start);
   4465		if (!sentry->valid_blocks)
   4466			__set_free(sbi, start);
   4467		else
   4468			SIT_I(sbi)->written_valid_blocks +=
   4469						sentry->valid_blocks;
   4470	}
   4471
    4472	/* mark the current segments as in use */
   4473	for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
   4474		struct curseg_info *curseg_t = CURSEG_I(sbi, type);
   4475
   4476		__set_test_and_inuse(sbi, curseg_t->segno);
   4477	}
   4478}
   4479
   4480static void init_dirty_segmap(struct f2fs_sb_info *sbi)
   4481{
   4482	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
   4483	struct free_segmap_info *free_i = FREE_I(sbi);
   4484	unsigned int segno = 0, offset = 0, secno;
   4485	block_t valid_blocks, usable_blks_in_seg;
   4486	block_t blks_per_sec = BLKS_PER_SEC(sbi);
   4487
   4488	while (1) {
   4489		/* find dirty segment based on free segmap */
   4490		segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
   4491		if (segno >= MAIN_SEGS(sbi))
   4492			break;
   4493		offset = segno + 1;
   4494		valid_blocks = get_valid_blocks(sbi, segno, false);
   4495		usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
   4496		if (valid_blocks == usable_blks_in_seg || !valid_blocks)
   4497			continue;
   4498		if (valid_blocks > usable_blks_in_seg) {
   4499			f2fs_bug_on(sbi, 1);
   4500			continue;
   4501		}
   4502		mutex_lock(&dirty_i->seglist_lock);
   4503		__locate_dirty_segment(sbi, segno, DIRTY);
   4504		mutex_unlock(&dirty_i->seglist_lock);
   4505	}
   4506
   4507	if (!__is_large_section(sbi))
   4508		return;
   4509
   4510	mutex_lock(&dirty_i->seglist_lock);
   4511	for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
   4512		valid_blocks = get_valid_blocks(sbi, segno, true);
   4513		secno = GET_SEC_FROM_SEG(sbi, segno);
   4514
   4515		if (!valid_blocks || valid_blocks == blks_per_sec)
   4516			continue;
   4517		if (IS_CURSEC(sbi, secno))
   4518			continue;
   4519		set_bit(secno, dirty_i->dirty_secmap);
   4520	}
   4521	mutex_unlock(&dirty_i->seglist_lock);
   4522}
   4523
   4524static int init_victim_secmap(struct f2fs_sb_info *sbi)
   4525{
   4526	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
   4527	unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
   4528
   4529	dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
   4530	if (!dirty_i->victim_secmap)
   4531		return -ENOMEM;
   4532
   4533	dirty_i->pinned_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
   4534	if (!dirty_i->pinned_secmap)
   4535		return -ENOMEM;
   4536
   4537	dirty_i->pinned_secmap_cnt = 0;
   4538	dirty_i->enable_pin_section = true;
   4539	return 0;
   4540}
   4541
   4542static int build_dirty_segmap(struct f2fs_sb_info *sbi)
   4543{
   4544	struct dirty_seglist_info *dirty_i;
   4545	unsigned int bitmap_size, i;
   4546
   4547	/* allocate memory for dirty segments list information */
   4548	dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
   4549								GFP_KERNEL);
   4550	if (!dirty_i)
   4551		return -ENOMEM;
   4552
   4553	SM_I(sbi)->dirty_info = dirty_i;
   4554	mutex_init(&dirty_i->seglist_lock);
   4555
   4556	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
   4557
   4558	for (i = 0; i < NR_DIRTY_TYPE; i++) {
   4559		dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
   4560								GFP_KERNEL);
   4561		if (!dirty_i->dirty_segmap[i])
   4562			return -ENOMEM;
   4563	}
   4564
   4565	if (__is_large_section(sbi)) {
   4566		bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
   4567		dirty_i->dirty_secmap = f2fs_kvzalloc(sbi,
   4568						bitmap_size, GFP_KERNEL);
   4569		if (!dirty_i->dirty_secmap)
   4570			return -ENOMEM;
   4571	}
   4572
   4573	init_dirty_segmap(sbi);
   4574	return init_victim_secmap(sbi);
   4575}
   4576
   4577static int sanity_check_curseg(struct f2fs_sb_info *sbi)
   4578{
   4579	int i;
   4580
   4581	/*
   4582	 * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
    4583	 * In LFS curseg, all blkaddrs after .next_blkoff should be unused.
   4584	 */
   4585	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
   4586		struct curseg_info *curseg = CURSEG_I(sbi, i);
   4587		struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
   4588		unsigned int blkofs = curseg->next_blkoff;
   4589
   4590		if (f2fs_sb_has_readonly(sbi) &&
   4591			i != CURSEG_HOT_DATA && i != CURSEG_HOT_NODE)
   4592			continue;
   4593
   4594		sanity_check_seg_type(sbi, curseg->seg_type);
   4595
   4596		if (curseg->alloc_type != LFS && curseg->alloc_type != SSR) {
   4597			f2fs_err(sbi,
   4598				 "Current segment has invalid alloc_type:%d",
   4599				 curseg->alloc_type);
   4600			return -EFSCORRUPTED;
   4601		}
   4602
   4603		if (f2fs_test_bit(blkofs, se->cur_valid_map))
   4604			goto out;
   4605
   4606		if (curseg->alloc_type == SSR)
   4607			continue;
   4608
   4609		for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) {
   4610			if (!f2fs_test_bit(blkofs, se->cur_valid_map))
   4611				continue;
   4612out:
   4613			f2fs_err(sbi,
   4614				 "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
   4615				 i, curseg->segno, curseg->alloc_type,
   4616				 curseg->next_blkoff, blkofs);
   4617			return -EFSCORRUPTED;
   4618		}
   4619	}
   4620	return 0;
   4621}
   4622
   4623#ifdef CONFIG_BLK_DEV_ZONED
   4624
   4625static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
   4626				    struct f2fs_dev_info *fdev,
   4627				    struct blk_zone *zone)
   4628{
   4629	unsigned int wp_segno, wp_blkoff, zone_secno, zone_segno, segno;
   4630	block_t zone_block, wp_block, last_valid_block;
   4631	unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
   4632	int i, s, b, ret;
   4633	struct seg_entry *se;
   4634
   4635	if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
   4636		return 0;
   4637
   4638	wp_block = fdev->start_blk + (zone->wp >> log_sectors_per_block);
   4639	wp_segno = GET_SEGNO(sbi, wp_block);
   4640	wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
   4641	zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block);
   4642	zone_segno = GET_SEGNO(sbi, zone_block);
   4643	zone_secno = GET_SEC_FROM_SEG(sbi, zone_segno);
   4644
   4645	if (zone_segno >= MAIN_SEGS(sbi))
   4646		return 0;
   4647
   4648	/*
    4649	 * Skip checking zones that cursegs point to, since
   4650	 * fix_curseg_write_pointer() checks them.
   4651	 */
   4652	for (i = 0; i < NO_CHECK_TYPE; i++)
   4653		if (zone_secno == GET_SEC_FROM_SEG(sbi,
   4654						   CURSEG_I(sbi, i)->segno))
   4655			return 0;
   4656
   4657	/*
   4658	 * Get last valid block of the zone.
   4659	 */
   4660	last_valid_block = zone_block - 1;
   4661	for (s = sbi->segs_per_sec - 1; s >= 0; s--) {
   4662		segno = zone_segno + s;
   4663		se = get_seg_entry(sbi, segno);
   4664		for (b = sbi->blocks_per_seg - 1; b >= 0; b--)
   4665			if (f2fs_test_bit(b, se->cur_valid_map)) {
   4666				last_valid_block = START_BLOCK(sbi, segno) + b;
   4667				break;
   4668			}
   4669		if (last_valid_block >= zone_block)
   4670			break;
   4671	}
   4672
   4673	/*
   4674	 * If last valid block is beyond the write pointer, report the
    4675	 * inconsistency. This inconsistency does not cause a write error
    4676	 * because the zone will not be selected for a write operation until
    4677	 * it gets discarded. Just report it.
   4678	 */
   4679	if (last_valid_block >= wp_block) {
   4680		f2fs_notice(sbi, "Valid block beyond write pointer: "
   4681			    "valid block[0x%x,0x%x] wp[0x%x,0x%x]",
   4682			    GET_SEGNO(sbi, last_valid_block),
   4683			    GET_BLKOFF_FROM_SEG0(sbi, last_valid_block),
   4684			    wp_segno, wp_blkoff);
   4685		return 0;
   4686	}
   4687
   4688	/*
    4689	 * If there is no valid block in the zone and the write pointer is
    4690	 * not at the zone start, reset the write pointer.
   4691	 */
   4692	if (last_valid_block + 1 == zone_block && zone->wp != zone->start) {
   4693		f2fs_notice(sbi,
   4694			    "Zone without valid block has non-zero write "
   4695			    "pointer. Reset the write pointer: wp[0x%x,0x%x]",
   4696			    wp_segno, wp_blkoff);
   4697		ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block,
   4698					zone->len >> log_sectors_per_block);
   4699		if (ret) {
   4700			f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
   4701				 fdev->path, ret);
   4702			return ret;
   4703		}
   4704	}
   4705
   4706	return 0;
   4707}
   4708
   4709static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi,
   4710						  block_t zone_blkaddr)
   4711{
   4712	int i;
   4713
   4714	for (i = 0; i < sbi->s_ndevs; i++) {
   4715		if (!bdev_is_zoned(FDEV(i).bdev))
   4716			continue;
   4717		if (sbi->s_ndevs == 1 || (FDEV(i).start_blk <= zone_blkaddr &&
   4718				zone_blkaddr <= FDEV(i).end_blk))
   4719			return &FDEV(i);
   4720	}
   4721
   4722	return NULL;
   4723}
   4724
   4725static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx,
   4726			      void *data)
   4727{
   4728	memcpy(data, zone, sizeof(struct blk_zone));
   4729	return 0;
   4730}
   4731
   4732static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
   4733{
   4734	struct curseg_info *cs = CURSEG_I(sbi, type);
   4735	struct f2fs_dev_info *zbd;
   4736	struct blk_zone zone;
   4737	unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off;
   4738	block_t cs_zone_block, wp_block;
   4739	unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
   4740	sector_t zone_sector;
   4741	int err;
   4742
   4743	cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
   4744	cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
   4745
   4746	zbd = get_target_zoned_dev(sbi, cs_zone_block);
   4747	if (!zbd)
   4748		return 0;
   4749
   4750	/* report zone for the sector the curseg points to */
   4751	zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
   4752		<< log_sectors_per_block;
   4753	err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
   4754				  report_one_zone_cb, &zone);
   4755	if (err != 1) {
   4756		f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
   4757			 zbd->path, err);
   4758		return err;
   4759	}
   4760
   4761	if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
   4762		return 0;
   4763
   4764	wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block);
   4765	wp_segno = GET_SEGNO(sbi, wp_block);
   4766	wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
   4767	wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0);
   4768
   4769	if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
   4770		wp_sector_off == 0)
   4771		return 0;
   4772
   4773	f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: "
   4774		    "curseg[0x%x,0x%x] wp[0x%x,0x%x]",
   4775		    type, cs->segno, cs->next_blkoff, wp_segno, wp_blkoff);
   4776
   4777	f2fs_notice(sbi, "Assign new section to curseg[%d]: "
   4778		    "curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff);
   4779
   4780	f2fs_allocate_new_section(sbi, type, true);
   4781
    4782	/* check consistency of the zone the curseg pointed to */
   4783	if (check_zone_write_pointer(sbi, zbd, &zone))
   4784		return -EIO;
   4785
   4786	/* check newly assigned zone */
   4787	cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
   4788	cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
   4789
   4790	zbd = get_target_zoned_dev(sbi, cs_zone_block);
   4791	if (!zbd)
   4792		return 0;
   4793
   4794	zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
   4795		<< log_sectors_per_block;
   4796	err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
   4797				  report_one_zone_cb, &zone);
   4798	if (err != 1) {
   4799		f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
   4800			 zbd->path, err);
   4801		return err;
   4802	}
   4803
   4804	if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
   4805		return 0;
   4806
   4807	if (zone.wp != zone.start) {
   4808		f2fs_notice(sbi,
   4809			    "New zone for curseg[%d] is not yet discarded. "
   4810			    "Reset the zone: curseg[0x%x,0x%x]",
   4811			    type, cs->segno, cs->next_blkoff);
   4812		err = __f2fs_issue_discard_zone(sbi, zbd->bdev,
   4813				zone_sector >> log_sectors_per_block,
   4814				zone.len >> log_sectors_per_block);
   4815		if (err) {
   4816			f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
   4817				 zbd->path, err);
   4818			return err;
   4819		}
   4820	}
   4821
   4822	return 0;
   4823}
   4824
   4825int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
   4826{
   4827	int i, ret;
   4828
   4829	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
   4830		ret = fix_curseg_write_pointer(sbi, i);
   4831		if (ret)
   4832			return ret;
   4833	}
   4834
   4835	return 0;
   4836}
   4837
   4838struct check_zone_write_pointer_args {
   4839	struct f2fs_sb_info *sbi;
   4840	struct f2fs_dev_info *fdev;
   4841};
   4842
   4843static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx,
   4844				      void *data)
   4845{
   4846	struct check_zone_write_pointer_args *args;
   4847
   4848	args = (struct check_zone_write_pointer_args *)data;
   4849
   4850	return check_zone_write_pointer(args->sbi, args->fdev, zone);
   4851}
   4852
   4853int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
   4854{
   4855	int i, ret;
   4856	struct check_zone_write_pointer_args args;
   4857
   4858	for (i = 0; i < sbi->s_ndevs; i++) {
   4859		if (!bdev_is_zoned(FDEV(i).bdev))
   4860			continue;
   4861
   4862		args.sbi = sbi;
   4863		args.fdev = &FDEV(i);
   4864		ret = blkdev_report_zones(FDEV(i).bdev, 0, BLK_ALL_ZONES,
   4865					  check_zone_write_pointer_cb, &args);
   4866		if (ret < 0)
   4867			return ret;
   4868	}
   4869
   4870	return 0;
   4871}
   4872
   4873static bool is_conv_zone(struct f2fs_sb_info *sbi, unsigned int zone_idx,
   4874						unsigned int dev_idx)
   4875{
   4876	if (!bdev_is_zoned(FDEV(dev_idx).bdev))
   4877		return true;
   4878	return !test_bit(zone_idx, FDEV(dev_idx).blkz_seq);
   4879}
   4880
   4881/* Return the zone index in the given device */
   4882static unsigned int get_zone_idx(struct f2fs_sb_info *sbi, unsigned int secno,
   4883					int dev_idx)
   4884{
   4885	block_t sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
   4886
   4887	return (sec_start_blkaddr - FDEV(dev_idx).start_blk) >>
   4888						sbi->log_blocks_per_blkz;
   4889}
   4890
   4891/*
    4892 * Return the number of usable segments in a section, based on the
    4893 * corresponding zone's capacity. A zone is equal to a section.
   4894 */
   4895static inline unsigned int f2fs_usable_zone_segs_in_sec(
   4896		struct f2fs_sb_info *sbi, unsigned int segno)
   4897{
   4898	unsigned int dev_idx, zone_idx, unusable_segs_in_sec;
   4899
   4900	dev_idx = f2fs_target_device_index(sbi, START_BLOCK(sbi, segno));
   4901	zone_idx = get_zone_idx(sbi, GET_SEC_FROM_SEG(sbi, segno), dev_idx);
   4902
   4903	/* Conventional zone's capacity is always equal to zone size */
   4904	if (is_conv_zone(sbi, zone_idx, dev_idx))
   4905		return sbi->segs_per_sec;
   4906
   4907	/*
   4908	 * If the zone_capacity_blocks array is NULL, then zone capacity
   4909	 * is equal to the zone size for all zones
   4910	 */
   4911	if (!FDEV(dev_idx).zone_capacity_blocks)
   4912		return sbi->segs_per_sec;
   4913
    4914	/* Get the count of segments beyond the zone capacity */
   4915	unusable_segs_in_sec = (sbi->blocks_per_blkz -
   4916				FDEV(dev_idx).zone_capacity_blocks[zone_idx]) >>
   4917				sbi->log_blocks_per_seg;
   4918	return sbi->segs_per_sec - unusable_segs_in_sec;
   4919}
   4920
   4921/*
   4922 * Return the number of usable blocks in a segment. The number of blocks
   4923 * returned is always equal to the number of blocks in a segment for
   4924 * segments fully contained within a sequential zone capacity or a
   4925 * conventional zone. For segments partially contained in a sequential
   4926 * zone capacity, the number of usable blocks up to the zone capacity
   4927 * is returned. 0 is returned in all other cases.
   4928 */
   4929static inline unsigned int f2fs_usable_zone_blks_in_seg(
   4930			struct f2fs_sb_info *sbi, unsigned int segno)
   4931{
   4932	block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr;
   4933	unsigned int zone_idx, dev_idx, secno;
   4934
   4935	secno = GET_SEC_FROM_SEG(sbi, segno);
   4936	seg_start = START_BLOCK(sbi, segno);
   4937	dev_idx = f2fs_target_device_index(sbi, seg_start);
   4938	zone_idx = get_zone_idx(sbi, secno, dev_idx);
   4939
   4940	/*
   4941	 * Conventional zone's capacity is always equal to zone size,
    4942	 * so the number of blocks per segment is unchanged.
   4943	 */
   4944	if (is_conv_zone(sbi, zone_idx, dev_idx))
   4945		return sbi->blocks_per_seg;
   4946
   4947	if (!FDEV(dev_idx).zone_capacity_blocks)
   4948		return sbi->blocks_per_seg;
   4949
   4950	sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
   4951	sec_cap_blkaddr = sec_start_blkaddr +
   4952				FDEV(dev_idx).zone_capacity_blocks[zone_idx];
   4953
   4954	/*
   4955	 * If segment starts before zone capacity and spans beyond
   4956	 * zone capacity, then usable blocks are from seg start to
   4957	 * zone capacity. If the segment starts after the zone capacity,
   4958	 * then there are no usable blocks.
   4959	 */
   4960	if (seg_start >= sec_cap_blkaddr)
   4961		return 0;
   4962	if (seg_start + sbi->blocks_per_seg > sec_cap_blkaddr)
   4963		return sec_cap_blkaddr - seg_start;
   4964
   4965	return sbi->blocks_per_seg;
   4966}
   4967#else
   4968int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
   4969{
   4970	return 0;
   4971}
   4972
   4973int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
   4974{
   4975	return 0;
   4976}
   4977
   4978static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi,
   4979							unsigned int segno)
   4980{
   4981	return 0;
   4982}
   4983
   4984static inline unsigned int f2fs_usable_zone_segs_in_sec(struct f2fs_sb_info *sbi,
   4985							unsigned int segno)
   4986{
   4987	return 0;
   4988}
   4989#endif
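        /*
         * Number of usable blocks in @segno: zone-capacity aware on zoned
         * devices, otherwise a full segment.
         */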
   4990unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
   4991					unsigned int segno)
   4992{
   4993	if (f2fs_sb_has_blkzoned(sbi))
   4994		return f2fs_usable_zone_blks_in_seg(sbi, segno);
   4995
   4996	return sbi->blocks_per_seg;
   4997}
   4998
   4999unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
   5000					unsigned int segno)
   5001{
   5002	if (f2fs_sb_has_blkzoned(sbi))
   5003		return f2fs_usable_zone_segs_in_sec(sbi, segno);
   5004
   5005	return sbi->segs_per_sec;
   5006}
   5007
   5008/*
   5009 * Update min, max modified time for cost-benefit GC algorithm
   5010 */
   5011static void init_min_max_mtime(struct f2fs_sb_info *sbi)
   5012{
   5013	struct sit_info *sit_i = SIT_I(sbi);
   5014	unsigned int segno;
   5015
   5016	down_write(&sit_i->sentry_lock);
   5017
   5018	sit_i->min_mtime = ULLONG_MAX;
   5019
   5020	for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
   5021		unsigned int i;
   5022		unsigned long long mtime = 0;
   5023
   5024		for (i = 0; i < sbi->segs_per_sec; i++)
   5025			mtime += get_seg_entry(sbi, segno + i)->mtime;
   5026
   5027		mtime = div_u64(mtime, sbi->segs_per_sec);
   5028
   5029		if (sit_i->min_mtime > mtime)
   5030			sit_i->min_mtime = mtime;
   5031	}
   5032	sit_i->max_mtime = get_mtime(sbi, false);
   5033	sit_i->dirty_max_mtime = 0;
   5034	up_write(&sit_i->sentry_lock);
   5035}
   5036
   5037int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
   5038{
   5039	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
   5040	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
   5041	struct f2fs_sm_info *sm_info;
   5042	int err;
   5043
   5044	sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
   5045	if (!sm_info)
   5046		return -ENOMEM;
   5047
   5048	/* init sm info */
   5049	sbi->sm_info = sm_info;
   5050	sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
   5051	sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
   5052	sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
   5053	sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
   5054	sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
   5055	sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
   5056	sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
   5057	sm_info->rec_prefree_segments = sm_info->main_segments *
   5058					DEF_RECLAIM_PREFREE_SEGMENTS / 100;
   5059	if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
   5060		sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;
   5061
   5062	if (!f2fs_lfs_mode(sbi))
   5063		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
   5064	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
   5065	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
   5066	sm_info->min_seq_blocks = sbi->blocks_per_seg;
   5067	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
   5068	sm_info->min_ssr_sections = reserved_sections(sbi);
   5069
   5070	INIT_LIST_HEAD(&sm_info->sit_entry_set);
   5071
   5072	init_f2fs_rwsem(&sm_info->curseg_lock);
   5073
   5074	if (!f2fs_readonly(sbi->sb)) {
   5075		err = f2fs_create_flush_cmd_control(sbi);
   5076		if (err)
   5077			return err;
   5078	}
   5079
   5080	err = create_discard_cmd_control(sbi);
   5081	if (err)
   5082		return err;
   5083
   5084	err = build_sit_info(sbi);
   5085	if (err)
   5086		return err;
   5087	err = build_free_segmap(sbi);
   5088	if (err)
   5089		return err;
   5090	err = build_curseg(sbi);
   5091	if (err)
   5092		return err;
   5093
   5094	/* reinit free segmap based on SIT */
   5095	err = build_sit_entries(sbi);
   5096	if (err)
   5097		return err;
   5098
   5099	init_free_segmap(sbi);
   5100	err = build_dirty_segmap(sbi);
   5101	if (err)
   5102		return err;
   5103
   5104	err = sanity_check_curseg(sbi);
   5105	if (err)
   5106		return err;
   5107
   5108	init_min_max_mtime(sbi);
   5109	return 0;
   5110}
   5111
   5112static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
   5113		enum dirty_type dirty_type)
   5114{
   5115	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
   5116
   5117	mutex_lock(&dirty_i->seglist_lock);
   5118	kvfree(dirty_i->dirty_segmap[dirty_type]);
   5119	dirty_i->nr_dirty[dirty_type] = 0;
   5120	mutex_unlock(&dirty_i->seglist_lock);
   5121}
   5122
   5123static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
   5124{
   5125	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
   5126
   5127	kvfree(dirty_i->pinned_secmap);
   5128	kvfree(dirty_i->victim_secmap);
   5129}
   5130
   5131static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
   5132{
   5133	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
   5134	int i;
   5135
   5136	if (!dirty_i)
   5137		return;
   5138
   5139	/* discard pre-free/dirty segments list */
   5140	for (i = 0; i < NR_DIRTY_TYPE; i++)
   5141		discard_dirty_segmap(sbi, i);
   5142
   5143	if (__is_large_section(sbi)) {
   5144		mutex_lock(&dirty_i->seglist_lock);
   5145		kvfree(dirty_i->dirty_secmap);
   5146		mutex_unlock(&dirty_i->seglist_lock);
   5147	}
   5148
   5149	destroy_victim_secmap(sbi);
   5150	SM_I(sbi)->dirty_info = NULL;
   5151	kfree(dirty_i);
   5152}
   5153
   5154static void destroy_curseg(struct f2fs_sb_info *sbi)
   5155{
   5156	struct curseg_info *array = SM_I(sbi)->curseg_array;
   5157	int i;
   5158
   5159	if (!array)
   5160		return;
   5161	SM_I(sbi)->curseg_array = NULL;
   5162	for (i = 0; i < NR_CURSEG_TYPE; i++) {
   5163		kfree(array[i].sum_blk);
   5164		kfree(array[i].journal);
   5165	}
   5166	kfree(array);
   5167}
   5168
   5169static void destroy_free_segmap(struct f2fs_sb_info *sbi)
   5170{
   5171	struct free_segmap_info *free_i = SM_I(sbi)->free_info;
   5172
   5173	if (!free_i)
   5174		return;
   5175	SM_I(sbi)->free_info = NULL;
   5176	kvfree(free_i->free_segmap);
   5177	kvfree(free_i->free_secmap);
   5178	kfree(free_i);
   5179}
   5180
   5181static void destroy_sit_info(struct f2fs_sb_info *sbi)
   5182{
   5183	struct sit_info *sit_i = SIT_I(sbi);
   5184
   5185	if (!sit_i)
   5186		return;
   5187
   5188	if (sit_i->sentries)
   5189		kvfree(sit_i->bitmap);
   5190	kfree(sit_i->tmp_map);
   5191
   5192	kvfree(sit_i->sentries);
   5193	kvfree(sit_i->sec_entries);
   5194	kvfree(sit_i->dirty_sentries_bitmap);
   5195
   5196	SM_I(sbi)->sit_info = NULL;
   5197	kvfree(sit_i->sit_bitmap);
   5198#ifdef CONFIG_F2FS_CHECK_FS
   5199	kvfree(sit_i->sit_bitmap_mir);
   5200	kvfree(sit_i->invalid_segmap);
   5201#endif
   5202	kfree(sit_i);
   5203}
   5204
   5205void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
   5206{
   5207	struct f2fs_sm_info *sm_info = SM_I(sbi);
   5208
   5209	if (!sm_info)
   5210		return;
   5211	f2fs_destroy_flush_cmd_control(sbi, true);
   5212	destroy_discard_cmd_control(sbi);
   5213	destroy_dirty_segmap(sbi);
   5214	destroy_curseg(sbi);
   5215	destroy_free_segmap(sbi);
   5216	destroy_sit_info(sbi);
   5217	sbi->sm_info = NULL;
   5218	kfree(sm_info);
   5219}
   5220
   5221int __init f2fs_create_segment_manager_caches(void)
   5222{
   5223	discard_entry_slab = f2fs_kmem_cache_create("f2fs_discard_entry",
   5224			sizeof(struct discard_entry));
   5225	if (!discard_entry_slab)
   5226		goto fail;
   5227
   5228	discard_cmd_slab = f2fs_kmem_cache_create("f2fs_discard_cmd",
   5229			sizeof(struct discard_cmd));
   5230	if (!discard_cmd_slab)
   5231		goto destroy_discard_entry;
   5232
   5233	sit_entry_set_slab = f2fs_kmem_cache_create("f2fs_sit_entry_set",
   5234			sizeof(struct sit_entry_set));
   5235	if (!sit_entry_set_slab)
   5236		goto destroy_discard_cmd;
   5237
   5238	revoke_entry_slab = f2fs_kmem_cache_create("f2fs_revoke_entry",
   5239			sizeof(struct revoke_entry));
   5240	if (!revoke_entry_slab)
   5241		goto destroy_sit_entry_set;
   5242	return 0;
   5243
   5244destroy_sit_entry_set:
   5245	kmem_cache_destroy(sit_entry_set_slab);
   5246destroy_discard_cmd:
   5247	kmem_cache_destroy(discard_cmd_slab);
   5248destroy_discard_entry:
   5249	kmem_cache_destroy(discard_entry_slab);
   5250fail:
   5251	return -ENOMEM;
   5252}
   5253
   5254void f2fs_destroy_segment_manager_caches(void)
   5255{
   5256	kmem_cache_destroy(sit_entry_set_slab);
   5257	kmem_cache_destroy(discard_cmd_slab);
   5258	kmem_cache_destroy(discard_entry_slab);
   5259	kmem_cache_destroy(revoke_entry_slab);
   5260}