cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

segment.c (75642B)


      1// SPDX-License-Identifier: GPL-2.0+
      2/*
      3 * NILFS segment constructor.
      4 *
      5 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
      6 *
      7 * Written by Ryusuke Konishi.
      8 *
      9 */
     10
     11#include <linux/pagemap.h>
     12#include <linux/buffer_head.h>
     13#include <linux/writeback.h>
     14#include <linux/bitops.h>
     15#include <linux/bio.h>
     16#include <linux/completion.h>
     17#include <linux/blkdev.h>
     18#include <linux/backing-dev.h>
     19#include <linux/freezer.h>
     20#include <linux/kthread.h>
     21#include <linux/crc32.h>
     22#include <linux/pagevec.h>
     23#include <linux/slab.h>
     24#include <linux/sched/signal.h>
     25
     26#include "nilfs.h"
     27#include "btnode.h"
     28#include "page.h"
     29#include "segment.h"
     30#include "sufile.h"
     31#include "cpfile.h"
     32#include "ifile.h"
     33#include "segbuf.h"
     34
     35
     36/*
     37 * Segment constructor
     38 */
     39#define SC_N_INODEVEC	16   /* Size of locally allocated inode vector */
     40
     41#define SC_MAX_SEGDELTA 64   /*
     42			      * Upper limit of the number of segments
     43			      * appended in collection retry loop
     44			      */
     45
     46/* Construction mode */
     47enum {
     48	SC_LSEG_SR = 1,	/* Make a logical segment having a super root */
     49	SC_LSEG_DSYNC,	/*
     50			 * Flush data blocks of a given file and make
     51			 * a logical segment without a super root.
     52			 */
     53	SC_FLUSH_FILE,	/*
     54			 * Flush data files, leads to segment writes without
     55			 * creating a checkpoint.
     56			 */
     57	SC_FLUSH_DAT,	/*
     58			 * Flush DAT file.  This also creates segments
     59			 * without a checkpoint.
     60			 */
     61};
     62
     63/* Stage numbers of dirty block collection */
     64enum {
     65	NILFS_ST_INIT = 0,
     66	NILFS_ST_GC,		/* Collecting dirty blocks for GC */
     67	NILFS_ST_FILE,
     68	NILFS_ST_IFILE,
     69	NILFS_ST_CPFILE,
     70	NILFS_ST_SUFILE,
     71	NILFS_ST_DAT,
     72	NILFS_ST_SR,		/* Super root */
     73	NILFS_ST_DSYNC,		/* Data sync blocks */
     74	NILFS_ST_DONE,
     75};
     76
     77#define CREATE_TRACE_POINTS
     78#include <trace/events/nilfs2.h>
     79
     80/*
      81 * nilfs_sc_cstage_inc(), nilfs_sc_cstage_set(), nilfs_sc_cstage_get() are
      82 * wrappers around the stage counter (nilfs_sc_info->sc_stage.scnt).  Users
      83 * of the variable must go through them because every stage transition must
      84 * emit a trace event (trace_nilfs2_collection_stage_transition).
      85 *
      86 * nilfs_sc_cstage_get() isn't required for the above purpose because it
      87 * doesn't produce tracepoint events.  It is provided just for making the
      88 * intention clear.
      89 */
     90static inline void nilfs_sc_cstage_inc(struct nilfs_sc_info *sci)
     91{
     92	sci->sc_stage.scnt++;
     93	trace_nilfs2_collection_stage_transition(sci);
     94}
     95
     96static inline void nilfs_sc_cstage_set(struct nilfs_sc_info *sci, int next_scnt)
     97{
     98	sci->sc_stage.scnt = next_scnt;
     99	trace_nilfs2_collection_stage_transition(sci);
    100}
    101
    102static inline int nilfs_sc_cstage_get(struct nilfs_sc_info *sci)
    103{
    104	return sci->sc_stage.scnt;
    105}
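
/*
 * Illustrative example (not part of the original file): stage transitions
 * must go through the wrappers above so that the tracepoint fires, e.g.
 *
 *	nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
 *
 * rather than assigning sci->sc_stage.scnt directly, which would skip
 * trace_nilfs2_collection_stage_transition().
 */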
    106
    107/* State flags of collection */
    108#define NILFS_CF_NODE		0x0001	/* Collecting node blocks */
    109#define NILFS_CF_IFILE_STARTED	0x0002	/* IFILE stage has started */
     110#define NILFS_CF_SUFREED	0x0004	/* segment usages have been freed */
    111#define NILFS_CF_HISTORY_MASK	(NILFS_CF_IFILE_STARTED | NILFS_CF_SUFREED)
    112
    113/* Operations depending on the construction mode and file type */
    114struct nilfs_sc_operations {
    115	int (*collect_data)(struct nilfs_sc_info *, struct buffer_head *,
    116			    struct inode *);
    117	int (*collect_node)(struct nilfs_sc_info *, struct buffer_head *,
    118			    struct inode *);
    119	int (*collect_bmap)(struct nilfs_sc_info *, struct buffer_head *,
    120			    struct inode *);
    121	void (*write_data_binfo)(struct nilfs_sc_info *,
    122				 struct nilfs_segsum_pointer *,
    123				 union nilfs_binfo *);
    124	void (*write_node_binfo)(struct nilfs_sc_info *,
    125				 struct nilfs_segsum_pointer *,
    126				 union nilfs_binfo *);
    127};
    128
    129/*
    130 * Other definitions
    131 */
    132static void nilfs_segctor_start_timer(struct nilfs_sc_info *);
    133static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int);
    134static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *);
    135static void nilfs_dispose_list(struct the_nilfs *, struct list_head *, int);
    136
    137#define nilfs_cnt32_ge(a, b)   \
    138	(typecheck(__u32, a) && typecheck(__u32, b) && \
    139	 ((__s32)(a) - (__s32)(b) >= 0))
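
/*
 * Example (illustrative, not from the original file): the signed-difference
 * trick above is robust against 32-bit wraparound.  For instance,
 *
 *	nilfs_cnt32_ge(0x00000001, 0xfffffff0)
 *
 * is true because (__s32)(0x00000001 - 0xfffffff0) == 17 >= 0, even though
 * the plain unsigned comparison would be false.
 */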
    140
    141static int nilfs_prepare_segment_lock(struct super_block *sb,
    142				      struct nilfs_transaction_info *ti)
    143{
    144	struct nilfs_transaction_info *cur_ti = current->journal_info;
    145	void *save = NULL;
    146
    147	if (cur_ti) {
    148		if (cur_ti->ti_magic == NILFS_TI_MAGIC)
    149			return ++cur_ti->ti_count;
    150
    151		/*
     152		 * If the journal_info field is occupied by another FS,
    153		 * it is saved and will be restored on
    154		 * nilfs_transaction_commit().
    155		 */
    156		nilfs_warn(sb, "journal info from a different FS");
    157		save = current->journal_info;
    158	}
    159	if (!ti) {
    160		ti = kmem_cache_alloc(nilfs_transaction_cachep, GFP_NOFS);
    161		if (!ti)
    162			return -ENOMEM;
    163		ti->ti_flags = NILFS_TI_DYNAMIC_ALLOC;
    164	} else {
    165		ti->ti_flags = 0;
    166	}
    167	ti->ti_count = 0;
    168	ti->ti_save = save;
    169	ti->ti_magic = NILFS_TI_MAGIC;
    170	current->journal_info = ti;
    171	return 0;
    172}
    173
    174/**
    175 * nilfs_transaction_begin - start indivisible file operations.
    176 * @sb: super block
    177 * @ti: nilfs_transaction_info
    178 * @vacancy_check: flags for vacancy rate checks
    179 *
     180 * nilfs_transaction_begin() acquires a reader/writer semaphore, called
     181 * the segment semaphore, to make segment construction and write tasks
     182 * mutually exclusive.  It is used in a pair with nilfs_transaction_commit().
    183 * The region enclosed by these two functions can be nested.  To avoid a
    184 * deadlock, the semaphore is only acquired or released in the outermost call.
    185 *
     186 * This function allocates a nilfs_transaction_info struct to keep context
     187 * information in it.  It is initialized and hooked onto the current task in
     188 * the outermost call.  If a pre-allocated struct is given to @ti, it is used
     189 * instead; otherwise a new struct is allocated from a slab cache.
    190 *
     191 * When the @vacancy_check flag is set, this function checks the amount of
     192 * free space and waits for the GC to reclaim disk space if capacity is low.
    193 *
     194 * Return Value: On success, 0 is returned. On error, one of the following
     195 * negative error codes is returned.
    196 *
    197 * %-ENOMEM - Insufficient memory available.
    198 *
     199 * %-ENOSPC - No space left on device.
    200 */
    201int nilfs_transaction_begin(struct super_block *sb,
    202			    struct nilfs_transaction_info *ti,
    203			    int vacancy_check)
    204{
    205	struct the_nilfs *nilfs;
    206	int ret = nilfs_prepare_segment_lock(sb, ti);
    207	struct nilfs_transaction_info *trace_ti;
    208
    209	if (unlikely(ret < 0))
    210		return ret;
    211	if (ret > 0) {
    212		trace_ti = current->journal_info;
    213
    214		trace_nilfs2_transaction_transition(sb, trace_ti,
    215				    trace_ti->ti_count, trace_ti->ti_flags,
    216				    TRACE_NILFS2_TRANSACTION_BEGIN);
    217		return 0;
    218	}
    219
    220	sb_start_intwrite(sb);
    221
    222	nilfs = sb->s_fs_info;
    223	down_read(&nilfs->ns_segctor_sem);
    224	if (vacancy_check && nilfs_near_disk_full(nilfs)) {
    225		up_read(&nilfs->ns_segctor_sem);
    226		ret = -ENOSPC;
    227		goto failed;
    228	}
    229
    230	trace_ti = current->journal_info;
    231	trace_nilfs2_transaction_transition(sb, trace_ti, trace_ti->ti_count,
    232					    trace_ti->ti_flags,
    233					    TRACE_NILFS2_TRANSACTION_BEGIN);
    234	return 0;
    235
    236 failed:
    237	ti = current->journal_info;
    238	current->journal_info = ti->ti_save;
    239	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
    240		kmem_cache_free(nilfs_transaction_cachep, ti);
    241	sb_end_intwrite(sb);
    242	return ret;
    243}
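
/*
 * Usage sketch (illustrative, not part of the original file): a typical
 * caller brackets block updates with begin/commit.  The transaction info
 * may live on the stack because the outermost call hooks it onto the
 * current task:
 *
 *	struct nilfs_transaction_info ti;
 *	int err;
 *
 *	err = nilfs_transaction_begin(sb, &ti, 1);
 *	if (err)
 *		return err;
 *	... modify blocks under the segment semaphore ...
 *	return nilfs_transaction_commit(sb);
 *
 * On a failure in the middle, nilfs_transaction_abort(sb) releases the
 * semaphore without requesting segment construction.
 */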
    244
    245/**
    246 * nilfs_transaction_commit - commit indivisible file operations.
    247 * @sb: super block
    248 *
    249 * nilfs_transaction_commit() releases the read semaphore which is
    250 * acquired by nilfs_transaction_begin(). This is only performed
     251 * in the outermost call of this function.  If a commit flag is set,
    252 * nilfs_transaction_commit() sets a timer to start the segment
    253 * constructor.  If a sync flag is set, it starts construction
    254 * directly.
    255 */
    256int nilfs_transaction_commit(struct super_block *sb)
    257{
    258	struct nilfs_transaction_info *ti = current->journal_info;
    259	struct the_nilfs *nilfs = sb->s_fs_info;
    260	int err = 0;
    261
    262	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
    263	ti->ti_flags |= NILFS_TI_COMMIT;
    264	if (ti->ti_count > 0) {
    265		ti->ti_count--;
    266		trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
    267			    ti->ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT);
    268		return 0;
    269	}
    270	if (nilfs->ns_writer) {
    271		struct nilfs_sc_info *sci = nilfs->ns_writer;
    272
    273		if (ti->ti_flags & NILFS_TI_COMMIT)
    274			nilfs_segctor_start_timer(sci);
    275		if (atomic_read(&nilfs->ns_ndirtyblks) > sci->sc_watermark)
    276			nilfs_segctor_do_flush(sci, 0);
    277	}
    278	up_read(&nilfs->ns_segctor_sem);
    279	trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
    280			    ti->ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT);
    281
    282	current->journal_info = ti->ti_save;
    283
    284	if (ti->ti_flags & NILFS_TI_SYNC)
    285		err = nilfs_construct_segment(sb);
    286	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
    287		kmem_cache_free(nilfs_transaction_cachep, ti);
    288	sb_end_intwrite(sb);
    289	return err;
    290}
    291
    292void nilfs_transaction_abort(struct super_block *sb)
    293{
    294	struct nilfs_transaction_info *ti = current->journal_info;
    295	struct the_nilfs *nilfs = sb->s_fs_info;
    296
    297	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
    298	if (ti->ti_count > 0) {
    299		ti->ti_count--;
    300		trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
    301			    ti->ti_flags, TRACE_NILFS2_TRANSACTION_ABORT);
    302		return;
    303	}
    304	up_read(&nilfs->ns_segctor_sem);
    305
    306	trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
    307		    ti->ti_flags, TRACE_NILFS2_TRANSACTION_ABORT);
    308
    309	current->journal_info = ti->ti_save;
    310	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
    311		kmem_cache_free(nilfs_transaction_cachep, ti);
    312	sb_end_intwrite(sb);
    313}
    314
    315void nilfs_relax_pressure_in_lock(struct super_block *sb)
    316{
    317	struct the_nilfs *nilfs = sb->s_fs_info;
    318	struct nilfs_sc_info *sci = nilfs->ns_writer;
    319
    320	if (!sci || !sci->sc_flush_request)
    321		return;
    322
    323	set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
    324	up_read(&nilfs->ns_segctor_sem);
    325
    326	down_write(&nilfs->ns_segctor_sem);
    327	if (sci->sc_flush_request &&
    328	    test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags)) {
    329		struct nilfs_transaction_info *ti = current->journal_info;
    330
    331		ti->ti_flags |= NILFS_TI_WRITER;
    332		nilfs_segctor_do_immediate_flush(sci);
    333		ti->ti_flags &= ~NILFS_TI_WRITER;
    334	}
    335	downgrade_write(&nilfs->ns_segctor_sem);
    336}
    337
    338static void nilfs_transaction_lock(struct super_block *sb,
    339				   struct nilfs_transaction_info *ti,
    340				   int gcflag)
    341{
    342	struct nilfs_transaction_info *cur_ti = current->journal_info;
    343	struct the_nilfs *nilfs = sb->s_fs_info;
    344	struct nilfs_sc_info *sci = nilfs->ns_writer;
    345
    346	WARN_ON(cur_ti);
    347	ti->ti_flags = NILFS_TI_WRITER;
    348	ti->ti_count = 0;
    349	ti->ti_save = cur_ti;
    350	ti->ti_magic = NILFS_TI_MAGIC;
    351	current->journal_info = ti;
    352
    353	for (;;) {
    354		trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
    355			    ti->ti_flags, TRACE_NILFS2_TRANSACTION_TRYLOCK);
    356
    357		down_write(&nilfs->ns_segctor_sem);
    358		if (!test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags))
    359			break;
    360
    361		nilfs_segctor_do_immediate_flush(sci);
    362
    363		up_write(&nilfs->ns_segctor_sem);
    364		cond_resched();
    365	}
    366	if (gcflag)
    367		ti->ti_flags |= NILFS_TI_GC;
    368
    369	trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
    370			    ti->ti_flags, TRACE_NILFS2_TRANSACTION_LOCK);
    371}
    372
    373static void nilfs_transaction_unlock(struct super_block *sb)
    374{
    375	struct nilfs_transaction_info *ti = current->journal_info;
    376	struct the_nilfs *nilfs = sb->s_fs_info;
    377
    378	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
    379	BUG_ON(ti->ti_count > 0);
    380
    381	up_write(&nilfs->ns_segctor_sem);
    382	current->journal_info = ti->ti_save;
    383
    384	trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
    385			    ti->ti_flags, TRACE_NILFS2_TRANSACTION_UNLOCK);
    386}
    387
    388static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci,
    389					    struct nilfs_segsum_pointer *ssp,
    390					    unsigned int bytes)
    391{
    392	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
    393	unsigned int blocksize = sci->sc_super->s_blocksize;
    394	void *p;
    395
    396	if (unlikely(ssp->offset + bytes > blocksize)) {
    397		ssp->offset = 0;
    398		BUG_ON(NILFS_SEGBUF_BH_IS_LAST(ssp->bh,
    399					       &segbuf->sb_segsum_buffers));
    400		ssp->bh = NILFS_SEGBUF_NEXT_BH(ssp->bh);
    401	}
    402	p = ssp->bh->b_data + ssp->offset;
    403	ssp->offset += bytes;
    404	return p;
    405}
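
/*
 * Illustrative note (not part of the original file): with 4096-byte blocks,
 * mapping a 24-byte entry at ssp->offset == 4088 would cross the block end,
 * so the pointer spills to the next segment summary buffer and the entry is
 * mapped at offset 0 of that block instead.
 */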
    406
    407/**
    408 * nilfs_segctor_reset_segment_buffer - reset the current segment buffer
    409 * @sci: nilfs_sc_info
    410 */
    411static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci)
    412{
    413	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
    414	struct buffer_head *sumbh;
    415	unsigned int sumbytes;
    416	unsigned int flags = 0;
    417	int err;
    418
    419	if (nilfs_doing_gc())
    420		flags = NILFS_SS_GC;
    421	err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime, sci->sc_cno);
    422	if (unlikely(err))
    423		return err;
    424
    425	sumbh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers);
    426	sumbytes = segbuf->sb_sum.sumbytes;
    427	sci->sc_finfo_ptr.bh = sumbh;  sci->sc_finfo_ptr.offset = sumbytes;
    428	sci->sc_binfo_ptr.bh = sumbh;  sci->sc_binfo_ptr.offset = sumbytes;
    429	sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
    430	return 0;
    431}
    432
    433static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
    434{
    435	sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
    436	if (NILFS_SEGBUF_IS_LAST(sci->sc_curseg, &sci->sc_segbufs))
    437		return -E2BIG; /*
    438				* The current segment is filled up
    439				* (internal code)
    440				*/
    441	sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg);
    442	return nilfs_segctor_reset_segment_buffer(sci);
    443}
    444
    445static int nilfs_segctor_add_super_root(struct nilfs_sc_info *sci)
    446{
    447	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
    448	int err;
    449
    450	if (segbuf->sb_sum.nblocks >= segbuf->sb_rest_blocks) {
    451		err = nilfs_segctor_feed_segment(sci);
    452		if (err)
    453			return err;
    454		segbuf = sci->sc_curseg;
    455	}
    456	err = nilfs_segbuf_extend_payload(segbuf, &segbuf->sb_super_root);
    457	if (likely(!err))
    458		segbuf->sb_sum.flags |= NILFS_SS_SR;
    459	return err;
    460}
    461
    462/*
    463 * Functions for making segment summary and payloads
    464 */
    465static int nilfs_segctor_segsum_block_required(
    466	struct nilfs_sc_info *sci, const struct nilfs_segsum_pointer *ssp,
    467	unsigned int binfo_size)
    468{
    469	unsigned int blocksize = sci->sc_super->s_blocksize;
     470	/* The sizes of finfo and binfo are small enough relative to blocksize */
    471
    472	return ssp->offset + binfo_size +
    473		(!sci->sc_blk_cnt ? sizeof(struct nilfs_finfo) : 0) >
    474		blocksize;
    475}
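
/*
 * Worked example (illustrative, not from the original file), assuming
 * 4096-byte blocks and an 8-byte binfo: at ssp->offset == 4080 with
 * sc_blk_cnt == 0, the check is 4080 + 8 + sizeof(struct nilfs_finfo)
 * (24 bytes) == 4112 > 4096, so another segment summary block is required.
 */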
    476
    477static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
    478				      struct inode *inode)
    479{
    480	sci->sc_curseg->sb_sum.nfinfo++;
    481	sci->sc_binfo_ptr = sci->sc_finfo_ptr;
    482	nilfs_segctor_map_segsum_entry(
    483		sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));
    484
    485	if (NILFS_I(inode)->i_root &&
    486	    !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
    487		set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
    488	/* skip finfo */
    489}
    490
    491static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci,
    492				    struct inode *inode)
    493{
    494	struct nilfs_finfo *finfo;
    495	struct nilfs_inode_info *ii;
    496	struct nilfs_segment_buffer *segbuf;
    497	__u64 cno;
    498
    499	if (sci->sc_blk_cnt == 0)
    500		return;
    501
    502	ii = NILFS_I(inode);
    503
    504	if (test_bit(NILFS_I_GCINODE, &ii->i_state))
    505		cno = ii->i_cno;
    506	else if (NILFS_ROOT_METADATA_FILE(inode->i_ino))
    507		cno = 0;
    508	else
    509		cno = sci->sc_cno;
    510
    511	finfo = nilfs_segctor_map_segsum_entry(sci, &sci->sc_finfo_ptr,
    512						 sizeof(*finfo));
    513	finfo->fi_ino = cpu_to_le64(inode->i_ino);
    514	finfo->fi_nblocks = cpu_to_le32(sci->sc_blk_cnt);
    515	finfo->fi_ndatablk = cpu_to_le32(sci->sc_datablk_cnt);
    516	finfo->fi_cno = cpu_to_le64(cno);
    517
    518	segbuf = sci->sc_curseg;
    519	segbuf->sb_sum.sumbytes = sci->sc_binfo_ptr.offset +
    520		sci->sc_super->s_blocksize * (segbuf->sb_sum.nsumblk - 1);
    521	sci->sc_finfo_ptr = sci->sc_binfo_ptr;
    522	sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
    523}
    524
    525static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci,
    526					struct buffer_head *bh,
    527					struct inode *inode,
    528					unsigned int binfo_size)
    529{
    530	struct nilfs_segment_buffer *segbuf;
    531	int required, err = 0;
    532
    533 retry:
    534	segbuf = sci->sc_curseg;
    535	required = nilfs_segctor_segsum_block_required(
    536		sci, &sci->sc_binfo_ptr, binfo_size);
    537	if (segbuf->sb_sum.nblocks + required + 1 > segbuf->sb_rest_blocks) {
    538		nilfs_segctor_end_finfo(sci, inode);
    539		err = nilfs_segctor_feed_segment(sci);
    540		if (err)
    541			return err;
    542		goto retry;
    543	}
    544	if (unlikely(required)) {
    545		err = nilfs_segbuf_extend_segsum(segbuf);
    546		if (unlikely(err))
    547			goto failed;
    548	}
    549	if (sci->sc_blk_cnt == 0)
    550		nilfs_segctor_begin_finfo(sci, inode);
    551
    552	nilfs_segctor_map_segsum_entry(sci, &sci->sc_binfo_ptr, binfo_size);
    553	/* Substitution to vblocknr is delayed until update_blocknr() */
    554	nilfs_segbuf_add_file_buffer(segbuf, bh);
    555	sci->sc_blk_cnt++;
    556 failed:
    557	return err;
    558}
    559
    560/*
    561 * Callback functions that enumerate, mark, and collect dirty blocks
    562 */
    563static int nilfs_collect_file_data(struct nilfs_sc_info *sci,
    564				   struct buffer_head *bh, struct inode *inode)
    565{
    566	int err;
    567
    568	err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
    569	if (err < 0)
    570		return err;
    571
    572	err = nilfs_segctor_add_file_block(sci, bh, inode,
    573					   sizeof(struct nilfs_binfo_v));
    574	if (!err)
    575		sci->sc_datablk_cnt++;
    576	return err;
    577}
    578
    579static int nilfs_collect_file_node(struct nilfs_sc_info *sci,
    580				   struct buffer_head *bh,
    581				   struct inode *inode)
    582{
    583	return nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
    584}
    585
    586static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci,
    587				   struct buffer_head *bh,
    588				   struct inode *inode)
    589{
    590	WARN_ON(!buffer_dirty(bh));
    591	return nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
    592}
    593
    594static void nilfs_write_file_data_binfo(struct nilfs_sc_info *sci,
    595					struct nilfs_segsum_pointer *ssp,
    596					union nilfs_binfo *binfo)
    597{
    598	struct nilfs_binfo_v *binfo_v = nilfs_segctor_map_segsum_entry(
    599		sci, ssp, sizeof(*binfo_v));
    600	*binfo_v = binfo->bi_v;
    601}
    602
    603static void nilfs_write_file_node_binfo(struct nilfs_sc_info *sci,
    604					struct nilfs_segsum_pointer *ssp,
    605					union nilfs_binfo *binfo)
    606{
    607	__le64 *vblocknr = nilfs_segctor_map_segsum_entry(
    608		sci, ssp, sizeof(*vblocknr));
    609	*vblocknr = binfo->bi_v.bi_vblocknr;
    610}
    611
    612static const struct nilfs_sc_operations nilfs_sc_file_ops = {
    613	.collect_data = nilfs_collect_file_data,
    614	.collect_node = nilfs_collect_file_node,
    615	.collect_bmap = nilfs_collect_file_bmap,
    616	.write_data_binfo = nilfs_write_file_data_binfo,
    617	.write_node_binfo = nilfs_write_file_node_binfo,
    618};
    619
    620static int nilfs_collect_dat_data(struct nilfs_sc_info *sci,
    621				  struct buffer_head *bh, struct inode *inode)
    622{
    623	int err;
    624
    625	err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
    626	if (err < 0)
    627		return err;
    628
    629	err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
    630	if (!err)
    631		sci->sc_datablk_cnt++;
    632	return err;
    633}
    634
    635static int nilfs_collect_dat_bmap(struct nilfs_sc_info *sci,
    636				  struct buffer_head *bh, struct inode *inode)
    637{
    638	WARN_ON(!buffer_dirty(bh));
    639	return nilfs_segctor_add_file_block(sci, bh, inode,
    640					    sizeof(struct nilfs_binfo_dat));
    641}
    642
    643static void nilfs_write_dat_data_binfo(struct nilfs_sc_info *sci,
    644				       struct nilfs_segsum_pointer *ssp,
    645				       union nilfs_binfo *binfo)
    646{
    647	__le64 *blkoff = nilfs_segctor_map_segsum_entry(sci, ssp,
    648							  sizeof(*blkoff));
    649	*blkoff = binfo->bi_dat.bi_blkoff;
    650}
    651
    652static void nilfs_write_dat_node_binfo(struct nilfs_sc_info *sci,
    653				       struct nilfs_segsum_pointer *ssp,
    654				       union nilfs_binfo *binfo)
    655{
    656	struct nilfs_binfo_dat *binfo_dat =
    657		nilfs_segctor_map_segsum_entry(sci, ssp, sizeof(*binfo_dat));
    658	*binfo_dat = binfo->bi_dat;
    659}
    660
    661static const struct nilfs_sc_operations nilfs_sc_dat_ops = {
    662	.collect_data = nilfs_collect_dat_data,
    663	.collect_node = nilfs_collect_file_node,
    664	.collect_bmap = nilfs_collect_dat_bmap,
    665	.write_data_binfo = nilfs_write_dat_data_binfo,
    666	.write_node_binfo = nilfs_write_dat_node_binfo,
    667};
    668
    669static const struct nilfs_sc_operations nilfs_sc_dsync_ops = {
    670	.collect_data = nilfs_collect_file_data,
    671	.collect_node = NULL,
    672	.collect_bmap = NULL,
    673	.write_data_binfo = nilfs_write_file_data_binfo,
    674	.write_node_binfo = NULL,
    675};
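
/*
 * Summary (illustrative note, not part of the original file): the table
 * used for a file is chosen by construction mode and inode number, as in
 * nilfs_segctor_update_payload_blocknr() below:
 *
 *	if (mode == SC_LSEG_DSYNC)
 *		sc_op = &nilfs_sc_dsync_ops;	// data blocks only
 *	else if (ino == NILFS_DAT_INO)
 *		sc_op = &nilfs_sc_dat_ops;	// DAT metadata file
 *	else
 *		sc_op = &nilfs_sc_file_ops;	// regular and other files
 */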
    676
    677static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
    678					      struct list_head *listp,
    679					      size_t nlimit,
    680					      loff_t start, loff_t end)
    681{
    682	struct address_space *mapping = inode->i_mapping;
    683	struct pagevec pvec;
    684	pgoff_t index = 0, last = ULONG_MAX;
    685	size_t ndirties = 0;
    686	int i;
    687
    688	if (unlikely(start != 0 || end != LLONG_MAX)) {
    689		/*
     690		 * A valid range is given for syncing data pages.  The
     691		 * range is rounded to page boundaries; extra dirty
     692		 * buffers may be included if blocksize < pagesize.
    693		 */
    694		index = start >> PAGE_SHIFT;
    695		last = end >> PAGE_SHIFT;
    696	}
    697	pagevec_init(&pvec);
    698 repeat:
    699	if (unlikely(index > last) ||
    700	    !pagevec_lookup_range_tag(&pvec, mapping, &index, last,
    701				PAGECACHE_TAG_DIRTY))
    702		return ndirties;
    703
    704	for (i = 0; i < pagevec_count(&pvec); i++) {
    705		struct buffer_head *bh, *head;
    706		struct page *page = pvec.pages[i];
    707
    708		lock_page(page);
    709		if (!page_has_buffers(page))
    710			create_empty_buffers(page, i_blocksize(inode), 0);
    711		unlock_page(page);
    712
    713		bh = head = page_buffers(page);
    714		do {
    715			if (!buffer_dirty(bh) || buffer_async_write(bh))
    716				continue;
    717			get_bh(bh);
    718			list_add_tail(&bh->b_assoc_buffers, listp);
    719			ndirties++;
    720			if (unlikely(ndirties >= nlimit)) {
    721				pagevec_release(&pvec);
    722				cond_resched();
    723				return ndirties;
    724			}
    725		} while (bh = bh->b_this_page, bh != head);
    726	}
    727	pagevec_release(&pvec);
    728	cond_resched();
    729	goto repeat;
    730}
    731
    732static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
    733					    struct list_head *listp)
    734{
    735	struct nilfs_inode_info *ii = NILFS_I(inode);
    736	struct inode *btnc_inode = ii->i_assoc_inode;
    737	struct pagevec pvec;
    738	struct buffer_head *bh, *head;
    739	unsigned int i;
    740	pgoff_t index = 0;
    741
    742	if (!btnc_inode)
    743		return;
    744
    745	pagevec_init(&pvec);
    746
    747	while (pagevec_lookup_tag(&pvec, btnc_inode->i_mapping, &index,
    748					PAGECACHE_TAG_DIRTY)) {
    749		for (i = 0; i < pagevec_count(&pvec); i++) {
    750			bh = head = page_buffers(pvec.pages[i]);
    751			do {
    752				if (buffer_dirty(bh) &&
    753						!buffer_async_write(bh)) {
    754					get_bh(bh);
    755					list_add_tail(&bh->b_assoc_buffers,
    756						      listp);
    757				}
    758				bh = bh->b_this_page;
    759			} while (bh != head);
    760		}
    761		pagevec_release(&pvec);
    762		cond_resched();
    763	}
    764}
    765
    766static void nilfs_dispose_list(struct the_nilfs *nilfs,
    767			       struct list_head *head, int force)
    768{
    769	struct nilfs_inode_info *ii, *n;
    770	struct nilfs_inode_info *ivec[SC_N_INODEVEC], **pii;
    771	unsigned int nv = 0;
    772
    773	while (!list_empty(head)) {
    774		spin_lock(&nilfs->ns_inode_lock);
    775		list_for_each_entry_safe(ii, n, head, i_dirty) {
    776			list_del_init(&ii->i_dirty);
    777			if (force) {
    778				if (unlikely(ii->i_bh)) {
    779					brelse(ii->i_bh);
    780					ii->i_bh = NULL;
    781				}
    782			} else if (test_bit(NILFS_I_DIRTY, &ii->i_state)) {
    783				set_bit(NILFS_I_QUEUED, &ii->i_state);
    784				list_add_tail(&ii->i_dirty,
    785					      &nilfs->ns_dirty_files);
    786				continue;
    787			}
    788			ivec[nv++] = ii;
    789			if (nv == SC_N_INODEVEC)
    790				break;
    791		}
    792		spin_unlock(&nilfs->ns_inode_lock);
    793
    794		for (pii = ivec; nv > 0; pii++, nv--)
    795			iput(&(*pii)->vfs_inode);
    796	}
    797}
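
/*
 * Note (illustrative, not in the original file): inodes are drained in
 * batches of SC_N_INODEVEC (16) via ivec[] so that iput(), which may
 * sleep, runs only after ns_inode_lock has been dropped.
 */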
    798
    799static void nilfs_iput_work_func(struct work_struct *work)
    800{
    801	struct nilfs_sc_info *sci = container_of(work, struct nilfs_sc_info,
    802						 sc_iput_work);
    803	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
    804
    805	nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 0);
    806}
    807
    808static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs,
    809				     struct nilfs_root *root)
    810{
    811	int ret = 0;
    812
    813	if (nilfs_mdt_fetch_dirty(root->ifile))
    814		ret++;
    815	if (nilfs_mdt_fetch_dirty(nilfs->ns_cpfile))
    816		ret++;
    817	if (nilfs_mdt_fetch_dirty(nilfs->ns_sufile))
    818		ret++;
    819	if ((ret || nilfs_doing_gc()) && nilfs_mdt_fetch_dirty(nilfs->ns_dat))
    820		ret++;
    821	return ret;
    822}
    823
    824static int nilfs_segctor_clean(struct nilfs_sc_info *sci)
    825{
    826	return list_empty(&sci->sc_dirty_files) &&
    827		!test_bit(NILFS_SC_DIRTY, &sci->sc_flags) &&
    828		sci->sc_nfreesegs == 0 &&
    829		(!nilfs_doing_gc() || list_empty(&sci->sc_gc_inodes));
    830}
    831
    832static int nilfs_segctor_confirm(struct nilfs_sc_info *sci)
    833{
    834	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
    835	int ret = 0;
    836
    837	if (nilfs_test_metadata_dirty(nilfs, sci->sc_root))
    838		set_bit(NILFS_SC_DIRTY, &sci->sc_flags);
    839
    840	spin_lock(&nilfs->ns_inode_lock);
    841	if (list_empty(&nilfs->ns_dirty_files) && nilfs_segctor_clean(sci))
    842		ret++;
    843
    844	spin_unlock(&nilfs->ns_inode_lock);
    845	return ret;
    846}
    847
    848static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci)
    849{
    850	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
    851
    852	nilfs_mdt_clear_dirty(sci->sc_root->ifile);
    853	nilfs_mdt_clear_dirty(nilfs->ns_cpfile);
    854	nilfs_mdt_clear_dirty(nilfs->ns_sufile);
    855	nilfs_mdt_clear_dirty(nilfs->ns_dat);
    856}
    857
    858static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
    859{
    860	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
    861	struct buffer_head *bh_cp;
    862	struct nilfs_checkpoint *raw_cp;
    863	int err;
    864
    865	/* XXX: this interface will be changed */
    866	err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 1,
    867					  &raw_cp, &bh_cp);
    868	if (likely(!err)) {
    869		/*
     870		 * The following code duplicates logic in cpfile.  But it is
    871		 * needed to collect the checkpoint even if it was not newly
    872		 * created.
    873		 */
    874		mark_buffer_dirty(bh_cp);
    875		nilfs_mdt_mark_dirty(nilfs->ns_cpfile);
    876		nilfs_cpfile_put_checkpoint(
    877			nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
    878	} else
    879		WARN_ON(err == -EINVAL || err == -ENOENT);
    880
    881	return err;
    882}
    883
    884static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci)
    885{
    886	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
    887	struct buffer_head *bh_cp;
    888	struct nilfs_checkpoint *raw_cp;
    889	int err;
    890
    891	err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0,
    892					  &raw_cp, &bh_cp);
    893	if (unlikely(err)) {
    894		WARN_ON(err == -EINVAL || err == -ENOENT);
    895		goto failed_ibh;
    896	}
    897	raw_cp->cp_snapshot_list.ssl_next = 0;
    898	raw_cp->cp_snapshot_list.ssl_prev = 0;
    899	raw_cp->cp_inodes_count =
    900		cpu_to_le64(atomic64_read(&sci->sc_root->inodes_count));
    901	raw_cp->cp_blocks_count =
    902		cpu_to_le64(atomic64_read(&sci->sc_root->blocks_count));
    903	raw_cp->cp_nblk_inc =
    904		cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc);
    905	raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime);
    906	raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno);
    907
    908	if (test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
    909		nilfs_checkpoint_clear_minor(raw_cp);
    910	else
    911		nilfs_checkpoint_set_minor(raw_cp);
    912
    913	nilfs_write_inode_common(sci->sc_root->ifile,
    914				 &raw_cp->cp_ifile_inode, 1);
    915	nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
    916	return 0;
    917
    918 failed_ibh:
    919	return err;
    920}
    921
    922static void nilfs_fill_in_file_bmap(struct inode *ifile,
    923				    struct nilfs_inode_info *ii)
    924
    925{
    926	struct buffer_head *ibh;
    927	struct nilfs_inode *raw_inode;
    928
    929	if (test_bit(NILFS_I_BMAP, &ii->i_state)) {
    930		ibh = ii->i_bh;
    931		BUG_ON(!ibh);
    932		raw_inode = nilfs_ifile_map_inode(ifile, ii->vfs_inode.i_ino,
    933						  ibh);
    934		nilfs_bmap_write(ii->i_bmap, raw_inode);
    935		nilfs_ifile_unmap_inode(ifile, ii->vfs_inode.i_ino, ibh);
    936	}
    937}
    938
    939static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci)
    940{
    941	struct nilfs_inode_info *ii;
    942
    943	list_for_each_entry(ii, &sci->sc_dirty_files, i_dirty) {
    944		nilfs_fill_in_file_bmap(sci->sc_root->ifile, ii);
    945		set_bit(NILFS_I_COLLECTED, &ii->i_state);
    946	}
    947}
    948
    949static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
    950					     struct the_nilfs *nilfs)
    951{
    952	struct buffer_head *bh_sr;
    953	struct nilfs_super_root *raw_sr;
    954	unsigned int isz, srsz;
    955
    956	bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
    957	raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
    958	isz = nilfs->ns_inode_size;
    959	srsz = NILFS_SR_BYTES(isz);
    960
    961	raw_sr->sr_bytes = cpu_to_le16(srsz);
    962	raw_sr->sr_nongc_ctime
    963		= cpu_to_le64(nilfs_doing_gc() ?
    964			      nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
    965	raw_sr->sr_flags = 0;
    966
    967	nilfs_write_inode_common(nilfs->ns_dat, (void *)raw_sr +
    968				 NILFS_SR_DAT_OFFSET(isz), 1);
    969	nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr +
    970				 NILFS_SR_CPFILE_OFFSET(isz), 1);
    971	nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
    972				 NILFS_SR_SUFILE_OFFSET(isz), 1);
    973	memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz);
    974}
    975
    976static void nilfs_redirty_inodes(struct list_head *head)
    977{
    978	struct nilfs_inode_info *ii;
    979
    980	list_for_each_entry(ii, head, i_dirty) {
    981		if (test_bit(NILFS_I_COLLECTED, &ii->i_state))
    982			clear_bit(NILFS_I_COLLECTED, &ii->i_state);
    983	}
    984}
    985
    986static void nilfs_drop_collected_inodes(struct list_head *head)
    987{
    988	struct nilfs_inode_info *ii;
    989
    990	list_for_each_entry(ii, head, i_dirty) {
    991		if (!test_and_clear_bit(NILFS_I_COLLECTED, &ii->i_state))
    992			continue;
    993
    994		clear_bit(NILFS_I_INODE_SYNC, &ii->i_state);
    995		set_bit(NILFS_I_UPDATED, &ii->i_state);
    996	}
    997}
    998
    999static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci,
   1000				       struct inode *inode,
   1001				       struct list_head *listp,
   1002				       int (*collect)(struct nilfs_sc_info *,
   1003						      struct buffer_head *,
   1004						      struct inode *))
   1005{
   1006	struct buffer_head *bh, *n;
   1007	int err = 0;
   1008
   1009	if (collect) {
   1010		list_for_each_entry_safe(bh, n, listp, b_assoc_buffers) {
   1011			list_del_init(&bh->b_assoc_buffers);
   1012			err = collect(sci, bh, inode);
   1013			brelse(bh);
   1014			if (unlikely(err))
   1015				goto dispose_buffers;
   1016		}
   1017		return 0;
   1018	}
   1019
   1020 dispose_buffers:
   1021	while (!list_empty(listp)) {
   1022		bh = list_first_entry(listp, struct buffer_head,
   1023				      b_assoc_buffers);
   1024		list_del_init(&bh->b_assoc_buffers);
   1025		brelse(bh);
   1026	}
   1027	return err;
   1028}
   1029
   1030static size_t nilfs_segctor_buffer_rest(struct nilfs_sc_info *sci)
   1031{
   1032	/* Remaining number of blocks within segment buffer */
   1033	return sci->sc_segbuf_nblocks -
   1034		(sci->sc_nblk_this_inc + sci->sc_curseg->sb_sum.nblocks);
   1035}
   1036
   1037static int nilfs_segctor_scan_file(struct nilfs_sc_info *sci,
   1038				   struct inode *inode,
   1039				   const struct nilfs_sc_operations *sc_ops)
   1040{
   1041	LIST_HEAD(data_buffers);
   1042	LIST_HEAD(node_buffers);
   1043	int err;
   1044
   1045	if (!(sci->sc_stage.flags & NILFS_CF_NODE)) {
   1046		size_t n, rest = nilfs_segctor_buffer_rest(sci);
   1047
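		/*
		 * Illustrative note (not in the original file): one more
		 * buffer than fits (rest + 1) is requested so that n > rest
		 * reliably signals overflow; applying the buffers then
		 * always yields -E2BIG or a real error.
		 */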
   1048		n = nilfs_lookup_dirty_data_buffers(
   1049			inode, &data_buffers, rest + 1, 0, LLONG_MAX);
   1050		if (n > rest) {
   1051			err = nilfs_segctor_apply_buffers(
   1052				sci, inode, &data_buffers,
   1053				sc_ops->collect_data);
    1054			BUG_ON(!err); /* always receives -E2BIG or a real error */
   1055			goto break_or_fail;
   1056		}
   1057	}
   1058	nilfs_lookup_dirty_node_buffers(inode, &node_buffers);
   1059
   1060	if (!(sci->sc_stage.flags & NILFS_CF_NODE)) {
   1061		err = nilfs_segctor_apply_buffers(
   1062			sci, inode, &data_buffers, sc_ops->collect_data);
   1063		if (unlikely(err)) {
   1064			/* dispose node list */
   1065			nilfs_segctor_apply_buffers(
   1066				sci, inode, &node_buffers, NULL);
   1067			goto break_or_fail;
   1068		}
   1069		sci->sc_stage.flags |= NILFS_CF_NODE;
   1070	}
   1071	/* Collect node */
   1072	err = nilfs_segctor_apply_buffers(
   1073		sci, inode, &node_buffers, sc_ops->collect_node);
   1074	if (unlikely(err))
   1075		goto break_or_fail;
   1076
   1077	nilfs_bmap_lookup_dirty_buffers(NILFS_I(inode)->i_bmap, &node_buffers);
   1078	err = nilfs_segctor_apply_buffers(
   1079		sci, inode, &node_buffers, sc_ops->collect_bmap);
   1080	if (unlikely(err))
   1081		goto break_or_fail;
   1082
   1083	nilfs_segctor_end_finfo(sci, inode);
   1084	sci->sc_stage.flags &= ~NILFS_CF_NODE;
   1085
   1086 break_or_fail:
   1087	return err;
   1088}
   1089
   1090static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci,
   1091					 struct inode *inode)
   1092{
   1093	LIST_HEAD(data_buffers);
   1094	size_t n, rest = nilfs_segctor_buffer_rest(sci);
   1095	int err;
   1096
   1097	n = nilfs_lookup_dirty_data_buffers(inode, &data_buffers, rest + 1,
   1098					    sci->sc_dsync_start,
   1099					    sci->sc_dsync_end);
   1100
   1101	err = nilfs_segctor_apply_buffers(sci, inode, &data_buffers,
   1102					  nilfs_collect_file_data);
   1103	if (!err) {
   1104		nilfs_segctor_end_finfo(sci, inode);
   1105		BUG_ON(n > rest);
    1106		/* always receives -E2BIG or a real error if n > rest */
   1107	}
   1108	return err;
   1109}
   1110
   1111static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
   1112{
   1113	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
   1114	struct list_head *head;
   1115	struct nilfs_inode_info *ii;
   1116	size_t ndone;
   1117	int err = 0;
   1118
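	/*
	 * Staged collection (illustrative note, not in the original file):
	 * the switch below falls through the stages in order.  When a
	 * segment fills up (-E2BIG), the saved per-stage pointers let a
	 * retry resume where collection stopped instead of restarting.
	 */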
   1119	switch (nilfs_sc_cstage_get(sci)) {
   1120	case NILFS_ST_INIT:
   1121		/* Pre-processes */
   1122		sci->sc_stage.flags = 0;
   1123
   1124		if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) {
   1125			sci->sc_nblk_inc = 0;
   1126			sci->sc_curseg->sb_sum.flags = NILFS_SS_LOGBGN;
   1127			if (mode == SC_LSEG_DSYNC) {
   1128				nilfs_sc_cstage_set(sci, NILFS_ST_DSYNC);
   1129				goto dsync_mode;
   1130			}
   1131		}
   1132
   1133		sci->sc_stage.dirty_file_ptr = NULL;
   1134		sci->sc_stage.gc_inode_ptr = NULL;
   1135		if (mode == SC_FLUSH_DAT) {
   1136			nilfs_sc_cstage_set(sci, NILFS_ST_DAT);
   1137			goto dat_stage;
   1138		}
   1139		nilfs_sc_cstage_inc(sci);
   1140		fallthrough;
   1141	case NILFS_ST_GC:
   1142		if (nilfs_doing_gc()) {
   1143			head = &sci->sc_gc_inodes;
   1144			ii = list_prepare_entry(sci->sc_stage.gc_inode_ptr,
   1145						head, i_dirty);
   1146			list_for_each_entry_continue(ii, head, i_dirty) {
   1147				err = nilfs_segctor_scan_file(
   1148					sci, &ii->vfs_inode,
   1149					&nilfs_sc_file_ops);
   1150				if (unlikely(err)) {
   1151					sci->sc_stage.gc_inode_ptr = list_entry(
   1152						ii->i_dirty.prev,
   1153						struct nilfs_inode_info,
   1154						i_dirty);
   1155					goto break_or_fail;
   1156				}
   1157				set_bit(NILFS_I_COLLECTED, &ii->i_state);
   1158			}
   1159			sci->sc_stage.gc_inode_ptr = NULL;
   1160		}
   1161		nilfs_sc_cstage_inc(sci);
   1162		fallthrough;
   1163	case NILFS_ST_FILE:
   1164		head = &sci->sc_dirty_files;
   1165		ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head,
   1166					i_dirty);
   1167		list_for_each_entry_continue(ii, head, i_dirty) {
   1168			clear_bit(NILFS_I_DIRTY, &ii->i_state);
   1169
   1170			err = nilfs_segctor_scan_file(sci, &ii->vfs_inode,
   1171						      &nilfs_sc_file_ops);
   1172			if (unlikely(err)) {
   1173				sci->sc_stage.dirty_file_ptr =
   1174					list_entry(ii->i_dirty.prev,
   1175						   struct nilfs_inode_info,
   1176						   i_dirty);
   1177				goto break_or_fail;
   1178			}
   1179			/* sci->sc_stage.dirty_file_ptr = NILFS_I(inode); */
   1180			/* XXX: required ? */
   1181		}
   1182		sci->sc_stage.dirty_file_ptr = NULL;
   1183		if (mode == SC_FLUSH_FILE) {
   1184			nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
   1185			return 0;
   1186		}
   1187		nilfs_sc_cstage_inc(sci);
   1188		sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED;
   1189		fallthrough;
   1190	case NILFS_ST_IFILE:
   1191		err = nilfs_segctor_scan_file(sci, sci->sc_root->ifile,
   1192					      &nilfs_sc_file_ops);
   1193		if (unlikely(err))
   1194			break;
   1195		nilfs_sc_cstage_inc(sci);
   1196		/* Creating a checkpoint */
   1197		err = nilfs_segctor_create_checkpoint(sci);
   1198		if (unlikely(err))
   1199			break;
   1200		fallthrough;
   1201	case NILFS_ST_CPFILE:
   1202		err = nilfs_segctor_scan_file(sci, nilfs->ns_cpfile,
   1203					      &nilfs_sc_file_ops);
   1204		if (unlikely(err))
   1205			break;
   1206		nilfs_sc_cstage_inc(sci);
   1207		fallthrough;
   1208	case NILFS_ST_SUFILE:
   1209		err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs,
   1210					 sci->sc_nfreesegs, &ndone);
   1211		if (unlikely(err)) {
   1212			nilfs_sufile_cancel_freev(nilfs->ns_sufile,
   1213						  sci->sc_freesegs, ndone,
   1214						  NULL);
   1215			break;
   1216		}
   1217		sci->sc_stage.flags |= NILFS_CF_SUFREED;
   1218
   1219		err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile,
   1220					      &nilfs_sc_file_ops);
   1221		if (unlikely(err))
   1222			break;
   1223		nilfs_sc_cstage_inc(sci);
   1224		fallthrough;
   1225	case NILFS_ST_DAT:
   1226 dat_stage:
   1227		err = nilfs_segctor_scan_file(sci, nilfs->ns_dat,
   1228					      &nilfs_sc_dat_ops);
   1229		if (unlikely(err))
   1230			break;
   1231		if (mode == SC_FLUSH_DAT) {
   1232			nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
   1233			return 0;
   1234		}
   1235		nilfs_sc_cstage_inc(sci);
   1236		fallthrough;
   1237	case NILFS_ST_SR:
   1238		if (mode == SC_LSEG_SR) {
   1239			/* Appending a super root */
   1240			err = nilfs_segctor_add_super_root(sci);
   1241			if (unlikely(err))
   1242				break;
   1243		}
   1244		/* End of a logical segment */
   1245		sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
   1246		nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
   1247		return 0;
   1248	case NILFS_ST_DSYNC:
   1249 dsync_mode:
   1250		sci->sc_curseg->sb_sum.flags |= NILFS_SS_SYNDT;
   1251		ii = sci->sc_dsync_inode;
   1252		if (!test_bit(NILFS_I_BUSY, &ii->i_state))
   1253			break;
   1254
   1255		err = nilfs_segctor_scan_file_dsync(sci, &ii->vfs_inode);
   1256		if (unlikely(err))
   1257			break;
   1258		sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
   1259		nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
   1260		return 0;
   1261	case NILFS_ST_DONE:
   1262		return 0;
   1263	default:
   1264		BUG();
   1265	}
   1266
   1267 break_or_fail:
   1268	return err;
   1269}
   1270
   1271/**
   1272 * nilfs_segctor_begin_construction - setup segment buffer to make a new log
   1273 * @sci: nilfs_sc_info
   1274 * @nilfs: nilfs object
   1275 */
   1276static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci,
   1277					    struct the_nilfs *nilfs)
   1278{
   1279	struct nilfs_segment_buffer *segbuf, *prev;
   1280	__u64 nextnum;
   1281	int err, alloc = 0;
   1282
   1283	segbuf = nilfs_segbuf_new(sci->sc_super);
   1284	if (unlikely(!segbuf))
   1285		return -ENOMEM;
   1286
   1287	if (list_empty(&sci->sc_write_logs)) {
   1288		nilfs_segbuf_map(segbuf, nilfs->ns_segnum,
   1289				 nilfs->ns_pseg_offset, nilfs);
   1290		if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
   1291			nilfs_shift_to_next_segment(nilfs);
   1292			nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs);
   1293		}
   1294
   1295		segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq;
   1296		nextnum = nilfs->ns_nextnum;
   1297
   1298		if (nilfs->ns_segnum == nilfs->ns_nextnum)
   1299			/* Start from the head of a new full segment */
   1300			alloc++;
   1301	} else {
   1302		/* Continue logs */
   1303		prev = NILFS_LAST_SEGBUF(&sci->sc_write_logs);
   1304		nilfs_segbuf_map_cont(segbuf, prev);
   1305		segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq;
   1306		nextnum = prev->sb_nextnum;
   1307
   1308		if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
   1309			nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs);
   1310			segbuf->sb_sum.seg_seq++;
   1311			alloc++;
   1312		}
   1313	}
   1314
   1315	err = nilfs_sufile_mark_dirty(nilfs->ns_sufile, segbuf->sb_segnum);
   1316	if (err)
   1317		goto failed;
   1318
   1319	if (alloc) {
   1320		err = nilfs_sufile_alloc(nilfs->ns_sufile, &nextnum);
   1321		if (err)
   1322			goto failed;
   1323	}
   1324	nilfs_segbuf_set_next_segnum(segbuf, nextnum, nilfs);
   1325
   1326	BUG_ON(!list_empty(&sci->sc_segbufs));
   1327	list_add_tail(&segbuf->sb_list, &sci->sc_segbufs);
   1328	sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks;
   1329	return 0;
   1330
   1331 failed:
   1332	nilfs_segbuf_free(segbuf);
   1333	return err;
   1334}
   1335
   1336static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci,
   1337					 struct the_nilfs *nilfs, int nadd)
   1338{
   1339	struct nilfs_segment_buffer *segbuf, *prev;
   1340	struct inode *sufile = nilfs->ns_sufile;
   1341	__u64 nextnextnum;
   1342	LIST_HEAD(list);
   1343	int err, ret, i;
   1344
   1345	prev = NILFS_LAST_SEGBUF(&sci->sc_segbufs);
   1346	/*
   1347	 * Since the segment specified with nextnum might be allocated during
   1348	 * the previous construction, the buffer including its segusage may
   1349	 * not be dirty.  The following call ensures that the buffer is dirty
    1350	 * and will pin the buffer in memory until the sufile is written.
   1351	 */
   1352	err = nilfs_sufile_mark_dirty(sufile, prev->sb_nextnum);
   1353	if (unlikely(err))
   1354		return err;
   1355
   1356	for (i = 0; i < nadd; i++) {
   1357		/* extend segment info */
   1358		err = -ENOMEM;
   1359		segbuf = nilfs_segbuf_new(sci->sc_super);
   1360		if (unlikely(!segbuf))
   1361			goto failed;
   1362
   1363		/* map this buffer to region of segment on-disk */
   1364		nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs);
   1365		sci->sc_segbuf_nblocks += segbuf->sb_rest_blocks;
   1366
   1367		/* allocate the next next full segment */
   1368		err = nilfs_sufile_alloc(sufile, &nextnextnum);
   1369		if (unlikely(err))
   1370			goto failed_segbuf;
   1371
   1372		segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq + 1;
   1373		nilfs_segbuf_set_next_segnum(segbuf, nextnextnum, nilfs);
   1374
   1375		list_add_tail(&segbuf->sb_list, &list);
   1376		prev = segbuf;
   1377	}
   1378	list_splice_tail(&list, &sci->sc_segbufs);
   1379	return 0;
   1380
   1381 failed_segbuf:
   1382	nilfs_segbuf_free(segbuf);
   1383 failed:
   1384	list_for_each_entry(segbuf, &list, sb_list) {
   1385		ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
   1386		WARN_ON(ret); /* never fails */
   1387	}
   1388	nilfs_destroy_logs(&list);
   1389	return err;
   1390}
   1391
   1392static void nilfs_free_incomplete_logs(struct list_head *logs,
   1393				       struct the_nilfs *nilfs)
   1394{
   1395	struct nilfs_segment_buffer *segbuf, *prev;
   1396	struct inode *sufile = nilfs->ns_sufile;
   1397	int ret;
   1398
   1399	segbuf = NILFS_FIRST_SEGBUF(logs);
   1400	if (nilfs->ns_nextnum != segbuf->sb_nextnum) {
   1401		ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
   1402		WARN_ON(ret); /* never fails */
   1403	}
   1404	if (atomic_read(&segbuf->sb_err)) {
   1405		/* Case 1: The first segment failed */
   1406		if (segbuf->sb_pseg_start != segbuf->sb_fseg_start)
   1407			/*
   1408			 * Case 1a:  Partial segment appended into an existing
   1409			 * segment
   1410			 */
   1411			nilfs_terminate_segment(nilfs, segbuf->sb_fseg_start,
   1412						segbuf->sb_fseg_end);
   1413		else /* Case 1b:  New full segment */
   1414			set_nilfs_discontinued(nilfs);
   1415	}
   1416
   1417	prev = segbuf;
   1418	list_for_each_entry_continue(segbuf, logs, sb_list) {
   1419		if (prev->sb_nextnum != segbuf->sb_nextnum) {
   1420			ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
   1421			WARN_ON(ret); /* never fails */
   1422		}
   1423		if (atomic_read(&segbuf->sb_err) &&
   1424		    segbuf->sb_segnum != nilfs->ns_nextnum)
   1425			/* Case 2: extended segment (!= next) failed */
   1426			nilfs_sufile_set_error(sufile, segbuf->sb_segnum);
   1427		prev = segbuf;
   1428	}
   1429}
   1430
   1431static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci,
   1432					  struct inode *sufile)
   1433{
   1434	struct nilfs_segment_buffer *segbuf;
   1435	unsigned long live_blocks;
   1436	int ret;
   1437
   1438	list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
   1439		live_blocks = segbuf->sb_sum.nblocks +
   1440			(segbuf->sb_pseg_start - segbuf->sb_fseg_start);
   1441		ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
   1442						     live_blocks,
   1443						     sci->sc_seg_ctime);
    1444		WARN_ON(ret); /* always succeeds because the segusage is dirty */
   1445	}
   1446}
   1447
   1448static void nilfs_cancel_segusage(struct list_head *logs, struct inode *sufile)
   1449{
   1450	struct nilfs_segment_buffer *segbuf;
   1451	int ret;
   1452
   1453	segbuf = NILFS_FIRST_SEGBUF(logs);
   1454	ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
   1455					     segbuf->sb_pseg_start -
   1456					     segbuf->sb_fseg_start, 0);
    1457	WARN_ON(ret); /* always succeeds because the segusage is dirty */
   1458
   1459	list_for_each_entry_continue(segbuf, logs, sb_list) {
   1460		ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
   1461						     0, 0);
    1462		WARN_ON(ret); /* always succeeds */
   1463	}
   1464}
   1465
   1466static void nilfs_segctor_truncate_segments(struct nilfs_sc_info *sci,
   1467					    struct nilfs_segment_buffer *last,
   1468					    struct inode *sufile)
   1469{
   1470	struct nilfs_segment_buffer *segbuf = last;
   1471	int ret;
   1472
   1473	list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) {
   1474		sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks;
   1475		ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
   1476		WARN_ON(ret);
   1477	}
   1478	nilfs_truncate_logs(&sci->sc_segbufs, last);
   1479}
   1480
   1481
   1482static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
   1483				 struct the_nilfs *nilfs, int mode)
   1484{
   1485	struct nilfs_cstage prev_stage = sci->sc_stage;
   1486	int err, nadd = 1;
   1487
   1488	/* Collection retry loop */
   1489	for (;;) {
   1490		sci->sc_nblk_this_inc = 0;
   1491		sci->sc_curseg = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);
   1492
   1493		err = nilfs_segctor_reset_segment_buffer(sci);
   1494		if (unlikely(err))
   1495			goto failed;
   1496
   1497		err = nilfs_segctor_collect_blocks(sci, mode);
   1498		sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
   1499		if (!err)
   1500			break;
   1501
   1502		if (unlikely(err != -E2BIG))
   1503			goto failed;
   1504
   1505		/* The current segment is filled up */
   1506		if (mode != SC_LSEG_SR ||
   1507		    nilfs_sc_cstage_get(sci) < NILFS_ST_CPFILE)
   1508			break;
   1509
   1510		nilfs_clear_logs(&sci->sc_segbufs);
   1511
   1512		if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
   1513			err = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
   1514							sci->sc_freesegs,
   1515							sci->sc_nfreesegs,
   1516							NULL);
    1517			WARN_ON(err); /* does not happen */
   1518			sci->sc_stage.flags &= ~NILFS_CF_SUFREED;
   1519		}
   1520
   1521		err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
   1522		if (unlikely(err))
   1523			return err;
   1524
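		/*
		 * Illustrative note (not in the original file): the number
		 * of segments appended per retry doubles (1, 2, 4, ...) and
		 * is capped at SC_MAX_SEGDELTA (64).
		 */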
   1525		nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
   1526		sci->sc_stage = prev_stage;
   1527	}
   1528	nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile);
   1529	return 0;
   1530
   1531 failed:
   1532	return err;
   1533}
   1534
   1535static void nilfs_list_replace_buffer(struct buffer_head *old_bh,
   1536				      struct buffer_head *new_bh)
   1537{
   1538	BUG_ON(!list_empty(&new_bh->b_assoc_buffers));
   1539
   1540	list_replace_init(&old_bh->b_assoc_buffers, &new_bh->b_assoc_buffers);
   1541	/* The caller must release old_bh */
   1542}
   1543
   1544static int
   1545nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
   1546				     struct nilfs_segment_buffer *segbuf,
   1547				     int mode)
   1548{
   1549	struct inode *inode = NULL;
   1550	sector_t blocknr;
   1551	unsigned long nfinfo = segbuf->sb_sum.nfinfo;
   1552	unsigned long nblocks = 0, ndatablk = 0;
   1553	const struct nilfs_sc_operations *sc_op = NULL;
   1554	struct nilfs_segsum_pointer ssp;
   1555	struct nilfs_finfo *finfo = NULL;
   1556	union nilfs_binfo binfo;
   1557	struct buffer_head *bh, *bh_org;
   1558	ino_t ino = 0;
   1559	int err = 0;
   1560
   1561	if (!nfinfo)
   1562		goto out;
   1563
   1564	blocknr = segbuf->sb_pseg_start + segbuf->sb_sum.nsumblk;
   1565	ssp.bh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers);
   1566	ssp.offset = sizeof(struct nilfs_segment_summary);
   1567
   1568	list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
   1569		if (bh == segbuf->sb_super_root)
   1570			break;
   1571		if (!finfo) {
   1572			finfo =	nilfs_segctor_map_segsum_entry(
   1573				sci, &ssp, sizeof(*finfo));
   1574			ino = le64_to_cpu(finfo->fi_ino);
   1575			nblocks = le32_to_cpu(finfo->fi_nblocks);
   1576			ndatablk = le32_to_cpu(finfo->fi_ndatablk);
   1577
   1578			inode = bh->b_page->mapping->host;
   1579
   1580			if (mode == SC_LSEG_DSYNC)
   1581				sc_op = &nilfs_sc_dsync_ops;
   1582			else if (ino == NILFS_DAT_INO)
   1583				sc_op = &nilfs_sc_dat_ops;
   1584			else /* file blocks */
   1585				sc_op = &nilfs_sc_file_ops;
   1586		}
   1587		bh_org = bh;
   1588		get_bh(bh_org);
   1589		err = nilfs_bmap_assign(NILFS_I(inode)->i_bmap, &bh, blocknr,
   1590					&binfo);
   1591		if (bh != bh_org)
   1592			nilfs_list_replace_buffer(bh_org, bh);
   1593		brelse(bh_org);
   1594		if (unlikely(err))
   1595			goto failed_bmap;
   1596
   1597		if (ndatablk > 0)
   1598			sc_op->write_data_binfo(sci, &ssp, &binfo);
   1599		else
   1600			sc_op->write_node_binfo(sci, &ssp, &binfo);
   1601
   1602		blocknr++;
   1603		if (--nblocks == 0) {
   1604			finfo = NULL;
   1605			if (--nfinfo == 0)
   1606				break;
   1607		} else if (ndatablk > 0)
   1608			ndatablk--;
   1609	}
   1610 out:
   1611	return 0;
   1612
   1613 failed_bmap:
   1614	return err;
   1615}
   1616
   1617static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode)
   1618{
   1619	struct nilfs_segment_buffer *segbuf;
   1620	int err;
   1621
   1622	list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
   1623		err = nilfs_segctor_update_payload_blocknr(sci, segbuf, mode);
   1624		if (unlikely(err))
   1625			return err;
   1626		nilfs_segbuf_fill_in_segsum(segbuf);
   1627	}
   1628	return 0;
   1629}
   1630
   1631static void nilfs_begin_page_io(struct page *page)
   1632{
   1633	if (!page || PageWriteback(page))
   1634		/*
   1635		 * For split b-tree node pages, this function may be called
    1636		 * twice.  The second and later calls are ignored by this check.
   1637		 */
   1638		return;
   1639
   1640	lock_page(page);
   1641	clear_page_dirty_for_io(page);
   1642	set_page_writeback(page);
   1643	unlock_page(page);
   1644}
   1645
   1646static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
   1647{
   1648	struct nilfs_segment_buffer *segbuf;
   1649	struct page *bd_page = NULL, *fs_page = NULL;
   1650
   1651	list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
   1652		struct buffer_head *bh;
   1653
   1654		list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
   1655				    b_assoc_buffers) {
   1656			if (bh->b_page != bd_page) {
   1657				if (bd_page) {
   1658					lock_page(bd_page);
   1659					clear_page_dirty_for_io(bd_page);
   1660					set_page_writeback(bd_page);
   1661					unlock_page(bd_page);
   1662				}
   1663				bd_page = bh->b_page;
   1664			}
   1665		}
   1666
   1667		list_for_each_entry(bh, &segbuf->sb_payload_buffers,
   1668				    b_assoc_buffers) {
   1669			set_buffer_async_write(bh);
   1670			if (bh == segbuf->sb_super_root) {
   1671				if (bh->b_page != bd_page) {
   1672					lock_page(bd_page);
   1673					clear_page_dirty_for_io(bd_page);
   1674					set_page_writeback(bd_page);
   1675					unlock_page(bd_page);
   1676					bd_page = bh->b_page;
   1677				}
   1678				break;
   1679			}
   1680			if (bh->b_page != fs_page) {
   1681				nilfs_begin_page_io(fs_page);
   1682				fs_page = bh->b_page;
   1683			}
   1684		}
   1685	}
   1686	if (bd_page) {
   1687		lock_page(bd_page);
   1688		clear_page_dirty_for_io(bd_page);
   1689		set_page_writeback(bd_page);
   1690		unlock_page(bd_page);
   1691	}
   1692	nilfs_begin_page_io(fs_page);
   1693}
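
/*
 * nilfs_segctor_prepare_write() above, like the completion and abort
 * paths below, detects page boundaries by watching bh->b_page change
 * while walking a b_assoc_buffers list.  The recurring pattern, with a
 * hypothetical finish_page() standing in for the per-page action
 * (sketch only, not part of the original file):
 */
#if 0	/* example only */
	struct page *prev = NULL;

	list_for_each_entry(bh, buffers, b_assoc_buffers) {
		if (bh->b_page != prev) {
			if (prev)
				finish_page(prev);
			prev = bh->b_page;
		}
	}
	if (prev)
		finish_page(prev);	/* last page: handled after the loop */
#endif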
   1694
   1695static int nilfs_segctor_write(struct nilfs_sc_info *sci,
   1696			       struct the_nilfs *nilfs)
   1697{
   1698	int ret;
   1699
   1700	ret = nilfs_write_logs(&sci->sc_segbufs, nilfs);
   1701	list_splice_tail_init(&sci->sc_segbufs, &sci->sc_write_logs);
   1702	return ret;
   1703}
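
/*
 * After submission, the segment buffers have moved from sc_segbufs to
 * sc_write_logs; I/O completion is handled later by nilfs_segctor_wait()
 * or, on failure, by nilfs_segctor_abort_construction().
 */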
   1704
   1705static void nilfs_end_page_io(struct page *page, int err)
   1706{
   1707	if (!page)
   1708		return;
   1709
   1710	if (buffer_nilfs_node(page_buffers(page)) && !PageWriteback(page)) {
   1711		/*
    1712		 * For b-tree node pages, this function may be called twice or
    1713		 * more, because their buffers may be split across segments.
   1714		 */
   1715		if (PageDirty(page)) {
    1716			/*
    1717			 * For pages holding split b-tree node buffers, the dirty
    1718			 * flag may be cleared on individual buffers at different
    1719			 * times.  In that case, the page is redirtied once for
    1720			 * the remaining buffers; that redirty must be cancelled
    1721			 * if all the buffers get cleaned later.
    1722			 */
   1723			lock_page(page);
   1724			if (nilfs_page_buffers_clean(page))
   1725				__nilfs_clear_page_dirty(page);
   1726			unlock_page(page);
   1727		}
   1728		return;
   1729	}
   1730
   1731	if (!err) {
   1732		if (!nilfs_page_buffers_clean(page))
   1733			__set_page_dirty_nobuffers(page);
   1734		ClearPageError(page);
   1735	} else {
   1736		__set_page_dirty_nobuffers(page);
   1737		SetPageError(page);
   1738	}
   1739
   1740	end_page_writeback(page);
   1741}
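
/*
 * Note on the non-btnode path above: on success the page is redirtied
 * only if some of its buffers were dirtied again while the write was in
 * flight; on error it is unconditionally redirtied and flagged with
 * PageError so its blocks are collected again by a later construction.
 */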
   1742
   1743static void nilfs_abort_logs(struct list_head *logs, int err)
   1744{
   1745	struct nilfs_segment_buffer *segbuf;
   1746	struct page *bd_page = NULL, *fs_page = NULL;
   1747	struct buffer_head *bh;
   1748
   1749	if (list_empty(logs))
   1750		return;
   1751
   1752	list_for_each_entry(segbuf, logs, sb_list) {
   1753		list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
   1754				    b_assoc_buffers) {
   1755			if (bh->b_page != bd_page) {
   1756				if (bd_page)
   1757					end_page_writeback(bd_page);
   1758				bd_page = bh->b_page;
   1759			}
   1760		}
   1761
   1762		list_for_each_entry(bh, &segbuf->sb_payload_buffers,
   1763				    b_assoc_buffers) {
   1764			clear_buffer_async_write(bh);
   1765			if (bh == segbuf->sb_super_root) {
   1766				if (bh->b_page != bd_page) {
   1767					end_page_writeback(bd_page);
   1768					bd_page = bh->b_page;
   1769				}
   1770				break;
   1771			}
   1772			if (bh->b_page != fs_page) {
   1773				nilfs_end_page_io(fs_page, err);
   1774				fs_page = bh->b_page;
   1775			}
   1776		}
   1777	}
   1778	if (bd_page)
   1779		end_page_writeback(bd_page);
   1780
   1781	nilfs_end_page_io(fs_page, err);
   1782}
   1783
   1784static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
   1785					     struct the_nilfs *nilfs, int err)
   1786{
   1787	LIST_HEAD(logs);
   1788	int ret;
   1789
   1790	list_splice_tail_init(&sci->sc_write_logs, &logs);
   1791	ret = nilfs_wait_on_logs(&logs);
   1792	nilfs_abort_logs(&logs, ret ? : err);
   1793
   1794	list_splice_tail_init(&sci->sc_segbufs, &logs);
   1795	nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
   1796	nilfs_free_incomplete_logs(&logs, nilfs);
   1797
   1798	if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
   1799		ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
   1800						sci->sc_freesegs,
   1801						sci->sc_nfreesegs,
   1802						NULL);
    1803		WARN_ON(ret); /* should not happen */
   1804	}
   1805
   1806	nilfs_destroy_logs(&logs);
   1807}
   1808
   1809static void nilfs_set_next_segment(struct the_nilfs *nilfs,
   1810				   struct nilfs_segment_buffer *segbuf)
   1811{
   1812	nilfs->ns_segnum = segbuf->sb_segnum;
   1813	nilfs->ns_nextnum = segbuf->sb_nextnum;
   1814	nilfs->ns_pseg_offset = segbuf->sb_pseg_start - segbuf->sb_fseg_start
   1815		+ segbuf->sb_sum.nblocks;
   1816	nilfs->ns_seg_seq = segbuf->sb_sum.seg_seq;
   1817	nilfs->ns_ctime = segbuf->sb_sum.ctime;
   1818}
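
/*
 * ns_pseg_offset is the block offset of the next partial segment inside
 * the current full segment.  With illustrative numbers: a log written at
 * sb_pseg_start = 2056 in a full segment starting at sb_fseg_start = 2048
 * and spanning sb_sum.nblocks = 12 blocks leaves the next partial segment
 * at offset (2056 - 2048) + 12 = 20.
 */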
   1819
   1820static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
   1821{
   1822	struct nilfs_segment_buffer *segbuf;
   1823	struct page *bd_page = NULL, *fs_page = NULL;
   1824	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
   1825	int update_sr = false;
   1826
   1827	list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) {
   1828		struct buffer_head *bh;
   1829
   1830		list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
   1831				    b_assoc_buffers) {
   1832			set_buffer_uptodate(bh);
   1833			clear_buffer_dirty(bh);
   1834			if (bh->b_page != bd_page) {
   1835				if (bd_page)
   1836					end_page_writeback(bd_page);
   1837				bd_page = bh->b_page;
   1838			}
   1839		}
    1840		/*
    1841		 * We assume that buffers belonging to the same page are
    1842		 * contiguous in the buffer list.
    1843		 * Under this assumption, the last buffer of each page is
    1844		 * identifiable by a discontinuity in bh->b_page
    1845		 * (page != fs_page).
    1846		 *
    1847		 * For B-tree node blocks, however, this assumption does not
    1848		 * hold.  The cleanup code for B-tree node pages needs
    1849		 * special care.
    1850		 */
   1851		list_for_each_entry(bh, &segbuf->sb_payload_buffers,
   1852				    b_assoc_buffers) {
   1853			const unsigned long set_bits = BIT(BH_Uptodate);
   1854			const unsigned long clear_bits =
   1855				(BIT(BH_Dirty) | BIT(BH_Async_Write) |
   1856				 BIT(BH_Delay) | BIT(BH_NILFS_Volatile) |
   1857				 BIT(BH_NILFS_Redirected));
   1858
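			/*
			 * Atomically clear the dirty/write-tracking flags
			 * and mark the buffer up to date in a single
			 * b_state update.
			 */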
   1859			set_mask_bits(&bh->b_state, clear_bits, set_bits);
   1860			if (bh == segbuf->sb_super_root) {
   1861				if (bh->b_page != bd_page) {
   1862					end_page_writeback(bd_page);
   1863					bd_page = bh->b_page;
   1864				}
   1865				update_sr = true;
   1866				break;
   1867			}
   1868			if (bh->b_page != fs_page) {
   1869				nilfs_end_page_io(fs_page, 0);
   1870				fs_page = bh->b_page;
   1871			}
   1872		}
   1873
   1874		if (!nilfs_segbuf_simplex(segbuf)) {
   1875			if (segbuf->sb_sum.flags & NILFS_SS_LOGBGN) {
   1876				set_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
   1877				sci->sc_lseg_stime = jiffies;
   1878			}
   1879			if (segbuf->sb_sum.flags & NILFS_SS_LOGEND)
   1880				clear_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
   1881		}
   1882	}
    1883	/*
    1884	 * Since a page may span multiple segment buffers, the writeback
    1885	 * end of the last page must be handled outside of the loop.
    1886	 */
   1887	if (bd_page)
   1888		end_page_writeback(bd_page);
   1889
   1890	nilfs_end_page_io(fs_page, 0);
   1891
   1892	nilfs_drop_collected_inodes(&sci->sc_dirty_files);
   1893
   1894	if (nilfs_doing_gc())
   1895		nilfs_drop_collected_inodes(&sci->sc_gc_inodes);
   1896	else
   1897		nilfs->ns_nongc_ctime = sci->sc_seg_ctime;
   1898
   1899	sci->sc_nblk_inc += sci->sc_nblk_this_inc;
   1900
   1901	segbuf = NILFS_LAST_SEGBUF(&sci->sc_write_logs);
   1902	nilfs_set_next_segment(nilfs, segbuf);
   1903
   1904	if (update_sr) {
   1905		nilfs->ns_flushed_device = 0;
   1906		nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start,
   1907				       segbuf->sb_sum.seg_seq, nilfs->ns_cno++);
   1908
   1909		clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
   1910		clear_bit(NILFS_SC_DIRTY, &sci->sc_flags);
   1911		set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
   1912		nilfs_segctor_clear_metadata_dirty(sci);
   1913	} else
   1914		clear_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
   1915}
   1916
   1917static int nilfs_segctor_wait(struct nilfs_sc_info *sci)
   1918{
   1919	int ret;
   1920
   1921	ret = nilfs_wait_on_logs(&sci->sc_write_logs);
   1922	if (!ret) {
   1923		nilfs_segctor_complete_write(sci);
   1924		nilfs_destroy_logs(&sci->sc_write_logs);
   1925	}
   1926	return ret;
   1927}
   1928
   1929static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
   1930					     struct the_nilfs *nilfs)
   1931{
   1932	struct nilfs_inode_info *ii, *n;
   1933	struct inode *ifile = sci->sc_root->ifile;
   1934
   1935	spin_lock(&nilfs->ns_inode_lock);
   1936 retry:
   1937	list_for_each_entry_safe(ii, n, &nilfs->ns_dirty_files, i_dirty) {
   1938		if (!ii->i_bh) {
   1939			struct buffer_head *ibh;
   1940			int err;
   1941
   1942			spin_unlock(&nilfs->ns_inode_lock);
   1943			err = nilfs_ifile_get_inode_block(
   1944				ifile, ii->vfs_inode.i_ino, &ibh);
   1945			if (unlikely(err)) {
   1946				nilfs_warn(sci->sc_super,
   1947					   "log writer: error %d getting inode block (ino=%lu)",
   1948					   err, ii->vfs_inode.i_ino);
   1949				return err;
   1950			}
   1951			spin_lock(&nilfs->ns_inode_lock);
   1952			if (likely(!ii->i_bh))
   1953				ii->i_bh = ibh;
   1954			else
   1955				brelse(ibh);
   1956			goto retry;
   1957		}
   1958
    1959		/* Always redirty the buffer to avoid a race condition */
   1960		mark_buffer_dirty(ii->i_bh);
   1961		nilfs_mdt_mark_dirty(ifile);
   1962
   1963		clear_bit(NILFS_I_QUEUED, &ii->i_state);
   1964		set_bit(NILFS_I_BUSY, &ii->i_state);
   1965		list_move_tail(&ii->i_dirty, &sci->sc_dirty_files);
   1966	}
   1967	spin_unlock(&nilfs->ns_inode_lock);
   1968
   1969	return 0;
   1970}
   1971
   1972static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
   1973					     struct the_nilfs *nilfs)
   1974{
   1975	struct nilfs_inode_info *ii, *n;
   1976	int during_mount = !(sci->sc_super->s_flags & SB_ACTIVE);
   1977	int defer_iput = false;
   1978
   1979	spin_lock(&nilfs->ns_inode_lock);
   1980	list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) {
   1981		if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) ||
   1982		    test_bit(NILFS_I_DIRTY, &ii->i_state))
   1983			continue;
   1984
   1985		clear_bit(NILFS_I_BUSY, &ii->i_state);
   1986		brelse(ii->i_bh);
   1987		ii->i_bh = NULL;
   1988		list_del_init(&ii->i_dirty);
   1989		if (!ii->vfs_inode.i_nlink || during_mount) {
   1990			/*
   1991			 * Defer calling iput() to avoid deadlocks if
   1992			 * i_nlink == 0 or mount is not yet finished.
   1993			 */
   1994			list_add_tail(&ii->i_dirty, &sci->sc_iput_queue);
   1995			defer_iput = true;
   1996		} else {
   1997			spin_unlock(&nilfs->ns_inode_lock);
   1998			iput(&ii->vfs_inode);
   1999			spin_lock(&nilfs->ns_inode_lock);
   2000		}
   2001	}
   2002	spin_unlock(&nilfs->ns_inode_lock);
   2003
   2004	if (defer_iput)
   2005		schedule_work(&sci->sc_iput_work);
   2006}
   2007
   2008/*
   2009 * Main procedure of segment constructor
   2010 */
   2011static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
   2012{
   2013	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
   2014	int err;
   2015
   2016	nilfs_sc_cstage_set(sci, NILFS_ST_INIT);
   2017	sci->sc_cno = nilfs->ns_cno;
   2018
   2019	err = nilfs_segctor_collect_dirty_files(sci, nilfs);
   2020	if (unlikely(err))
   2021		goto out;
   2022
   2023	if (nilfs_test_metadata_dirty(nilfs, sci->sc_root))
   2024		set_bit(NILFS_SC_DIRTY, &sci->sc_flags);
   2025
   2026	if (nilfs_segctor_clean(sci))
   2027		goto out;
   2028
   2029	do {
   2030		sci->sc_stage.flags &= ~NILFS_CF_HISTORY_MASK;
   2031
   2032		err = nilfs_segctor_begin_construction(sci, nilfs);
   2033		if (unlikely(err))
   2034			goto out;
   2035
   2036		/* Update time stamp */
   2037		sci->sc_seg_ctime = ktime_get_real_seconds();
   2038
   2039		err = nilfs_segctor_collect(sci, nilfs, mode);
   2040		if (unlikely(err))
   2041			goto failed;
   2042
   2043		/* Avoid empty segment */
   2044		if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE &&
   2045		    nilfs_segbuf_empty(sci->sc_curseg)) {
   2046			nilfs_segctor_abort_construction(sci, nilfs, 1);
   2047			goto out;
   2048		}
   2049
   2050		err = nilfs_segctor_assign(sci, mode);
   2051		if (unlikely(err))
   2052			goto failed;
   2053
   2054		if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
   2055			nilfs_segctor_fill_in_file_bmap(sci);
   2056
   2057		if (mode == SC_LSEG_SR &&
   2058		    nilfs_sc_cstage_get(sci) >= NILFS_ST_CPFILE) {
   2059			err = nilfs_segctor_fill_in_checkpoint(sci);
   2060			if (unlikely(err))
   2061				goto failed_to_write;
   2062
   2063			nilfs_segctor_fill_in_super_root(sci, nilfs);
   2064		}
   2065		nilfs_segctor_update_segusage(sci, nilfs->ns_sufile);
   2066
   2067		/* Write partial segments */
   2068		nilfs_segctor_prepare_write(sci);
   2069
   2070		nilfs_add_checksums_on_logs(&sci->sc_segbufs,
   2071					    nilfs->ns_crc_seed);
   2072
   2073		err = nilfs_segctor_write(sci, nilfs);
   2074		if (unlikely(err))
   2075			goto failed_to_write;
   2076
   2077		if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE ||
   2078		    nilfs->ns_blocksize_bits != PAGE_SHIFT) {
    2079			/*
    2080			 * At this point, we avoid double buffering
    2081			 * for blocksize < pagesize because the page dirty
    2082			 * flag is turned off during the write and dirty
    2083			 * buffers are not properly collected for
    2084			 * pages crossing segment boundaries.
    2085			 */
   2086			err = nilfs_segctor_wait(sci);
   2087			if (err)
   2088				goto failed_to_write;
   2089		}
   2090	} while (nilfs_sc_cstage_get(sci) != NILFS_ST_DONE);
   2091
   2092 out:
   2093	nilfs_segctor_drop_written_files(sci, nilfs);
   2094	return err;
   2095
   2096 failed_to_write:
   2097	if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
   2098		nilfs_redirty_inodes(&sci->sc_dirty_files);
   2099
   2100 failed:
   2101	if (nilfs_doing_gc())
   2102		nilfs_redirty_inodes(&sci->sc_gc_inodes);
   2103	nilfs_segctor_abort_construction(sci, nilfs, err);
   2104	goto out;
   2105}
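
/*
 * One iteration of the loop above runs the full pipeline: begin
 * construction (allocate segment buffers), collect dirty blocks, assign
 * disk block numbers, fill in the checkpoint and super root when closing
 * a logical segment (SC_LSEG_SR), update segment usage, then prepare,
 * checksum, write, and wait on the logs.  The loop repeats until the
 * collection stage reaches NILFS_ST_DONE.
 */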
   2106
   2107/**
    2108 * nilfs_segctor_start_timer - set the timer for background write
    2109 * @sci: nilfs_sc_info
    2110 *
    2111 * If the timer has already been set, the new request is ignored.
    2112 * This function MUST be called while holding the segment
    2113 * semaphore.
   2114 */
   2115static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci)
   2116{
   2117	spin_lock(&sci->sc_state_lock);
   2118	if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) {
   2119		sci->sc_timer.expires = jiffies + sci->sc_interval;
   2120		add_timer(&sci->sc_timer);
   2121		sci->sc_state |= NILFS_SEGCTOR_COMMIT;
   2122	}
   2123	spin_unlock(&sci->sc_state_lock);
   2124}
   2125
   2126static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn)
   2127{
   2128	spin_lock(&sci->sc_state_lock);
   2129	if (!(sci->sc_flush_request & BIT(bn))) {
   2130		unsigned long prev_req = sci->sc_flush_request;
   2131
   2132		sci->sc_flush_request |= BIT(bn);
   2133		if (!prev_req)
   2134			wake_up(&sci->sc_wait_daemon);
   2135	}
   2136	spin_unlock(&sci->sc_state_lock);
   2137}
   2138
   2139/**
   2140 * nilfs_flush_segment - trigger a segment construction for resource control
   2141 * @sb: super block
   2142 * @ino: inode number of the file to be flushed out.
   2143 */
   2144void nilfs_flush_segment(struct super_block *sb, ino_t ino)
   2145{
   2146	struct the_nilfs *nilfs = sb->s_fs_info;
   2147	struct nilfs_sc_info *sci = nilfs->ns_writer;
   2148
   2149	if (!sci || nilfs_doing_construction())
   2150		return;
   2151	nilfs_segctor_do_flush(sci, NILFS_MDT_INODE(sb, ino) ? ino : 0);
   2152					/* assign bit 0 to data files */
   2153}
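
/*
 * Example (sketch): the writepage path requests a background flush of a
 * regular file's blocks this way; a non-metadata inode number maps to
 * flush bit 0.
 */
#if 0	/* example only */
	nilfs_flush_segment(inode->i_sb, inode->i_ino);
#endif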
   2154
   2155struct nilfs_segctor_wait_request {
   2156	wait_queue_entry_t	wq;
   2157	__u32		seq;
   2158	int		err;
   2159	atomic_t	done;
   2160};
   2161
   2162static int nilfs_segctor_sync(struct nilfs_sc_info *sci)
   2163{
   2164	struct nilfs_segctor_wait_request wait_req;
   2165	int err = 0;
   2166
   2167	spin_lock(&sci->sc_state_lock);
   2168	init_wait(&wait_req.wq);
   2169	wait_req.err = 0;
   2170	atomic_set(&wait_req.done, 0);
   2171	wait_req.seq = ++sci->sc_seq_request;
   2172	spin_unlock(&sci->sc_state_lock);
   2173
   2174	init_waitqueue_entry(&wait_req.wq, current);
   2175	add_wait_queue(&sci->sc_wait_request, &wait_req.wq);
   2176	set_current_state(TASK_INTERRUPTIBLE);
   2177	wake_up(&sci->sc_wait_daemon);
   2178
   2179	for (;;) {
   2180		if (atomic_read(&wait_req.done)) {
   2181			err = wait_req.err;
   2182			break;
   2183		}
   2184		if (!signal_pending(current)) {
   2185			schedule();
   2186			continue;
   2187		}
   2188		err = -ERESTARTSYS;
   2189		break;
   2190	}
   2191	finish_wait(&sci->sc_wait_request, &wait_req.wq);
   2192	return err;
   2193}
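
/*
 * The sc_seq_request/sc_seq_done pair implements a ticket-style
 * handshake: each sync caller takes the next request number, wakes the
 * daemon, and sleeps until nilfs_segctor_wakeup() advances sc_seq_done
 * past its ticket.  nilfs_cnt32_ge() compares the 32-bit sequence
 * numbers modulo wrap-around.
 */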
   2194
   2195static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
   2196{
   2197	struct nilfs_segctor_wait_request *wrq, *n;
   2198	unsigned long flags;
   2199
   2200	spin_lock_irqsave(&sci->sc_wait_request.lock, flags);
   2201	list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.head, wq.entry) {
   2202		if (!atomic_read(&wrq->done) &&
   2203		    nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) {
   2204			wrq->err = err;
   2205			atomic_set(&wrq->done, 1);
   2206		}
   2207		if (atomic_read(&wrq->done)) {
   2208			wrq->wq.func(&wrq->wq,
   2209				     TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
   2210				     0, NULL);
   2211		}
   2212	}
   2213	spin_unlock_irqrestore(&sci->sc_wait_request.lock, flags);
   2214}
   2215
   2216/**
   2217 * nilfs_construct_segment - construct a logical segment
   2218 * @sb: super block
   2219 *
   2220 * Return Value: On success, 0 is returned. On errors, one of the following
    2221 * negative error codes is returned.
   2222 *
   2223 * %-EROFS - Read only filesystem.
   2224 *
   2225 * %-EIO - I/O error
   2226 *
   2227 * %-ENOSPC - No space left on device (only in a panic state).
   2228 *
   2229 * %-ERESTARTSYS - Interrupted.
   2230 *
   2231 * %-ENOMEM - Insufficient memory available.
   2232 */
   2233int nilfs_construct_segment(struct super_block *sb)
   2234{
   2235	struct the_nilfs *nilfs = sb->s_fs_info;
   2236	struct nilfs_sc_info *sci = nilfs->ns_writer;
   2237	struct nilfs_transaction_info *ti;
   2238	int err;
   2239
   2240	if (!sci)
   2241		return -EROFS;
   2242
   2243	/* A call inside transactions causes a deadlock. */
   2244	BUG_ON((ti = current->journal_info) && ti->ti_magic == NILFS_TI_MAGIC);
   2245
   2246	err = nilfs_segctor_sync(sci);
   2247	return err;
   2248}
   2249
   2250/**
   2251 * nilfs_construct_dsync_segment - construct a data-only logical segment
   2252 * @sb: super block
   2253 * @inode: inode whose data blocks should be written out
   2254 * @start: start byte offset
   2255 * @end: end byte offset (inclusive)
   2256 *
   2257 * Return Value: On success, 0 is returned. On errors, one of the following
    2258 * negative error codes is returned.
   2259 *
   2260 * %-EROFS - Read only filesystem.
   2261 *
   2262 * %-EIO - I/O error
   2263 *
   2264 * %-ENOSPC - No space left on device (only in a panic state).
   2265 *
   2266 * %-ERESTARTSYS - Interrupted.
   2267 *
   2268 * %-ENOMEM - Insufficient memory available.
   2269 */
   2270int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
   2271				  loff_t start, loff_t end)
   2272{
   2273	struct the_nilfs *nilfs = sb->s_fs_info;
   2274	struct nilfs_sc_info *sci = nilfs->ns_writer;
   2275	struct nilfs_inode_info *ii;
   2276	struct nilfs_transaction_info ti;
   2277	int err = 0;
   2278
   2279	if (!sci)
   2280		return -EROFS;
   2281
   2282	nilfs_transaction_lock(sb, &ti, 0);
   2283
   2284	ii = NILFS_I(inode);
   2285	if (test_bit(NILFS_I_INODE_SYNC, &ii->i_state) ||
   2286	    nilfs_test_opt(nilfs, STRICT_ORDER) ||
   2287	    test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) ||
   2288	    nilfs_discontinued(nilfs)) {
   2289		nilfs_transaction_unlock(sb);
   2290		err = nilfs_segctor_sync(sci);
   2291		return err;
   2292	}
   2293
   2294	spin_lock(&nilfs->ns_inode_lock);
   2295	if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
   2296	    !test_bit(NILFS_I_BUSY, &ii->i_state)) {
   2297		spin_unlock(&nilfs->ns_inode_lock);
   2298		nilfs_transaction_unlock(sb);
   2299		return 0;
   2300	}
   2301	spin_unlock(&nilfs->ns_inode_lock);
   2302	sci->sc_dsync_inode = ii;
   2303	sci->sc_dsync_start = start;
   2304	sci->sc_dsync_end = end;
   2305
   2306	err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC);
   2307	if (!err)
   2308		nilfs->ns_flushed_device = 0;
   2309
   2310	nilfs_transaction_unlock(sb);
   2311	return err;
   2312}
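
/*
 * Example (sketch): the fsync path chooses between the two constructors
 * above, using the data-only variant for fdatasync-style requests:
 */
#if 0	/* example only */
	if (datasync)
		err = nilfs_construct_dsync_segment(inode->i_sb, inode,
						    start, end);
	else
		err = nilfs_construct_segment(inode->i_sb);
#endif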
   2313
   2314#define FLUSH_FILE_BIT	(0x1) /* data file only */
   2315#define FLUSH_DAT_BIT	BIT(NILFS_DAT_INO) /* DAT only */
   2316
   2317/**
   2318 * nilfs_segctor_accept - record accepted sequence count of log-write requests
   2319 * @sci: segment constructor object
   2320 */
   2321static void nilfs_segctor_accept(struct nilfs_sc_info *sci)
   2322{
   2323	spin_lock(&sci->sc_state_lock);
   2324	sci->sc_seq_accepted = sci->sc_seq_request;
   2325	spin_unlock(&sci->sc_state_lock);
   2326	del_timer_sync(&sci->sc_timer);
   2327}
   2328
   2329/**
   2330 * nilfs_segctor_notify - notify the result of request to caller threads
   2331 * @sci: segment constructor object
   2332 * @mode: mode of log forming
   2333 * @err: error code to be notified
   2334 */
   2335static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err)
   2336{
   2337	/* Clear requests (even when the construction failed) */
   2338	spin_lock(&sci->sc_state_lock);
   2339
   2340	if (mode == SC_LSEG_SR) {
   2341		sci->sc_state &= ~NILFS_SEGCTOR_COMMIT;
   2342		sci->sc_seq_done = sci->sc_seq_accepted;
   2343		nilfs_segctor_wakeup(sci, err);
   2344		sci->sc_flush_request = 0;
   2345	} else {
   2346		if (mode == SC_FLUSH_FILE)
   2347			sci->sc_flush_request &= ~FLUSH_FILE_BIT;
   2348		else if (mode == SC_FLUSH_DAT)
   2349			sci->sc_flush_request &= ~FLUSH_DAT_BIT;
   2350
   2351		/* re-enable timer if checkpoint creation was not done */
   2352		if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
   2353		    time_before(jiffies, sci->sc_timer.expires))
   2354			add_timer(&sci->sc_timer);
   2355	}
   2356	spin_unlock(&sci->sc_state_lock);
   2357}
   2358
   2359/**
   2360 * nilfs_segctor_construct - form logs and write them to disk
   2361 * @sci: segment constructor object
   2362 * @mode: mode of log forming
   2363 */
   2364static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode)
   2365{
   2366	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
   2367	struct nilfs_super_block **sbp;
   2368	int err = 0;
   2369
   2370	nilfs_segctor_accept(sci);
   2371
   2372	if (nilfs_discontinued(nilfs))
   2373		mode = SC_LSEG_SR;
   2374	if (!nilfs_segctor_confirm(sci))
   2375		err = nilfs_segctor_do_construct(sci, mode);
   2376
   2377	if (likely(!err)) {
   2378		if (mode != SC_FLUSH_DAT)
   2379			atomic_set(&nilfs->ns_ndirtyblks, 0);
   2380		if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) &&
   2381		    nilfs_discontinued(nilfs)) {
   2382			down_write(&nilfs->ns_sem);
   2383			err = -EIO;
   2384			sbp = nilfs_prepare_super(sci->sc_super,
   2385						  nilfs_sb_will_flip(nilfs));
   2386			if (likely(sbp)) {
   2387				nilfs_set_log_cursor(sbp[0], nilfs);
   2388				err = nilfs_commit_super(sci->sc_super,
   2389							 NILFS_SB_COMMIT);
   2390			}
   2391			up_write(&nilfs->ns_sem);
   2392		}
   2393	}
   2394
   2395	nilfs_segctor_notify(sci, mode, err);
   2396	return err;
   2397}
   2398
   2399static void nilfs_construction_timeout(struct timer_list *t)
   2400{
   2401	struct nilfs_sc_info *sci = from_timer(sci, t, sc_timer);
   2402
   2403	wake_up_process(sci->sc_timer_task);
   2404}
   2405
   2406static void
   2407nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head)
   2408{
   2409	struct nilfs_inode_info *ii, *n;
   2410
   2411	list_for_each_entry_safe(ii, n, head, i_dirty) {
   2412		if (!test_bit(NILFS_I_UPDATED, &ii->i_state))
   2413			continue;
   2414		list_del_init(&ii->i_dirty);
   2415		truncate_inode_pages(&ii->vfs_inode.i_data, 0);
   2416		nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping);
   2417		iput(&ii->vfs_inode);
   2418	}
   2419}
   2420
   2421int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
   2422			 void **kbufs)
   2423{
   2424	struct the_nilfs *nilfs = sb->s_fs_info;
   2425	struct nilfs_sc_info *sci = nilfs->ns_writer;
   2426	struct nilfs_transaction_info ti;
   2427	int err;
   2428
   2429	if (unlikely(!sci))
   2430		return -EROFS;
   2431
   2432	nilfs_transaction_lock(sb, &ti, 1);
   2433
   2434	err = nilfs_mdt_save_to_shadow_map(nilfs->ns_dat);
   2435	if (unlikely(err))
   2436		goto out_unlock;
   2437
   2438	err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs);
   2439	if (unlikely(err)) {
   2440		nilfs_mdt_restore_from_shadow_map(nilfs->ns_dat);
   2441		goto out_unlock;
   2442	}
   2443
   2444	sci->sc_freesegs = kbufs[4];
   2445	sci->sc_nfreesegs = argv[4].v_nmembs;
   2446	list_splice_tail_init(&nilfs->ns_gc_inodes, &sci->sc_gc_inodes);
   2447
   2448	for (;;) {
   2449		err = nilfs_segctor_construct(sci, SC_LSEG_SR);
   2450		nilfs_remove_written_gcinodes(nilfs, &sci->sc_gc_inodes);
   2451
   2452		if (likely(!err))
   2453			break;
   2454
   2455		nilfs_warn(sb, "error %d cleaning segments", err);
   2456		set_current_state(TASK_INTERRUPTIBLE);
   2457		schedule_timeout(sci->sc_interval);
   2458	}
   2459	if (nilfs_test_opt(nilfs, DISCARD)) {
   2460		int ret = nilfs_discard_segments(nilfs, sci->sc_freesegs,
   2461						 sci->sc_nfreesegs);
   2462		if (ret) {
   2463			nilfs_warn(sb,
   2464				   "error %d on discard request, turning discards off for the device",
   2465				   ret);
   2466			nilfs_clear_opt(nilfs, DISCARD);
   2467		}
   2468	}
   2469
   2470 out_unlock:
   2471	sci->sc_freesegs = NULL;
   2472	sci->sc_nfreesegs = 0;
   2473	nilfs_mdt_clear_shadow_map(nilfs->ns_dat);
   2474	nilfs_transaction_unlock(sb);
   2475	return err;
   2476}
   2477
   2478static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode)
   2479{
   2480	struct nilfs_transaction_info ti;
   2481
   2482	nilfs_transaction_lock(sci->sc_super, &ti, 0);
   2483	nilfs_segctor_construct(sci, mode);
   2484
    2485	/*
    2486	 * An unclosed segment should be retried.  We do this using sc_timer:
    2487	 * when sc_timer expires, the resulting complete construction closes
    2488	 * the current logical segment.
    2489	 */
   2490	if (test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags))
   2491		nilfs_segctor_start_timer(sci);
   2492
   2493	nilfs_transaction_unlock(sci->sc_super);
   2494}
   2495
   2496static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci)
   2497{
   2498	int mode = 0;
   2499
   2500	spin_lock(&sci->sc_state_lock);
   2501	mode = (sci->sc_flush_request & FLUSH_DAT_BIT) ?
   2502		SC_FLUSH_DAT : SC_FLUSH_FILE;
   2503	spin_unlock(&sci->sc_state_lock);
   2504
   2505	if (mode) {
   2506		nilfs_segctor_do_construct(sci, mode);
   2507
   2508		spin_lock(&sci->sc_state_lock);
   2509		sci->sc_flush_request &= (mode == SC_FLUSH_FILE) ?
   2510			~FLUSH_FILE_BIT : ~FLUSH_DAT_BIT;
   2511		spin_unlock(&sci->sc_state_lock);
   2512	}
   2513	clear_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
   2514}
   2515
   2516static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci)
   2517{
   2518	if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) ||
   2519	    time_before(jiffies, sci->sc_lseg_stime + sci->sc_mjcp_freq)) {
   2520		if (!(sci->sc_flush_request & ~FLUSH_FILE_BIT))
   2521			return SC_FLUSH_FILE;
   2522		else if (!(sci->sc_flush_request & ~FLUSH_DAT_BIT))
   2523			return SC_FLUSH_DAT;
   2524	}
   2525	return SC_LSEG_SR;
   2526}
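
/*
 * Decision summary: while the current logical segment is closed or still
 * younger than sc_mjcp_freq, a pending flush of only data files maps to
 * SC_FLUSH_FILE and a pending flush of only the DAT maps to SC_FLUSH_DAT;
 * any other combination escalates to a full construction with a super
 * root (SC_LSEG_SR).
 */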
   2527
   2528/**
   2529 * nilfs_segctor_thread - main loop of the segment constructor thread.
   2530 * @arg: pointer to a struct nilfs_sc_info.
   2531 *
   2532 * nilfs_segctor_thread() initializes a timer and serves as a daemon
   2533 * to execute segment constructions.
   2534 */
   2535static int nilfs_segctor_thread(void *arg)
   2536{
   2537	struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg;
   2538	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
   2539	int timeout = 0;
   2540
   2541	sci->sc_timer_task = current;
   2542
   2543	/* start sync. */
   2544	sci->sc_task = current;
   2545	wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */
   2546	nilfs_info(sci->sc_super,
   2547		   "segctord starting. Construction interval = %lu seconds, CP frequency < %lu seconds",
   2548		   sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ);
   2549
   2550	spin_lock(&sci->sc_state_lock);
   2551 loop:
   2552	for (;;) {
   2553		int mode;
   2554
   2555		if (sci->sc_state & NILFS_SEGCTOR_QUIT)
   2556			goto end_thread;
   2557
   2558		if (timeout || sci->sc_seq_request != sci->sc_seq_done)
   2559			mode = SC_LSEG_SR;
   2560		else if (sci->sc_flush_request)
   2561			mode = nilfs_segctor_flush_mode(sci);
   2562		else
   2563			break;
   2564
   2565		spin_unlock(&sci->sc_state_lock);
   2566		nilfs_segctor_thread_construct(sci, mode);
   2567		spin_lock(&sci->sc_state_lock);
   2568		timeout = 0;
   2569	}
    2570
   2572	if (freezing(current)) {
   2573		spin_unlock(&sci->sc_state_lock);
   2574		try_to_freeze();
   2575		spin_lock(&sci->sc_state_lock);
   2576	} else {
   2577		DEFINE_WAIT(wait);
   2578		int should_sleep = 1;
   2579
   2580		prepare_to_wait(&sci->sc_wait_daemon, &wait,
   2581				TASK_INTERRUPTIBLE);
   2582
   2583		if (sci->sc_seq_request != sci->sc_seq_done)
   2584			should_sleep = 0;
   2585		else if (sci->sc_flush_request)
   2586			should_sleep = 0;
   2587		else if (sci->sc_state & NILFS_SEGCTOR_COMMIT)
   2588			should_sleep = time_before(jiffies,
   2589					sci->sc_timer.expires);
   2590
   2591		if (should_sleep) {
   2592			spin_unlock(&sci->sc_state_lock);
   2593			schedule();
   2594			spin_lock(&sci->sc_state_lock);
   2595		}
   2596		finish_wait(&sci->sc_wait_daemon, &wait);
   2597		timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
   2598			   time_after_eq(jiffies, sci->sc_timer.expires));
   2599
   2600		if (nilfs_sb_dirty(nilfs) && nilfs_sb_need_update(nilfs))
   2601			set_nilfs_discontinued(nilfs);
   2602	}
   2603	goto loop;
   2604
   2605 end_thread:
   2606	spin_unlock(&sci->sc_state_lock);
   2607
   2608	/* end sync. */
   2609	sci->sc_task = NULL;
   2610	wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */
   2611	return 0;
   2612}
   2613
   2614static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci)
   2615{
   2616	struct task_struct *t;
   2617
   2618	t = kthread_run(nilfs_segctor_thread, sci, "segctord");
   2619	if (IS_ERR(t)) {
   2620		int err = PTR_ERR(t);
   2621
   2622		nilfs_err(sci->sc_super, "error %d creating segctord thread",
   2623			  err);
   2624		return err;
   2625	}
   2626	wait_event(sci->sc_wait_task, sci->sc_task != NULL);
   2627	return 0;
   2628}
   2629
   2630static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci)
   2631	__acquires(&sci->sc_state_lock)
   2632	__releases(&sci->sc_state_lock)
   2633{
   2634	sci->sc_state |= NILFS_SEGCTOR_QUIT;
   2635
   2636	while (sci->sc_task) {
   2637		wake_up(&sci->sc_wait_daemon);
   2638		spin_unlock(&sci->sc_state_lock);
   2639		wait_event(sci->sc_wait_task, sci->sc_task == NULL);
   2640		spin_lock(&sci->sc_state_lock);
   2641	}
   2642}
   2643
   2644/*
   2645 * Setup & clean-up functions
   2646 */
   2647static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
   2648					       struct nilfs_root *root)
   2649{
   2650	struct the_nilfs *nilfs = sb->s_fs_info;
   2651	struct nilfs_sc_info *sci;
   2652
   2653	sci = kzalloc(sizeof(*sci), GFP_KERNEL);
   2654	if (!sci)
   2655		return NULL;
   2656
   2657	sci->sc_super = sb;
   2658
   2659	nilfs_get_root(root);
   2660	sci->sc_root = root;
   2661
   2662	init_waitqueue_head(&sci->sc_wait_request);
   2663	init_waitqueue_head(&sci->sc_wait_daemon);
   2664	init_waitqueue_head(&sci->sc_wait_task);
   2665	spin_lock_init(&sci->sc_state_lock);
   2666	INIT_LIST_HEAD(&sci->sc_dirty_files);
   2667	INIT_LIST_HEAD(&sci->sc_segbufs);
   2668	INIT_LIST_HEAD(&sci->sc_write_logs);
   2669	INIT_LIST_HEAD(&sci->sc_gc_inodes);
   2670	INIT_LIST_HEAD(&sci->sc_iput_queue);
   2671	INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func);
   2672	timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0);
   2673
   2674	sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
   2675	sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ;
   2676	sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK;
   2677
   2678	if (nilfs->ns_interval)
   2679		sci->sc_interval = HZ * nilfs->ns_interval;
   2680	if (nilfs->ns_watermark)
   2681		sci->sc_watermark = nilfs->ns_watermark;
   2682	return sci;
   2683}
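
/*
 * sc_interval, sc_mjcp_freq, and sc_watermark start from the compile-time
 * defaults; nonzero ns_interval and ns_watermark values, typically loaded
 * from the on-disk super block, override them.
 */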
   2684
   2685static void nilfs_segctor_write_out(struct nilfs_sc_info *sci)
   2686{
   2687	int ret, retrycount = NILFS_SC_CLEANUP_RETRY;
   2688
    2689	/*
    2690	 * The segctord thread was stopped and its timer was removed, but
    2691	 * unfinished construction work may remain; retry a bounded number of times.
    2692	 */
   2693	do {
   2694		struct nilfs_transaction_info ti;
   2695
   2696		nilfs_transaction_lock(sci->sc_super, &ti, 0);
   2697		ret = nilfs_segctor_construct(sci, SC_LSEG_SR);
   2698		nilfs_transaction_unlock(sci->sc_super);
   2699
   2700		flush_work(&sci->sc_iput_work);
   2701
   2702	} while (ret && retrycount-- > 0);
   2703}
   2704
   2705/**
   2706 * nilfs_segctor_destroy - destroy the segment constructor.
   2707 * @sci: nilfs_sc_info
   2708 *
   2709 * nilfs_segctor_destroy() kills the segctord thread and frees
   2710 * the nilfs_sc_info struct.
   2711 * Caller must hold the segment semaphore.
   2712 */
   2713static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
   2714{
   2715	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
   2716	int flag;
   2717
   2718	up_write(&nilfs->ns_segctor_sem);
   2719
   2720	spin_lock(&sci->sc_state_lock);
   2721	nilfs_segctor_kill_thread(sci);
   2722	flag = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) || sci->sc_flush_request
   2723		|| sci->sc_seq_request != sci->sc_seq_done);
   2724	spin_unlock(&sci->sc_state_lock);
   2725
   2726	if (flush_work(&sci->sc_iput_work))
   2727		flag = true;
   2728
   2729	if (flag || !nilfs_segctor_confirm(sci))
   2730		nilfs_segctor_write_out(sci);
   2731
   2732	if (!list_empty(&sci->sc_dirty_files)) {
   2733		nilfs_warn(sci->sc_super,
   2734			   "disposed unprocessed dirty file(s) when stopping log writer");
   2735		nilfs_dispose_list(nilfs, &sci->sc_dirty_files, 1);
   2736	}
   2737
   2738	if (!list_empty(&sci->sc_iput_queue)) {
   2739		nilfs_warn(sci->sc_super,
   2740			   "disposed unprocessed inode(s) in iput queue when stopping log writer");
   2741		nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 1);
   2742	}
   2743
   2744	WARN_ON(!list_empty(&sci->sc_segbufs));
   2745	WARN_ON(!list_empty(&sci->sc_write_logs));
   2746
   2747	nilfs_put_root(sci->sc_root);
   2748
   2749	down_write(&nilfs->ns_segctor_sem);
   2750
   2751	del_timer_sync(&sci->sc_timer);
   2752	kfree(sci);
   2753}
   2754
   2755/**
   2756 * nilfs_attach_log_writer - attach log writer
   2757 * @sb: super block instance
   2758 * @root: root object of the current filesystem tree
   2759 *
   2760 * This allocates a log writer object, initializes it, and starts the
   2761 * log writer.
   2762 *
   2763 * Return Value: On success, 0 is returned. On error, one of the following
    2764 * negative error codes is returned.
   2765 *
   2766 * %-ENOMEM - Insufficient memory available.
   2767 */
   2768int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root)
   2769{
   2770	struct the_nilfs *nilfs = sb->s_fs_info;
   2771	int err;
   2772
   2773	if (nilfs->ns_writer) {
   2774		/*
   2775		 * This happens if the filesystem was remounted
   2776		 * read/write after nilfs_error degenerated it into a
   2777		 * read-only mount.
   2778		 */
   2779		nilfs_detach_log_writer(sb);
   2780	}
   2781
   2782	nilfs->ns_writer = nilfs_segctor_new(sb, root);
   2783	if (!nilfs->ns_writer)
   2784		return -ENOMEM;
   2785
   2786	inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL);
   2787
   2788	err = nilfs_segctor_start_thread(nilfs->ns_writer);
   2789	if (err) {
   2790		kfree(nilfs->ns_writer);
   2791		nilfs->ns_writer = NULL;
   2792	}
   2793	return err;
   2794}
   2795
   2796/**
   2797 * nilfs_detach_log_writer - destroy log writer
   2798 * @sb: super block instance
   2799 *
    2800 * This kills the log writer daemon, frees the log writer object, and
    2801 * disposes of the remaining dirty files.
   2802 */
   2803void nilfs_detach_log_writer(struct super_block *sb)
   2804{
   2805	struct the_nilfs *nilfs = sb->s_fs_info;
   2806	LIST_HEAD(garbage_list);
   2807
   2808	down_write(&nilfs->ns_segctor_sem);
   2809	if (nilfs->ns_writer) {
   2810		nilfs_segctor_destroy(nilfs->ns_writer);
   2811		nilfs->ns_writer = NULL;
   2812	}
   2813
    2814	/* Forcibly free the list of dirty files */
   2815	spin_lock(&nilfs->ns_inode_lock);
   2816	if (!list_empty(&nilfs->ns_dirty_files)) {
   2817		list_splice_init(&nilfs->ns_dirty_files, &garbage_list);
   2818		nilfs_warn(sb,
   2819			   "disposed unprocessed dirty file(s) when detaching log writer");
   2820	}
   2821	spin_unlock(&nilfs->ns_inode_lock);
   2822	up_write(&nilfs->ns_segctor_sem);
   2823
   2824	nilfs_dispose_list(nilfs, &garbage_list, 1);
   2825}