cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

fast_commit.c (64928B)


      1// SPDX-License-Identifier: GPL-2.0
      2
      3/*
      4 * fs/ext4/fast_commit.c
      5 *
      6 * Written by Harshad Shirwadkar <harshadshirwadkar@gmail.com>
      7 *
      8 * Ext4 fast commits routines.
      9 */
     10#include "ext4.h"
     11#include "ext4_jbd2.h"
     12#include "ext4_extents.h"
     13#include "mballoc.h"
     14
     15/*
     16 * Ext4 Fast Commits
     17 * -----------------
     18 *
      19 * Ext4 fast commits implement fine-grained journalling for Ext4.
     20 *
      21 * Fast commits are organized as a log of tag-length-value (TLV) structs (see
      22 * struct ext4_fc_tl; a layout sketch follows the tag list below). Each TLV
      23 * contains some delta that is replayed TLV by TLV during the recovery phase.
      24 * For scenarios where we don't yet have replay code, fast commit falls back
      25 * to full commits. Fast commits record deltas in the following three categories:
     26 *
     27 * (A) Directory entry updates:
     28 *
     29 * - EXT4_FC_TAG_UNLINK		- records directory entry unlink
     30 * - EXT4_FC_TAG_LINK		- records directory entry link
     31 * - EXT4_FC_TAG_CREAT		- records inode and directory entry creation
     32 *
     33 * (B) File specific data range updates:
     34 *
     35 * - EXT4_FC_TAG_ADD_RANGE	- records addition of new blocks to an inode
     36 * - EXT4_FC_TAG_DEL_RANGE	- records deletion of blocks from an inode
     37 *
     38 * (C) Inode metadata (mtime / ctime etc):
     39 *
     40 * - EXT4_FC_TAG_INODE		- record the inode that should be replayed
     41 *				  during recovery. Note that iblocks field is
     42 *				  not replayed and instead derived during
     43 *				  replay.
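         *
         * For illustration, here is a minimal sketch of the TLV encoding used by
         * the tags above (see struct ext4_fc_tl and the EXT4_FC_TAG_* definitions
         * in the fast commit headers for the authoritative layout):
         *
         *	struct ext4_fc_tl {
         *		__le16 fc_tag;	// one of the EXT4_FC_TAG_* values
         *		__le16 fc_len;	// length of the value in bytes
         *	};
         *
         * Each log entry is thus laid out as [tag:2][len:2][value:len], and the
         * next entry starts immediately after the value.
         *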
     44 * Commit Operation
     45 * ----------------
     46 * With fast commits, we maintain all the directory entry operations in the
     47 * order in which they are issued in an in-memory queue. This queue is flushed
     48 * to disk during the commit operation. We also maintain a list of inodes
      49 * that need to be committed during a fast commit in another in-memory queue of
     50 * inodes. During the commit operation, we commit in the following order:
     51 *
     52 * [1] Lock inodes for any further data updates by setting COMMITTING state
     53 * [2] Submit data buffers of all the inodes
     54 * [3] Wait for [2] to complete
     55 * [4] Commit all the directory entry updates in the fast commit space
     56 * [5] Commit all the changed inode structures
      57 * [6] Write tail tag (this tag ensures atomicity; see the following
      58 *     section for more details).
     59 * [7] Wait for [4], [5] and [6] to complete.
     60 *
     61 * All the inode updates must call ext4_fc_start_update() before starting an
     62 * update. If such an ongoing update is present, fast commit waits for it to
     63 * complete. The completion of such an update is marked by
     64 * ext4_fc_stop_update().
     65 *
     66 * Fast Commit Ineligibility
     67 * -------------------------
     68 *
      69 * Not all operations are supported by fast commits today (e.g. extended
      70 * attributes). Fast commit ineligibility is marked by calling
      71 * ext4_fc_mark_ineligible(): this makes the next fast commit operation fall
      72 * back to a full commit.
     73 *
     74 * Atomicity of commits
     75 * --------------------
      76 * In order to guarantee atomicity during the commit operation, fast commit
      77 * uses the "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. The
      78 * tail tag contains the CRC of the contents and the TID of the transaction
      79 * after which this fast commit should be applied. Recovery code replays fast
      80 * commit logs only if there's at least 1 valid tail present. For every fast
      81 * commit operation, there is 1 tail. This means we may end up with multiple
      82 * tails in the fast commit space. Here's an example:
     83 *
     84 * - Create a new file A and remove existing file B
     85 * - fsync()
     86 * - Append contents to file A
     87 * - Truncate file A
     88 * - fsync()
     89 *
     90 * The fast commit space at the end of above operations would look like this:
     91 *      [HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL]
     92 *             |<---  Fast Commit 1   --->|<---      Fast Commit 2     ---->|
     93 *
     94 * Replay code should thus check for all the valid tails in the FC area.
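         *
         * A sketch of that scan (illustrative only; the real parsing lives in the
         * replay path later in this file, and tail_is_valid() here is a
         * hypothetical CRC/TID check, not a real helper):
         *
         *	u8 *cur = area_start, *last_tail = NULL;
         *	struct ext4_fc_tl tl;
         *
         *	while (cur + sizeof(tl) <= area_end) {
         *		memcpy(&tl, cur, sizeof(tl));
         *		if (le16_to_cpu(tl.fc_tag) == EXT4_FC_TAG_TAIL &&
         *		    tail_is_valid(cur))
         *			last_tail = cur;
         *		cur += sizeof(tl) + le16_to_cpu(tl.fc_len);
         *	}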
     95 *
     96 * Fast Commit Replay Idempotence
     97 * ------------------------------
     98 *
      99 * Fast commit tags are idempotent in nature, provided the recovery code
     100 * follows certain rules. The guiding principle the commit path follows is
     101 * that it stores the result of a particular operation instead of storing
     102 * the procedure itself.
    103 *
    104 * Let's consider this rename operation: 'mv /a /b'. Let's assume dirent '/a'
    105 * was associated with inode 10. During fast commit, instead of storing this
    106 * operation as a procedure "rename a to b", we store the resulting file system
    107 * state as a "series" of outcomes:
    108 *
    109 * - Link dirent b to inode 10
    110 * - Unlink dirent a
    111 * - Inode <10> with valid refcount
    112 *
     113 * Now when the recovery code runs, it needs to "enforce" this state on the
     114 * file system. This is what guarantees idempotence of fast commit replay.
    115 *
    116 * Let's take an example of a procedure that is not idempotent and see how fast
     117 * commits make it idempotent. Consider the following sequence of operations:
    118 *
    119 *     rm A;    mv B A;    read A
    120 *  (x)     (y)        (z)
    121 *
    122 * (x), (y) and (z) are the points at which we can crash. If we store this
     123 * sequence of operations as is, then the replay is not idempotent. Let's say
     124 * that while replaying, we crash at (z). During the second replay, file A
     125 * (which was actually created as a result of the "mv B A" operation) would
     126 * get deleted. Thus, a file named A would be absent when we try to read A.
     127 * So this sequence of operations is not idempotent. However, as mentioned
     128 * above, instead of storing the procedure, fast commits store the outcome of
     129 * each procedure. Thus the fast commit log for the above procedure would be:
    130 *
    131 * (Let's assume dirent A was linked to inode 10 and dirent B was linked to
    132 * inode 11 before the replay)
    133 *
    134 *    [Unlink A]   [Link A to inode 11]   [Unlink B]   [Inode 11]
    135 * (w)          (x)                    (y)          (z)
    136 *
    137 * If we crash at (z), we will have file A linked to inode 11. During the second
     138 * replay, we will remove file A (inode 11), but we will recreate it and make
     139 * it point to inode 11. We won't find B, so we'll just skip that step. At this
     140 * point, the refcount for inode 11 is not reliable, but that gets fixed by the
     141 * replay of the last inode 11 tag. Crashes at points (w), (x) and (y) are
     142 * handled similarly. Thus, by converting a non-idempotent procedure into a
     143 * series of idempotent outcomes, fast commits ensure idempotence during replay.
    144 *
    145 * TODOs
    146 * -----
    147 *
    148 * 0) Fast commit replay path hardening: Fast commit replay code should use
    149 *    journal handles to make sure all the updates it does during the replay
     150 *    path are atomic. With that, if we crash during fast commit replay and
     151 *    then attempt recovery again, we will find a file system where the fast
     152 *    commit area is invalid (because a new full commit would be found). To deal
    153 *    with that, fast commit replay code should ensure that the "FC_REPLAY"
    154 *    superblock state is persisted before starting the replay, so that after
    155 *    the crash, fast commit recovery code can look at that flag and perform
    156 *    fast commit recovery even if that area is invalidated by later full
    157 *    commits.
    158 *
    159 * 1) Fast commit's commit path locks the entire file system during fast
     160 *    commit. This has a significant performance penalty. Instead of that, we
    161 *    should use ext4_fc_start/stop_update functions to start inode level
    162 *    updates from ext4_journal_start/stop. Once we do that we can drop file
    163 *    system locking during commit path.
    164 *
    165 * 2) Handle more ineligible cases.
    166 */
    167
    168#include <trace/events/ext4.h>
    169static struct kmem_cache *ext4_fc_dentry_cachep;
    170
    171static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
    172{
    173	BUFFER_TRACE(bh, "");
    174	if (uptodate) {
    175		ext4_debug("%s: Block %lld up-to-date",
    176			   __func__, bh->b_blocknr);
    177		set_buffer_uptodate(bh);
    178	} else {
    179		ext4_debug("%s: Block %lld not up-to-date",
    180			   __func__, bh->b_blocknr);
    181		clear_buffer_uptodate(bh);
    182	}
    183
    184	unlock_buffer(bh);
    185}
    186
    187static inline void ext4_fc_reset_inode(struct inode *inode)
    188{
    189	struct ext4_inode_info *ei = EXT4_I(inode);
    190
    191	ei->i_fc_lblk_start = 0;
    192	ei->i_fc_lblk_len = 0;
    193}
    194
    195void ext4_fc_init_inode(struct inode *inode)
    196{
    197	struct ext4_inode_info *ei = EXT4_I(inode);
    198
    199	ext4_fc_reset_inode(inode);
    200	ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
    201	INIT_LIST_HEAD(&ei->i_fc_list);
    202	INIT_LIST_HEAD(&ei->i_fc_dilist);
    203	init_waitqueue_head(&ei->i_fc_wait);
    204	atomic_set(&ei->i_fc_updates, 0);
    205}
    206
    207/* This function must be called with sbi->s_fc_lock held. */
    208static void ext4_fc_wait_committing_inode(struct inode *inode)
    209__releases(&EXT4_SB(inode->i_sb)->s_fc_lock)
    210{
    211	wait_queue_head_t *wq;
    212	struct ext4_inode_info *ei = EXT4_I(inode);
    213
    214#if (BITS_PER_LONG < 64)
    215	DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
    216			EXT4_STATE_FC_COMMITTING);
    217	wq = bit_waitqueue(&ei->i_state_flags,
    218				EXT4_STATE_FC_COMMITTING);
    219#else
    220	DEFINE_WAIT_BIT(wait, &ei->i_flags,
    221			EXT4_STATE_FC_COMMITTING);
    222	wq = bit_waitqueue(&ei->i_flags,
    223				EXT4_STATE_FC_COMMITTING);
    224#endif
    225	lockdep_assert_held(&EXT4_SB(inode->i_sb)->s_fc_lock);
    226	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
    227	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
    228	schedule();
    229	finish_wait(wq, &wait.wq_entry);
    230}
    231
    232/*
     233 * Inform Ext4's fast commit subsystem about the start of an inode update.
     234 *
     235 * This function is called by high-level VFS callbacks before performing
     236 * any inode update. This function blocks if there's an ongoing
    237 * fast commit on the inode in question.
    238 */
    239void ext4_fc_start_update(struct inode *inode)
    240{
    241	struct ext4_inode_info *ei = EXT4_I(inode);
    242
    243	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
    244	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
    245		return;
    246
    247restart:
    248	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
    249	if (list_empty(&ei->i_fc_list))
    250		goto out;
    251
    252	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
    253		ext4_fc_wait_committing_inode(inode);
    254		goto restart;
    255	}
    256out:
    257	atomic_inc(&ei->i_fc_updates);
    258	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
    259}
    260
    261/*
    262 * Stop inode update and wake up waiting fast commits if any.
    263 */
    264void ext4_fc_stop_update(struct inode *inode)
    265{
    266	struct ext4_inode_info *ei = EXT4_I(inode);
    267
    268	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
    269	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
    270		return;
    271
    272	if (atomic_dec_and_test(&ei->i_fc_updates))
    273		wake_up_all(&ei->i_fc_wait);
    274}
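
        /*
         * Usage sketch for the pair above (illustrative, not a real ext4
         * callsite): an inode update brackets its work with
         * ext4_fc_start_update() / ext4_fc_stop_update() so that an ongoing
         * fast commit of this inode completes first, and so that a later fast
         * commit waits for the update to finish:
         *
         *	ext4_fc_start_update(inode);	// may block while COMMITTING
         *	// ... modify the inode / run the journalled update ...
         *	ext4_fc_stop_update(inode);	// wakes a waiting fast commit
         */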
    275
    276/*
    277 * Remove inode from fast commit list. If the inode is being committed
     278 * we wait until the inode commit is done.
    279 */
    280void ext4_fc_del(struct inode *inode)
    281{
    282	struct ext4_inode_info *ei = EXT4_I(inode);
    283	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
    284	struct ext4_fc_dentry_update *fc_dentry;
    285
    286	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
    287	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
    288		return;
    289
    290restart:
    291	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
    292	if (list_empty(&ei->i_fc_list) && list_empty(&ei->i_fc_dilist)) {
    293		spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
    294		return;
    295	}
    296
    297	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
    298		ext4_fc_wait_committing_inode(inode);
    299		goto restart;
    300	}
    301
    302	if (!list_empty(&ei->i_fc_list))
    303		list_del_init(&ei->i_fc_list);
    304
    305	/*
    306	 * Since this inode is getting removed, let's also remove all FC
     307	 * dentry create references, since there is no need to log them anyway.
    308	 */
    309	if (list_empty(&ei->i_fc_dilist)) {
    310		spin_unlock(&sbi->s_fc_lock);
    311		return;
    312	}
    313
    314	fc_dentry = list_first_entry(&ei->i_fc_dilist, struct ext4_fc_dentry_update, fcd_dilist);
    315	WARN_ON(fc_dentry->fcd_op != EXT4_FC_TAG_CREAT);
    316	list_del_init(&fc_dentry->fcd_list);
    317	list_del_init(&fc_dentry->fcd_dilist);
    318
    319	WARN_ON(!list_empty(&ei->i_fc_dilist));
    320	spin_unlock(&sbi->s_fc_lock);
    321
    322	if (fc_dentry->fcd_name.name &&
    323		fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
    324		kfree(fc_dentry->fcd_name.name);
    325	kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
    326
    327	return;
    328}
    329
    330/*
     331 * Mark the file system as fast commit ineligible, and record the latest
     332 * ineligible transaction tid. This means that, up to the recorded
     333 * transaction, a commit operation will result in a full jbd2 commit.
    334 */
    335void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle)
    336{
    337	struct ext4_sb_info *sbi = EXT4_SB(sb);
    338	tid_t tid;
    339
    340	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
    341	    (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
    342		return;
    343
    344	ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
    345	if (handle && !IS_ERR(handle))
    346		tid = handle->h_transaction->t_tid;
    347	else {
    348		read_lock(&sbi->s_journal->j_state_lock);
    349		tid = sbi->s_journal->j_running_transaction ?
    350				sbi->s_journal->j_running_transaction->t_tid : 0;
    351		read_unlock(&sbi->s_journal->j_state_lock);
    352	}
    353	spin_lock(&sbi->s_fc_lock);
    354	if (sbi->s_fc_ineligible_tid < tid)
    355		sbi->s_fc_ineligible_tid = tid;
    356	spin_unlock(&sbi->s_fc_lock);
    357	WARN_ON(reason >= EXT4_FC_REASON_MAX);
    358	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
    359}
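
        /*
         * Usage sketch (illustrative): a caller that hits an operation fast
         * commits can't replay yet simply tags the file system, e.g. the
         * extended attribute path would do something like
         *
         *	ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);
         *
         * after which the next commit for this transaction falls back to a
         * full jbd2 commit.
         */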
    360
    361/*
     362 * Generic fast commit tracking function. If this is the first time we are
    363 * called after a full commit, we initialize fast commit fields and then call
    364 * __fc_track_fn() with update = 0. If we have already been called after a full
    365 * commit, we pass update = 1. Based on that, the track function can determine
    366 * if it needs to track a field for the first time or if it needs to just
    367 * update the previously tracked value.
    368 *
    369 * If enqueue is set, this function enqueues the inode in fast commit list.
    370 */
    371static int ext4_fc_track_template(
    372	handle_t *handle, struct inode *inode,
    373	int (*__fc_track_fn)(struct inode *, void *, bool),
    374	void *args, int enqueue)
    375{
    376	bool update = false;
    377	struct ext4_inode_info *ei = EXT4_I(inode);
    378	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
    379	tid_t tid = 0;
    380	int ret;
    381
    382	tid = handle->h_transaction->t_tid;
    383	mutex_lock(&ei->i_fc_lock);
    384	if (tid == ei->i_sync_tid) {
    385		update = true;
    386	} else {
    387		ext4_fc_reset_inode(inode);
    388		ei->i_sync_tid = tid;
    389	}
    390	ret = __fc_track_fn(inode, args, update);
    391	mutex_unlock(&ei->i_fc_lock);
    392
    393	if (!enqueue)
    394		return ret;
    395
    396	spin_lock(&sbi->s_fc_lock);
    397	if (list_empty(&EXT4_I(inode)->i_fc_list))
    398		list_add_tail(&EXT4_I(inode)->i_fc_list,
    399				(sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
    400				 sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ?
    401				&sbi->s_fc_q[FC_Q_STAGING] :
    402				&sbi->s_fc_q[FC_Q_MAIN]);
    403	spin_unlock(&sbi->s_fc_lock);
    404
    405	return ret;
    406}
    407
    408struct __track_dentry_update_args {
    409	struct dentry *dentry;
    410	int op;
    411};
    412
     413/* __track_fn for directory entry updates. Called with ei->i_fc_lock held. */
    414static int __track_dentry_update(struct inode *inode, void *arg, bool update)
    415{
    416	struct ext4_fc_dentry_update *node;
    417	struct ext4_inode_info *ei = EXT4_I(inode);
    418	struct __track_dentry_update_args *dentry_update =
    419		(struct __track_dentry_update_args *)arg;
    420	struct dentry *dentry = dentry_update->dentry;
    421	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
    422
    423	mutex_unlock(&ei->i_fc_lock);
    424	node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
    425	if (!node) {
    426		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
    427		mutex_lock(&ei->i_fc_lock);
    428		return -ENOMEM;
    429	}
    430
    431	node->fcd_op = dentry_update->op;
    432	node->fcd_parent = dentry->d_parent->d_inode->i_ino;
    433	node->fcd_ino = inode->i_ino;
    434	if (dentry->d_name.len > DNAME_INLINE_LEN) {
    435		node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS);
    436		if (!node->fcd_name.name) {
    437			kmem_cache_free(ext4_fc_dentry_cachep, node);
    438			ext4_fc_mark_ineligible(inode->i_sb,
    439				EXT4_FC_REASON_NOMEM, NULL);
    440			mutex_lock(&ei->i_fc_lock);
    441			return -ENOMEM;
    442		}
    443		memcpy((u8 *)node->fcd_name.name, dentry->d_name.name,
    444			dentry->d_name.len);
    445	} else {
    446		memcpy(node->fcd_iname, dentry->d_name.name,
    447			dentry->d_name.len);
    448		node->fcd_name.name = node->fcd_iname;
    449	}
    450	node->fcd_name.len = dentry->d_name.len;
    451	INIT_LIST_HEAD(&node->fcd_dilist);
    452	spin_lock(&sbi->s_fc_lock);
    453	if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
    454		sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING)
    455		list_add_tail(&node->fcd_list,
    456				&sbi->s_fc_dentry_q[FC_Q_STAGING]);
    457	else
    458		list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]);
    459
    460	/*
     461	 * This helps us keep track of all fc_dentry updates that are part of
     462	 * this ext4 inode. If the inode gets unlinked before we even get a
     463	 * chance to fsync, we can remove all fc_dentry references while
     464	 * evicting the inode in ext4_fc_del().
    465	 * Also with this, we don't need to loop over all the inodes in
    466	 * sbi->s_fc_q to get the corresponding inode in
    467	 * ext4_fc_commit_dentry_updates().
    468	 */
    469	if (dentry_update->op == EXT4_FC_TAG_CREAT) {
    470		WARN_ON(!list_empty(&ei->i_fc_dilist));
    471		list_add_tail(&node->fcd_dilist, &ei->i_fc_dilist);
    472	}
    473	spin_unlock(&sbi->s_fc_lock);
    474	mutex_lock(&ei->i_fc_lock);
    475
    476	return 0;
    477}
    478
    479void __ext4_fc_track_unlink(handle_t *handle,
    480		struct inode *inode, struct dentry *dentry)
    481{
    482	struct __track_dentry_update_args args;
    483	int ret;
    484
    485	args.dentry = dentry;
    486	args.op = EXT4_FC_TAG_UNLINK;
    487
    488	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
    489					(void *)&args, 0);
    490	trace_ext4_fc_track_unlink(handle, inode, dentry, ret);
    491}
    492
    493void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
    494{
    495	struct inode *inode = d_inode(dentry);
    496	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
    497
    498	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
    499	    (sbi->s_mount_state & EXT4_FC_REPLAY))
    500		return;
    501
    502	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
    503		return;
    504
    505	__ext4_fc_track_unlink(handle, inode, dentry);
    506}
    507
    508void __ext4_fc_track_link(handle_t *handle,
    509	struct inode *inode, struct dentry *dentry)
    510{
    511	struct __track_dentry_update_args args;
    512	int ret;
    513
    514	args.dentry = dentry;
    515	args.op = EXT4_FC_TAG_LINK;
    516
    517	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
    518					(void *)&args, 0);
    519	trace_ext4_fc_track_link(handle, inode, dentry, ret);
    520}
    521
    522void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
    523{
    524	struct inode *inode = d_inode(dentry);
    525	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
    526
    527	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
    528	    (sbi->s_mount_state & EXT4_FC_REPLAY))
    529		return;
    530
    531	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
    532		return;
    533
    534	__ext4_fc_track_link(handle, inode, dentry);
    535}
    536
    537void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
    538			  struct dentry *dentry)
    539{
    540	struct __track_dentry_update_args args;
    541	int ret;
    542
    543	args.dentry = dentry;
    544	args.op = EXT4_FC_TAG_CREAT;
    545
    546	ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
    547					(void *)&args, 0);
    548	trace_ext4_fc_track_create(handle, inode, dentry, ret);
    549}
    550
    551void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
    552{
    553	struct inode *inode = d_inode(dentry);
    554	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
    555
    556	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
    557	    (sbi->s_mount_state & EXT4_FC_REPLAY))
    558		return;
    559
    560	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
    561		return;
    562
    563	__ext4_fc_track_create(handle, inode, dentry);
    564}
    565
    566/* __track_fn for inode tracking */
    567static int __track_inode(struct inode *inode, void *arg, bool update)
    568{
    569	if (update)
    570		return -EEXIST;
    571
    572	EXT4_I(inode)->i_fc_lblk_len = 0;
    573
    574	return 0;
    575}
    576
    577void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
    578{
    579	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
    580	int ret;
    581
    582	if (S_ISDIR(inode->i_mode))
    583		return;
    584
    585	if (ext4_should_journal_data(inode)) {
    586		ext4_fc_mark_ineligible(inode->i_sb,
    587					EXT4_FC_REASON_INODE_JOURNAL_DATA, handle);
    588		return;
    589	}
    590
    591	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
    592	    (sbi->s_mount_state & EXT4_FC_REPLAY))
    593		return;
    594
    595	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
    596		return;
    597
    598	ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1);
    599	trace_ext4_fc_track_inode(handle, inode, ret);
    600}
    601
    602struct __track_range_args {
    603	ext4_lblk_t start, end;
    604};
    605
    606/* __track_fn for tracking data updates */
    607static int __track_range(struct inode *inode, void *arg, bool update)
    608{
    609	struct ext4_inode_info *ei = EXT4_I(inode);
    610	ext4_lblk_t oldstart;
    611	struct __track_range_args *__arg =
    612		(struct __track_range_args *)arg;
    613
    614	if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) {
    615		ext4_debug("Special inode %ld being modified\n", inode->i_ino);
    616		return -ECANCELED;
    617	}
    618
    619	oldstart = ei->i_fc_lblk_start;
    620
    621	if (update && ei->i_fc_lblk_len > 0) {
    622		ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start);
    623		ei->i_fc_lblk_len =
    624			max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) -
    625				ei->i_fc_lblk_start + 1;
    626	} else {
    627		ei->i_fc_lblk_start = __arg->start;
    628		ei->i_fc_lblk_len = __arg->end - __arg->start + 1;
    629	}
    630
    631	return 0;
    632}
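
        /*
         * Worked example for the merge above: if the tracked range was
         * start = 10, len = 5 (blocks 10..14) and a new update covers blocks
         * 12..20, then start becomes min(10, 12) = 10 and len becomes
         * max(10 + 5 - 1, 20) - 10 + 1 = 20 - 10 + 1 = 11, i.e. the union
         * range of blocks 10..20.
         */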
    633
    634void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
    635			 ext4_lblk_t end)
    636{
    637	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
    638	struct __track_range_args args;
    639	int ret;
    640
    641	if (S_ISDIR(inode->i_mode))
    642		return;
    643
    644	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
    645	    (sbi->s_mount_state & EXT4_FC_REPLAY))
    646		return;
    647
    648	if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE))
    649		return;
    650
    651	args.start = start;
    652	args.end = end;
    653
    654	ret = ext4_fc_track_template(handle, inode,  __track_range, &args, 1);
    655
    656	trace_ext4_fc_track_range(handle, inode, start, end, ret);
    657}
    658
    659static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail)
    660{
    661	int write_flags = REQ_SYNC;
    662	struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;
    663
     664	/* Add REQ_FUA | REQ_PREFLUSH only if it's the tail */
    665	if (test_opt(sb, BARRIER) && is_tail)
    666		write_flags |= REQ_FUA | REQ_PREFLUSH;
    667	lock_buffer(bh);
    668	set_buffer_dirty(bh);
    669	set_buffer_uptodate(bh);
    670	bh->b_end_io = ext4_end_buffer_io_sync;
    671	submit_bh(REQ_OP_WRITE, write_flags, bh);
    672	EXT4_SB(sb)->s_fc_bh = NULL;
    673}
    674
    675/* Ext4 commit path routines */
    676
    677/* memzero and update CRC */
    678static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len,
    679				u32 *crc)
    680{
    681	void *ret;
    682
    683	ret = memset(dst, 0, len);
    684	if (crc)
    685		*crc = ext4_chksum(EXT4_SB(sb), *crc, dst, len);
    686	return ret;
    687}
    688
    689/*
    690 * Allocate len bytes on a fast commit buffer.
    691 *
     692 * At commit time this function is used to manage fast commit
     693 * block space. We don't split a fast commit log entry across
     694 * blocks. So this function makes sure that if there's not enough space
     695 * on the current block, the remaining space in the current block is
     696 * marked as unused by adding an EXT4_FC_TAG_PAD tag. In that case, a
     697 * new block is requested from jbd2 and the CRC is updated to reflect
     698 * the padding we added.
    699 */
    700static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
    701{
    702	struct ext4_fc_tl *tl;
    703	struct ext4_sb_info *sbi = EXT4_SB(sb);
    704	struct buffer_head *bh;
    705	int bsize = sbi->s_journal->j_blocksize;
    706	int ret, off = sbi->s_fc_bytes % bsize;
    707	int pad_len;
    708
    709	/*
    710	 * After allocating len, we should have space at least for a 0 byte
    711	 * padding.
    712	 */
    713	if (len + sizeof(struct ext4_fc_tl) > bsize)
    714		return NULL;
    715
    716	if (bsize - off - 1 > len + sizeof(struct ext4_fc_tl)) {
    717		/*
    718		 * Only allocate from current buffer if we have enough space for
    719		 * this request AND we have space to add a zero byte padding.
    720		 */
    721		if (!sbi->s_fc_bh) {
    722			ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
    723			if (ret)
    724				return NULL;
    725			sbi->s_fc_bh = bh;
    726		}
    727		sbi->s_fc_bytes += len;
    728		return sbi->s_fc_bh->b_data + off;
    729	}
    730	/* Need to add PAD tag */
    731	tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off);
    732	tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
    733	pad_len = bsize - off - 1 - sizeof(struct ext4_fc_tl);
    734	tl->fc_len = cpu_to_le16(pad_len);
    735	if (crc)
    736		*crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl));
    737	if (pad_len > 0)
    738		ext4_fc_memzero(sb, tl + 1, pad_len, crc);
    739	ext4_fc_submit_bh(sb, false);
    740
    741	ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
    742	if (ret)
    743		return NULL;
    744	sbi->s_fc_bh = bh;
    745	sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len;
    746	return sbi->s_fc_bh->b_data;
    747}
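
        /*
         * Worked example for the padding above, assuming a 4096-byte journal
         * block and a 4-byte struct ext4_fc_tl: with off = 4000, a request for
         * len = 100 fails the "current buffer" check (4096 - 4000 - 1 = 95,
         * which is not greater than 100 + 4), so a PAD tag with
         * fc_len = 4096 - 4000 - 1 - 4 = 91 is written, the block is
         * submitted, and the 100 bytes are served from a fresh jbd2 block.
         */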
    748
    749/* memcpy to fc reserved space and update CRC */
    750static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src,
    751				int len, u32 *crc)
    752{
    753	if (crc)
    754		*crc = ext4_chksum(EXT4_SB(sb), *crc, src, len);
    755	return memcpy(dst, src, len);
    756}
    757
    758/*
    759 * Complete a fast commit by writing tail tag.
    760 *
     761 * Writing the tail tag marks the end of a fast commit. In order to guarantee
     762 * atomicity, after writing the tail tag, even if there's space remaining
     763 * in the block, the next commit shouldn't use it. That's why the tail tag's
     764 * length covers all of the remaining space in the block.
    765 */
    766static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
    767{
    768	struct ext4_sb_info *sbi = EXT4_SB(sb);
    769	struct ext4_fc_tl tl;
    770	struct ext4_fc_tail tail;
    771	int off, bsize = sbi->s_journal->j_blocksize;
    772	u8 *dst;
    773
    774	/*
    775	 * ext4_fc_reserve_space takes care of allocating an extra block if
     776	 * there's not enough space in this block to accommodate the tail.
    777	 */
    778	dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(tail), &crc);
    779	if (!dst)
    780		return -ENOSPC;
    781
    782	off = sbi->s_fc_bytes % bsize;
    783
    784	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL);
    785	tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail));
    786	sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize);
    787
    788	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), &crc);
    789	dst += sizeof(tl);
    790	tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid);
    791	ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc);
    792	dst += sizeof(tail.fc_tid);
    793	tail.fc_crc = cpu_to_le32(crc);
    794	ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL);
    795
    796	ext4_fc_submit_bh(sb, true);
    797
    798	return 0;
    799}
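
        /*
         * Sketch of the resulting on-disk tail record (see struct ext4_fc_tail
         * for the authoritative definition):
         *
         *	[EXT4_FC_TAG_TAIL:2][fc_len:2][fc_tid:4][fc_crc:4]
         *
         * where fc_len covers the rest of the block, so the next fast commit
         * starts on a fresh block.
         */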
    800
    801/*
    802 * Adds tag, length, value and updates CRC. Returns true if tlv was added.
    803 * Returns false if there's not enough space.
    804 */
    805static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val,
    806			   u32 *crc)
    807{
    808	struct ext4_fc_tl tl;
    809	u8 *dst;
    810
    811	dst = ext4_fc_reserve_space(sb, sizeof(tl) + len, crc);
    812	if (!dst)
    813		return false;
    814
    815	tl.fc_tag = cpu_to_le16(tag);
    816	tl.fc_len = cpu_to_le16(len);
    817
    818	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
    819	ext4_fc_memcpy(sb, dst + sizeof(tl), val, len, crc);
    820
    821	return true;
    822}
    823
    824/* Same as above, but adds dentry tlv. */
    825static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u32 *crc,
    826				   struct ext4_fc_dentry_update *fc_dentry)
    827{
    828	struct ext4_fc_dentry_info fcd;
    829	struct ext4_fc_tl tl;
    830	int dlen = fc_dentry->fcd_name.len;
    831	u8 *dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(fcd) + dlen,
    832					crc);
    833
    834	if (!dst)
    835		return false;
    836
    837	fcd.fc_parent_ino = cpu_to_le32(fc_dentry->fcd_parent);
    838	fcd.fc_ino = cpu_to_le32(fc_dentry->fcd_ino);
    839	tl.fc_tag = cpu_to_le16(fc_dentry->fcd_op);
    840	tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen);
    841	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
    842	dst += sizeof(tl);
    843	ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc);
    844	dst += sizeof(fcd);
    845	ext4_fc_memcpy(sb, dst, fc_dentry->fcd_name.name, dlen, crc);
    846
    847	return true;
    848}
    849
    850/*
     851 * Writes the inode to the fast commit space as an EXT4_FC_TAG_INODE TLV.
    852 * Returns 0 on success, error on failure.
    853 */
    854static int ext4_fc_write_inode(struct inode *inode, u32 *crc)
    855{
    856	struct ext4_inode_info *ei = EXT4_I(inode);
    857	int inode_len = EXT4_GOOD_OLD_INODE_SIZE;
    858	int ret;
    859	struct ext4_iloc iloc;
    860	struct ext4_fc_inode fc_inode;
    861	struct ext4_fc_tl tl;
    862	u8 *dst;
    863
    864	ret = ext4_get_inode_loc(inode, &iloc);
    865	if (ret)
    866		return ret;
    867
    868	if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
    869		inode_len = EXT4_INODE_SIZE(inode->i_sb);
    870	else if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE)
    871		inode_len += ei->i_extra_isize;
    872
    873	fc_inode.fc_ino = cpu_to_le32(inode->i_ino);
    874	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE);
    875	tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino));
    876
    877	dst = ext4_fc_reserve_space(inode->i_sb,
    878			sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc);
    879	if (!dst)
    880		return -ECANCELED;
    881
    882	if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc))
    883		return -ECANCELED;
    884	dst += sizeof(tl);
    885	if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc))
    886		return -ECANCELED;
    887	dst += sizeof(fc_inode);
    888	if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc),
    889					inode_len, crc))
    890		return -ECANCELED;
    891
    892	return 0;
    893}
    894
    895/*
    896 * Writes updated data ranges for the inode in question. Updates CRC.
    897 * Returns 0 on success, error otherwise.
    898 */
    899static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc)
    900{
    901	ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size;
    902	struct ext4_inode_info *ei = EXT4_I(inode);
    903	struct ext4_map_blocks map;
    904	struct ext4_fc_add_range fc_ext;
    905	struct ext4_fc_del_range lrange;
    906	struct ext4_extent *ex;
    907	int ret;
    908
    909	mutex_lock(&ei->i_fc_lock);
    910	if (ei->i_fc_lblk_len == 0) {
    911		mutex_unlock(&ei->i_fc_lock);
    912		return 0;
    913	}
    914	old_blk_size = ei->i_fc_lblk_start;
    915	new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1;
    916	ei->i_fc_lblk_len = 0;
    917	mutex_unlock(&ei->i_fc_lock);
    918
    919	cur_lblk_off = old_blk_size;
    920	jbd_debug(1, "%s: will try writing %d to %d for inode %ld\n",
    921		  __func__, cur_lblk_off, new_blk_size, inode->i_ino);
    922
    923	while (cur_lblk_off <= new_blk_size) {
    924		map.m_lblk = cur_lblk_off;
    925		map.m_len = new_blk_size - cur_lblk_off + 1;
    926		ret = ext4_map_blocks(NULL, inode, &map, 0);
    927		if (ret < 0)
    928			return -ECANCELED;
    929
    930		if (map.m_len == 0) {
    931			cur_lblk_off++;
    932			continue;
    933		}
    934
    935		if (ret == 0) {
    936			lrange.fc_ino = cpu_to_le32(inode->i_ino);
    937			lrange.fc_lblk = cpu_to_le32(map.m_lblk);
    938			lrange.fc_len = cpu_to_le32(map.m_len);
    939			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE,
    940					    sizeof(lrange), (u8 *)&lrange, crc))
    941				return -ENOSPC;
    942		} else {
    943			unsigned int max = (map.m_flags & EXT4_MAP_UNWRITTEN) ?
    944				EXT_UNWRITTEN_MAX_LEN : EXT_INIT_MAX_LEN;
    945
    946			/* Limit the number of blocks in one extent */
    947			map.m_len = min(max, map.m_len);
    948
    949			fc_ext.fc_ino = cpu_to_le32(inode->i_ino);
    950			ex = (struct ext4_extent *)&fc_ext.fc_ex;
    951			ex->ee_block = cpu_to_le32(map.m_lblk);
    952			ex->ee_len = cpu_to_le16(map.m_len);
    953			ext4_ext_store_pblock(ex, map.m_pblk);
    954			if (map.m_flags & EXT4_MAP_UNWRITTEN)
    955				ext4_ext_mark_unwritten(ex);
    956			else
    957				ext4_ext_mark_initialized(ex);
    958			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE,
    959					    sizeof(fc_ext), (u8 *)&fc_ext, crc))
    960				return -ENOSPC;
    961		}
    962
    963		cur_lblk_off += map.m_len;
    964	}
    965
    966	return 0;
    967}
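
        /*
         * Illustrative walk of the loop above: for a tracked range of blocks
         * 0..9 where blocks 0..3 are mapped, 4..7 are a hole and 8..9 are
         * mapped, ext4_map_blocks() is called three times and the log receives
         * ADD_RANGE(0..3), DEL_RANGE(4..7) and ADD_RANGE(8..9) (assuming each
         * mapped stretch comes back as a single extent).
         */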
    968
    969
    970/* Submit data for all the fast commit inodes */
    971static int ext4_fc_submit_inode_data_all(journal_t *journal)
    972{
    973	struct super_block *sb = journal->j_private;
    974	struct ext4_sb_info *sbi = EXT4_SB(sb);
    975	struct ext4_inode_info *ei;
    976	int ret = 0;
    977
    978	spin_lock(&sbi->s_fc_lock);
    979	list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
    980		ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
    981		while (atomic_read(&ei->i_fc_updates)) {
    982			DEFINE_WAIT(wait);
    983
    984			prepare_to_wait(&ei->i_fc_wait, &wait,
    985						TASK_UNINTERRUPTIBLE);
    986			if (atomic_read(&ei->i_fc_updates)) {
    987				spin_unlock(&sbi->s_fc_lock);
    988				schedule();
    989				spin_lock(&sbi->s_fc_lock);
    990			}
    991			finish_wait(&ei->i_fc_wait, &wait);
    992		}
    993		spin_unlock(&sbi->s_fc_lock);
    994		ret = jbd2_submit_inode_data(ei->jinode);
    995		if (ret)
    996			return ret;
    997		spin_lock(&sbi->s_fc_lock);
    998	}
    999	spin_unlock(&sbi->s_fc_lock);
   1000
   1001	return ret;
   1002}
   1003
   1004/* Wait for completion of data for all the fast commit inodes */
   1005static int ext4_fc_wait_inode_data_all(journal_t *journal)
   1006{
   1007	struct super_block *sb = journal->j_private;
   1008	struct ext4_sb_info *sbi = EXT4_SB(sb);
   1009	struct ext4_inode_info *pos, *n;
   1010	int ret = 0;
   1011
   1012	spin_lock(&sbi->s_fc_lock);
   1013	list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
   1014		if (!ext4_test_inode_state(&pos->vfs_inode,
   1015					   EXT4_STATE_FC_COMMITTING))
   1016			continue;
   1017		spin_unlock(&sbi->s_fc_lock);
   1018
   1019		ret = jbd2_wait_inode_data(journal, pos->jinode);
   1020		if (ret)
   1021			return ret;
   1022		spin_lock(&sbi->s_fc_lock);
   1023	}
   1024	spin_unlock(&sbi->s_fc_lock);
   1025
   1026	return 0;
   1027}
   1028
   1029/* Commit all the directory entry updates */
   1030static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc)
   1031__acquires(&sbi->s_fc_lock)
   1032__releases(&sbi->s_fc_lock)
   1033{
   1034	struct super_block *sb = journal->j_private;
   1035	struct ext4_sb_info *sbi = EXT4_SB(sb);
   1036	struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n;
   1037	struct inode *inode;
   1038	struct ext4_inode_info *ei;
   1039	int ret;
   1040
   1041	if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN]))
   1042		return 0;
   1043	list_for_each_entry_safe(fc_dentry, fc_dentry_n,
   1044				 &sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) {
   1045		if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) {
   1046			spin_unlock(&sbi->s_fc_lock);
   1047			if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) {
   1048				ret = -ENOSPC;
   1049				goto lock_and_exit;
   1050			}
   1051			spin_lock(&sbi->s_fc_lock);
   1052			continue;
   1053		}
   1054		/*
    1055		 * With fcd_dilist we don't need to loop over sbi->s_fc_q to get
    1056		 * the corresponding inode pointer.
   1057		 */
   1058		WARN_ON(list_empty(&fc_dentry->fcd_dilist));
   1059		ei = list_first_entry(&fc_dentry->fcd_dilist,
   1060				struct ext4_inode_info, i_fc_dilist);
   1061		inode = &ei->vfs_inode;
   1062		WARN_ON(inode->i_ino != fc_dentry->fcd_ino);
   1063
   1064		spin_unlock(&sbi->s_fc_lock);
   1065
   1066		/*
   1067		 * We first write the inode and then the create dirent. This
   1068		 * allows the recovery code to create an unnamed inode first
   1069		 * and then link it to a directory entry. This allows us
   1070		 * to use namei.c routines almost as is and simplifies
   1071		 * the recovery code.
   1072		 */
   1073		ret = ext4_fc_write_inode(inode, crc);
   1074		if (ret)
   1075			goto lock_and_exit;
   1076
   1077		ret = ext4_fc_write_inode_data(inode, crc);
   1078		if (ret)
   1079			goto lock_and_exit;
   1080
   1081		if (!ext4_fc_add_dentry_tlv(sb, crc, fc_dentry)) {
   1082			ret = -ENOSPC;
   1083			goto lock_and_exit;
   1084		}
   1085
   1086		spin_lock(&sbi->s_fc_lock);
   1087	}
   1088	return 0;
   1089lock_and_exit:
   1090	spin_lock(&sbi->s_fc_lock);
   1091	return ret;
   1092}
   1093
   1094static int ext4_fc_perform_commit(journal_t *journal)
   1095{
   1096	struct super_block *sb = journal->j_private;
   1097	struct ext4_sb_info *sbi = EXT4_SB(sb);
   1098	struct ext4_inode_info *iter;
   1099	struct ext4_fc_head head;
   1100	struct inode *inode;
   1101	struct blk_plug plug;
   1102	int ret = 0;
   1103	u32 crc = 0;
   1104
   1105	ret = ext4_fc_submit_inode_data_all(journal);
   1106	if (ret)
   1107		return ret;
   1108
   1109	ret = ext4_fc_wait_inode_data_all(journal);
   1110	if (ret)
   1111		return ret;
   1112
   1113	/*
    1114	 * If the file system device is different from the journal device, issue
    1115	 * a cache flush before we start writing fast commit blocks.
   1116	 */
   1117	if (journal->j_fs_dev != journal->j_dev)
   1118		blkdev_issue_flush(journal->j_fs_dev);
   1119
   1120	blk_start_plug(&plug);
   1121	if (sbi->s_fc_bytes == 0) {
   1122		/*
   1123		 * Add a head tag only if this is the first fast commit
   1124		 * in this TID.
   1125		 */
   1126		head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES);
   1127		head.fc_tid = cpu_to_le32(
   1128			sbi->s_journal->j_running_transaction->t_tid);
   1129		if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head),
   1130			(u8 *)&head, &crc)) {
   1131			ret = -ENOSPC;
   1132			goto out;
   1133		}
   1134	}
   1135
   1136	spin_lock(&sbi->s_fc_lock);
   1137	ret = ext4_fc_commit_dentry_updates(journal, &crc);
   1138	if (ret) {
   1139		spin_unlock(&sbi->s_fc_lock);
   1140		goto out;
   1141	}
   1142
   1143	list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
   1144		inode = &iter->vfs_inode;
   1145		if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING))
   1146			continue;
   1147
   1148		spin_unlock(&sbi->s_fc_lock);
   1149		ret = ext4_fc_write_inode_data(inode, &crc);
   1150		if (ret)
   1151			goto out;
   1152		ret = ext4_fc_write_inode(inode, &crc);
   1153		if (ret)
   1154			goto out;
   1155		spin_lock(&sbi->s_fc_lock);
   1156	}
   1157	spin_unlock(&sbi->s_fc_lock);
   1158
   1159	ret = ext4_fc_write_tail(sb, crc);
   1160
   1161out:
   1162	blk_finish_plug(&plug);
   1163	return ret;
   1164}
   1165
   1166static void ext4_fc_update_stats(struct super_block *sb, int status,
   1167				 u64 commit_time, int nblks, tid_t commit_tid)
   1168{
   1169	struct ext4_fc_stats *stats = &EXT4_SB(sb)->s_fc_stats;
   1170
   1171	jbd_debug(1, "Fast commit ended with status = %d for tid %u",
   1172			status, commit_tid);
   1173	if (status == EXT4_FC_STATUS_OK) {
   1174		stats->fc_num_commits++;
   1175		stats->fc_numblks += nblks;
   1176		if (likely(stats->s_fc_avg_commit_time))
   1177			stats->s_fc_avg_commit_time =
   1178				(commit_time +
   1179				 stats->s_fc_avg_commit_time * 3) / 4;
   1180		else
   1181			stats->s_fc_avg_commit_time = commit_time;
   1182	} else if (status == EXT4_FC_STATUS_FAILED ||
   1183		   status == EXT4_FC_STATUS_INELIGIBLE) {
   1184		if (status == EXT4_FC_STATUS_FAILED)
   1185			stats->fc_failed_commits++;
   1186		stats->fc_ineligible_commits++;
   1187	} else {
   1188		stats->fc_skipped_commits++;
   1189	}
   1190	trace_ext4_fc_commit_stop(sb, nblks, status, commit_tid);
   1191}
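
        /*
         * Note on the running average above: it is an exponential moving
         * average,
         *
         *	avg' = (commit_time + 3 * avg) / 4
         *
         * so a new sample contributes a weight of only 1/4, which keeps one
         * outlier commit from skewing the statistics.
         */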
   1192
   1193/*
   1194 * The main commit entry point. Performs a fast commit for transaction
   1195 * commit_tid if needed. If it's not possible to perform a fast commit
   1196 * due to various reasons, we fall back to full commit. Returns 0
   1197 * on success, error otherwise.
   1198 */
   1199int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
   1200{
   1201	struct super_block *sb = journal->j_private;
   1202	struct ext4_sb_info *sbi = EXT4_SB(sb);
   1203	int nblks = 0, ret, bsize = journal->j_blocksize;
   1204	int subtid = atomic_read(&sbi->s_fc_subtid);
   1205	int status = EXT4_FC_STATUS_OK, fc_bufs_before = 0;
   1206	ktime_t start_time, commit_time;
   1207
   1208	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
   1209		return jbd2_complete_transaction(journal, commit_tid);
   1210
   1211	trace_ext4_fc_commit_start(sb, commit_tid);
   1212
   1213	start_time = ktime_get();
   1214
   1215restart_fc:
   1216	ret = jbd2_fc_begin_commit(journal, commit_tid);
   1217	if (ret == -EALREADY) {
   1218		/* There was an ongoing commit, check if we need to restart */
   1219		if (atomic_read(&sbi->s_fc_subtid) <= subtid &&
   1220			commit_tid > journal->j_commit_sequence)
   1221			goto restart_fc;
   1222		ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0,
   1223				commit_tid);
   1224		return 0;
   1225	} else if (ret) {
   1226		/*
   1227		 * Commit couldn't start. Just update stats and perform a
   1228		 * full commit.
   1229		 */
   1230		ext4_fc_update_stats(sb, EXT4_FC_STATUS_FAILED, 0, 0,
   1231				commit_tid);
   1232		return jbd2_complete_transaction(journal, commit_tid);
   1233	}
   1234
   1235	/*
   1236	 * After establishing journal barrier via jbd2_fc_begin_commit(), check
   1237	 * if we are fast commit ineligible.
   1238	 */
   1239	if (ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE)) {
   1240		status = EXT4_FC_STATUS_INELIGIBLE;
   1241		goto fallback;
   1242	}
   1243
   1244	fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
   1245	ret = ext4_fc_perform_commit(journal);
   1246	if (ret < 0) {
   1247		status = EXT4_FC_STATUS_FAILED;
   1248		goto fallback;
   1249	}
   1250	nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before;
   1251	ret = jbd2_fc_wait_bufs(journal, nblks);
   1252	if (ret < 0) {
   1253		status = EXT4_FC_STATUS_FAILED;
   1254		goto fallback;
   1255	}
   1256	atomic_inc(&sbi->s_fc_subtid);
   1257	ret = jbd2_fc_end_commit(journal);
   1258	/*
    1259	 * weight the average commit time higher than the new sample so we
   1260	 * don't react too strongly to vast changes in the commit time
   1261	 */
   1262	commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
   1263	ext4_fc_update_stats(sb, status, commit_time, nblks, commit_tid);
   1264	return ret;
   1265
   1266fallback:
   1267	ret = jbd2_fc_end_commit_fallback(journal);
   1268	ext4_fc_update_stats(sb, status, 0, 0, commit_tid);
   1269	return ret;
   1270}
   1271
   1272/*
   1273 * Fast commit cleanup routine. This is called after every fast commit and
   1274 * full commit. full is true if we are called after a full commit.
   1275 */
   1276static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
   1277{
   1278	struct super_block *sb = journal->j_private;
   1279	struct ext4_sb_info *sbi = EXT4_SB(sb);
   1280	struct ext4_inode_info *iter, *iter_n;
   1281	struct ext4_fc_dentry_update *fc_dentry;
   1282
   1283	if (full && sbi->s_fc_bh)
   1284		sbi->s_fc_bh = NULL;
   1285
   1286	trace_ext4_fc_cleanup(journal, full, tid);
   1287	jbd2_fc_release_bufs(journal);
   1288
   1289	spin_lock(&sbi->s_fc_lock);
   1290	list_for_each_entry_safe(iter, iter_n, &sbi->s_fc_q[FC_Q_MAIN],
   1291				 i_fc_list) {
   1292		list_del_init(&iter->i_fc_list);
   1293		ext4_clear_inode_state(&iter->vfs_inode,
   1294				       EXT4_STATE_FC_COMMITTING);
   1295		if (iter->i_sync_tid <= tid)
   1296			ext4_fc_reset_inode(&iter->vfs_inode);
   1297		/* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
   1298		smp_mb();
   1299#if (BITS_PER_LONG < 64)
   1300		wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING);
   1301#else
   1302		wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING);
   1303#endif
   1304	}
   1305
   1306	while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) {
   1307		fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN],
   1308					     struct ext4_fc_dentry_update,
   1309					     fcd_list);
   1310		list_del_init(&fc_dentry->fcd_list);
   1311		list_del_init(&fc_dentry->fcd_dilist);
   1312		spin_unlock(&sbi->s_fc_lock);
   1313
   1314		if (fc_dentry->fcd_name.name &&
   1315			fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
   1316			kfree(fc_dentry->fcd_name.name);
   1317		kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
   1318		spin_lock(&sbi->s_fc_lock);
   1319	}
   1320
   1321	list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING],
   1322				&sbi->s_fc_dentry_q[FC_Q_MAIN]);
   1323	list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
   1324				&sbi->s_fc_q[FC_Q_MAIN]);
   1325
   1326	if (tid >= sbi->s_fc_ineligible_tid) {
   1327		sbi->s_fc_ineligible_tid = 0;
   1328		ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
   1329	}
   1330
   1331	if (full)
   1332		sbi->s_fc_bytes = 0;
   1333	spin_unlock(&sbi->s_fc_lock);
   1334	trace_ext4_fc_stats(sb);
   1335}
   1336
   1337/* Ext4 Replay Path Routines */
   1338
   1339/* Helper struct for dentry replay routines */
   1340struct dentry_info_args {
   1341	int parent_ino, dname_len, ino, inode_len;
   1342	char *dname;
   1343};
   1344
   1345static inline void tl_to_darg(struct dentry_info_args *darg,
   1346			      struct  ext4_fc_tl *tl, u8 *val)
   1347{
   1348	struct ext4_fc_dentry_info fcd;
   1349
   1350	memcpy(&fcd, val, sizeof(fcd));
   1351
   1352	darg->parent_ino = le32_to_cpu(fcd.fc_parent_ino);
   1353	darg->ino = le32_to_cpu(fcd.fc_ino);
   1354	darg->dname = val + offsetof(struct ext4_fc_dentry_info, fc_dname);
   1355	darg->dname_len = le16_to_cpu(tl->fc_len) -
   1356		sizeof(struct ext4_fc_dentry_info);
   1357}
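
        /*
         * For reference, the value parsed above is laid out as (a sketch; see
         * struct ext4_fc_dentry_info for the authoritative definition):
         *
         *	[fc_parent_ino:4][fc_ino:4][fc_dname:fc_len - 8]
         *
         * which matches what ext4_fc_add_dentry_tlv() wrote on the commit
         * side.
         */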
   1358
   1359/* Unlink replay function */
   1360static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl,
   1361				 u8 *val)
   1362{
   1363	struct inode *inode, *old_parent;
   1364	struct qstr entry;
   1365	struct dentry_info_args darg;
   1366	int ret = 0;
   1367
   1368	tl_to_darg(&darg, tl, val);
   1369
   1370	trace_ext4_fc_replay(sb, EXT4_FC_TAG_UNLINK, darg.ino,
   1371			darg.parent_ino, darg.dname_len);
   1372
   1373	entry.name = darg.dname;
   1374	entry.len = darg.dname_len;
   1375	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
   1376
   1377	if (IS_ERR(inode)) {
   1378		jbd_debug(1, "Inode %d not found", darg.ino);
   1379		return 0;
   1380	}
   1381
   1382	old_parent = ext4_iget(sb, darg.parent_ino,
   1383				EXT4_IGET_NORMAL);
   1384	if (IS_ERR(old_parent)) {
   1385		jbd_debug(1, "Dir with inode  %d not found", darg.parent_ino);
   1386		iput(inode);
   1387		return 0;
   1388	}
   1389
   1390	ret = __ext4_unlink(NULL, old_parent, &entry, inode);
    1391	/* -ENOENT is ok because the entry might not exist anymore. */
   1392	if (ret == -ENOENT)
   1393		ret = 0;
   1394	iput(old_parent);
   1395	iput(inode);
   1396	return ret;
   1397}
   1398
   1399static int ext4_fc_replay_link_internal(struct super_block *sb,
   1400				struct dentry_info_args *darg,
   1401				struct inode *inode)
   1402{
   1403	struct inode *dir = NULL;
   1404	struct dentry *dentry_dir = NULL, *dentry_inode = NULL;
   1405	struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len);
   1406	int ret = 0;
   1407
   1408	dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL);
   1409	if (IS_ERR(dir)) {
   1410		jbd_debug(1, "Dir with inode %d not found.", darg->parent_ino);
   1411		dir = NULL;
   1412		goto out;
   1413	}
   1414
   1415	dentry_dir = d_obtain_alias(dir);
   1416	if (IS_ERR(dentry_dir)) {
   1417		jbd_debug(1, "Failed to obtain dentry");
   1418		dentry_dir = NULL;
   1419		goto out;
   1420	}
   1421
   1422	dentry_inode = d_alloc(dentry_dir, &qstr_dname);
   1423	if (!dentry_inode) {
   1424		jbd_debug(1, "Inode dentry not created.");
   1425		ret = -ENOMEM;
   1426		goto out;
   1427	}
   1428
   1429	ret = __ext4_link(dir, inode, dentry_inode);
   1430	/*
    1431	 * It's possible that the link already existed, either because the
    1432	 * data blocks for the dir in question got persisted before we
    1433	 * crashed, or because we replayed this tag and crashed before the
    1434	 * entire replay could complete.
   1435	 */
   1436	if (ret && ret != -EEXIST) {
   1437		jbd_debug(1, "Failed to link\n");
   1438		goto out;
   1439	}
   1440
   1441	ret = 0;
   1442out:
   1443	if (dentry_dir) {
   1444		d_drop(dentry_dir);
   1445		dput(dentry_dir);
   1446	} else if (dir) {
   1447		iput(dir);
   1448	}
   1449	if (dentry_inode) {
   1450		d_drop(dentry_inode);
   1451		dput(dentry_inode);
   1452	}
   1453
   1454	return ret;
   1455}
   1456
   1457/* Link replay function */
   1458static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl,
   1459			       u8 *val)
   1460{
   1461	struct inode *inode;
   1462	struct dentry_info_args darg;
   1463	int ret = 0;
   1464
   1465	tl_to_darg(&darg, tl, val);
   1466	trace_ext4_fc_replay(sb, EXT4_FC_TAG_LINK, darg.ino,
   1467			darg.parent_ino, darg.dname_len);
   1468
   1469	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
   1470	if (IS_ERR(inode)) {
   1471		jbd_debug(1, "Inode not found.");
   1472		return 0;
   1473	}
   1474
   1475	ret = ext4_fc_replay_link_internal(sb, &darg, inode);
   1476	iput(inode);
   1477	return ret;
   1478}
   1479
   1480/*
    1481 * Record all the modified inodes during replay. We use this later to set up
   1482 * block bitmaps correctly.
   1483 */
   1484static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
   1485{
   1486	struct ext4_fc_replay_state *state;
   1487	int i;
   1488
   1489	state = &EXT4_SB(sb)->s_fc_replay_state;
   1490	for (i = 0; i < state->fc_modified_inodes_used; i++)
   1491		if (state->fc_modified_inodes[i] == ino)
   1492			return 0;
   1493	if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) {
   1494		state->fc_modified_inodes = krealloc(
   1495				state->fc_modified_inodes,
   1496				sizeof(int) * (state->fc_modified_inodes_size +
   1497				EXT4_FC_REPLAY_REALLOC_INCREMENT),
   1498				GFP_KERNEL);
   1499		if (!state->fc_modified_inodes)
   1500			return -ENOMEM;
   1501		state->fc_modified_inodes_size +=
   1502			EXT4_FC_REPLAY_REALLOC_INCREMENT;
   1503	}
   1504	state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino;
   1505	return 0;
   1506}
   1507
   1508/*
   1509 * Inode replay function
   1510 */
   1511static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
   1512				u8 *val)
   1513{
   1514	struct ext4_fc_inode fc_inode;
   1515	struct ext4_inode *raw_inode;
   1516	struct ext4_inode *raw_fc_inode;
   1517	struct inode *inode = NULL;
   1518	struct ext4_iloc iloc;
   1519	int inode_len, ino, ret, tag = le16_to_cpu(tl->fc_tag);
   1520	struct ext4_extent_header *eh;
   1521
   1522	memcpy(&fc_inode, val, sizeof(fc_inode));
   1523
   1524	ino = le32_to_cpu(fc_inode.fc_ino);
   1525	trace_ext4_fc_replay(sb, tag, ino, 0, 0);
   1526
   1527	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
   1528	if (!IS_ERR(inode)) {
   1529		ext4_ext_clear_bb(inode);
   1530		iput(inode);
   1531	}
   1532	inode = NULL;
   1533
   1534	ret = ext4_fc_record_modified_inode(sb, ino);
   1535	if (ret)
   1536		goto out;
   1537
   1538	raw_fc_inode = (struct ext4_inode *)
   1539		(val + offsetof(struct ext4_fc_inode, fc_raw_inode));
   1540	ret = ext4_get_fc_inode_loc(sb, ino, &iloc);
   1541	if (ret)
   1542		goto out;
   1543
   1544	inode_len = le16_to_cpu(tl->fc_len) - sizeof(struct ext4_fc_inode);
   1545	raw_inode = ext4_raw_inode(&iloc);
   1546
   1547	memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block));
   1548	memcpy(&raw_inode->i_generation, &raw_fc_inode->i_generation,
   1549		inode_len - offsetof(struct ext4_inode, i_generation));
   1550	if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) {
   1551		eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]);
   1552		if (eh->eh_magic != EXT4_EXT_MAGIC) {
   1553			memset(eh, 0, sizeof(*eh));
   1554			eh->eh_magic = EXT4_EXT_MAGIC;
   1555			eh->eh_max = cpu_to_le16(
   1556				(sizeof(raw_inode->i_block) -
   1557				 sizeof(struct ext4_extent_header))
   1558				 / sizeof(struct ext4_extent));
   1559		}
   1560	} else if (le32_to_cpu(raw_inode->i_flags) & EXT4_INLINE_DATA_FL) {
   1561		memcpy(raw_inode->i_block, raw_fc_inode->i_block,
   1562			sizeof(raw_inode->i_block));
   1563	}
   1564
   1565	/* Immediately update the inode on disk. */
   1566	ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
   1567	if (ret)
   1568		goto out;
   1569	ret = sync_dirty_buffer(iloc.bh);
   1570	if (ret)
   1571		goto out;
   1572	ret = ext4_mark_inode_used(sb, ino);
   1573	if (ret)
   1574		goto out;
   1575
   1576	/* Given that we just wrote the inode on disk, this SHOULD succeed. */
   1577	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
   1578	if (IS_ERR(inode)) {
   1579		jbd_debug(1, "Inode not found.");
   1580		return -EFSCORRUPTED;
   1581	}
   1582
   1583	/*
   1584	 * Our allocator could have made different decisions than before
   1585	 * crashing. This should be fixed but until then, we calculate
	 * the number of blocks in the inode.
   1587	 */
   1588	if (!ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
   1589		ext4_ext_replay_set_iblocks(inode);
   1590
   1591	inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation);
   1592	ext4_reset_inode_seed(inode);
   1593
   1594	ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode));
   1595	ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
   1596	sync_dirty_buffer(iloc.bh);
   1597	brelse(iloc.bh);
   1598out:
   1599	iput(inode);
   1600	if (!ret)
   1601		blkdev_issue_flush(sb->s_bdev);
   1602
   1603	return 0;
   1604}
   1605
/*
 * Dentry create replay function.
 *
 * EXT4_FC_TAG_CREAT is preceded by EXT4_FC_TAG_INODE, which means that the
 * inode for which we are trying to create a dentry here should already have
 * been replayed before we get here.
 */
   1613static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl,
   1614				 u8 *val)
   1615{
   1616	int ret = 0;
   1617	struct inode *inode = NULL;
   1618	struct inode *dir = NULL;
   1619	struct dentry_info_args darg;
   1620
   1621	tl_to_darg(&darg, tl, val);
   1622
   1623	trace_ext4_fc_replay(sb, EXT4_FC_TAG_CREAT, darg.ino,
   1624			darg.parent_ino, darg.dname_len);
   1625
	/* This takes care of updating the group descriptor and other metadata */
   1627	ret = ext4_mark_inode_used(sb, darg.ino);
   1628	if (ret)
   1629		goto out;
   1630
   1631	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
   1632	if (IS_ERR(inode)) {
   1633		jbd_debug(1, "inode %d not found.", darg.ino);
   1634		inode = NULL;
   1635		ret = -EINVAL;
   1636		goto out;
   1637	}
   1638
   1639	if (S_ISDIR(inode->i_mode)) {
   1640		/*
   1641		 * If we are creating a directory, we need to make sure that the
		 * dot and dot dot dirents are set up properly.
   1643		 */
   1644		dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL);
   1645		if (IS_ERR(dir)) {
			jbd_debug(1, "Dir %d not found.", darg.parent_ino);
   1647			goto out;
   1648		}
   1649		ret = ext4_init_new_dir(NULL, dir, inode);
   1650		iput(dir);
   1651		if (ret) {
   1652			ret = 0;
   1653			goto out;
   1654		}
   1655	}
   1656	ret = ext4_fc_replay_link_internal(sb, &darg, inode);
   1657	if (ret)
   1658		goto out;
   1659	set_nlink(inode, 1);
   1660	ext4_mark_inode_dirty(NULL, inode);
   1661out:
   1662	iput(inode);
   1663	return ret;
   1664}
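
/*
 * For illustration only: a fast commit area that creates one new file is a
 * TLV sequence roughly of the form (exact contents depend on the workload):
 *
 *	EXT4_FC_TAG_HEAD	(features, tid)
 *	EXT4_FC_TAG_INODE	(raw inode of the new file)
 *	EXT4_FC_TAG_CREAT	(ino, parent ino and name of the new dentry)
 *	EXT4_FC_TAG_TAIL	(tid, CRC over the preceding tags)
 *
 * This ordering is what guarantees that the inode has already been replayed
 * by the time ext4_fc_replay_create() runs.
 */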
   1665
/*
 * Record the physical disk regions that are in use as per the fast commit
 * area, and that are used by inodes during the replay phase. Our simple
 * replay-phase allocator excludes these regions from allocation.
 */
   1671int ext4_fc_record_regions(struct super_block *sb, int ino,
   1672		ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay)
   1673{
   1674	struct ext4_fc_replay_state *state;
   1675	struct ext4_fc_alloc_region *region;
   1676
   1677	state = &EXT4_SB(sb)->s_fc_replay_state;
	/*
	 * During the replay phase, fc_regions_valid may not be the same as
	 * fc_regions_used; bring them back in sync before recording new
	 * additions.
	 */
   1682	if (replay && state->fc_regions_used != state->fc_regions_valid)
   1683		state->fc_regions_used = state->fc_regions_valid;
	if (state->fc_regions_used == state->fc_regions_size) {
		struct ext4_fc_alloc_region *fc_regions;

		/*
		 * As above, go through a temporary pointer so that the old
		 * array is not leaked if krealloc() fails, and only grow
		 * fc_regions_size once the allocation has succeeded.
		 */
		fc_regions = krealloc(state->fc_regions,
				      sizeof(struct ext4_fc_alloc_region) *
				      (state->fc_regions_size +
				       EXT4_FC_REPLAY_REALLOC_INCREMENT),
				      GFP_KERNEL);
		if (!fc_regions)
			return -ENOMEM;
		state->fc_regions_size +=
			EXT4_FC_REPLAY_REALLOC_INCREMENT;
		state->fc_regions = fc_regions;
	}
   1695	region = &state->fc_regions[state->fc_regions_used++];
   1696	region->ino = ino;
   1697	region->lblk = lblk;
   1698	region->pblk = pblk;
   1699	region->len = len;
   1700
   1701	if (replay)
   1702		state->fc_regions_valid++;
   1703
   1704	return 0;
   1705}
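
/*
 * Consumption sketch: the replay-phase allocator is expected to skip any
 * candidate block that falls inside a recorded region, conceptually:
 *
 *	if (ext4_fc_replay_check_excluded(sb, blk))
 *		continue;
 *
 * See ext4_fc_replay_check_excluded() below.
 */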
   1706
   1707/* Replay add range tag */
   1708static int ext4_fc_replay_add_range(struct super_block *sb,
   1709				    struct ext4_fc_tl *tl, u8 *val)
   1710{
   1711	struct ext4_fc_add_range fc_add_ex;
   1712	struct ext4_extent newex, *ex;
   1713	struct inode *inode;
   1714	ext4_lblk_t start, cur;
   1715	int remaining, len;
   1716	ext4_fsblk_t start_pblk;
   1717	struct ext4_map_blocks map;
   1718	struct ext4_ext_path *path = NULL;
   1719	int ret;
   1720
   1721	memcpy(&fc_add_ex, val, sizeof(fc_add_ex));
   1722	ex = (struct ext4_extent *)&fc_add_ex.fc_ex;
   1723
   1724	trace_ext4_fc_replay(sb, EXT4_FC_TAG_ADD_RANGE,
   1725		le32_to_cpu(fc_add_ex.fc_ino), le32_to_cpu(ex->ee_block),
   1726		ext4_ext_get_actual_len(ex));
   1727
   1728	inode = ext4_iget(sb, le32_to_cpu(fc_add_ex.fc_ino), EXT4_IGET_NORMAL);
   1729	if (IS_ERR(inode)) {
   1730		jbd_debug(1, "Inode not found.");
   1731		return 0;
   1732	}
   1733
   1734	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
   1735	if (ret)
   1736		goto out;
   1737
   1738	start = le32_to_cpu(ex->ee_block);
   1739	start_pblk = ext4_ext_pblock(ex);
   1740	len = ext4_ext_get_actual_len(ex);
   1741
   1742	cur = start;
   1743	remaining = len;
   1744	jbd_debug(1, "ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n",
   1745		  start, start_pblk, len, ext4_ext_is_unwritten(ex),
   1746		  inode->i_ino);
   1747
   1748	while (remaining > 0) {
   1749		map.m_lblk = cur;
   1750		map.m_len = remaining;
   1751		map.m_pblk = 0;
   1752		ret = ext4_map_blocks(NULL, inode, &map, 0);
   1753
   1754		if (ret < 0)
   1755			goto out;
   1756
   1757		if (ret == 0) {
   1758			/* Range is not mapped */
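			/*
			 * ext4_map_blocks() found a hole at cur and set
			 * map.m_len to the hole's length, so insert a fresh
			 * extent covering that stretch of the range.
			 */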
   1759			path = ext4_find_extent(inode, cur, NULL, 0);
   1760			if (IS_ERR(path))
   1761				goto out;
   1762			memset(&newex, 0, sizeof(newex));
   1763			newex.ee_block = cpu_to_le32(cur);
   1764			ext4_ext_store_pblock(
   1765				&newex, start_pblk + cur - start);
   1766			newex.ee_len = cpu_to_le16(map.m_len);
   1767			if (ext4_ext_is_unwritten(ex))
   1768				ext4_ext_mark_unwritten(&newex);
   1769			down_write(&EXT4_I(inode)->i_data_sem);
   1770			ret = ext4_ext_insert_extent(
   1771				NULL, inode, &path, &newex, 0);
			up_write(&EXT4_I(inode)->i_data_sem);
   1773			ext4_ext_drop_refs(path);
   1774			kfree(path);
   1775			if (ret)
   1776				goto out;
   1777			goto next;
   1778		}
   1779
   1780		if (start_pblk + cur - start != map.m_pblk) {
   1781			/*
   1782			 * Logical to physical mapping changed. This can happen
   1783			 * if this range was removed and then reallocated to
   1784			 * map to new physical blocks during a fast commit.
   1785			 */
   1786			ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
   1787					ext4_ext_is_unwritten(ex),
   1788					start_pblk + cur - start);
   1789			if (ret)
   1790				goto out;
   1791			/*
   1792			 * Mark the old blocks as free since they aren't used
   1793			 * anymore. We maintain an array of all the modified
   1794			 * inodes. In case these blocks are still used at either
   1795			 * a different logical range in the same inode or in
   1796			 * some different inode, we will mark them as allocated
   1797			 * at the end of the FC replay using our array of
   1798			 * modified inodes.
   1799			 */
   1800			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
   1801			goto next;
   1802		}
   1803
   1804		/* Range is mapped and needs a state change */
   1805		jbd_debug(1, "Converting from %ld to %d %lld",
   1806				map.m_flags & EXT4_MAP_UNWRITTEN,
   1807			ext4_ext_is_unwritten(ex), map.m_pblk);
   1808		ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
   1809					ext4_ext_is_unwritten(ex), map.m_pblk);
   1810		if (ret)
   1811			goto out;
   1812		/*
   1813		 * We may have split the extent tree while toggling the state.
   1814		 * Try to shrink the extent tree now.
   1815		 */
   1816		ext4_ext_replay_shrink_inode(inode, start + len);
   1817next:
   1818		cur += map.m_len;
   1819		remaining -= map.m_len;
   1820	}
   1821	ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>
   1822					sb->s_blocksize_bits);
   1823out:
   1824	iput(inode);
   1825	return 0;
   1826}
   1827
   1828/* Replay DEL_RANGE tag */
   1829static int
   1830ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl,
   1831			 u8 *val)
   1832{
   1833	struct inode *inode;
   1834	struct ext4_fc_del_range lrange;
   1835	struct ext4_map_blocks map;
   1836	ext4_lblk_t cur, remaining;
   1837	int ret;
   1838
   1839	memcpy(&lrange, val, sizeof(lrange));
   1840	cur = le32_to_cpu(lrange.fc_lblk);
   1841	remaining = le32_to_cpu(lrange.fc_len);
   1842
   1843	trace_ext4_fc_replay(sb, EXT4_FC_TAG_DEL_RANGE,
   1844		le32_to_cpu(lrange.fc_ino), cur, remaining);
   1845
   1846	inode = ext4_iget(sb, le32_to_cpu(lrange.fc_ino), EXT4_IGET_NORMAL);
   1847	if (IS_ERR(inode)) {
   1848		jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange.fc_ino));
   1849		return 0;
   1850	}
   1851
   1852	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
   1853	if (ret)
   1854		goto out;
   1855
   1856	jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n",
   1857			inode->i_ino, le32_to_cpu(lrange.fc_lblk),
   1858			le32_to_cpu(lrange.fc_len));
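	/*
	 * First release the range's blocks in the block bitmaps; the extents
	 * themselves are removed from the tree by ext4_ext_remove_space()
	 * below.
	 */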
   1859	while (remaining > 0) {
   1860		map.m_lblk = cur;
   1861		map.m_len = remaining;
   1862
   1863		ret = ext4_map_blocks(NULL, inode, &map, 0);
   1864		if (ret < 0)
   1865			goto out;
   1866		if (ret > 0) {
   1867			remaining -= ret;
   1868			cur += ret;
   1869			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
   1870		} else {
   1871			remaining -= map.m_len;
   1872			cur += map.m_len;
   1873		}
   1874	}
   1875
   1876	down_write(&EXT4_I(inode)->i_data_sem);
   1877	ret = ext4_ext_remove_space(inode, le32_to_cpu(lrange.fc_lblk),
   1878				le32_to_cpu(lrange.fc_lblk) +
   1879				le32_to_cpu(lrange.fc_len) - 1);
   1880	up_write(&EXT4_I(inode)->i_data_sem);
   1881	if (ret)
   1882		goto out;
   1883	ext4_ext_replay_shrink_inode(inode,
   1884		i_size_read(inode) >> sb->s_blocksize_bits);
   1885	ext4_mark_inode_dirty(NULL, inode);
   1886out:
   1887	iput(inode);
   1888	return 0;
   1889}
   1890
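/*
 * Walk every inode recorded by ext4_fc_record_modified_inode() and mark both
 * its mapped data blocks and the blocks of its extent tree as in-use in the
 * block bitmaps. This runs once all tags have been replayed.
 */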
   1891static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
   1892{
   1893	struct ext4_fc_replay_state *state;
   1894	struct inode *inode;
   1895	struct ext4_ext_path *path = NULL;
   1896	struct ext4_map_blocks map;
   1897	int i, ret, j;
   1898	ext4_lblk_t cur, end;
   1899
   1900	state = &EXT4_SB(sb)->s_fc_replay_state;
   1901	for (i = 0; i < state->fc_modified_inodes_used; i++) {
   1902		inode = ext4_iget(sb, state->fc_modified_inodes[i],
   1903			EXT4_IGET_NORMAL);
   1904		if (IS_ERR(inode)) {
   1905			jbd_debug(1, "Inode %d not found.",
   1906				state->fc_modified_inodes[i]);
   1907			continue;
   1908		}
   1909		cur = 0;
   1910		end = EXT_MAX_BLOCKS;
   1911		if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA)) {
   1912			iput(inode);
   1913			continue;
   1914		}
   1915		while (cur < end) {
   1916			map.m_lblk = cur;
   1917			map.m_len = end - cur;
   1918
   1919			ret = ext4_map_blocks(NULL, inode, &map, 0);
   1920			if (ret < 0)
   1921				break;
   1922
   1923			if (ret > 0) {
   1924				path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
   1925				if (!IS_ERR(path)) {
   1926					for (j = 0; j < path->p_depth; j++)
   1927						ext4_mb_mark_bb(inode->i_sb,
   1928							path[j].p_block, 1, 1);
   1929					ext4_ext_drop_refs(path);
   1930					kfree(path);
   1931				}
   1932				cur += ret;
   1933				ext4_mb_mark_bb(inode->i_sb, map.m_pblk,
   1934							map.m_len, 1);
   1935			} else {
   1936				cur = cur + (map.m_len ? map.m_len : 1);
   1937			}
   1938		}
   1939		iput(inode);
   1940	}
   1941}
   1942
/*
 * Check if a block is in the excluded regions for block allocation. The
 * simple allocator that runs during the replay phase calls this function to
 * see if it is okay to use a block.
 */
   1948bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk)
   1949{
   1950	int i;
   1951	struct ext4_fc_replay_state *state;
   1952
   1953	state = &EXT4_SB(sb)->s_fc_replay_state;
   1954	for (i = 0; i < state->fc_regions_valid; i++) {
   1955		if (state->fc_regions[i].ino == 0 ||
   1956			state->fc_regions[i].len == 0)
   1957			continue;
   1958		if (in_range(blk, state->fc_regions[i].pblk,
   1959					state->fc_regions[i].len))
   1960			return true;
   1961	}
   1962	return false;
   1963}
   1964
   1965/* Cleanup function called after replay */
   1966void ext4_fc_replay_cleanup(struct super_block *sb)
   1967{
   1968	struct ext4_sb_info *sbi = EXT4_SB(sb);
   1969
   1970	sbi->s_mount_state &= ~EXT4_FC_REPLAY;
   1971	kfree(sbi->s_fc_replay_state.fc_regions);
   1972	kfree(sbi->s_fc_replay_state.fc_modified_inodes);
   1973}
   1974
/*
 * Recovery Scan phase handler
 *
 * This function is called during the scan phase and is responsible
 * for doing the following things:
 * - Make sure the fast commit area has valid tags for replay
 * - Count the number of tags that need to be replayed by the replay handler
 * - Verify CRC
 * - Create a list of excluded blocks for allocation during the replay phase
 *
 * This function returns JBD2_FC_REPLAY_CONTINUE to indicate that SCAN is
 * incomplete and JBD2 should send more blocks. It returns JBD2_FC_REPLAY_STOP
 * to indicate that the scan has finished and JBD2 can now start the replay
 * phase. It returns a negative error to indicate that there was an error. At
 * the end of a successful scan phase, sbi->s_fc_replay_state.fc_replay_num_tags
 * is set to indicate the number of tags that need to be replayed during the
 * replay phase.
 */
   1992static int ext4_fc_replay_scan(journal_t *journal,
   1993				struct buffer_head *bh, int off,
   1994				tid_t expected_tid)
   1995{
   1996	struct super_block *sb = journal->j_private;
   1997	struct ext4_sb_info *sbi = EXT4_SB(sb);
   1998	struct ext4_fc_replay_state *state;
   1999	int ret = JBD2_FC_REPLAY_CONTINUE;
   2000	struct ext4_fc_add_range ext;
   2001	struct ext4_fc_tl tl;
   2002	struct ext4_fc_tail tail;
   2003	__u8 *start, *end, *cur, *val;
   2004	struct ext4_fc_head head;
   2005	struct ext4_extent *ex;
   2006
   2007	state = &sbi->s_fc_replay_state;
   2008
   2009	start = (u8 *)bh->b_data;
   2010	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
   2011
   2012	if (state->fc_replay_expected_off == 0) {
   2013		state->fc_cur_tag = 0;
   2014		state->fc_replay_num_tags = 0;
   2015		state->fc_crc = 0;
   2016		state->fc_regions = NULL;
   2017		state->fc_regions_valid = state->fc_regions_used =
   2018			state->fc_regions_size = 0;
   2019		/* Check if we can stop early */
   2020		if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag)
   2021			!= EXT4_FC_TAG_HEAD)
   2022			return 0;
   2023	}
   2024
   2025	if (off != state->fc_replay_expected_off) {
   2026		ret = -EFSCORRUPTED;
   2027		goto out_err;
   2028	}
   2029
   2030	state->fc_replay_expected_off++;
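	/*
	 * Walk the block as a TLV sequence: read one ext4_fc_tl header per
	 * iteration, then advance by sizeof(tl) + fc_len to the next tag.
	 */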
   2031	for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) {
   2032		memcpy(&tl, cur, sizeof(tl));
   2033		val = cur + sizeof(tl);
   2034		jbd_debug(3, "Scan phase, tag:%s, blk %lld\n",
   2035			  tag2str(le16_to_cpu(tl.fc_tag)), bh->b_blocknr);
   2036		switch (le16_to_cpu(tl.fc_tag)) {
   2037		case EXT4_FC_TAG_ADD_RANGE:
   2038			memcpy(&ext, val, sizeof(ext));
   2039			ex = (struct ext4_extent *)&ext.fc_ex;
   2040			ret = ext4_fc_record_regions(sb,
   2041				le32_to_cpu(ext.fc_ino),
   2042				le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
   2043				ext4_ext_get_actual_len(ex), 0);
   2044			if (ret < 0)
   2045				break;
   2046			ret = JBD2_FC_REPLAY_CONTINUE;
   2047			fallthrough;
   2048		case EXT4_FC_TAG_DEL_RANGE:
   2049		case EXT4_FC_TAG_LINK:
   2050		case EXT4_FC_TAG_UNLINK:
   2051		case EXT4_FC_TAG_CREAT:
   2052		case EXT4_FC_TAG_INODE:
   2053		case EXT4_FC_TAG_PAD:
   2054			state->fc_cur_tag++;
   2055			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
   2056					sizeof(tl) + le16_to_cpu(tl.fc_len));
   2057			break;
   2058		case EXT4_FC_TAG_TAIL:
   2059			state->fc_cur_tag++;
   2060			memcpy(&tail, val, sizeof(tail));
   2061			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
   2062						sizeof(tl) +
   2063						offsetof(struct ext4_fc_tail,
   2064						fc_crc));
   2065			if (le32_to_cpu(tail.fc_tid) == expected_tid &&
   2066				le32_to_cpu(tail.fc_crc) == state->fc_crc) {
   2067				state->fc_replay_num_tags = state->fc_cur_tag;
   2068				state->fc_regions_valid =
   2069					state->fc_regions_used;
   2070			} else {
   2071				ret = state->fc_replay_num_tags ?
   2072					JBD2_FC_REPLAY_STOP : -EFSBADCRC;
   2073			}
   2074			state->fc_crc = 0;
   2075			break;
   2076		case EXT4_FC_TAG_HEAD:
   2077			memcpy(&head, val, sizeof(head));
   2078			if (le32_to_cpu(head.fc_features) &
   2079				~EXT4_FC_SUPPORTED_FEATURES) {
   2080				ret = -EOPNOTSUPP;
   2081				break;
   2082			}
   2083			if (le32_to_cpu(head.fc_tid) != expected_tid) {
   2084				ret = JBD2_FC_REPLAY_STOP;
   2085				break;
   2086			}
   2087			state->fc_cur_tag++;
   2088			state->fc_crc = ext4_chksum(sbi, state->fc_crc, cur,
   2089					    sizeof(tl) + le16_to_cpu(tl.fc_len));
   2090			break;
   2091		default:
   2092			ret = state->fc_replay_num_tags ?
   2093				JBD2_FC_REPLAY_STOP : -ECANCELED;
   2094		}
   2095		if (ret < 0 || ret == JBD2_FC_REPLAY_STOP)
   2096			break;
   2097	}
   2098
   2099out_err:
   2100	trace_ext4_fc_replay_scan(sb, ret, off);
   2101	return ret;
   2102}
   2103
/*
 * Main recovery path entry point.
 * The return codes have the same meaning as those of ext4_fc_replay_scan()
 * above.
 */
   2108static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
   2109				enum passtype pass, int off, tid_t expected_tid)
   2110{
   2111	struct super_block *sb = journal->j_private;
   2112	struct ext4_sb_info *sbi = EXT4_SB(sb);
   2113	struct ext4_fc_tl tl;
   2114	__u8 *start, *end, *cur, *val;
   2115	int ret = JBD2_FC_REPLAY_CONTINUE;
   2116	struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state;
   2117	struct ext4_fc_tail tail;
   2118
   2119	if (pass == PASS_SCAN) {
   2120		state->fc_current_pass = PASS_SCAN;
   2121		return ext4_fc_replay_scan(journal, bh, off, expected_tid);
   2122	}
   2123
   2124	if (state->fc_current_pass != pass) {
   2125		state->fc_current_pass = pass;
   2126		sbi->s_mount_state |= EXT4_FC_REPLAY;
   2127	}
   2128	if (!sbi->s_fc_replay_state.fc_replay_num_tags) {
   2129		jbd_debug(1, "Replay stops\n");
   2130		ext4_fc_set_bitmaps_and_counters(sb);
   2131		return 0;
   2132	}
   2133
   2134#ifdef CONFIG_EXT4_DEBUG
   2135	if (sbi->s_fc_debug_max_replay && off >= sbi->s_fc_debug_max_replay) {
   2136		pr_warn("Dropping fc block %d because max_replay set\n", off);
   2137		return JBD2_FC_REPLAY_STOP;
   2138	}
   2139#endif
   2140
   2141	start = (u8 *)bh->b_data;
   2142	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
   2143
   2144	for (cur = start; cur < end; cur = cur + sizeof(tl) + le16_to_cpu(tl.fc_len)) {
   2145		memcpy(&tl, cur, sizeof(tl));
   2146		val = cur + sizeof(tl);
   2147
   2148		if (state->fc_replay_num_tags == 0) {
   2149			ret = JBD2_FC_REPLAY_STOP;
   2150			ext4_fc_set_bitmaps_and_counters(sb);
   2151			break;
   2152		}
   2153		jbd_debug(3, "Replay phase, tag:%s\n",
   2154				tag2str(le16_to_cpu(tl.fc_tag)));
   2155		state->fc_replay_num_tags--;
   2156		switch (le16_to_cpu(tl.fc_tag)) {
   2157		case EXT4_FC_TAG_LINK:
   2158			ret = ext4_fc_replay_link(sb, &tl, val);
   2159			break;
   2160		case EXT4_FC_TAG_UNLINK:
   2161			ret = ext4_fc_replay_unlink(sb, &tl, val);
   2162			break;
   2163		case EXT4_FC_TAG_ADD_RANGE:
   2164			ret = ext4_fc_replay_add_range(sb, &tl, val);
   2165			break;
   2166		case EXT4_FC_TAG_CREAT:
   2167			ret = ext4_fc_replay_create(sb, &tl, val);
   2168			break;
   2169		case EXT4_FC_TAG_DEL_RANGE:
   2170			ret = ext4_fc_replay_del_range(sb, &tl, val);
   2171			break;
   2172		case EXT4_FC_TAG_INODE:
   2173			ret = ext4_fc_replay_inode(sb, &tl, val);
   2174			break;
   2175		case EXT4_FC_TAG_PAD:
   2176			trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0,
   2177					     le16_to_cpu(tl.fc_len), 0);
   2178			break;
   2179		case EXT4_FC_TAG_TAIL:
   2180			trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 0,
   2181					     le16_to_cpu(tl.fc_len), 0);
   2182			memcpy(&tail, val, sizeof(tail));
   2183			WARN_ON(le32_to_cpu(tail.fc_tid) != expected_tid);
   2184			break;
   2185		case EXT4_FC_TAG_HEAD:
   2186			break;
   2187		default:
   2188			trace_ext4_fc_replay(sb, le16_to_cpu(tl.fc_tag), 0,
   2189					     le16_to_cpu(tl.fc_len), 0);
   2190			ret = -ECANCELED;
   2191			break;
   2192		}
   2193		if (ret < 0)
   2194			break;
   2195		ret = JBD2_FC_REPLAY_CONTINUE;
   2196	}
   2197	return ret;
   2198}
   2199
   2200void ext4_fc_init(struct super_block *sb, journal_t *journal)
   2201{
	/*
	 * We set the replay callback even if fast commit is disabled, because
	 * we could still have fast commit blocks that need to be replayed
	 * even if fast commit has now been turned off.
	 */
   2207	journal->j_fc_replay_callback = ext4_fc_replay;
   2208	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
   2209		return;
   2210	journal->j_fc_cleanup_callback = ext4_fc_cleanup;
   2211}
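
/*
 * Calling-convention sketch (see fs/jbd2/recovery.c): jbd2 invokes
 * j_fc_replay_callback once per fast commit block, first with PASS_SCAN for
 * every block and then again with PASS_REPLAY, passing the block's
 * buffer_head, its offset within the fast commit area and the expected tid.
 */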
   2212
   2213static const char *fc_ineligible_reasons[] = {
   2214	"Extended attributes changed",
   2215	"Cross rename",
   2216	"Journal flag changed",
   2217	"Insufficient memory",
   2218	"Swap boot",
   2219	"Resize",
   2220	"Dir renamed",
   2221	"Falloc range op",
   2222	"Data journalling",
   2223	"FC Commit Failed"
   2224};
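
/*
 * Note: the strings above are indexed by the EXT4_FC_REASON_* values, so
 * their order must stay in sync with that enum in ext4.h.
 */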
   2225
   2226int ext4_fc_info_show(struct seq_file *seq, void *v)
   2227{
   2228	struct ext4_sb_info *sbi = EXT4_SB((struct super_block *)seq->private);
   2229	struct ext4_fc_stats *stats = &sbi->s_fc_stats;
   2230	int i;
   2231
   2232	if (v != SEQ_START_TOKEN)
   2233		return 0;
   2234
   2235	seq_printf(seq,
   2236		"fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n",
   2237		   stats->fc_num_commits, stats->fc_ineligible_commits,
   2238		   stats->fc_numblks,
   2239		   div_u64(stats->s_fc_avg_commit_time, 1000));
   2240	seq_puts(seq, "Ineligible reasons:\n");
   2241	for (i = 0; i < EXT4_FC_REASON_MAX; i++)
   2242		seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i],
   2243			stats->fc_ineligible_reason_count[i]);
   2244
   2245	return 0;
   2246}
   2247
   2248int __init ext4_fc_init_dentry_cache(void)
   2249{
   2250	ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update,
   2251					   SLAB_RECLAIM_ACCOUNT);
   2252
   2253	if (ext4_fc_dentry_cachep == NULL)
   2254		return -ENOMEM;
   2255
   2256	return 0;
   2257}
   2258
   2259void ext4_fc_destroy_dentry_cache(void)
   2260{
   2261	kmem_cache_destroy(ext4_fc_dentry_cachep);
   2262}