cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

log.c (37820B)


// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/crc32c.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>
#include <linux/list_sort.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "util.h"
#include "dir.h"
#include "trace_gfs2.h"
#include "trans.h"

static void gfs2_log_shutdown(struct gfs2_sbd *sdp);

/**
 * gfs2_struct2blk - compute the number of log descriptor blocks needed
 * @sdp: the filesystem
 * @nstruct: the number of structures
 *
 * Compute the number of log descriptor blocks needed to hold a certain number
 * of structures of a certain size.
 *
 * Returns: the number of blocks needed (minimum is always 1)
 */

unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct)
{
	unsigned int blks;
	unsigned int first, second;

	/* The initial struct gfs2_log_descriptor block */
	blks = 1;
	first = sdp->sd_ldptrs;

	if (nstruct > first) {
		/* Subsequent struct gfs2_meta_header blocks */
		second = sdp->sd_inptrs;
		blks += DIV_ROUND_UP(nstruct - first, second);
	}

	return blks;
}
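
/*
 * Worked example (hypothetical geometry): with sd_ldptrs = 502 pointers in
 * the initial descriptor block and sd_inptrs = 509 in each continuation
 * block, gfs2_struct2blk(sdp, 1000) = 1 + DIV_ROUND_UP(1000 - 502, 509) = 2.
 */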

/**
 * gfs2_remove_from_ail - Remove an entry from the ail lists, updating counters
 * @bd: The gfs2_bufdata to remove
 *
 * The ail lock _must_ be held when calling this function
 *
 */

void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
{
	bd->bd_tr = NULL;
	list_del_init(&bd->bd_ail_st_list);
	list_del_init(&bd->bd_ail_gl_list);
	atomic_dec(&bd->bd_gl->gl_ail_count);
	brelse(bd->bd_bh);
}

/**
 * gfs2_ail1_start_one - Start I/O on a transaction
 * @sdp: The superblock
 * @wbc: The writeback control structure
 * @tr: The transaction to start I/O on
 * @plug: The block plug currently active
 */

static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
			       struct writeback_control *wbc,
			       struct gfs2_trans *tr, struct blk_plug *plug)
__releases(&sdp->sd_ail_lock)
__acquires(&sdp->sd_ail_lock)
{
	struct gfs2_glock *gl = NULL;
	struct address_space *mapping;
	struct gfs2_bufdata *bd, *s;
	struct buffer_head *bh;
	int ret = 0;

	list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list, bd_ail_st_list) {
		bh = bd->bd_bh;

		gfs2_assert(sdp, bd->bd_tr == tr);

		if (!buffer_busy(bh)) {
			if (buffer_uptodate(bh)) {
				list_move(&bd->bd_ail_st_list,
					  &tr->tr_ail2_list);
				continue;
			}
			if (!cmpxchg(&sdp->sd_log_error, 0, -EIO)) {
				gfs2_io_error_bh(sdp, bh);
				gfs2_withdraw_delayed(sdp);
			}
		}

		if (gfs2_withdrawn(sdp)) {
			gfs2_remove_from_ail(bd);
			continue;
		}
		if (!buffer_dirty(bh))
			continue;
		if (gl == bd->bd_gl)
			continue;
		gl = bd->bd_gl;
		list_move(&bd->bd_ail_st_list, &tr->tr_ail1_list);
		mapping = bh->b_page->mapping;
		if (!mapping)
			continue;
		spin_unlock(&sdp->sd_ail_lock);
		ret = generic_writepages(mapping, wbc);
		if (need_resched()) {
			blk_finish_plug(plug);
			cond_resched();
			blk_start_plug(plug);
		}
		spin_lock(&sdp->sd_ail_lock);
		if (ret == -ENODATA) /* if a jdata write into a new hole */
			ret = 0; /* ignore it */
		if (ret || wbc->nr_to_write <= 0)
			break;
		return -EBUSY;
	}

	return ret;
}
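
/*
 * Note on the -EBUSY above: once gfs2_ail1_start_one() has written one
 * glock's mapping, sd_ail_lock has been dropped and retaken, so the ail1
 * list may have changed under us; gfs2_ail1_flush() treats -EBUSY as
 * "rescan the transaction list from the start".
 */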

static void dump_ail_list(struct gfs2_sbd *sdp)
{
	struct gfs2_trans *tr;
	struct gfs2_bufdata *bd;
	struct buffer_head *bh;

	list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) {
		list_for_each_entry_reverse(bd, &tr->tr_ail1_list,
					    bd_ail_st_list) {
			bh = bd->bd_bh;
			fs_err(sdp, "bd %p: blk:0x%llx bh=%p ", bd,
			       (unsigned long long)bd->bd_blkno, bh);
			if (!bh) {
				fs_err(sdp, "\n");
				continue;
			}
			fs_err(sdp, "0x%llx up2:%d dirt:%d lkd:%d req:%d "
			       "map:%d new:%d ar:%d aw:%d delay:%d "
			       "io err:%d unwritten:%d dfr:%d pin:%d esc:%d\n",
			       (unsigned long long)bh->b_blocknr,
			       buffer_uptodate(bh), buffer_dirty(bh),
			       buffer_locked(bh), buffer_req(bh),
			       buffer_mapped(bh), buffer_new(bh),
			       buffer_async_read(bh), buffer_async_write(bh),
			       buffer_delay(bh), buffer_write_io_error(bh),
			       buffer_unwritten(bh),
			       buffer_defer_completion(bh),
			       buffer_pinned(bh), buffer_escaped(bh));
		}
	}
}

/**
 * gfs2_ail1_flush - start writeback of some ail1 entries
 * @sdp: The super block
 * @wbc: The writeback control structure
 *
 * Writes back some ail1 entries, according to the limits in the
 * writeback control structure
 */

void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
{
	struct list_head *head = &sdp->sd_ail1_list;
	struct gfs2_trans *tr;
	struct blk_plug plug;
	int ret;
	unsigned long flush_start = jiffies;

	trace_gfs2_ail_flush(sdp, wbc, 1);
	blk_start_plug(&plug);
	spin_lock(&sdp->sd_ail_lock);
restart:
	ret = 0;
	if (time_after(jiffies, flush_start + (HZ * 600))) {
		fs_err(sdp, "Error: In %s for ten minutes! t=%d\n",
		       __func__, current->journal_info ? 1 : 0);
		dump_ail_list(sdp);
		goto out;
	}
	list_for_each_entry_reverse(tr, head, tr_list) {
		if (wbc->nr_to_write <= 0)
			break;
		ret = gfs2_ail1_start_one(sdp, wbc, tr, &plug);
		if (ret) {
			if (ret == -EBUSY)
				goto restart;
			break;
		}
	}
out:
	spin_unlock(&sdp->sd_ail_lock);
	blk_finish_plug(&plug);
	if (ret) {
		gfs2_lm(sdp, "gfs2_ail1_start_one (generic_writepages) "
			"returned: %d\n", ret);
		gfs2_withdraw(sdp);
	}
	trace_gfs2_ail_flush(sdp, wbc, 0);
}

/**
 * gfs2_ail1_start - start writeback of all ail1 entries
 * @sdp: The superblock
 */

static void gfs2_ail1_start(struct gfs2_sbd *sdp)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
		.nr_to_write = LONG_MAX,
		.range_start = 0,
		.range_end = LLONG_MAX,
	};

	return gfs2_ail1_flush(sdp, &wbc);
}

static void gfs2_log_update_flush_tail(struct gfs2_sbd *sdp)
{
	unsigned int new_flush_tail = sdp->sd_log_head;
	struct gfs2_trans *tr;

	if (!list_empty(&sdp->sd_ail1_list)) {
		tr = list_last_entry(&sdp->sd_ail1_list,
				     struct gfs2_trans, tr_list);
		new_flush_tail = tr->tr_first;
	}
	sdp->sd_log_flush_tail = new_flush_tail;
}

static void gfs2_log_update_head(struct gfs2_sbd *sdp)
{
	unsigned int new_head = sdp->sd_log_flush_head;

	if (sdp->sd_log_flush_tail == sdp->sd_log_head)
		sdp->sd_log_flush_tail = new_head;
	sdp->sd_log_head = new_head;
}

/*
 * gfs2_ail_empty_tr - empty one of the ail lists of a transaction
 */

static void gfs2_ail_empty_tr(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
			      struct list_head *head)
{
	struct gfs2_bufdata *bd;

	while (!list_empty(head)) {
		bd = list_first_entry(head, struct gfs2_bufdata,
				      bd_ail_st_list);
		gfs2_assert(sdp, bd->bd_tr == tr);
		gfs2_remove_from_ail(bd);
	}
}

/**
 * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
 * @sdp: the filesystem
 * @tr: the transaction
 * @max_revokes: If nonzero, issue revokes for the bd items for written buffers
 *
 * returns: the transaction's count of remaining active items
 */

static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
				int *max_revokes)
{
	struct gfs2_bufdata *bd, *s;
	struct buffer_head *bh;
	int active_count = 0;

	list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list,
					 bd_ail_st_list) {
		bh = bd->bd_bh;
		gfs2_assert(sdp, bd->bd_tr == tr);
		/*
		 * If another process flagged an io error, e.g. writing to the
		 * journal, error all other bhs and move them off the ail1 to
		 * prevent a tight loop when unmount tries to flush ail1,
		 * regardless of whether they're still busy. If no outside
		 * errors were found and the buffer is busy, move to the next.
		 * If the ail buffer is not busy and caught an error, flag it
		 * for others.
		 */
		if (!sdp->sd_log_error && buffer_busy(bh)) {
			active_count++;
			continue;
		}
		if (!buffer_uptodate(bh) &&
		    !cmpxchg(&sdp->sd_log_error, 0, -EIO)) {
			gfs2_io_error_bh(sdp, bh);
			gfs2_withdraw_delayed(sdp);
		}
		/*
		 * If we have space for revokes and the bd is no longer on any
		 * buf list, we can just add a revoke for it immediately and
		 * avoid having to put it on the ail2 list, where it would need
		 * to be revoked later.
		 */
		if (*max_revokes && list_empty(&bd->bd_list)) {
			gfs2_add_revoke(sdp, bd);
			(*max_revokes)--;
			continue;
		}
		list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
	}
	return active_count;
}

/**
 * gfs2_ail1_empty - Try to empty the ail1 lists
 * @sdp: The superblock
 * @max_revokes: If non-zero, add revokes where appropriate
 *
 * Tries to empty the ail1 lists, starting with the oldest first
 */

static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int max_revokes)
{
	struct gfs2_trans *tr, *s;
	int oldest_tr = 1;
	int ret;

	spin_lock(&sdp->sd_ail_lock);
	list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) {
		if (!gfs2_ail1_empty_one(sdp, tr, &max_revokes) && oldest_tr)
			list_move(&tr->tr_list, &sdp->sd_ail2_list);
		else
			oldest_tr = 0;
	}
	gfs2_log_update_flush_tail(sdp);
	ret = list_empty(&sdp->sd_ail1_list);
	spin_unlock(&sdp->sd_ail_lock);

	if (test_bit(SDF_WITHDRAWING, &sdp->sd_flags)) {
		gfs2_lm(sdp, "fatal: I/O error(s)\n");
		gfs2_withdraw(sdp);
	}

	return ret;
}

static void gfs2_ail1_wait(struct gfs2_sbd *sdp)
{
	struct gfs2_trans *tr;
	struct gfs2_bufdata *bd;
	struct buffer_head *bh;

	spin_lock(&sdp->sd_ail_lock);
	list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) {
		list_for_each_entry(bd, &tr->tr_ail1_list, bd_ail_st_list) {
			bh = bd->bd_bh;
			if (!buffer_locked(bh))
				continue;
			get_bh(bh);
			spin_unlock(&sdp->sd_ail_lock);
			wait_on_buffer(bh);
			brelse(bh);
			return;
		}
	}
	spin_unlock(&sdp->sd_ail_lock);
}

static void __ail2_empty(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
	list_del(&tr->tr_list);
	gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list));
	gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list));
	gfs2_trans_free(sdp, tr);
}

static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
{
	struct list_head *ail2_list = &sdp->sd_ail2_list;
	unsigned int old_tail = sdp->sd_log_tail;
	struct gfs2_trans *tr, *safe;

	spin_lock(&sdp->sd_ail_lock);
	if (old_tail <= new_tail) {
		list_for_each_entry_safe(tr, safe, ail2_list, tr_list) {
			if (old_tail <= tr->tr_first && tr->tr_first < new_tail)
				__ail2_empty(sdp, tr);
		}
	} else {
		list_for_each_entry_safe(tr, safe, ail2_list, tr_list) {
			if (old_tail <= tr->tr_first || tr->tr_first < new_tail)
				__ail2_empty(sdp, tr);
		}
	}
	spin_unlock(&sdp->sd_ail_lock);
}
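
/*
 * The two branches above handle the journal's circular layout: together they
 * free exactly the transactions whose tr_first falls in the half-open range
 * [old_tail, new_tail), the second branch covering the case where that range
 * wraps past the end of the journal.
 */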

/**
 * gfs2_log_is_empty - Check if the log is empty
 * @sdp: The GFS2 superblock
 */

bool gfs2_log_is_empty(struct gfs2_sbd *sdp)
{
	return atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks;
}

static bool __gfs2_log_try_reserve_revokes(struct gfs2_sbd *sdp, unsigned int revokes)
{
	unsigned int available;

	available = atomic_read(&sdp->sd_log_revokes_available);
	while (available >= revokes) {
		if (atomic_try_cmpxchg(&sdp->sd_log_revokes_available,
				       &available, available - revokes))
			return true;
	}
	return false;
}
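
/*
 * On failure, atomic_try_cmpxchg() reloads the current counter value into
 * "available", so the loop above simply retries with fresh data and gives
 * up once fewer than "revokes" slots remain.
 */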

/**
 * gfs2_log_release_revokes - Release a given number of revokes
 * @sdp: The GFS2 superblock
 * @revokes: The number of revokes to release
 *
 * sdp->sd_log_flush_lock must be held.
 */
void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes)
{
	if (revokes)
		atomic_add(revokes, &sdp->sd_log_revokes_available);
}

/**
 * gfs2_log_release - Release a given number of log blocks
 * @sdp: The GFS2 superblock
 * @blks: The number of blocks
 *
 */

void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
{
	atomic_add(blks, &sdp->sd_log_blks_free);
	trace_gfs2_log_blocks(sdp, blks);
	gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
				  sdp->sd_jdesc->jd_blocks);
	if (atomic_read(&sdp->sd_log_blks_needed))
		wake_up(&sdp->sd_log_waitq);
}

/**
 * __gfs2_log_try_reserve - Try to make a log reservation
 * @sdp: The GFS2 superblock
 * @blks: The number of blocks to reserve
 * @taboo_blks: The number of blocks to leave free
 *
 * Try to do the same as __gfs2_log_reserve(), but fail if no more log
 * space is immediately available.
 */
static bool __gfs2_log_try_reserve(struct gfs2_sbd *sdp, unsigned int blks,
				   unsigned int taboo_blks)
{
	unsigned int wanted = blks + taboo_blks;
	unsigned int free_blocks;

	free_blocks = atomic_read(&sdp->sd_log_blks_free);
	while (free_blocks >= wanted) {
		if (atomic_try_cmpxchg(&sdp->sd_log_blks_free, &free_blocks,
				       free_blocks - blks)) {
			trace_gfs2_log_blocks(sdp, -blks);
			return true;
		}
	}
	return false;
}

/**
 * __gfs2_log_reserve - Make a log reservation
 * @sdp: The GFS2 superblock
 * @blks: The number of blocks to reserve
 * @taboo_blks: The number of blocks to leave free
 *
 * @taboo_blks is set to 0 for logd, and to GFS2_LOG_FLUSH_MIN_BLOCKS
 * for all other processes.  This ensures that when the log is almost full,
 * logd will still be able to call gfs2_log_flush one more time without
 * blocking, which will advance the tail and make some more log space
 * available.
 *
 * We no longer flush the log here, instead we wake up logd to do that
 * for us. To avoid the thundering herd and to ensure that we deal fairly
 * with queued waiters, we use an exclusive wait. This means that when we
 * get woken with enough journal space to get our reservation, we need to
 * wake the next waiter on the list.
 */

static void __gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks,
			       unsigned int taboo_blks)
{
	unsigned int wanted = blks + taboo_blks;
	unsigned int free_blocks;

	atomic_add(blks, &sdp->sd_log_blks_needed);
	for (;;) {
		if (current != sdp->sd_logd_process)
			wake_up(&sdp->sd_logd_waitq);
		io_wait_event(sdp->sd_log_waitq,
			(free_blocks = atomic_read(&sdp->sd_log_blks_free),
			 free_blocks >= wanted));
		do {
			if (atomic_try_cmpxchg(&sdp->sd_log_blks_free,
					       &free_blocks,
					       free_blocks - blks))
				goto reserved;
		} while (free_blocks >= wanted);
	}

reserved:
	trace_gfs2_log_blocks(sdp, -blks);
	if (atomic_sub_return(blks, &sdp->sd_log_blks_needed))
		wake_up(&sdp->sd_log_waitq);
}
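
/*
 * The final wake_up() above passes the baton: once this reservation is
 * taken, a nonzero sd_log_blks_needed means other reservers are still
 * queued, so wake the next one rather than leaving it to sleep until the
 * next gfs2_log_release().
 */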

/**
 * gfs2_log_try_reserve - Try to make a log reservation
 * @sdp: The GFS2 superblock
 * @tr: The transaction
 * @extra_revokes: The number of additional revokes reserved (output)
 *
 * This is similar to gfs2_log_reserve, but sdp->sd_log_flush_lock must be
 * held for correct revoke accounting.
 */

bool gfs2_log_try_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
			  unsigned int *extra_revokes)
{
	unsigned int blks = tr->tr_reserved;
	unsigned int revokes = tr->tr_revokes;
	unsigned int revoke_blks = 0;

	*extra_revokes = 0;
	if (revokes && !__gfs2_log_try_reserve_revokes(sdp, revokes)) {
		revoke_blks = DIV_ROUND_UP(revokes, sdp->sd_inptrs);
		*extra_revokes = revoke_blks * sdp->sd_inptrs - revokes;
		blks += revoke_blks;
	}
	if (!blks)
		return true;
	if (__gfs2_log_try_reserve(sdp, blks, GFS2_LOG_FLUSH_MIN_BLOCKS))
		return true;
	if (!revoke_blks)
		gfs2_log_release_revokes(sdp, revokes);
	return false;
}
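
/*
 * Revoke block math, assuming a hypothetical sd_inptrs of 509 revokes per
 * block: a transaction with tr_revokes = 520 that misses the shared revoke
 * pool needs DIV_ROUND_UP(520, 509) = 2 extra journal blocks, which hold
 * 2 * 509 - 520 = 498 spare slots, reported back via *extra_revokes.
 */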

/**
 * gfs2_log_reserve - Make a log reservation
 * @sdp: The GFS2 superblock
 * @tr: The transaction
 * @extra_revokes: The number of additional revokes reserved (output)
 *
 * sdp->sd_log_flush_lock must not be held.
 */

void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
		      unsigned int *extra_revokes)
{
	unsigned int blks = tr->tr_reserved;
	unsigned int revokes = tr->tr_revokes;
	unsigned int revoke_blks;

	*extra_revokes = 0;
	if (revokes) {
		revoke_blks = DIV_ROUND_UP(revokes, sdp->sd_inptrs);
		*extra_revokes = revoke_blks * sdp->sd_inptrs - revokes;
		blks += revoke_blks;
	}
	__gfs2_log_reserve(sdp, blks, GFS2_LOG_FLUSH_MIN_BLOCKS);
}

/**
 * log_distance - Compute distance between two journal blocks
 * @sdp: The GFS2 superblock
 * @newer: The most recent journal block of the pair
 * @older: The older journal block of the pair
 *
 *   Compute the distance (in the journal direction) between two
 *   blocks in the journal
 *
 * Returns: the distance in blocks
 */

static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer,
					unsigned int older)
{
	int dist;

	dist = newer - older;
	if (dist < 0)
		dist += sdp->sd_jdesc->jd_blocks;

	return dist;
}
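
/*
 * Wraparound example, assuming a journal of jd_blocks = 32768: for
 * newer = 10 and older = 32760, dist = 10 - 32760 = -32750, and the
 * correction yields -32750 + 32768 = 18 blocks in the journal direction.
 */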

/**
 * calc_reserved - Calculate the number of blocks to keep reserved
 * @sdp: The GFS2 superblock
 *
 * This is complex.  We need to reserve room for all our currently used
 * metadata blocks (e.g. normal file I/O rewriting file time stamps) and
 * all our journaled data blocks for journaled files (e.g. files in the
 * meta_fs like rindex, or files for which chattr +j was done.)
 * If we don't reserve enough space, corruption will follow.
 *
 * We can have metadata blocks and jdata blocks in the same journal.  Each
 * type gets its own log descriptor, for which we need to reserve a block.
 * In fact, each type has the potential for needing more than one log descriptor
 * in cases where we have more blocks than will fit in a log descriptor.
 * Metadata journal entries take up half the space of journaled buffer entries.
 *
 * Also, we need to reserve blocks for revoke journal entries and one for an
 * overall header for the lot.
 *
 * Returns: the number of blocks reserved
 */
static unsigned int calc_reserved(struct gfs2_sbd *sdp)
{
	unsigned int reserved = GFS2_LOG_FLUSH_MIN_BLOCKS;
	unsigned int blocks;
	struct gfs2_trans *tr = sdp->sd_log_tr;

	if (tr) {
		blocks = tr->tr_num_buf_new - tr->tr_num_buf_rm;
		reserved += blocks + DIV_ROUND_UP(blocks, buf_limit(sdp));
		blocks = tr->tr_num_databuf_new - tr->tr_num_databuf_rm;
		reserved += blocks + DIV_ROUND_UP(blocks, databuf_limit(sdp));
	}
	return reserved;
}
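
/*
 * Sketch of the arithmetic, with hypothetical limits buf_limit(sdp) = 502
 * and databuf_limit(sdp) = 251: a transaction with 600 net new metadata
 * buffers and 100 net new jdata buffers reserves 600 + 2 descriptor blocks
 * plus 100 + 1, on top of GFS2_LOG_FLUSH_MIN_BLOCKS.
 */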

static void log_pull_tail(struct gfs2_sbd *sdp)
{
	unsigned int new_tail = sdp->sd_log_flush_tail;
	unsigned int dist;

	if (new_tail == sdp->sd_log_tail)
		return;
	dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
	ail2_empty(sdp, new_tail);
	gfs2_log_release(sdp, dist);
	sdp->sd_log_tail = new_tail;
}

void log_flush_wait(struct gfs2_sbd *sdp)
{
	DEFINE_WAIT(wait);

	if (atomic_read(&sdp->sd_log_in_flight)) {
		do {
			prepare_to_wait(&sdp->sd_log_flush_wait, &wait,
					TASK_UNINTERRUPTIBLE);
			if (atomic_read(&sdp->sd_log_in_flight))
				io_schedule();
		} while (atomic_read(&sdp->sd_log_in_flight));
		finish_wait(&sdp->sd_log_flush_wait, &wait);
	}
}

static int ip_cmp(void *priv, const struct list_head *a, const struct list_head *b)
{
	struct gfs2_inode *ipa, *ipb;

	ipa = list_entry(a, struct gfs2_inode, i_ordered);
	ipb = list_entry(b, struct gfs2_inode, i_ordered);

	if (ipa->i_no_addr < ipb->i_no_addr)
		return -1;
	if (ipa->i_no_addr > ipb->i_no_addr)
		return 1;
	return 0;
}
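
/*
 * ip_cmp() is the comparator that gfs2_ordered_write() below passes to
 * list_sort(): ordering the dirty inodes by disk address (i_no_addr) lets
 * the ordered writeback pass walk the device roughly sequentially.
 */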

static void __ordered_del_inode(struct gfs2_inode *ip)
{
	if (!list_empty(&ip->i_ordered))
		list_del_init(&ip->i_ordered);
}

static void gfs2_ordered_write(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip;
	LIST_HEAD(written);

	spin_lock(&sdp->sd_ordered_lock);
	list_sort(NULL, &sdp->sd_log_ordered, &ip_cmp);
	while (!list_empty(&sdp->sd_log_ordered)) {
		ip = list_first_entry(&sdp->sd_log_ordered, struct gfs2_inode, i_ordered);
		if (ip->i_inode.i_mapping->nrpages == 0) {
			__ordered_del_inode(ip);
			continue;
		}
		list_move(&ip->i_ordered, &written);
		spin_unlock(&sdp->sd_ordered_lock);
		filemap_fdatawrite(ip->i_inode.i_mapping);
		spin_lock(&sdp->sd_ordered_lock);
	}
	list_splice(&written, &sdp->sd_log_ordered);
	spin_unlock(&sdp->sd_ordered_lock);
}

static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip;

	spin_lock(&sdp->sd_ordered_lock);
	while (!list_empty(&sdp->sd_log_ordered)) {
		ip = list_first_entry(&sdp->sd_log_ordered, struct gfs2_inode, i_ordered);
		__ordered_del_inode(ip);
		if (ip->i_inode.i_mapping->nrpages == 0)
			continue;
		spin_unlock(&sdp->sd_ordered_lock);
		filemap_fdatawait(ip->i_inode.i_mapping);
		spin_lock(&sdp->sd_ordered_lock);
	}
	spin_unlock(&sdp->sd_ordered_lock);
}

void gfs2_ordered_del_inode(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);

	spin_lock(&sdp->sd_ordered_lock);
	__ordered_del_inode(ip);
	spin_unlock(&sdp->sd_ordered_lock);
}

void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
{
	struct buffer_head *bh = bd->bd_bh;
	struct gfs2_glock *gl = bd->bd_gl;

	sdp->sd_log_num_revoke++;
	if (atomic_inc_return(&gl->gl_revokes) == 1)
		gfs2_glock_hold(gl);
	bh->b_private = NULL;
	bd->bd_blkno = bh->b_blocknr;
	gfs2_remove_from_ail(bd); /* drops ref on bh */
	bd->bd_bh = NULL;
	set_bit(GLF_LFLUSH, &gl->gl_flags);
	list_add(&bd->bd_list, &sdp->sd_log_revokes);
}

void gfs2_glock_remove_revoke(struct gfs2_glock *gl)
{
	if (atomic_dec_return(&gl->gl_revokes) == 0) {
		clear_bit(GLF_LFLUSH, &gl->gl_flags);
		gfs2_glock_queue_put(gl);
	}
}
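
/*
 * Reference pairing: gfs2_add_revoke() takes a glock reference when
 * gl_revokes goes from 0 to 1, and gfs2_glock_remove_revoke() drops it
 * (and clears GLF_LFLUSH) when the last outstanding revoke for that glock
 * goes away.
 */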

/**
 * gfs2_flush_revokes - Add as many revokes to the system transaction as we can
 * @sdp: The GFS2 superblock
 *
 * Our usual strategy is to defer writing revokes as much as we can in the hope
 * that we'll eventually overwrite the journal, which will make those revokes
 * go away.  This changes when we flush the log: at that point, there will
 * likely be some left-over space in the last revoke block of that transaction.
 * We can fill that space with additional revokes for blocks that have already
 * been written back.  This will basically come at no cost now, and will save
 * us from having to keep track of those blocks on the AIL2 list later.
 */
void gfs2_flush_revokes(struct gfs2_sbd *sdp)
{
	/* number of revokes we still have room for */
	unsigned int max_revokes = atomic_read(&sdp->sd_log_revokes_available);

	gfs2_log_lock(sdp);
	gfs2_ail1_empty(sdp, max_revokes);
	gfs2_log_unlock(sdp);
}
/**
 * gfs2_write_log_header - Write a journal log header buffer at lblock
 * @sdp: The GFS2 superblock
 * @jd: journal descriptor of the journal to which we are writing
 * @seq: sequence number
 * @tail: tail of the log
 * @lblock: value for lh_blkno (block number relative to start of journal)
 * @flags: log header flags GFS2_LOG_HEAD_*
 * @op_flags: flags to pass to the bio
 */

void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
			   u64 seq, u32 tail, u32 lblock, u32 flags,
			   int op_flags)
{
	struct gfs2_log_header *lh;
	u32 hash, crc;
	struct page *page;
	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
	struct timespec64 tv;
	struct super_block *sb = sdp->sd_vfs;
	u64 dblock;

	if (gfs2_withdrawn(sdp))
		return;

	page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
	lh = page_address(page);
	clear_page(lh);

	lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
	lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
	lh->lh_header.__pad0 = cpu_to_be64(0);
	lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
	lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
	lh->lh_sequence = cpu_to_be64(seq);
	lh->lh_flags = cpu_to_be32(flags);
	lh->lh_tail = cpu_to_be32(tail);
	lh->lh_blkno = cpu_to_be32(lblock);
	hash = ~crc32(~0, lh, LH_V1_SIZE);
	lh->lh_hash = cpu_to_be32(hash);

	ktime_get_coarse_real_ts64(&tv);
	lh->lh_nsec = cpu_to_be32(tv.tv_nsec);
	lh->lh_sec = cpu_to_be64(tv.tv_sec);
	if (!list_empty(&jd->extent_list))
		dblock = gfs2_log_bmap(jd, lblock);
	else {
		unsigned int extlen;
		int ret;

		extlen = 1;
		ret = gfs2_get_extent(jd->jd_inode, lblock, &dblock, &extlen);
		if (gfs2_assert_withdraw(sdp, ret == 0))
			return;
	}
	lh->lh_addr = cpu_to_be64(dblock);
	lh->lh_jinode = cpu_to_be64(GFS2_I(jd->jd_inode)->i_no_addr);

	/* We may only write local statfs, quota, etc., when writing to our
	   own journal. The values are left 0 when recovering a journal
	   different from our own. */
	if (!(flags & GFS2_LOG_HEAD_RECOVERY)) {
		lh->lh_statfs_addr =
			cpu_to_be64(GFS2_I(sdp->sd_sc_inode)->i_no_addr);
		lh->lh_quota_addr =
			cpu_to_be64(GFS2_I(sdp->sd_qc_inode)->i_no_addr);

		spin_lock(&sdp->sd_statfs_spin);
		lh->lh_local_total = cpu_to_be64(l_sc->sc_total);
		lh->lh_local_free = cpu_to_be64(l_sc->sc_free);
		lh->lh_local_dinodes = cpu_to_be64(l_sc->sc_dinodes);
		spin_unlock(&sdp->sd_statfs_spin);
	}

	BUILD_BUG_ON(offsetof(struct gfs2_log_header, lh_crc) != LH_V1_SIZE);

	crc = crc32c(~0, (void *)lh + LH_V1_SIZE + 4,
		     sb->s_blocksize - LH_V1_SIZE - 4);
	lh->lh_crc = cpu_to_be32(crc);

	gfs2_log_write(sdp, jd, page, sb->s_blocksize, 0, dblock);
	gfs2_log_submit_bio(&jd->jd_log_bio, REQ_OP_WRITE | op_flags);
}
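
/*
 * Two checksums protect the header written above: lh_hash is a crc32 over
 * the original (v1) header fields only, while lh_crc is a crc32c over the
 * remainder of the block, skipping the 4-byte lh_crc field itself (the
 * BUILD_BUG_ON pins lh_crc to offset LH_V1_SIZE). Keeping the v1 hash
 * separate is presumably what lets code unaware of the newer fields still
 * validate the header.
 */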

/**
 * log_write_header - Get and initialize a journal header buffer
 * @sdp: The GFS2 superblock
 * @flags: The log header flags, including log header origin
 */

static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
{
	int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC;
	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);

	gfs2_assert_withdraw(sdp, (state != SFS_FROZEN));

	if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) {
		gfs2_ordered_wait(sdp);
		log_flush_wait(sdp);
		op_flags = REQ_SYNC | REQ_META | REQ_PRIO;
	}
	sdp->sd_log_idle = (sdp->sd_log_flush_tail == sdp->sd_log_flush_head);
	gfs2_write_log_header(sdp, sdp->sd_jdesc, sdp->sd_log_sequence++,
			      sdp->sd_log_flush_tail, sdp->sd_log_flush_head,
			      flags, op_flags);
	gfs2_log_incr_head(sdp);
	log_flush_wait(sdp);
	log_pull_tail(sdp);
	gfs2_log_update_head(sdp);
}

/**
 * gfs2_ail_drain - drain the ail lists after a withdraw
 * @sdp: Pointer to GFS2 superblock
 */
void gfs2_ail_drain(struct gfs2_sbd *sdp)
{
	struct gfs2_trans *tr;

	spin_lock(&sdp->sd_ail_lock);
	/*
	 * For transactions on the sd_ail1_list we need to drain both the
	 * ail1 and ail2 lists. That's because function gfs2_ail1_start_one
	 * (temporarily) moves items from its tr_ail1 list to tr_ail2 list
	 * before revokes are sent for that block. Items on the sd_ail2_list
	 * should have already gotten beyond that point, so no need.
	 */
	while (!list_empty(&sdp->sd_ail1_list)) {
		tr = list_first_entry(&sdp->sd_ail1_list, struct gfs2_trans,
				      tr_list);
		gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail1_list);
		gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
		list_del(&tr->tr_list);
		gfs2_trans_free(sdp, tr);
	}
	while (!list_empty(&sdp->sd_ail2_list)) {
		tr = list_first_entry(&sdp->sd_ail2_list, struct gfs2_trans,
				      tr_list);
		gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
		list_del(&tr->tr_list);
		gfs2_trans_free(sdp, tr);
	}
	gfs2_drain_revokes(sdp);
	spin_unlock(&sdp->sd_ail_lock);
}

/**
 * empty_ail1_list - try to start IO and empty the ail1 list
 * @sdp: Pointer to GFS2 superblock
 */
static void empty_ail1_list(struct gfs2_sbd *sdp)
{
	unsigned long start = jiffies;

	for (;;) {
		if (time_after(jiffies, start + (HZ * 600))) {
			fs_err(sdp, "Error: In %s for 10 minutes! t=%d\n",
			       __func__, current->journal_info ? 1 : 0);
			dump_ail_list(sdp);
			return;
		}
		gfs2_ail1_start(sdp);
		gfs2_ail1_wait(sdp);
		if (gfs2_ail1_empty(sdp, 0))
			return;
	}
}

/**
 * trans_drain - drain the buf and databuf queue for a failed transaction
 * @tr: the transaction to drain
 *
 * When this is called, we're taking an error exit for a log write that failed
 * but since we bypassed the after_commit functions, we need to remove the
 * items from the buf and databuf queue.
 */
static void trans_drain(struct gfs2_trans *tr)
{
	struct gfs2_bufdata *bd;
	struct list_head *head;

	if (!tr)
		return;

	head = &tr->tr_buf;
	while (!list_empty(head)) {
		bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
		list_del_init(&bd->bd_list);
		if (!list_empty(&bd->bd_ail_st_list))
			gfs2_remove_from_ail(bd);
		kmem_cache_free(gfs2_bufdata_cachep, bd);
	}
	head = &tr->tr_databuf;
	while (!list_empty(head)) {
		bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
		list_del_init(&bd->bd_list);
		if (!list_empty(&bd->bd_ail_st_list))
			gfs2_remove_from_ail(bd);
		kmem_cache_free(gfs2_bufdata_cachep, bd);
	}
}

/**
 * gfs2_log_flush - flush incore transaction(s)
 * @sdp: The filesystem
 * @gl: The glock structure to flush.  If NULL, flush the whole incore log
 * @flags: The log header flags: GFS2_LOG_HEAD_FLUSH_* and debug flags
 *
 */

void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
{
	struct gfs2_trans *tr = NULL;
	unsigned int reserved_blocks = 0, used_blocks = 0;
	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
	unsigned int first_log_head;
	unsigned int reserved_revokes = 0;

	down_write(&sdp->sd_log_flush_lock);
	trace_gfs2_log_flush(sdp, 1, flags);

repeat:
	/*
	 * Do this check while holding the log_flush_lock to prevent new
	 * buffers from being added to the ail via gfs2_pin()
	 */
	if (gfs2_withdrawn(sdp) || !test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
		goto out;

	/* Log might have been flushed while we waited for the flush lock */
	if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags))
		goto out;

	first_log_head = sdp->sd_log_head;
	sdp->sd_log_flush_head = first_log_head;

	tr = sdp->sd_log_tr;
	if (tr || sdp->sd_log_num_revoke) {
		if (reserved_blocks)
			gfs2_log_release(sdp, reserved_blocks);
		reserved_blocks = sdp->sd_log_blks_reserved;
		reserved_revokes = sdp->sd_log_num_revoke;
		if (tr) {
			sdp->sd_log_tr = NULL;
			tr->tr_first = first_log_head;
			if (unlikely(state == SFS_FROZEN)) {
				if (gfs2_assert_withdraw_delayed(sdp,
				       !tr->tr_num_buf_new && !tr->tr_num_databuf_new))
					goto out_withdraw;
			}
		}
	} else if (!reserved_blocks) {
		unsigned int taboo_blocks = GFS2_LOG_FLUSH_MIN_BLOCKS;

		reserved_blocks = GFS2_LOG_FLUSH_MIN_BLOCKS;
		if (current == sdp->sd_logd_process)
			taboo_blocks = 0;

		if (!__gfs2_log_try_reserve(sdp, reserved_blocks, taboo_blocks)) {
			up_write(&sdp->sd_log_flush_lock);
			__gfs2_log_reserve(sdp, reserved_blocks, taboo_blocks);
			down_write(&sdp->sd_log_flush_lock);
			goto repeat;
		}
		BUG_ON(sdp->sd_log_num_revoke);
	}

	if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN)
		clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);

	if (unlikely(state == SFS_FROZEN))
		if (gfs2_assert_withdraw_delayed(sdp, !reserved_revokes))
			goto out_withdraw;

	gfs2_ordered_write(sdp);
	if (gfs2_withdrawn(sdp))
		goto out_withdraw;
	lops_before_commit(sdp, tr);
	if (gfs2_withdrawn(sdp))
		goto out_withdraw;
	gfs2_log_submit_bio(&sdp->sd_jdesc->jd_log_bio, REQ_OP_WRITE);
	if (gfs2_withdrawn(sdp))
		goto out_withdraw;

	if (sdp->sd_log_head != sdp->sd_log_flush_head) {
		log_write_header(sdp, flags);
	} else if (sdp->sd_log_tail != sdp->sd_log_flush_tail && !sdp->sd_log_idle) {
		log_write_header(sdp, flags);
	}
	if (gfs2_withdrawn(sdp))
		goto out_withdraw;
	lops_after_commit(sdp, tr);

	gfs2_log_lock(sdp);
	sdp->sd_log_blks_reserved = 0;

	spin_lock(&sdp->sd_ail_lock);
	if (tr && !list_empty(&tr->tr_ail1_list)) {
		list_add(&tr->tr_list, &sdp->sd_ail1_list);
		tr = NULL;
	}
	spin_unlock(&sdp->sd_ail_lock);
	gfs2_log_unlock(sdp);

	if (!(flags & GFS2_LOG_HEAD_FLUSH_NORMAL)) {
		if (!sdp->sd_log_idle) {
			empty_ail1_list(sdp);
			if (gfs2_withdrawn(sdp))
				goto out_withdraw;
			log_write_header(sdp, flags);
		}
		if (flags & (GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
			     GFS2_LOG_HEAD_FLUSH_FREEZE))
			gfs2_log_shutdown(sdp);
		if (flags & GFS2_LOG_HEAD_FLUSH_FREEZE)
			atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
	}

out_end:
	used_blocks = log_distance(sdp, sdp->sd_log_flush_head, first_log_head);
	reserved_revokes += atomic_read(&sdp->sd_log_revokes_available);
	atomic_set(&sdp->sd_log_revokes_available, sdp->sd_ldptrs);
	gfs2_assert_withdraw(sdp, reserved_revokes % sdp->sd_inptrs == sdp->sd_ldptrs);
	if (reserved_revokes > sdp->sd_ldptrs)
		reserved_blocks += (reserved_revokes - sdp->sd_ldptrs) / sdp->sd_inptrs;
out:
	if (used_blocks != reserved_blocks) {
		gfs2_assert_withdraw_delayed(sdp, used_blocks < reserved_blocks);
		gfs2_log_release(sdp, reserved_blocks - used_blocks);
	}
	up_write(&sdp->sd_log_flush_lock);
	gfs2_trans_free(sdp, tr);
	if (gfs2_withdrawing(sdp))
		gfs2_withdraw(sdp);
	trace_gfs2_log_flush(sdp, 0, flags);
	return;

out_withdraw:
	trans_drain(tr);
	/*
	 * If the tr_list is empty, we're withdrawing during a log
	 * flush that targets a transaction, but the transaction was
	 * never queued onto any of the ail lists. Here we add it to
	 * ail1 just so that ail_drain() will find and free it.
	 */
	spin_lock(&sdp->sd_ail_lock);
	if (tr && list_empty(&tr->tr_list))
		list_add(&tr->tr_list, &sdp->sd_ail1_list);
	spin_unlock(&sdp->sd_ail_lock);
	tr = NULL;
	goto out_end;
}

/**
 * gfs2_merge_trans - Merge a new transaction into a cached transaction
 * @sdp: the filesystem
 * @new: New transaction to be merged
 */

static void gfs2_merge_trans(struct gfs2_sbd *sdp, struct gfs2_trans *new)
{
	struct gfs2_trans *old = sdp->sd_log_tr;

	WARN_ON_ONCE(!test_bit(TR_ATTACHED, &old->tr_flags));

	old->tr_num_buf_new	+= new->tr_num_buf_new;
	old->tr_num_databuf_new	+= new->tr_num_databuf_new;
	old->tr_num_buf_rm	+= new->tr_num_buf_rm;
	old->tr_num_databuf_rm	+= new->tr_num_databuf_rm;
	old->tr_revokes		+= new->tr_revokes;
	old->tr_num_revoke	+= new->tr_num_revoke;

	list_splice_tail_init(&new->tr_databuf, &old->tr_databuf);
	list_splice_tail_init(&new->tr_buf, &old->tr_buf);

	spin_lock(&sdp->sd_ail_lock);
	list_splice_tail_init(&new->tr_ail1_list, &old->tr_ail1_list);
	list_splice_tail_init(&new->tr_ail2_list, &old->tr_ail2_list);
	spin_unlock(&sdp->sd_ail_lock);
}

static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	unsigned int reserved;
	unsigned int unused;
	unsigned int maxres;

	gfs2_log_lock(sdp);

	if (sdp->sd_log_tr) {
		gfs2_merge_trans(sdp, tr);
	} else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) {
		gfs2_assert_withdraw(sdp, !test_bit(TR_ONSTACK, &tr->tr_flags));
		sdp->sd_log_tr = tr;
		set_bit(TR_ATTACHED, &tr->tr_flags);
	}

	reserved = calc_reserved(sdp);
	maxres = sdp->sd_log_blks_reserved + tr->tr_reserved;
	gfs2_assert_withdraw(sdp, maxres >= reserved);
	unused = maxres - reserved;
	if (unused)
		gfs2_log_release(sdp, unused);
	sdp->sd_log_blks_reserved = reserved;

	gfs2_log_unlock(sdp);
}
/**
 * gfs2_log_commit - Commit a transaction to the log
 * @sdp: the filesystem
 * @tr: the transaction
 *
 * We wake up gfs2_logd if the number of pinned blocks exceeds thresh1
 * or the total number of used blocks (pinned blocks plus AIL blocks)
 * is greater than thresh2.
 *
 * At mount time thresh1 is 2/5ths of journal size, thresh2 is 4/5ths of
 * journal size.
 */

void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	log_refund(sdp, tr);

	if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) ||
	    ((sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free)) >
	    atomic_read(&sdp->sd_log_thresh2)))
		wake_up(&sdp->sd_logd_waitq);
}

/**
 * gfs2_log_shutdown - write a shutdown header into a journal
 * @sdp: the filesystem
 *
 */

static void gfs2_log_shutdown(struct gfs2_sbd *sdp)
{
	gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
	gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));

	log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT | GFS2_LFC_SHUTDOWN);
	log_pull_tail(sdp);

	gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
	gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list));
}

static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
{
	return (atomic_read(&sdp->sd_log_pinned) +
		atomic_read(&sdp->sd_log_blks_needed) >=
		atomic_read(&sdp->sd_log_thresh1));
}

static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
{
	unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free);

	if (test_and_clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags))
		return 1;

	return used_blocks + atomic_read(&sdp->sd_log_blks_needed) >=
		atomic_read(&sdp->sd_log_thresh2);
}
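
/*
 * Putting numbers on the two triggers above, using the mount-time defaults
 * described at gfs2_log_commit() (thresh1 = 2/5, thresh2 = 4/5 of the
 * journal): with a 32768-block journal, logd flushes the log once pinned
 * plus needed blocks reach ~13107, and pushes the AIL once used plus needed
 * blocks reach ~26214.
 */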

/**
 * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
 * @data: Pointer to GFS2 superblock
 *
 * Also, periodically check to make sure that we're using the most recent
 * journal index.
 */

int gfs2_logd(void *data)
{
	struct gfs2_sbd *sdp = data;
	unsigned long t = 1;
	DEFINE_WAIT(wait);

	while (!kthread_should_stop()) {

		if (gfs2_withdrawn(sdp)) {
			msleep_interruptible(HZ);
			continue;
		}
		/* Check for errors writing to the journal */
		if (sdp->sd_log_error) {
			gfs2_lm(sdp,
				"GFS2: fsid=%s: error %d: "
				"withdrawing the file system to "
				"prevent further damage.\n",
				sdp->sd_fsname, sdp->sd_log_error);
			gfs2_withdraw(sdp);
			continue;
		}

		if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
			gfs2_ail1_empty(sdp, 0);
			gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
						  GFS2_LFC_LOGD_JFLUSH_REQD);
		}

		if (gfs2_ail_flush_reqd(sdp)) {
			gfs2_ail1_start(sdp);
			gfs2_ail1_wait(sdp);
			gfs2_ail1_empty(sdp, 0);
			gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
						  GFS2_LFC_LOGD_AIL_FLUSH_REQD);
		}

		t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;

		try_to_freeze();

		do {
			prepare_to_wait(&sdp->sd_logd_waitq, &wait,
					TASK_INTERRUPTIBLE);
			if (!gfs2_ail_flush_reqd(sdp) &&
			    !gfs2_jrnl_flush_reqd(sdp) &&
			    !kthread_should_stop())
				t = schedule_timeout(t);
		} while (t && !gfs2_ail_flush_reqd(sdp) &&
			 !gfs2_jrnl_flush_reqd(sdp) &&
			 !kthread_should_stop());
		finish_wait(&sdp->sd_logd_waitq, &wait);
	}

	return 0;
}
   1351